Re: [PATCH 2/3] x86/boot: Explain discard_initial_images() and untangle PV initrd handling

2024-04-29 Thread Jan Beulich
On 26.04.2024 16:01, Andrew Cooper wrote:
> discard_initial_images() frees the memory backing the boot modules.  It is
> called by dom0_construct_pv{,h}(), but obtains the module list by global
> pointer which is why there is no apparent link with the initrd pointer.
> 
> Generally, module contents are copied into dom0 as it's being built, but the
> initrd for PV dom0 might be directly mapped instead of copied.
> 
> dom0_construct_pv() does it's own ad-hoc freeing of the module in the copy
> case, and points the global reference at the new copy, then sets the size to
> 0.  This only functions because init_domheap_pages(x, x) happens to be a nop.
> 
> Delete the ad-hoc freeing, and leave it to discard_initial_images().  This
> requires (not) adjusting initd->mod_start in the copy case, and only setting
> the size to 0 in the mapping case.
> 
> Alter discard_initial_images() to explicitly check for an ignored module, and
> explain what's going on.  This is more robust and will allow for fixing other
> problems with module handling.
> 
> The later logic in dom0_construct_pv() now cannot use initrd->mod_start, but
> that's fine because initrd_mfn is already a duplicate of the information
> wanted, and is more consistent with initrd_{pfn,len} used elsewhere.
> 
> Invalidate the initrd pointer with LIST_POISON1 to make it clearer that it
> shouldn't be used.
> 
> No practical change in behaviour, but a substantial reduction in the
> complexity of how this works.
> 
> Signed-off-by: Andrew Cooper 
> ---
> CC: Jan Beulich 
> CC: Roger Pau Monné 
> CC: Stefano Stabellini 
> CC: Daniel Smith 
> CC: Christopher Clark 
> 
> In other akward questions, why does initial_images_nrpages() account for all
> modules when only 1 or 2 are relevant for how we construct dom0 ?
> ---
>  xen/arch/x86/pv/dom0_build.c | 22 +++---
>  xen/arch/x86/setup.c |  9 -
>  2 files changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
> index d8043fa58a27..64d9984b8308 100644
> --- a/xen/arch/x86/pv/dom0_build.c
> +++ b/xen/arch/x86/pv/dom0_build.c
> @@ -630,18 +630,20 @@ int __init dom0_construct_pv(struct domain *d,
>  }
>  memcpy(page_to_virt(page), mfn_to_virt(initrd->mod_start),
> initrd_len);
> -mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT;
> -init_domheap_pages(mpt_alloc,
> -   mpt_alloc + PAGE_ALIGN(initrd_len));
> -initrd->mod_start = initrd_mfn = mfn_x(page_to_mfn(page));
> +initrd_mfn = mfn_x(page_to_mfn(page));
>  }
>  else
>  {
>  while ( count-- )
>  if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) )
>  BUG();
> +/*
> + * Mapped rather than copied.  Tell discard_initial_images() to
> + * ignore it.
> + */
> +initrd->mod_end = 0;
>  }
> -initrd->mod_end = 0;
> +initrd = LIST_POISON1; /* No longer valid to use. */
>  
>  iommu_memory_setup(d, "initrd", mfn_to_page(_mfn(initrd_mfn)),
> PFN_UP(initrd_len), _flags);
> @@ -653,12 +655,10 @@ int __init dom0_construct_pv(struct domain *d,
>  if ( domain_tot_pages(d) < nr_pages )
>  printk(" (%lu pages to be allocated)",
> nr_pages - domain_tot_pages(d));
> -if ( initrd )
> -{
> -mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT;
> +if ( initrd_len )
>  printk("\n Init. ramdisk: %"PRIpaddr"->%"PRIpaddr,
> -   mpt_alloc, mpt_alloc + initrd_len);
> -}
> +   pfn_to_paddr(initrd_mfn),
> +   pfn_to_paddr(initrd_mfn) + initrd_len);
>  
>  printk("\nVIRTUAL MEMORY ARRANGEMENT:\n");
>  printk(" Loaded kernel: %p->%p\n", _p(vkern_start), _p(vkern_end));

Between what this and the following hunk touch there is

if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) )
mfn = pfn++;
else
mfn = initrd_mfn++;

I can't help thinking that this invalidates ...

> @@ -881,7 +881,7 @@ int __init dom0_construct_pv(struct domain *d,
>  if ( pfn >= initrd_pfn )
>  {
>  if ( pfn < initrd_pfn + PFN_UP(initrd_len) )
> -mfn = initrd->mod_start + (pfn - initrd_pfn);
> +mfn = initrd_mfn + (pfn - initrd_pfn);
>  else
>  mfn -= PFN_UP(initrd_len);
>  }

... the use of the variable here.

> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -294,7 +294,7 @@ unsigned long __init initial_images_nrpages(nodeid_t node)
>  return nr;
>  }
>  
> -void __init discard_initial_images(void)
> +void __init discard_initial_images(void) /* a.k.a. free multiboot modules */
>  {
>  unsigned int i;
>  
> @@ -302,6 +302,13 @@ void __init 

[PATCH 2/3] x86/boot: Explain discard_initial_images() and untangle PV initrd handling

2024-04-26 Thread Andrew Cooper
discard_initial_images() frees the memory backing the boot modules.  It is
called by dom0_construct_pv{,h}(), but obtains the module list by global
pointer which is why there is no apparent link with the initrd pointer.

Generally, module contents are copied into dom0 as it's being built, but the
initrd for PV dom0 might be directly mapped instead of copied.

dom0_construct_pv() does it's own ad-hoc freeing of the module in the copy
case, and points the global reference at the new copy, then sets the size to
0.  This only functions because init_domheap_pages(x, x) happens to be a nop.

Delete the ad-hoc freeing, and leave it to discard_initial_images().  This
requires (not) adjusting initd->mod_start in the copy case, and only setting
the size to 0 in the mapping case.

Alter discard_initial_images() to explicitly check for an ignored module, and
explain what's going on.  This is more robust and will allow for fixing other
problems with module handling.

The later logic in dom0_construct_pv() now cannot use initrd->mod_start, but
that's fine because initrd_mfn is already a duplicate of the information
wanted, and is more consistent with initrd_{pfn,len} used elsewhere.

Invalidate the initrd pointer with LIST_POISON1 to make it clearer that it
shouldn't be used.

No practical change in behaviour, but a substantial reduction in the
complexity of how this works.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Stefano Stabellini 
CC: Daniel Smith 
CC: Christopher Clark 

In other akward questions, why does initial_images_nrpages() account for all
modules when only 1 or 2 are relevant for how we construct dom0 ?
---
 xen/arch/x86/pv/dom0_build.c | 22 +++---
 xen/arch/x86/setup.c |  9 -
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index d8043fa58a27..64d9984b8308 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -630,18 +630,20 @@ int __init dom0_construct_pv(struct domain *d,
 }
 memcpy(page_to_virt(page), mfn_to_virt(initrd->mod_start),
initrd_len);
-mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT;
-init_domheap_pages(mpt_alloc,
-   mpt_alloc + PAGE_ALIGN(initrd_len));
-initrd->mod_start = initrd_mfn = mfn_x(page_to_mfn(page));
+initrd_mfn = mfn_x(page_to_mfn(page));
 }
 else
 {
 while ( count-- )
 if ( assign_pages(mfn_to_page(_mfn(mfn++)), 1, d, 0) )
 BUG();
+/*
+ * Mapped rather than copied.  Tell discard_initial_images() to
+ * ignore it.
+ */
+initrd->mod_end = 0;
 }
-initrd->mod_end = 0;
+initrd = LIST_POISON1; /* No longer valid to use. */
 
 iommu_memory_setup(d, "initrd", mfn_to_page(_mfn(initrd_mfn)),
PFN_UP(initrd_len), _flags);
@@ -653,12 +655,10 @@ int __init dom0_construct_pv(struct domain *d,
 if ( domain_tot_pages(d) < nr_pages )
 printk(" (%lu pages to be allocated)",
nr_pages - domain_tot_pages(d));
-if ( initrd )
-{
-mpt_alloc = (paddr_t)initrd->mod_start << PAGE_SHIFT;
+if ( initrd_len )
 printk("\n Init. ramdisk: %"PRIpaddr"->%"PRIpaddr,
-   mpt_alloc, mpt_alloc + initrd_len);
-}
+   pfn_to_paddr(initrd_mfn),
+   pfn_to_paddr(initrd_mfn) + initrd_len);
 
 printk("\nVIRTUAL MEMORY ARRANGEMENT:\n");
 printk(" Loaded kernel: %p->%p\n", _p(vkern_start), _p(vkern_end));
@@ -881,7 +881,7 @@ int __init dom0_construct_pv(struct domain *d,
 if ( pfn >= initrd_pfn )
 {
 if ( pfn < initrd_pfn + PFN_UP(initrd_len) )
-mfn = initrd->mod_start + (pfn - initrd_pfn);
+mfn = initrd_mfn + (pfn - initrd_pfn);
 else
 mfn -= PFN_UP(initrd_len);
 }
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 59907fae095f..785a46a44995 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -294,7 +294,7 @@ unsigned long __init initial_images_nrpages(nodeid_t node)
 return nr;
 }
 
-void __init discard_initial_images(void)
+void __init discard_initial_images(void) /* a.k.a. free multiboot modules */
 {
 unsigned int i;
 
@@ -302,6 +302,13 @@ void __init discard_initial_images(void)
 {
 uint64_t start = (uint64_t)initial_images[i].mod_start << PAGE_SHIFT;
 
+/*
+ * Sometimes the initrd is mapped, rather than copied, into dom0.
+ * end=0 signifies that we should leave it alone.
+ */
+if ( initial_images[i].mod_end == 0 )
+continue;
+
 init_domheap_pages(start,
start + PAGE_ALIGN(initial_images[i].mod_end));
 }
--