The first time aarch64 actually delivered a bootstrap module — via
QEMU's `-device guest-loader,kernel=<file>,addr=<addr>` populating
the /chosen/multiboot,module DTB node that load_boot_modules_from_dtb
already reads — two independent bugs surfaced together. Both were
latent until now because the prior commits panicked at
"No bootstrap modules loaded with Mach" before reaching
bootstrap_create.
1. phystokv contract in load_boot_modules_from_dtb
============================================================
kern/bootstrap.c reads the multiboot module table by dereferencing
boot_info.mods_addr after phystokv(), and each per-module .string
field the same way:
bmods = (struct multiboot_module *) phystokv(boot_info.mods_addr);
...
strchr((char *) phystokv(bmods[0].string), ' ');
That is: both fields are expected to hold *physical* addresses, and
the caller does its own phys-to-virt translation when dereferencing.
On x86 this is trivially satisfied — the multiboot loader hands
gnumach a structure already living in physical RAM and gnumach
stores the physical pointers verbatim.
On aarch64 we synthesise the table in-kernel (see
load_boot_modules_from_dtb). The boot_modules[] backing array is
ordinary kernel-virtual storage, and prop.data points into the DTB
which c_boot_entry already phystokv'd into the high-half mapping.
Storing those virtual pointers verbatim made phystokv() in
bootstrap_create() compute bogus addresses in the unmapped
split-half range (0xfffe0000_...), which trapped as level-0
translation faults the moment bootstrap_create() looked at the
module list.
Convert with kvtophys at population time so the values we store
honour the same contract as the x86 path.
2. Reserve module physical pages from the heap
============================================================
kern/bootstrap.c calls free_bootstrap_pages() once it has finished
exec'ing each multiboot module, and free_bootstrap_pages() asserts
that every page it releases is VM_PT_RESERVED:
assert(page->type == VM_PT_RESERVED);
On x86 that contract is satisfied by biosmem: it parses the
multiboot memory map plus the module list at boot, narrows the
range it hands to vm_page_load_heap so module-occupied pages stay
outside the heap, and vm_page_init then defaults them to
VM_PT_RESERVED.
The aarch64 port discovers physical RAM from the DTB's /memory node
and hands the entire range to vm_page_load_heap as a single
contiguous block. Modules placed in the middle of RAM ended up
inside the heap, were marked VM_PT_FREE during vm_page_init, and
tripped free_bootstrap_pages' assertion the moment bootstrap_create()
tried to release them.
Plug pmap into the same module discovery the bootstrap path
already does:
* Add pmap_reserve_phys_range(start, end) — records the lowest
reserved address across all calls (single-segment carve-out
for now; the vm_page subsystem supports up to 4 segments per
type, which would let us recover the gap above modules later).
* In pmap_bootstrap_misc, cap the heap-range upper bound at
module_phys_min before calling vm_page_load_heap. Pages above
the cap stay registered with vm_page_load but default to
VM_PT_RESERVED — exactly the state free_bootstrap_pages wants.
* In load_boot_modules_from_dtb, call pmap_reserve_phys_range
for each module the DTB advertises.
* Reorder c_boot_entry so load_boot_modules_from_dtb runs after
pmap_bootstrap (sp/x29 now virtual) but before
pmap_bootstrap_misc (which is where vm_page_load_heap fires).
Tested under qemu-system-aarch64 -M virt with the tests/test-hello
module placed at RAM_BASE + 256 MB by guest-loader: vm_page sees
the module's pages as VM_PT_RESERVED, the bootstrap task exec's
the module successfully, the module prints its success marker, and
free_bootstrap_pages then releases the module pages back to the
buddy allocator without tripping the assertion.
Trade-off worth noting: a single-segment carve-out means all
physical memory above the lowest module — apart from the module
pages themselves, which free_bootstrap_pages hands back — is lost
to the allocator until the multi-segment fix lands. For the test
harness this is fine — `-m 512` with a module at 0x50000000
(RAM_BASE + 256 MB) leaves ~256 MB usable.
---
aarch64/aarch64/model_dep.c | 40 ++++++++++++++++++++++++++++++----
aarch64/aarch64/pmap.c | 43 ++++++++++++++++++++++++++++++++++++-
aarch64/aarch64/pmap.h | 12 +++++++++++
3 files changed, 90 insertions(+), 5 deletions(-)
diff --git a/aarch64/aarch64/model_dep.c b/aarch64/aarch64/model_dep.c
index f67d30a8..9c09c052 100644
--- a/aarch64/aarch64/model_dep.c
+++ b/aarch64/aarch64/model_dep.c
@@ -20,6 +20,7 @@
#include "aarch64/locore.h"
#include "aarch64/hwcaps.h"
#include "aarch64/fpu.h"
+#include "aarch64/pmap.h"
#include "aarch64/bits/spsr.h"
#include "arm/gic-v2.h"
#include "arm/pl011.h"
@@ -313,6 +314,15 @@ void __attribute__((noreturn)) c_boot_entry(dtb_t dtb)
kr = dtb_load(dtb);
assert(kr == KERN_SUCCESS);
+ /*
+ * Discover boot modules and tell pmap about their physical
+ * ranges *before* pmap_bootstrap_misc() runs vm_page_load_heap,
+ * so module-occupied pages stay outside the heap and default
+ * to VM_PT_RESERVED — that's the contract free_bootstrap_pages
+ * relies on once the kernel has finished exec'ing each module.
+ */
+ load_boot_modules_from_dtb();
+
pmap_bootstrap_misc();
load_exception_vector_table();
@@ -335,8 +345,6 @@ void __attribute__((noreturn)) c_boot_entry(dtb_t dtb)
machine_slot[0].cpu_type = CPU_TYPE_ARM64;
init_percpu(0);
- load_boot_modules_from_dtb();
-
setup_main();
__builtin_unreachable();
}
@@ -392,7 +400,13 @@ static __attribute__((noinline)) void
load_boot_modules_from_dtb(void)
panic("No bootargs for bootstrap module %d %s\n",
i, node.name);
printf("module %d: %s\n", i, (const char *) prop.data);
- boot_modules[i].string = (vm_offset_t) prop.data;
+ /*
+ * kern/bootstrap.c calls phystokv(string) to dereference the
+ * cmdline, so we have to store a physical address here even
+ * though prop.data is a kernel-virtual pointer into the DTB
+ * (which itself was phystokv'd earlier in c_boot_entry).
+ */
+ boot_modules[i].string = kvtophys((vm_offset_t) prop.data);
prop = dtb_node_find_prop(&node, "reg");
assert(!DTB_IS_SENTINEL(prop));
@@ -415,6 +429,18 @@ static __attribute__((noinline)) void
load_boot_modules_from_dtb(void)
boot_modules[i].mod_end = boot_modules[i].mod_start
+ dtb_prop_read_cells(&prop, size_cells, &off);
boot_modules[i].reserved = 0;
+
+ /*
+ * Keep every page the module touches out of the heap range
+ * passed to vm_page_load_heap: round the start *down* and
+ * the end *up*, so an unaligned module never shares a page
+ * with the heap. vm_page_init then defaults those pages to
+ * VM_PT_RESERVED, which free_bootstrap_pages expects when
+ * it releases them after the kernel has exec'd the module.
+ */
+ pmap_reserve_phys_range(trunc_page(boot_modules[i].mod_start),
+ round_page(boot_modules[i].mod_end));
+
i++;
}
@@ -422,7 +448,13 @@ static __attribute__((noinline)) void
load_boot_modules_from_dtb(void)
panic("No bootstrap modules loaded with Mach\n");
boot_info.mods_count = i;
- boot_info.mods_addr = (vm_offset_t) boot_modules;
+ /*
+ * Same phystokv contract as for the per-module .string field:
+ * kern/bootstrap.c reads `bmods = phystokv(boot_info.mods_addr)`,
+ * so we have to give it a physical address even though we built
+ * the boot_modules[] array as ordinary kernel-virtual storage.
+ */
+ boot_info.mods_addr = kvtophys((vm_offset_t) boot_modules);
boot_info.flags |= MULTIBOOT_MODS;
printf("%d bootstrap modules\n", i);
}
diff --git a/aarch64/aarch64/pmap.c b/aarch64/aarch64/pmap.c
index d8e558c3..495420e0 100644
--- a/aarch64/aarch64/pmap.c
+++ b/aarch64/aarch64/pmap.c
@@ -149,6 +149,35 @@ vm_offset_t kernel_virtual_end;
static phys_addr_t phys_mem_start;
static vm_size_t phys_mem_size;
+/*
+ * Lowest physical address occupied by a reserved range that needs to
+ * stay outside the page allocator's free pool — currently used to
+ * keep boot modules' physical pages marked VM_PT_RESERVED so
+ * free_bootstrap_pages() can release them back once the kernel has
+ * finished exec'ing them.
+ *
+ * Set by pmap_reserve_phys_range(); consumed in pmap_bootstrap_misc
+ * when sizing the heap range passed to vm_page_load_heap(). Any
+ * pages between this address and phys_mem_start + phys_mem_size
+ * stay registered with vm_page but default to VM_PT_RESERVED, which
+ * is exactly the contract free_bootstrap_pages() expects. Default
+ * value of (phys_addr_t)-1 means "no reservation — use the full
+ * heap range".
+ *
+ * TODO: this is a single-segment carve-out (everything above the
+ * lowest module is lost to the allocator). A multi-segment heap
+ * would let us recover the gap between modules and the top of RAM.
+ * The vm_page subsystem supports up to 4 segments per type.
+ */
+static phys_addr_t module_phys_min = (phys_addr_t) -1;
+
+void pmap_reserve_phys_range(phys_addr_t start, phys_addr_t end)
+{
+ /* Single-segment carve-out: only the lowest start is tracked. */
+ (void) end;
+ module_phys_min = (start < module_phys_min) ? start : module_phys_min;
+}
+
extern const void __text_start;
extern const void _image_end;
@@ -447,7 +476,19 @@ void pmap_bootstrap_misc(void)
kernel_pmap->l0_base = PT_ENTRY_NULL;
kernel_pmap->asid = 0;
- vm_page_load_heap(VM_PAGE_SEG_DMA, heap_start, phys_mem_start +
phys_mem_size);
+ {
+ /*
+ * Cap heap_end below any reserved physical range (e.g. boot
+ * modules placed in the middle of RAM by QEMU's guest-loader).
+ * Pages above heap_end stay registered with vm_page_load but
+ * default to VM_PT_RESERVED — exactly the contract
+ * free_bootstrap_pages() expects.
+ */
+ phys_addr_t heap_end = phys_mem_start + phys_mem_size;
+ if (module_phys_min < heap_end)
+ heap_end = module_phys_min;
+ vm_page_load_heap(VM_PAGE_SEG_DMA, heap_start, heap_end);
+ }
pmap_init_mapwindows();
vm_fault_dirty_handling = TRUE;
diff --git a/aarch64/aarch64/pmap.h b/aarch64/aarch64/pmap.h
index 71d3d710..e2e6e874 100644
--- a/aarch64/aarch64/pmap.h
+++ b/aarch64/aarch64/pmap.h
@@ -77,6 +77,18 @@ extern void pmap_discover_physical_memory(const struct
dtb_node *node);
extern void pmap_bootstrap(void);
extern void pmap_bootstrap_misc(void);
+/*
+ * pmap_reserve_phys_range:
+ *
+ * Keep a physical range out of the heap that pmap_bootstrap_misc()
+ * passes to vm_page_load_heap(). Only the lowest `start` seen is
+ * recorded (`end` is ignored for now), so every page from there to
+ * the top of RAM stays VM_PT_RESERVED. Call after
+ * pmap_discover_physical_memory() and before pmap_bootstrap_misc()
+ * sizes the heap. Used to protect boot modules' physical pages.
+ */
+extern void pmap_reserve_phys_range(phys_addr_t start, phys_addr_t end);
+
extern void pmap_zero_page(phys_addr_t);
extern void pmap_copy_page(phys_addr_t, phys_addr_t);
--
2.54.0