As noted in one of the email threads, the aarch64 build of OSv fails
to boot with the following error: "dtb_setup: failed to move dtb (dtb too 
large?)".

It turns out that the dtb ("Device Tree Blob") provided by QEMU is 1MB in size,
which is far larger than the 64K buffer reserved starting at the OSV_KERNEL_BASE
address. Because of this, the fdt_move() call to relocate (copy) the dtb from its
original place to the reserved buffer fails. It could be that in the early days
the dtb was small enough to fit into the 64K area and fdt_move() would succeed.

It is also not clear why the dtb had to be relocated in the first place. Possibly
it was done because the memory where the dtb was originally located
would be unavailable (not mapped) after switching to runtime page tables.

One way to fix this error would be to increase the buffer to 1MB.
But what if that is not enough in the future? Another way is to not
relocate the dtb at all and simply pre-read the configuration values from the
original dtb and store them in known variables so they can be accessed as needed
without having to read from the dtb later.

So this patch modifies the dtb constructor function in arch-dtb.cc to
parse the following configuration values from the dtb:

- dtb_cpu_count
- dtb_cpus_mpids
- dtb_timer_irq
- dtb_pci_irqmask
- dtb_pci_irqmap_count
- dtb_pci_bdfs
- dtb_pci_irq_ids

It also modifies relevant dtb_get_* functions to simply return the values
of the variables above to the caller instead of parsing them from dtb.

This patch is enough to make OSv aarch64 build and boot again. This
has only been tested on Fedora 29.

./scripts/build -j4 image=native-example fs=ramfs arch=aarch64

qemu-system-aarch64 -s -nographic -machine virt \
 -machine gic-version=2 -kernel ./build/release.aarch64/loader.img \
 -cpu cortex-a57 -m 1024M \
 -append "--verbose --nomount --maxnic=0 /tools/uush.so" -smp 2

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 arch/aarch64/arch-dtb.cc   | 208 ++++++++++++++++++++++++-------------
 arch/aarch64/arch-setup.cc |   5 +-
 2 files changed, 136 insertions(+), 77 deletions(-)

diff --git a/arch/aarch64/arch-dtb.cc b/arch/aarch64/arch-dtb.cc
index 986b3c3b..b59f1dcc 100644
--- a/arch/aarch64/arch-dtb.cc
+++ b/arch/aarch64/arch-dtb.cc
@@ -149,74 +149,6 @@ static bool dtb_get_int_spec(int node, struct dtb_int_spec 
*s, int n)
     return true;
 }
 
-void  __attribute__((constructor(init_prio::dtb))) dtb_setup()
-{
-    void *olddtb;
-    int node;
-    char *cmdline_override;
-    int len;
-
-    if (fdt_check_header(dtb) != 0) {
-        abort("dtb_setup: device tree blob invalid.\n");
-    }
-
-    memory::phys_mem_size = dtb_get_phys_memory(&mmu::mem_addr);
-    if (!memory::phys_mem_size) {
-        abort("dtb_setup: failed to parse memory information.\n");
-    }
-
-    /* command line will be overwritten with DTB: move it inside DTB */
-
-    node = fdt_path_offset(dtb, "/chosen");
-    if (node < 0) {
-        node = fdt_path_offset(dtb, "/");
-        if (node >= 0) {
-            node = fdt_add_subnode(dtb, node, "chosen");
-        }
-    }
-    if (node < 0) {
-        abort("dtb_setup: failed to add node /chosen for cmdline.\n");
-    }
-
-    cmdline_override = (char *)fdt_getprop(dtb, node, "bootargs", &len);
-    if (cmdline_override) {
-        cmdline = cmdline_override;
-    } else {
-        len = strlen(cmdline) + 1;
-        if (fdt_setprop(dtb, node, "bootargs", cmdline, len) < 0) {
-            abort("dtb_setup: failed to set bootargs in /chosen.\n");
-        }
-    }
-    if ((size_t)len > max_cmdline) {
-        abort("dtb_setup: command line too long.\n");
-    }
-    olddtb = dtb;
-    dtb = (void *)OSV_KERNEL_BASE;
-
-    if (fdt_move(olddtb, dtb, 0x10000) != 0) {
-        abort("dtb_setup: failed to move dtb (dtb too large?)\n");
-    }
-
-    cmdline = (char *)fdt_getprop(dtb, node, "bootargs", NULL);
-    if (!cmdline) {
-        abort("dtb_setup: cannot find cmdline after dtb move.\n");
-    }
-    register u64 edata;
-    asm volatile ("adrp %0, .edata" : "=r"(edata));
-
-    /* import from loader.cc */
-    extern elf::Elf64_Ehdr *elf_header;
-    extern size_t elf_size;
-    extern void *elf_start;
-
-    elf_start = reinterpret_cast<void *>(elf_header);
-    elf_size = (u64)edata - (u64)elf_start;
-
-    /* remove amount of memory used for ELF from avail memory */
-    mmu::phys addr = (mmu::phys)elf_start + elf_size;
-    memory::phys_mem_size -= addr - mmu::mem_addr;
-}
-
 size_t dtb_get_phys_memory(u64 *addr)
 {
     size_t retval;
@@ -260,8 +192,8 @@ u64 dtb_get_uart(int *irqid)
 /* this gets the virtual timer irq, we are not interested
  * about the other timers.
  */
-
-int dtb_get_timer_irq()
+static int dtb_timer_irq = -1;
+static int dtb_parse_timer_irq()
 {
     int node;
     struct dtb_int_spec int_spec[4];
@@ -279,6 +211,11 @@ int dtb_get_timer_irq()
     return int_spec[2].irq_id;
 }
 
+int dtb_get_timer_irq()
+{
+    return dtb_timer_irq;
+}
+
 /* this gets the GIC distributor and cpu interface addresses */
 bool dtb_get_gic_v2(u64 *dist, size_t *dist_len, u64 *cpu, size_t *cpu_len)
 {
@@ -304,7 +241,8 @@ bool dtb_get_gic_v2(u64 *dist, size_t *dist_len, u64 *cpu, 
size_t *cpu_len)
 }
 
 /* this gets the cpus node and returns the number of cpu elements in it. */
-int dtb_get_cpus_count()
+static int dtb_cpu_count = -1;
+static int dtb_parse_cpus_count()
 {
     int node, subnode, count;
     if (!dtb)
@@ -321,11 +259,22 @@ int dtb_get_cpus_count()
     return count;
 }
 
+int dtb_get_cpus_count()
+{
+    return dtb_cpu_count;
+}
+
 /* this gets the cpu mpidr values for all cpus */
-bool dtb_get_cpus_mpid(u64 *mpids, int n)
+#define DTB_MAX_CPU_COUNT 32
+static u64 dtb_cpus_mpids[DTB_MAX_CPU_COUNT];
+bool dtb_parse_cpus_mpid(u64 *mpids, int n)
 {
     int node, subnode;
 
+    if (n > DTB_MAX_CPU_COUNT) {
+        abort("dtb_parse_cpus_mpid: number of cpus greater than maximum. 
Increase the DTB_MAX_CPU_COUNT!\n");
+    }
+
     if (!dtb)
         return false;
 
@@ -342,6 +291,13 @@ bool dtb_get_cpus_mpid(u64 *mpids, int n)
     return true;
 }
 
+bool dtb_get_cpus_mpid(u64 *mpids, int n) {
+    for (auto i = 0; i < n; i++) {
+        mpids[i] = dtb_cpus_mpids[i];
+    }
+    return true;
+}
+
 static int dtb_get_pci_node()
 {
     if (dtb_pci_node >= 0) {
@@ -455,7 +411,8 @@ static int dtb_get_pua_cells(u32 phandle)
 }
 
 /* get the number of mappings between pci devices and platform IRQs. */
-int dtb_get_pci_irqmap_count()
+static int dtb_pci_irqmap_count = -1;
+static int dtb_parse_pci_irqmap_count()
 {
     int count;
     if (!dtb)
@@ -494,8 +451,14 @@ int dtb_get_pci_irqmap_count()
     return count;
 }
 
+int dtb_get_pci_irqmap_count()
+{
+    return dtb_pci_irqmap_count;
+}
+
 /* gets the mask for just the slot member of the pci address. */
-u32 dtb_get_pci_irqmask()
+static int dtb_pci_irqmask = -1;
+u32 dtb_parse_pci_irqmask()
 {
     u32 *prop;
     int node, size;
@@ -518,8 +481,20 @@ u32 dtb_get_pci_irqmask()
     return (fdt32_to_cpu(prop[0]) & DTB_PHYSHI_BDF_MASK) | DTB_PIN_MASK;
 }
 
-bool dtb_get_pci_irqmap(u32 *bdfs, int *irq_ids, int n)
+u32 dtb_get_pci_irqmask()
 {
+    return dtb_pci_irqmask;
+}
+
+#define DTB_MAX_IRQ_COUNT 32
+static u32 dtb_pci_bdfs[DTB_MAX_IRQ_COUNT];
+static int dtb_pci_irq_ids[DTB_MAX_IRQ_COUNT];
+static bool dtb_parse_pci_irqmap(u32 *bdfs, int *irq_ids, int n)
+{
+    if (n > DTB_MAX_IRQ_COUNT) {
+        abort("dtb_parse_pci_irqmap: number of iqrs greater than maximum. 
Increase the DTB_MAX_IRQ_COUNT!\n");
+    }
+
     if (!dtb)
         return false;
 
@@ -570,6 +545,15 @@ bool dtb_get_pci_irqmap(u32 *bdfs, int *irq_ids, int n)
     return true;
 }
 
+bool dtb_get_pci_irqmap(u32 *bdfs, int *irq_ids, int n)
+{
+    for (auto i = 0; i < n; i++) {
+        bdfs[i] = dtb_pci_bdfs[i];
+        irq_ids[i] = dtb_pci_irq_ids[i];
+    }
+    return true;
+}
+
 bool dtb_get_vmm_is_xen()
 {
     if (fdt_check_header(dtb) != 0)
@@ -577,3 +561,77 @@ bool dtb_get_vmm_is_xen()
 
     return fdt_node_offset_by_compatible(dtb, -1, "xen,xen") >= 0;
 }
+
+void  __attribute__((constructor(init_prio::dtb))) dtb_setup()
+{
+    int node;
+    char *cmdline_override;
+    int len;
+
+    if (fdt_check_header(dtb) != 0) {
+        abort("dtb_setup: device tree blob invalid.\n");
+    }
+
+    memory::phys_mem_size = dtb_get_phys_memory(&mmu::mem_addr);
+    if (!memory::phys_mem_size) {
+        abort("dtb_setup: failed to parse memory information.\n");
+    }
+
+    /* command line will be overwritten with DTB: move it inside DTB */
+
+    node = fdt_path_offset(dtb, "/chosen");
+    if (node < 0) {
+        node = fdt_path_offset(dtb, "/");
+        if (node >= 0) {
+            node = fdt_add_subnode(dtb, node, "chosen");
+        }
+    }
+    if (node < 0) {
+        abort("dtb_setup: failed to add node /chosen for cmdline.\n");
+    }
+
+    cmdline_override = (char *)fdt_getprop(dtb, node, "bootargs", &len);
+    if (cmdline_override) {
+        cmdline = cmdline_override;
+    } else {
+        len = strlen(cmdline) + 1;
+        if (fdt_setprop(dtb, node, "bootargs", cmdline, len) < 0) {
+            abort("dtb_setup: failed to set bootargs in /chosen.\n");
+        }
+    }
+    if ((size_t)len > max_cmdline) {
+        abort("dtb_setup: command line too long.\n");
+    }
+
+    cmdline = (char *)fdt_getprop(dtb, node, "bootargs", NULL);
+    if (!cmdline) {
+        abort("dtb_setup: cannot find cmdline after dtb move.\n");
+    }
+    // Parse some dtb configuration ahead of time
+    dtb_cpu_count = dtb_parse_cpus_count();
+    if (!dtb_parse_cpus_mpid(dtb_cpus_mpids, dtb_cpu_count)) {
+        abort("dtb_setup: failed to parse cpu mpid.\n");
+    }
+
+    dtb_timer_irq = dtb_parse_timer_irq();
+    dtb_pci_irqmask = dtb_parse_pci_irqmask();
+    dtb_pci_irqmap_count = dtb_parse_pci_irqmap_count();
+    if (!dtb_parse_pci_irqmap(dtb_pci_bdfs, dtb_pci_irq_ids, 
dtb_pci_irqmap_count)) {
+        abort("dtb_setup: failed to parse pci_irq_map.\n");
+    }
+
+    register u64 edata;
+    asm volatile ("adrp %0, .edata" : "=r"(edata));
+
+    /* import from loader.cc */
+    extern elf::Elf64_Ehdr *elf_header;
+    extern size_t elf_size;
+    extern void *elf_start;
+
+    elf_start = reinterpret_cast<void *>(elf_header);
+    elf_size = (u64)edata - (u64)elf_start;
+
+    /* remove amount of memory used for ELF from avail memory */
+    mmu::phys addr = (mmu::phys)elf_start + elf_size;
+    memory::phys_mem_size -= addr - mmu::mem_addr;
+}
diff --git a/arch/aarch64/arch-setup.cc b/arch/aarch64/arch-setup.cc
index 4f4be836..bdbb1266 100644
--- a/arch/aarch64/arch-setup.cc
+++ b/arch/aarch64/arch-setup.cc
@@ -112,9 +112,10 @@ void arch_setup_free_memory()
 
     arch_setup_pci();
 
-    mmu::switch_to_runtime_page_tables();
-
+    // get rid of the command line, before memory is unmapped
     osv::parse_cmdline(cmdline);
+
+    mmu::switch_to_runtime_page_tables();
 }
 
 void arch_setup_tls(void *tls, const elf::tls_data& info)
-- 
2.20.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20191230044051.12162-1-jwkozaczuk%40gmail.com.

Reply via email to