On Sun, Jun 04, 2017 at 12:46:27AM +0200, Patrick Wildt wrote:
> Hi,
>
> with some guidance from Theo I have random-order kernels on arm64. In
> one instance the kernel did not boot up. This could be a real bug, or
> not.
>
> Please try this diff and make sure the machine still boots up. If it
> does not: Please send me the gap.S and the lorder file from the kernel
> compile directory.
>
> Thanks,
> Patrick
Updated diff which applies cleanly to -current and even unmaps the
bootstrap code.
Patrick
diff --git a/sys/arch/arm64/arm64/autoconf.c b/sys/arch/arm64/arm64/autoconf.c
index 96fe23613bc..5d4e2d901ac 100644
--- a/sys/arch/arm64/arm64/autoconf.c
+++ b/sys/arch/arm64/arm64/autoconf.c
@@ -21,6 +21,7 @@
#include <sys/device.h>
#include <sys/reboot.h>
#include <sys/hibernate.h>
+#include <uvm/uvm.h>
#include <machine/bootconfig.h>
@@ -31,6 +32,18 @@ struct device *bootdv = NULL;
enum devclass bootdev_class = DV_DULL;
void
+unmap_startup(void)
+{
+ extern void *_start, *endboot;
+ vaddr_t va = (vaddr_t)&_start, end = (vaddr_t)&endboot;
+
+ /* Unmap [_start, endboot) page-wise; the first page always goes. */
+ do {
+ pmap_kremove(va, PAGE_SIZE);
+ } while ((va += PAGE_SIZE) < end);
+}
+
+void
cpu_configure(void)
{
(void)splhigh();
@@ -38,6 +51,8 @@ cpu_configure(void)
softintr_init();
(void)config_rootfound("mainbus", NULL);
+ unmap_startup();
+
cold = 0;
spl0();
}
diff --git a/sys/arch/arm64/arm64/locore.S b/sys/arch/arm64/arm64/locore.S
index 76b585ebb46..b7ef5321e59 100644
--- a/sys/arch/arm64/arm64/locore.S
+++ b/sys/arch/arm64/arm64/locore.S
@@ -30,232 +30,14 @@
#include "assym.h"
#include <sys/syscall.h>
#include <machine/asm.h>
-#include <machine/armreg.h>
-#include <machine/hypervisor.h>
#include <machine/param.h>
-#include <machine/pte.h>
-
-#define VIRT_BITS 39
-
-#define DEVICE_MEM 0
-#define NORMAL_UNCACHED 1
-#define NORMAL_MEM 2
-
-/*
- * We assume:
- * MMU on with an identity map, or off
- * D-Cache: off
- * I-Cache: on or off
- * We are loaded at a 2MiB aligned address
- */
-
-#define INIT_STACK_SIZE (PAGE_SIZE * 4)
-
- .text
- .globl _start
-_start:
- mov x21, x0
- mov x22, x1
- mov x23, x2
-
- /* Drop to EL1 */
- bl drop_to_el1
-
- /*
- * Disable the MMU. We may have entered the kernel with it on and
- * will need to update the tables later. If this has been set up
- * with anything other than a VA == PA map then this will fail,
- * but in this case the code to find where we are running from
- * would have also failed.
- */
- dsb sy
- mrs x2, sctlr_el1
- bic x2, x2, SCTLR_M
- msr sctlr_el1, x2
- isb
-
- /* Set the context id */
- msr contextidr_el1, xzr
-
- /* Get the virt -> phys offset */
- bl get_virt_delta
-
- /* Store symbol value. */
- adr x0, .Lesym
- ldr x0, [x0]
- sub x0, x0, x29
- add x21, x21, x29
- str x21, [x0]
-
- /*
- * At this point:
- * x29 = PA - VA
- * x28 = Our physical load address
- */
-
- /* Create the page tables */
- bl create_pagetables
-
- /*
- * At this point:
- * x27 = TTBR0 table
- * x26 = TTBR1 table
- */
-
- /* Enable the mmu */
- bl start_mmu
-
- /* Jump to the virtual address space */
- ldr x15, .Lvirtdone
- br x15
-
-.Linitstack:
- .xword initstack
-.Linitstack_end:
- .xword initstack_end
-virtdone:
- /* Set up the stack */
- adr x25, .Linitstack_end
- ldr x25, [x25]
- mov sp, x25
- mov x8, #TRAPFRAME_SIZEOF
- sub x8, x8, (STACKALIGNBYTES)
- and x8, x8, ~(STACKALIGNBYTES)
-
- // pass base of kernel stack as proc0
- adr x25, .Linitstack
- ldr x25, [x25]
-
- sub sp, sp, x8
-
- /* Zero the BSS */
- ldr x15, .Lbss
- ldr x14, .Lend
-1:
- str xzr, [x15], #8
- cmp x15, x14
- b.lo 1b
-
- /* Backup the module pointer */
- mov x1, x0
-
- /* Make the page table base a virtual address */
- sub x26, x26, x29
-
- // XXX - shouldn't this be 8 * 5 (struct grew from 4 -> 5)
- sub sp, sp, #(64 * 4)
- mov x0, sp
-
- /* Negate the delta so it is VA -> PA */
- neg x29, x29
-
- str x1, [x0] /* modulep */
- str x26, [x0, 8] /* kern_l1pt */
- str x29, [x0, 16] /* kern_delta */
- str x25, [x0, 24] /* kern_stack */
- str x21, [x0, 32] /* ? (x0 arg on boot) */
- str x22, [x0, 40] /* ? (x1 arg on boot) */
- str x23, [x0, 48] /* fdt (x2 arg on boot) */
-
- /* trace back starts here */
- mov fp, #0
- /* Branch to C code */
- bl initarm
- bl _C_LABEL(main)
-
- /* We should not get here */
- brk 0
-
- .align 3
-.Lvirtdone:
- .quad virtdone
-.Lbss:
- .quad __bss_start
-.Lstart:
- .quad _start
-.Lend:
- .quad _end
-.Lcpu_info_primary:
- .quad _C_LABEL(cpu_info_primary)
-
-/*
- * If we are started in EL2, configure the required hypervisor
- * registers and drop to EL1.
- */
-drop_to_el1:
- mrs x1, CurrentEL
- lsr x1, x1, #2
- cmp x1, #0x2
- b.eq 1f
- ret
-1:
- /* Configure the Hypervisor */
- mov x2, #(HCR_RW)
- msr hcr_el2, x2
-
- /* Load the Virtualization Process ID Register */
- mrs x2, midr_el1
- msr vpidr_el2, x2
-
- /* Load the Virtualization Multiprocess ID Register */
- mrs x2, mpidr_el1
- msr vmpidr_el2, x2
-
- /* Set the bits that need to be 1 in sctlr_el1 */
- ldr x2, .Lsctlr_res1
- msr sctlr_el1, x2
-
- /* Don't trap to EL2 for exceptions */
- mov x2, #CPTR_RES1
- msr cptr_el2, x2
-
- /* Don't trap to EL2 for CP15 traps */
- msr hstr_el2, xzr
-
- /* Enable access to the physical timers at EL1 */
- mrs x2, cnthctl_el2
- orr x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
- msr cnthctl_el2, x2
-
- /* Set the counter offset to a known value */
- msr cntvoff_el2, xzr
-
- /* Hypervisor trap functions */
- adr x2, hyp_vectors
- msr vbar_el2, x2
-
- mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
- msr spsr_el2, x2
-
- /* Configure GICv3 CPU interface */
- mrs x2, id_aa64pfr0_el1
- /* Extract GIC bits from the register */
- ubfx x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
- /* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
- cmp x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
- b.ne 2f
-
- mrs x2, icc_sre_el2
- orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */
- orr x2, x2, #ICC_SRE_EL2_SRE /* Enable system registers */
- msr icc_sre_el2, x2
-2:
-
- /* Set the address to return to our return address */
- msr elr_el2, x30
- isb
-
- eret
-
- .align 3
-.Lsctlr_res1:
- .quad SCTLR_RES1
#define VECT_EMPTY \
.align 7; \
1: b 1b
.align 11
+ .globl hyp_vectors
hyp_vectors:
VECT_EMPTY /* Synchronous EL2t */
VECT_EMPTY /* IRQ EL2t */
@@ -277,370 +59,7 @@ hyp_vectors:
VECT_EMPTY /* FIQ 32-bit EL1 */
VECT_EMPTY /* Error 32-bit EL1 */
-/*
- * Get the delta between the physical address we were loaded to and the
- * virtual address we expect to run from. This is used when building the
- * initial page table.
- */
- .globl get_virt_delta
-get_virt_delta:
- /* Load the physical address of virt_map */
- adr x28, virt_map
- /* Load the virtual address of virt_map stored in virt_map */
- ldr x29, [x28]
- /* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */
- sub x29, x29, x28
- and x28, x28, #~0x0003ffff // should be 2MB?
-
- ret
-
- .align 3
-virt_map:
- .quad virt_map
-
-/*
- * This builds the page tables containing the identity map, and the kernel
- * virtual map.
- *
- * It relys on:
- * We were loaded to an address that is on a 2MiB boundary
- * All the memory must not cross a 1GiB boundaty
- * x28 contains the physical address we were loaded from
- *
- * There are 3 pages before that address for the page tables
- * These pages are allocated aligned in .data
- * The pages used are:
- * - The identity (PA = VA) table (TTBR0)
- * - The Kernel L1 table (TTBR1)
- * - The PA == VA L2 table for kernel
- */
-.Lpagetable:
- .xword pagetable
-.Lpagetable_end:
- .xword pagetable_end
-
-.Lesym:
- .xword esym
-
-create_pagetables:
- /* Save the Link register */
- mov x5, x30
-
- /* Clean the page table */
- adr x6, .Lpagetable
- ldr x6, [x6]
- sub x6, x6, x29 // VA -> PA
- mov x26, x6
- adr x27, .Lpagetable_end
- ldr x27, [x27]
- sub x27, x27, x29 // VA -> PA
-1:
- stp xzr, xzr, [x6], #16
- stp xzr, xzr, [x6], #16
- stp xzr, xzr, [x6], #16
- stp xzr, xzr, [x6], #16
- cmp x6, x27
- b.lo 1b
-
- /*
- * Build the TTBR1 maps.
- */
-
- /* Find the size of the kernel */
- adr x6, .Lstart
- ldr x6, [x6]
- sub x6, x6, x29
-
- /* End is the symbol address */
- adr x7, .Lesym
- ldr x7, [x7]
- sub x7, x7, x29
- ldr x7, [x7]
- sub x7, x7, x29
-
- /* Find the end - begin */
- sub x8, x7, x6
- /* Get the number of l2 pages to allocate, rounded down */
- lsr x10, x8, #(L2_SHIFT)
- /* Add 4 MiB for any rounding above and the module data */
- add x10, x10, #2
-
- /* Create the kernel space L2 table */
- mov x6, x26 // pagetable:
- mov x7, #NORMAL_MEM
- add x8, x28, x29
- mov x9, x28
- bl build_l2_block_pagetable
-
- /* Move to the l1 table */
- add x26, x26, #PAGE_SIZE*2 // pagetable_l1_ttbr1:
-
- /* Link the l1 -> l2 table */
- mov x9, x6
- mov x6, x26
- bl link_l1_pagetable
-
- /*
- * Build the TTBR0 maps.
- */
- add x27, x26, #PAGE_SIZE * 2 // pagetable_l1_ttbr0:
-
- mov x6, x27 /* The initial page table */
-#if defined(SOCDEV_PA) && defined(SOCDEV_VA)
- /* Create a table for the UART */
- mov x7, #DEVICE_MEM
- mov x8, #(SOCDEV_VA) /* VA start */
- mov x9, #(SOCDEV_PA) /* PA start */
- mov x10, #1
- bl build_l1_block_pagetable
-#endif
-
- /* Create the VA = PA map */
- mov x7, #NORMAL_MEM // #NORMAL
- mov x9, x27
- mov x8, x9 /* VA start (== PA start) */
- mov x10, #1
- bl build_l1_block_pagetable
-
- /* Create a mapping for the FDT */
- mov x7, #NORMAL_MEM // #NORMAL
- mov x9, x23
- mov x8, x9 /* VA start (== PA start) */
- mov x10, #1
- bl build_l1_block_pagetable
-
- /* Move to the l0 table */
- add x27, x27, #PAGE_SIZE * 2 // pagetable_l0_ttbr0:
-
- /* Link the l0 -> l1 table */
- mov x9, x6
- mov x6, x27
- mov x10, #1
- bl link_l0_pagetable
-
- /* Restore the Link register */
- mov x30, x5
- ret
-
-/*
- * Builds an L0 -> L1 table descriptor
- *
- * This is a link for a 512GiB block of memory with up to 1GiB regions mapped
- * within it by build_l1_block_pagetable.
- *
- * x6 = L0 table
- * x8 = Virtual Address
- * x9 = L1 PA (trashed)
- * x10 = Entry count
- * x11, x12 and x13 are trashed
- */
-link_l0_pagetable:
- /*
- * Link an L0 -> L1 table entry.
- */
- /* Find the table index */
- lsr x11, x8, #L0_SHIFT
- and x11, x11, #Ln_ADDR_MASK
-
- /* Build the L0 block entry */
- mov x12, #L0_TABLE
-
- /* Only use the output address bits */
- lsr x9, x9, #PAGE_SHIFT
-1: orr x13, x12, x9, lsl #PAGE_SHIFT
-
- /* Store the entry */
- str x13, [x6, x11, lsl #3]
-
- sub x10, x10, #1
- add x11, x11, #1
- add x9, x9, #1
- cbnz x10, 1b
-
- ret
-
-/*
- * Builds an L1 -> L2 table descriptor
- *
- * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
- * within it by build_l2_block_pagetable.
- *
- * x6 = L1 table
- * x8 = Virtual Address
- * x9 = L2 PA (trashed)
- * x11, x12 and x13 are trashed
- */
-link_l1_pagetable:
- /*
- * Link an L1 -> L2 table entry.
- */
- /* Find the table index */
- lsr x11, x8, #L1_SHIFT
- and x11, x11, #Ln_ADDR_MASK
-
- /* Build the L1 block entry */
- mov x12, #L1_TABLE
-
- /* Only use the output address bits */
- lsr x9, x9, #PAGE_SHIFT
- orr x13, x12, x9, lsl #PAGE_SHIFT
-
- /* Store the entry */
- str x13, [x6, x11, lsl #3]
-
- ret
-
-/*
- * Builds count 1 GiB page table entry
- * x6 = L1 table
- * x7 = Type (0 = Device, 1 = Normal)
- * x8 = VA start
- * x9 = PA start (trashed)
- * x10 = Entry count
- * x11, x12 and x13 are trashed
- */
-build_l1_block_pagetable:
- /*
- * Build the L1 table entry.
- */
- /* Find the table index */
- lsr x11, x8, #L1_SHIFT
- and x11, x11, #Ln_ADDR_MASK
-
- /* Build the L1 block entry */
- lsl x12, x7, #2
- orr x12, x12, #L1_BLOCK
- orr x12, x12, #(ATTR_AF)
- orr x12, x12, ATTR_SH(SH_INNER)
-
- /* Only use the output address bits */
- lsr x9, x9, #L1_SHIFT
-
- /* Set the physical address for this virtual address */
-1: orr x13, x12, x9, lsl #L1_SHIFT
-
- /* Store the entry */
- str x13, [x6, x11, lsl #3]
-
- sub x10, x10, #1
- add x11, x11, #1
- add x9, x9, #1
- cbnz x10, 1b
-
- ret
-
-/*
- * Builds count 2 MiB page table entry
- * x6 = L2 table
- * x7 = Type (0 = Device, 1 = Normal)
- * x8 = VA start
- * x9 = PA start (trashed)
- * x10 = Entry count
- * x11, x12 and x13 are trashed
- */
-build_l2_block_pagetable:
- /*
- * Build the L2 table entry.
- */
- /* Find the table index */
- lsr x11, x8, #L2_SHIFT
- and x11, x11, #Ln_ADDR_MASK
-
- /* Build the L2 block entry */
- lsl x12, x7, #2
- orr x12, x12, #L2_BLOCK
- orr x12, x12, #(ATTR_AF)
- orr x12, x12, ATTR_SH(SH_INNER)
-
- /* Only use the output address bits */
- lsr x9, x9, #L2_SHIFT
-
- /* Set the physical address for this virtual address */
-1: orr x13, x12, x9, lsl #L2_SHIFT
-
- /* Store the entry */
- str x13, [x6, x11, lsl #3]
-
- sub x10, x10, #1
- add x11, x11, #1
- add x9, x9, #1
- cbnz x10, 1b
-
- ret
-
-start_mmu:
- dsb sy
-
- /* Load the exception vectors */
- ldr x2, =exception_vectors
- msr vbar_el1, x2
-
- /* Load ttbr0 and ttbr1 */
- msr ttbr0_el1, x27
- msr ttbr1_el1, x26
- isb
-
- /* Clear the Monitor Debug System control register */
- msr mdscr_el1, xzr
-
- /* Invalidate the TLB */
- tlbi vmalle1is
-
- ldr x2, mair
- msr mair_el1, x2
-
- /*
- * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
- * Some machines have physical memory mapped >512GiB, which can not
- * be identity-mapped using the default 39 VA bits. Thus, use
- * 48 VA bits for now and switch back to 39 after the VA jump.
- */
- ldr x2, tcr
- mrs x3, id_aa64mmfr0_el1
- bfi x2, x3, #32, #3
- msr tcr_el1, x2
-
- /* Setup SCTLR */
- ldr x2, sctlr_set
- ldr x3, sctlr_clear
- mrs x1, sctlr_el1
- bic x1, x1, x3 /* Clear the required bits */
- orr x1, x1, x2 /* Set the required bits */
- msr sctlr_el1, x1
- isb
-
- ret
- .globl switch_mmu_kernel
-switch_mmu_kernel:
- dsb sy
- /* Invalidate the TLB */
- tlbi vmalle1is
- /* Load ttbr1 (kernel) */
- msr ttbr1_el1, x0
- isb
- ret
-
-
-
.align 3
-mair:
- /* Device | Normal (no cache, write-back, write-through) */
- .quad MAIR_ATTR(0x00, 0) | \
- MAIR_ATTR(0x44, 1) | \
- MAIR_ATTR(0xff, 2) | \
- MAIR_ATTR(0x88, 3)
-tcr:
- .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
- TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
-sctlr_set:
- /* Bits to set */
- .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
- SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | SCTLR_M)
-sctlr_clear:
- /* Bits to clear */
- .quad (SCTLR_EE | SCTLR_EOE | SCTLR_WXN | SCTLR_UMA | SCTLR_ITD | \
- SCTLR_THEE | SCTLR_CP15BEN | SCTLR_A)
-
.globl abort
abort:
b abort
@@ -651,7 +70,6 @@ abort:
.global _C_LABEL(esym)
_C_LABEL(esym): .xword _C_LABEL(end)
- //.section .init_pagetable
data_align_pad:
.space 32
.align 12 /* 4KiB aligned */
@@ -670,20 +88,14 @@ pagetable_l1_ttbr0:
.space PAGE_SIZE * 2 // allocate 2 pages, reused later in pmap
pagetable_l0_ttbr0:
.space PAGE_SIZE
+ .globl pagetable_end
pagetable_end:
-
- .text
-#if 0
- .globl init_pt_va
-init_pt_va:
- .quad pagetable /* XXX: Keep page tables VA */
-#endif
-
.bss
.align 4
.globl initstack
initstack:
.space USPACE
+ .globl initstack_end
initstack_end:
.text
diff --git a/sys/arch/arm64/arm64/locore0.S b/sys/arch/arm64/arm64/locore0.S
new file mode 100644
index 00000000000..75cd4275448
--- /dev/null
+++ b/sys/arch/arm64/arm64/locore0.S
@@ -0,0 +1,619 @@
+/* $OpenBSD$ */
+/*-
+ * Copyright (c) 2012-2014 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/arm64/arm64/locore.S 282867 2015-05-13 18:57:03Z zbb $
+ */
+
+#include "assym.h"
+#include <machine/asm.h>
+#include <machine/armreg.h>
+#include <machine/hypervisor.h>
+#include <machine/param.h>
+#include <machine/pte.h>
+
+#define VIRT_BITS 39
+
+#define DEVICE_MEM 0
+#define NORMAL_UNCACHED 1
+#define NORMAL_MEM 2
+
+/*
+ * We assume:
+ * MMU on with an identity map, or off
+ * D-Cache: off
+ * I-Cache: on or off
+ * We are loaded at a 2MiB aligned address
+ */
+
+#define INIT_STACK_SIZE (PAGE_SIZE * 4)
+
+ .text
+ .globl _start
+_start: /* Kernel entry point; x0-x2 hold the boot arguments */
+ mov x21, x0 /* preserve the boot arguments in x21-x23 */
+ mov x22, x1
+ mov x23, x2
+
+ /* Drop to EL1 (note: x29 is only set up by get_virt_delta below) */
+ bl drop_to_el1
+
+ /*
+ * Disable the MMU. We may have entered the kernel with it on and
+ * will need to update the tables later. If this has been set up
+ * with anything other than a VA == PA map then this will fail,
+ * but in this case the code to find where we are running from
+ * would have also failed.
+ */
+ dsb sy
+ mrs x2, sctlr_el1
+ bic x2, x2, SCTLR_M
+ msr sctlr_el1, x2
+ isb
+
+ /* Set the context id */
+ msr contextidr_el1, xzr
+
+ /* Get the virt -> phys offset */
+ bl get_virt_delta
+
+ /* Add the VA - PA delta to the x0 boot argument, store it in esym */
+ adr x0, .Lesym
+ ldr x0, [x0]
+ sub x0, x0, x29 /* x0 = physical address of esym */
+ add x21, x21, x29 /* x21 = boot x0 + (VA - PA) */
+ str x21, [x0]
+
+ /*
+ * At this point:
+ * x29 = VA - PA (subtract it from a VA to get the PA)
+ * x28 = Our physical load address
+ */
+
+ /* Create the page tables */
+ bl create_pagetables
+
+ /*
+ * At this point:
+ * x27 = TTBR0 table
+ * x26 = TTBR1 table
+ */
+
+ /* Enable the mmu */
+ bl start_mmu
+
+ /* Jump to the virtual address space */
+ ldr x15, .Lvirtdone /* linked (virtual) address of virtdone */
+ br x15
+
+.Linitstack:
+ .xword initstack
+.Linitstack_end:
+ .xword initstack_end
+virtdone: /* First code run at the kernel's linked virtual address */
+ /* Set up the stack */
+ adr x25, .Linitstack_end
+ ldr x25, [x25]
+ mov sp, x25
+ mov x8, #TRAPFRAME_SIZEOF
+ sub x8, x8, (STACKALIGNBYTES) // NOTE(review): rounds down; add would round up - confirm
+ and x8, x8, ~(STACKALIGNBYTES)
+
+ // pass base of kernel stack as proc0
+ adr x25, .Linitstack
+ ldr x25, [x25]
+
+ sub sp, sp, x8 /* reserve x8 bytes at the top of the stack */
+
+ /* Zero the BSS */
+ ldr x15, .Lbss
+ ldr x14, .Lend
+1:
+ str xzr, [x15], #8 /* 8 bytes per iteration, post-increment */
+ cmp x15, x14
+ b.lo 1b
+
+ /* x0 (still the PA of esym from _start) becomes modulep - TODO confirm */
+ mov x1, x0
+
+ /* Page table base to VA. NOTE(review): x29 = VA - PA, so this wants add? */
+ sub x26, x26, x29
+
+ // XXX - shouldn't this be 8 * 5 (struct grew from 4 -> 5)
+ sub sp, sp, #(64 * 4)
+ mov x0, sp /* x0 = argument block handed to initarm */
+
+ /* Negate the delta so it is VA -> PA */
+ neg x29, x29 /* x29 = PA - VA from here on */
+
+ str x1, [x0] /* modulep */
+ str x26, [x0, 8] /* kern_l1pt */
+ str x29, [x0, 16] /* kern_delta */
+ str x25, [x0, 24] /* kern_stack */
+ str x21, [x0, 32] /* ? (x0 arg on boot) */
+ str x22, [x0, 40] /* ? (x1 arg on boot) */
+ str x23, [x0, 48] /* fdt (x2 arg on boot) */
+
+ /* trace back starts here */
+ mov fp, #0
+ /* Branch to C code */
+ bl initarm
+ bl _C_LABEL(main)
+
+ /* We should not get here */
+ brk 0
+
+
+ .align 3
+.Lvirtdone:
+ .quad virtdone
+.Lbss:
+ .quad __bss_start
+.Lstart:
+ .quad _start
+.Lend:
+ .quad _end
+
+/*
+ * If we are started in EL2, configure the required hypervisor registers
+ * and drop to EL1.  Runs before get_virt_delta, so x29 is not valid yet.
+ */
+drop_to_el1:
+ mrs x1, CurrentEL
+ lsr x1, x1, #2
+ cmp x1, #0x2
+ b.eq 1f
+ ret
+1:
+ /* Configure the Hypervisor */
+ mov x2, #(HCR_RW)
+ msr hcr_el2, x2
+
+ /* Load the Virtualization Process ID Register */
+ mrs x2, midr_el1
+ msr vpidr_el2, x2
+
+ /* Load the Virtualization Multiprocess ID Register */
+ mrs x2, mpidr_el1
+ msr vmpidr_el2, x2
+
+ /* Set the bits that need to be 1 in sctlr_el1 */
+ ldr x2, .Lsctlr_res1
+ msr sctlr_el1, x2
+
+ /* Don't trap to EL2 for exceptions */
+ mov x2, #CPTR_RES1
+ msr cptr_el2, x2
+
+ /* Don't trap to EL2 for CP15 traps */
+ msr hstr_el2, xzr
+
+ /* Enable access to the physical timers at EL1 */
+ mrs x2, cnthctl_el2
+ orr x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)
+ msr cnthctl_el2, x2
+
+ /* Set the counter offset to a known value */
+ msr cntvoff_el2, xzr
+
+ /* Hypervisor trap vectors: found PC-relative, x29 is not set up yet */
+ adr x2, .Lhyp_vectors // address we run at (PA) of the slot
+ ldr x1, [x2] // link-time offset: hyp_vectors - slot
+ add x2, x2, x1 // PA of hyp_vectors (x1 is scratch here)
+ msr vbar_el2, x2
+
+ mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
+ msr spsr_el2, x2
+
+ /* Configure GICv3 CPU interface */
+ mrs x2, id_aa64pfr0_el1
+ /* Extract GIC bits from the register */
+ ubfx x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
+ /* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
+ cmp x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
+ b.ne 2f
+
+ mrs x2, icc_sre_el2
+ orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */
+ orr x2, x2, #ICC_SRE_EL2_SRE /* Enable system registers */
+ msr icc_sre_el2, x2
+2:
+
+ /* Set the address to return to our return address */
+ msr elr_el2, x30
+ isb
+
+ eret
+
+ .align 3
+.Lhyp_vectors:
+ .xword hyp_vectors - . /* offset from this slot, not an absolute VA */
+.Lsctlr_res1:
+ .quad SCTLR_RES1
+
+/*
+ * Compute the delta between the virtual address the kernel is linked at
+ * and the physical address it was loaded to; used to build the initial
+ * page tables.  Out: x29 = VA - PA, x28 = PA of virt_map, aligned down.
+ */
+ .globl get_virt_delta
+get_virt_delta:
+ /* Load the physical address of virt_map */
+ adr x28, virt_map
+ /* Load the virtual address of virt_map stored in virt_map */
+ ldr x29, [x28]
+ /* x29 = VA - PA; callers subtract it from a VA to obtain the PA */
+ sub x29, x29, x28
+ and x28, x28, #~0x0003ffff // aligns down to 256KiB; should be 2MB?
+
+ ret
+
+ .align 3
+virt_map:
+ .quad virt_map
+
+/*
+ * This builds the page tables containing the identity map, and the kernel
+ * virtual map.
+ *
+ * It relies on:
+ * We were loaded to an address that is on a 2MiB boundary
+ * All the memory must not cross a 1GiB boundary
+ * x28 contains the physical address we were loaded from
+ *
+ * There are 3 pages before that address for the page tables
+ * These pages are allocated aligned in .data
+ * The pages used are:
+ * - The identity (PA = VA) table (TTBR0)
+ * - The Kernel L1 table (TTBR1)
+ * - The PA == VA L2 table for kernel
+ */
+.Lpagetable:
+ .xword pagetable
+.Lpagetable_end:
+ .xword pagetable_end
+
+.Lesym:
+ .xword esym
+
+create_pagetables:
+ /* Save the Link register */
+ mov x5, x30
+
+ /* Clean the page table */
+ adr x6, .Lpagetable
+ ldr x6, [x6]
+ sub x6, x6, x29 // VA -> PA
+ mov x26, x6
+ adr x27, .Lpagetable_end
+ ldr x27, [x27]
+ sub x27, x27, x29 // VA -> PA
+1:
+ stp xzr, xzr, [x6], #16 /* zero 64 bytes per iteration */
+ stp xzr, xzr, [x6], #16
+ stp xzr, xzr, [x6], #16
+ stp xzr, xzr, [x6], #16
+ cmp x6, x27
+ b.lo 1b
+
+ /*
+ * Build the TTBR1 maps.
+ */
+
+ /* Find the size of the kernel */
+ adr x6, .Lstart
+ ldr x6, [x6]
+ sub x6, x6, x29 /* x6 = PA of _start */
+
+ /* End is the symbol address */
+ adr x7, .Lesym
+ ldr x7, [x7]
+ sub x7, x7, x29 /* x7 = PA of the esym variable */
+ ldr x7, [x7] /* x7 = value stored in esym (a VA) */
+ sub x7, x7, x29 /* ... converted to a PA */
+
+ /* Find the end - begin */
+ sub x8, x7, x6
+ /* Get the number of L2 entries to fill, rounded down */
+ lsr x10, x8, #(L2_SHIFT)
+ /* Add 4 MiB for any rounding above and the module data */
+ add x10, x10, #2
+
+ /* Create the kernel space L2 table */
+ mov x6, x26 // pagetable:
+ mov x7, #NORMAL_MEM
+ add x8, x28, x29 /* VA start = load PA + (VA - PA) */
+ mov x9, x28 /* PA start = load address */
+ bl build_l2_block_pagetable
+
+ /* Move to the l1 table */
+ add x26, x26, #PAGE_SIZE*2 // pagetable_l1_ttbr1:
+
+ /* Link the l1 -> l2 table */
+ mov x9, x6
+ mov x6, x26
+ bl link_l1_pagetable
+
+ /*
+ * Build the TTBR0 maps.
+ */
+ add x27, x26, #PAGE_SIZE * 2 // pagetable_l1_ttbr0:
+
+ mov x6, x27 /* The initial page table */
+#if defined(SOCDEV_PA) && defined(SOCDEV_VA)
+ /* Create a table for the UART */
+ mov x7, #DEVICE_MEM
+ mov x8, #(SOCDEV_VA) /* VA start */
+ mov x9, #(SOCDEV_PA) /* PA start */
+ mov x10, #1
+ bl build_l1_block_pagetable
+#endif
+
+ /* Create the VA = PA map for the block holding the page tables */
+ mov x7, #NORMAL_MEM // #NORMAL
+ mov x9, x27
+ mov x8, x9 /* VA start (== PA start) */
+ mov x10, #1
+ bl build_l1_block_pagetable
+
+ /* Create a mapping for the FDT */
+ mov x7, #NORMAL_MEM // #NORMAL
+ mov x9, x23
+ mov x8, x9 /* VA start (== PA start) */
+ mov x10, #1
+ bl build_l1_block_pagetable
+
+ /* Move to the l0 table */
+ add x27, x27, #PAGE_SIZE * 2 // pagetable_l0_ttbr0:
+
+ /* Link the l0 -> l1 table; x8 (L0 index) is still the FDT address */
+ mov x9, x6
+ mov x6, x27
+ mov x10, #1
+ bl link_l0_pagetable
+
+ /* Restore the Link register */
+ mov x30, x5
+ ret
+
+/*
+ * Builds an L0 -> L1 table descriptor
+ *
+ * This is a link for a 512GiB block of memory with up to 1GiB regions mapped
+ * within it by build_l1_block_pagetable.
+ *
+ * x6 = L0 table
+ * x8 = Virtual Address
+ * x9 = L1 PA (trashed)
+ * x10 = Entry count (trashed, must be at least 1)
+ * x11, x12 and x13 are trashed
+ */
+link_l0_pagetable:
+ /*
+ * Link an L0 -> L1 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L0_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L0 table descriptor */
+ mov x12, #L0_TABLE
+
+ /* Only use the output address bits */
+ lsr x9, x9, #PAGE_SHIFT
+1: orr x13, x12, x9, lsl #PAGE_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ sub x10, x10, #1 /* one entry done */
+ add x11, x11, #1 /* next L0 slot */
+ add x9, x9, #1 /* next page of L1 table */
+ cbnz x10, 1b
+
+ ret
+
+/*
+ * Builds an L1 -> L2 table descriptor
+ *
+ * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
+ * within it by build_l2_block_pagetable.
+ *
+ * x6 = L1 table
+ * x8 = Virtual Address
+ * x9 = L2 PA (trashed)
+ * x11, x12 and x13 are trashed
+ */
+link_l1_pagetable:
+ /*
+ * Link an L1 -> L2 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L1_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L1 table descriptor */
+ mov x12, #L1_TABLE
+
+ /* Only use the output address bits */
+ lsr x9, x9, #PAGE_SHIFT
+ orr x13, x12, x9, lsl #PAGE_SHIFT
+
+ /* Store the single entry (8 bytes per slot) */
+ str x13, [x6, x11, lsl #3]
+
+ ret
+
+/*
+ * Builds count 1 GiB page table entry
+ * x6 = L1 table
+ * x7 = Type (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
+ * x8 = VA start
+ * x9 = PA start (trashed)
+ * x10 = Entry count (trashed, must be at least 1)
+ * x11, x12 and x13 are trashed
+ */
+build_l1_block_pagetable:
+ /*
+ * Build the L1 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L1_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L1 block entry */
+ lsl x12, x7, #2 /* the type is placed starting at bit 2 */
+ orr x12, x12, #L1_BLOCK
+ orr x12, x12, #(ATTR_AF)
+ orr x12, x12, ATTR_SH(SH_INNER)
+
+ /* Only use the output address bits */
+ lsr x9, x9, #L1_SHIFT
+
+ /* Set the physical address for this virtual address */
+1: orr x13, x12, x9, lsl #L1_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ sub x10, x10, #1 /* one entry done */
+ add x11, x11, #1 /* next L1 slot */
+ add x9, x9, #1 /* next output block */
+ cbnz x10, 1b
+
+ ret
+
+/*
+ * Builds count 2 MiB page table entry
+ * x6 = L2 table
+ * x7 = Type (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
+ * x8 = VA start
+ * x9 = PA start (trashed)
+ * x10 = Entry count (trashed, must be at least 1)
+ * x11, x12 and x13 are trashed
+ */
+build_l2_block_pagetable:
+ /*
+ * Build the L2 table entry.
+ */
+ /* Find the table index */
+ lsr x11, x8, #L2_SHIFT
+ and x11, x11, #Ln_ADDR_MASK
+
+ /* Build the L2 block entry */
+ lsl x12, x7, #2 /* the type is placed starting at bit 2 */
+ orr x12, x12, #L2_BLOCK
+ orr x12, x12, #(ATTR_AF)
+ orr x12, x12, ATTR_SH(SH_INNER)
+
+ /* Only use the output address bits */
+ lsr x9, x9, #L2_SHIFT
+
+ /* Set the physical address for this virtual address */
+1: orr x13, x12, x9, lsl #L2_SHIFT
+
+ /* Store the entry */
+ str x13, [x6, x11, lsl #3]
+
+ sub x10, x10, #1 /* one entry done */
+ add x11, x11, #1 /* next L2 slot */
+ add x9, x9, #1 /* next output block */
+ cbnz x10, 1b
+
+ ret
+
+start_mmu: /* In: x27 = TTBR0 table, x26 = TTBR1 table; trashes x1-x3 */
+ dsb sy
+
+ /* Load the exception vectors */
+ ldr x2, =exception_vectors
+ msr vbar_el1, x2
+
+ /* Load ttbr0 and ttbr1 */
+ msr ttbr0_el1, x27
+ msr ttbr1_el1, x26
+ isb
+
+ /* Clear the Monitor Debug System control register */
+ msr mdscr_el1, xzr
+
+ /* Invalidate the TLB */
+ tlbi vmalle1is
+
+ ldr x2, mair
+ msr mair_el1, x2
+
+ /*
+ * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
+ * Some machines have physical memory mapped >512GiB, which can not
+ * be identity-mapped using the default 39 VA bits. Thus, use
+ * 48 VA bits for now and switch back to 39 after the VA jump.
+ */
+ ldr x2, tcr
+ mrs x3, id_aa64mmfr0_el1
+ bfi x2, x3, #32, #3 /* low 3 bits of x3 -> bits [34:32] of x2 */
+ msr tcr_el1, x2
+
+ /* Setup SCTLR */
+ ldr x2, sctlr_set
+ ldr x3, sctlr_clear
+ mrs x1, sctlr_el1
+ bic x1, x1, x3 /* Clear the required bits */
+ orr x1, x1, x2 /* Set the required bits */
+ msr sctlr_el1, x1
+ isb
+
+ ret
+ .globl switch_mmu_kernel
+switch_mmu_kernel: /* In: x0 = value to load into ttbr1_el1 */
+ dsb sy
+ /* Invalidate the TLB */
+ tlbi vmalle1is
+ /* Load ttbr1 (kernel) */
+ msr ttbr1_el1, x0
+ isb
+ ret
+
+
+ .align 3
+mair:
+ /* Index 0: device, 1: normal no cache, 2: write-back, 3: write-through */
+ .quad MAIR_ATTR(0x00, 0) | \
+ MAIR_ATTR(0x44, 1) | \
+ MAIR_ATTR(0xff, 2) | \
+ MAIR_ATTR(0x88, 3)
+tcr:
+ .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
+ TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
+sctlr_set:
+ /* Bits start_mmu sets in sctlr_el1 */
+ .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
+ SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | SCTLR_M)
+sctlr_clear:
+ /* Bits start_mmu clears in sctlr_el1 */
+ .quad (SCTLR_EE | SCTLR_EOE | SCTLR_WXN | SCTLR_UMA | SCTLR_ITD | \
+ SCTLR_THEE | SCTLR_CP15BEN | SCTLR_A)
+
+ .text
diff --git a/sys/arch/arm64/conf/Makefile.arm64 b/sys/arch/arm64/conf/Makefile.arm64
index ae0251b0b54..bf02961fee5 100644
--- a/sys/arch/arm64/conf/Makefile.arm64
+++ b/sys/arch/arm64/conf/Makefile.arm64
@@ -32,9 +32,13 @@ CMACHFLAGS= -march=armv8-a+nofp+nosimd \
-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer \
-ffixed-x18
CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS}
+SORTR= sort -R
.if ${IDENT:M-DNO_PROPOLICE}
CMACHFLAGS+= -fno-stack-protector
.endif
+.if ${IDENT:M-DSMALL_KERNEL}
+SORTR= cat
+.endif
DEBUG?= -g
COPTS?= -O2
@@ -68,8 +72,8 @@ NORMAL_S= ${CC} ${AFLAGS} ${CPPFLAGS} -c $<
# ${SYSTEM_LD_HEAD}
# ${SYSTEM_LD} swapxxx.o
# ${SYSTEM_LD_TAIL}
-SYSTEM_HEAD= locore.o param.o ioconf.o
-SYSTEM_OBJ= ${SYSTEM_HEAD} ${OBJS}
+SYSTEM_HEAD= locore0.o gap.o
+SYSTEM_OBJ= ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o
SYSTEM_DEP= Makefile ${SYSTEM_OBJ}
SYSTEM_LD_HEAD= @rm -f $@
SYSTEM_LD_HEAD+=; \
@@ -78,7 +82,8 @@ SYSTEM_LD_HEAD+=; \
-e 's/@KERNEL_BASE_VIRT@/${KERNEL_BASE_VIRT}/' > ldscript
SYSTEM_LD= @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \
umask 007; \
- ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS}
+ echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \
+ ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder`
SYSTEM_LD_TAIL= @${SIZE} $@
.if ${DEBUG} == "-g"
@@ -122,8 +127,15 @@ vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP}
sh $S/conf/newvers.sh
${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c
+gap.S: ${SYSTEM_SWAP_DEP} Makefile $S/conf/makegap.sh
+ sh $S/conf/makegap.sh 0xcc > gap.S
+
+gap.o: gap.S
+ ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S
+
clean:
- rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c
+ rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} \
+ gap.S lorder param.c
cleandir: clean
rm -f Makefile *.h ioconf.c options machine ${_mach} vers.c
@@ -135,9 +147,10 @@ db_structinfo.h: $S/ddb/db_structinfo.c $S/ddb/parse_structinfo.pl
objdump -g db_structinfo.o | perl $S/ddb/parse_structinfo.pl > $@
rm -f db_structinfo.o
-locore.o: ${_archdir}/${_arch}/locore.S assym.h
+locore0.o: ${_archdir}/${_arch}/locore0.S assym.h
in_cksum_arm.o fiq_subr.o bcopyinout.o copystr.o sigcode.o copy.o: assym.h
vectors.o cpuswitch.o exception.o bcopy_page.o irq_dispatch.o support.o: assym.h
+locore.o: assym.h
# The install target can be redefined by putting a
# install-kernel-${MACHINE_NAME} target into /etc/mk.conf
diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64
index 8dae608f001..b792844ec28 100644
--- a/sys/arch/arm64/conf/files.arm64
+++ b/sys/arch/arm64/conf/files.arm64
@@ -14,6 +14,7 @@ file arch/arm64/arm64/copystr.S
file arch/arm64/arm64/cpuswitch.S
file arch/arm64/arm64/conf.c
file arch/arm64/arm64/disksubr.c
+file arch/arm64/arm64/locore.S
file arch/arm64/arm64/machdep.c
file arch/arm64/arm64/mem.c
file arch/arm64/arm64/pmap.c