Module Name: src
Committed By: matt
Date: Sun Mar 30 15:20:54 UTC 2014
Modified Files:
src/sys/arch/arm/cortex: a9_mpsubr.S
Log Message:
Improve MP startup code. We now use a two-stage startup: after creating
the initial L1PT and turning on the MMU/caches, we spin up the secondary CPUs
and wait for them to reach the same state as the boot processor. Once the
real L1PT is initialized and in use, the secondary CPUs are kicked so they can
switch to it (and the initial L1PT is discarded). Finally, each secondary waits
until NetBSD kicks it again, then loads its stack from the idlelwp, hatches
the cpu, and jumps to idle_loop.
To generate a diff of this commit:
cvs rdiff -u -r1.13 -r1.14 src/sys/arch/arm/cortex/a9_mpsubr.S
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/arm/cortex/a9_mpsubr.S
diff -u src/sys/arch/arm/cortex/a9_mpsubr.S:1.13 src/sys/arch/arm/cortex/a9_mpsubr.S:1.14
--- src/sys/arch/arm/cortex/a9_mpsubr.S:1.13 Fri Feb 21 22:22:48 2014
+++ src/sys/arch/arm/cortex/a9_mpsubr.S Sun Mar 30 15:20:54 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: a9_mpsubr.S,v 1.13 2014/02/21 22:22:48 matt Exp $ */
+/* $NetBSD: a9_mpsubr.S,v 1.14 2014/03/30 15:20:54 matt Exp $ */
/*-
* Copyright (c) 2012 The NetBSD Foundation, Inc.
* All rights reserved.
@@ -37,40 +37,48 @@
#include <arm/cortex/scu_reg.h>
#include "assym.h"
+//#define MPDEBUG
-/* We'll modify va and pa at run time so we can use relocatable addresses. */
+// We'll modify va and pa at run time so we can use relocatable addresses; n_sec is packed into the low bits of va and attr into the low bits of pa.
#define MMU_INIT(va,pa,n_sec,attr) \
- .word va ; \
- .word pa ; \
- .word n_sec ; \
- .word attr ;
-
-/*
- * Set up a preliminary mapping in the MMU to allow us to run
- * at KERNEL_BASE with caches on.
- */
+ .word (va)|(n_sec) ; \
+ .word (pa)|(attr) ; \
+
+// Set up a preliminary mapping in the MMU to allow us to run at KERNEL_BASE
+// with caches on. If we are MULTIPROCESSOR, save the TTB address.
+//
arm_boot_l1pt_init:
- mov ip, r1 @ save mmu table addr
- /* Build page table from scratch */
- mov r1, r0 /* Start address to clear memory. */
- /* Zero the entire table so all virtual addresses are invalid. */
- mov r2, #L1_TABLE_SIZE /* in bytes */
- mov r3, #0
- mov r4, r3
- mov r5, r3
- mov r6, r3
- mov r7, r3
- mov r8, r3
- mov r10, r3
- mov r11, r3
-1: stmia r1!, {r3-r8,r10-r11}
- stmia r1!, {r3-r8,r10-r11}
- stmia r1!, {r3-r8,r10-r11}
- stmia r1!, {r3-r8,r10-r11}
- subs r2, r2, #(4 * 4 * 8) /* bytes per loop */
- bne 1b
+#if defined(MULTIPROCESSOR)
+#if defined(KERNEL_BASES_EQUAL)
+ movw r3, #:lower16:cortex_mmuinfo
+ movt r3, #:upper16:cortex_mmuinfo
+#else
+ adr r3, arm_boot_l1pt_init
+ movw r2, #:lower16:cortex_mmuinfo
+ movt r2, #:upper16:cortex_mmuinfo
+ bfi r3, r2, #0, #28
+#endif
+ str r0, [r3]
+
+ // Make sure the info makes it into memory
+ mcr p15, 0, r3, c7, c10, 1 // writeback the cache line
+ dsb
+#endif
- /* Now create our entries per the mmu_init_table. */
+ mov ip, r1 // save mmu table addr
+ // Build page table from scratch
+ mov r1, r0 // Start address to clear memory.
+ // Zero the entire table so all virtual addresses are invalid.
+ add r2, r1, #L1_TABLE_SIZE // Ending address
+ mov r4, #0
+ mov r5, #0
+ mov r6, #0
+ mov r7, #0
+1: stmia r1!, {r4-r7} // 16 bytes at a time
+ cmp r1, r2
+ blt 1b
+
+ // Now create our entries per the mmu_init_table.
l1table .req r0
va .req r1
pa .req r2
@@ -78,7 +86,11 @@ arm_boot_l1pt_init:
attr .req r4
itable .req r5
- mov itable, ip @ reclaim table address
+ mov attr, #0
+ mrc p15, 0, r3, c0, c0, 5 // MPIDR read
+ cmp r3, #0 // not zero?
+ movne attr, #L1_S_V6_S // yes, shareable attribute
+ mov itable, ip // reclaim table address
b 3f
2: str pa, [l1table, va, lsl #2]
@@ -87,20 +99,18 @@ arm_boot_l1pt_init:
subs n_sec, n_sec, #1
bhi 2b
-3: ldmia itable!, {va,pa,n_sec,attr}
- /* Convert va to l1 offset: va = 4 * (va >> L1_S_SHIFT) */
+3: ldmia itable!, {va, pa}
+ // Convert va to l1 offset: va = 4 * (va >> L1_S_SHIFT)
+ ubfx n_sec, va, #0, #L1_S_SHIFT
lsr va, va, #L1_S_SHIFT
- /* Convert pa to l1 entry: pa = (pa & L1_S_FRAME) | attr */
-#ifdef _ARM_ARCH_7
- bfc pa, #0, #L1_S_SHIFT
-#else
- lsr pa, pa, #L1_S_SHIFT
- lsl pa, pa, #L1_S_SHIFT
-#endif
- orr pa, pa, attr
- cmp n_sec, #0
+
+ // Do we need to add sharing for this?
+ tst pa, #(L1_S_C|L1_S_B) // is this entry cacheable?
+ orrne pa, pa, attr // add sharing
+
+4: cmp n_sec, #0
bne 2b
- bx lr @ return
+ bx lr // return
.unreq va
.unreq pa
@@ -109,6 +119,9 @@ arm_boot_l1pt_init:
.unreq itable
.unreq l1table
+//
+// Coprocessor register initialization values
+//
#if defined(CPU_CORTEXA8)
#undef CPU_CONTROL_SWP_ENABLE // not present on A8
#define CPU_CONTROL_SWP_ENABLE 0
@@ -126,6 +139,8 @@ arm_boot_l1pt_init:
#define CPU_CONTROL_AFLT_ENABLE_SET CPU_CONTROL_AFLT_ENABLE
#endif
+// bits to set in the Control Register
+//
#define CPU_CONTROL_SET \
(CPU_CONTROL_MMU_ENABLE | \
CPU_CONTROL_AFLT_ENABLE_SET | \
@@ -136,124 +151,120 @@ arm_boot_l1pt_init:
CPU_CONTROL_EX_BEND_SET | \
CPU_CONTROL_UNAL_ENABLE)
+// bits to clear in the Control Register
+//
#define CPU_CONTROL_CLR \
(CPU_CONTROL_AFLT_ENABLE_CLR)
arm_cpuinit:
- /*
- * In theory, because the MMU is off, we shouldn't need all of this,
- * but let's not take any chances and do a typical sequence to set
- * the Translation Table Base.
- */
+ // Because the MMU may already be on, do a typical sequence to set
+ // the Translation Table Base(s). Entered with r0 = TTB, lr = return.
mov ip, lr
- mov r10, r0
+ mov r10, r0 // save TTBR value passed in r0
mov r1, #0
mcr p15, 0, r1, c7, c5, 0 // invalidate I cache
- mrc p15, 0, r2, c1, c0, 0 // read SCTRL
+ mrc p15, 0, r2, c1, c0, 0 // SCTLR read
movw r1, #(CPU_CONTROL_DC_ENABLE|CPU_CONTROL_IC_ENABLE)
bic r2, r2, r1 // clear I+D cache enable
#ifdef __ARMEB__
- /*
- * SCTRL.EE determines the endianness of translation table lookups.
- * So we need to make sure it's set before starting to use the new
- * translation tables (which are big endian).
- */
+ // SCTLR.EE determines the endianness of translation table lookups.
+ // So we need to make sure it's set before starting to use the new
+ // translation tables (which are big endian).
+ //
orr r2, r2, #CPU_CONTROL_EX_BEND
bic r2, r2, #CPU_CONTROL_MMU_ENABLE
- pli [pc, #32] /* preload the next few cachelines */
+ pli [pc, #32] // preload the next few cachelines
pli [pc, #64]
pli [pc, #96]
pli [pc, #128]
#endif
- mcr p15, 0, r2, c1, c0, 0 /* write SCTRL */
+ mcr p15, 0, r2, c1, c0, 0 // SCTLR write (I+D caches now disabled)
XPUTC(#70)
- dsb /* Drain the write buffers. */
+ dsb // Drain the write buffers.
1:
XPUTC(#71)
- mrc p15, 0, r1, c0, c0, 5 /* get MPIDR */
+ mrc p15, 0, r1, c0, c0, 5 // MPIDR read
cmp r1, #0
- orrlt r10, r10, #0x5b /* MP, cachable (Normal WB) */
- orrge r10, r10, #0x1b /* Non-MP, cacheable, normal WB */
- mcr p15, 0, r10, c2, c0, 0 /* Set Translation Table Base */
+ orrlt r10, r10, #0x5b // negative (bit 31 set): MP, cacheable (Normal WB)
+ orrge r10, r10, #0x1b // otherwise: Non-MP, cacheable, normal WB
+ XPUTC(#48)
+ mcr p15, 0, r10, c2, c0, 0 // TTBR0 write
+#if defined(ARM_MMU_EXTENDED)
+ // When using split TTBRs, we need to set both since the physical
+ // addresses we were/are using might be in either.
+ XPUTC(#49)
+ mcr p15, 0, r10, c2, c0, 1 // TTBR1 write
+#endif
XPUTC(#72)
- mov r1, #0
- mcr p15, 0, r1, c2, c0, 2 /* Set Translation Table Control */
+#if defined(ARM_MMU_EXTENDED)
+ XPUTC(#49)
+ mov r1, #TTBCR_S_N_1 // make sure TTBCR_S_N is 1
+#else
+ XPUTC(#48)
+ mov r1, #0 // make sure TTBCR is 0
+#endif
+ mcr p15, 0, r1, c2, c0, 2 // TTBCR write
XPUTC(#73)
mov r1, #0
- mcr p15, 0, r1, c8, c7, 0 /* Invalidate TLBs */
+ mcr p15, 0, r1, c8, c7, 0 // TLBIALL (just this core)
- /* Set the Domain Access register. Very important! */
XPUTC(#74)
- mov r1, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
- mcr p15, 0, r1, c3, c0, 0
+ mov r1, #0 // get KERNEL_PID
+ mcr p15, 0, r1, c13, c0, 1 // CONTEXTIDR write
- /*
- * Enable the MMU, etc.
- */
+ // Set the Domain Access register. Very important!
XPUTC(#75)
- mrc p15, 0, r0, c1, c0, 0
+ mov r1, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
+ mcr p15, 0, r1, c3, c0, 0 // DACR write
+
+ //
+ // Enable the MMU, etc.
+ //
+ XPUTC(#76)
+ mrc p15, 0, r1, c1, c0, 0 // SCTLR read
movw r3, #:lower16:CPU_CONTROL_SET
#if (CPU_CONTROL_SET & 0xffff0000)
movt r3, #:upper16:CPU_CONTROL_SET
#endif
- orr r0, r0, r3
+ orr r0, r1, r3 // r0 = current SCTLR | CPU_CONTROL_SET
#if defined(CPU_CONTROL_CLR) && (CPU_CONTROL_CLR != 0)
bic r0, r0, #CPU_CONTROL_CLR
#endif
+ //cmp r0, r1 // any changes to SCTLR?
+ //bxeq ip // no, then return.
+
pli 1f
-
dsb
- @ turn mmu on!
- mov r0, r0 /* fetch instruction cacheline */
-1: mcr p15, 0, r0, c1, c0, 0
-
- /*
- * Ensure that the coprocessor has finished turning on the MMU.
- */
- mrc p15, 0, r0, c0, c0, 0 /* Read an arbitrary value. */
- mov r0, r0 /* Stall until read completes. */
-1: XPUTC(#76)
- bx ip /* return */
+ // turn mmu on!
+ //
+ mov r0, r0 // fetch instruction cacheline
+1: mcr p15, 0, r0, c1, c0, 0 // SCTLR write
+
+ // Ensure that the coprocessor has finished turning on the MMU.
+ //
+ mrc p15, 0, r0, c0, c0, 0 // Read an arbitrary value.
+ mov r0, r0 // Stall until read completes.
+ XPUTC(#77)
-/*
- * Coprocessor register initialization values
- */
+ bx ip // return via the lr saved in ip on entry
.p2align 2
- /* bits to set in the Control Register */
-
#if defined(VERBOSE_INIT_ARM) && XPUTC_COM
#define TIMO 0x25000
#ifndef COM_MULT
#define COM_MULT 1
#endif
xputc:
-#ifdef MULTIPROCESSOR
- adr r3, xputc
- movw r2, #:lower16:comlock
- movt r2, #:upper16:comlock
- bfi r3, r2, #0, #28
- mov r2, #1
-10:
- ldrex r1, [r3]
- cmp r1, #0
- bne 10b
- strex r1, r2, [r3]
- cmp r1, #0
- bne 10b
- dsb
-#endif
-
mov r2, #TIMO
#ifdef CONADDR
movw r3, #:lower16:CONADDR
@@ -312,274 +323,422 @@ xputc:
subs r2, r2, #1
bne 3b
4:
-#ifdef MULTIPROCESSOR
- adr r3, xputc
- movw r2, #:lower16:comlock
- movt r2, #:upper16:comlock
- bfi r3, r2, #0, #28
- mov r0, #0
- str r0, [r3]
- dsb
-#endif
bx lr
-
-#ifdef MULTIPROCESSOR
- .pushsection .data
-comlock:
- .p2align 4
- .word 0 @ not in bss
- .p2align 4
-
- .popsection
-#endif /* MULTIPROCESSOR */
#endif /* VERBOSE_INIT_ARM */
+//
+// Perform the initialization of the Cortex core required by NetBSD.
+// Directly uses r0-r4, ip and r10 (saved lr); callees may clobber more.
+//
cortex_init:
- mov r10, lr @ save lr
+ mov r10, lr // save lr
- cpsid if, #PSR_SVC32_MODE
+ cpsid if, #PSR_SVC32_MODE // SVC32 with no interrupts
+ mov r0, #0
+ msr spsr_sxc, r0 // set SPSR[23:8] to known value
+
+#if defined(CPU_CORTEXA7) || defined(CPU_CORTEXA15)
+ //
+ // If SMP is already enabled, don't do anything.
+ //
+ mrc p15, 0, r0, c1, c0, 1 // ACTLR read
+ tst r0, #CORTEXA9_AUXCTL_SMP // test SMP
+ bxne r10 // return if set
+#endif
+
+ mrc p15, 0, r4, c1, c0, 0 // SCTLR read; r4 is written back in step 4a on every core
+#if defined(CPU_CORTEXA7)
+ //
+ // Before turning on SMP, turn off the caches and the MMU.
+ //
+ dsb
+ movw r1,#(CPU_CONTROL_IC_ENABLE|CPU_CONTROL_DC_ENABLE\
+ |CPU_CONTROL_MMU_ENABLE)
+ bic r0, r4, r1 // disable icache/dcache/mmu
+ mcr p15, 0, r0, c1, c0, 0 // SCTLR write
+ dsb
+ isb
+#endif
XPUTC(#64)
+#if defined(KERNEL_BASES_EQUAL)
+ bl _C_LABEL(armv7_icache_inv_all)
+#else
adr ip, cortex_init
movw r0, #:lower16:_C_LABEL(armv7_icache_inv_all)
movt r0, #:upper16:_C_LABEL(armv7_icache_inv_all)
bfi ip, r0, #0, #28
- blx ip @ toss i-cache
+ blx ip // toss i-cache
+#endif
-#ifdef CPU_CORTEXA9
- /*
- * Step 1a, invalidate the all cache tags in all ways on the SCU.
- */
+#if defined(CPU_CORTEXA5) || defined(CPU_CORTEXA9)
+ //
+ // Step 1a, invalidate all the cache tags in all ways on the SCU.
+ //
XPUTC(#65)
- mrc p15, 4, r3, c15, c0, 0 @ read cbar
- ldr r0, [r3, #SCU_CFG] @ read scu config
- and r0, r0, #7 @ get cpu max
- add r0, r0, #2 @ adjust to cpu num
- mov r1, #0xf @ select all ways
- lsl r1, r1, r0 @ shift into place
- str r1, [r3, #SCU_INV_ALL_REG] @ write scu invalidate all
+ mrc p15, 4, r3, c15, c0, 0 // read cbar
+#ifdef __ARMEB__
+ setend le
+#endif
+ ldr r0, [r3, #SCU_CFG] // read scu config
+ and r0, r0, #7 // get cpu max
+ add r0, r0, #2 // adjust to cpu num shift
+ mov r1, #0xf // select all ways
+ lsl r1, r1, r0 // shift into place
+ str r1, [r3, #SCU_INV_ALL_REG] // write scu invalidate all
+#ifdef __ARMEB__
+ setend be
+#endif
dsb
isb
#endif
- /*
- * Step 1b, invalidate the data cache
- */
+ //
+ // Step 1b, invalidate the data cache
+ //
XPUTC(#66)
+#if defined(KERNEL_BASES_EQUAL)
+ bl _C_LABEL(armv7_dcache_wbinv_all)
+#else
adr ip, cortex_init
movw r0, #:lower16:_C_LABEL(armv7_dcache_wbinv_all)
movt r0, #:upper16:_C_LABEL(armv7_dcache_wbinv_all)
bfi ip, r0, #0, #28
- blx ip @ writeback & toss d-cache
+ blx ip // writeback & toss d-cache
+#endif
XPUTC(#67)
-#ifdef CPU_CORTEXA9
- /*
- * Step 2, disable the data cache
- */
- mrc p15, 0, r2, c1, c0, 0 @ get system ctl register (save)
- bic r1, r2, #CPU_CONTROL_DC_ENABLE @ clear data cache enable
- mcr p15, 0, r1, c1, c0, 0 @ set system ctl register
+ //
+ // Check to see if we are really MP before enabling SMP mode
+ //
+ mrc p15, 0, r1, c0, c0, 5 // MPIDR get
+ ubfx r1, r1, #30, #2 // get MP bits
+ cmp r1, #2 // is it MP?
+ bxne r10 // no, return
+
+#ifndef CPU_CORTEXA7
+ //
+ // Step 2, disable the data cache
+ //
+ mrc p15, 0, r2, c1, c0, 0 // SCTLR read
+ bic r2, r2, #CPU_CONTROL_DC_ENABLE // clear data cache enable
+ mcr p15, 0, r2, c1, c0, 0 // SCTLR write
isb
XPUTC(#49)
+#endif
- /*
- * Step 3, enable the SCU (and set SMP mode)
- */
- mrc p15, 4, r3, c15, c0, 0 @ read cbar
- ldr r1, [r3, #SCU_CTL] @ read scu control
- orr r1, r1, #SCU_CTL_SCU_ENA @ set scu enable flag
- str r1, [r3, #SCU_CTL] @ write scu control
+#if defined(CPU_CORTEXA5) || defined(CPU_CORTEXA9)
+ //
+ // Step 3, enable the SCU
+ //
+ mrc p15, 4, r3, c15, c0, 0 // read cbar
+#ifdef __ARMEB__
+ setend le
+#endif
+ ldr r1, [r3, #SCU_CTL] // read scu control
+ orr r1, r1, #SCU_CTL_SCU_ENA // set scu enable flag
+ str r1, [r3, #SCU_CTL] // write scu control
+#ifdef __ARMEB__
+ setend be
+#endif
dsb
isb
XPUTC(#50)
+#endif /* CORTEXA5 || CORTEXA9 */
- /*
- * Step 4a, enable the data cache
- */
- orr r2, r2, #CPU_CONTROL_DC_ENABLE @ set data cache enable
- mcr p15, 0, r2, c1, c0, 0 @ reenable caches
- isb
- XPUTC(#51)
+#ifdef CPU_CORTEXA7
+ //
+ // The MMU is off. Make sure the TLB is invalidated before
+ // turning on SMP.
+ //
+ mov r0, #0
+ mcr p15, 0, r0, c8, c7, 0 // TLBIALL (just this core); pass the zeroed r0
#endif
-#ifdef MULTIPROCESSOR
- /*
- * Step 4b, set ACTLR.SMP=1 (and on A9, ACTRL.FX=1)
- */
- mrc p15, 0, r0, c1, c0, 1 @ read aux ctl
- orr r0, r0, #CORTEXA9_AUXCTL_SMP @ enable SMP
- mcr p15, 0, r0, c1, c0, 1 @ write aux ctl
+ //
+ // Step 4b, set ACTLR.SMP=1
+ //
+ mrc p15, 0, r0, c1, c0, 1 // ACTLR read
+ orr r0, r0, #CORTEXA9_AUXCTL_SMP // enable SMP
+ mcr p15, 0, r0, c1, c0, 1 // ACTLR write
isb
-#ifdef CPU_CORTEXA9
- orr r0, r0, #CORTEXA9_AUXCTL_FW @ enable cache/tlb/coherency
- mcr p15, 0, r0, c1, c0, 1 @ write aux ctl
+
+#if defined(MULTIPROCESSOR) && (defined(CPU_CORTEXA5) || defined(CPU_CORTEXA9))
+ //
+ // Step 4b (continued on A5/A9), set ACTLR.FW=1
+ //
+ orr r0, r0, #CORTEXA9_AUXCTL_FW // enable cache/tlb/coherency
+ mcr p15, 0, r0, c1, c0, 1 // ACTLR write
isb
+ dsb
#endif
- XPUTC(#52)
-#endif /* MULTIPROCESSOR */
+
+ //
+ // Step 4a, restore SCTLR from r4 (with the I and D caches enabled)
+ //
+ orr r4, r4, #CPU_CONTROL_IC_ENABLE // enable icache
+ orr r4, r4, #CPU_CONTROL_DC_ENABLE // enable dcache
+ mcr p15, 0, r4, c1, c0, 0 // SCTLR write
+ isb
+ XPUTC(#45)
bx r10
ASEND(cortex_init)
-/*
- * Secondary processors come here after exiting the SKU ROM.
- * Running native endian until we have SMP enabled. Since no data
- * is accessed, that shouldn't be a problem.
- */
-cortex_mpstart:
- cpsid if, #PSR_SVC32_MODE @ make sure we are in SVC mode
- mrs r0, cpsr @ fetch CPSR value
- msr spsr_sxc, r0 @ set SPSR[23:8] to known value
+#ifdef MULTIPROCESSOR
+ .pushsection .data
+ .align 2
+ .globl cortex_mmuinfo
+ .type cortex_mmuinfo,%object
+cortex_mmuinfo:
+ .space 4 // TTB address stored by arm_boot_l1pt_init, read by cortex_mpstart
+//
+// If something goes wrong in the initial mpstartup, catch and record it.
+//
+#ifdef MPDEBUG
+ .globl cortex_mpfault
+ .type cortex_mpfault,%object
+cortex_mpfault:
+ .space 16 // handler address, FSR, FAR, LR (stmia {r10-r12,lr} order)
+#endif
+ .popsection
+#endif // MULTIPROCESSOR
+// Secondary processors come here after exiting the SKU ROM.
+// Switches to kernel's endian almost immediately.
+//
+cortex_mpstart:
#ifndef MULTIPROCESSOR
- /*
- * If not MULTIPROCESSOR, drop CPU into power saving state.
- */
-3: wfe
+ //
+ // If not MULTIPROCESSOR, drop CPU into power saving state.
+ //
+3: wfi
b 3b
#else
- /*
- * Step 1, invalidate the caches
- */
- adr ip, cortex_mpstart
- movw r0, #:lower16:_C_LABEL(armv7_icache_inv_all)
- movt r0, #:upper16:_C_LABEL(armv7_icache_inv_all)
- bfi ip, r0, #0, #28
- blx ip @ toss i-cache
- adr ip, cortex_mpstart
- movw ip, #:lower16:_C_LABEL(armv7_dcache_inv_all)
- movt ip, #:upper16:_C_LABEL(armv7_dcache_inv_all)
- bfi ip, r0, #0, #28
- blx ip @ toss d-cache
-
-#if defined(CPU_CORTEXA9)
- /*
- * Step 2, wait for the SCU to be enabled
- */
- mrc p15, 4, r3, c15, c0, 0 @ read cbar
-1: ldr r0, [r3, #SCU_CTL] @ read scu control
- tst r0, #SCU_CTL_SCU_ENA @ enable bit set yet?
- bne 1b @ try again
-#endif
-
- /*
- * Step 3, set ACTLR.SMP=1 (and ACTRL.FX=1)
- */
- mrc p15, 0, r0, c1, c0, 1 @ read aux ctl
- orr r0, #CORTEXA9_AUXCTL_SMP @ enable SMP
- mcr p15, 0, r0, c1, c0, 1 @ write aux ctl
- mov r0, r0
-#if defined(CPU_CORTEXA9)
- orr r0, #CORTEXA9_AUXCTL_FW @ enable cache/tlb/coherency
- mcr p15, 0, r0, c1, c0, 1 @ write aux ctl
- mov r0, r0
-#endif
-
- /*
- * We should be in SMP mode now.
- */
- mrc p15, 0, r4, c0, c0, 5 @ get MPIDR
- and r4, r4, #7 @ get our cpu numder
-
#ifdef __ARMEB__
- setend be @ switch to BE now
+ setend be // switch to BE now
#endif
-#if defined(VERBOSE_INIT_ARM)
- add r0, r4, #48
- bl xputc
+#if 0
+ mrc p15, 0, r0, c1, c1, 2 // NSACR read
+ // Allow non-secure access to ACTLR[SMP]
+ orr r0, r0, #NSACR_SMP
+#ifdef FPU_VFP
+ // Allow non-secure access to VFP/Neon
+ orr r0, r0, #NSACR_VFPCP
+#endif
+ mcr p15, 0, r0, c1, c1, 2 // NSACR write
+
+ // Allow non-secure access to CPSR[A,F], go to non-secure mode
+ mrc p15, 0, r0, c1, c1, 0 // SCR read
+ orr r0, r0, #0x31
+ bic r0, r0, #0x0e // non monitor extabt, irq, fiq (operate on r0, the SCR value)
+ mcr p15, 0, r0, c1, c1, 0 // SCR write
+ isb
#endif
- /*
- * To access things are not in .start, we need to replace the upper
- * 4 bits of the address with where we are current executing.
- */
- adr r10, cortex_mpstart
- lsr r10, r10, #28
+ bl cortex_init
- movw r0, #:lower16:_C_LABEL(arm_cpu_hatched)
- movt r0, #:upper16:_C_LABEL(arm_cpu_hatched)
- bfi r0, r10, #28, #4 // replace top 4 bits
- add r0, r0, r10
- mov r5, #1
- lsl r5, r5, r4
- /*
- * We inline the atomic_or_32 call since we might be in a different
- * area of memory.
- */
-2: ldrex r1, [r0]
- orr r1, r1, r5
- strex r2, r1, [r0]
- cmp r2, #0
- bne 2b
-
- XPUTC(#97)
-
- /* Now we will wait for someone tell this cpu to start running */
- movw r0, #:lower16:_C_LABEL(arm_cpu_mbox)
- movt r0, #:upper16:_C_LABEL(arm_cpu_mbox)
- bfi r0, r10, #28, #4
- add r0, r0, r10
-3: dmb
- ldr r2, [r0]
- tst r2, r5
- wfeeq
- beq 3b
-
- XPUTC(#98)
- movw r0, #:lower16:_C_LABEL(arm_cpu_marker)
- movt r0, #:upper16:_C_LABEL(arm_cpu_marker)
- bfi r0, r10, #28, #4
- str pc, [r0]
+ // We are in SMP mode now.
+ //
- movw r0, #:lower16:_C_LABEL(kernel_l1pt)
- movt r0, #:upper16:_C_LABEL(kernel_l1pt)
- bfi r0, r10, #28, #4 /* get address of l1pt pvaddr */
- ldr r0, [r0, #PV_PA] /* Now get the phys addr */
- /*
- * After we turn on the MMU, we will no longer in .start so setup
- * return to rest of MP startup code in .text.
- */
+ // Get our initial temporary TTB so we can switch to it.
+#if defined(KERNEL_BASES_EQUAL)
+ movw r7, #:lower16:_C_LABEL(cortex_mmuinfo)
+ movt r7, #:upper16:_C_LABEL(cortex_mmuinfo)
+#else
+ adr r7, cortex_mpstart
+ movw r8, #:lower16:_C_LABEL(cortex_mmuinfo)
+ movt r8, #:upper16:_C_LABEL(cortex_mmuinfo)
+ bfi r7, r8, #0, #28
+#endif
+ dmb
+ ldr r0, [r7] // load saved TTB address (argument to arm_cpuinit)
+
+ // After we turn on the MMU, we will return to do rest of the
+ // MP startup code in .text.
+ //
movw lr, #:lower16:cortex_mpcontinuation
movt lr, #:upper16:cortex_mpcontinuation
b arm_cpuinit
-#endif /* MULTIPROCESSOR */
+#endif // MULTIPROCESSOR
ASEND(cortex_mpstart)
#ifdef MULTIPROCESSOR
.pushsection .text
cortex_mpcontinuation:
- /* MMU, L1, are now on. */
+#ifdef MPDEBUG
+ //
+ // Setup VBAR to catch errors
+ //
+ adr r2, cortex_mpvector
+ mcr p15, 0, r2, c12, c0, 0 // VBAR set
+ isb
+
+ mrc p15, 0, r0, c1, c0, 0 // SCTLR read
+#ifdef MULTIPROCESSOR
+ bic r0, r0, #CPU_CONTROL_VECRELOC // use VBAR
+#endif
+ mcr p15, 0, r0, c1, c0, 0 // SCTLR write
+ dsb
+ isb
+#endif
+
+#ifdef MPDEBUG
+ movw r9, #:lower16:_C_LABEL(arm_cpu_marker)
+ movt r9, #:upper16:_C_LABEL(arm_cpu_marker)
+ str pc, [r9]
+ str r2, [r9, #4]
+#endif
+
+ mrc p15, 0, r4, c0, c0, 5 // MPIDR get
+ and r4, r4, #7 // get our cpu number
+ mov r5, #1 // make a bitmask of it
+ lsl r5, r5, r4 // shift into position
+#ifdef MPDEBUG
+ str pc, [r9]
+#endif
+
+ mov r1, r5
+ movw r0, #:lower16:_C_LABEL(arm_cpu_hatched)
+ movt r0, #:upper16:_C_LABEL(arm_cpu_hatched)
+ bl _C_LABEL(atomic_or_32) // show we've hatched
+ sev
+
+ //
+ // Now we wait for cpu_boot_secondary_processors to kick us the
+ // first time. This means the kernel L1PT is ready for us to use.
+ //
+ movw r6, #:lower16:_C_LABEL(arm_cpu_mbox)
+ movt r6, #:upper16:_C_LABEL(arm_cpu_mbox)
+#ifdef MPDEBUG
+ str pc, [r9]
+#endif
+3: dmb // make stores visible
+ ldr r2, [r6] // load mbox
+ tst r2, r5 // is our bit set?
+#ifdef MPDEBUG
+ str pc, [r9]
+ str r2, [r9, #4]
+#endif
+ wfeeq // no, back to sleep
+ beq 3b // no, and try again
+
+#ifdef MPDEBUG
+ str pc, [r9]
+#endif
+
+ movw r0, #:lower16:_C_LABEL(kernel_l1pt)
+ movt r0, #:upper16:_C_LABEL(kernel_l1pt)
+ ldr r0, [r0, #PV_PA] // now get the phys addr
+#ifdef MPDEBUG
+ str pc, [r9]
+ str r0, [r9, #4]
+#endif
+#ifdef ARM_MMU_EXTENDED
+ mov r1, #0
+#endif
+ bl _C_LABEL(armv7_setttb) // set the TTB
- movw r0, #:lower16:_C_LABEL(arm_cpu_marker)
- movt r0, #:upper16:_C_LABEL(arm_cpu_marker)
- str pc, [r0]
+ mov r0, #DOMAIN_DEFAULT
+ mcr p15, 0, r0, c3, c0, 0 // DACR write
+
+ mov r1, #0
+ mcr p15, 0, r1, c8, c7, 0 // invalidate the TLB
+
+ mrc p15, 0, r1, c2, c0, 2 // TTBCR get
+ orr r1, r1, #TTBCR_S_PD0 // prevent lookups via TTBR0
+ mcr p15, 0, r1, c2, c0, 2 // TTBCR set (mcr: write the updated value back)
+
+#ifdef MPDEBUG
+ str pc, [r9] // we've got this far
+ str r4, [r9, #4]
+#endif
+
+ //
+ // Tell arm32_kvminit we've loaded the new TTB
+ //
+ mov r0, r6
+ mvn r1, r5 // pass inverted mask to clear
+ bl _C_LABEL(atomic_and_32)
+ sev // wake the master
+
+#ifdef MPDEBUG
+ str pc, [r9] // we've got this far
+#endif
+
+ // Wait for cpu_boot_secondary_processors the second time.
+ //
+4: dmb // data memory barrier
+ ldr r2, [r6] // load mbox
+ tst r2, r5 // is our bit set?
+ wfeeq // no, back to waiting
+ beq 4b // no, and try again
+
+#ifdef MPDEBUG
+ str pc, [r9] // we've got this far
+#endif
movw r0, #:lower16:cpu_info
- movt r0, #:upper16:cpu_info /* get pointer to cpu_infos */
- ldr r5, [r0, r4, lsl #2] /* load our cpu_info */
- ldr r6, [r5, #CI_IDLELWP] /* get the idlelwp */
- ldr r7, [r6, #L_PCB] /* now get its pcb */
- ldr sp, [r7, #PCB_KSP] /* finally, we can load our SP */
+ movt r0, #:upper16:cpu_info // get pointer to cpu_infos
+ ldr r5, [r0, r4, lsl #2] // load our cpu_info
+ ldr r6, [r5, #CI_IDLELWP] // get the idlelwp
+ ldr r7, [r6, #L_PCB] // now get its pcb
+ ldr sp, [r7, #PCB_KSP] // finally, we can load our SP
#ifdef TPIDRPRW_IS_CURCPU
- mcr p15, 0, r5, c13, c0, 4 /* squirrel away curcpu() */
+ mcr p15, 0, r5, c13, c0, 4 // squirrel away curcpu()
#elif defined(TPIDRPRW_IS_CURLWP)
- mcr p15, 0, r6, c13, c0, 4 /* squirrel away curlwp() */
+ mcr p15, 0, r6, c13, c0, 4 // squirrel away curlwp()
#else
#error either TPIDRPRW_IS_CURCPU or TPIDRPRW_IS_CURLWP must be defined
#endif
- str r6, [r5, #CI_CURLWP] /* and note we are running on it */
+ str r6, [r5, #CI_CURLWP] // and note we are running on it
- str pc, [r0] // r0 still have arm_cpu_marker
+#ifdef MPDEBUG
+ str pc, [r9] // r9 still has arm_cpu_marker
+#endif
mov r0, r5 // pass cpu_info
mov r1, r4 // pass cpu_id
movw r2, #:lower16:MD_CPU_HATCH // pass md_cpu_hatch
movt r2, #:upper16:MD_CPU_HATCH // pass md_cpu_hatch
bl _C_LABEL(cpu_hatch)
- b _C_LABEL(idle_loop)
+ b _C_LABEL(idle_loop) // never to return
ASEND(cortex_mpcontinuation)
- /* NOT REACHED */
+
+#ifdef MPDEBUG
+// Our exception table. We only care about prefetch/data/address aborts.
+//
+ .p2align 5
+cortex_mpvector:
+ b . @ reset
+ b . @ undefined
+ b . @ swi
+ b xprefetch_abort
+ b xdata_abort
+ b xaddress_abort
+ b . @ irq
+ b . @ fiq
+
+xprefetch_abort:
+ adr r10, xprefetch_abort
+ mrc p15, 0, r11, c5, c0, 1 // IFSR
+ mrc p15, 0, r12, c6, c0, 1 // IFAR
+ b xcommon_abort
+xdata_abort:
+ adr r10, xdata_abort
+ mrc p15, 0, r11, c5, c0, 0 // DFSR
+ mrc p15, 0, r12, c6, c0, 0 // DFAR
+ b xcommon_abort
+xaddress_abort:
+ adr r10, xaddress_abort
+ mrc p15, 0, r11, c5, c0, 0 // DFSR
+ mrc p15, 0, r12, c6, c0, 0 // DFAR
+xcommon_abort:
+ movw r8, #:lower16:cortex_mpfault // address of the fault record
+ movt r8, #:upper16:cortex_mpfault // address of the fault record
+ stmia r8, {r10-r12,lr} // save handler address (type), FSR, FAR, LR in that order
+ b . // loop forever
+#endif
.popsection
-#endif /* MULTIPROCESSOR */
+#endif // MULTIPROCESSOR