On Cortex-A7 there is a magic SMP bit in the Auxiliary Control
Register that "Enables coherent requests to the processor".  The
Cortex-A7 Technical Reference Manual mentions that:

  When coherent requests are disabled:

  * loads to cacheable memory are not cached by the processor.

So effectively, not setting the bit means that we would be running the
CPU with its caches disabled.  We do attempt to set this bit, but the
code actually toggled the bit.  So if the firmware had set the bit,
like it does on my Banana Pi, we would unset it.  The result was that
the CPU was running at glacial speed.

Now the Cortex-A7 TRM also has the following note:

  You must ensure this bit is set to 1 before the caches and MMU are
  enabled, or any cache and TLB maintenance operations are
  performed. The only time this bit is set to 0 is during a processor
  power-down sequence.

So we really should be setting the Auxiliary Control Register before
we enable the MMU and caches by setting the System Control Register.
The diff below fixes these issues and brings in some symbolic
constants for the Auxiliary Control Register bits from NetBSD.

Tested on both Cortex-A9 and Cortex-A7.

ok?


Index: arch/arm/arm/cpufunc.c
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/cpufunc.c,v
retrieving revision 1.44
diff -u -p -r1.44 cpufunc.c
--- arch/arm/arm/cpufunc.c      10 Aug 2016 21:22:43 -0000      1.44
+++ arch/arm/arm/cpufunc.c      13 Aug 2016 18:23:39 -0000
@@ -568,6 +568,26 @@ armv7_setup()
        uint32_t auxctrl, auxctrlmask;
        uint32_t cpuctrl, cpuctrlmask;
 
+       auxctrl = auxctrlmask = 0;
+
+       switch (cputype & CPU_ID_CORTEX_MASK) {
+       case CPU_ID_CORTEX_A5:
+       case CPU_ID_CORTEX_A9:
+               /* Cache and TLB maintenance broadcast */
+#ifdef notyet
+               auxctrlmask |= CORTEXA9_AUXCTL_FW;
+               auxctrl |= CORTEXA9_AUXCTL_FW;
+#endif
+               /* FALLTHROUGH */
+       case CPU_ID_CORTEX_A7:
+       case CPU_ID_CORTEX_A15:
+       case CPU_ID_CORTEX_A17:
+               /* Set SMP to allow LDREX/STREX */
+               auxctrlmask |= CORTEXA9_AUXCTL_SMP;
+               auxctrl |= CORTEXA9_AUXCTL_SMP;
+               break;
+       }
+
        cpuctrlmask = CPU_CONTROL_MMU_ENABLE
            | CPU_CONTROL_AFLT_ENABLE
            | CPU_CONTROL_DC_ENABLE
@@ -590,29 +610,16 @@ armv7_setup()
        /* Clear out the cache */
        cpu_idcache_wbinv_all();
 
+       /*
+        * Set the auxilliary control register first, as the SMP bit
+        * needs to be set to 1 before the caches and the MMU are
+        * enabled.
+        */
+       cpu_auxcontrol(auxctrlmask, auxctrl);
+
        /* Set the control register */
        curcpu()->ci_ctrl = cpuctrl;
        cpu_control(cpuctrlmask, cpuctrl);
-
-       auxctrl = auxctrlmask = 0;
-
-       switch (cputype & CPU_ID_CORTEX_MASK) {
-       case CPU_ID_CORTEX_A5:
-       case CPU_ID_CORTEX_A9:
-               /* Cache and TLB maintenance broadcast */
-#ifdef notyet
-               auxctrl |= (1 << 0);
-#endif
-               /* FALLTHROUGH */
-       case CPU_ID_CORTEX_A7:
-       case CPU_ID_CORTEX_A15:
-       case CPU_ID_CORTEX_A17:
-               /* Set SMP to allow LDREX/STREX */
-               auxctrl |= (1 << 6);
-               break;
-       }
-
-       cpu_auxcontrol(auxctrlmask, auxctrl);
 
        /* And again. */
        cpu_idcache_wbinv_all();
Index: arch/arm/include/armreg.h
===================================================================
RCS file: /cvs/src/sys/arch/arm/include/armreg.h,v
retrieving revision 1.33
diff -u -p -r1.33 armreg.h
--- arch/arm/include/armreg.h   6 Aug 2016 16:46:25 -0000       1.33
+++ arch/arm/include/armreg.h   13 Aug 2016 18:23:39 -0000
@@ -269,6 +269,16 @@
 #define XSCALE_AUXCTL_MD_WT    0x00000020 /* mini-D$ wt, read-allocate */
 #define XSCALE_AUXCTL_MD_MASK  0x00000030
 
+/* Cortex-A9 Auxiliary Control Register (CP15 register 1, opcode 1) */
+#define CORTEXA9_AUXCTL_FW     (1 << 0) /* Cache and TLB updates broadcast */
+#define CORTEXA9_AUXCTL_L2PE   (1 << 1) /* Prefetch hint enable */
+#define CORTEXA9_AUXCTL_L1PE   (1 << 2) /* Data prefetch hint enable */
+#define CORTEXA9_AUXCTL_WR_ZERO        (1 << 3) /* Ena. write full line of 0s 
mode */
+#define CORTEXA9_AUXCTL_SMP    (1 << 6) /* Coherency is active */
+#define CORTEXA9_AUXCTL_EXCL   (1 << 7) /* Exclusive cache bit */
+#define CORTEXA9_AUXCTL_ONEWAY (1 << 8) /* Allocate in on cache way only */
+#define CORTEXA9_AUXCTL_PARITY (1 << 9) /* Support parity checking */
+
 /* Cache type register definitions */
 #define CPU_CT_ISIZE(x)                ((x) & 0xfff)           /* I$ info */
 #define CPU_CT_DSIZE(x)                (((x) >> 12) & 0xfff)   /* D$ info */

Reply via email to