This patch adds code to the Octeon platform that copies U-Boot from its
bootrom location to a different location (TEXT_BASE). It's used in
this case to copy the complete U-Boot image into L2 cache, which
greatly improves the bootup time - especially with regard to the
very long and complex DDR4 init code.

The Kconfig symbol CONFIG_MIPS_MACH_EARLY_INIT is enabled with this
patch for Octeon.

Signed-off-by: Stefan Roese <s...@denx.de>

---

Changes in v2:
- Change mips_mach_early_init() as suggested by Daniel to make it
  easier to understand and smaller
- Drop CONFIG_BOARD_SIZE_LIMIT

 arch/mips/Kconfig                     |  1 +
 arch/mips/mach-octeon/lowlevel_init.S | 50 +++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 327fd4848a..bcf6f26457 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -114,6 +114,7 @@ config ARCH_OCTEON
        select DM
        select DM_SERIAL
        select MIPS_L2_CACHE
+       select MIPS_MACH_EARLY_INIT
        select MIPS_TUNE_OCTEON3
        select ROM_EXCEPTION_VECTORS
        select SUPPORTS_BIG_ENDIAN
diff --git a/arch/mips/mach-octeon/lowlevel_init.S 
b/arch/mips/mach-octeon/lowlevel_init.S
index d9aab38cde..fa87cb4e34 100644
--- a/arch/mips/mach-octeon/lowlevel_init.S
+++ b/arch/mips/mach-octeon/lowlevel_init.S
@@ -17,3 +17,53 @@ LEAF(lowlevel_init)
        jr      ra
         nop
        END(lowlevel_init)
+
+LEAF(mips_mach_early_init)
+       /* Copy the image from where it runs to its link address, return there */
+       move    s0, ra          /* bal below overwrites ra, keep caller's ra */
+
+       bal     __dummy         /* ra = run-time address of __dummy */
+        nop
+
+__dummy:
+       /* Get the actual address that we are running at */
+       PTR_LA  a7, __dummy
+       dsubu   t3, ra, a7      /* t3 = reloc offset (run-time - link address) */
+
+       PTR_LA  t1, _start      /* t1 = copy destination (link address) */
+       daddu   t0, t1, t3      /* t0 = copy source (run-time address of _start) */
+
+       /* Calculate end address of copy loop */
+       PTR_LA  t2, _end
+       daddiu  t2, t2, 0x4000  /* Increase size to include appended DTB */
+       daddiu  t2, t2, 127
+       dins    t2, zero, 0, 7  /* Round up to cache line (128 bytes), 64-bit safe */
+
+       /* Copy ourself to the L2 cache from flash, 32 bytes at a time */
+1:
+       ld      a0, 0(t0)
+       ld      a1, 8(t0)
+       ld      a2, 16(t0)
+       ld      a3, 24(t0)
+       sd      a0, 0(t1)
+       sd      a1, 8(t1)
+       sd      a2, 16(t1)
+       sd      a3, 24(t1)
+       daddiu  t0, t0, 32      /* 64-bit pointer increments, like the math above */
+       daddiu  t1, t1, 32
+       bne     t1, t2, 1b      /* Exact-match exit: needs _start 32-byte aligned */
+        nop
+
+       sync
+
+       /*
+        * Return to start.S now running from TEXT_BASE, which points
+        * to DRAM address space, which effectively is L2 cache now.
+        * This speeds up the init process extremely, especially the
+        * DDR init code.
+        */
+       dsubu   s0, s0, t3      /* Fixup return address with reloc offset */
+       jr.hb   s0              /* Jump back with hazard barrier */
+        nop
+
+       END(mips_mach_early_init)
-- 
2.27.0

Reply via email to