On 14.05.20 01:43, Daniel Schwierzeck wrote:


Am 02.05.20 um 10:59 schrieb Stefan Roese:
From: Aaron Williams <awilli...@marvell.com>

This patch adds very basic support for the Octeon III SoCs. Only
CFI parallel NOR flash and UART is supported for now.

Please note that the basic Octeon port does not include the DDR3/4
initialization yet. This will be added in some follow-up patches
later. To still use U-Boot with this port, the L2 cache (4MiB on
Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the
prompt on such boards.

Signed-off-by: Aaron Williams <awilli...@marvell.com>
Signed-off-by: Stefan Roese <s...@denx.de>
---

  MAINTAINERS                                  |    6 +
  arch/Kconfig                                 |    1 +
  arch/mips/Kconfig                            |   49 +-
  arch/mips/Makefile                           |    7 +
  arch/mips/cpu/Makefile                       |    4 +-
  arch/mips/include/asm/arch-octeon/cavm-reg.h |   42 +
  arch/mips/include/asm/arch-octeon/clock.h    |   24 +
  arch/mips/mach-octeon/Kconfig                |   92 ++
  arch/mips/mach-octeon/Makefile               |   10 +
  arch/mips/mach-octeon/clock.c                |   22 +
  arch/mips/mach-octeon/cpu.c                  |   55 +
  arch/mips/mach-octeon/dram.c                 |   27 +
  arch/mips/mach-octeon/include/ioremap.h      |   30 +
  arch/mips/mach-octeon/start.S                | 1241 ++++++++++++++++++
  14 files changed, 1608 insertions(+), 2 deletions(-)
  create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h
  create mode 100644 arch/mips/include/asm/arch-octeon/clock.h
  create mode 100644 arch/mips/mach-octeon/Kconfig
  create mode 100644 arch/mips/mach-octeon/Makefile
  create mode 100644 arch/mips/mach-octeon/clock.c
  create mode 100644 arch/mips/mach-octeon/cpu.c
  create mode 100644 arch/mips/mach-octeon/dram.c
  create mode 100644 arch/mips/mach-octeon/include/ioremap.h
  create mode 100644 arch/mips/mach-octeon/start.S


I couldn't completely understand the start.S. There is too much stuff in
it for an initial merge. But I don't see a hard reason against using the
generic start.S. So the first patch series should only implement the
bare minimum needed to boot from flash, init the boot CPU core, maybe
suspend all other cores and relocate to L2 cache.

I already worked on using the common start.S with minimal custom
additions for Octeon. This will be included in v2 of the base Octeon
patchset.

I know the current start.S is not really suited yet but I'm working on a
refactoring to add some more hooks which a SoC/CPU can implement. Once
we have your initial patch series and the refactoring in mainline, it
should be possible to gradually add more Octeon stuff like memory init.

Basic idea for refactoring is something like this:

reset:
     - mips_cpu_early_init()       # custom early init, fix errata
     - init CP0 registers, Watch registers
     - mips_cache_disable()        # set K0 CCA to uncached
     - mips_cpu_core_init()        # per CPU core init
                                   # -> generic code issues wait instr.
                                   # -> custom code can do custom init
                                   #    or custom boot protocols
     - mips_cm_map()               # init CM if available
     - mips_cache_init()           # init caches, set K0 CCA to non-coh.
     - mips_sram_init()            # init SRAM, Scratch RAM if avail
     - setup initial stack and global_data
     - debug_uart_init()
     - mips_mem_init()             # init external memory, C env avail.
     - init malloc_f
     - board_init_f()

Thanks Daniel, this sounds like a very good approach. I'll send v2 later
today (as it's already finished). We can then work on how to integrate
it, either by using the currently available functions like
mips_sram_init(), or by extending start.S (and the Octeon custom code)
with some other, newly introduced functions.

Thanks,
Stefan

+
+#endif /* __ASM_MACH_OCTEON_IOREMAP_H */
diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S
new file mode 100644
index 0000000000..acb967201a
--- /dev/null
+++ b/arch/mips/mach-octeon/start.S
@@ -0,0 +1,1241 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ *  Startup Code for OCTEON 64-bit CPU-core
+ *
+ *  Copyright (c) 2003 Wolfgang Denk <w...@denx.de>
+ *  Copyright 2004, 2005, 2010 - 2015 Cavium Inc..
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/asm.h>
+
+#define BOOT_VECTOR_NUM_WORDS          8
+
+#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET      0x70
+#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET     0x78
+
+#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW        0xdb00110ad358eacd
+#define OCTEON_BOOT_MOVEABLE_MAGIC1    OCTEON_BOOT_MOVEABLE_MAGIC1_RAW
+
+#define OCTEON_CIU_SOFT_RST            0x8001070000000740
+
+#define        OCTEON_L2C_WPAR_PP0             0x8001180080840000
+#define OCTEON_MIO_BOOT_BASE           0x8001180000000000
+#define OCTEON_MIO_BOOT_REG_CFG0_OFF   0x0000
+#define OCTEON_MIO_BOOT_LOC_CFG0_OFF   0x0080
+#define OCTEON_MIO_BOOT_LOC_ADR_OFF    0x0090
+#define OCTEON_MIO_BOOT_LOC_DAT_OFF    0x0098
+#define        OCTEON_MIO_RST_BOOT             0x8001180000001600
+#define OCTEON_MIO_BOOT_REG_CFG0       0x8001180000000000
+#define        OCTEON_MIO_BOOT_REG_TIM0        0x8001180000000040
+#define OCTEON_MIO_BOOT_LOC_CFG0       0x8001180000000080
+#define OCTEON_MIO_BOOT_LOC_ADR                0x8001180000000090
+#define OCTEON_MIO_BOOT_LOC_DAT                0x8001180000000098
+#define        OCTEON_MIO_FUSE_DAT3            0x8001180000001418
+#define OCTEON_L2D_FUS3                        0x80011800800007B8
+#define        OCTEON_LMC0_DDR_PLL_CTL         0x8001180088000258
+
+#define OCTEON_RST                     0x8001180006000000
+#define OCTEON_RST_BOOT_OFFSET         0x1600
+#define OCTEON_RST_SOFT_RST_OFFSET     0x1680
+#define OCTEON_RST_COLD_DATAX_OFFSET(X)        (0x17C0 + (X) * 8)
+#define OCTEON_RST_BOOT                        0x8001180006001600
+#define OCTEON_RST_SOFT_RST            0x8001180006001680
+#define OCTEON_RST_COLD_DATAX(X)       (0x80011800060017C0 + (X) * 8)
+
+#define OCTEON_OCX_COM_NODE            0x8001180011000000
+#define OCTEON_L2C_OCI_CTL             0x8001180080800020
+#define OCTEON_L2C_TAD_CTL             0x8001180080800018
+#define OCTEON_L2C_CTL                 0x8001180080800000
+
+#define OCTEON_DBG_DATA                        0x80011F00000001E8
+#define OCTEON_PCI_READ_CMD_E          0x80011F0000001188
+#define OCTEON_NPEI_DBG_DATA           0x80011F0000008510
+#define OCTEON_CIU_WDOG(X)             (0x8001070000000500 + (X) * 8)
+#define OCTEON_CIU_PP_POKE(X)          (0x8001070000000580 + (X) * 8)
+#define OCTEON_CIU3_WDOG(X)            (0x8001010000020000 + (X) * 8)
+#define OCTEON_CIU3_PP_POKE(X)         (0x8001010000030000 + (X) * 8)
+#define OCTEON_OCX_COM_LINKX_CTL(X)    (0x8001180011000020 + (X) * 8)
+#define OCTEON_SLI_CTL_STATUS          0x80011F0000028570
+#define OCTEON_GSERX_SCRATCH(X)                (0x8001180090000020 + (X) * 
0x1000000)
+
+/** PRID for CN56XX */
+#define OCTEON_PRID_CN56XX             0x04
+/** PRID for CN52XX */
+#define OCTEON_PRID_CN52XX             0x07
+/** PRID for CN63XX */
+#define OCTEON_PRID_CN63XX             0x90
+/** PRID for CN68XX */
+#define OCTEON_PRID_CN68XX             0x91
+/** PRID for CN66XX */
+#define OCTEON_PRID_CN66XX             0x92
+/** PRID for CN61XX */
+#define OCTEON_PRID_CN61XX             0x93
+/** PRID for CNF71XX */
+#define OCTEON_PRID_CNF71XX            0x94
+/** PRID for CN78XX */
+#define OCTEON_PRID_CN78XX             0x95
+/** PRID for CN70XX */
+#define OCTEON_PRID_CN70XX             0x96
+/** PRID for CN73XX */
+#define OCTEON_PRID_CN73XX             0x97
+/** PRID for CNF75XX */
+#define OCTEON_PRID_CNF75XX            0x98
+
+/*
+ * GETOFFSET(reg, func) - compute the difference between the address this
+ * code is executing at and the address it was linked at.
+ *
+ * The bal leaves in ra the run-time address of the .dword that follows its
+ * delay slot, and that .dword holds the link-time address of the same
+ * location (".").  The .balign 8 places the bal on an 8-byte boundary so the
+ * .dword, 8 bytes later, is naturally aligned for the ld.
+ *
+ * On exit: reg = ra - (link-time address of the .dword); ra is clobbered.
+ *
+ * The func argument is only used to create the func_mark label, so it must
+ * be unique.
+ */
+#define GETOFFSET(reg, func)   \
+       .balign 8;              \
+       bal     func ##_mark;   \
+       nop;                    \
+       .dword  .;              \
+func ##_mark:                  \
+       ld      reg, 0(ra);     \
+       dsubu   reg, ra, reg;
+
+/*
+ * JAL(func) - call func at its run-time address.
+ *
+ * Uses the same bal/.dword sequence as GETOFFSET to get the run-time minus
+ * link-time offset into t8, adds that offset to the linked address of func
+ * and calls the result through t9.  Clobbers t8, t9 and ra.
+ *
+ * The label created here is also named func_mark, so JAL(func) and
+ * GETOFFSET(reg, func) cannot both be expanded with the same func.
+ */
+#define JAL(func)              \
+       .balign 8;              \
+       bal     func ##_mark;   \
+        nop;                   \
+       .dword .;               \
+func ##_mark:                  \
+       ld      t8, 0(ra);      \
+       dsubu   t8, ra, t8;     \
+       dla     t9, func;       \
+       daddu   t9, t9, t8;     \
+       jalr    t9;             \
+        nop;
+
+       .set    arch=octeon3
+       .set    noreorder
+
+       /*
+        * uhi_mips_exception - body placed at each ROM exception vector.
+        *
+        * Saves t9 and a0 in k0/k1, loads the UHI "exception" operation
+        * number (15) into t9 with a0 = 0, and executes sdbbp 1 to hand
+        * control to a debugger.  The macro contains no instruction after
+        * the sdbbp and does not restore t9/a0 itself.
+        */
+       .macro uhi_mips_exception
+       move    k0, t9          # preserve t9 in k0
+       move    k1, a0          # preserve a0 in k1
+       li      t9, 15          # UHI exception operation
+       li      a0, 0           # Use hard register context
+       sdbbp   1               # Invoke UHI operation
+       .endm
+
+       /*
+        * setup_stack_gd - carve the initial stack, global data (gd) and,
+        * when configured, the early malloc area out of the memory just
+        * below big_stack_start.
+        *
+        * On exit:
+        *   k0      = gd pointer (16-byte aligned, GD_SIZE bytes reserved)
+        *   sp, fp  = initial stack pointer, below gd and below the early
+        *             malloc area if SYS_MALLOC_F_LEN is in use
+        *   memory from k0 up to big_stack_start is zeroed
+        *   gd->malloc_base = sp when the early malloc area is reserved
+        *
+        * Clobbers t0, t1, t2 (t2 only with early malloc) and k0.
+        */
+       .macro setup_stack_gd
+       li      t0, -16
+       PTR_LI  t1, big_stack_start
+       and     sp, t1, t0              # force 16 byte alignment
+       PTR_SUBU \
+               sp, sp, GD_SIZE         # reserve space for gd
+       and     sp, sp, t0              # force 16 byte alignment
+       move    k0, sp                  # save gd pointer
+#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
+    !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+       li      t2, CONFIG_VAL(SYS_MALLOC_F_LEN)
+       PTR_SUBU \
+               sp, sp, t2              # reserve space for early malloc
+       and     sp, sp, t0              # force 16 byte alignment
+#endif
+       move    fp, sp
+
+       /* Clear gd: zero every word from the gd pointer (k0) up to t1 */
+       move    t0, k0
+1:
+       PTR_S   zero, 0(t0)
+       PTR_ADDIU t0, PTRSIZE
+       blt     t0, t1, 1b
+        nop
+
+#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
+    !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+       PTR_S   sp, GD_MALLOC_BASE(k0)  # gd->malloc_base offset
+#endif
+       .endm
+
+/* Saved register usage:
+ * s0: not used
+ * s1: not used
+ * s2: Address U-Boot loaded into in L2 cache
+ * s3: Start address
+ * s4: flags
+ *             1:      booting from RAM
+ *             2:      executing out of cache
+ *             4:      booting from flash
+ * s5: u-boot size (data end - _start)
+ * s6: offset in flash.
+ * s7: _start physical address
+ * s8:
+ */
+
+ENTRY(_start)
+       /* U-Boot entry point */
+       b       reset
+
+       /* The above jump instruction/nop are considered part of the
+        * bootloader_header_t structure but are not changed when the header is
+        * updated.
+        */
+
+       /* Leave room for bootloader_header_t header at start of binary.  This
+        * header is used to identify the board the bootloader is for, what
+        * address it is linked at, failsafe/normal, etc.  It also contains a
+        * CRC of the entire image.
+        */
+
+#if defined(CONFIG_ROM_EXCEPTION_VECTORS)
+       /*
+        * Exception vector entry points. When running from ROM, an exception
+        * cannot be handled. Halt execution and transfer control to debugger,
+        * if one is attached.
+        */
+       .org 0x200
+       /* TLB refill, 32 bit task */
+       uhi_mips_exception
+
+       .org 0x280
+       /* XTLB refill, 64 bit task */
+       uhi_mips_exception
+
+       .org 0x300
+       /* Cache error exception */
+       uhi_mips_exception
+
+       .org 0x380
+       /* General exception */
+       uhi_mips_exception
+
+       .org 0x400
+       /* Catch interrupt exceptions */
+       uhi_mips_exception
+
+       .org 0x480
+       /* EJTAG debug exception */
+1:     b       1b
+        nop
+
+       .org 0x500
+#endif
+
+/* Reserve extra space so that when we use the boot bus local memory
+ * segment to remap the debug exception vector we don't overwrite
+ * anything useful
+ */
+
+/* Basic exception handler (dump registers) in all ASM.  When using the TLB
+ * for mapping u-boot C code, we can't branch to that C code for exception
+ * handling (TLB is disabled for some exceptions).
+ */
+
+/* RESET/start here */
+       .balign 8
+reset:
+       nop
+       synci   0(zero)
+       mfc0    k0, CP0_STATUS
+       ori     k0, 0x00E0              /* enable 64 bit mode for CSR access */
+       mtc0    k0, CP0_STATUS
+
+       /* Save the address we're booting from, strip off low bits */
+       bal     1f
+        nop
+1:
+       move    s3, ra
+       dins    s3, zero, 0, 12
+
+       /* Disable boot bus moveable regions */
+       PTR_LI  k0, OCTEON_MIO_BOOT_LOC_CFG0
+       sd      zero, 0(k0)
+       sd      zero, 8(k0)
+
+       /* Disable the watchdog timer
+        * First we check if we're running on CN78XX, CN73XX or CNF75XX to see
+        * if we use CIU3 or CIU.
+        */
+       mfc0    t0, CP0_PRID
+       ext     t0, t0, 8, 8
+       /* Assume CIU */
+       PTR_LI  t1, OCTEON_CIU_WDOG(0)
+       PTR_LI  t2, OCTEON_CIU_PP_POKE(0)
+       blt     t0, OCTEON_PRID_CN78XX, wd_use_ciu
+        nop
+       beq     t0, OCTEON_PRID_CN70XX, wd_use_ciu
+        nop
+       /* Use CIU3 */
+       PTR_LI  t1, OCTEON_CIU3_WDOG(0)
+       PTR_LI  t2, OCTEON_CIU3_PP_POKE(0)
+wd_use_ciu:
+       sd      zero, 0(t2)             /* Pet the dog */
+       sd      zero, 0(t1)             /* Disable watchdog timer */
+
+       /* Errata: CN76XX has a node ID of 3. change it to zero here.
+        * This needs to be done before we relocate to L2 as addresses change
+        * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID],
+        * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE].
+        */
+       mfc0    a4, CP0_PRID
+       /* Check for 78xx pass 1.x processor ID */
+       andi    a4, 0xffff
+       blt     a4, (OCTEON_PRID_CN78XX << 8), 1f
+        nop
+
+       /* Zero out alternate package for now */
+       dins    a4, zero, 6, 1
+       bge     a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f
+        nop
+
+       /* 78xx or 76xx here, first check for bug #27141 */
+       PTR_LI  a5, OCTEON_SLI_CTL_STATUS
+       ld      a6, 0(a5)
+       andi    a7, a4, 0xff
+       andi    a6, a6, 0xff
+
+       beq     a6, a7, not_bug27141
+        nop
+
+       /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */
+       /* We just hit bug #27141.  Need to reset the chip and try again */
+
+       PTR_LI  a4, OCTEON_RST_SOFT_RST
+       ori     a5, zero, 0x1   /* set the reset bit */
+
+reset_78xx_27141:
+       sync
+       synci   0(zero)
+       cache   9, 0(zero)
+       sd      a5, 0(a4)
+       wait
+       b       reset_78xx_27141
+        nop
+
+not_bug27141:
+       /* 76XX pass 1.x has the node number set to 3 */
+       mfc0    a4, CP0_EBASE
+       ext     a4, a4, 0, 10
+       bne     a4, 0x180, 1f   /* Branch if not node 3 core 0 */
+        nop
+
+       /* Clear OCX_COM_NODE[ID] */
+       PTR_LI  a5, OCTEON_OCX_COM_NODE
+       ld      a4, 0(a5)
+       dins    a4, zero, 0, 2
+       sd      a4, 0(a5)
+       ld      zero, 0(a5)
+
+       /* Clear L2C_OCI_CTL[GKSEGNODE] */
+       PTR_LI  a5, OCTEON_L2C_OCI_CTL
+       ld      a4, 0(a5)
+       dins    a4, zero, 4, 2
+       sd      a4, 0(a5)
+       ld      zero, 0(a5)
+
+       /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */
+       dmfc0   a4, CP0_CVMMEMCTL2
+       dins    a4, zero, 12, 2
+       dmtc0   a4, CP0_CVMMEMCTL2
+
+       /* Put the flash address in the start of the EBASE register to
+        * enable our exception handler but only for core 0.
+        */
+       mfc0    a4, CP0_EBASE
+       dext    a4, a4, 0, 10
+       bnez    a4, no_flash
+       /* OK in delay slot */
+       dext    a6, a6, 0, 16           /* Get the base address in flash */
+       sll     a6, a6, 16
+       mtc0    a6, CP0_EBASE   /* Enable exceptions */
+
+no_flash:
+       /* Zero out various registers */
+       mtc0    zero, CP0_DEPC
+       mtc0    zero, CP0_EPC
+       mtc0    zero, CP0_CAUSE
+       mfc0    a4, CP0_PRID
+       ext     a4, a4, 8, 8
+       mtc0    zero, CP0_DESAVE
+
+       /* The following are only available on Octeon 2 or later */
+       mtc0    zero, CP0_KSCRATCH1
+       mtc0    zero, CP0_KSCRATCH2
+       mtc0    zero, CP0_KSCRATCH3
+       mtc0    zero, CP0_USERLOCAL
+
+       /* Turn off ROMEN bit to disable ROM */
+       PTR_LI  a1, OCTEON_MIO_RST_BOOT
+       /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT.
+        * The difference is bits 24-26 are 6 instead of 0 for the address.
+        */
+       /* For Octeon 2 and CN70XX we can ignore the watchdog */
+       blt     a4, OCTEON_PRID_CN78XX, watchdog_ok
+        nop
+
+       PTR_LI  a1, OCTEON_RST_BOOT
+
+       beq     a4, OCTEON_PRID_CN70XX, watchdog_ok
+        nop
+
+       ld      a2, 0(a1)
+       /* There is a bug where some registers don't get properly reset when
+        * the watchdog timer causes a reset.  In this case we need to force
+        * a reset.
+        */
+       bbit0   a2, 11, watchdog_ok     /* Skip if watchdog not hit */
+        dins   a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */
+       /* Clear bit indicating reset due to watchdog */
+       ori     a2, 1 << 11
+       sd      a2, 0(a1)
+
+       /* Disable watchdog */
+       PTR_LI  a1, OCTEON_CIU3_PP_POKE(0)
+       sd      zero, 0(a1)
+       PTR_LI  a1, OCTEON_CIU3_WDOG(0)
+       sd      zero, 0(a1)
+
+       /* Record this in the GSER0_SCRATCH register in bit 11 */
+       PTR_LI  a1, OCTEON_GSERX_SCRATCH(0)
+       ld      a2, 0(a1)
+       ori     a2, 1 << 11
+       sd      a2, 0(a1)
+
+       PTR_LI  a1, OCTEON_RST_SOFT_RST
+       li      a2, 1
+       sd      a2, 0(a1)
+       wait
+
+       /* We should never get here */
+
+watchdog_ok:
+       ld      a2, 0(a1)
+       /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */
+       dins    a2, zero, 2, 18
+       dins    a2, zero, 60, 1 /* Clear ROMEN bit */
+       sd      a2, 0(a1)
+
+       /* Start of Octeon setup */
+
+       /* Check what core we are - if core 0, branch to init tlb
+        * loop in flash.  Otherwise, look up address of init tlb
+        * loop that was saved in the boot vector block.
+        */
+       mfc0    a0, CP0_EBASE
+       andi    a0, EBASE_CPUNUM                /* get core */
+       beqz    a0, InitTLBStart_local
+        nop
+
+       break
+       /* We should never get here - non-zero cores now go directly to
+        * tlb init from the boot stub in movable region.
+        */
+
+       .globl InitTLBStart
+InitTLBStart:
+InitTLBStart_local:
+       /* If we don't have working memory yet configure a bunch of
+        * scratch memory, and set the stack pointer to the top
+        * of it.  This allows us to go to C code without having
+        * memory set up
+        *
+        * Warning: do not change SCRATCH_STACK_LINES as this can impact the
+        * transition from start.S to crti.asm. crti requires 590 bytes of
+        * stack space.
+        */
+       cache   1,0(zero)       /* Clear Dcache so cvmseg works right */
+#if CONFIG_OCTEON_BIG_STACK_SIZE
+       rdhwr   v0, $0
+       bnez    v0, 1f
+        nop
+       PTR_LA  sp, big_stack_start - 16
+       b       stack_clear_done
+        nop
+1:
+#endif
+#define SCRATCH_STACK_LINES 0x36   /* MAX is 0x36 */
+       dmfc0   v0, CP0_CVMMEMCTL
+       dins    v0, zero, 0, 9
+       /* setup SCRATCH_STACK_LINES scratch lines of scratch */
+       ori     v0, 0x100 | SCRATCH_STACK_LINES
+       dmtc0   v0, CP0_CVMMEMCTL
+       /* set stack to top of scratch memory */
+       li      sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128)
+       /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/
+       li      t0, 0xffffffffffff8000
+clear_scratch:
+       sd      zero, 0(t0)
+       addiu   t0, 8
+       bne     t0, sp, clear_scratch
+        nop
+
+       /* This code run on all cores - core 0 from flash,
+        * the rest from DRAM.  When booting from PCI, non-zero cores
+        * come directly here from the boot vector - no earlier code in this
+        * file is executed.
+        */
+
+       /* Some generic initialization is done here as well, as we need this
+        * done on all cores even when booting from PCI
+        */
+stack_clear_done:
+       /* Clear watch registers. */
+       mtc0    zero, CP0_WATCHLO
+       mtc0    zero, CP0_WATCHHI
+
+       /* STATUS register */
+       mfc0    k0, CP0_STATUS
+       li      k1, ~ST0_IE
+       and     k0, k1
+       mtc0    k0, CP0_STATUS
+
+       /* CAUSE register */
+       mtc0    zero, CP0_CAUSE
+
+       /* Init Timer */
+       dmtc0   zero, CP0_COUNT
+       dmtc0   zero, CP0_COMPARE
+
+
+       mfc0    a5, CP0_STATUS
+       li      v0, 0xE0                /* enable 64 bit mode for CSR access */
+       or      v0, v0, a5
+       mtc0    v0, CP0_STATUS
+
+
+       dli     v0, 1 << 29  /* Enable large physical address support in TLB */
+       mtc0    v0, CP0_PAGEGRAIN
+
+InitTLB:
+       dmtc0   zero, CP0_ENTRYLO0
+       dmtc0   zero, CP0_ENTRYLO1
+       mtc0    zero, CP0_PAGEMASK
+       dmtc0   zero, CP0_CONTEXT
+       /* Use an offset into kseg0 so we won't conflict with Mips1 legacy
+        * TLB clearing
+        */
+       PTR_LI  v0, 0xFFFFFFFF90000000
+       mfc0    a0, CP0_CONFIG1
+       srl     a0, a0, 25
+       /* Check if config4 reg present */
+       mfc0    a1, CP0_CONFIG3
+       bbit0   a1, 31, 2f
+        and    a0, a0, 0x3F            /* a0 now has the max mmu entry index */
+       mfc0    a1, CP0_CONFIG4
+       bbit0   a1, 14, 2f              /* check config4[MMUExtDef] */
+        nop
+       /* append config4[MMUSizeExt] to most significant bit of
+        * config1[MMUSize-1]
+        */
+       ins     a0, a1, 6, 8
+       and     a0, a0, 0x3fff  /* a0 now includes max entries for cn6xxx */
+2:
+       dmtc0   zero, CP0_XCONTEXT
+       mtc0    zero, CP0_WIRED
+
+InitTLBloop:
+       dmtc0   v0, CP0_ENTRYHI
+       tlbp
+       mfc0    v1, CP0_INDEX
+       daddiu  v0, v0, 1<<13
+       bgez    v1, InitTLBloop
+
+       mtc0    a0, CP0_INDEX
+       tlbwi
+       bnez    a0, InitTLBloop
+        daddiu a0, -1
+
+       mthi    zero
+       mtlo    zero
+
+       /* Set up status register */
+       mfc0    v0, CP0_STATUS
+       /* Enable COP0 and COP2 access */
+       li      a4, (1 << 28) | (1 << 30)
+       or      v0, a4
+
+       /* Must leave BEV set here, as DRAM is not configured for core 0.
+        * Also, BEV must be 1 later on when the exception base address is set.
+        */
+
+       /* Mask all interrupts */
+       ins     v0, zero, 0, 16
+       /* Clear NMI (used to start cores other than core 0) */
+       ori     v0, 0xE4                /* enable 64 bit, disable interrupts */
+       mtc0    v0, CP0_STATUS
+
+       dli     v0,0xE000000F           /* enable all readhw locations */
+       mtc0    v0, CP0_HWRENA
+
+       dmfc0   v0, CP0_CVMCTL
+       ori     v0, 1<<14 /* enable fixup of unaligned mem access */
+       dmtc0   v0, CP0_CVMCTL
+
+       /* Setup scratch memory.  This is also done in
+        * cvmx_user_app_init, and this code will be removed
+        * from the bootloader in the near future.
+        */
+
+       /* Set L2C_TAD_CTL[MAXLFB] = 0 on CN73XX */
+       mfc0    a4, CP0_PRID
+       ext     a4, a4, 8, 8
+       blt     a4, OCTEON_PRID_CN73XX, 72f
+       nop
+       PTR_LI  v0, OCTEON_L2C_TAD_CTL
+       ld      t1, 0(v0)
+       dins    t1, zero, 0, 4
+       sd      t1, 0(v0)
+       ld      zero, 0(v0)
+
+72:
+
+       /* clear these to avoid immediate interrupt in noperf mode */
+       dmtc0   zero, CP0_COMPARE       /* clear timer interrupt */
+       dmtc0   zero, CP0_COUNT         /* clear timer interrupt */
+       dmtc0   zero, CP0_PERF_CNT0     /* clear perfCnt0 */
+       dmtc0   zero, CP0_PERF_CNT1     /* clear perfCnt1 */
+       dmtc0   zero, CP0_PERF_CNT2
+       dmtc0   zero, CP0_PERF_CNT3
+
+       /* If we're running on a node other than 0 then we need to set KSEGNODE
+        * to 0.  The nice thing with this code is that it also autodetects if
+        * we're running on a processor that supports CVMMEMCTL2 or not since
+        * only processors that have this will have a non-zero node ID.  Because
+        * of this there's no need to check if we're running on a 78XX.
+        */
+       mfc0    t1, CP0_EBASE
+       dext    t1, t1, 7, 3            /* Extract node number */
+       beqz    t1, is_node0            /* If non-zero then we're not node 0 */
+        nop
+       dmfc0   t1, CP0_CVMMEMCTL2
+       dins    t1, zero, 12, 4
+       dmtc0   t1, CP0_CVMMEMCTL2
+is_node0:
+
+       /* Set up TLB mappings for u-boot code in flash. */
+
+       /* Use a bal to get the current PC into ra.  Since this bal is to
+        * the address immediately following the delay slot, the ra is
+        * the address of the label.  We then use this to get the actual
+        * address that we are executing from.
+        */
+       bal     __dummy
+        nop
+
+__dummy:
+       /* Get the actual address that we are running at */
+       PTR_LA  a6, _start              /* Linked address of _start */
+       PTR_LA  a7, __dummy
+       dsubu   t0, a7, a6              /* offset of __dummy label from _start*/
+       dsubu   a7, ra, t0              /* a7 now has actual address of _start*/
+
+       /* Save actual _start address in s7.  This is where we
+        * are executing from, as opposed to where the code is
+        * linked.
+        */
+       move    s7, a7
+       move    s4, zero
+
+       /* s7 has actual address of _start.  If this is
+        * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF.
+        * If it is on the boot bus, use 0xBFC00000 as the physical address
+        * for the TLB mapping, as we will be adjusting the boot bus
+        * to make this adjustment.
+        * If we are running from DRAM (remote-boot), then we want to use the
+        * real address in DRAM.
+        */
+
+       /* Check to see if we are running from flash - we expect that to
+        * be 0xffffffffb0000000-0xffffffffbfffffff
+        * (0x10000000-0x1fffffff, unmapped/uncached)
+        */
+       dli     t2, 0xffffffffb0000000
+       dsubu   t2, s7
+       slt     s4, s7, t2
+       bltz    t2, uboot_in_flash
+        nop
+
+       /* If we're not core 0 then we don't care about cache */
+       mfc0    t2, CP0_EBASE
+       andi    t2, EBASE_CPUNUM
+       bnez    t2, uboot_in_ram
+        nop
+
+       /* Find out if we're OCTEON I or OCTEON + which don't support running
+        * out of cache.
+        */
+       mfc0    t2, CP0_PRID
+       ext     t2, t2, 8, 8
+       li      s4, 1
+       blt     t2, 0x90, uboot_in_ram
+        nop
+
+       /* U-Boot can be executing either in RAM or L2 cache.  Now we need to
+        * check if DRAM is initialized.  The way we do that is to look at
+        * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7)
+        */
+       PTR_LI  t2, OCTEON_LMC0_DDR_PLL_CTL
+       ld      t2, 0(t2)
+       bbit1   t2, 7, uboot_in_ram
+        nop
+
+       /* We must be executing out of cache */
+       b       uboot_in_ram
+        li     s4, 2
+
+uboot_in_flash:
+       /* Set s4 to 4 to indicate we're running in FLASH */
+       li      s4, 4
+
+#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING)
+       /* By default, L2C index aliasing is enabled.  In some cases it may
+        * need to be disabled.  The L2C index aliasing can only be disabled
+        * if U-Boot is running out of L2 cache and the L2 cache has not been
+        * used to store anything.
+        */
+       PTR_LI  t1, OCTEON_L2C_CTL
+       ld      t2, 0(t1)
+       ori     t2, 1
+       sd      t2, 0(t1)
+#endif
+
+       /* Use BFC00000 as physical address for TLB mappings when booting
+        * from flash, as we will adjust the boot bus mappings to make this
+        * mapping correct.
+        */
+       dli     a7, 0xFFFFFFFFBFC00000
+       dsubu   s6, s7, a7  /* Save flash offset in s6 */
+
+#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2)
+       /* For OCTEON II we check to see if the L2 cache is big enough to hold
+        * U-Boot.  If it is big enough then we copy ourself from flash to the
+        * L2 cache in order to speed up execution.
+        */
+
+       /* Check for OCTEON 2 */
+       mfc0    t1, CP0_PRID
+       ext     t1, t1, 8, 8
+       /* Get number of L2 cache sets */
+       beq     t1, OCTEON_PRID_CNF71XX, got_l2_sets    /* CNF71XX */
+        li     t2, 1 << 9
+       beq     t1, OCTEON_PRID_CN78XX, got_l2_sets     /* CN78XX */
+        li     t2, 1 << 13
+       beq     t1, OCTEON_PRID_CN70XX, got_l2_sets     /* CN70XX */
+        li     t2, 1 << 10
+       beq     t1, OCTEON_PRID_CN73XX, got_l2_sets     /* CN73XX */
+        li     t2, 1 << 11
+       beq     t1, OCTEON_PRID_CNF75XX, got_l2_sets    /* CNF75XX */
+        li     t2, 1 << 11
+       b       l2_cache_too_small      /* Unknown OCTEON model */
+        nop
+
+got_l2_sets:
+       /* Get number of associations */
+       PTR_LI  t0, OCTEON_MIO_FUSE_DAT3
+       ld      t0, 0(t0)
+       dext    t0, t0, 32, 3
+
+       beq     t1, OCTEON_PRID_CN70XX, process_70xx_l2sets
+        nop
+       /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */
+       beqz    t0, got_l2_ways
+        li     t3, 16
+       beq     t0, 1, got_l2_ways
+        li     t3, 12
+       beq     t0, 2, got_l2_ways
+        li     t3, 8
+       beq     t0, 3, got_l2_ways
+        li     t3, 4
+       b       l2_cache_too_small
+        nop
+
+process_70xx_l2sets:
+       /* For 70XX, the number of ways is defined as:
+        * 0 - full cache (4-way) 512K
+        * 1 - 3/4 ways (3-way) 384K
+        * 2 - 1/2 ways (2-way) 256K
+        * 3 - 1/4 ways (1-way) 128K
+        * 4-7 illegal (aliased to 0-3)
+        */
+       andi    t0, 3
+       beqz    t0, got_l2_ways
+        li     t3, 4
+       beq     t0, 1, got_l2_ways
+        li     t3, 3
+       beq     t0, 2, got_l2_ways
+        li     t3, 2
+       li      t3, 1
+
+got_l2_ways:
+       dmul    a1, t2, t3              /* Calculate cache size */
+       dsll    a1, 7                   /* Ways * Sets * cache line sz (128) */
+       daddiu  a1, a1, -128            /* Adjust cache size for copy code */
+
+       /* Calculate size of U-Boot image */
+       /*
+        * "uboot_end - _start" is not correct, as the image also
+        * includes the DTB appended to the end (OF_EMBED is deprecated).
+        * Lets use a defined max for now here.
+        */
+       PTR_LI  s5, CONFIG_BOARD_SIZE_LIMIT
+
+       daddu   t2, s5, s7      /* t2 = end address */
+       daddiu  t2, t2, 127
+       ins     t2, zero, 0, 7  /* Round up to cache line for memcpy */
+
+       slt     t1, a1, s5      /* See if we're bigger than the L2 cache */
+       bnez    t1, l2_cache_too_small
+        nop
+       /* Address we plan to load at in the L2 cache */
+       PTR_LI  t9, CONFIG_OCTEON_L2_UBOOT_ADDR
+# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE
+       /* Enable all ways for PP0.  Authentik ROM may have disabled these */
+       PTR_LI  a1, OCTEON_L2C_WPAR_PP0
+       sd      zero, 0(a1)
+
+       /* Address to place our memcpy code */
+       PTR_LI  a0, CONFIG_OCTEON_L2_MEMCPY_ADDR
+       /* The following code writes a simple memcpy routine into the cache
+        * to copy ourself from flash into the L2 cache.  This makes the
+        * memcpy routine a lot faster since each instruction can potentially
+        * require four read cycles to flash over the boot bus.
+        */
+       /* Zero cache line in the L2 cache */
+       zcb     (a0)
+       synci   0(zero)
+       dli     a1, 0xdd840000dd850008  /* ld a0, 0(t0);  ld a1, 8(t0) */
+       sd      a1, 0(a0)
+       dli     a1, 0xdd860010dd870018  /* ld a2, 16(t0); ld a3, 24(t0) */
+       sd      a1, 8(a0)
+       dli     a1, 0xfda40000fda50008  /* sd a0, 0(t1);  sd a1, 8(t1) */
+       sd      a1, 16(a0)
+       dli     a1, 0xfda60010fda70018  /* sd a2, 16(t1); sd a3, 24(t1) */
+       sd      a1, 24(a0)
+       dli     a1, 0x258c0020158efff6  /* addiu t0, 32; bne t0, t2, -40 */
+       sd      a1, 32(a0)
+       dli     a1, 0x25ad002003e00008  /* addiu t1, 32; jr ra */
+       sd      a1, 40(a0)
+       sd      zero, 48(a0)            /* nop; nop */
+
+       /* Synchronize the caches */
+       sync
+       synci   0(zero)
+
+       move    t0, s7
+       move    t1, t9
+
+       /* Do the memcpy operation in L2 cache to copy ourself from flash
+        * to the L2 cache.
+        */
+       jalr    a0
+        nop
+
+# else
+       /* Copy ourself to the L2 cache from flash, 32 bytes at a time */
+       /* This code is now written to the L2 cache using the code above */
+1:
+       ld      a0, 0(t0)
+       ld      a1, 8(t0)
+       ld      a2, 16(t0)
+       ld      a3, 24(t0)
+       sd      a0, 0(t1)
+       sd      a1, 8(t1)
+       sd      a2, 16(t1)
+       sd      a3, 24(t1)
+       addiu   t0, 32
+       bne     t0, t2, 1b
+       addiu   t1, 32
+# endif        /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */
+
+       /* Adjust the start address of U-Boot and the global pointer */
+       subu    t0, s7, t9      /* t0 = address difference */
+       move    s7, t9          /* Update physical address */
+       move    s2, t9
+       sync
+       synci   0(zero)
+
+       /* Now we branch to the L2 cache.  We first get our PC then adjust it
+        */
+       bal     3f
+        nop
+3:
+       /* Don't add any instructions here! */
+       subu    t9, ra, t0
+       /* Give ourself 16 bytes */
+       addiu   t9, 0x10
+
+       jal     t9              /* Branch to address in L2 cache */
+
+        nop
+       nop
+       /* Add instructions after here */
+
+       move    a7, s7
+
+       b       uboot_in_ram
+        ori    s4, 2           /* Running out of L2 cache */
+
+l2_cache_too_small:    /* We go here if we can't copy ourself to L2 */
+#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */
+
+       /* This code is only executed if booting from flash. */
+       /*  For flash boot (_not_ RAM boot), we do a workaround for
+        * an LLM errata on CN38XX and CN58XX parts.
+        */
+
+uboot_in_ram:
+       /* U-boot address is now in reg a7, and is 4 MByte aligned.
+        * (boot bus addressing has been adjusted to make this happen for flash,
+        * and for DRAM this alignment must be provided by the remote boot
+        * utility).
+        */
+       /* See if we're in KSEG0 range, if so set EBASE register to handle
+        * exceptions.
+        */
+       dli     a1, 0x20000000
+       bge     a7, a1, 1f
+        nop
+       /* Convert our physical address to KSEG0 */
+       PTR_LI  a1, 0xffffffff80000000
+       or      a1, a1, a7
+       mtc0    a1, CP0_EBASE
+1:
+       /* U-boot now starts at 0xBFC00000.  Use a single 4 MByte TLB mapping
+        * to map u-boot.
+        */
+       move    a0, a6          /* Virtual addr in a0 */
+       dins    a0, zero, 0, 16 /* Zero out offset bits */
+       move    a1, a7          /* Physical addr in a1 */
+
+       /* Now we need to remove the MIPS address space bits.  For this we
+        * need to determine if it is a 32 bit compatibility address or not.
+        */
+
+       /* 'lowest' address in compatibility space */
+       PTR_LI  t0, 0xffffffff80000000
+       dsubu   t0, t0, a1
+       bltz    t0, compat_space
+        nop
+
+       /* We have a xkphys address, so strip off top bit */
+       b       addr_fixup_done
+        dins   a1, zero, 63, 1
+
+compat_space:
+       PTR_LI  a2, 0x1fffffff
+       and     a1, a1, a2  /* Mask phy addr to remove address space bits */
+
+addr_fixup_done:
+       /* Currently the u-boot image size is limited to 4 MBytes.  In order to
+        * support larger images the flash mapping will need to be changed to
+        * be able to access more than that before C code is run.  Until that
+        * is done, we just use a 4 MByte mapping for the secondary cores as
+        * well.
+        */
+       /* page size (only support 4 Meg binary size for now for core 0)
+        * This limitation is due to the fact that the boot vector is
+        * 0xBFC00000 which only makes 4MB available.  Later more flash
+        * address space will be available after U-Boot has been copied to
+        * RAM.  For now assume that it is in flash.
+        */
+       li      a2, 2*1024*1024
+
+       mfc0    a4, CP0_EBASE
+       andi    a4, EBASE_CPUNUM                /* get core */
+       beqz    a4, core_0_tlb
+        nop
+
+       /* Now determine how big a mapping to use for secondary cores,
+        * which need to map all of u-boot + heap in DRAM
+        */
+       /* Here we look at the alignment of the physical address,
+        * and use the largest page size possible.  In some cases
+        * this can result in an oversize mapping, but for secondary cores
+        * this mapping is very short lived.
+        */
+
+       /* Physical address in a1 */
+       li      a2, 1
+1:
+       sll     a2, 1
+       and     a5, a1, a2
+       beqz    a5, 1b
+        nop
+
+       /* a2 now contains largest page size we can use */
+core_0_tlb:
+       JAL(single_tlb_setup)
+
+       /* Check if we're running from cache */
+       bbit1   s4, 1, uboot_in_cache
+        nop
+
+       /* If we are already running from ram, we don't need to muck
+        * with boot bus mappings.
+        */
+       PTR_LI  t2, 0xffffffffb0000000
+       dsubu   t2, s7
+       /* See if our starting address is lower than the boot bus */
+       bgez    t2, uboot_in_ram2       /* If yes, booting from RAM */
+        nop
+
+uboot_in_cache:
+#if CONFIG_OCTEON_BIG_STACK_SIZE
+       /* The large stack is only for core 0.  For all other cores we need to
+        * use the L1 cache otherwise the other cores will stomp on top of each
+        * other unless even more space is reserved for the stack space for
+        * each core.  With potentially 96 cores this gets excessive.
+        *
+        * The core number has to be taken from the EBASE value that was just
+        * read into v0.  a0 does not hold EBASE here (it was last used as the
+        * virtual address argument of single_tlb_setup), so masking a0 in
+        * place would not test the core number at all.
+        */
+       mfc0    v0, CP0_EBASE
+       andi    a0, v0, EBASE_CPUNUM    /* a0 = core number */
+       bnez    a0, no_big_stack        /* not core 0: leave sp untouched */
+        nop
+       PTR_LA  sp, big_stack_start     /* label placed after the reserved area */
+       daddiu  sp, -16                 /* start 16 bytes below that label */
+
+no_big_stack:
+#endif
+       /* We now have the TLB set up, so we need to remap the boot bus.
+        * This is tricky, as we are running from flash, and will be changing
+        * the addressing of the flash.
+        */
+       /* Enable movable boot bus region 0, at address 0x10000000 */
+       PTR_LI  a4, OCTEON_MIO_BOOT_BASE
+       dli     a5, 0x81000000  /* EN + base address 0x11000000 */
+       sd      a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4)
+
+       /* Copy the code that remaps the boot bus to the movable region */
+       sd      zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
+
+       PTR_LA  a6, change_boot_mappings
+       GETOFFSET(a5, change_boot_mappings);
+       daddu   a5, a5, a6
+
+       /* The code is 16 bytes (2 DWORDS) */
+       ld      a7, 0(a5)
+       sd      a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
+       ld      a7, 8(a5)
+       sd      a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
+
+       /* Read from an RML register to ensure that the previous writes have
+        * completed before we branch to the movable region.
+        */
+       ld      zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4)
+
+       /* Compute value for boot bus configuration register */
+       /* Read region 0 config so we can _modify_ the base address field */
+       PTR_LI  a4, OCTEON_MIO_BOOT_REG_CFG0    /* region 0 config */
+       ld      a0, 0(a4)
+       dli     a4, 0xf0000000          /* Mask off bits we want to save */
+       and     a4, a4, a0
+       dli     a0, 0x0fff0000          /* Force size to max */
+       or      a4, a4, a0
+
+       move    a5, s6
+       /* Convert to 64k blocks, as used by boot bus config */
+       srl     a5, 16
+       li      a6, 0x1fc0      /* 'normal' boot bus base config value */
+       subu    a6, a6, a5      /* Subtract offset */
+       /* combine into register value to pass to boot bus routine */
+       or      a0, a4, a6
+
+       /* Branch there */
+       PTR_LA  a1, __mapped_continue_label
+       PTR_LI  a2, OCTEON_MIO_BOOT_REG_CFG0
+       /* If region 0 is not enabled we can skip it */
+       ld      a4, 0(a2)
+       bbit0   a4, 31, __mapped_continue_label
+        nop
+       li      a4, 0x10000000
+       j       a4
+        synci  0(zero)
+
+       /* We never get here, as we go directly to __mapped_continue_label */
+       break
+
+
+uboot_in_ram2:
+
+       /* Now jump to address in TLB mapped memory to continue execution */
+       PTR_LA  a4, __mapped_continue_label
+       synci   0(a4)
+       j       a4
+        nop
+
+__mapped_continue_label:
+       /* Check if we are core 0, if we are not then we need
+        * to vector to code in DRAM to do application setup, and
+        * skip the rest of the bootloader.  Only core 0 runs the bootloader
+        * and sets up the tables that the other cores will use for
+        * configuration.
+        */
+       mfc0    a0, CP0_EBASE
+       andi    a0, EBASE_CPUNUM   /* get core */
+       /* if (__all_cores_are_equal==0 && core==0),
+        * then jump to execute BL on core 0; else 'go to next line'
+        * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID))
+        */
+       lw      t0, __all_cores_are_equal
+       beq     a0, t0, core_0_cont1
+        nop
+
+       /* other cores look up addr from dram */
+        /* DRAM controller already set up by first core */
+        li      a1, (BOOT_VECTOR_NUM_WORDS * 4)
+        mul     a0, a0, a1
+
+        /* Now find out the boot vector base address from the moveable boot
+         * bus region.
+         */
+
+        /* Get the address of the boot bus moveable region */
+        PTR_LI     t8, OCTEON_MIO_BOOT_BASE
+        ld      t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8)
+        /* Make sure it's enabled */
+        bbit0   t9, 31, invalid_boot_vector
+         dext   t9, t9, 3, 24
+        dsll    t9, t9, 7
+        /* Make address XKPHYS */
+       li      t0, 1
+       dins    t9, t0, 63, 1
+
+        ld      t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9)
+        dli     t1, OCTEON_BOOT_MOVEABLE_MAGIC1
+        bne     t0, t1, invalid_boot_vector
+         nop
+
+        /* Load base address of boot vector table */
+        ld      t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9)
+        /* Add offset for core */
+        daddu   a1, t0, a0
+
+       mfc0    v0, CP0_STATUS
+       move    v1, v0
+       ins     v1, zero, 19, 1         /* Clear NMI bit */
+       mtc0    v1, CP0_STATUS
+
+        /* Get app start function address */
+        lw      t9, 8(a1)
+        beqz    t9, invalid_boot_vector
+         nop
+
+        j       t9
+         lw      k0, 12(a1)      /* Load global data (deprecated) */
+
+invalid_boot_vector:
+        wait
+        b       invalid_boot_vector
+         nop
+
+__all_cores_are_equal:
+       /* The following .word tell if 'all_cores_are_equal' or core0 is special
+        * By default (for the first execution) the core0 should be special,
+        * in order to behave like the old(existing not-modified) bootloader
+        * and run the bootloader on core 0 to follow the existing design.
+        * However after that we make 'all_cores_equal' which allows to run SE
+        * applications on core0 like on any other core. NOTE that value written
+        * to '__all_cores_are_equal' should not match any core ID.
+        */
+       .word   0
+
+core_0_cont1:
+       li      t0, 0xffffffff
+       sw      t0, __all_cores_are_equal
+       /* From here on, only core 0 runs, other cores have branched
+        * away.
+        */
+#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM
+       /* Set up initial stack and global data */
+       setup_stack_gd
+# ifdef CONFIG_DEBUG_UART
+       PTR_LA  t9, debug_uart_init
+       jalr    t9
+        nop
+# endif
+#endif
+       move    a0, zero                # a0 <-- boot_flags = 0
+       PTR_LA  t9, board_init_f
+
+       jr      t9
+        move   ra, zero
+       END(_start)
+
+       .balign 8
+       .globl  single_tlb_setup
+       .ent    single_tlb_setup
+       /* Sets up a single TLB entry.  Virtual/physical addresses
+        * must be properly aligned.
+        *
+        * The entry is written at the highest TLB index reported by the
+        * CP0 Config1 register (extended by Config4 when that is present).
+        * It covers two consecutive pages of size a2 (EntryLo0 and
+        * EntryLo1), i.e. a mapping of 2 * a2 bytes from a0 to a1.
+        *
+        * Inputs:
+        * a0  Virtual address
+        * a1  Physical address
+        * a2  page (_not_ mapping) size
+        *
+        * On return:
+        * a0  input a0 + 2 * a2
+        * a1  EntryLo formatted value, advanced past the second page
+        * a3, a4, a5 and a6 are used as scratch and are overwritten
+        */
+single_tlb_setup:
+       /* Determine the number of TLB entries available, and
+        * use the top one.
+        */
+       mfc0    a3, CP0_CONFIG1
+       dext    a3, a3, 25, 6           /* a3 now has the max mmu entry index */
+       mfc0    a5, CP0_CONFIG3         /* Check if config4 reg present */
+       bbit0   a5, 31, single_tlb_setup_cont
+        nop
+       mfc0    a5, CP0_CONFIG4
+       bbit0   a5, 14, single_tlb_setup_cont   /* check config4[MMUExtDef] */
+        nop
+       /* append config4[MMUSizeExt] to most significant bit of
+        * config1[MMUSize-1]
+        */
+       dins    a3, a5, 6, 8    /* low 8 bits of a5 go into bits 13..6 of a3 */
+       and     a3, a3, 0x3fff  /* a3 now includes max entries for cn6xxx */
+
+single_tlb_setup_cont:
+
+       /* Format physical address for entry low */
+       nop                     /* NOTE(review): purpose of this nop is not evident here */
+       dsrl    a1, a1, 12      /* drop the low 12 address bits */
+       dsll    a1, a1, 6       /* move the result up to bit 6 */
+       ori     a1, a1, 0x7     /* set DVG bits */
+
+       move    a4, a2          /* a4 = page size */
+       daddu   a5, a4, a4      /* mapping size */
+       dsll    a6, a4, 1       /* a6 = 2 * page size */
+       daddiu  a6, a6, -1      /* pagemask */
+       dsrl    a4, a4, 6       /* adjust for adding with entrylo */
+
+       /* Now set up mapping */
+       mtc0    a6, CP0_PAGEMASK
+       mtc0    a3, CP0_INDEX   /* select the TLB entry computed above */
+
+       dmtc0   a1, CP0_ENTRYLO0        /* first page of the pair */
+       daddu   a1, a1, a4              /* advance by one page */
+
+       dmtc0   a1, CP0_ENTRYLO1        /* second page of the pair */
+       daddu   a1, a1, a4              /* advance again; a1 is left in this state */
+
+       dmtc0   a0, CP0_ENTRYHI
+       daddu   a0, a0, a5              /* a0 = virtual address + mapping size */
+
+       ehb                     /* hazard barrier: CP0 writes above before tlbwi */
+       tlbwi                   /* write the entry selected by CP0_INDEX */
+       jr  ra
+        nop
+       .end   single_tlb_setup
+
+
+/**
+ * This code is moved to a movable boot bus region,
+ * and it is responsible for changing the flash mappings and
+ * jumping to run from the TLB mapped address.
+ *
+ * The copy loop in _start transfers exactly two doublewords (16 bytes,
+ * i.e. the four instructions below) into the movable region, so this
+ * routine must stay at that size.
+ *
+ * @param a0   New value for the boot bus region 0 config register,
+ *             carrying the new base address (stored to 0(a2))
+ * @param a1   Address to branch to afterwards
+ * @param a2   Address of MIO_BOOT_REG_CFG0
+ */
+       .balign 8
+change_boot_mappings:
+       sd      a0, 0(a2)       /* write the new region 0 configuration */
+       sync                    /* order the store before the jump below */
+       j a1        /* Jump to new TLB mapped location */
+        synci  0(zero)         /* executed in the delay slot of the jump */
+
+/* If we need a large stack, allocate it here. */
+#if CONFIG_OCTEON_BIG_STACK_SIZE
+       /* Allocate the stack here so it's in L2 cache or DRAM
+        *
+        * big_stack_end labels the lowest address of the reserved area and
+        * big_stack_start the address just past it.  The area is
+        * CONFIG_OCTEON_BIG_STACK_SIZE bytes, zero filled and 16 byte
+        * aligned, followed by one zero doubleword.  _start loads sp with
+        * big_stack_start - 16.
+        */
+       .balign 16
+big_stack_end:
+       .skip   CONFIG_OCTEON_BIG_STACK_SIZE, 0
+big_stack_start:
+       .dword  0
+#endif




Viele Grüße,
Stefan

--
DENX Software Engineering GmbH,      Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-51 Fax: (+49)-8142-66989-80 Email: s...@denx.de

Reply via email to