+
+#endif /* __ASM_MACH_OCTEON_IOREMAP_H */
diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S
new file mode 100644
index 0000000000..acb967201a
--- /dev/null
+++ b/arch/mips/mach-octeon/start.S
@@ -0,0 +1,1241 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Startup Code for OCTEON 64-bit CPU-core
+ *
+ * Copyright (c) 2003 Wolfgang Denk <w...@denx.de>
+ * Copyright 2004, 2005, 2010 - 2015 Cavium Inc..
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/asm.h>
+
+#define BOOT_VECTOR_NUM_WORDS 8
+
+#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70
+#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78
+
+#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd
+#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW
+
+#define OCTEON_CIU_SOFT_RST 0x8001070000000740
+
+#define OCTEON_L2C_WPAR_PP0 0x8001180080840000
+#define OCTEON_MIO_BOOT_BASE 0x8001180000000000
+#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000
+#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080
+#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090
+#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098
+#define OCTEON_MIO_RST_BOOT 0x8001180000001600
+#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000
+#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040
+#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080
+#define OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090
+#define OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098
+#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418
+#define OCTEON_L2D_FUS3 0x80011800800007B8
+#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258
+
+#define OCTEON_RST 0x8001180006000000
+#define OCTEON_RST_BOOT_OFFSET 0x1600
+#define OCTEON_RST_SOFT_RST_OFFSET 0x1680
+#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8)
+#define OCTEON_RST_BOOT 0x8001180006001600
+#define OCTEON_RST_SOFT_RST 0x8001180006001680
+#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8)
+
+#define OCTEON_OCX_COM_NODE 0x8001180011000000
+#define OCTEON_L2C_OCI_CTL 0x8001180080800020
+#define OCTEON_L2C_TAD_CTL 0x8001180080800018
+#define OCTEON_L2C_CTL 0x8001180080800000
+
+#define OCTEON_DBG_DATA 0x80011F00000001E8
+#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188
+#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510
+#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + (X) * 8)
+#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8)
+#define OCTEON_CIU3_WDOG(X) (0x8001010000020000 + (X) * 8)
+#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8)
+#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8)
+#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570
+/* GSER(X)_SCRATCH CSR address; consecutive instances are 0x1000000 apart */
+#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * 0x1000000)
+
+/** PRID for CN56XX */
+#define OCTEON_PRID_CN56XX 0x04
+/** PRID for CN52XX */
+#define OCTEON_PRID_CN52XX 0x07
+/** PRID for CN63XX */
+#define OCTEON_PRID_CN63XX 0x90
+/** PRID for CN68XX */
+#define OCTEON_PRID_CN68XX 0x91
+/** PRID for CN66XX */
+#define OCTEON_PRID_CN66XX 0x92
+/** PRID for CN61XX */
+#define OCTEON_PRID_CN61XX 0x93
+/** PRID for CNF71XX */
+#define OCTEON_PRID_CNF71XX 0x94
+/** PRID for CN78XX */
+#define OCTEON_PRID_CN78XX 0x95
+/** PRID for CN70XX */
+#define OCTEON_PRID_CN70XX 0x96
+/** PRID for CN73XX */
+#define OCTEON_PRID_CN73XX 0x97
+/** PRID for CNF75XX */
+#define OCTEON_PRID_CNF75XX 0x98
+
+/*
+ * GETOFFSET(reg, func): compute the difference between the address this
+ * code is actually running at and the address it was linked at.
+ *
+ * The bal leaves in ra the run-time address of the .dword that follows its
+ * delay slot; that .dword holds its own link-time address ("."). reg is
+ * set to ra minus that stored value. The .balign 8 in front of the two
+ * 4-byte instructions keeps the .dword 8-byte aligned for the ld.
+ *
+ * func argument is used to create a mark, must be unique.
+ * Clobbers: ra, reg.
+ */
+#define GETOFFSET(reg, func) \
+ .balign 8; \
+ bal func ##_mark; \
+ nop; \
+ .dword .; \
+func ##_mark: \
+ ld reg, 0(ra); \
+ dsubu reg, ra, reg;
+
+/*
+ * JAL(func): call func at its run-time location rather than its linked one.
+ *
+ * The run-time minus link-time offset is computed into t8 (bal over a
+ * ".dword ." and subtract the stored link address from ra), added to the
+ * linked address of func, and the result is called through t9.
+ *
+ * A func ##_mark label is emitted, so func must be unique among all macro
+ * uses that create such a mark. Clobbers: t8, t9, ra.
+ */
+#define JAL(func) \
+ .balign 8; \
+ bal func ##_mark; \
+ nop; \
+ .dword .; \
+func ##_mark: \
+ ld t8, 0(ra); \
+ dsubu t8, ra, t8; \
+ dla t9, func; \
+ daddu t9, t9, t8; \
+ jalr t9; \
+ nop;
+
+ .set arch=octeon3
+ .set noreorder
+
+ /*
+ * uhi_mips_exception: body placed at each ROM exception vector.
+ * Saves t9 and a0 in k0/k1, loads the UHI exception operation number
+ * into t9 with a0 = 0, then executes sdbbp 1 to hand control to a
+ * debugger. Clobbers: k0, k1, t9, a0.
+ */
+ .macro uhi_mips_exception
+ move k0, t9 # preserve t9 in k0
+ move k1, a0 # preserve a0 in k1
+ li t9, 15 # UHI exception operation
+ li a0, 0 # Use hard register context
+ sdbbp 1 # Invoke UHI operation
+ .endm
+
+ /*
+ * setup_stack_gd: carve global data (gd), the optional early-malloc
+ * area and the initial stack out of the top of the big stack.
+ *
+ * Layout, from big_stack_start downwards: gd (GD_SIZE bytes), then the
+ * early malloc area (SYS_MALLOC_F_LEN bytes, when configured), then the
+ * stack. Every boundary is forced down to 16-byte alignment.
+ *
+ * Out: k0 = gd pointer, sp = fp = initial stack pointer.
+ * Clobbers: t0, t1, t2.
+ */
+ .macro setup_stack_gd
+ li t0, -16
+ PTR_LI t1, big_stack_start
+ and sp, t1, t0 # force 16 byte alignment
+ PTR_SUBU \
+ sp, sp, GD_SIZE # reserve space for gd
+ and sp, sp, t0 # force 16 byte alignment
+ move k0, sp # save gd pointer
+#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
+ !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+ li t2, CONFIG_VAL(SYS_MALLOC_F_LEN)
+ PTR_SUBU \
+ sp, sp, t2 # reserve space for early malloc
+ and sp, sp, t0 # force 16 byte alignment
+#endif
+ move fp, sp
+
+ /* Clear gd: zero each pointer-sized word from k0 up to t1 (big_stack_start) */
+ move t0, k0
+1:
+ PTR_S zero, 0(t0)
+ PTR_ADDIU t0, PTRSIZE
+ blt t0, t1, 1b
+ nop
+
+#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \
+ !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F)
+ PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset
+#endif
+ .endm
+
+/* Saved register usage:
+ * s0: not used
+ * s1: not used
+ * s2: Address U-Boot loaded into in L2 cache
+ * s3: Start address
+ * s4: flags
+ * 1: booting from RAM
+ * 2: executing out of cache
+ * 4: booting from flash
+ * s5: u-boot size (data end - _start)
+ * s6: offset in flash.
+ * s7: _start physical address
+ * s8:
+ */
+
+ENTRY(_start)
+ /* U-Boot entry point: jump over the header/vector area to the reset code */
+ b reset
+ nop /* explicit branch delay slot; .set noreorder is in effect */
+
+ /* The above jump instruction/nop are considered part of the
+ * bootloader_header_t structure but are not changed when the header is
+ * updated.
+ */
+
+ /* Leave room for bootloader_header_t header at start of binary. This
+ * header is used to identify the board the bootloader is for, what
+ * address it is linked at, failsafe/normal, etc. It also contains a
+ * CRC of the entire image.
+ */
+
+#if defined(CONFIG_ROM_EXCEPTION_VECTORS)
+ /*
+ * Exception vector entry points. When running from ROM, an exception
+ * cannot be handled. Halt execution and transfer control to debugger,
+ * if one is attached.
+ */
+ .org 0x200
+ /* TLB refill, 32 bit task */
+ uhi_mips_exception
+
+ .org 0x280
+ /* XTLB refill, 64 bit task */
+ uhi_mips_exception
+
+ .org 0x300
+ /* Cache error exception */
+ uhi_mips_exception
+
+ .org 0x380
+ /* General exception */
+ uhi_mips_exception
+
+ .org 0x400
+ /* Catch interrupt exceptions */
+ uhi_mips_exception
+
+ .org 0x480
+ /* EJTAG debug exception */
+1: b 1b
+ nop
+
+ .org 0x500
+#endif
+
+/* Reserve extra space so that when we use the boot bus local memory
+ * segment to remap the debug exception vector we don't overwrite
+ * anything useful
+ */
+
+/* Basic exception handler (dump registers) in all ASM. When using the TLB for
+ * mapping u-boot C code, we can't branch to that C code for exception handling
+ * (TLB is disabled for some exceptions).
+ */
+
+/* RESET/start here */
+ .balign 8
+reset:
+ nop
+ synci 0(zero)
+ mfc0 k0, CP0_STATUS
+ ori k0, 0x00E0 /* enable 64 bit mode for CSR access */
+ mtc0 k0, CP0_STATUS
+
+ /* Save the address we're booting from, strip off low bits */
+ bal 1f
+ nop
+1:
+ move s3, ra
+ dins s3, zero, 0, 12
+
+ /* Disable boot bus moveable regions */
+ PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0
+ sd zero, 0(k0)
+ sd zero, 8(k0)
+
+ /* Disable the watchdog timer
+ * First we check if we're running on CN78XX, CN73XX or CNF75XX to see
+ * if we use CIU3 or CIU.
+ */
+ mfc0 t0, CP0_PRID
+ ext t0, t0, 8, 8
+ /* Assume CIU */
+ PTR_LI t1, OCTEON_CIU_WDOG(0)
+ PTR_LI t2, OCTEON_CIU_PP_POKE(0)
+ blt t0, OCTEON_PRID_CN78XX, wd_use_ciu
+ nop
+ beq t0, OCTEON_PRID_CN70XX, wd_use_ciu
+ nop
+ /* Use CIU3 */
+ PTR_LI t1, OCTEON_CIU3_WDOG(0)
+ PTR_LI t2, OCTEON_CIU3_PP_POKE(0)
+wd_use_ciu:
+ sd zero, 0(t2) /* Pet the dog */
+ sd zero, 0(t1) /* Disable watchdog timer */
+
+ /* Errata: CN76XX has a node ID of 3. change it to zero here.
+ * This needs to be done before we relocate to L2 as addresses change
+ * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID],
+ * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE].
+ */
+ mfc0 a4, CP0_PRID
+ /* Check for 78xx pass 1.x processor ID */
+ andi a4, 0xffff
+ blt a4, (OCTEON_PRID_CN78XX << 8), 1f
+ nop
+
+ /* Zero out alternate package for now */
+ dins a4, zero, 6, 1
+ bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f
+ nop
+
+ /* 78xx or 76xx here, first check for bug #27141 */
+ PTR_LI a5, OCTEON_SLI_CTL_STATUS
+ ld a6, 0(a5)
+ andi a7, a4, 0xff
+ andi a6, a6, 0xff
+
+ beq a6, a7, not_bug27141
+ nop
+
+ /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */
+ /* We just hit bug #27141. Need to reset the chip and try again */
+
+ PTR_LI a4, OCTEON_RST_SOFT_RST
+ ori a5, zero, 0x1 /* set the reset bit */
+
+reset_78xx_27141:
+ sync
+ synci 0(zero)
+ cache 9, 0(zero)
+ sd a5, 0(a4)
+ wait
+ b reset_78xx_27141
+ nop
+
+not_bug27141:
+ /* 76XX pass 1.x has the node number set to 3 */
+ mfc0 a4, CP0_EBASE
+ ext a4, a4, 0, 10
+ bne a4, 0x180, 1f /* Branch if not node 3 core 0 */
+ nop
+
+ /* Clear OCX_COM_NODE[ID] */
+ PTR_LI a5, OCTEON_OCX_COM_NODE
+ ld a4, 0(a5)
+ dins a4, zero, 0, 2
+ sd a4, 0(a5)
+ ld zero, 0(a5) /* read back so the write has completed */
+
+ /* Clear L2C_OCI_CTL[GKSEGNODE] */
+ PTR_LI a5, OCTEON_L2C_OCI_CTL
+ ld a4, 0(a5)
+ dins a4, zero, 4, 2
+ sd a4, 0(a5)
+ ld zero, 0(a5) /* read back so the write has completed */
+
+ /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */
+ dmfc0 a4, CP0_CVMMEMCTL2
+ dins a4, zero, 12, 2
+ dmtc0 a4, CP0_CVMMEMCTL2
+
+ /* End of the 78XX/76XX errata handling. The "1f" branches above (chip
+ * is not a 78XX pass 1.x, or is not node 3 core 0) land here so that
+ * the common setup below is always executed.
+ */
+1:
+ /* Put the flash address in the start of the EBASE register to
+ * enable our exception handler but only for core 0.
+ *
+ * NOTE(review): a6 is not loaded with a flash address on any path that
+ * reaches this point (it either holds SLI_CTL_STATUS[7:0] from the
+ * bug #27141 check or was never written). Confirm the intended source
+ * register; s3 holds the boot address saved at reset.
+ */
+ mfc0 a4, CP0_EBASE
+ dext a4, a4, 0, 10
+ bnez a4, no_flash
+ /* OK in delay slot */
+ dext a6, a6, 0, 16 /* Get the base address in flash */
+ sll a6, a6, 16
+ mtc0 a6, CP0_EBASE /* Enable exceptions */
+
+no_flash:
+ /* Zero out various registers */
+ mtc0 zero, CP0_DEPC
+ mtc0 zero, CP0_EPC
+ mtc0 zero, CP0_CAUSE
+ mfc0 a4, CP0_PRID
+ ext a4, a4, 8, 8
+ mtc0 zero, CP0_DESAVE
+
+ /* The following are only available on Octeon 2 or later */
+ mtc0 zero, CP0_KSCRATCH1
+ mtc0 zero, CP0_KSCRATCH2
+ mtc0 zero, CP0_KSCRATCH3
+ mtc0 zero, CP0_USERLOCAL
+
+ /* Turn off ROMEN bit to disable ROM */
+ PTR_LI a1, OCTEON_MIO_RST_BOOT
+ /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT.
+ * The difference is bits 24-26 are 6 instead of 0 for the address.
+ */
+ /* For Octeon 2 and CN70XX we can ignore the watchdog */
+ blt a4, OCTEON_PRID_CN78XX, watchdog_ok
+ nop
+
+ PTR_LI a1, OCTEON_RST_BOOT
+
+ beq a4, OCTEON_PRID_CN70XX, watchdog_ok
+ nop
+
+ ld a2, 0(a1)
+ /* There is a bug where some registers don't get properly reset when
+ * the watchdog timer causes a reset. In this case we need to force
+ * a reset.
+ */
+ bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */
+ dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */
+ /* Clear bit indicating reset due to watchdog */
+ ori a2, 1 << 11
+ sd a2, 0(a1)
+
+ /* Disable watchdog */
+ PTR_LI a1, OCTEON_CIU3_PP_POKE(0)
+ sd zero, 0(a1)
+ PTR_LI a1, OCTEON_CIU3_WDOG(0)
+ sd zero, 0(a1)
+
+ /* Record this in the GSER0_SCRATCH register in bit 11 */
+ PTR_LI a1, OCTEON_GSERX_SCRATCH(0)
+ ld a2, 0(a1)
+ ori a2, 1 << 11
+ sd a2, 0(a1)
+
+ PTR_LI a1, OCTEON_RST_SOFT_RST
+ li a2, 1
+ sd a2, 0(a1)
+ wait
+
+ /* We should never get here */
+
+watchdog_ok:
+ ld a2, 0(a1)
+ /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */
+ dins a2, zero, 2, 18
+ dins a2, zero, 60, 1 /* Clear ROMEN bit */
+ sd a2, 0(a1)
+
+ /* Start of Octeon setup */
+
+ /* Check what core we are - if core 0, branch to init tlb
+ * loop in flash. Otherwise, look up address of init tlb
+ * loop that was saved in the boot vector block.
+ */
+ mfc0 a0, CP0_EBASE
+ andi a0, EBASE_CPUNUM /* get core */
+ beqz a0, InitTLBStart_local
+ nop
+
+ break
+ /* We should never get here - non-zero cores now go directly to
+ * tlb init from the boot stub in movable region.
+ */
+
+ .globl InitTLBStart
+InitTLBStart:
+InitTLBStart_local:
+ /* If we don't have working memory yet configure a bunch of
+ * scratch memory, and set the stack pointer to the top
+ * of it. This allows us to go to C code without having
+ * memory set up
+ *
+ * Warning: do not change SCRATCH_STACK_LINES as this can impact the
+ * transition from start.S to crti.asm. crti requires 590 bytes of
+ * stack space.
+ */
+ cache 1,0(zero) /* Clear Dcache so cvmseg works right */
+#if CONFIG_OCTEON_BIG_STACK_SIZE
+ rdhwr v0, $0
+ bnez v0, 1f
+ nop
+ PTR_LA sp, big_stack_start - 16
+ b stack_clear_done
+ nop
+1:
+#endif
+#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */
+ dmfc0 v0, CP0_CVMMEMCTL
+ dins v0, zero, 0, 9
+ /* setup SCRATCH_STACK_LINES scratch lines of scratch */
+ ori v0, 0x100 | SCRATCH_STACK_LINES
+ dmtc0 v0, CP0_CVMMEMCTL
+ /* set stack to top of scratch memory */
+ li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128)
+ /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/
+ li t0, 0xffffffffffff8000
+clear_scratch:
+ sd zero, 0(t0)
+ addiu t0, 8
+ bne t0, sp, clear_scratch
+ nop
+
+ /* This code run on all cores - core 0 from flash,
+ * the rest from DRAM. When booting from PCI, non-zero cores
+ * come directly here from the boot vector - no earlier code in this
+ * file is executed.
+ */
+
+ /* Some generic initialization is done here as well, as we need this
+ * done on all cores even when booting from PCI
+ */
+stack_clear_done:
+ /* Clear watch registers. */
+ mtc0 zero, CP0_WATCHLO
+ mtc0 zero, CP0_WATCHHI
+
+ /* STATUS register */
+ mfc0 k0, CP0_STATUS
+ li k1, ~ST0_IE
+ and k0, k1
+ mtc0 k0, CP0_STATUS
+
+ /* CAUSE register */
+ mtc0 zero, CP0_CAUSE
+
+ /* Init Timer */
+ dmtc0 zero, CP0_COUNT
+ dmtc0 zero, CP0_COMPARE
+
+
+ mfc0 a5, CP0_STATUS
+ li v0, 0xE0 /* enable 64 bit mode for CSR access */
+ or v0, v0, a5
+ mtc0 v0, CP0_STATUS
+
+
+ dli v0, 1 << 29 /* Enable large physical address support in TLB */
+ mtc0 v0, CP0_PAGEGRAIN
+
+InitTLB:
+ dmtc0 zero, CP0_ENTRYLO0
+ dmtc0 zero, CP0_ENTRYLO1
+ mtc0 zero, CP0_PAGEMASK
+ dmtc0 zero, CP0_CONTEXT
+ /* Use an offset into kseg0 so we won't conflict with Mips1 legacy
+ * TLB clearing
+ */
+ PTR_LI v0, 0xFFFFFFFF90000000
+ mfc0 a0, CP0_CONFIG1
+ srl a0, a0, 25
+ /* Check if config4 reg present */
+ mfc0 a1, CP0_CONFIG3
+ bbit0 a1, 31, 2f
+ and a0, a0, 0x3F /* a0 now has the max mmu entry index */
+ mfc0 a1, CP0_CONFIG4
+ bbit0 a1, 14, 2f /* check config4[MMUExtDef] */
+ nop
+ /* append config4[MMUSizeExt] to most significant bit of
+ * config1[MMUSize-1]
+ */
+ ins a0, a1, 6, 8
+ and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */
+2:
+ dmtc0 zero, CP0_XCONTEXT
+ mtc0 zero, CP0_WIRED
+
+InitTLBloop:
+ /* For each TLB index from a0 down to 0: probe for EntryHi = v0 and keep
+ * stepping v0 by 1 << 13 while the probe still hits (Index >= 0), then
+ * write the entry at index a0 with that EntryHi value.
+ */
+ dmtc0 v0, CP0_ENTRYHI
+ tlbp
+ mfc0 v1, CP0_INDEX
+ daddiu v0, v0, 1<<13
+ bgez v1, InitTLBloop
+
+ /* The mtc0 below sits in the bgez delay slot, so it runs on every pass */
+ mtc0 a0, CP0_INDEX
+ tlbwi
+ bnez a0, InitTLBloop
+ daddiu a0, -1 /* delay slot: step to the next lower index */
+
+ mthi zero
+ mtlo zero
+
+ /* Set up status register */
+ mfc0 v0, CP0_STATUS
+ /* Enable COP0 and COP2 access */
+ li a4, (1 << 28) | (1 << 30)
+ or v0, a4
+
+ /* Must leave BEV set here, as DRAM is not configured for core 0.
+ * Also, BEV must be 1 later on when the exception base address is set.
+ */
+
+ /* Mask all interrupts */
+ ins v0, zero, 0, 16
+ /* Clear NMI (used to start cores other than core 0) */
+ ori v0, 0xE4 /* enable 64 bit, disable interrupts */
+ mtc0 v0, CP0_STATUS
+
+ dli v0,0xE000000F /* enable all readhw locations */
+ mtc0 v0, CP0_HWRENA
+
+ dmfc0 v0, CP0_CVMCTL
+ ori v0, 1<<14 /* enable fixup of unaligned mem access */
+ dmtc0 v0, CP0_CVMCTL
+
+ /* Setup scratch memory. This is also done in
+ * cvmx_user_app_init, and this code will be removed
+ * from the bootloader in the near future.
+ */
+
+ /* Set L2C_LAD_CTL[MAXLFB] = 0 on CN73XX */
+ mfc0 a4, CP0_PRID
+ ext a4, a4, 8, 8
+ blt a4, OCTEON_PRID_CN73XX, 72f
+ nop
+ PTR_LI v0, OCTEON_L2C_TAD_CTL
+ ld t1, 0(v0)
+ dins t1, zero, 0, 4
+ sd t1, 0(v0)
+ ld zero, 0(v0)
+
+72:
+
+ /* clear these to avoid immediate interrupt in noperf mode */
+ dmtc0 zero, CP0_COMPARE /* clear timer interrupt */
+ dmtc0 zero, CP0_COUNT /* clear timer interrupt */
+ dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */
+ dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */
+ dmtc0 zero, CP0_PERF_CNT2
+ dmtc0 zero, CP0_PERF_CNT3
+
+ /* If we're running on a node other than 0 then we need to set KSEGNODE
+ * to 0. The nice thing with this code is that it also autodetects if
+ * we're running on a processor that supports CVMMEMCTL2 or not since
+ * only processors that have this will have a non-zero node ID. Because
+ * of this there's no need to check if we're running on a 78XX.
+ */
+ mfc0 t1, CP0_EBASE
+ dext t1, t1, 7, 3 /* Extract node number */
+ beqz t1, is_node0 /* If non-zero then we're not node 0 */
+ nop
+ dmfc0 t1, CP0_CVMMEMCTL2
+ dins t1, zero, 12, 4
+ dmtc0 t1, CP0_CVMMEMCTL2
+is_node0:
+
+ /* Set up TLB mappings for u-boot code in flash. */
+
+ /* Use a bal to get the current PC into ra. Since this bal is to
+ * the address immediately following the delay slot, the ra is
+ * the address of the label. We then use this to get the actual
+ * address that we are executing from.
+ */
+ bal __dummy
+ nop
+
+__dummy:
+ /* Get the actual address that we are running at */
+ PTR_LA a6, _start /* Linked address of _start */
+ PTR_LA a7, __dummy
+ dsubu t0, a7, a6 /* offset of __dummy label from _start*/
+ dsubu a7, ra, t0 /* a7 now has actual address of _start*/
+
+ /* Save actual _start address in s7. This is where we
+ * are executing from, as opposed to where the code is
+ * linked.
+ */
+ move s7, a7
+ move s4, zero
+
+ /* s7 has actual address of _start. If this is
+ * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF.
+ * If it is on the boot bus, use 0xBFC00000 as the physical address
+ * for the TLB mapping, as we will be adjusting the boot bus
+ * to make this adjustment.
+ * If we are running from DRAM (remote-boot), then we want to use the
+ * real address in DRAM.
+ */
+
+ /* Check to see if we are running from flash - we expect that to
+ * be 0xffffffffb0000000-0xffffffffbfffffff
+ * (0x10000000-0x1fffffff, unmapped/uncached)
+ */
+ dli t2, 0xffffffffb0000000
+ dsubu t2, s7
+ slt s4, s7, t2
+ bltz t2, uboot_in_flash
+ nop
+
+ /* If we're not core 0 then we don't care about cache */
+ mfc0 t2, CP0_EBASE
+ andi t2, EBASE_CPUNUM
+ bnez t2, uboot_in_ram
+ nop
+
+ /* Find out if we're OCTEON I or OCTEON + which don't support running
+ * out of cache.
+ */
+ mfc0 t2, CP0_PRID
+ ext t2, t2, 8, 8
+ li s4, 1
+ blt t2, 0x90, uboot_in_ram
+ nop
+
+ /* U-Boot can be executing either in RAM or L2 cache. Now we need to
+ * check if DRAM is initialized. The way we do that is to look at
+ * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7)
+ */
+ PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL
+ ld t2, 0(t2)
+ bbit1 t2, 7, uboot_in_ram
+ nop
+
+ /* We must be executing out of cache */
+ b uboot_in_ram
+ li s4, 2
+
+uboot_in_flash:
+ /* Set s4 to 4 to indicate we're running in FLASH */
+ li s4, 4
+
+#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING)
+ /* By default, L2C index aliasing is enabled. In some cases it may
+ * need to be disabled. The L2C index aliasing can only be disabled
+ * if U-Boot is running out of L2 cache and the L2 cache has not been
+ * used to store anything.
+ */
+ PTR_LI t1, OCTEON_L2C_CTL
+ ld t2, 0(t1)
+ ori t2, 1
+ sd t2, 0(t1)
+#endif
+
+ /* Use BFC00000 as physical address for TLB mappings when booting
+ * from flash, as we will adjust the boot bus mappings to make this
+ * mapping correct.
+ */
+ dli a7, 0xFFFFFFFFBFC00000
+ dsubu s6, s7, a7 /* Save flash offset in s6 */
+
+#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2)
+ /* For OCTEON II we check to see if the L2 cache is big enough to hold
+ * U-Boot. If it is big enough then we copy ourself from flash to the
+ * L2 cache in order to speed up execution.
+ */
+
+ /* Check for OCTEON 2 */
+ mfc0 t1, CP0_PRID
+ ext t1, t1, 8, 8
+ /* Get number of L2 cache sets */
+ beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */
+ li t2, 1 << 9
+ beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */
+ li t2, 1 << 13
+ beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */
+ li t2, 1 << 10
+ beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */
+ li t2, 1 << 11
+ beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */
+ li t2, 1 << 11
+ b l2_cache_too_small /* Unknown OCTEON model */
+ nop
+
+got_l2_sets:
+ /* Get number of associations */
+ PTR_LI t0, OCTEON_MIO_FUSE_DAT3
+ ld t0, 0(t0)
+ dext t0, t0, 32, 3
+
+ beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets
+ nop
+ /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */
+ beqz t0, got_l2_ways
+ li t3, 16
+ beq t0, 1, got_l2_ways
+ li t3, 12
+ beq t0, 2, got_l2_ways
+ li t3, 8
+ beq t0, 3, got_l2_ways
+ li t3, 4
+ b l2_cache_too_small
+ nop
+
+process_70xx_l2sets:
+ /* For 70XX, the number of ways is defined as:
+ * 0 - full cache (4-way) 512K
+ * 1 - 3/4 ways (3-way) 384K
+ * 2 - 1/2 ways (2-way) 256K
+ * 3 - 1/4 ways (1-way) 128K
+ * 4-7 illegal (aliased to 0-3)
+ */
+ andi t0, 3
+ beqz t0, got_l2_ways
+ li t3, 4
+ beq t0, 1, got_l2_ways
+ li t3, 3
+ beq t0, 2, got_l2_ways
+ li t3, 2
+ li t3, 1
+
+got_l2_ways:
+ dmul a1, t2, t3 /* Calculate cache size */
+ dsll a1, 7 /* Ways * Sets * cache line sz (128) */
+ daddiu a1, a1, -128 /* Adjust cache size for copy code */
+
+ /* Calculate size of U-Boot image */
+ /*
+ * "uboot_end - _start" is not correct, as the image also
+ * includes the DTB appended to the end (OF_EMBED is deprecated).
+ * Lets use a defined max for now here.
+ */
+ PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT
+
+ daddu t2, s5, s7 /* t2 = end address */
+ daddiu t2, t2, 127
+ ins t2, zero, 0, 7 /* Round up to cache line for memcpy */
+
+ slt t1, a1, s5 /* See if we're bigger than the L2 cache */
+ bnez t1, l2_cache_too_small
+ nop
+ /* Address we plan to load at in the L2 cache */
+ PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR
+ /* Copy the image from flash (s7) to its load address in L2 (t9).
+ * Both variants run the same loop: t0 = source, t1 = destination,
+ * t2 = source end address (cache-line rounded), 32 bytes per pass.
+ * Clobbers a0-a3, t0, t1 (and ra in the in-cache variant).
+ */
+# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE
+ /* Enable all ways for PP0. Authentik ROM may have disabled these */
+ PTR_LI a1, OCTEON_L2C_WPAR_PP0
+ sd zero, 0(a1)
+
+ /* Address to place our memcpy code */
+ PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR
+ /* The following code writes a simple memcpy routine into the cache
+ * to copy ourself from flash into the L2 cache. This makes the
+ * memcpy routine a lot faster since each instruction can potentially
+ * require four read cycles to flash over the boot bus.
+ */
+ /* Zero cache line in the L2 cache */
+ zcb (a0)
+ synci 0(zero)
+ dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */
+ sd a1, 0(a0)
+ dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */
+ sd a1, 8(a0)
+ dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */
+ sd a1, 16(a0)
+ dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */
+ sd a1, 24(a0)
+ dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */
+ sd a1, 32(a0)
+ dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */
+ sd a1, 40(a0)
+ sd zero, 48(a0) /* nop; nop */
+
+ /* Synchronize the caches */
+ sync
+ synci 0(zero)
+
+ move t0, s7
+ move t1, t9
+
+ /* Do the memcpy operation in L2 cache to copy ourself from flash
+ * to the L2 cache.
+ */
+ jalr a0
+ nop
+
+# else
+ /* Copy ourself to the L2 cache from flash, 32 bytes at a time */
+ /* This code is now written to the L2 cache using the code above */
+
+ /* t0/t1 still hold the fuse field and a compare result from the L2
+ * size checks, so load the source and destination explicitly.
+ */
+ move t0, s7 /* source: where we are running from */
+ move t1, t9 /* destination: load address in L2 */
+1:
+ ld a0, 0(t0)
+ ld a1, 8(t0)
+ ld a2, 16(t0)
+ ld a3, 24(t0)
+ sd a0, 0(t1)
+ sd a1, 8(t1)
+ sd a2, 16(t1)
+ sd a3, 24(t1)
+ addiu t0, 32
+ bne t0, t2, 1b
+ addiu t1, 32
+# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */
+
+ /* Adjust the start address of U-Boot and the global pointer */
+ subu t0, s7, t9 /* t0 = address difference */
+ move s7, t9 /* Update physical address */
+ move s2, t9
+ sync
+ synci 0(zero)
+
+ /* Now we branch to the L2 cache. We first get our PC then adjust it
+ */
+ bal 3f
+ nop
+3:
+ /* Don't add any instructions here! */
+ subu t9, ra, t0
+ /* Give ourself 16 bytes */
+ addiu t9, 0x10
+
+ jal t9 /* Branch to address in L2 cache */
+
+ nop
+ nop
+ /* Add instructions after here */
+
+ move a7, s7
+
+ b uboot_in_ram
+ ori s4, 2 /* Running out of L2 cache */
+
+l2_cache_too_small: /* We go here if we can't copy ourself to L2 */
+#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */
+
+ /* This code is only executed if booting from flash. */
+ /* For flash boot (_not_ RAM boot), we do a workaround for
+ * an LLM errata on CN38XX and CN58XX parts.
+ */
+
+uboot_in_ram:
+ /* U-boot address is now in reg a7, and is 4 MByte aligned.
+ * (boot bus addressing has been adjusted to make this happen for flash,
+ * and for DRAM this alignment must be provided by the remote boot
+ * utility.
+ */
+ /* See if we're in KSEG0 range, if so set EBASE register to handle
+ * exceptions.
+ */
+ dli a1, 0x20000000
+ bge a7, a1, 1f
+ nop
+ /* Convert our physical address to KSEG0 */
+ PTR_LI a1, 0xffffffff80000000
+ or a1, a1, a7
+ mtc0 a1, CP0_EBASE
+1:
+ /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping
+ * to map u-boot.
+ */
+ move a0, a6 /* Virtual addr in a0 */
+ dins a0, zero, 0, 16 /* Zero out offset bits */
+ move a1, a7 /* Physical addr in a1 */
+
+ /* Now we need to remove the MIPS address space bits. For this we
+ * need to determine if it is a 32 bit compatibility address or not.
+ */
+
+ /* 'lowest' address in compatibility space */
+ PTR_LI t0, 0xffffffff80000000
+ dsubu t0, t0, a1
+ bltz t0, compat_space
+ nop
+
+ /* We have a xkphys address, so strip off top bit */
+ b addr_fixup_done
+ dins a1, zero, 63, 1
+
+compat_space:
+ PTR_LI a2, 0x1fffffff
+ and a1, a1, a2 /* Mask phy addr to remove address space bits */
+
+addr_fixup_done:
+ /* Currently the u-boot image size is limited to 4 MBytes. In order to
+ * support larger images the flash mapping will need to be changed to
+ * be able to access more than that before C code is run. Until that
+ * is done, we just use a 4 MByte mapping for the secondary cores as
+ * well.
+ */
+ /* page size (only support 4 Meg binary size for now for core 0)
+ * This limitation is due to the fact that the boot vector is
+ * 0xBFC00000 which only makes 4MB available. Later more flash
+ * address space will be available after U-Boot has been copied to
+ * RAM. For now assume that it is in flash.
+ */
+ li a2, 2*1024*1024
+
+ mfc0 a4, CP0_EBASE
+ andi a4, EBASE_CPUNUM /* get core */
+ beqz a4, core_0_tlb
+ nop
+
+ /* Now determine how big a mapping to use for secondary cores,
+ * which need to map all of u-boot + heap in DRAM
+ */
+ /* Here we look at the alignment of the physical address,
+ * and use the largest page size possible. In some cases
+ * this can result in an oversize mapping, but for secondary cores
+ * this mapping is very short lived.
+ */
+
+ /* Physical address in a1 */
+ li a2, 1
+1:
+ sll a2, 1
+ and a5, a1, a2
+ beqz a5, 1b
+ nop
+
+ /* a2 now contains largest page size we can use */
+core_0_tlb:
+ JAL(single_tlb_setup)
+
+ /* Check if we're running from cache */
+ bbit1 s4, 1, uboot_in_cache
+ nop
+
+ /* If we are already running from ram, we don't need to muck
+ * with boot bus mappings.
+ */
+ PTR_LI t2, 0xffffffffb0000000
+ dsubu t2, s7
+ /* See if our starting address is lower than the boot bus */
+ bgez t2, uboot_in_ram2 /* If yes, booting from RAM */
+ nop
+
+uboot_in_cache:
+#if CONFIG_OCTEON_BIG_STACK_SIZE
+ /* The large stack is only for core 0. For all other cores we need to
+ * use the L1 cache otherwise the other cores will stomp on top of each
+ * other unless even more space is reserved for the stack space for
+ * each core. With potentially 96 cores this gets excessive.
+ */
+ mfc0 v0, CP0_EBASE
+ andi a0, v0, EBASE_CPUNUM /* a0 = core number taken from EBASE */
+ bnez a0, no_big_stack
+ nop
+ PTR_LA sp, big_stack_start
+ daddiu sp, -16
+
+no_big_stack:
+#endif
+ /* We now have the TLB set up, so we need to remap the boot bus.
+ * This is tricky, as we are running from flash, and will be changing
+ * the addressing of the flash.
+ *
+ * The change_boot_mappings stub is copied into boot bus local memory,
+ * which is made visible at 0x10000000. The stub is then entered with
+ * a0 = new MIO_BOOT_REG_CFG0 value
+ * a1 = address to continue at (__mapped_continue_label)
+ * a2 = address of MIO_BOOT_REG_CFG0
+ */
+ /* Enable movable boot bus region 0, at address 0x10000000 */
+ PTR_LI a4, OCTEON_MIO_BOOT_BASE
+ dli a5, 0x81000000 /* EN + base address 0x10000000 */
+ sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4)
+
+ /* Copy the code that remaps the boot bus to the movable region.
+ * Start at local memory address 0; the data writes below then store
+ * consecutive dwords.
+ */
+ sd zero, OCTEON_MIO_BOOT_LOC_ADR_OFF(a4)
+
+ /* a5 = run-time address of change_boot_mappings (link address + offset) */
+ PTR_LA a6, change_boot_mappings
+ GETOFFSET(a5, change_boot_mappings);
+ daddu a5, a5, a6
+
+ /* The code is 16 bytes (2 DWORDS) */
+ ld a7, 0(a5)
+ sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
+ ld a7, 8(a5)
+ sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4)
+
+ /* Read from an RML register to ensure that the previous writes have
+ * completed before we branch to the movable region.
+ */
+ ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4)
+
+ /* Compute value for boot bus configuration register */
+ /* Read region 0 config so we can _modify_ the base address field */
+ PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */
+ ld a0, 0(a4)
+ dli a4, 0xf0000000 /* Mask off bits we want to save */
+ and a4, a4, a0
+ dli a0, 0x0fff0000 /* Force size to max */
+ or a4, a4, a0
+
+ move a5, s6
+ /* Convert to 64k blocks, as used by boot bus config */
+ srl a5, 16
+ li a6, 0x1fc0 /* 'normal' boot bus base config value */
+ subu a6, a6, a5 /* Subtract offset */
+ /* combine into register value to pass to boot bus routine */
+ or a0, a4, a6
+
+ /* Branch there */
+ PTR_LA a1, __mapped_continue_label
+ PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0
+ /* If region 0 is not enabled we can skip it */
+ ld a4, 0(a2)
+ bbit0 a4, 31, __mapped_continue_label
+ nop
+ li a4, 0x10000000 /* address of the stub in the movable region */
+ j a4
+ synci 0(zero)
+
+ /* We never get here, as we go directly to __mapped_continue_label */
+ break
+
+
+uboot_in_ram2:
+
+ /* Now jump to address in TLB mapped memory to continue execution */
+ PTR_LA a4, __mapped_continue_label
+ synci 0(a4)
+ j a4
+ nop
+
+__mapped_continue_label:
+ /* Check if we are core 0, if we are not then we need
+ * to vector to code in DRAM to do application setup, and
+ * skip the rest of the bootloader. Only core 0 runs the bootloader
+ * and sets up the tables that the other cores will use for
+ * configuration.
+ */
+ mfc0 a0, CP0_EBASE
+ andi a0, EBASE_CPUNUM /* get core */
+ /* if (__all_cores_are_equal==0 && core==0),
+ * then jump to execute BL on core 0; else 'go to next line'
+ * (core_0_cont1 is executed ONLY when k0=a0=0(core0_ID))
+ */
+ lw t0, __all_cores_are_equal
+ beq a0, t0, core_0_cont1
+ nop
+
+ /* other cores look up addr from dram */
+ /* DRAM controller already set up by first core */
+ li a1, (BOOT_VECTOR_NUM_WORDS * 4)
+ mul a0, a0, a1
+
+ /* Now find out the boot vector base address from the moveable boot
+ * bus region.
+ */
+
+ /* Get the address of the boot bus moveable region */
+ PTR_LI t8, OCTEON_MIO_BOOT_BASE
+ ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8)
+ /* Make sure it's enabled */
+ bbit0 t9, 31, invalid_boot_vector
+ dext t9, t9, 3, 24
+ dsll t9, t9, 7
+ /* Make address XKPHYS */
+ li t0, 1
+ dins t9, t0, 63, 1
+
+ ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9)
+ dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1
+ bne t0, t1, invalid_boot_vector
+ nop
+
+ /* Load base address of boot vector table */
+ ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9)
+ /* Add offset for core */
+ daddu a1, t0, a0
+
+ mfc0 v0, CP0_STATUS
+ move v1, v0
+ ins v1, zero, 19, 1 /* Clear NMI bit */
+ mtc0 v1, CP0_STATUS
+
+ /* Get app start function address */
+ lw t9, 8(a1)
+ beqz t9, invalid_boot_vector
+ nop
+
+ j t9
+ lw k0, 12(a1) /* Load global data (deprecated) */
+
+invalid_boot_vector:
+ wait
+ b invalid_boot_vector
+ nop
+
+__all_cores_are_equal:
+ /* The following .word tell if 'all_cores_are_equal' or core0 is special
+ * By default (for the first execution) the core0 should be special,
+ * in order to behave like the old(existing not-modified) bootloader
+ * and run the bootloader on core 0 to follow the existing design.
+ * However after that we make 'all_cores_equal' which allows to run SE
+ * applications on core0 like on any other core. NOTE that value written
+ * to '__all_cores_are_equal' should not match any core ID.
+ */
+ .word 0
+
+core_0_cont1:
+ li t0, 0xffffffff
+ sw t0, __all_cores_are_equal
+ /* From here on, only core 0 runs, other cores have branched
+ * away.
+ */
+#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM
+ /* Set up initial stack and global data */
+ setup_stack_gd
+# ifdef CONFIG_DEBUG_UART
+ PTR_LA t9, debug_uart_init
+ jalr t9
+ nop
+# endif
+#endif
+ move a0, zero # a0 <-- boot_flags = 0
+ PTR_LA t9, board_init_f
+
+ jr t9
+ move ra, zero
+ END(_start)
+
+ .balign 8
+ .globl single_tlb_setup
+ .ent single_tlb_setup
+ /* Sets up a single TLB entry. Virtual/physical addresses
+ * must be properly aligned.
+ * a0 Virtual address
+ * a1 Physical address
+ * a2 page (_not_ mapping) size
+ *
+ * The entry is written at the highest TLB index and maps an even/odd
+ * page pair, i.e. 2 * a2 bytes of physically contiguous memory.
+ * Clobbers: a0, a1, a3, a4, a5, a6.
+ */
+single_tlb_setup:
+ /* Determine the number of TLB entries available, and
+ * use the top one.
+ */
+ mfc0 a3, CP0_CONFIG1
+ dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */
+ mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */
+ bbit0 a5, 31, single_tlb_setup_cont
+ nop
+ mfc0 a5, CP0_CONFIG4
+ bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */
+ nop
+ /* append config4[MMUSizeExt] to most significant bit of
+ * config1[MMUSize-1]
+ */
+ dins a3, a5, 6, 8
+ and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */
+
+single_tlb_setup_cont:
+
+ /* Format physical address for entry low */
+ nop
+ dsrl a1, a1, 12
+ dsll a1, a1, 6
+ ori a1, a1, 0x7 /* set DVG bits */
+
+ move a4, a2
+ daddu a5, a4, a4 /* mapping size */
+ dsll a6, a4, 1
+ daddiu a6, a6, -1 /* pagemask */
+ dsrl a4, a4, 6 /* adjust for adding with entrylo */
+
+ /* Now set up mapping */
+ mtc0 a6, CP0_PAGEMASK
+ mtc0 a3, CP0_INDEX
+
+ dmtc0 a1, CP0_ENTRYLO0 /* even page */
+ daddu a1, a1, a4 /* advance by one page, in EntryLo format */
+
+ dmtc0 a1, CP0_ENTRYLO1 /* odd page */
+ daddu a1, a1, a4 /* result is not used again in this routine */
+
+ dmtc0 a0, CP0_ENTRYHI
+ daddu a0, a0, a5 /* result is not used again in this routine */
+
+ ehb
+ tlbwi
+ jr ra
+ nop
+ .end single_tlb_setup
+
+
+/**
+ * This code is moved to a movable boot bus region,
+ * and it is responsible for changing the flash mappings and
+ * jumping to run from the TLB mapped address.
+ *
+ * The caller copies exactly 16 bytes (two dwords) starting at this label,
+ * so the routine must stay at four instructions.
+ *
+ * @param a0 New address for boot bus region 0
+ * @param a1 Address to branch to afterwards
+ * @param a2 Address of MIO_BOOT_REG_CFG0
+ */
+ .balign 8
+change_boot_mappings:
+ sd a0, 0(a2)
+ sync
+ j a1 /* Jump to new TLB mapped location */
+ synci 0(zero)
+
+/* If we need a large stack, allocate it here.
+ * big_stack_end is the lowest address of the area and big_stack_start the
+ * address just above it; the startup code points sp just below
+ * big_stack_start.
+ */
+#if CONFIG_OCTEON_BIG_STACK_SIZE
+ /* Allocate the stack here so it's in L2 cache or DRAM */
+ .balign 16
+big_stack_end:
+ .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0
+big_stack_start:
+ .dword 0
+#endif