From: Chao-ying Fu <[email protected]>

Add initial platform support for the P8700-F, a high-performance
multi-core RV64GC SoC with optional multi-cluster configuration and
hardware multithreading.

This patch introduces the initial platform code necessary to support
the P8700 CPU in U-Boot.

Signed-off-by: Chao-ying Fu <[email protected]>
Signed-off-by: Uros Stajic <[email protected]>
---
Review notes (not part of the commit message):
- cache.c: add "memory" clobbers to the MCACHE and IHB inline asm and
  document the cache helpers.
- cpu.c: document jump_to_addr() and harts_early_init(); no code change.
- dram.c: include <asm/global_data.h> for DECLARE_GLOBAL_DATA_PTR/gd and
  return phys_addr_t from board_get_usable_ram_top() to match <init.h>.
- The blob ids on the "index" lines are those of the original submission
  and were not regenerated.

 arch/riscv/Kconfig                        |   1 +
 arch/riscv/cpu/p8700/Kconfig              |  14 +++
 arch/riscv/cpu/p8700/Makefile             |   7 ++
 arch/riscv/cpu/p8700/cache.c              | 102 ++++++++++++++++++
 arch/riscv/cpu/p8700/cpu.c                | 118 ++++++++++++++++++++++
 arch/riscv/cpu/p8700/dram.c               |  38 ++++++++
 arch/riscv/include/asm/arch-p8700/p8700.h | 110 +++++++++++++++++++++
 7 files changed, 390 insertions(+)
 create mode 100644 arch/riscv/cpu/p8700/Kconfig
 create mode 100644 arch/riscv/cpu/p8700/Makefile
 create mode 100644 arch/riscv/cpu/p8700/cache.c
 create mode 100644 arch/riscv/cpu/p8700/cpu.c
 create mode 100644 arch/riscv/cpu/p8700/dram.c
 create mode 100644 arch/riscv/include/asm/arch-p8700/p8700.h

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 265b5320777..b4896544868 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -131,6 +131,7 @@ source "arch/riscv/cpu/jh7110/Kconfig"
 source "arch/riscv/cpu/k1/Kconfig"
 source "arch/riscv/cpu/k230/Kconfig"
 source "arch/riscv/cpu/th1520/Kconfig"
+source "arch/riscv/cpu/p8700/Kconfig"
 
 # architecture-specific options below
 
diff --git a/arch/riscv/cpu/p8700/Kconfig b/arch/riscv/cpu/p8700/Kconfig
new file mode 100644
index 00000000000..0913a6ce8f2
--- /dev/null
+++ b/arch/riscv/cpu/p8700/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2021, Chao-ying Fu <[email protected]>
+
+config P8700_RISCV
+	bool
+	select ARCH_EARLY_INIT_R
+	imply CPU
+	imply CPU_RISCV
+	imply RISCV_ACLINT if (RISCV_MMODE || SPL_RISCV_MMODE)
+	imply CMD_CPU
+	imply SPL_CPU_SUPPORT
+	imply SPL_OPENSBI
+	imply SPL_LOAD_FIT
diff --git a/arch/riscv/cpu/p8700/Makefile b/arch/riscv/cpu/p8700/Makefile
new file mode 100644
index 00000000000..32c5e2dce21
--- /dev/null
+++ b/arch/riscv/cpu/p8700/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2021, Chao-ying Fu <[email protected]>
+
+obj-y += cache.o
+obj-y += cpu.o
+obj-y += dram.o
diff --git a/arch/riscv/cpu/p8700/cache.c b/arch/riscv/cpu/p8700/cache.c
new file mode 100644
index 00000000000..7559c688321
--- /dev/null
+++ b/arch/riscv/cpu/p8700/cache.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2021, Chao-ying Fu <[email protected]>
+ */
+
+#include <cpu_func.h>
+#include <asm/global_data.h>
+#include <asm/io.h>
+#include <asm/arch-p8700/p8700.h>
+
+#define MCACHE_BASE_INST 0xec0500f3
+
+/* NOTE: We force to use a0 in mcache to encode via .word.
+ * 0xec0500f3 is a manually encoded custom RISC-V MCACHE instruction.
+ * The bits [19:15] are set to 01010, selecting register x10 (a0)
+ * as the source operand.
+ * The bits [24:20] represent the 'op' field, which is currently set to 0.
+ * Different cache operations are applied by OR-ing (op << 20) dynamically
+ * to this base value.
+ * Because of this encoding, the variable 'addr' is forced into register a0,
+ * so that the MCACHE instruction uses the address in a0 as its operand.
+ * The "memory" clobber keeps the compiler from moving memory accesses
+ * across the cache operation.
+ */
+#define cache_loop(start, end, lsize, op) do {				\
+	const __typeof__(lsize) __lsize = (lsize);			\
+	const register void *addr asm("a0") = (const void *)((start) & ~(__lsize - 1));	\
+	const void *aend = (const void *)(((end) - 1) & ~(__lsize - 1));	\
+	for (; addr <= aend; addr += __lsize)				\
+		asm volatile (".word %0 | %1 # force to use %2"	\
+			::"i"(MCACHE_BASE_INST), "i"((op) << 20), "r"(addr) : "memory");	\
+} while (0)
+
+static unsigned long lsize;
+static unsigned long l1d_total_size;
+static unsigned long slsize;
+
+/*
+ * Record the cache geometry: the L1 D-cache values are fixed, the L2 line
+ * size is decoded from GCR_L2_CONFIG. slsize stays 0 when that field is 0,
+ * which makes the range functions below skip the L2 loop.
+ */
+static void probe_cache_config(void)
+{
+	lsize = 64;
+	l1d_total_size = 64 * 1024;
+
+	int l2_config = 0;
+
+	l2_config = readl((void __iomem *)GCR_L2_CONFIG);
+	int l2_line_size_info = (l2_config >> L2_LINE_SIZE_SHIFT)
+				& L2_LINE_SIZE_MASK;
+	slsize = (l2_line_size_info == 0) ? 0 : 1 << (l2_line_size_info + 1);
+}
+
+/* Write back and invalidate [start, end) in the L1 D-cache, then in L2. */
+void flush_dcache_range(unsigned long start, unsigned long end)
+{
+	if (lsize == 0)
+		probe_cache_config();
+
+	/* aend will be miscalculated when size is zero, so we return here */
+	if (start >= end)
+		return;
+
+	cache_loop(start, end, lsize, HIT_WRITEBACK_INV_D);
+
+	/* flush L2 cache */
+	if (slsize)
+		cache_loop(start, end, slsize, HIT_WRITEBACK_INV_SD);
+
+	/* Instruction Hazard Barrier (IHB) — a hint-encoded SLLI (rd=0, rs1=0, imm=1).
+	 * Ensures that all subsequent instruction fetches, including speculative ones,
+	 * observe state changes from prior instructions.
+	 * Required after MCACHE instructions when instruction fetch depends on cache ops.
+	 */
+	asm volatile ("slli x0,x0,1 # ihb" ::: "memory");
+}
+
+/* Invalidate [start, end) in L2 (if slsize is non-zero), then in the L1 D-cache. */
+void invalidate_dcache_range(unsigned long start, unsigned long end)
+{
+	if (lsize == 0)
+		probe_cache_config();
+
+	/* aend will be miscalculated when size is zero, so we return here */
+	if (start >= end)
+		return;
+
+	/* invalidate L2 cache */
+	if (slsize)
+		cache_loop(start, end, slsize, HIT_INVALIDATE_SD);
+
+	cache_loop(start, end, lsize, HIT_INVALIDATE_D);
+
+	/* Instruction Hazard Barrier (IHB) — a hint-encoded SLLI (rd=0, rs1=0, imm=1).
+	 * Ensures that all subsequent instruction fetches, including speculative ones,
+	 * observe state changes from prior instructions.
+	 * Required after MCACHE instructions when instruction fetch depends on cache ops.
+	 */
+	asm volatile ("slli x0,x0,1 # ihb" ::: "memory");
+}
diff --git a/arch/riscv/cpu/p8700/cpu.c b/arch/riscv/cpu/p8700/cpu.c
new file mode 100644
index 00000000000..d63f7073d75
--- /dev/null
+++ b/arch/riscv/cpu/p8700/cpu.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2021, Chao-ying Fu <[email protected]>
+ */
+
+#include <asm/encoding.h>
+#include <asm/io.h>
+#include <linux/types.h>
+#include <asm/arch-p8700/p8700.h>
+
+/* Transfer control to @addr with an indirect jump; never returns. */
+static __noreturn void jump_to_addr(ulong addr)
+{
+	asm volatile ("jr %0" :: "r"(addr) : "memory");
+	__builtin_unreachable();
+}
+
+/*
+ * Early per-hart setup, done only when built for M-mode: wait for DDR3
+ * calibration, let the first hart of each core enable CM coherency, program
+ * PMP and the PMA cache attributes, then send every hart except hart 0 to
+ * CONFIG_SYS_LOAD_ADDR.
+ */
+void harts_early_init(void)
+{
+	if (!IS_ENABLED(CONFIG_RISCV_MMODE))
+		return;
+
+	ulong hartid = csr_read(CSR_MHARTID);
+
+	/* Wait for DDR3 calibration */
+	while (!(readl((void __iomem *)BOSTON_PLAT_DDR3STAT) &
+		 BOSTON_PLAT_DDR3STAT_CALIB)) {
+		/* busy-wait */
+	}
+
+	/*
+	 * Only mhartid[3:0] == 0 performs CM/GCR programming.
+	 * Other harts skip CM/GCR setup and go straight to PMP/PMA setup.
+	 */
+	if ((hartid & 0xFULL) == 0) {
+		ulong cm_base = CM_BASE;
+		void __iomem *gcr_win = (void __iomem *)0x1fb80000;
+		ulong cluster = (hartid >> MHARTID_CLUSTER_SHIFT) &
+				MHARTID_CLUSTER_MASK;
+
+		cm_base += cluster << CM_BASE_CLUSTER_SHIFT;
+
+		if ((hartid & 0xFFFFUL) == 0)
+			writeq(cm_base, gcr_win + GCR_BASE_OFFSET);
+
+		ulong core = (hartid >> MHARTID_CORE_SHIFT) & MHARTID_CORE_MASK;
+
+		/* Enable coherency for the current core */
+		cm_base += core << CM_BASE_CORE_SHIFT;
+		writeq((u64)GCR_CL_COH_EN_EN,
+		       (void __iomem *)(cm_base + P8700_GCR_C0_COH_EN));
+
+		/*
+		 * On hart 0, default PCIe DMA mapping should be the non-IOCU
+		 * target.
+		 */
+		if (hartid == 0) {
+			writel(0x00, (void __iomem *)BOSTON_PLAT_NOCPCIE0ADDR);
+			writel(0x00, (void __iomem *)BOSTON_PLAT_NOCPCIE1ADDR);
+			writel(0x00, (void __iomem *)BOSTON_PLAT_NOCPCIE2ADDR);
+		}
+	}
+
+	/* PMP setup */
+	csr_write(pmpaddr1, 0x2fffffffUL);
+	csr_write(pmpaddr2, 0x07ff7fffUL);
+	csr_write(pmpaddr3, 0x07f3ffffUL);
+	csr_write(pmpaddr4, 0x1fffffffffffffffUL);
+
+	unsigned long pmpcfg = ((unsigned long)(PMP_NAPOT | PMP_R | PMP_W |
+						PMP_X) << 32) |
+			       ((unsigned long)(PMP_NAPOT | PMP_R |
+						PMP_X) << 24) |
+			       ((unsigned long)(PMP_NAPOT | PMP_R | PMP_W |
+						PMP_X) << 16) |
+			       ((unsigned long)(PMP_NAPOT | PMP_R | PMP_W |
+						PMP_X) << 8);
+
+	csr_write(pmpcfg0, pmpcfg);
+
+	/* PMA/cache attributes */
+	ulong pmacfg0;
+
+	if (hartid == 0) {
+		/*
+		 * Hart 0: cacheable for pmp0, pmp1, pmp3; uncacheable for
+		 * pmp2, pmp4.
+		 */
+		pmacfg0 = ((unsigned long)CCA_CACHE_DISABLE << 32) |
+			  ((unsigned long)CCA_CACHE_ENABLE << 24) |
+			  ((unsigned long)CCA_CACHE_DISABLE << 16) |
+			  ((unsigned long)CCA_CACHE_ENABLE << 8) |
+			  ((unsigned long)CCA_CACHE_ENABLE);
+	} else {
+		/*
+		 * Hart 1 or above: cacheable for pmp0, pmp1; uncacheable for
+		 * pmp2, pmp3, pmp4.
+		 */
+		pmacfg0 = ((unsigned long)CCA_CACHE_DISABLE << 32) |
+			  ((unsigned long)CCA_CACHE_DISABLE << 24) |
+			  ((unsigned long)CCA_CACHE_DISABLE << 16) |
+			  ((unsigned long)CCA_CACHE_ENABLE << 8) |
+			  ((unsigned long)CCA_CACHE_ENABLE);
+	}
+
+	asm volatile ("csrw %0, %1" :: "i"(CSR_PMACFG0), "r"(pmacfg0));
+	asm volatile ("fence" ::: "memory");
+
+	/* Secondary harts: after early setup, jump to the common entry point */
+	if (hartid != 0)
+		jump_to_addr(CONFIG_SYS_LOAD_ADDR);
+}
diff --git a/arch/riscv/cpu/p8700/dram.c b/arch/riscv/cpu/p8700/dram.c
new file mode 100644
index 00000000000..2b54326be39
--- /dev/null
+++ b/arch/riscv/cpu/p8700/dram.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018, Bin Meng <[email protected]>
+ */
+
+#include <fdtdec.h>
+#include <init.h>
+#include <asm/global_data.h>
+#include <linux/sizes.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+int dram_init(void)
+{
+	return fdtdec_setup_mem_size_base();
+}
+
+int dram_init_banksize(void)
+{
+	return fdtdec_setup_memory_banksize();
+}
+
+phys_addr_t board_get_usable_ram_top(phys_size_t total_size)
+{
+	if (IS_ENABLED(CONFIG_64BIT)) {
+		/*
+		 * Ensure that we run from first 4GB so that all
+		 * addresses used by U-Boot are 32bit addresses.
+		 *
+		 * This in-turn ensures that 32bit DMA capable
+		 * devices work fine because DMA mapping APIs will
+		 * provide 32bit DMA addresses only.
+		 */
+		if (gd->ram_top > SZ_4G)
+			return SZ_4G;
+	}
+	return gd->ram_top;
+}
diff --git a/arch/riscv/include/asm/arch-p8700/p8700.h b/arch/riscv/include/asm/arch-p8700/p8700.h
new file mode 100644
index 00000000000..5ca9b4b9497
--- /dev/null
+++ b/arch/riscv/include/asm/arch-p8700/p8700.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Chao-ying Fu <[email protected]>
+ */
+
+#ifndef __P8700_H__
+#define __P8700_H__
+
+#define CSR_MIPSCONFIG7		0x7d7
+#define CSR_PMACFG0		0x7e0
+
+#define MHARTID_HART_SHIFT	0
+#define MHARTID_HART_MASK	0xf
+#define MHARTID_CORE_SHIFT	4
+#define MHARTID_CORE_MASK	0xff
+#define MHARTID_CLUSTER_SHIFT	16
+#define MHARTID_CLUSTER_MASK	0xf
+
+#define MARCHID_UARCH_SHIFT	0
+#define MARCHID_UARCH_MASK	0xff
+#define MARCHID_CLASS_SHIFT	8
+#define MARCHID_CLASS_MASK	0xff
+#define MARCHID_CLASS_M		0
+#define MARCHID_CLASS_I		1
+#define MARCHID_CLASS_P		2
+
+#define CM_BASE_CORE_SHIFT	8
+#define CM_BASE_CLUSTER_SHIFT	19
+
+#define P8700_TIMER_ADDR	0x16108050
+
+#define CCA_CACHE_ENABLE	0
+#define CCA_BUFFER_CACHE	1
+#define CCA_CACHE_DISABLE	2
+#define CCA_UNCACHE_ACC		3
+#define PMA_SPECULATION		(0x1 << 3)
+
+#define L1_I_CACHE		0
+#define L1_D_CACHE		1
+#define L3_CACHE		2
+#define L2_CACHE		3
+
+#define HIT_INVALIDATE		4
+#define HIT_WRITEBACK_INV	5
+
+#define HIT_INVALIDATE_D	((HIT_INVALIDATE << 2) | L1_D_CACHE)
+#define HIT_INVALIDATE_SD	((HIT_INVALIDATE << 2) | L2_CACHE)
+#define HIT_WRITEBACK_INV_D	((HIT_WRITEBACK_INV << 2) | L1_D_CACHE)
+#define HIT_WRITEBACK_INV_SD	((HIT_WRITEBACK_INV << 2) | L2_CACHE)
+
+#define L1D_LINE_SIZE_SHIFT	10
+#define L1D_LINE_SIZE_MASK	0x7
+
+#define GCR_L2_CONFIG		0x16100130
+#define L2_LINE_SIZE_SHIFT	8
+#define L2_LINE_SIZE_MASK	0xf
+
+#define PMP_R			0x01
+#define PMP_W			0x02
+#define PMP_X			0x04
+#define PMP_TOR			0x8
+#define PMP_NA4			0x10
+#define PMP_NAPOT		0x18
+
+#define CM_BASE			0x16100000
+#define CPC_BASE		(CM_BASE + 0x8000)
+
+/* CPC Block offsets */
+#define CPC_OFF_LOCAL		0x2000
+
+#define CPC_PWRUP_CTL		0x0030
+
+#define CPC_SYS_CONFIG		0x0140
+
+#define CPC_Cx_CMD		0x0000
+#define CPC_Cx_CMD_RESET	0x4
+
+#define P8700_GCR_C0_COH_EN	0x20f8
+#define P8700_GCR_C1_COH_EN	0x21f8
+#define P8700_GCR_C2_COH_EN	0x22f8
+#define P8700_GCR_C3_COH_EN	0x23f8
+#define P8700_GCR_C4_COH_EN	0x24f8
+#define P8700_GCR_C5_COH_EN	0x25f8
+
+#define GCR_CL_COH_EN		0x2008
+#define GCR_CL_COH_EN_EN	(0x1 << 0)
+#define GCR_BASE_OFFSET		0x0008
+#define GIC_BASE_OFFSET		0x0080
+#define CPC_BASE_OFFSET		0x0088
+#define ENABLE			0x1
+#define COUNT_STOP		(0x1 << 28)
+#define GIC_LOCAL_SECTION_OFS	0x8000
+#define GIC_VL_MASK		0x08
+#define GIC_VL_RMASK		0x0c
+#define GIC_VL_SMASK		0x10
+#define GIC_VL_COMPARE_MAP	0x44
+
+#define INDEXED(op, reg, idx, offset, base) \
+	li idx, offset ;\
+	add idx, idx, base ;\
+	op reg, (idx)
+
+#define BOSTON_PLAT_BASE		0x17ffd000
+#define BOSTON_PLAT_DDR3STAT		(BOSTON_PLAT_BASE + 0x14)
+#define BOSTON_PLAT_DDR3STAT_CALIB	(0x1 << 2)
+#define BOSTON_PLAT_NOCPCIE0ADDR	(BOSTON_PLAT_BASE + 0x3c)
+#define BOSTON_PLAT_NOCPCIE1ADDR	(BOSTON_PLAT_BASE + 0x40)
+#define BOSTON_PLAT_NOCPCIE2ADDR	(BOSTON_PLAT_BASE + 0x44)
+
+#endif /* __P8700_H__ */
-- 
2.34.1

