From: Chao-ying Fu <[email protected]>

Add initial platform support for the P8700-F, a high-performance
multi-core RV64GC SoC with optional multi-cluster configuration and
hardware multithreading.

Introduce the initial platform code needed to support the P8700 CPU
in U-Boot.
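
The platform code provides cache maintenance routines built around the
custom MCACHE instruction, early per-hart initialization (CM/GCR
coherency enable, PMP and PMA configuration, and redirection of
secondary harts to CONFIG_SYS_LOAD_ADDR), and device-tree based DRAM
setup.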

Signed-off-by: Chao-ying Fu <[email protected]>
Signed-off-by: Uros Stajic <[email protected]>
---
 arch/riscv/Kconfig                        |   1 +
 arch/riscv/cpu/p8700/Kconfig              |  14 +++
 arch/riscv/cpu/p8700/Makefile             |   7 ++
 arch/riscv/cpu/p8700/cache.c              |  93 ++++++++++++++++++
 arch/riscv/cpu/p8700/cpu.c                | 111 ++++++++++++++++++++++
 arch/riscv/cpu/p8700/dram.c               |  37 ++++++++
 arch/riscv/include/asm/arch-p8700/p8700.h | 110 +++++++++++++++++++++
 7 files changed, 373 insertions(+)
 create mode 100644 arch/riscv/cpu/p8700/Kconfig
 create mode 100644 arch/riscv/cpu/p8700/Makefile
 create mode 100644 arch/riscv/cpu/p8700/cache.c
 create mode 100644 arch/riscv/cpu/p8700/cpu.c
 create mode 100644 arch/riscv/cpu/p8700/dram.c
 create mode 100644 arch/riscv/include/asm/arch-p8700/p8700.h

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 265b5320777..b4896544868 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -131,6 +131,7 @@ source "arch/riscv/cpu/jh7110/Kconfig"
 source "arch/riscv/cpu/k1/Kconfig"
 source "arch/riscv/cpu/k230/Kconfig"
 source "arch/riscv/cpu/th1520/Kconfig"
+source "arch/riscv/cpu/p8700/Kconfig"
 
 # architecture-specific options below
 
diff --git a/arch/riscv/cpu/p8700/Kconfig b/arch/riscv/cpu/p8700/Kconfig
new file mode 100644
index 00000000000..0913a6ce8f2
--- /dev/null
+++ b/arch/riscv/cpu/p8700/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2021, Chao-ying Fu <[email protected]>
+
+config P8700_RISCV
+       bool
+       select ARCH_EARLY_INIT_R
+       imply CPU
+       imply CPU_RISCV
+       imply RISCV_ACLINT if (RISCV_MMODE || SPL_RISCV_MMODE)
+       imply CMD_CPU
+       imply SPL_CPU_SUPPORT
+       imply SPL_OPENSBI
+       imply SPL_LOAD_FIT
diff --git a/arch/riscv/cpu/p8700/Makefile b/arch/riscv/cpu/p8700/Makefile
new file mode 100644
index 00000000000..32c5e2dce21
--- /dev/null
+++ b/arch/riscv/cpu/p8700/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2021, Chao-ying Fu <[email protected]>
+
+obj-y += cache.o
+obj-y += cpu.o
+obj-y += dram.o
diff --git a/arch/riscv/cpu/p8700/cache.c b/arch/riscv/cpu/p8700/cache.c
new file mode 100644
index 00000000000..7559c688321
--- /dev/null
+++ b/arch/riscv/cpu/p8700/cache.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2021, Chao-ying Fu <[email protected]>
+ */
+
+#include <cpu_func.h>
+#include <asm/global_data.h>
+#include <asm/io.h>
+#include <asm/arch-p8700/p8700.h>
+
+#define MCACHE_BASE_INST 0xec0500f3
+
+/*
+ * 0xec0500f3 is a manually encoded custom RISC-V MCACHE instruction,
+ * emitted with .word. Bits [19:15] are set to 01010, selecting
+ * register x10 (a0) as the source operand. Bits [24:20] form the 'op'
+ * field, which is 0 in this base value; individual cache operations
+ * are selected by OR-ing (op << 20) into it. Because of this encoding,
+ * the variable 'addr' is forced into register a0 so that the MCACHE
+ * instruction uses the address held there as its operand.
+ */
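+/* e.g. HIT_WRITEBACK_INV_D = (5 << 2) | 1 = 0x15 -> word 0xed5500f3 */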
+#define cache_loop(start, end, lsize, op) do {                         \
+       const __typeof__(lsize) __lsize = (lsize);                      \
+       const register void *addr asm("a0") = (const void *)((start) & ~(__lsize - 1)); \
+       const void *aend = (const void *)(((end) - 1) & ~(__lsize - 1)); \
+       for (; addr <= aend; addr += __lsize)                           \
+               asm volatile (".word %0 | %1 # force to use %2" \
+                                       ::"i"(MCACHE_BASE_INST), "i"((op) << 20), "r"(addr)); \
+} while (0)
+
+static unsigned long lsize;
+static unsigned long l1d_total_size;
+static unsigned long slsize;
+
+static void probe_cache_config(void)
+{
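+       /* L1 D-cache geometry is hardcoded: 64-byte lines, 64 KiB total */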
+       lsize = 64;
+       l1d_total_size = 64 * 1024;
+
+       int l2_config = readl((void __iomem *)GCR_L2_CONFIG);
+       int l2_line_size_info = (l2_config >> L2_LINE_SIZE_SHIFT)
+                               & L2_LINE_SIZE_MASK;
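+       /* Line-size field 0 means no L2; otherwise L2 line size is 2^(field + 1) bytes */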
+       slsize = (l2_line_size_info == 0) ? 0 : 1 << (l2_line_size_info + 1);
+}
+
+void flush_dcache_range(unsigned long start, unsigned long end)
+{
+       if (lsize == 0)
+               probe_cache_config();
+
+       /* aend will be miscalculated when size is zero, so we return here */
+       if (start >= end)
+               return;
+
+       cache_loop(start, end, lsize, HIT_WRITEBACK_INV_D);
+
+       /* flush L2 cache */
+       if (slsize)
+               cache_loop(start, end, slsize, HIT_WRITEBACK_INV_SD);
+
+       /*
+        * Instruction Hazard Barrier (IHB): a hint-encoded SLLI (rd=0, rs1=0, imm=1).
+        * Ensures that all subsequent instruction fetches, including speculative
+        * ones, observe state changes from prior instructions. Required after
+        * MCACHE instructions when instruction fetch depends on the cache ops.
+        */
+       asm volatile ("slli x0,x0,1 # ihb");
+}
+
+void invalidate_dcache_range(unsigned long start, unsigned long end)
+{
+       if (lsize == 0)
+               probe_cache_config();
+
+       /* aend will be miscalculated when size is zero, so we return here */
+       if (start >= end)
+               return;
+
+       /* invalidate L2 cache */
+       if (slsize)
+               cache_loop(start, end, slsize, HIT_INVALIDATE_SD);
+
+       cache_loop(start, end, lsize, HIT_INVALIDATE_D);
+
+       /*
+        * Instruction Hazard Barrier (IHB): a hint-encoded SLLI (rd=0, rs1=0, imm=1).
+        * Ensures that all subsequent instruction fetches, including speculative
+        * ones, observe state changes from prior instructions. Required after
+        * MCACHE instructions when instruction fetch depends on the cache ops.
+        */
+       asm volatile ("slli x0,x0,1 # ihb");
+}
diff --git a/arch/riscv/cpu/p8700/cpu.c b/arch/riscv/cpu/p8700/cpu.c
new file mode 100644
index 00000000000..d63f7073d75
--- /dev/null
+++ b/arch/riscv/cpu/p8700/cpu.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2021, Chao-ying Fu <[email protected]>
+ */
+
+#include <asm/encoding.h>
+#include <asm/io.h>
+#include <linux/types.h>
+#include <asm/arch-p8700/p8700.h>
+
+static __noreturn void jump_to_addr(ulong addr)
+{
+       asm volatile ("jr %0" :: "r"(addr) : "memory");
+       __builtin_unreachable();
+}
+
+void harts_early_init(void)
+{
+       if (!IS_ENABLED(CONFIG_RISCV_MMODE))
+               return;
+
+       ulong hartid = csr_read(CSR_MHARTID);
+
+       /* Wait for DDR3 calibration */
+       while (!(readl((void __iomem *)BOSTON_PLAT_DDR3STAT) &
+                BOSTON_PLAT_DDR3STAT_CALIB)) {
+               /* busy-wait */
+       }
+
+       /*
+        * Only mhartid[3:0] == 0 performs CM/GCR programming.
+        * Other harts skip CM/GCR setup and go straight to PMP/PMA setup.
+        */
+       if ((hartid & 0xFULL) == 0) {
+               ulong cm_base = CM_BASE;
+               void __iomem *gcr_win = (void __iomem *)0x1fb80000;
+               ulong cluster = (hartid >> MHARTID_CLUSTER_SHIFT) &
+                               MHARTID_CLUSTER_MASK;
+
+               cm_base += cluster << CM_BASE_CLUSTER_SHIFT;
+
+               if ((hartid & 0xFFFFUL) == 0)
+                       writeq(cm_base, gcr_win + GCR_BASE_OFFSET);
+
+               ulong core = (hartid >> MHARTID_CORE_SHIFT) & MHARTID_CORE_MASK;
+
+               /* Enable coherency for the current core */
+               cm_base += core << CM_BASE_CORE_SHIFT;
+               writeq((u64)GCR_CL_COH_EN_EN,
+                      (void __iomem *)(cm_base + P8700_GCR_C0_COH_EN));
+
+               /*
+                * On hart 0, default PCIe DMA mapping should be the non-IOCU
+                * target.
+                */
+               if (hartid == 0) {
+                       writel(0x00, (void __iomem *)BOSTON_PLAT_NOCPCIE0ADDR);
+                       writel(0x00, (void __iomem *)BOSTON_PLAT_NOCPCIE1ADDR);
+                       writel(0x00, (void __iomem *)BOSTON_PLAT_NOCPCIE2ADDR);
+               }
+       }
+
+       /* PMP setup */
+       csr_write(pmpaddr1, 0x2fffffffUL);
+       csr_write(pmpaddr2, 0x07ff7fffUL);
+       csr_write(pmpaddr3, 0x07f3ffffUL);
+       csr_write(pmpaddr4, 0x1fffffffffffffffUL);
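+       /*
+        * The pmpaddr values above are NAPOT encodings
+        * (pmpaddr = base / 4 + size / 8 - 1) and decode to:
+        *   pmpaddr1: base 0x80000000, size 2 GiB
+        *   pmpaddr2: base 0x1ffc0000, size 256 KiB
+        *   pmpaddr3: base 0x1fc00000, size 2 MiB
+        *   pmpaddr4: the entire address space (background region)
+        */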
+
+       unsigned long pmpcfg = ((unsigned long)(PMP_NAPOT | PMP_R | PMP_W |
+                                               PMP_X) << 32) |
+                               ((unsigned long)(PMP_NAPOT | PMP_R |
+                                               PMP_X) << 24) |
+                               ((unsigned long)(PMP_NAPOT | PMP_R | PMP_W |
+                                               PMP_X) << 16) |
+                               ((unsigned long)(PMP_NAPOT | PMP_R | PMP_W |
+                                               PMP_X) << 8);
+
+       csr_write(pmpcfg0, pmpcfg);
+
+       /* PMA/cache attributes */
+       ulong pmacfg0;
+
+       if (hartid == 0) {
+               /*
+                * Hart 0: cacheable for pmp0, pmp1, pmp3; uncacheable for
+                * pmp2, pmp4.
+                */
+               pmacfg0 = ((unsigned long)CCA_CACHE_DISABLE << 32) |
+                       ((unsigned long)CCA_CACHE_ENABLE  << 24) |
+                       ((unsigned long)CCA_CACHE_DISABLE << 16) |
+                       ((unsigned long)CCA_CACHE_ENABLE  << 8)  |
+                       ((unsigned long)CCA_CACHE_ENABLE);
+       } else {
+               /*
+                * Hart 1 or above: cacheable for pmp0, pmp1; uncacheable for
+                * pmp2, pmp3, pmp4.
+                */
+               pmacfg0 = ((unsigned long)CCA_CACHE_DISABLE << 32) |
+                       ((unsigned long)CCA_CACHE_DISABLE << 24) |
+                       ((unsigned long)CCA_CACHE_DISABLE << 16) |
+                       ((unsigned long)CCA_CACHE_ENABLE  << 8)  |
+                       ((unsigned long)CCA_CACHE_ENABLE);
+       }
+
+       asm volatile ("csrw %0, %1" :: "i"(CSR_PMACFG0), "r"(pmacfg0));
+       asm volatile ("fence" ::: "memory");
+
+       /* Secondary harts: after early setup, jump to the common entry point */
+       if (hartid != 0)
+               jump_to_addr(CONFIG_SYS_LOAD_ADDR);
+}
diff --git a/arch/riscv/cpu/p8700/dram.c b/arch/riscv/cpu/p8700/dram.c
new file mode 100644
index 00000000000..2b54326be39
--- /dev/null
+++ b/arch/riscv/cpu/p8700/dram.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018, Bin Meng <[email protected]>
+ */
+
+#include <fdtdec.h>
+#include <init.h>
+#include <linux/sizes.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+int dram_init(void)
+{
+       return fdtdec_setup_mem_size_base();
+}
+
+int dram_init_banksize(void)
+{
+       return fdtdec_setup_memory_banksize();
+}
+
+phys_size_t board_get_usable_ram_top(phys_size_t total_size)
+{
+       if (IS_ENABLED(CONFIG_64BIT)) {
+               /*
+                * Ensure that we run from first 4GB so that all
+                * addresses used by U-Boot are 32bit addresses.
+                *
+                * This in-turn ensures that 32bit DMA capable
+                * devices work fine because DMA mapping APIs will
+                * provide 32bit DMA addresses only.
+                */
+               if (gd->ram_top > SZ_4G)
+                       return SZ_4G;
+       }
+       return gd->ram_top;
+}
diff --git a/arch/riscv/include/asm/arch-p8700/p8700.h b/arch/riscv/include/asm/arch-p8700/p8700.h
new file mode 100644
index 00000000000..5ca9b4b9497
--- /dev/null
+++ b/arch/riscv/include/asm/arch-p8700/p8700.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Chao-ying Fu <[email protected]>
+ */
+
+#ifndef __P8700_H__
+#define __P8700_H__
+
+#define CSR_MIPSCONFIG7                0x7d7
+#define CSR_PMACFG0                    0x7e0
+
+#define MHARTID_HART_SHIFT     0
+#define MHARTID_HART_MASK      0xf
+#define MHARTID_CORE_SHIFT     4
+#define MHARTID_CORE_MASK      0xff
+#define MHARTID_CLUSTER_SHIFT  16
+#define MHARTID_CLUSTER_MASK   0xf
+
+#define MARCHID_UARCH_SHIFT    0
+#define MARCHID_UARCH_MASK     0xff
+#define MARCHID_CLASS_SHIFT    8
+#define MARCHID_CLASS_MASK     0xff
+#define MARCHID_CLASS_M                0
+#define MARCHID_CLASS_I                1
+#define MARCHID_CLASS_P                2
+
+#define CM_BASE_CORE_SHIFT     8
+#define CM_BASE_CLUSTER_SHIFT  19
+
+#define P8700_TIMER_ADDR       0x16108050
+
+#define CCA_CACHE_ENABLE       0
+#define CCA_BUFFER_CACHE       1
+#define CCA_CACHE_DISABLE      2
+#define CCA_UNCACHE_ACC                3
+#define PMA_SPECULATION                (0x1 << 3)
+
+#define L1_I_CACHE      0
+#define L1_D_CACHE      1
+#define L3_CACHE        2
+#define L2_CACHE        3
+
+#define HIT_INVALIDATE          4
+#define HIT_WRITEBACK_INV       5
+
+#define HIT_INVALIDATE_D        ((HIT_INVALIDATE << 2) | L1_D_CACHE)
+#define HIT_INVALIDATE_SD       ((HIT_INVALIDATE << 2) | L2_CACHE)
+#define HIT_WRITEBACK_INV_D     ((HIT_WRITEBACK_INV << 2) | L1_D_CACHE)
+#define HIT_WRITEBACK_INV_SD    ((HIT_WRITEBACK_INV << 2) | L2_CACHE)
+
+#define L1D_LINE_SIZE_SHIFT    10
+#define L1D_LINE_SIZE_MASK     0x7
+
+#define GCR_L2_CONFIG  0x16100130
+#define L2_LINE_SIZE_SHIFT     8
+#define L2_LINE_SIZE_MASK      0xf
+
+#define PMP_R                  0x01
+#define PMP_W                  0x02
+#define PMP_X                  0x04
+#define PMP_TOR                        0x8
+#define PMP_NA4                        0x10
+#define PMP_NAPOT              0x18
+
+#define CM_BASE                        0x16100000
+#define CPC_BASE               (CM_BASE + 0x8000)
+
+/* CPC Block offsets */
+#define CPC_OFF_LOCAL          0x2000
+
+#define CPC_PWRUP_CTL          0x0030
+
+#define CPC_SYS_CONFIG         0x0140
+
+#define CPC_Cx_CMD             0x0000
+#define CPC_Cx_CMD_RESET       0x4
+
+#define P8700_GCR_C0_COH_EN    0x20f8
+#define P8700_GCR_C1_COH_EN    0x21f8
+#define P8700_GCR_C2_COH_EN    0x22f8
+#define P8700_GCR_C3_COH_EN    0x23f8
+#define P8700_GCR_C4_COH_EN    0x24f8
+#define P8700_GCR_C5_COH_EN    0x25f8
+
+#define GCR_CL_COH_EN          0x2008
+#define GCR_CL_COH_EN_EN       (0x1 << 0)
+#define GCR_BASE_OFFSET                0x0008
+#define GIC_BASE_OFFSET                0x0080
+#define CPC_BASE_OFFSET                0x0088
+#define ENABLE                 0x1
+#define COUNT_STOP             (0x1 << 28)
+#define GIC_LOCAL_SECTION_OFS  0x8000
+#define GIC_VL_MASK            0x08
+#define GIC_VL_RMASK           0x0c
+#define GIC_VL_SMASK           0x10
+#define GIC_VL_COMPARE_MAP     0x44
+
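+/* Assembly helper: compute base + offset in 'idx', then apply 'op' reg, (idx) */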
+#define INDEXED(op, reg, idx, offset, base) \
+       li      idx, offset     ;\
+       add     idx, idx, base  ;\
+       op      reg, (idx)
+
+#define BOSTON_PLAT_BASE       0x17ffd000
+#define BOSTON_PLAT_DDR3STAT   (BOSTON_PLAT_BASE + 0x14)
+#define BOSTON_PLAT_DDR3STAT_CALIB     (0x1 << 2)
+#define BOSTON_PLAT_NOCPCIE0ADDR        (BOSTON_PLAT_BASE + 0x3c)
+#define BOSTON_PLAT_NOCPCIE1ADDR        (BOSTON_PLAT_BASE + 0x40)
+#define BOSTON_PLAT_NOCPCIE2ADDR        (BOSTON_PLAT_BASE + 0x44)
+
+#endif /* __P8700_H__ */
-- 
2.34.1
