Hi Marc, Thanks a lot for your comments!
> -----Original Message----- > From: Marc Zyngier <m...@kernel.org> > Sent: 2022年2月26日 19:10 > To: Z.Q. Hou <zhiqiang....@nxp.com> > Cc: u-boot@lists.denx.de; s...@chromium.org; > bharat.go...@broadcom.com; Priyanka Jain <priyanka.j...@nxp.com>; > mich...@walle.cc > Subject: Re: [PATCH] arch: arm: recode the initialization of GICv3 ITS > Re-Distributor tables > > On Fri, 25 Feb 2022 13:41:06 +0000, > Zhiqiang Hou <zhiqiang....@nxp.com> wrote: > > > > From: Hou Zhiqiang <zhiqiang....@nxp.com> > > > > The current implementation needs the caller provides the memory region > > for the property and pending tables and the number of re-distibutor, > > and it doesn't handle the address alignment of the tables and doesn't > > help to add the reserved-memory node for the tables. > > > > This patch change to use the device tree blob as argument and deal > > with the aboves in the internal of this helper to make it easier to use. > > > > Signed-off-by: Hou Zhiqiang <zhiqiang....@nxp.com> > > --- > > arch/arm/Kconfig | 1 - > > arch/arm/cpu/armv8/fsl-layerscape/fdt.c | 4 +- > > arch/arm/cpu/armv8/fsl-layerscape/soc.c | 46 +------- > > arch/arm/include/asm/gic-v3.h | 4 +- > > arch/arm/lib/gic-v3-its.c | 142 ++++++++++-------------- > > 5 files changed, 62 insertions(+), 135 deletions(-) > > > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index > > 391a77c2b4..0f6a32b428 100644 > > --- a/arch/arm/Kconfig > > +++ b/arch/arm/Kconfig > > @@ -82,7 +82,6 @@ config GICV3 > > > > config GIC_V3_ITS > > bool "ARM GICV3 ITS" > > - select IRQ > > help > > ARM GICV3 Interrupt translation service (ITS). > > Basic support for programming locality specific peripheral diff > > --git a/arch/arm/cpu/armv8/fsl-layerscape/fdt.c > > b/arch/arm/cpu/armv8/fsl-layerscape/fdt.c > > index 2fa7ebf163..10cb675fae 100644 > > --- a/arch/arm/cpu/armv8/fsl-layerscape/fdt.c > > +++ b/arch/arm/cpu/armv8/fsl-layerscape/fdt.c > > @@ -1,7 +1,7 @@ > > // SPDX-License-Identifier: GPL-2.0+ > > /* > > * Copyright 2014-2015 Freescale Semiconductor, Inc. > > - * Copyright 2020-2021 NXP > > + * Copyright 2020-2022 NXP > > Really? Isn't that what the git log is for? This is required by NXP policy. > > > */ > > > > #include <common.h> > > @@ -653,7 +653,7 @@ void ft_cpu_setup(void *blob, struct bd_info *bd) > > get_board_sys_clk(), 1); > > > > #ifdef CONFIG_GIC_V3_ITS > > - ls_gic_rd_tables_init(blob); > > + gic_lpi_tables_init(blob); > > #endif > > gic_lpi_tables_init() already has a definition when CONFIG_GIC_V3_ITS isn't > selected. Why the #ifdef-ery? I'll remove it in next version. > > > > > #if defined(CONFIG_PCIE_LAYERSCAPE) || > > defined(CONFIG_PCIE_LAYERSCAPE_GEN4) > > diff --git a/arch/arm/cpu/armv8/fsl-layerscape/soc.c > > b/arch/arm/cpu/armv8/fsl-layerscape/soc.c > > index d3a5cfaac1..51ed942f57 100644 > > --- a/arch/arm/cpu/armv8/fsl-layerscape/soc.c > > +++ b/arch/arm/cpu/armv8/fsl-layerscape/soc.c > > @@ -1,17 +1,15 @@ > > // SPDX-License-Identifier: GPL-2.0+ > > /* > > * Copyright 2014-2015 Freescale Semiconductor > > - * Copyright 2019-2021 NXP > > + * Copyright 2019-2022 NXP > > */ > > > > #include <common.h> > > #include <clock_legacy.h> > > -#include <cpu_func.h> > > #include <env.h> > > #include <fsl_immap.h> > > #include <fsl_ifc.h> > > #include <init.h> > > -#include <linux/sizes.h> > > #include <log.h> > > #include <asm/arch/fsl_serdes.h> > > #include <asm/arch/soc.h> > > @@ -21,7 +19,6 @@ > > #include <asm/arch-fsl-layerscape/config.h> > > #include <asm/arch-fsl-layerscape/ns_access.h> > > #include <asm/arch-fsl-layerscape/fsl_icid.h> > > -#include <asm/gic-v3.h> > > #ifdef CONFIG_LAYERSCAPE_NS_ACCESS > > #include <fsl_csu.h> > > #endif > > @@ -36,47 +33,6 @@ > > #include <dm.h> > > #include <dm/device_compat.h> > > #include <linux/err.h> > > -#ifdef CONFIG_GIC_V3_ITS > > -DECLARE_GLOBAL_DATA_PTR; > > -#endif > > - > > -#ifdef CONFIG_GIC_V3_ITS > > -#define PENDTABLE_MAX_SZ ALIGN(BIT(ITS_MAX_LPI_NRBITS), SZ_64K) > > -#define PROPTABLE_MAX_SZ ALIGN(BIT(ITS_MAX_LPI_NRBITS) / 8, > SZ_64K) > > -#define GIC_LPI_SIZE ALIGN(cpu_numcores() * > PENDTABLE_MAX_SZ + \ > > - PROPTABLE_MAX_SZ, SZ_1M) > > -static int fdt_add_resv_mem_gic_rd_tables(void *blob, u64 base, > > size_t size) -{ > > - int err; > > - struct fdt_memory gic_rd_tables; > > - > > - gic_rd_tables.start = base; > > - gic_rd_tables.end = base + size - 1; > > - err = fdtdec_add_reserved_memory(blob, "gic-rd-tables", > &gic_rd_tables, > > - NULL, 0, NULL, 0); > > - if (err < 0) > > - debug("%s: failed to add reserved memory: %d\n", __func__, err); > > - > > - return err; > > -} > > - > > -int ls_gic_rd_tables_init(void *blob) -{ > > - u64 gic_lpi_base; > > - int ret; > > - > > - gic_lpi_base = ALIGN(gd->arch.resv_ram - GIC_LPI_SIZE, SZ_64K); > > - ret = fdt_add_resv_mem_gic_rd_tables(blob, gic_lpi_base, > GIC_LPI_SIZE); > > - if (ret) > > - return ret; > > - > > - ret = gic_lpi_tables_init(gic_lpi_base, cpu_numcores()); > > - if (ret) > > - debug("%s: failed to init gic-lpi-tables\n", __func__); > > - > > - return ret; > > -} > > -#endif > > > > bool soc_has_dp_ddr(void) > > { > > diff --git a/arch/arm/include/asm/gic-v3.h > > b/arch/arm/include/asm/gic-v3.h index 5131fabec4..e2e175f065 100644 > > --- a/arch/arm/include/asm/gic-v3.h > > +++ b/arch/arm/include/asm/gic-v3.h > > @@ -127,9 +127,9 @@ > > #define GIC_REDISTRIBUTOR_OFFSET 0x20000 > > > > #ifdef CONFIG_GIC_V3_ITS > > -int gic_lpi_tables_init(u64 base, u32 max_redist); > > +int gic_lpi_tables_init(void *blob); > > #else > > -int gic_lpi_tables_init(u64 base, u32 max_redist) > > +int gic_lpi_tables_init(void *blob); > > { > > return 0; > > } > > You obviously haven't compiled this. And this should be made inline. > Thanks for the catch! Will remove the wrongly added ';' and change it to inline. > > diff --git a/arch/arm/lib/gic-v3-its.c b/arch/arm/lib/gic-v3-its.c > > index f6211a2d92..3ef1e74954 100644 > > --- a/arch/arm/lib/gic-v3-its.c > > +++ b/arch/arm/lib/gic-v3-its.c > > @@ -1,92 +1,65 @@ > > // SPDX-License-Identifier: GPL-2.0+ > > /* > > * Copyright 2019 Broadcom. > > + * Copyright 2022 NXP > > You got to love all these copyright attribution for nothing... > > > */ > > + > > #include <common.h> > > #include <cpu_func.h> > > -#include <dm.h> > > +#include <fdtdec.h> > > #include <asm/gic.h> > > #include <asm/gic-v3.h> > > #include <asm/io.h> > > #include <linux/bitops.h> > > #include <linux/sizes.h> > > +#include <memalign.h> > > > > static u32 lpi_id_bits; > > > > #define LPI_NRBITS lpi_id_bits > > -#define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K) > > +#define LPI_PROPBASE_SZ BIT(LPI_NRBITS) > > #define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K) > > > > -/* > > - * gic_v3_its_priv - gic details > > - * > > - * @gicd_base: gicd base address > > - * @gicr_base: gicr base address > > - */ > > -struct gic_v3_its_priv { > > - ulong gicd_base; > > - ulong gicr_base; > > -}; > > - > > -static int gic_v3_its_get_gic_addr(struct gic_v3_its_priv *priv) -{ > > - struct udevice *dev; > > - fdt_addr_t addr; > > - int ret; > > - > > - ret = uclass_get_device_by_driver(UCLASS_IRQ, > > - DM_DRIVER_GET(arm_gic_v3_its), &dev); > > - if (ret) { > > - pr_err("%s: failed to get %s irq device\n", __func__, > > - DM_DRIVER_GET(arm_gic_v3_its)->name); > > - return ret; > > - } > > - > > - addr = dev_read_addr_index(dev, 0); > > - if (addr == FDT_ADDR_T_NONE) { > > - pr_err("%s: failed to get GICD address\n", __func__); > > - return -EINVAL; > > - } > > - priv->gicd_base = addr; > > - > > - addr = dev_read_addr_index(dev, 1); > > - if (addr == FDT_ADDR_T_NONE) { > > - pr_err("%s: failed to get GICR address\n", __func__); > > - return -EINVAL; > > - } > > - priv->gicr_base = addr; > > - > > - return 0; > > -} > > - > > Hold on: why is it valid to delete this code? This is obviously a change in > behaviour affecting the Broadcom platforms. Maybe this does nothing > useful, but you really have to explain *why*. As the macros GICD_BASE/GICR_BASE have been used in armv8/start.S, for simplicity directly use them here. Currently only Layerscape platform code called the function gic_lpi_tables_init(), so it doesn't affect other platforms. > > > /* > > * Program the GIC LPI configuration tables for all > > * the re-distributors and enable the LPI table > > - * base: Configuration table address > > - * num_redist: number of redistributors > > + * blob: Device tree blob > > */ > > -int gic_lpi_tables_init(u64 base, u32 num_redist) > > +int gic_lpi_tables_init(void *blob) > > { > > - struct gic_v3_its_priv priv; > > + struct fdt_memory lpi_tables; > > + ulong pend_tab_total_sz; > > + u64 pend_table_base; > > + u32 num_redist = 0; > > + void *pend_tab_va; > > + int cpu_node = -1; > > u32 gicd_typer; > > - u64 val; > > - u64 tmp; > > - int i; > > - u64 redist_lpi_base; > > u64 pend_base; > > - ulong pend_tab_total_sz = num_redist * LPI_PENDBASE_SZ; > > - void *pend_tab_va; > > - > > - if (gic_v3_its_get_gic_addr(&priv)) > > - return -EINVAL; > > + u64 val, tmp; > > + void *base; > > + int err, i; > > > > - gicd_typer = readl((uintptr_t)(priv.gicd_base + GICD_TYPER)); > > + gicd_typer = readl((uintptr_t)(GICD_BASE + GICD_TYPER)); > > Where is GICD_BASE coming from? Are you now assuming that all platforms > will define this constant instead of retrieving it from... or wait: the device > tree? Yes, I once thought getting from device tree, but as mentioned above for simplicity used GICD_BASE/GICR_BASE. The GICD_BASE/GICR_BASE must be defined if they defined CONFIG_GICV3, otherwise the armv8/start.S will failed to build. > > > /* GIC support for Locality specific peripheral interrupts (LPI's) */ > > if (!(gicd_typer & GICD_TYPER_LPIS)) { > > pr_err("GIC implementation does not support LPI's\n"); > > return -EINVAL; > > } > > > > + /* lpi_id_bits to get LPI_PENDBASE_SZ and LPI_PROPBASE_SZ */ > > + lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gicd_typer), > > + ITS_MAX_LPI_NRBITS); > > + > > + while (1) { > > + cpu_node = fdt_node_offset_by_prop_value(blob, cpu_node, > > + "device_type", > > + "cpu", 4); > > + if (cpu_node == -FDT_ERR_NOTFOUND) > > + break; > > + > > + num_redist++; > > + } > > NAK. The device tree binding tells you how many redistributors you have. > Counting CPUs is wrong. Please use the binding as specified (and please > account for the difference is size between GICv3 and GICv4). Do you mean calculate the number of redistributors base on the properties 'redistributor-regions' and 'redistributor-stride'? But in P26 of 'IHI0069G_gic_architecture_specification.pdf', it notes 'There is one copy of each of the Redistributor registers per PE', does it mean there are the same numbers of redistributors as the CPUs? > > > + > > /* > > * Check for LPI is disabled for all the redistributors. > > * Once the LPI table is enabled, can not program the @@ -95,48 > > +68,58 @@ int gic_lpi_tables_init(u64 base, u32 num_redist) > > for (i = 0; i < num_redist; i++) { > > u32 offset = i * GIC_REDISTRIBUTOR_OFFSET; > > > > - if ((readl((uintptr_t)(priv.gicr_base + offset))) & > > + if ((readl((uintptr_t)(GICR_BASE + offset))) & > > Oh, because you actually believe there is a *SINGLE* redistributor range? > Time to face reality: you can have an *infinite* number of them. You need to > handle ranges of redistributors, and each redistributor inside the range. This patch didn't change the property/pending base initialization logic, seems it only works on single redistributor range case, it can be improved. But I doesn't find the illustration on the redistributor range in gic architecture spec, can you help point out the section? And what do you mean of *infinite* number of redistributors, my understanding is each PE needs one redistributor connecting to its cpu interface, can you help understand what is the extra redistributors for? > > > GICR_CTLR_ENABLE_LPIS) { > > - pr_err("Re-Distributor %d LPI is already enabled\n", > > - i); > > + pr_err("Re-Distributor %d LPI is already enabled\n", i); > > return -EINVAL; > > } > > } > > > > - /* lpi_id_bits to get LPI_PENDBASE_SZ and LPi_PROPBASE_SZ */ > > - lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gicd_typer), > > - ITS_MAX_LPI_NRBITS); > > + pend_tab_total_sz = num_redist * LPI_PENDBASE_SZ; > > + > > + base = memalign(SZ_64K, LPI_PROPBASE_SZ + pend_tab_total_sz); > > + if (!base) > > + return -ENOMEM; > > + > > + lpi_tables.start = (phys_addr_t)base; > > + lpi_tables.end = (phys_addr_t)base + LPI_PROPBASE_SZ + > > + pend_tab_total_sz - 1; > > + err = fdtdec_add_reserved_memory(blob, "gic-lpi-tables", &lpi_tables, > > + NULL, 0, NULL, 0); > > + if (err) { > > + free(base); > > + return err; > > + } > > > > /* Set PropBase */ > > - val = (base | > > + val = (((phys_addr_t)base + pend_tab_total_sz) | > > GICR_PROPBASER_INNERSHAREABLE | > > GICR_PROPBASER_RAWAWB | > > ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK)); > > > > - writeq(val, (uintptr_t)(priv.gicr_base + GICR_PROPBASER)); > > - tmp = readl((uintptr_t)(priv.gicr_base + GICR_PROPBASER)); > > + writeq(val, (uintptr_t)(GICR_BASE + GICR_PROPBASER)); > > + tmp = readl((uintptr_t)(GICR_BASE + GICR_PROPBASER)); > > This is also broken. The register exist *on each redistributor*. The fact that > some implementations allow you to only write it once in IMPDEF, and you > can't rely on it. Think of a multi-socket system. How do you expect this to > work? Agree that the current code rely on IMPDEF and it can be improved. For the multi-socket system, it is out of my imagination, can you help to explain how GICv3 implementation works on this kind of system? Thanks! Thanks, Zhiqiang > > > if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { > > if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) { > > val &= ~(GICR_PROPBASER_SHAREABILITY_MASK | > > GICR_PROPBASER_CACHEABILITY_MASK); > > val |= GICR_PROPBASER_NC; > > - writeq(val, > > - (uintptr_t)(priv.gicr_base + GICR_PROPBASER)); > > + writeq(val, (uintptr_t)(GICR_BASE + GICR_PROPBASER)); > > } > > } > > > > - redist_lpi_base = base + LPI_PROPBASE_SZ; > > - pend_tab_va = map_physmem(redist_lpi_base, pend_tab_total_sz, > > + pend_table_base = (phys_addr_t)base; > > + pend_tab_va = map_physmem(pend_table_base, pend_tab_total_sz, > > MAP_NOCACHE); > > memset(pend_tab_va, 0, pend_tab_total_sz); > > flush_cache((ulong)pend_tab_va, pend_tab_total_sz); > > unmap_physmem(pend_tab_va, MAP_NOCACHE); > > > > - pend_base = priv.gicr_base + GICR_PENDBASER; > > + pend_base = GICR_BASE + GICR_PENDBASER; > > for (i = 0; i < num_redist; i++) { > > u32 offset = i * GIC_REDISTRIBUTOR_OFFSET; > > > > - val = ((redist_lpi_base + (i * LPI_PENDBASE_SZ)) | > > + val = ((pend_table_base + (i * LPI_PENDBASE_SZ)) | > > GICR_PENDBASER_INNERSHAREABLE | > > GICR_PENDBASER_RAWAWB | > > GICR_PENDBASER_PTZ); > > @@ -152,19 +135,8 @@ int gic_lpi_tables_init(u64 base, u32 num_redist) > > > > /* Enable LPI for the redistributor */ > > writel(GICR_CTLR_ENABLE_LPIS, > > - (uintptr_t)(priv.gicr_base + offset)); > > + (uintptr_t)(GICR_BASE + offset)); > > } > > All of this is equally broken for the reasons mentioned above. > > > > > return 0; > > } > > - > > -static const struct udevice_id gic_v3_its_ids[] = { > > - { .compatible = "arm,gic-v3" }, > > - {} > > -}; > > - > > -U_BOOT_DRIVER(arm_gic_v3_its) = { > > - .name = "gic-v3", > > - .id = UCLASS_IRQ, > > - .of_match = gic_v3_its_ids, > > -}; > > And this smells like an awful regression. > > M. > > -- > Without deviation from the norm, progress is not possible.