[PATCH 21/21] memory: mtk-smi: Add mt8192 support
Add mt8192 smi support. Signed-off-by: Yong Wu --- drivers/memory/mtk-smi.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index f2f6100c74ef..5d0268630e70 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -252,6 +252,10 @@ static const struct mtk_smi_larb_gen mtk_smi_larb_mt8183 = { /* IPU0 | IPU1 | CCU */ }; +static const struct mtk_smi_larb_gen mtk_smi_larb_mt8192 = { + .config_port= mtk_smi_larb_config_port_gen2_general, +}; + static const struct of_device_id mtk_smi_larb_of_ids[] = { { .compatible = "mediatek,mt8173-smi-larb", @@ -269,6 +273,10 @@ static const struct of_device_id mtk_smi_larb_of_ids[] = { .compatible = "mediatek,mt8183-smi-larb", .data = &mtk_smi_larb_mt8183 }, + { + .compatible = "mediatek,mt8192-smi-larb", + .data = &mtk_smi_larb_mt8192 + }, {} }; @@ -401,6 +409,13 @@ static const struct mtk_smi_common_plat mtk_smi_common_mt8183 = { F_MMU1_LARB(7), }; +static const struct mtk_smi_common_plat mtk_smi_common_mt8192 = { + .gen = MTK_SMI_GEN2, + .has_gals = true, + .bus_sel = F_MMU1_LARB(1) | F_MMU1_LARB(2) | F_MMU1_LARB(5) | + F_MMU1_LARB(6), +}; + static const struct of_device_id mtk_smi_common_of_ids[] = { { .compatible = "mediatek,mt8173-smi-common", @@ -418,6 +433,10 @@ static const struct of_device_id mtk_smi_common_of_ids[] = { .compatible = "mediatek,mt8183-smi-common", .data = &mtk_smi_common_mt8183, }, + { + .compatible = "mediatek,mt8192-smi-common", + .data = &mtk_smi_common_mt8192, + }, {} }; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 17/21] iommu/mediatek: Support report iova 34bit translation fault in ISR
If the iova is over 32bit, the fault status register bit is a little different. Add a flag for the special register bits. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 9c6649a97bd7..766c9e73d541 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -4,6 +4,7 @@ * Author: Yong Wu */ #include +#include #include #include #include @@ -87,6 +88,9 @@ #define F_REG_MMU1_FAULT_MASK GENMASK(13, 7) #define REG_MMU0_FAULT_VA 0x13c +#define F_MMU_INVAL_VA_31_12_MASK GENMASK(31, 12) +#define F_MMU_INVAL_VA_34_32_MASK GENMASK(11, 9) +#define F_MMU_INVAL_PA_34_32_MASK GENMASK(8, 6) #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) @@ -110,6 +114,7 @@ #define OUT_ORDER_WR_ENBIT(4) #define HAS_SUB_COMM BIT(5) #define WR_THROT_ENBIT(6) +#define IOVA_34_EN BIT(7) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ pdata)->flags) & (_x)) == (_x)) @@ -269,8 +274,9 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) { struct mtk_iommu_data *data = dev_id; struct mtk_iommu_domain *dom = data->m4u_dom; - u32 int_state, regval, fault_iova, fault_pa; unsigned int fault_larb, fault_port, sub_comm = 0; + u32 int_state, regval, va34_32, pa34_32; + u64 fault_iova, fault_pa; bool layer, write; /* Read error info from registers */ @@ -286,6 +292,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) } layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) { + va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova); + pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova); + fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK; + fault_iova |= (u64)va34_32 << 32; + fault_pa |= (u64)pa34_32 << 32; + } + fault_port = F_MMU_INT_ID_PORT_ID(regval); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) { fault_larb = F_MMU_INT_ID_COMM_ID(regval); @@ -299,7 +313,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) { dev_err_ratelimited( data->dev, - "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n", + "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d layer=%d %s\n", int_state, fault_iova, fault_pa, fault_larb, fault_port, layer, write ? "write" : "read"); } -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 20/21] iommu/mediatek: Add mt8192 support
Add mt8192 iommu support. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 20 drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index a4ac41e60c4f..da7d055af919 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -168,6 +168,14 @@ static const struct mtk_iommu_iova_region single_domain[] = { {.iova_base = 0, .size = SZ_4G}, }; +static const struct mtk_iommu_iova_region mt8192_multi_dom[] = { + { .iova_base = 0x0, .size = SZ_4G}, /* disp : 0 ~ 4G */ + { .iova_base = SZ_4G, .size = SZ_4G}, /* vdec : 4G ~ 8G */ + { .iova_base = SZ_4G * 2, .size = SZ_4G}, /* CAM/MDP: 8G ~ 12G */ + { .iova_base = 0x24000ULL, .size = 0x400}, /* CCU0 */ + { .iova_base = 0x24400ULL, .size = 0x400}, /* CCU1 */ +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. @@ -991,11 +999,23 @@ static const struct mtk_iommu_plat_data mt8183_data = { .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; +static const struct mtk_iommu_plat_data mt8192_data = { + .m4u_plat = M4U_MT8192, + .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN + | IOVA_34_EN, + .inv_sel_reg= REG_MMU_INV_SEL_GEN2, + .iova_region= mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, + {0, 14, 16}, {0, 13, 18, 17}}, +}; + static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, + { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, {} }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 11795b8d82ff..d42a250156bd 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -41,6 +41,7 @@ enum mtk_iommu_plat { M4U_MT6779, M4U_MT8173, M4U_MT8183, + M4U_MT8192, }; struct mtk_iommu_iova_region; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 19/21] iommu/mediatek: Adjust the structure
Add "struct mtk_iommu_data *" in the "struct mtk_iommu_domain", reduce the call mtk_iommu_get_m4u_data(). No functional change. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 7dfd8071a858..a4ac41e60c4f 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -123,6 +123,7 @@ struct mtk_iommu_domain { struct io_pgtable_cfg cfg; struct io_pgtable_ops *iop; + struct mtk_iommu_data *data; struct iommu_domain domain; }; @@ -359,7 +360,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct mtk_iommu_data *data = dom->data; /* Use the exist domain as there is one m4u pgtable here. */ if (data->m4u_dom) { @@ -408,6 +409,7 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) if (iommu_get_dma_cookie(&dom->domain)) goto free_dom; + dom->data = data; if (mtk_iommu_domain_finalise(dom)) goto put_dma_cookie; @@ -475,10 +477,9 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); /* The "4GB mode" M4U physically can not use the lower remap of Dram. */ - if (data->enable_4GB) + if (dom->data->enable_4GB) paddr |= BIT_ULL(32); /* Synchronize with the tlb_lock */ @@ -496,31 +497,32 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) { - mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data()); + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + + mtk_iommu_tlb_flush_all(dom->data); } static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct mtk_iommu_domain *dom = to_mtk_domain(domain); size_t length = gather->end - gather->start; if (gather->start == ULONG_MAX) return; mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize, - data); + dom->data); } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); phys_addr_t pa; pa = dom->iop->iova_to_phys(dom->iop, iova); - if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) + if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE) pa &= ~BIT_ULL(32); return pa; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 18/21] iommu/mediatek: Add support for multi domain
Some HW IP(ex: CCU) require the special iova range. That means the iova got from dma_alloc_attrs for that devices must locate in his special range. In this patch, we allocate a special iova_range for each a special requirement and create each a iommu domain for each a iova_range. meanwhile we still use one pagetable which support 16GB iova. After this patch, If the iova range of a master is over 4G, the master should: a) Declare its special dma_ranges in its dtsi node. For example, If we preassign the iova 4G-8G for vcodec, then the vcodec dtsi node should: dma-ranges = <0x1 0x0 0x1 0x0 0x1 0x0>; /* 4G ~ 8G */ b) Update the dma_mask: dma_set_mask_and_coherent(dev, DMA_BIT_MASK(33)); Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 49 --- drivers/iommu/mtk_iommu.h | 3 ++- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 766c9e73d541..7dfd8071a858 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -361,6 +361,14 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + /* Use the exist domain as there is one m4u pgtable here. */ + if (data->m4u_dom) { + dom->iop = data->m4u_dom->iop; + dom->cfg = data->m4u_dom->cfg; + dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap; + return 0; + } + dom->cfg = (struct io_pgtable_cfg) { .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | @@ -386,6 +394,8 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) { + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + const struct mtk_iommu_iova_region *region; struct mtk_iommu_domain *dom; if (type != IOMMU_DOMAIN_DMA) @@ -401,8 +411,10 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) if (mtk_iommu_domain_finalise(dom)) goto put_dma_cookie; - dom->domain.geometry.aperture_start = 0; - dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); + region = data->plat_data->iova_region + data->cur_domid; + dom->domain.geometry.aperture_start = region->iova_base; + dom->domain.geometry.aperture_end = region->iova_base + + region->size - 1; dom->domain.geometry.force_aperture = true; return &dom->domain; @@ -540,19 +552,31 @@ static void mtk_iommu_release_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct iommu_group *group; + int domid; if (!data) return ERR_PTR(-ENODEV); - /* All the client devices are in the same m4u iommu-group */ - if (!data->m4u_group) { - data->m4u_group = iommu_group_alloc(); - if (IS_ERR(data->m4u_group)) + domid = MTK_M4U_TO_DOM(fwspec->ids[0]); + if (domid >= data->plat_data->iova_region_nr) { + dev_err(dev, "domain id(%d/%d) is error.\n", domid, + data->plat_data->iova_region_nr); + return ERR_PTR(-EINVAL); + } + + group = data->m4u_group[domid]; + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) dev_err(dev, "Failed to allocate M4U IOMMU group\n"); + data->m4u_group[domid] = group; } else { - iommu_group_ref_get(data->m4u_group); + iommu_group_ref_get(group); } - return data->m4u_group; + data->cur_domid = domid; + return group; } static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) @@ -581,14 +605,21 @@ static void mtk_iommu_get_resv_regions(struct device *dev, struct list_head *head) { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - const struct mtk_iommu_iova_region *resv; + const struct mtk_iommu_iova_region *resv, *curdom; struct iommu_resv_region *region; int prot = IOMMU_WRITE | IOMMU_READ; unsigned int i; + curdom = data->plat_data->iova_region + data->cur_domid; for (i = 0; i < data->plat_data->iova_region_nr; i++) { resv = data->plat_data->iova_region + i; + /* Only reserve when the region is in the current domain */ + if (resv->iova_base <= curdom->iova_base || + resv->iova_base + resv->size >= + curdom->iova_base + curdom->size) +
[PATCH 16/21] iommu/mediatek: Support up to 34bit iova in tlb invalid
If the iova is 34bit, the iova[32][33] is the bit0/1 in the tlb flush register. Add a new macro for this. there is a minor change unrelated with this patch. it also use the new macro. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 3b2714bea45a..9c6649a97bd7 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -123,6 +123,8 @@ struct mtk_iommu_domain { static const struct iommu_ops mtk_iommu_ops; +#define MTK_IOMMU_ADDR(addr) (lower_32_bits(addr) | upper_32_bits(addr)) + /* * In M4U 4GB mode, the physical address is remapped as below: * @@ -225,8 +227,9 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); - writel_relaxed(iova + size - 1, + writel_relaxed(MTK_IOMMU_ADDR(iova), + data->base + REG_MMU_INVLD_START_A); + writel_relaxed(MTK_IOMMU_ADDR(iova + size - 1), data->base + REG_MMU_INVLD_END_A); writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); @@ -653,8 +656,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) if (data->plat_data->m4u_plat == M4U_MT8173) regval = (data->protect_base >> 1) | (data->enable_4GB << 31); else - regval = lower_32_bits(data->protect_base) | -upper_32_bits(data->protect_base); + regval = MTK_IOMMU_ADDR(data->protect_base); writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR); if (data->enable_4GB && -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 11/21] iommu/mediatek: Add power-domain operation
In the previous SoC, the M4U HW is in the EMI power domain which is always on. the latest M4U is in the display power domain which may be turned on/off, thus we have to add pm_runtime interface for it. we should enable its power before M4U hw initial. and disable it after HW initialize. When the engine work, the engine always enable the power and clocks for smi-larb/smi-common, then the M4U's power will always be powered on automatically via the device link with smi-common. Note: we don't enable the M4U power in iommu_map/unmap for tlb flush. If its power already is on, of course it is ok. if the power is off, the main tlb will be reset while M4U power on, thus the tlb flush while m4u power off is unnecessary, just skip it. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 54 ++- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 931fdd19c8f3..03a6d66f4bef 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -172,6 +173,19 @@ static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) return container_of(dom, struct mtk_iommu_domain, domain); } +static int mtk_iommu_rpm_get(struct device *dev) +{ + if (pm_runtime_enabled(dev)) + return pm_runtime_get_sync(dev); + return 0; +} + +static void mtk_iommu_rpm_put(struct device *dev) +{ + if (pm_runtime_enabled(dev)) + pm_runtime_put_autosuspend(dev); +} + static void mtk_iommu_tlb_flush_all(void *cookie) { struct mtk_iommu_data *data = cookie; @@ -193,6 +207,11 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size, u32 tmp; for_each_m4u(data) { + /* skip tlb flush when pm is not active */ + if (pm_runtime_enabled(data->dev) && + !pm_runtime_active(data->dev)) + continue; + spin_lock_irqsave(&data->tlb_lock, flags); writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + data->plat_data->inv_sel_reg); @@ -377,15 +396,20 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); + int ret; if (!data) return -ENODEV; /* Update the pgtable base address register of the M4U HW */ if (!data->m4u_dom) { + ret = mtk_iommu_rpm_get(dev); + if (ret < 0) + return ret; data->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); + mtk_iommu_rpm_put(dev); } mtk_iommu_config(data, dev, true); @@ -543,10 +567,14 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) u32 regval; int ret; - ret = clk_prepare_enable(data->bclk); - if (ret) { - dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret); - return ret; + /* bclk will be enabled in pm callback in power-domain case. */ + if (!pm_runtime_enabled(data->dev)) { + ret = clk_prepare_enable(data->bclk); + if (ret) { + dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", + ret); + return ret; + } } if (data->plat_data->m4u_plat == M4U_MT8173) { @@ -728,7 +756,15 @@ static int mtk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); + if (dev->pm_domain) + pm_runtime_enable(dev); + + ret = mtk_iommu_rpm_get(dev); + if (ret < 0) + return ret; + ret = mtk_iommu_hw_init(data); + mtk_iommu_rpm_put(dev); if (ret) return ret; @@ -801,6 +837,10 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); return ret; } + + /* Avoid first resume to affect the default value of registers below. */ + if (!m4u_dom) + return 0; writel_relaxed(reg->wr_len_ctrl, base + REG_MMU_WR_LEN_CTRL); writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); @@ -809,13 +849,13 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); -
[PATCH 15/21] iommu/mediatek: Support master use iova over 32bit
After extending v7s, our pagetable already support iova reach 16GB(34bit). the master got the iova via dma_alloc_attrs may reach 34bits, but its HW register still is 32bit. then how to set the bit32/bit33 iova? this depend on a SMI larb setting(bank_sel). we separate whole 16GB iova to four banks: bank: 0: 0~4G; 1: 4~8G; 2: 8-12G; 3: 12-16G; The bank number is (iova >> 32). We will preassign which bank the larbs belong to. currently we don't have a interface for master to adjust its bank number. Each a bank is a iova_region which is a independent iommu-domain. the iova range for each iommu-domain can't cross 4G. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 12 +--- drivers/memory/mtk-smi.c | 5 + include/soc/mediatek/smi.h | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index db1f06324ecc..3b2714bea45a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -315,17 +315,23 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, bool enable) { struct mtk_smi_larb_iommu*larb_mmu; - unsigned int larbid, portid; + unsigned int larbid, portid, domid; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + const struct mtk_iommu_iova_region *region; int i; for (i = 0; i < fwspec->num_ids; ++i) { larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); portid = MTK_M4U_TO_PORT(fwspec->ids[i]); + domid = MTK_M4U_TO_DOM(fwspec->ids[i]); + larb_mmu = &data->larb_imu[larbid]; + region = data->plat_data->iova_region + domid; + larb_mmu->bank[portid] = upper_32_bits(region->iova_base); - dev_dbg(dev, "%s iommu port: %d\n", - enable ? "enable" : "disable", portid); + dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n", + enable ? "enable" : "disable", dev_name(larb_mmu->dev), + portid, domid, larb_mmu->bank[portid]); if (enable) larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index f6516921287f..f2f6100c74ef 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -41,6 +41,8 @@ /* mt2712 */ #define SMI_LARB_NONSEC_CON(id)(0x380 + ((id) * 4)) #define F_MMU_EN BIT(0) +#define BANK_SEL(a)a) & 0x3) << 8) | (((a) & 0x3) << 10) |\ +(((a) & 0x3) << 12) | (((a) & 0x3) << 14)) /* SMI COMMON */ #define SMI_BUS_SEL0x220 @@ -85,6 +87,7 @@ struct mtk_smi_larb { /* larb: local arbiter */ const struct mtk_smi_larb_gen *larb_gen; int larbid; u32 *mmu; + unsigned char *bank; }; static int mtk_smi_clk_enable(const struct mtk_smi *smi) @@ -151,6 +154,7 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data) if (dev == larb_mmu[i].dev) { larb->larbid = i; larb->mmu = &larb_mmu[i].mmu; + larb->bank = larb_mmu[i].bank; return 0; } } @@ -169,6 +173,7 @@ static void mtk_smi_larb_config_port_gen2_general(struct device *dev) for_each_set_bit(i, (unsigned long *)larb->mmu, 32) { reg = readl_relaxed(larb->base + SMI_LARB_NONSEC_CON(i)); reg |= F_MMU_EN; + reg |= BANK_SEL(larb->bank[i]); writel(reg, larb->base + SMI_LARB_NONSEC_CON(i)); } } diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 9371bf572ab8..4cf445dbbdaa 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -16,6 +16,7 @@ struct mtk_smi_larb_iommu { struct device *dev; unsigned int mmu; + unsigned char bank[32]; }; /* -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 14/21] iommu/mediatek: Add single domain
Defaultly the iova range is 0-4G. here we add a single-domain(0-4G) for the previous SoC. this also is a preparing patch for supporting multi-domains. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 12 1 file changed, 12 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index fdfdb75706e0..db1f06324ecc 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -156,6 +156,10 @@ struct mtk_iommu_iova_region { size_t size; }; +static const struct mtk_iommu_iova_region single_domain[] = { + {.iova_base = 0, .size = SZ_4G}, +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. @@ -900,6 +904,8 @@ static const struct mtk_iommu_plat_data mt2712_data = { .m4u_plat = M4U_MT2712, .flags= HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}, }; @@ -907,6 +913,8 @@ static const struct mtk_iommu_plat_data mt6779_data = { .m4u_plat = M4U_MT6779, .flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}}, }; @@ -914,6 +922,8 @@ static const struct mtk_iommu_plat_data mt8173_data = { .m4u_plat = M4U_MT8173, .flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */ }; @@ -921,6 +931,8 @@ static const struct mtk_iommu_plat_data mt8183_data = { .m4u_plat = M4U_MT8183, .flags= RESET_AXI, .inv_sel_reg = REG_MMU_INV_SEL_GEN1, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), .larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}}, }; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 13/21] iommu/mediatek: Make MTK_IOMMU depend on ARM64
Originally MTK_IOMMU could depend on ARM || ARM64. Both build ok. actually the source code don't support ARM. this patch changes it only depend on ARM64. This is a preparing patch for support multi-domain. otherwise it will build warning in ARM case. This is the build warning log: drivers/iommu/mtk_iommu.c:163:27: note: in expansion of macro 'SZ_4G' {.iova_base = 0, .size = SZ_4G}, include/uapi/linux/const.h:20:19: warning: large integer implicitly truncated to unsigned type [-Woverflow] #define __AC(X,Y) (X##Y) ^ include/uapi/linux/const.h:21:18: note: in expansion of macro '__AC' #define _AC(X,Y) __AC(X,Y) ^ include/linux/sizes.h:46:18: note: in expansion of macro '_AC' #define SZ_4G_AC(0x1, ULL) Signed-off-by: Yong Wu --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b510f67dfa49..6bebfd3e0021 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -467,6 +467,7 @@ config S390_AP_IOMMU config MTK_IOMMU bool "MTK IOMMU Support" depends on HAS_DMA + depends on ARM64 depends on ARCH_MEDIATEK || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 12/21] iommu/mediatek: Add iova reserved function
For multiple iommu_domains, we need to reserve some iova regions, so we will add mtk_iommu_iova_region structure. It includes the base address and size of the range. This is a preparing patch for supporting multi-domain. Signed-off-by: Anan sun Signed-off-by: Hao Chao Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 37 + drivers/iommu/mtk_iommu.h | 5 + 2 files changed, 42 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 03a6d66f4bef..fdfdb75706e0 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -151,6 +151,11 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) +struct mtk_iommu_iova_region { + dma_addr_t iova_base; + size_t size; +}; + /* * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain * for the performance. @@ -545,6 +550,36 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) return iommu_fwspec_add_ids(dev, args->args, 1); } +static void mtk_iommu_get_resv_regions(struct device *dev, + struct list_head *head) +{ + struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + const struct mtk_iommu_iova_region *resv; + struct iommu_resv_region *region; + int prot = IOMMU_WRITE | IOMMU_READ; + unsigned int i; + + for (i = 0; i < data->plat_data->iova_region_nr; i++) { + resv = data->plat_data->iova_region + i; + + region = iommu_alloc_resv_region(resv->iova_base, resv->size, +prot, IOMMU_RESV_RESERVED); + if (!region) + return; + + list_add_tail(®ion->list, head); + } +} + +static void mtk_iommu_put_resv_regions(struct device *dev, + struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); +} + static const struct iommu_ops mtk_iommu_ops = { .domain_alloc = mtk_iommu_domain_alloc, .domain_free= mtk_iommu_domain_free, @@ -559,6 +594,8 @@ static const struct iommu_ops mtk_iommu_ops = { .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, .of_xlate = mtk_iommu_of_xlate, + .get_resv_regions = mtk_iommu_get_resv_regions, + .put_resv_regions = mtk_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index e965bcb169c0..bb929b875d8c 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -43,10 +43,15 @@ enum mtk_iommu_plat { M4U_MT8183, }; +struct mtk_iommu_iova_region; + struct mtk_iommu_plat_data { enum mtk_iommu_plat m4u_plat; u32 flags; u32 inv_sel_reg; + + unsigned intiova_region_nr; + const struct mtk_iommu_iova_region *iova_region; unsigned char larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX]; }; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 10/21] iommu/mediatek: Add device link for smi-common and m4u
In the lastest SoC, M4U has its special power domain. thus, If the engine begin to work, it should help enable the power for M4U firstly. Currently if the engine work, it always enable the power/clocks for smi-larbs/smi-common. This patch adds device_link for smi-common and M4U. then, if smi-common power is enabled, the M4U power also is powered on automatically. In this patch, a M4U connects with several smi-larbs and their smi-common always are the same, thus it adds the device-link once is enough. And the devicelink only is needed while m4u has power-domain. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 26 +- drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index a6412d454e0b..931fdd19c8f3 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -675,8 +675,9 @@ static int mtk_iommu_probe(struct platform_device *pdev) return larb_nr; for (i = 0; i < larb_nr; i++) { - struct device_node *larbnode; + struct device_node *larbnode, *smicomm_node; struct platform_device *plarbdev; + struct device_link *link; u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); @@ -701,6 +702,28 @@ static int mtk_iommu_probe(struct platform_device *pdev) component_match_add_release(dev, &match, release_of, compare_of, larbnode); + + /* +* Add link for smi-common and m4u once is ok. and the link is +* only needed while m4u has power-domain. +*/ + if (i || !pm_runtime_enabled(dev)) + continue; + + smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0); + if (!smicomm_node) { + of_node_put(larbnode); + return -EINVAL; + } + + plarbdev = of_find_device_by_node(smicomm_node); + of_node_put(smicomm_node); + data->smicomm_dev = &plarbdev->dev; + + link = device_link_add(&plarbdev->dev, dev, + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + if (!link) + dev_err(dev, "Unable link %s.\n", plarbdev->name); } platform_set_drvdata(pdev, data); @@ -740,6 +763,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) if (iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, NULL); + device_link_remove(data->smicomm_dev, &pdev->dev); clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_com_ops); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 1a087af50a4e..e965bcb169c0 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -66,6 +66,7 @@ struct mtk_iommu_data { struct iommu_device iommu; const struct mtk_iommu_plat_data *plat_data; + struct device *smicomm_dev; struct list_headlist; struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 05/21] iommu/mediatek: Use the common mtk-smi-larb-port.h
Use the common larb-port header in the source code. Signed-off-by: Yong Wu --- drivers/iommu/mtk_iommu.c | 7 --- drivers/iommu/mtk_iommu.h | 1 + drivers/memory/mtk-smi.c | 1 + include/soc/mediatek/smi.h | 2 -- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 59e5a62a34db..a8d8a874a209 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -101,13 +101,6 @@ #define MTK_PROTECT_PA_ALIGN 256 -/* - * Get the local arbiter ID and the portid within the larb arbiter - * from mtk_m4u_id which is defined by MTK_M4U_ID. - */ -#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0xf) -#define MTK_M4U_TO_PORT(id)((id) & 0x1f) - #define HAS_4GB_MODE BIT(0) /* HW will use the EMI clock if there isn't the "bclk". */ #define HAS_BCLK BIT(1) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 214898578026..1a087af50a4e 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -16,6 +16,7 @@ #include #include #include +#include #define MTK_LARB_COM_MAX 8 #define MTK_LARB_SUBCOM_MAX4 diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index a113e811faab..f6516921287f 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* mt8173 */ diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 5a34b87d89e3..9371bf572ab8 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -11,8 +11,6 @@ #ifdef CONFIG_MTK_SMI -#define MTK_LARB_NR_MAX16 - #define MTK_SMI_MMU_EN(port) BIT(port) struct mtk_smi_larb_iommu { -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 08/21] iommu/io-pgtable-arm-v7s: Add cfg as a param in some macros
Add "cfg" as a parameter for some macros. This is a preparing patch for mediatek extend the lvl1 pgtable. No functional change. Signed-off-by: Yong Wu --- drivers/iommu/io-pgtable-arm-v7s.c | 34 +++--- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 2830718b9d83..e1c98be61e1b 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -53,17 +53,17 @@ #define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) #define ARM_V7S_TABLE_SHIFT10 -#define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) -#define ARM_V7S_TABLE_SIZE(lvl) \ - (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte)) +#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl)) +#define ARM_V7S_TABLE_SIZE(lvl, cfg) \ + (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte)) #define ARM_V7S_BLOCK_SIZE(lvl)(1UL << ARM_V7S_LVL_SHIFT(lvl)) #define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl))) #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT)) -#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1) -#define ARM_V7S_LVL_IDX(addr, lvl) ({ \ +#define _ARM_V7S_IDX_MASK(lvl, cfg)(ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) +#define ARM_V7S_LVL_IDX(addr, lvl, cfg)({ \ int _l = lvl; \ - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \ + ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ }) /* @@ -239,7 +239,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct device *dev = cfg->iommu_dev; phys_addr_t phys; dma_addr_t dma; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); void *table = NULL; if (lvl == 1) @@ -285,7 +285,7 @@ static void __arm_v7s_free_table(void *table, int lvl, { struct io_pgtable_cfg *cfg = &data->iop.cfg; struct device *dev = cfg->iommu_dev; - size_t size = ARM_V7S_TABLE_SIZE(lvl); + size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, @@ -429,7 +429,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, arm_v7s_iopte *tblp; size_t sz = ARM_V7S_BLOCK_SIZE(lvl); - tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); + tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg); if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz, sz, lvl, tblp) != sz)) return -EINVAL; @@ -482,7 +482,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); /* Find our entry at the current level */ - ptep += ARM_V7S_LVL_IDX(iova, lvl); + ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg); /* If we can install a leaf entry at this level, then do so */ if (num_entries) @@ -555,7 +555,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); int i; - for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) { + for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) { arm_v7s_iopte pte = data->pgd[i]; if (ARM_V7S_PTE_IS_TABLE(pte, 1)) @@ -607,9 +607,9 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, if (!tablep) return 0; /* Bytes unmapped */ - num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg); num_entries = size >> ARM_V7S_LVL_SHIFT(2); - unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg); pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); if (num_entries > 1) @@ -651,7 +651,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, if (WARN_ON(lvl > 2)) return 0; - idx = ARM_V7S_LVL_IDX(iova, lvl); + idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg); ptep += idx; do { pte[i] = READ_ONCE(ptep[i]); @@ -737,7 +737,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + ptep += ARM_V7S_LVL_IDX(iova, ++lvl, &data->iop.cfg); pte = READ_ONCE(*ptep); ptep = iopte_deref(pte,
[PATCH 09/21] iommu/io-pgtable-arm-v7s: Quad lvl1 pgtable for MediaTek
The standard input iova bits is 32. MediaTek quad the lvl1 pagetable(4*lvl1). No change for lvl2 pagetable. Then the iova bits can reach 34bit. Signed-off-by: Yong Wu --- drivers/iommu/io-pgtable-arm-v7s.c | 10 +++--- drivers/iommu/mtk_iommu.c | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index e1c98be61e1b..cad314a92abc 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -50,10 +50,14 @@ */ #define ARM_V7S_ADDR_BITS 32 #define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4) +/* MediaTek: totally 34bits, 14bits at lvl1 and 8bits at lvl2. */ +#define _ARM_V7S_LVL_BITS_MTK(lvl) (20 - (lvl) * 6) #define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) #define ARM_V7S_TABLE_SHIFT10 -#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl)) +#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (!arm_v7s_is_mtk_enabled(cfg) ?\ + (1 << _ARM_V7S_LVL_BITS(lvl)) : (1 << _ARM_V7S_LVL_BITS_MTK(lvl))) + #define ARM_V7S_TABLE_SIZE(lvl, cfg) \ (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte)) @@ -63,7 +67,7 @@ #define _ARM_V7S_IDX_MASK(lvl, cfg)(ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1) #define ARM_V7S_LVL_IDX(addr, lvl, cfg)({ \ int _l = lvl; \ - ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ + ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \ }) /* @@ -756,7 +760,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, { struct arm_v7s_io_pgtable *data; - if (cfg->ias > ARM_V7S_ADDR_BITS) + if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) return NULL; if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c3b4d21760ed..a6412d454e0b 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -316,7 +316,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, - .ias = 32, + .ias = 34, .oas = 35, .tlb = &mtk_iommu_flush_ops, .iommu_dev = data->dev, -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 04/21] dt-binding: mediatek: Add binding for mt8192 IOMMU and SMI
This patch adds decriptions for mt8192 IOMMU and SMI. mt8192 also is MTK IOMMU gen2 which uses ARM Short-Descriptor translation table format. The M4U-SMI HW diagram is as below: EMI | M4U | SMI Common | +---+--+--+--+---+ | | | | .. | | | | | | | | larb0 larb1 larb2 larb4 .. larb19 larb20 disp0 disp1 mdpvdec IPE IPE All the connections are HW fixed, SW can NOT adjust it. mt8192 M4U support 0~16GB iova range. we preassign different engines into different iova ranges: domain-id module iova-range larbs 0 disp0 ~ 4G larb0/1 1 vcodec 4G ~ 8G larb4/5/7 2 cam/mdp 8G ~ 12G larb2/9/11/13/14/16/17/18/19/20 3 CCU00x4000_ ~ 0x43ff_ larb13: port 9/10 4 CCU10x4400_ ~ 0x47ff_ larb14: port 4/5 The iova range for CCU0/1(camera control unit) is HW requirement. Signed-off-by: Yong Wu --- .../bindings/iommu/mediatek,iommu.txt | 8 +- .../mediatek,smi-common.txt | 5 +- .../memory-controllers/mediatek,smi-larb.txt | 3 +- include/dt-bindings/memory/mt8192-larb-port.h | 237 ++ 4 files changed, 247 insertions(+), 6 deletions(-) create mode 100644 include/dt-bindings/memory/mt8192-larb-port.h diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt index c1ccd8582eb2..76ee64b593ef 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt @@ -63,13 +63,14 @@ Required properties: generation one m4u HW. "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW. "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW. + "mediatek,mt8192-m4u" for mt8192 which uses generation two m4u HW. - reg : m4u register base and size. - interrupts : the interrupt of m4u. - clocks : must contain one entry for each clock-names. - clock-names : Only 1 optional clock: - "bclk": the block clock of m4u. Here is the list which require this "bclk": - - mt2701, mt2712, mt7623 and mt8173. + - mt2701, mt2712, mt7623, mt8173 and mt8192. Note that m4u use the EMI clock which always has been enabled before kernel if there is no this "bclk". - mediatek,larbs : List of phandle to the local arbiters in the current Socs. @@ -80,8 +81,9 @@ Required properties: dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623 dt-binding/memory/mt2712-larb-port.h for mt2712, dt-binding/memory/mt6779-larb-port.h for mt6779, - dt-binding/memory/mt8173-larb-port.h for mt8173, and - dt-binding/memory/mt8183-larb-port.h for mt8183. + dt-binding/memory/mt8173-larb-port.h for mt8173, + dt-binding/memory/mt8183-larb-port.h for mt8183, and + dt-binding/memory/mt8192-larb-port.h for mt8192. Example: iommu: iommu@10205000 { diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt index 01744ec6a75b..7c79fa488378 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt @@ -5,7 +5,7 @@ The hardware block diagram please check bindings/iommu/mediatek,iommu.txt Mediatek SMI have two generations of HW architecture, here is the list which generation the SoCs use: generation 1: mt2701 and mt7623. -generation 2: mt2712, mt8173 and mt8183. +generation 2: mt2712, mt8173, mt8183 and mt8192. There's slight differences between the two SMI, for generation 2, the register which control the iommu port is at each larb's register base. But @@ -21,6 +21,7 @@ Required properties: "mediatek,mt7623-smi-common", "mediatek,mt2701-smi-common" "mediatek,mt8173-smi-common" "mediatek,mt8183-smi-common", "syscon" + "mediatek,mt8192-smi-common" - reg : the register and size of the SMI block. - power-domains : a phandle to the power domain of this local arbiter. - clocks : Must contain an entry for each entry in clock-names. @@ -35,7 +36,7 @@ Required properties: and these 2 option clocks for generation 2 smi HW: - "gals0": the path0 clock of GALS(Global Async Local Sync). - "gals1": the path1 clock of GALS(Global Async Local Sync). - Here is the list which has this GALS: mt8183. + Here is the li
[PATCH 06/21] iommu/io-pgtable-arm-v7s: Use ias to check the valid iova in unmap
As title. Signed-off-by: Yong Wu --- drivers/iommu/io-pgtable-arm-v7s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 4272fe4e17f4..01f2a8876808 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -717,7 +717,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - if (WARN_ON(upper_32_bits(iova))) + if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd); -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 07/21] iommu/io-pgtable-arm-v7s: Extend PA34 for MediaTek
MediaTek extend the bit5 in lvl1 and lvl2 descriptor as PA34. Signed-off-by: Yong Wu --- drivers/iommu/io-pgtable-arm-v7s.c | 9 +++-- drivers/iommu/mtk_iommu.c | 2 +- include/linux/io-pgtable.h | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 01f2a8876808..2830718b9d83 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -112,9 +112,10 @@ #define ARM_V7S_TEX_MASK 0x7 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) -/* MediaTek extend the two bits for PA 32bit/33bit */ +/* MediaTek extend the bits below for PA 32bit/33bit/34bit */ #define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9) #define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4) +#define ARM_V7S_ATTR_MTK_PA_BIT34 BIT(5) /* *well, except for TEX on level 2 large pages, of course :( */ #define ARM_V7S_CONT_PAGE_TEX_SHIFT6 @@ -194,6 +195,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, pte |= ARM_V7S_ATTR_MTK_PA_BIT32; if (paddr & BIT_ULL(33)) pte |= ARM_V7S_ATTR_MTK_PA_BIT33; + if (paddr & BIT_ULL(34)) + pte |= ARM_V7S_ATTR_MTK_PA_BIT34; return pte; } @@ -218,6 +221,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl, paddr |= BIT_ULL(32); if (pte & ARM_V7S_ATTR_MTK_PA_BIT33) paddr |= BIT_ULL(33); + if (pte & ARM_V7S_ATTR_MTK_PA_BIT34) + paddr |= BIT_ULL(34); return paddr; } @@ -754,7 +759,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->ias > ARM_V7S_ADDR_BITS) return NULL; - if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS)) + if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS)) return NULL; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index a8d8a874a209..c3b4d21760ed 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -317,7 +317,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) IO_PGTABLE_QUIRK_ARM_MTK_EXT, .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = 32, - .oas = 34, + .oas = 35, .tlb = &mtk_iommu_flush_ops, .iommu_dev = data->dev, }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 53d53c6c2be9..48d343189e28 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -77,8 +77,8 @@ struct io_pgtable_cfg { * TLB maintenance when mapping as well as when unmapping. * * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend -* to support up to 34 bits PA where the bit32 and bit33 are -* encoded in the bit9 and bit4 of the PTE respectively. +* to support up to 35 bits PA where the bit32, bit33 and bit34 are +* encoded in the bit9, bit4 and bit5 of the PTE respectively. * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 03/21] dt-binding: memory: mediatek: Add domain definition
In the latest SoC, there are several HW IP require a sepecial iova range, mainly CCU and VPU has this requirement. Take CCU as a example, CCU require its iova locate in the range(0x4000_ ~ 0x43ff_). In this patch we add a domain definition for the special port. This is a preparing patch for multi-domain support. Signed-off-by: Yong Wu --- include/dt-bindings/memory/mtk-smi-larb-port.h | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/dt-bindings/memory/mtk-smi-larb-port.h b/include/dt-bindings/memory/mtk-smi-larb-port.h index f4d8e3aed0bc..d00f5de8438b 100644 --- a/include/dt-bindings/memory/mtk-smi-larb-port.h +++ b/include/dt-bindings/memory/mtk-smi-larb-port.h @@ -7,9 +7,16 @@ #define __DTS_MTK_IOMMU_PORT_H_ #define MTK_LARB_NR_MAX32 +#define MTK_M4U_DOM_NR_MAX 8 + +#define MTK_M4U_DOM_ID(domid, larb, port) \ + (((domid) & 0x7) << 16 | (((larb) & 0x1f) << 5) | ((port) & 0x1f)) + +/* The default dom id is 0. */ +#define MTK_M4U_ID(larb, port) MTK_M4U_DOM_ID(0, larb, port) -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) #define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0x1f) #define MTK_M4U_TO_PORT(id)((id) & 0x1f) +#define MTK_M4U_TO_DOM(id) (((id) >> 16) & 0x7) #endif -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 02/21] dt-binding: memory: mediatek: Extend LARB_NR_MAX to 32
Extend the max larb number definition as mt8192 has larb_nr over 16. Signed-off-by: Yong Wu --- include/dt-bindings/memory/mtk-smi-larb-port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dt-bindings/memory/mtk-smi-larb-port.h b/include/dt-bindings/memory/mtk-smi-larb-port.h index 2ec7fe5ce4e9..f4d8e3aed0bc 100644 --- a/include/dt-bindings/memory/mtk-smi-larb-port.h +++ b/include/dt-bindings/memory/mtk-smi-larb-port.h @@ -6,10 +6,10 @@ #ifndef __DTS_MTK_IOMMU_PORT_H_ #define __DTS_MTK_IOMMU_PORT_H_ -#define MTK_LARB_NR_MAX16 +#define MTK_LARB_NR_MAX32 #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) -#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0xf) +#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0x1f) #define MTK_M4U_TO_PORT(id)((id) & 0x1f) #endif -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 01/21] dt-binding: memory: mediatek: Add a common larb-port header file
Put all the macros about smi larb/port togethers, this is a preparing patch for extending LARB_NR and adding new dom-id support. Signed-off-by: Yong Wu --- include/dt-bindings/memory/mt2712-larb-port.h | 2 +- include/dt-bindings/memory/mt6779-larb-port.h | 2 +- include/dt-bindings/memory/mt8173-larb-port.h | 2 +- include/dt-bindings/memory/mt8183-larb-port.h | 2 +- include/dt-bindings/memory/mtk-smi-larb-port.h | 15 +++ 5 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 include/dt-bindings/memory/mtk-smi-larb-port.h diff --git a/include/dt-bindings/memory/mt2712-larb-port.h b/include/dt-bindings/memory/mt2712-larb-port.h index 6f9aa7349cef..b6b2c6bf4459 100644 --- a/include/dt-bindings/memory/mt2712-larb-port.h +++ b/include/dt-bindings/memory/mt2712-larb-port.h @@ -6,7 +6,7 @@ #ifndef __DTS_IOMMU_PORT_MT2712_H #define __DTS_IOMMU_PORT_MT2712_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt6779-larb-port.h b/include/dt-bindings/memory/mt6779-larb-port.h index 2ad0899fbf2f..60f57f54393e 100644 --- a/include/dt-bindings/memory/mt6779-larb-port.h +++ b/include/dt-bindings/memory/mt6779-larb-port.h @@ -7,7 +7,7 @@ #ifndef _DTS_IOMMU_PORT_MT6779_H_ #define _DTS_IOMMU_PORT_MT6779_H_ -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID0 #define M4U_LARB1_ID1 diff --git a/include/dt-bindings/memory/mt8173-larb-port.h b/include/dt-bindings/memory/mt8173-larb-port.h index 9f31ccfeca21..d8c99c946053 100644 --- a/include/dt-bindings/memory/mt8173-larb-port.h +++ b/include/dt-bindings/memory/mt8173-larb-port.h @@ -6,7 +6,7 @@ #ifndef __DTS_IOMMU_PORT_MT8173_H #define __DTS_IOMMU_PORT_MT8173_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mt8183-larb-port.h b/include/dt-bindings/memory/mt8183-larb-port.h index 2c579f305162..275c095a6fd6 100644 --- a/include/dt-bindings/memory/mt8183-larb-port.h +++ b/include/dt-bindings/memory/mt8183-larb-port.h @@ -6,7 +6,7 @@ #ifndef __DTS_IOMMU_PORT_MT8183_H #define __DTS_IOMMU_PORT_MT8183_H -#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#include #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 diff --git a/include/dt-bindings/memory/mtk-smi-larb-port.h b/include/dt-bindings/memory/mtk-smi-larb-port.h new file mode 100644 index ..2ec7fe5ce4e9 --- /dev/null +++ b/include/dt-bindings/memory/mtk-smi-larb-port.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 MediaTek Inc. + * Author: Yong Wu + */ +#ifndef __DTS_MTK_IOMMU_PORT_H_ +#define __DTS_MTK_IOMMU_PORT_H_ + +#define MTK_LARB_NR_MAX16 + +#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) +#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0xf) +#define MTK_M4U_TO_PORT(id)((id) & 0x1f) + +#endif -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 00/21] MT8192 IOMMU support
This patch mainly adds support for mt8192 IOMMU and SMI. mt8192 also is MTK IOMMU gen2 which uses ARM Short-Descriptor translation table format. The M4U-SMI HW diagram is as below: EMI | M4U | SMI Common | +---+--+--+--+---+ | | | | .. | | | | | | | | larb0 larb1 larb2 larb4 .. larb19 larb20 disp0 disp1 mdpvdec IPE IPE All the connections are HW fixed, SW can NOT adjust it. Comparing with the preview SoC, this patchset also adds two functions: a) add iova 34 bits support. b) add multi domains support since several HW has the special iova region requirement. this patchset depend on v5.8-rc1 and mt6779 iommu[1]. [1]https://lore.kernel.org/linux-iommu/20200703044127.27438-1-chao@mediatek.com/ Yong Wu (21): dt-binding: memory: mediatek: Add a common larb-port header file dt-binding: memory: mediatek: Extend LARB_NR_MAX to 32 dt-binding: memory: mediatek: Add domain definition dt-binding: mediatek: Add binding for mt8192 IOMMU and SMI iommu/mediatek: Use the common mtk-smi-larb-port.h iommu/io-pgtable-arm-v7s: Use ias to check the valid iova in unmap iommu/io-pgtable-arm-v7s: Extend PA34 for MediaTek iommu/io-pgtable-arm-v7s: Add cfg as a param in some macros iommu/io-pgtable-arm-v7s: Quad lvl1 pgtable for MediaTek iommu/mediatek: Add device link for smi-common and m4u iommu/mediatek: Add power-domain operation iommu/mediatek: Add iova reserved function iommu/mediatek: Make MTK_IOMMU depend on ARM64 iommu/mediatek: Add single domain iommu/mediatek: Support master use iova over 32bit iommu/mediatek: Support up to 34bit iova in tlb invalid iommu/mediatek: Support report iova 34bit translation fault in ISR iommu/mediatek: Add support for multi domain iommu/mediatek: Adjust the structure iommu/mediatek: Add mt8192 support memory: mtk-smi: Add mt8192 support .../bindings/iommu/mediatek,iommu.txt | 8 +- .../mediatek,smi-common.txt | 5 +- .../memory-controllers/mediatek,smi-larb.txt | 3 +- drivers/iommu/Kconfig | 1 + drivers/iommu/io-pgtable-arm-v7s.c| 51 ++-- drivers/iommu/mtk_iommu.c | 265 +++--- drivers/iommu/mtk_iommu.h | 11 +- drivers/memory/mtk-smi.c | 25 ++ include/dt-bindings/memory/mt2712-larb-port.h | 2 +- include/dt-bindings/memory/mt6779-larb-port.h | 2 +- include/dt-bindings/memory/mt8173-larb-port.h | 2 +- include/dt-bindings/memory/mt8183-larb-port.h | 2 +- include/dt-bindings/memory/mt8192-larb-port.h | 237 .../dt-bindings/memory/mtk-smi-larb-port.h| 22 ++ include/linux/io-pgtable.h| 4 +- include/soc/mediatek/smi.h| 3 +- 16 files changed, 565 insertions(+), 78 deletions(-) create mode 100644 include/dt-bindings/memory/mt8192-larb-port.h create mode 100644 include/dt-bindings/memory/mtk-smi-larb-port.h -- 2.18.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 4/4] PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices
On Fri, Jul 10, 2020 at 03:53:59PM -0700, Rajat Jain wrote: > On Fri, Jul 10, 2020 at 2:29 PM Raj, Ashok wrote: > > On Fri, Jul 10, 2020 at 03:29:22PM -0500, Bjorn Helgaas wrote: > > > On Tue, Jul 07, 2020 at 03:46:04PM -0700, Rajat Jain wrote: > > > > When enabling ACS, enable translation blocking for external facing ports > > > > and untrusted devices. > > > > > > > > Signed-off-by: Rajat Jain > > > > --- > > > > v4: Add braces to avoid warning from kernel robot > > > > print warning for only external-facing devices. > > > > v3: print warning if ACS_TB not supported on external-facing/untrusted > > > > ports. > > > > Minor code comments fixes. > > > > v2: Commit log change > > > > > > > > drivers/pci/pci.c| 8 > > > > drivers/pci/quirks.c | 15 +++ > > > > 2 files changed, 23 insertions(+) > > > > > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > > > index 73a8627822140..a5a6bea7af7ce 100644 > > > > --- a/drivers/pci/pci.c > > > > +++ b/drivers/pci/pci.c > > > > @@ -876,6 +876,14 @@ static void pci_std_enable_acs(struct pci_dev *dev) > > > > /* Upstream Forwarding */ > > > > ctrl |= (cap & PCI_ACS_UF); > > > > > > > > + /* Enable Translation Blocking for external devices */ > > > > + if (dev->external_facing || dev->untrusted) { > > > > + if (cap & PCI_ACS_TB) > > > > + ctrl |= PCI_ACS_TB; > > > > + else if (dev->external_facing) > > > > + pci_warn(dev, "ACS: No Translation Blocking on > > > > external-facing dev\n"); > > > > + } > > > > > > IIUC, this means that external devices can *never* use ATS > > and can > > > never cache translations. > > Yes, but it already exists today (and this patch doesn't change that): > 521376741b2c2 "PCI/ATS: Only enable ATS for trusted devices" If you get in the habit of using the commit reference style from Documentation/process/submitting-patches.rst it saves me the trouble of fixing them. I use this: gsr is aliased to `git --no-pager show -s --abbrev-commit --abbrev=12 --pretty=format:"%h (\"%s\")%n"' > IMHO any external device trying to send ATS traffic despite having > ATS disabled should count as a bad intent. And this patch is trying > to plug that loophole, by blocking the AT traffic from devices that > we do not expect to see AT from anyway. That's exactly the sort of assertion I was looking for. If we can get something like this explanation into the commit log, and if Ashok and Alex are OK with this, we'll be much closer. It sounds like this is just enforcing a restriction we already have, i.e., enabling PCI_ACS_TB blocks translated requests from devices that aren't supposed to be generating them. > Do you see any case where this is not true? > > > And (I guess, I'm not an expert) it can > > > also never use the Page Request Services? > > > > Yep, sounds like it. > > Yes, from spec "Address Translation Services" Rev 1.1: > "...a device that supports ATS need not support PRI, but PRI is > dependent on ATS’s capabilities." > (So no ATS = No PRI). > > > > Is this what we want? Do we have any idea how many external > > > devices this will affect or how much of a performance impact > > > they will see? > > > > > > Do we need some kind of override or mechanism to authenticate > > > certain devices so they can use ATS and PRI? > > > > Sounds like we would need some form of an allow-list to start with > > so we can have something in the interim. > > I assume what is being referred to, is an escape hatch to enable ATS > on certain given "external-facing" ports (and devices downstream on > that port). Do we really think a *per-port* control for ATS may be > needed? I can add if there is consensus about this. > > > I suppose a future platform might have a facilty to ensure ATS is > > secure and authenticated we could enable for all of devices in the > > system, in addition to PCI CMA/IDE. > > > > I think having a global override to enable all devices so platform > > can switch to current behavior, or maybe via a cmdline switch.. as > > much as we have a billion of those, it still gives an option in > > case someone needs it. > > Currently: > > pci.noats => No ATS on all PCI devices. > (Absense of pci.noats): ATS on all PCI devices, EXCEPT external devices. You mean the "pci=noats" kernel command line parameter, right? > I can look to add another parameter that is synonymous to > "trust-external-pci-devices" that can keep ATS enabled on external > ports as well. I think this is better than an allow-list of only > certain ports, because most likely an admin will trust all its > external ports, or not. Also, we can add this global override and > may be add a more granular control later, if and when really needed. I think this would be new functionality that we don't have today, and we don't have anything that actually *needs* it AFAIK, so I wouldn't bother. Bjorn ___ iommu mailing list i
Re: [PATCH v2 3/5] irqchip: Allow QCOM_PDC to be loadable as a permanent module
Quoting John Stultz (2020-07-10 15:44:18) > On Thu, Jul 9, 2020 at 11:02 PM Stephen Boyd wrote: > > > > Does it work? I haven't looked in detail but I worry that the child > > irqdomain (i.e. pinctrl-msm) would need to delay probing until this > > parent irqdomain is registered. Or has the hierarchical irqdomain code > > been updated to handle the parent child relationship and wait for things > > to probe or be loaded? > > So I can't say I know the underlying hardware particularly well, but > I've been using this successfully on the Dragonboard 845c with both > static builds as well as module enabled builds. > And the same patch has been in the android-mainline and android-5.4 > kernels for a while without objections from QCOM. > > As to the probe ordering question, Saravana can maybe speak in more > detail if it's involved in this case but the fw_devlink code has > addressed many of these sorts of ordering issues. > However, I'm not sure if I'm lucking into the right probe order, as we > have been able to boot android-mainline w/ both fw_devlink=on and > fw_devlink=off (though in the =off case, we need > deferred_probe_timeout=30 to give us a bit more time for modules to > load after init starts). > Ok I looked at the code (sorry for not checking earlier) and I see this in msm_gpio_init() np = of_parse_phandle(pctrl->dev->of_node, "wakeup-parent", 0); if (np) { chip->irq.parent_domain = irq_find_matching_host(np, DOMAIN_BUS_WAKEUP); of_node_put(np); if (!chip->irq.parent_domain) return -EPROBE_DEFER; so it looks like we'll probe defer the pinctrl driver until the pdc module loads. Meaning it should work to have pinctrl builtin and pdc as a module. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v3 1/3] irq: irqdomain: Export irq_domain_update_bus_token
Add export for irq_domain_update_bus_token() so that we can allow drivers like the qcom-pdc driver to be loadable as a module. Cc: Andy Gross Cc: Bjorn Andersson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Linus Walleij Cc: Maulik Shah Cc: Lina Iyer Cc: Saravana Kannan Cc: Todd Kjos Cc: Greg Kroah-Hartman Cc: linux-arm-...@vger.kernel.org Cc: iommu@lists.linux-foundation.org Cc: linux-g...@vger.kernel.org Signed-off-by: John Stultz --- kernel/irq/irqdomain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index a4c2c915511d..ca974d965fda 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -281,6 +281,7 @@ void irq_domain_update_bus_token(struct irq_domain *domain, mutex_unlock(&irq_domain_mutex); } +EXPORT_SYMBOL_GPL(irq_domain_update_bus_token); /** * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v3 2/3] irq: irqchip: Export irq_chip_retrigger_hierarchy and irq_chip_set_vcpu_affinity_parent
Add EXPORT_SYMBOL_GPL entries for irq_chip_retrigger_hierarchy() and irq_chip_set_vcpu_affinity_parent() so that we can allow drivers like the qcom-pdc driver to be loadable as a module. Cc: Andy Gross Cc: Bjorn Andersson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Linus Walleij Cc: Maulik Shah Cc: Lina Iyer Cc: Saravana Kannan Cc: Todd Kjos Cc: Greg Kroah-Hartman Cc: linux-arm-...@vger.kernel.org Cc: iommu@lists.linux-foundation.org Cc: linux-g...@vger.kernel.org Signed-off-by: John Stultz --- kernel/irq/chip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 41e7e37a0928..ba6ce66d7ed6 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1478,6 +1478,7 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data) return 0; } +EXPORT_SYMBOL_GPL(irq_chip_retrigger_hierarchy); /** * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent interrupt @@ -1492,7 +1493,7 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) return -ENOSYS; } - +EXPORT_SYMBOL_GPL(irq_chip_set_vcpu_affinity_parent); /** * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt * @data: Pointer to interrupt specific data -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v3 3/3] irqchip: Allow QCOM_PDC to be loadable as a permanent module
Allows qcom-pdc driver to be loaded as a permanent module Also, due to the fact that IRQCHIP_DECLARE becomes a no-op when building as a module, we have to replace it with platform driver hooks explicitly. Thanks to Saravana for his help on pointing out the IRQCHIP_DECLARE issue and guidance on a solution. Cc: Andy Gross Cc: Bjorn Andersson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Linus Walleij Cc: Maulik Shah Cc: Lina Iyer Cc: Saravana Kannan Cc: Todd Kjos Cc: Greg Kroah-Hartman Cc: linux-arm-...@vger.kernel.org Cc: iommu@lists.linux-foundation.org Cc: linux-g...@vger.kernel.org Signed-off-by: John Stultz --- v2: Fix spelling, include order and set suppress_bind_attrs suggested by Maulik Shah v3: Drop conditional usage of IRQCHIP_DECLARE as suggested by Stephen Boyd and Marc Zyngier --- drivers/irqchip/Kconfig| 2 +- drivers/irqchip/qcom-pdc.c | 28 +++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 216b3b8392b5..cc285c1a54c1 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -425,7 +425,7 @@ config GOLDFISH_PIC for Goldfish based virtual platforms. config QCOM_PDC - bool "QCOM PDC" + tristate "QCOM PDC" depends on ARCH_QCOM select IRQ_DOMAIN_HIERARCHY help diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 6ae9e1f0819d..5b624e3295e4 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -11,9 +11,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -430,4 +432,28 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) return ret; } -IRQCHIP_DECLARE(qcom_pdc, "qcom,pdc", qcom_pdc_init); +static int qcom_pdc_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *parent = of_irq_find_parent(np); + + return qcom_pdc_init(np, parent); +} + +static const struct of_device_id qcom_pdc_match_table[] = { + { .compatible = "qcom,pdc" }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_pdc_match_table); + +static struct platform_driver qcom_pdc_driver = { + .probe = qcom_pdc_probe, + .driver = { + .name = "qcom-pdc", + .of_match_table = qcom_pdc_match_table, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(qcom_pdc_driver); +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Power Domain Controller"); +MODULE_LICENSE("GPL v2"); -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v3 0/3] Allow for qcom-pdc to be loadable as a module
This patch series provides exports and config tweaks to allow the qcom-pdc driver to be able to be configured as a permement modules (particularlly useful for the Android Generic Kernel Image efforts). This was part of a larger patch series, to enable qcom_scm driver to be a module as well, but I've split it out as there are some outstanding objections I still need to address with the follow-on patches, and wanted to see if progress could be made on this subset of the series in the meantime. New in v3: * Drop conditional usage of IRQCHIP_DECLARE as suggested by Stephen Boyd and Marc Zyngier thanks -john Cc: Andy Gross Cc: Bjorn Andersson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Linus Walleij Cc: Lina Iyer Cc: Maulik Shah Cc: Saravana Kannan Cc: Todd Kjos Cc: Greg Kroah-Hartman Cc: linux-arm-...@vger.kernel.org Cc: iommu@lists.linux-foundation.org Cc: linux-g...@vger.kernel.org John Stultz (3): irq: irqdomain: Export irq_domain_update_bus_token irq: irqchip: Export irq_chip_retrigger_hierarchy and irq_chip_set_vcpu_affinity_parent irqchip: Allow QCOM_PDC to be loadable as a permanent module drivers/irqchip/Kconfig| 2 +- drivers/irqchip/qcom-pdc.c | 28 +++- kernel/irq/chip.c | 3 ++- kernel/irq/irqdomain.c | 1 + 4 files changed, 31 insertions(+), 3 deletions(-) -- 2.17.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 4/4] PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices
Hello, On Fri, Jul 10, 2020 at 2:29 PM Raj, Ashok wrote: > > Hi Bjorn > > > On Fri, Jul 10, 2020 at 03:29:22PM -0500, Bjorn Helgaas wrote: > > On Tue, Jul 07, 2020 at 03:46:04PM -0700, Rajat Jain wrote: > > > When enabling ACS, enable translation blocking for external facing ports > > > and untrusted devices. > > > > > > Signed-off-by: Rajat Jain > > > --- > > > v4: Add braces to avoid warning from kernel robot > > > print warning for only external-facing devices. > > > v3: print warning if ACS_TB not supported on external-facing/untrusted > > > ports. > > > Minor code comments fixes. > > > v2: Commit log change > > > > > > drivers/pci/pci.c| 8 > > > drivers/pci/quirks.c | 15 +++ > > > 2 files changed, 23 insertions(+) > > > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > > index 73a8627822140..a5a6bea7af7ce 100644 > > > --- a/drivers/pci/pci.c > > > +++ b/drivers/pci/pci.c > > > @@ -876,6 +876,14 @@ static void pci_std_enable_acs(struct pci_dev *dev) > > > /* Upstream Forwarding */ > > > ctrl |= (cap & PCI_ACS_UF); > > > > > > + /* Enable Translation Blocking for external devices */ > > > + if (dev->external_facing || dev->untrusted) { > > > + if (cap & PCI_ACS_TB) > > > + ctrl |= PCI_ACS_TB; > > > + else if (dev->external_facing) > > > + pci_warn(dev, "ACS: No Translation Blocking on > > > external-facing dev\n"); > > > + } > > > > IIUC, this means that external devices can *never* use ATS > and can > > never cache translations. Yes, but it already exists today (and this patch doesn't change that): 521376741b2c2 "PCI/ATS: Only enable ATS for trusted devices" IMHO any external device trying to send ATS traffic despite having ATS disabled should count as a bad intent. And this patch is trying to plug that loophole, by blocking the AT traffic from devices that we do not expect to see AT from anyway. Do you see any case where this is not true? > And (I guess, I'm not an expert) it can > > also never use the Page Request Services? > > Yep, sounds like it. Yes, from spec "Address Translation Services" Rev 1.1: "...a device that supports ATS need not support PRI, but PRI is dependent on ATS’s capabilities." (So no ATS = No PRI). > > > > > Is this what we want? Do we have any idea how many external devices > > this will affect or how much of a performance impact they will see? > > > > Do we need some kind of override or mechanism to authenticate certain > > devices so they can use ATS and PRI? > > Sounds like we would need some form of an allow-list to start with so we > can have something in the interim. I assume what is being referred to, is an escape hatch to enable ATS on certain given "external-facing" ports (and devices downstream on that port). Do we really think a *per-port* control for ATS may be needed? I can add if there is consensus about this. > > I suppose a future platform might have a facilty to ensure ATS is secure and > authenticated we could enable for all of devices in the system, in addition > to PCI CMA/IDE. > > I think having a global override to enable all devices so platform can > switch to current behavior, or maybe via a cmdline switch.. as much as we > have a billion of those, it still gives an option in case someone needs it. Currently: pci.noats => No ATS on all PCI devices. (Absense of pci.noats): ATS on all PCI devices, EXCEPT external devices. I can look to add another parameter that is synonymous to "trust-external-pci-devices" that can keep ATS enabled on external ports as well. I think this is better than an allow-list of only certain ports, because most likely an admin will trust all its external ports, or not. Also, we can add this global override and may be add a more granular control later, if and when really needed. Thanks, Rajat > > > > > > > If we do decide this is the right thing to do, I think we need to > > expand the commit log a bit, because this is potentially a significant > > user-visible change. > > > > > pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); > > > } > > > > > > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c > > > index b341628e47527..bb22b46c1d719 100644 > > > --- a/drivers/pci/quirks.c > > > +++ b/drivers/pci/quirks.c > > > @@ -4934,6 +4934,13 @@ static void > > > pci_quirk_enable_intel_rp_mpc_acs(struct pci_dev *dev) > > > } > > > } > > > > > > +/* > > > + * Currently this quirk does the equivalent of > > > + * PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF > > > + * > > > + * TODO: This quirk also needs to do equivalent of PCI_ACS_TB, > > > + * if dev->external_facing || dev->untrusted > > > + */ > > > static int pci_quirk_enable_intel_pch_acs(struct pci_dev *dev) > > > { > > > if (!pci_quirk_intel_pch_acs_match(dev)) > > > @@ -4973,6 +4980,14 @@ static int > > > pci_quirk_enable_intel_spt_pch_acs(struct pci_dev *dev) > > > ctrl |= (cap & PCI_ACS_CR); > > > ctrl |=
Re: [PATCH v2 3/5] irqchip: Allow QCOM_PDC to be loadable as a permanent module
On Thu, Jul 9, 2020 at 11:02 PM Stephen Boyd wrote: > Quoting Marc Zyngier (2020-06-27 02:37:47) > > On Sat, 27 Jun 2020 02:34:25 +0100, > > John Stultz wrote: > > > > > > On Fri, Jun 26, 2020 at 12:42 AM Stephen Boyd wrote: > > > > > > > > > > > > Is there any reason to use IRQCHIP_DECLARE if this can work as a > > > > platform device driver? > > > > > > > > > > Hey! Thanks so much for the review! > > > > > > Mostly it was done this way to minimize the change in the non-module > > > case. But if you'd rather avoid the #ifdefery I'll respin it without. > > > > That would certainly be my own preference. In general, IRQCHIP_DECLARE > > and platform drivers should be mutually exclusive in the same driver: > > if you can delay the probing and have it as a proper platform device, > > then this should be the one true way. > > > > Does it work? I haven't looked in detail but I worry that the child > irqdomain (i.e. pinctrl-msm) would need to delay probing until this > parent irqdomain is registered. Or has the hierarchical irqdomain code > been updated to handle the parent child relationship and wait for things > to probe or be loaded? So I can't say I know the underlying hardware particularly well, but I've been using this successfully on the Dragonboard 845c with both static builds as well as module enabled builds. And the same patch has been in the android-mainline and android-5.4 kernels for a while without objections from QCOM. As to the probe ordering question, Saravana can maybe speak in more detail if it's involved in this case but the fw_devlink code has addressed many of these sorts of ordering issues. However, I'm not sure if I'm lucking into the right probe order, as we have been able to boot android-mainline w/ both fw_devlink=on and fw_devlink=off (though in the =off case, we need deferred_probe_timeout=30 to give us a bit more time for modules to load after init starts). thanks -john ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module
On Fri, Jul 10, 2020 at 12:54 AM Will Deacon wrote: > On Thu, Jul 09, 2020 at 08:28:45PM -0700, John Stultz wrote: > > On Thu, Jul 2, 2020 at 7:18 AM Will Deacon wrote: > > > On Thu, Jun 25, 2020 at 12:10:39AM +, John Stultz wrote: > > > > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig > > > > index b510f67dfa49..714893535dd2 100644 > > > > --- a/drivers/iommu/Kconfig > > > > +++ b/drivers/iommu/Kconfig > > > > @@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU > > > > config ARM_SMMU > > > > tristate "ARM Ltd. System MMU (SMMU) Support" > > > > depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) > > > > && MMU > > > > + depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y > > > > select IOMMU_API > > > > select IOMMU_IO_PGTABLE_LPAE > > > > select ARM_DMA_USE_IOMMU if ARM > > > > > > This looks like a giant hack. Is there another way to handle this? > > > > Sorry for the slow response here. > > > > So, I agree the syntax looks strange (requiring a comment obviously > > isn't a good sign), but it's a fairly common way to ensure drivers > > don't get built in if they optionally depend on another driver that > > can be built as a module. > > See "RFKILL || !RFKILL", "EXTCON || !EXTCON", or "USB_GADGET || > > !USB_GADGET" in various Kconfig files. > > > > I'm open to using a different method, and in a different thread you > > suggested using something like symbol_get(). I need to look into it > > more, but that approach looks even more messy and prone to runtime > > failures. Blocking the unwanted case at build time seems a bit cleaner > > to me, even if the syntax is odd. > > Maybe just split it out then, so that the ARM_SMMU entry doesn't have this, > as that driver _really_ doesn't care about SoC details like this. In other > words, add a new entry along the lines of: > > config ARM_SMMU_QCOM_IMPL > default y > #if QCOM_SCM=m this can't be =y > depends on ARM_SMMU & (QCOM_SCM || !QCOM_SCM) > > and then have arm-smmu.h provide a static inline qcom_smmu_impl_init() > which returns -ENODEV if CONFIG_ARM_SMMU_QCOM_IMPL=n and hack the Makefile > so that we don't bother to compile arm-smmu-qcom.o in that case. > > Would that work? I think this proposal still has problems with the directionality of the call. The arm-smmu-impl.o calls to arm-smmu-qcom.o which calls qcom_scm.o So if qcom_scm.o is part of a module, the calling code in arm-smmu-qcom.o also needs to be a module, which means CONFIG_ARM_SMMU needs to be a module. I know you said the arm-smmu driver doesn't care about SoC details, but the trouble is that currently the arm-smmu driver does directly call the qcom-scm code. So it is a real dependency. However, if QCOM_SCM is not configured, it calls stubs and that's ok. In that way, the "depends on QCOM_SCM || !QCOM_SCM" line actually makes sense. It looks terrible because we're used to boolean logic, but it's ternary. Maybe can have the ARM_SMMU_QCOM_IMPL approach you suggest above, but that just holds the issue out at arms length, because we're still going to need to have: depends on ARM_SMMU_QCOM_IMPL || !ARM_SMMU_QCOM_IMPL in the ARM_SMMU definition, which I suspect you're wanting to avoid. Otherwise the only thing I can think of is a deeper reworking of the arm-smmu-impl code so that the arm-smmu-qcom code probes itself and registers its hooks with the arm-smmu core. That way the arm-smmu driver would not directly call any SoC specific code (and thus have no dependencies outward). But it's probably a fair amount of churn vs the extra depends string. thanks -john ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 4/4] PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices
Hi Bjorn On Fri, Jul 10, 2020 at 03:29:22PM -0500, Bjorn Helgaas wrote: > On Tue, Jul 07, 2020 at 03:46:04PM -0700, Rajat Jain wrote: > > When enabling ACS, enable translation blocking for external facing ports > > and untrusted devices. > > > > Signed-off-by: Rajat Jain > > --- > > v4: Add braces to avoid warning from kernel robot > > print warning for only external-facing devices. > > v3: print warning if ACS_TB not supported on external-facing/untrusted > > ports. > > Minor code comments fixes. > > v2: Commit log change > > > > drivers/pci/pci.c| 8 > > drivers/pci/quirks.c | 15 +++ > > 2 files changed, 23 insertions(+) > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > index 73a8627822140..a5a6bea7af7ce 100644 > > --- a/drivers/pci/pci.c > > +++ b/drivers/pci/pci.c > > @@ -876,6 +876,14 @@ static void pci_std_enable_acs(struct pci_dev *dev) > > /* Upstream Forwarding */ > > ctrl |= (cap & PCI_ACS_UF); > > > > + /* Enable Translation Blocking for external devices */ > > + if (dev->external_facing || dev->untrusted) { > > + if (cap & PCI_ACS_TB) > > + ctrl |= PCI_ACS_TB; > > + else if (dev->external_facing) > > + pci_warn(dev, "ACS: No Translation Blocking on > > external-facing dev\n"); > > + } > > IIUC, this means that external devices can *never* use ATS and can > never cache translations. And (I guess, I'm not an expert) it can > also never use the Page Request Services? Yep, sounds like it. > > Is this what we want? Do we have any idea how many external devices > this will affect or how much of a performance impact they will see? > > Do we need some kind of override or mechanism to authenticate certain > devices so they can use ATS and PRI? Sounds like we would need some form of an allow-list to start with so we can have something in the interim. I suppose a future platform might have a facilty to ensure ATS is secure and authenticated we could enable for all of devices in the system, in addition to PCI CMA/IDE. I think having a global override to enable all devices so platform can switch to current behavior, or maybe via a cmdline switch.. as much as we have a billion of those, it still gives an option in case someone needs it. > > If we do decide this is the right thing to do, I think we need to > expand the commit log a bit, because this is potentially a significant > user-visible change. > > > pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); > > } > > > > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c > > index b341628e47527..bb22b46c1d719 100644 > > --- a/drivers/pci/quirks.c > > +++ b/drivers/pci/quirks.c > > @@ -4934,6 +4934,13 @@ static void pci_quirk_enable_intel_rp_mpc_acs(struct > > pci_dev *dev) > > } > > } > > > > +/* > > + * Currently this quirk does the equivalent of > > + * PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF > > + * > > + * TODO: This quirk also needs to do equivalent of PCI_ACS_TB, > > + * if dev->external_facing || dev->untrusted > > + */ > > static int pci_quirk_enable_intel_pch_acs(struct pci_dev *dev) > > { > > if (!pci_quirk_intel_pch_acs_match(dev)) > > @@ -4973,6 +4980,14 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct > > pci_dev *dev) > > ctrl |= (cap & PCI_ACS_CR); > > ctrl |= (cap & PCI_ACS_UF); > > > > + /* Enable Translation Blocking for external devices */ > > + if (dev->external_facing || dev->untrusted) { > > + if (cap & PCI_ACS_TB) > > + ctrl |= PCI_ACS_TB; > > + else if (dev->external_facing) > > + pci_warn(dev, "ACS: No Translation Blocking on > > external-facing dev\n"); > > + } > > + > > pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl); > > > > pci_info(dev, "Intel SPT PCH root port ACS workaround enabled\n"); > > -- > > 2.27.0.212.ge8ba1cc988-goog > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/4] dma-pool: Fix atomic pool selection
Hi, I have merged this to a 5.8 tree along with "dma-pool: Only allocate from CMA when in the same memory zone" and tested it in various ACPI/DT combinations, particularly on the RPI4. It seems to be working fine. So thanks for your time and effort clearing this up! Tested-by: Jeremy Linton On 7/9/20 11:19 AM, Nicolas Saenz Julienne wrote: This is my attempt at fixing one of the regressions we've seen[1] after the introduction of per-zone atomic pools. This combined with "dma-pool: Do not allocate pool memory from CMA"[2] should fix the boot issues on Jeremy's RPi4 setup. [1] https://lkml.org/lkml/2020/7/2/974 [2] https://lkml.org/lkml/2020/7/8/1108 --- Nicolas Saenz Julienne (4): dma-direct: Provide function to check physical memory area validity dma-pool: Get rid of dma_in_atomic_pool() dma-pool: Introduce dma_guess_pool() dma-pool: Make sure atomic pool suits device include/linux/dma-direct.h | 1 + kernel/dma/direct.c| 2 +- kernel/dma/pool.c | 76 +++--- 3 files changed, 56 insertions(+), 23 deletions(-) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 1/4] PCI: Move pci_enable_acs() and its dependencies up in pci.c
On Tue, Jul 07, 2020 at 03:46:01PM -0700, Rajat Jain wrote: > Move pci_enable_acs() and the functions it depends on, further up in the > source code to avoid having to forward declare it when we make it static > in near future (next patch). > > No functional changes intended. > > Signed-off-by: Rajat Jain Applied patches 1-3 to pci/enumeration for v5.9, thanks! I held off on patch 4 (enabling PCI_ACS_TB) until we have a little more conversation on the impact of it. > --- > v4: Same as v3 > v3: Initial version of the patch, created per Bjorn's suggestion > > drivers/pci/pci.c | 254 +++--- > 1 file changed, 127 insertions(+), 127 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index ce096272f52b1..eec625f0e594e 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -777,6 +777,133 @@ int pci_wait_for_pending(struct pci_dev *dev, int pos, > u16 mask) > return 0; > } > > +static int pci_acs_enable; > + > +/** > + * pci_request_acs - ask for ACS to be enabled if supported > + */ > +void pci_request_acs(void) > +{ > + pci_acs_enable = 1; > +} > + > +static const char *disable_acs_redir_param; > + > +/** > + * pci_disable_acs_redir - disable ACS redirect capabilities > + * @dev: the PCI device > + * > + * For only devices specified in the disable_acs_redir parameter. > + */ > +static void pci_disable_acs_redir(struct pci_dev *dev) > +{ > + int ret = 0; > + const char *p; > + int pos; > + u16 ctrl; > + > + if (!disable_acs_redir_param) > + return; > + > + p = disable_acs_redir_param; > + while (*p) { > + ret = pci_dev_str_match(dev, p, &p); > + if (ret < 0) { > + pr_info_once("PCI: Can't parse disable_acs_redir > parameter: %s\n", > + disable_acs_redir_param); > + > + break; > + } else if (ret == 1) { > + /* Found a match */ > + break; > + } > + > + if (*p != ';' && *p != ',') { > + /* End of param or invalid format */ > + break; > + } > + p++; > + } > + > + if (ret != 1) > + return; > + > + if (!pci_dev_specific_disable_acs_redir(dev)) > + return; > + > + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); > + if (!pos) { > + pci_warn(dev, "cannot disable ACS redirect for this hardware as > it does not have ACS capabilities\n"); > + return; > + } > + > + pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl); > + > + /* P2P Request & Completion Redirect */ > + ctrl &= ~(PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC); > + > + pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); > + > + pci_info(dev, "disabled ACS redirect\n"); > +} > + > +/** > + * pci_std_enable_acs - enable ACS on devices using standard ACS capabilities > + * @dev: the PCI device > + */ > +static void pci_std_enable_acs(struct pci_dev *dev) > +{ > + int pos; > + u16 cap; > + u16 ctrl; > + > + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS); > + if (!pos) > + return; > + > + pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap); > + pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl); > + > + /* Source Validation */ > + ctrl |= (cap & PCI_ACS_SV); > + > + /* P2P Request Redirect */ > + ctrl |= (cap & PCI_ACS_RR); > + > + /* P2P Completion Redirect */ > + ctrl |= (cap & PCI_ACS_CR); > + > + /* Upstream Forwarding */ > + ctrl |= (cap & PCI_ACS_UF); > + > + pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); > +} > + > +/** > + * pci_enable_acs - enable ACS if hardware support it > + * @dev: the PCI device > + */ > +void pci_enable_acs(struct pci_dev *dev) > +{ > + if (!pci_acs_enable) > + goto disable_acs_redir; > + > + if (!pci_dev_specific_enable_acs(dev)) > + goto disable_acs_redir; > + > + pci_std_enable_acs(dev); > + > +disable_acs_redir: > + /* > + * Note: pci_disable_acs_redir() must be called even if ACS was not > + * enabled by the kernel because it may have been enabled by > + * platform firmware. So if we are told to disable it, we should > + * always disable it after setting the kernel's default > + * preferences. > + */ > + pci_disable_acs_redir(dev); > +} > + > /** > * pci_restore_bars - restore a device's BAR values (e.g. after wake-up) > * @dev: PCI device to have its BARs restored > @@ -3230,133 +3357,6 @@ void pci_configure_ari(struct pci_dev *dev) > } > } > > -static int pci_acs_enable; > - > -/** > - * pci_request_acs - ask for ACS to be enabled if supported > - */ > -void pci_request_acs(void) > -{ > - pci_acs_enable = 1; > -} > - > -static const char *disable_acs_redir_param; > - > -/** > - * pci_disabl
RE: [PATCH v10 4/5] dt-bindings: arm-smmu: add binding for Tegra194 SMMU
Thanks Rob. One question on setting "minItems: ". Please see below. >> +allOf: >> + - if: >> + properties: >> +compatible: >> + contains: >> +enum: >> + - nvidia,tegra194-smmu >> +then: >> + properties: >> +reg: >> + minItems: 2 >> + maxItems: 2 >This doesn't work. The main part of the schema already said there's only >1 reg region. This part is ANDed with that, not an override. You need to add >an else clause with 'maxItems: 1' and change the base schema to >{minItems: 1, maxItems: 2}. As the earlier version of base schema doesn't have "minItems: " set, should it be set to 0 for backward compatibility? Or can it just be omitted setting in base schema as before? "else" part to set "maxItems: 1" and setting "maxItems: 2" in base schema is clear to me. -KR ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 4/4] PCI/ACS: Enable PCI_ACS_TB for untrusted/external-facing devices
On Tue, Jul 07, 2020 at 03:46:04PM -0700, Rajat Jain wrote: > When enabling ACS, enable translation blocking for external facing ports > and untrusted devices. > > Signed-off-by: Rajat Jain > --- > v4: Add braces to avoid warning from kernel robot > print warning for only external-facing devices. > v3: print warning if ACS_TB not supported on external-facing/untrusted ports. > Minor code comments fixes. > v2: Commit log change > > drivers/pci/pci.c| 8 > drivers/pci/quirks.c | 15 +++ > 2 files changed, 23 insertions(+) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index 73a8627822140..a5a6bea7af7ce 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -876,6 +876,14 @@ static void pci_std_enable_acs(struct pci_dev *dev) > /* Upstream Forwarding */ > ctrl |= (cap & PCI_ACS_UF); > > + /* Enable Translation Blocking for external devices */ > + if (dev->external_facing || dev->untrusted) { > + if (cap & PCI_ACS_TB) > + ctrl |= PCI_ACS_TB; > + else if (dev->external_facing) > + pci_warn(dev, "ACS: No Translation Blocking on > external-facing dev\n"); > + } IIUC, this means that external devices can *never* use ATS and can never cache translations. And (I guess, I'm not an expert) it can also never use the Page Request Services? Is this what we want? Do we have any idea how many external devices this will affect or how much of a performance impact they will see? Do we need some kind of override or mechanism to authenticate certain devices so they can use ATS and PRI? If we do decide this is the right thing to do, I think we need to expand the commit log a bit, because this is potentially a significant user-visible change. > pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); > } > > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c > index b341628e47527..bb22b46c1d719 100644 > --- a/drivers/pci/quirks.c > +++ b/drivers/pci/quirks.c > @@ -4934,6 +4934,13 @@ static void pci_quirk_enable_intel_rp_mpc_acs(struct > pci_dev *dev) > } > } > > +/* > + * Currently this quirk does the equivalent of > + * PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF > + * > + * TODO: This quirk also needs to do equivalent of PCI_ACS_TB, > + * if dev->external_facing || dev->untrusted > + */ > static int pci_quirk_enable_intel_pch_acs(struct pci_dev *dev) > { > if (!pci_quirk_intel_pch_acs_match(dev)) > @@ -4973,6 +4980,14 @@ static int pci_quirk_enable_intel_spt_pch_acs(struct > pci_dev *dev) > ctrl |= (cap & PCI_ACS_CR); > ctrl |= (cap & PCI_ACS_UF); > > + /* Enable Translation Blocking for external devices */ > + if (dev->external_facing || dev->untrusted) { > + if (cap & PCI_ACS_TB) > + ctrl |= PCI_ACS_TB; > + else if (dev->external_facing) > + pci_warn(dev, "ACS: No Translation Blocking on > external-facing dev\n"); > + } > + > pci_write_config_dword(dev, pos + INTEL_SPT_ACS_CTRL, ctrl); > > pci_info(dev, "Intel SPT PCH root port ACS workaround enabled\n"); > -- > 2.27.0.212.ge8ba1cc988-goog > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH V6 4/5] iommu/dma-iommu: Use the dev->coherent_dma_mask
>Btw, what is the current state of converting intel-iommu to the dma-iommu These changes expose a bug in the i915 intel driver which hasn't been fixed yet. I don't think anyone is actively working on it but I plan on merging as many patches as I can so it's easier to do the intel-iommu -> dma-iommu conversion once the bug is fixed. You can read more about it here: https://patchwork.kernel.org/cover/11306999/ On Fri, 10 Jul 2020 at 08:59, Christoph Hellwig wrote: > > Btw, what is the current state of converting intel-iommu to the dma-iommu > code? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] dma-pool: Only allocate from CMA when in same memory zone
Hi, I have merged this to a 5.8 tree along with "dma-pool: Fix atomic pool selection" and tested it in various ACPI/DT combinations, particularly on the RPI4. It seems to be working fine. So thanks for your time and effort clearing this up! tested-by: Jeremy Linton On 7/10/20 9:10 AM, Nicolas Saenz Julienne wrote: There is no guarantee to CMA's placement, so allocating a zone specific atomic pool from CMA might return memory from a completely different memory zone. To get around this double check CMA's placement before allocating from it. Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask") Reported-by: Jeremy Linton Signed-off-by: Nicolas Saenz Julienne --- This is a code intensive alternative to "dma-pool: Do not allocate pool memory from CMA"[1]. [1] https://lkml.org/lkml/2020/7/8/1108 kernel/dma/pool.c | 36 +++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..ccf3eeb77e00 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -3,6 +3,7 @@ * Copyright (C) 2012 ARM Ltd. * Copyright (C) 2020 Google LLC */ +#include #include #include #include @@ -56,6 +57,39 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) pool_size_kernel += size; } +static bool cma_in_zone(gfp_t gfp) +{ + u64 zone_dma_end, zone_dma32_end; + phys_addr_t base, end; + unsigned long size; + struct cma *cma; + + cma = dev_get_cma_area(NULL); + if (!cma) + return false; + + size = cma_get_size(cma); + if (!size) + return false; + base = cma_get_base(cma) - memblock_start_of_DRAM(); + end = base + size - 1; + + zone_dma_end = IS_ENABLED(CONFIG_ZONE_DMA) ? DMA_BIT_MASK(zone_dma_bits) : 0; + zone_dma32_end = IS_ENABLED(CONFIG_ZONE_DMA32) ? DMA_BIT_MASK(32) : 0; + + /* CMA can't cross zone boundaries, see cma_activate_area() */ + if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp & GFP_DMA && + end <= zone_dma_end) + return true; + else if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp & GFP_DMA32 && + base > zone_dma_end && end <= zone_dma32_end) + return true; + else if (base > zone_dma32_end) + return true; + + return false; +} + static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, gfp_t gfp) { @@ -70,7 +104,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, do { pool_size = 1 << (PAGE_SHIFT + order); - if (dev_get_cma_area(NULL)) + if (cma_in_zone(gfp)) page = dma_alloc_from_contiguous(NULL, 1 << order, order, false); else ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: a question of split_huge_page
On Fri, Jul 10, 2020 at 2:35 AM Alex Shi wrote: > > 在 2020/7/10 下午1:28, Mika Penttilä 写道: > > > > > > On 10.7.2020 7.51, Alex Shi wrote: > >> > >> 在 2020/7/10 上午12:07, Kirill A. Shutemov 写道: > >>> On Thu, Jul 09, 2020 at 04:50:02PM +0100, Matthew Wilcox wrote: > On Thu, Jul 09, 2020 at 11:11:11PM +0800, Alex Shi wrote: > > Hi Kirill & Matthew, > > > > In the func call chain, from split_huge_page() to lru_add_page_tail(), > > Seems tail pages are added to lru list at line 963, but in this scenario > > the head page has no lru bit and isn't set the bit later. Why we do > > this? > > or do I miss sth? > I don't understand how we get to split_huge_page() with a page that's > not on an LRU list. Both anonymous and page cache pages should be on > an LRU list. What am I missing?> > >> > >> Thanks a lot for quick reply! > >> What I am confusing is the call chain: __iommu_dma_alloc_pages() > >> to split_huge_page(), in the func, splited page, > >> page = alloc_pages_node(nid, alloc_flags, order); > >> And if the pages were added into lru, they maybe reclaimed and lost, > >> that would be a panic bug. But in fact, this never happened for long time. > >> Also I put a BUG() at the line, it's nevre triggered in ltp, and > >> run_vmtests > > > > > > In __iommu_dma_alloc_pages, after split_huge_page(), who is taking a > > reference on tail pages? Seems tail pages are freed and the function > > errornously returns them in pages[] array for use? > > > > CC Joerg and iommu list, > > That's a good question. seems the split_huge_page was never triggered here, > since the func would check the PageLock first. and have page->mapping and > PageAnon > check, any of them couldn't be matched for the alloced page. > > Hi Joerg, > would you like look into this? do we still need the split_huge_page() here? I think this is the same problem which has been discussed a couple of weeks ago. Please refer to: https://lore.kernel.org/linux-mm/20200619001938.ga135...@carbon.dhcp.thefacebook.com/ I think the conclusion is split_huge_page() can't be used in this path at all. But we didn't reach a fix yet. > > Thanks > Alex > > int split_huge_page_to_list(struct page *page, struct list_head *list) > { > struct page *head = compound_head(page); > struct deferred_split *ds_queue = get_deferred_split_queue(head); > struct anon_vma *anon_vma = NULL; > struct address_space *mapping = NULL; > int count, mapcount, extra_pins, ret; > pgoff_t end; > > VM_BUG_ON_PAGE(is_huge_zero_page(head), head); > VM_BUG_ON_PAGE(!PageLocked(head), head);<== > > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] xen: introduce xen_vring_use_dma
Sorry for the late reply -- a couple of conferences kept me busy. On Wed, 1 Jul 2020, Michael S. Tsirkin wrote: > On Wed, Jul 01, 2020 at 10:34:53AM -0700, Stefano Stabellini wrote: > > Would you be in favor of a more flexible check along the lines of the > > one proposed in the patch that started this thread: > > > > if (xen_vring_use_dma()) > > return true; > > > > > > xen_vring_use_dma would be implemented so that it returns true when > > xen_swiotlb is required and false otherwise. > > Just to stress - with a patch like this virtio can *still* use DMA API > if PLATFORM_ACCESS is set. So if DMA API is broken on some platforms > as you seem to be saying, you guys should fix it before doing something > like this.. Yes, DMA API is broken with some interfaces (specifically: rpmesg and trusty), but for them PLATFORM_ACCESS is never set. That is why the errors weren't reported before. Xen special case aside, there is no problem under normal circumstances. If you are OK with this patch (after a little bit of clean-up), Peng, are you OK with sending an update or do you want me to? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [GIT PULL] dma-mapping fixes for 5.8
The pull request you sent on Fri, 10 Jul 2020 17:45:06 +0200: > git://git.infradead.org/users/hch/dma-mapping.git tags/dma-mapping-5.8-5 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/1bfde037425d91d1d615d30ec362f5f5c1ca0dd2 Thank you! -- Deet-doot-dot, I am a bot. https://korg.wiki.kernel.org/userdoc/prtracker ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH V6 4/5] iommu/dma-iommu: Use the dev->coherent_dma_mask
Btw, what is the current state of converting intel-iommu to the dma-iommu code? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[GIT PULL] dma-mapping fixes for 5.8
The following changes since commit 9ebcfadb0610322ac537dd7aa5d9cbc2b2894c68: Linux 5.8-rc3 (2020-06-28 15:00:24 -0700) are available in the Git repository at: git://git.infradead.org/users/hch/dma-mapping.git tags/dma-mapping-5.8-5 for you to fetch changes up to 68d237056e007c88031d80900cdba0945121a287: scatterlist: protect parameters of the sg_table related macros (2020-07-06 16:07:25 +0200) dma-mapping fixes for 5.8 - add a warning when the atomic pool is depleted (David Rientjes) - protect the parameters of the new scatterlist helper macros (Marek Szyprowski ) David Rientjes (1): dma-mapping: warn when coherent pool is depleted Marek Szyprowski (1): scatterlist: protect parameters of the sg_table related macros include/linux/scatterlist.h | 8 kernel/dma/pool.c | 6 +- 2 files changed, 9 insertions(+), 5 deletions(-) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag
On Fri, Jul 10, 2020 at 04:15:32PM +0200, Joerg Roedel wrote: > On Fri, Jul 10, 2020 at 02:05:27PM +0100, Will Deacon wrote: > > Ah, I'd already got this queued for 5.9: > > > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-joerg/arm-smmu/updates > > > > and I've queued a small number of patches on top of it now. > > > > Are you planning to send it for 5.8? If so, I suspect I'll have to rebase. > > No problem, nothing got pushed yet. I removed it from my branch and wait > for your pull-request. Great, thanks Joerg. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/2] iommu/intel: Avoid SAC address trick for PCIe devices
On Wed, Jul 08, 2020 at 12:32:41PM +0100, Robin Murphy wrote: > For devices stuck behind a conventional PCI bus, saving extra cycles at > 33MHz is probably fairly significant. However since native PCI Express > is now the norm for high-performance devices, the optimisation to always > prefer 32-bit addresses for the sake of avoiding DAC is starting to look > rather anachronistic. Technically 32-bit addresses do have shorter TLPs > on PCIe, but unless the device is saturating its link bandwidth with > small transfers it seems unlikely that the difference is appreciable. > > What definitely is appreciable, however, is that the IOVA allocator > doesn't behave all that well once the 32-bit space starts getting full. > As DMA working sets get bigger, this optimisation increasingly backfires > and adds considerable overhead to the dma_map path for use-cases like > high-bandwidth networking. > > As such, let's simply take it out of consideration for PCIe devices. > Technically this might work out suboptimal for a PCIe device stuck > behind a conventional PCI bridge, or for PCI-X devices that also have > native 64-bit addressing, but neither of those are likely to be found > in performance-critical parts of modern systems. > > Signed-off-by: Robin Murphy > --- > drivers/iommu/intel/iommu.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) Applied both, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] IOMMU DRIVERS: Replace HTTP links with HTTPS ones
On Wed, Jul 08, 2020 at 11:04:34PM +0200, Alexander A. Klimov wrote: > drivers/iommu/omap-iommu.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Queued, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag
Hi Will, On Fri, Jul 10, 2020 at 02:05:27PM +0100, Will Deacon wrote: > Ah, I'd already got this queued for 5.9: > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-joerg/arm-smmu/updates > > and I've queued a small number of patches on top of it now. > > Are you planning to send it for 5.8? If so, I suspect I'll have to rebase. No problem, nothing got pushed yet. I removed it from my branch and wait for your pull-request. Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v6 00/10] MT6779 IOMMU SUPPORT
On Fri, Jul 03, 2020 at 12:41:17PM +0800, Chao Hao wrote: > Chao Hao (10): > dt-bindings: mediatek: Add bindings for MT6779 > iommu/mediatek: Rename the register STANDARD_AXI_MODE(0x48) to MISC_CTRL > iommu/mediatek: Use a u32 flags to describe different HW features > iommu/mediatek: Setting MISC_CTRL register > iommu/mediatek: Move inv_sel_reg into the plat_data > iommu/mediatek: Add sub_comm id in translation fault > iommu/mediatek: Add REG_MMU_WR_LEN_CTRL register definition > iommu/mediatek: Extend protect pa alignment value > iommu/mediatek: Modify MMU_CTRL register setting > iommu/mediatek: Add mt6779 basic support Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] dma-pool: Only allocate from CMA when in same memory zone
There is no guarantee to CMA's placement, so allocating a zone specific atomic pool from CMA might return memory from a completely different memory zone. To get around this double check CMA's placement before allocating from it. Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask") Reported-by: Jeremy Linton Signed-off-by: Nicolas Saenz Julienne --- This is a code intensive alternative to "dma-pool: Do not allocate pool memory from CMA"[1]. [1] https://lkml.org/lkml/2020/7/8/1108 kernel/dma/pool.c | 36 +++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..ccf3eeb77e00 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -3,6 +3,7 @@ * Copyright (C) 2012 ARM Ltd. * Copyright (C) 2020 Google LLC */ +#include #include #include #include @@ -56,6 +57,39 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) pool_size_kernel += size; } +static bool cma_in_zone(gfp_t gfp) +{ + u64 zone_dma_end, zone_dma32_end; + phys_addr_t base, end; + unsigned long size; + struct cma *cma; + + cma = dev_get_cma_area(NULL); + if (!cma) + return false; + + size = cma_get_size(cma); + if (!size) + return false; + base = cma_get_base(cma) - memblock_start_of_DRAM(); + end = base + size - 1; + + zone_dma_end = IS_ENABLED(CONFIG_ZONE_DMA) ? DMA_BIT_MASK(zone_dma_bits) : 0; + zone_dma32_end = IS_ENABLED(CONFIG_ZONE_DMA32) ? DMA_BIT_MASK(32) : 0; + + /* CMA can't cross zone boundaries, see cma_activate_area() */ + if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp & GFP_DMA && + end <= zone_dma_end) + return true; + else if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp & GFP_DMA32 && + base > zone_dma_end && end <= zone_dma32_end) + return true; + else if (base > zone_dma32_end) + return true; + + return false; +} + static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, gfp_t gfp) { @@ -70,7 +104,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, do { pool_size = 1 << (PAGE_SHIFT + order); - if (dev_get_cma_area(NULL)) + if (cma_in_zone(gfp)) page = dma_alloc_from_contiguous(NULL, 1 << order, order, false); else -- 2.27.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v6 07/10] iommu/mediatek: Add REG_MMU_WR_LEN_CTRL register definition
On 03/07/2020 06:41, Chao Hao wrote: Some platforms(ex: mt6779) need to improve performance by setting REG_MMU_WR_LEN_CTRL register. And we can use WR_THROT_EN macro to control whether we need to set the register. If the register uses default value, iommu will send command to EMI without restriction, when the number of commands become more and more, it will drop the EMI performance. So when more than ten_commands(default value) don't be handled for EMI, iommu will stop send command to EMI for keeping EMI's performace by enabling write throttling mechanism(bit[5][21]=0) in MMU_WR_LEN_CTRL register. Cc: Matthias Brugger Signed-off-by: Chao Hao Reviewed-by: Matthias Brugger --- drivers/iommu/mtk_iommu.c | 11 +++ drivers/iommu/mtk_iommu.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 0d96dcd8612b..5c8e141668fc 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -46,6 +46,8 @@ #define F_MMU_STANDARD_AXI_MODE_MASK (BIT(3) | BIT(19)) #define REG_MMU_DCM_DIS0x050 +#define REG_MMU_WR_LEN_CTRL0x054 +#define F_MMU_WR_THROT_DIS_MASK(BIT(5) | BIT(21)) #define REG_MMU_CTRL_REG 0x110 #define F_MMU_TF_PROT_TO_PROGRAM_ADDR (2 << 4) @@ -112,6 +114,7 @@ #define RESET_AXI BIT(3) #define OUT_ORDER_WR_EN BIT(4) #define HAS_SUB_COMM BIT(5) +#define WR_THROT_ENBIT(6) #define MTK_IOMMU_HAS_FLAG(pdata, _x) \ pdata)->flags) & (_x)) == (_x)) @@ -593,6 +596,12 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); + if (MTK_IOMMU_HAS_FLAG(data->plat_data, WR_THROT_EN)) { + /* write command throttling mode */ + regval = readl_relaxed(data->base + REG_MMU_WR_LEN_CTRL); + regval &= ~F_MMU_WR_THROT_DIS_MASK; + writel_relaxed(regval, data->base + REG_MMU_WR_LEN_CTRL); + } if (MTK_IOMMU_HAS_FLAG(data->plat_data, RESET_AXI)) { /* The register is called STANDARD_AXI_MODE in this case */ @@ -747,6 +756,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) struct mtk_iommu_suspend_reg *reg = &data->reg; void __iomem *base = data->base; + reg->wr_len_ctrl = readl_relaxed(base + REG_MMU_WR_LEN_CTRL); reg->misc_ctrl = readl_relaxed(base + REG_MMU_MISC_CTRL); reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS); reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); @@ -771,6 +781,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); return ret; } + writel_relaxed(reg->wr_len_ctrl, base + REG_MMU_WR_LEN_CTRL); writel_relaxed(reg->misc_ctrl, base + REG_MMU_MISC_CTRL); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 46d0d47b22e1..31edd05e2eb1 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -31,6 +31,7 @@ struct mtk_iommu_suspend_reg { u32 int_main_control; u32 ivrp_paddr; u32 vld_pa_rng; + u32 wr_len_ctrl; }; enum mtk_iommu_plat { ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: generic DMA bypass flag v4
On Wed, 8 Jul 2020 17:24:44 +0200 Christoph Hellwig wrote: > Note that as-is this breaks the XSK buffer pool, which unfortunately > poked directly into DMA internals. A fix for that is already queued > up in the netdev tree. > > Jesper and XDP gang: this should not regress any performance as > the dma-direct calls are now inlined into the out of line DMA mapping > calls. But if you can verify the performance numbers that would be > greatly appreciated. From a superficial review of the patches, they look okay to me. I don't have time to run a performance benchmark (before I go on vacation). I hoped Björn could test/benchmark this(?), given (as mentioned) this also affect XSK / AF_XDP performance. -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag
Hi Joerg, On Fri, Jul 10, 2020 at 02:58:32PM +0200, Joerg Roedel wrote: > On Fri, Jul 03, 2020 at 05:25:48PM +0100, Will Deacon wrote: > > The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and > > has no in-tree users. Remove it. > > > > Cc: Robin Murphy > > Cc: "Isaac J. Manjarres" > > Cc: Joerg Roedel > > Cc: Christoph Hellwig > > Cc: Sai Prakash Ranjan > > Cc: Rob Clark > > Signed-off-by: Will Deacon > > --- > > > > As discussed in [1], sounds like this should be a domain attribute anyway > > when it's needed by the GPU. > > > > [1] > > https://lore.kernel.org/r/caf6aegscrovtsi2r7_aukmh9luoc_gumr0w0kujc2cegpfj...@mail.gmail.com > > > > drivers/iommu/io-pgtable-arm.c | 3 --- > > include/linux/iommu.h | 6 -- > > 2 files changed, 9 deletions(-) > > Applied, thanks. Ah, I'd already got this queued for 5.9: https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-joerg/arm-smmu/updates and I've queued a small number of patches on top of it now. Are you planning to send it for 5.8? If so, I suspect I'll have to rebase. Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v3 06/14] vfio/type1: Add VFIO_IOMMU_PASID_REQUEST (alloc/free)
> From: Alex Williamson > Sent: Friday, July 10, 2020 8:55 PM > > On Fri, 10 Jul 2020 05:39:57 + > "Liu, Yi L" wrote: > > > Hi Alex, > > > > > From: Alex Williamson > > > Sent: Thursday, July 9, 2020 10:28 PM > > > > > > On Thu, 9 Jul 2020 07:16:31 + > > > "Liu, Yi L" wrote: > > > > > > > Hi Alex, > > > > > > > > After more thinking, looks like adding a r-b tree is still not enough to > > > > solve the potential problem for free a range of PASID in one ioctl. If > > > > caller gives [0, MAX_UNIT] in the free request, kernel anyhow should > > > > loop all the PASIDs and search in the r-b tree. Even VFIO can track the > > > > smallest/largest allocated PASID, and limit the free range to an > > > > accurate > > > > range, it is still no efficient. For example, user has allocated two > > > > PASIDs > > > > ( 1 and 999), and user gives the [0, MAX_UNIT] range in free request. > > > > VFIO > > > > will limit the free range to be [1, 999], but still needs to loop PASID > > > > 1 - > > > > 999, and search in r-b tree. > > > > > > That sounds like a poor tree implementation. Look at vfio_find_dma() > > > for instance, it returns a node within the specified range. If the > > > tree has two nodes within the specified range we should never need to > > > call a search function like vfio_find_dma() more than three times. We > > > call it once, get the first node, remove it. Call it again, get the > > > other node, remove it. Call a third time, find no matches, we're done. > > > So such an implementation limits searches to N+1 where N is the number > > > of nodes within the range. > > > > I see. When getting a free range from user. Use the range to find suited > > PASIDs in the r-b tree. For the example I mentioned, if giving [0, > > MAX_UNIT], > > will find two nodes. If giving [0, 100] range, then only one node will be > > found. But even though, it still take some time if the user holds a bunch > > of PASIDs and user gives a big free range. > > > But that time is bounded. The complexity of the tree and maximum > number of operations on the tree are bounded by the number of nodes, > which is bound by the user's pasid quota. Thanks, yes, let me try it. thanks. :-) Regards, Yi Liu > Alex > > > > > So I'm wondering can we fall back to prior proposal which only free one > > > > PASID for a free request. how about your opinion? > > > > > > Doesn't it still seem like it would be a useful user interface to have > > > a mechanism to free all pasids, by calling with exactly [0, MAX_UINT]? > > > I'm not sure if there's another use case for this given than the user > > > doesn't have strict control of the pasid values they get. Thanks, > > > > I don't have such use case neither. perhaps we may allow it in future by > > adding flag. but if it's still useful, I may try with your suggestion. :-) > > > > Regards, > > Yi Liu > > > > > Alex > > > > > > > > From: Liu, Yi L > > > > > Sent: Thursday, July 9, 2020 10:26 AM > > > > > > > > > > Hi Kevin, > > > > > > > > > > > From: Tian, Kevin > > > > > > Sent: Thursday, July 9, 2020 10:18 AM > > > > > > > > > > > > > From: Liu, Yi L > > > > > > > Sent: Thursday, July 9, 2020 10:08 AM > > > > > > > > > > > > > > Hi Kevin, > > > > > > > > > > > > > > > From: Tian, Kevin > > > > > > > > Sent: Thursday, July 9, 2020 9:57 AM > > > > > > > > > > > > > > > > > From: Liu, Yi L > > > > > > > > > Sent: Thursday, July 9, 2020 8:32 AM > > > > > > > > > > > > > > > > > > Hi Alex, > > > > > > > > > > > > > > > > > > > Alex Williamson > > > > > > > > > > Sent: Thursday, July 9, 2020 3:55 AM > > > > > > > > > > > > > > > > > > > > On Wed, 8 Jul 2020 08:16:16 + "Liu, Yi L" > > > > > > > > > > wrote: > > > > > > > > > > > > > > > > > > > > > Hi Alex, > > > > > > > > > > > > > > > > > > > > > > > From: Liu, Yi L < yi.l@intel.com> > > > > > > > > > > > > Sent: Friday, July 3, 2020 2:28 PM > > > > > > > > > > > > > > > > > > > > > > > > Hi Alex, > > > > > > > > > > > > > > > > > > > > > > > > > From: Alex Williamson > > > > > > > > > > > > > Sent: Friday, July 3, 2020 5:19 AM > > > > > > > > > > > > > > > > > > > > > > > > > > On Wed, 24 Jun 2020 01:55:19 -0700 Liu Yi L > > > > > > > > > > > > > wrote: > > > > > > > > > > > > > > > > > > > > > > > > > > > This patch allows user space to request PASID > > > > > > > > > > > > > > allocation/free, > > > > > > > e.g. > > > > > > > > > > > > > > when serving the request from the guest. > > > > > > > > > > > > > > > > > > > > > > > > > > > > PASIDs that are not freed by userspace are > > > > > > > > > > > > > > automatically freed > > > > > > > > > when > > > > > > > > > > > > > > the IOASID set is destroyed when process exits. > > > > > > > > > > > [...] > > > > > > > > > > > > > > +static int vfio_iommu_type1_pasid_request(struct > > > > > > > > > > > > > > +vfio_iommu > > > > > > > > > *iommu, > > > > > > > > > > > > > > + unsigned long > arg) { > > > > > > > > > > > > > > + st
Re: [PATCH] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag
On Fri, Jul 03, 2020 at 05:25:48PM +0100, Will Deacon wrote: > The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and > has no in-tree users. Remove it. > > Cc: Robin Murphy > Cc: "Isaac J. Manjarres" > Cc: Joerg Roedel > Cc: Christoph Hellwig > Cc: Sai Prakash Ranjan > Cc: Rob Clark > Signed-off-by: Will Deacon > --- > > As discussed in [1], sounds like this should be a domain attribute anyway > when it's needed by the GPU. > > [1] > https://lore.kernel.org/r/caf6aegscrovtsi2r7_aukmh9luoc_gumr0w0kujc2cegpfj...@mail.gmail.com > > drivers/iommu/io-pgtable-arm.c | 3 --- > include/linux/iommu.h | 6 -- > 2 files changed, 9 deletions(-) Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/2] iommu: Tidy up Kconfig for SoC IOMMUs
On Fri, Jul 03, 2020 at 05:03:19PM +0100, Robin Murphy wrote: > Signed-off-by: Robin Murphy > --- > > Based on the current iommu/next branch. Applied both, thanks Robin. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: a question of split_huge_page
Adding Robin. On Fri, Jul 10, 2020 at 05:34:52PM +0800, Alex Shi wrote: > 在 2020/7/10 下午1:28, Mika Penttilä 写道: > > > > > > On 10.7.2020 7.51, Alex Shi wrote: > >> > >> 在 2020/7/10 上午12:07, Kirill A. Shutemov 写道: > >>> On Thu, Jul 09, 2020 at 04:50:02PM +0100, Matthew Wilcox wrote: > On Thu, Jul 09, 2020 at 11:11:11PM +0800, Alex Shi wrote: > > Hi Kirill & Matthew, > > > > In the func call chain, from split_huge_page() to lru_add_page_tail(), > > Seems tail pages are added to lru list at line 963, but in this scenario > > the head page has no lru bit and isn't set the bit later. Why we do > > this? > > or do I miss sth? > I don't understand how we get to split_huge_page() with a page that's > not on an LRU list. Both anonymous and page cache pages should be on > an LRU list. What am I missing?> > >> > >> Thanks a lot for quick reply! > >> What I am confusing is the call chain: __iommu_dma_alloc_pages() > >> to split_huge_page(), in the func, splited page, > >>page = alloc_pages_node(nid, alloc_flags, order); > >> And if the pages were added into lru, they maybe reclaimed and lost, > >> that would be a panic bug. But in fact, this never happened for long time. > >> Also I put a BUG() at the line, it's nevre triggered in ltp, and > >> run_vmtests > > > > > > In __iommu_dma_alloc_pages, after split_huge_page(), who is taking a > > reference on tail pages? Seems tail pages are freed and the function > > errornously returns them in pages[] array for use? > > > > CC Joerg and iommu list, > > That's a good question. seems the split_huge_page was never triggered here, > since the func would check the PageLock first. and have page->mapping and > PageAnon > check, any of them couldn't be matched for the alloced page. > > Hi Joerg, > would you like look into this? do we still need the split_huge_page() here? > > Thanks > Alex > > int split_huge_page_to_list(struct page *page, struct list_head *list) > { > struct page *head = compound_head(page); > struct deferred_split *ds_queue = get_deferred_split_queue(head); > struct anon_vma *anon_vma = NULL; > struct address_space *mapping = NULL; > int count, mapcount, extra_pins, ret; > pgoff_t end; > > VM_BUG_ON_PAGE(is_huge_zero_page(head), head); > VM_BUG_ON_PAGE(!PageLocked(head), head); <== > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 06/14] vfio/type1: Add VFIO_IOMMU_PASID_REQUEST (alloc/free)
On Fri, 10 Jul 2020 05:39:57 + "Liu, Yi L" wrote: > Hi Alex, > > > From: Alex Williamson > > Sent: Thursday, July 9, 2020 10:28 PM > > > > On Thu, 9 Jul 2020 07:16:31 + > > "Liu, Yi L" wrote: > > > > > Hi Alex, > > > > > > After more thinking, looks like adding a r-b tree is still not enough to > > > solve the potential problem for free a range of PASID in one ioctl. If > > > caller gives [0, MAX_UNIT] in the free request, kernel anyhow should > > > loop all the PASIDs and search in the r-b tree. Even VFIO can track the > > > smallest/largest allocated PASID, and limit the free range to an accurate > > > range, it is still no efficient. For example, user has allocated two > > > PASIDs > > > ( 1 and 999), and user gives the [0, MAX_UNIT] range in free request. VFIO > > > will limit the free range to be [1, 999], but still needs to loop PASID 1 > > > - > > > 999, and search in r-b tree. > > > > That sounds like a poor tree implementation. Look at vfio_find_dma() > > for instance, it returns a node within the specified range. If the > > tree has two nodes within the specified range we should never need to > > call a search function like vfio_find_dma() more than three times. We > > call it once, get the first node, remove it. Call it again, get the > > other node, remove it. Call a third time, find no matches, we're done. > > So such an implementation limits searches to N+1 where N is the number > > of nodes within the range. > > I see. When getting a free range from user. Use the range to find suited > PASIDs in the r-b tree. For the example I mentioned, if giving [0, MAX_UNIT], > will find two nodes. If giving [0, 100] range, then only one node will be > found. But even though, it still take some time if the user holds a bunch > of PASIDs and user gives a big free range. But that time is bounded. The complexity of the tree and maximum number of operations on the tree are bounded by the number of nodes, which is bound by the user's pasid quota. Thanks, Alex > > > So I'm wondering can we fall back to prior proposal which only free one > > > PASID for a free request. how about your opinion? > > > > Doesn't it still seem like it would be a useful user interface to have > > a mechanism to free all pasids, by calling with exactly [0, MAX_UINT]? > > I'm not sure if there's another use case for this given than the user > > doesn't have strict control of the pasid values they get. Thanks, > > I don't have such use case neither. perhaps we may allow it in future by > adding flag. but if it's still useful, I may try with your suggestion. :-) > > Regards, > Yi Liu > > > Alex > > > > > > From: Liu, Yi L > > > > Sent: Thursday, July 9, 2020 10:26 AM > > > > > > > > Hi Kevin, > > > > > > > > > From: Tian, Kevin > > > > > Sent: Thursday, July 9, 2020 10:18 AM > > > > > > > > > > > From: Liu, Yi L > > > > > > Sent: Thursday, July 9, 2020 10:08 AM > > > > > > > > > > > > Hi Kevin, > > > > > > > > > > > > > From: Tian, Kevin > > > > > > > Sent: Thursday, July 9, 2020 9:57 AM > > > > > > > > > > > > > > > From: Liu, Yi L > > > > > > > > Sent: Thursday, July 9, 2020 8:32 AM > > > > > > > > > > > > > > > > Hi Alex, > > > > > > > > > > > > > > > > > Alex Williamson > > > > > > > > > Sent: Thursday, July 9, 2020 3:55 AM > > > > > > > > > > > > > > > > > > On Wed, 8 Jul 2020 08:16:16 + "Liu, Yi L" > > > > > > > > > wrote: > > > > > > > > > > > > > > > > > > > Hi Alex, > > > > > > > > > > > > > > > > > > > > > From: Liu, Yi L < yi.l@intel.com> > > > > > > > > > > > Sent: Friday, July 3, 2020 2:28 PM > > > > > > > > > > > > > > > > > > > > > > Hi Alex, > > > > > > > > > > > > > > > > > > > > > > > From: Alex Williamson > > > > > > > > > > > > Sent: Friday, July 3, 2020 5:19 AM > > > > > > > > > > > > > > > > > > > > > > > > On Wed, 24 Jun 2020 01:55:19 -0700 Liu Yi L > > > > > > > > > > > > wrote: > > > > > > > > > > > > > > > > > > > > > > > > > This patch allows user space to request PASID > > > > > > > > > > > > > allocation/free, > > > > > > e.g. > > > > > > > > > > > > > when serving the request from the guest. > > > > > > > > > > > > > > > > > > > > > > > > > > PASIDs that are not freed by userspace are > > > > > > > > > > > > > automatically freed > > > > > > > > when > > > > > > > > > > > > > the IOASID set is destroyed when process exits. > > > > > > > > > > [...] > > > > > > > > > > > > > +static int vfio_iommu_type1_pasid_request(struct > > > > > > > > > > > > > +vfio_iommu > > > > > > > > *iommu, > > > > > > > > > > > > > + unsigned long > > > > > > > > > > > > > arg) { > > > > > > > > > > > > > + struct vfio_iommu_type1_pasid_request req; > > > > > > > > > > > > > + unsigned long minsz; > > > > > > > > > > > > > + > > > > > > > > > > > > > + minsz = offsetofend(struct > > > > > vfio_iommu_type1_pasid_request, > > > > > > > > > range); > > > > > > > > > > > > > + > >
Re: [PATCH v2 0/3] iommu/amd: I/O VA address limits
Hi Sebastian, On Wed, Jul 01, 2020 at 12:46:31AM +0200, Sebastian Ott wrote: > The IVRS ACPI table specifies maximum address sizes for I/O virtual > addresses that can be handled by the IOMMUs in the system. Parse that > data from the IVRS header to provide aperture information for DMA > mappings and users of the iommu API. > > Changes for V2: > - use limits in iommu_setup_dma_ops() > - rebased to current upstream > > Sebastian Ott (3): > iommu/amd: Parse supported address sizes from IVRS > iommu/amd: Restrict aperture for domains to conform with IVRS > iommu/amd: Actually enforce geometry aperture Thanks for the changes. May I ask what the reason for those changes are? AFAIK all AMD IOMMU implementations (in hardware) support full 64bit address spaces, and the IVRS table might actually be wrong, limiting the address space in the worst case to only 32 bit. Regards, Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: a question of split_huge_page
在 2020/7/10 下午1:28, Mika Penttilä 写道: > > > On 10.7.2020 7.51, Alex Shi wrote: >> >> 在 2020/7/10 上午12:07, Kirill A. Shutemov 写道: >>> On Thu, Jul 09, 2020 at 04:50:02PM +0100, Matthew Wilcox wrote: On Thu, Jul 09, 2020 at 11:11:11PM +0800, Alex Shi wrote: > Hi Kirill & Matthew, > > In the func call chain, from split_huge_page() to lru_add_page_tail(), > Seems tail pages are added to lru list at line 963, but in this scenario > the head page has no lru bit and isn't set the bit later. Why we do this? > or do I miss sth? I don't understand how we get to split_huge_page() with a page that's not on an LRU list. Both anonymous and page cache pages should be on an LRU list. What am I missing?> >> >> Thanks a lot for quick reply! >> What I am confusing is the call chain: __iommu_dma_alloc_pages() >> to split_huge_page(), in the func, splited page, >> page = alloc_pages_node(nid, alloc_flags, order); >> And if the pages were added into lru, they maybe reclaimed and lost, >> that would be a panic bug. But in fact, this never happened for long time. >> Also I put a BUG() at the line, it's nevre triggered in ltp, and run_vmtests > > > In __iommu_dma_alloc_pages, after split_huge_page(), who is taking a > reference on tail pages? Seems tail pages are freed and the function > errornously returns them in pages[] array for use? > CC Joerg and iommu list, That's a good question. seems the split_huge_page was never triggered here, since the func would check the PageLock first. and have page->mapping and PageAnon check, any of them couldn't be matched for the alloced page. Hi Joerg, would you like look into this? do we still need the split_huge_page() here? Thanks Alex int split_huge_page_to_list(struct page *page, struct list_head *list) { struct page *head = compound_head(page); struct deferred_split *ds_queue = get_deferred_split_queue(head); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; int count, mapcount, extra_pins, ret; pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(head), head);<== > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] dma-pool: use single atomic pool for both DMA zones
On Thu, 2020-07-09 at 14:49 -0700, David Rientjes wrote: > On Wed, 8 Jul 2020, Christoph Hellwig wrote: > > > On Wed, Jul 08, 2020 at 06:00:35PM +0200, Nicolas Saenz Julienne wrote: > > > On Wed, 2020-07-08 at 17:35 +0200, Christoph Hellwig wrote: > > > > On Tue, Jul 07, 2020 at 02:28:04PM +0200, Nicolas Saenz Julienne wrote: > > > > > When allocating atomic DMA memory for a device, the dma-pool core > > > > > queries __dma_direct_optimal_gfp_mask() to check which atomic pool to > > > > > use. It turns out the GFP flag returned is only an optimistic guess. > > > > > The pool selected might sometimes live in a zone higher than the > > > > > device's view of memory. > > > > > > > > > > As there isn't a way to grantee a mapping between a device's DMA > > > > > constraints and correct GFP flags this unifies both DMA atomic pools. > > > > > The resulting pool is allocated in the lower DMA zone available, if > > > > > any, > > > > > so as for devices to always get accessible memory while having the > > > > > flexibility of using dma_pool_kernel for the non constrained ones. > > > > > > > > > > Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map > > > > > to gfp > > > > > mask") > > > > > Reported-by: Jeremy Linton > > > > > Suggested-by: Robin Murphy > > > > > Signed-off-by: Nicolas Saenz Julienne > > > > > > > > Hmm, this is not what I expected from the previous thread. I thought > > > > we'd just use one dma pool based on runtime available of the zones.. > > > > > > I may be misunderstanding you, but isn't that going back to how things > > > used to > > > be before pulling in David Rientjes' work? The benefit of having a > > > GFP_KERNEL > > > pool is that non-address-constrained devices can get their atomic memory > > > there, > > > instead of consuming somewhat scarcer low memory. > > > > Yes, I think we are misunderstanding each other. I don't want to remove > > any pool, just make better runtime decisions when to use them. > > > > Just to be extra explicit for the record and for my own understanding: > Nicolas, your patch series "dma-pool: Fix atomic pool selection" obsoletes > this patch, right? :) Yes, that's right. Regards, Nicolas signature.asc Description: This is a digitally signed message part ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 4/4] iommu/vt-d: Add page response ops support
Hi Kevin, On 2020/7/10 13:49, Tian, Kevin wrote: From: Lu Baolu Sent: Friday, July 10, 2020 1:37 PM Hi Kevin, On 2020/7/10 10:42, Tian, Kevin wrote: From: Lu Baolu Sent: Thursday, July 9, 2020 3:06 PM After page requests are handled, software must respond to the device which raised the page request with the result. This is done through the iommu ops.page_response if the request was reported to outside of vendor iommu driver through iommu_report_device_fault(). This adds the VT-d implementation of page_response ops. Co-developed-by: Jacob Pan Signed-off-by: Jacob Pan Co-developed-by: Liu Yi L Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 1 + drivers/iommu/intel/svm.c | 100 include/linux/intel-iommu.h | 3 ++ 3 files changed, 104 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4a6b6960fc32..98390a6d8113 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6057,6 +6057,7 @@ const struct iommu_ops intel_iommu_ops = { .sva_bind = intel_svm_bind, .sva_unbind = intel_svm_unbind, .sva_get_pasid = intel_svm_get_pasid, + .page_response = intel_svm_page_response, #endif }; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d24e71bac8db..839d2af377b6 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -1082,3 +1082,103 @@ int intel_svm_get_pasid(struct iommu_sva *sva) return pasid; } + +int intel_svm_page_response(struct device *dev, + struct iommu_fault_event *evt, + struct iommu_page_response *msg) +{ + struct iommu_fault_page_request *prm; + struct intel_svm_dev *sdev = NULL; + struct intel_svm *svm = NULL; + struct intel_iommu *iommu; + bool private_present; + bool pasid_present; + bool last_page; + u8 bus, devfn; + int ret = 0; + u16 sid; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!msg || !evt) + return -EINVAL; + + mutex_lock(&pasid_mutex); + + prm = &evt->fault.prm; + sid = PCI_DEVID(bus, devfn); + pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; + last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + + if (pasid_present) { + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) { + ret = -EINVAL; + goto out; + } + + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev); + if (ret || !sdev) { + ret = -ENODEV; + goto out; + } + + /* +* For responses from userspace, need to make sure that the +* pasid has been bound to its mm. + */ + if (svm->flags & SVM_FLAG_GUEST_MODE) { + struct mm_struct *mm; + + mm = get_task_mm(current); + if (!mm) { + ret = -EINVAL; + goto out; + } + + if (mm != svm->mm) { + ret = -ENODEV; + mmput(mm); + goto out; + } + + mmput(mm); + } + } else { + pr_err_ratelimited("Invalid page response: no pasid\n"); + ret = -EINVAL; + goto out; check pasid=0 first, then no need to indent so many lines above. Yes. + } + + /* +* Per VT-d spec. v3.0 ch7.7, system software must respond +* with page group response if private data is present (PDP) +* or last page in group (LPIG) bit is set. This is an +* additional VT-d requirement beyond PCI ATS spec. +*/ What is the behavior if system software doesn't follow the requirement? en... maybe the question is really about whether the information in prm comes from userspace or from internally-recorded info in iommu core. The former cannot be trusted. The latter one is OK. We require a page response when reporting such event. The upper layer (IOMMU core or VFIO) will be implemented with a timer, if userspace doesn't respond in time, the timer will get expired and a FAILURE response will be sent to device. Yes, timer helps when userspace doesn't respond. Then I'm fine with this patch. Reviewed-by: Kevin Tian btw when you say IOMMU core or VFIO, does it mean the timer mechanism is not implemented yet? It's in local
Re: [PATCH v2 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module
On Thu, Jul 09, 2020 at 08:28:45PM -0700, John Stultz wrote: > On Thu, Jul 2, 2020 at 7:18 AM Will Deacon wrote: > > On Thu, Jun 25, 2020 at 12:10:39AM +, John Stultz wrote: > > > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig > > > index b510f67dfa49..714893535dd2 100644 > > > --- a/drivers/iommu/Kconfig > > > +++ b/drivers/iommu/Kconfig > > > @@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU > > > config ARM_SMMU > > > tristate "ARM Ltd. System MMU (SMMU) Support" > > > depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) && > > > MMU > > > + depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y > > > select IOMMU_API > > > select IOMMU_IO_PGTABLE_LPAE > > > select ARM_DMA_USE_IOMMU if ARM > > > > This looks like a giant hack. Is there another way to handle this? > > Sorry for the slow response here. > > So, I agree the syntax looks strange (requiring a comment obviously > isn't a good sign), but it's a fairly common way to ensure drivers > don't get built in if they optionally depend on another driver that > can be built as a module. > See "RFKILL || !RFKILL", "EXTCON || !EXTCON", or "USB_GADGET || > !USB_GADGET" in various Kconfig files. > > I'm open to using a different method, and in a different thread you > suggested using something like symbol_get(). I need to look into it > more, but that approach looks even more messy and prone to runtime > failures. Blocking the unwanted case at build time seems a bit cleaner > to me, even if the syntax is odd. Maybe just split it out then, so that the ARM_SMMU entry doesn't have this, as that driver _really_ doesn't care about SoC details like this. In other words, add a new entry along the lines of: config ARM_SMMU_QCOM_IMPL default y #if QCOM_SCM=m this can't be =y depends on ARM_SMMU & (QCOM_SCM || !QCOM_SCM) and then have arm-smmu.h provide a static inline qcom_smmu_impl_init() which returns -ENODEV if CONFIG_ARM_SMMU_QCOM_IMPL=n and hack the Makefile so that we don't bother to compile arm-smmu-qcom.o in that case. Would that work? Will ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu