[RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Lu Baolu
The IOMMU page tables are updated using iommu_map/unmap() interfaces.
Currently, there is no mandatory requirement for drivers to use locks
to ensure concurrent updates to page tables, because it's assumed that
overlapping IOVA ranges do not have concurrent updates. Therefore the
IOMMU drivers only need to take care of concurrent updates to page
table entries at the same level.

But enabling new features challenges this assumption. For example, the
hardware assisted dirty page tracking feature requires scanning page
tables in interfaces other than mapping and unmapping. This might result
in a use-after-free scenario in which a level page table has been freed
by the unmap() interface, while another thread is scanning the next level
page table.

This adds RCU-protected page free support so that the pages are not
freed and reused until after an RCU grace period. Hence, the page tables
are safe to scan within an rcu_read_lock critical region. Considering
that scanning the page table is a rare case, this also adds a domain
flag, and the RCU-protected page free is only used when this flag is set.

Signed-off-by: Lu Baolu 
---
 include/linux/iommu.h |  9 +
 drivers/iommu/iommu.c | 23 +++
 2 files changed, 32 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e1afe169549..6f68eabb8567 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -95,6 +95,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   unsigned long concurrent_traversal:1;
 };
 
 static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -657,6 +658,12 @@ static inline void dev_iommu_priv_set(struct device *dev, 
void *priv)
dev->iommu->priv = priv;
 }
 
+static inline void domain_set_concurrent_traversal(struct iommu_domain *domain,
+  bool value)
+{
+   domain->concurrent_traversal = value;
+}
+
 int iommu_probe_device(struct device *dev);
 void iommu_release_device(struct device *dev);
 
@@ -677,6 +684,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, 
void *owner);
 void iommu_group_release_dma_owner(struct iommu_group *group);
 bool iommu_group_dma_owner_claimed(struct iommu_group *group);
 
+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages);
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..ceeb97ebe3e2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3252,3 +3252,26 @@ bool iommu_group_dma_owner_claimed(struct iommu_group 
*group)
return user;
 }
 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+
+static void pgtble_page_free_rcu(struct rcu_head *rcu)
+{
+   struct page *page = container_of(rcu, struct page, rcu_head);
+
+   __free_pages(page, 0);
+}
+
+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(&page->lru);
+   call_rcu(&page->rcu_head, pgtble_page_free_rcu);
+   }
+}
-- 
2.25.1



[RFC PATCHES 2/2] iommu: Replace put_pages_list() with iommu_free_pgtbl_pages()

2022-06-09 Thread Lu Baolu
Therefore, RCU protected page free will take effect if necessary.

Signed-off-by: Lu Baolu 
---
 drivers/iommu/amd/io_pgtable.c | 5 ++---
 drivers/iommu/dma-iommu.c  | 6 --
 drivers/iommu/intel/iommu.c| 4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 6608d1717574..a62d5dafd7f2 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -423,7 +423,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, 
unsigned long iova,
}
 
/* Everything flushed out, free pages now */
-   put_pages_list(&freelist);
+   iommu_free_pgtbl_pages(&dom->domain, &freelist);
 
return ret;
 }
@@ -503,8 +503,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
 
/* Make changes visible to IOMMUs */
amd_iommu_domain_update(dom);
-
-   put_pages_list(&freelist);
+   iommu_free_pgtbl_pages(&dom->domain, &freelist);
 }
 
 static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void 
*cookie)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f90251572a5d..a948358c3e51 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -132,7 +132,8 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, 
struct iova_fq *fq)
if (fq->entries[idx].counter >= counter)
break;
 
-   put_pages_list(&fq->entries[idx].freelist);
+   iommu_free_pgtbl_pages(cookie->fq_domain,
+  &fq->entries[idx].freelist);
free_iova_fast(&cookie->iovad,
   fq->entries[idx].iova_pfn,
   fq->entries[idx].pages);
@@ -228,7 +229,8 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie 
*cookie)
struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
 
fq_ring_for_each(idx, fq)
-   put_pages_list(&fq->entries[idx].freelist);
+   iommu_free_pgtbl_pages(cookie->fq_domain,
+  &fq->entries[idx].freelist);
}
 
free_percpu(cookie->fq);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 19024dc52735..f429671e837f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1891,7 +1891,7 @@ static void domain_exit(struct dmar_domain *domain)
LIST_HEAD(freelist);
 
domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
-   put_pages_list(&freelist);
+   iommu_free_pgtbl_pages(&domain->domain, &freelist);
}
 
kfree(domain);
@@ -4510,7 +4510,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain 
*domain,
  start_pfn, nrpages,
  list_empty(&gather->freelist), 0);
 
-   put_pages_list(&gather->freelist);
+   iommu_free_pgtbl_pages(domain, &gather->freelist);
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
-- 
2.25.1



Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts

2022-06-09 Thread Sai Prakash Ranjan

Hi Vincent,

On 6/9/2022 2:52 AM, Vincent Knecht wrote:

Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :

TLB sync timeouts can be due to various reasons such as TBU power down
or pending TCU/TBU invalidation/sync and so on. Debugging these often
requires dumping some implementation-defined registers to know the
status of TBU/TCU operations. Some of these registers are not
accessible from the non-secure world (such as the kernel) and require
SMC calls to read them in the secure world. So, add debug support to
dump the implementation-defined registers on TLB sync timeouts.
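
For illustration, reading one such implementation-defined register through an
SCM call might look roughly like the sketch below; the register name, offset
and messages are placeholders rather than values from this patch, and only
qcom_scm_io_readl() is assumed to be the real kernel helper:

#include <linux/printk.h>
#include <linux/qcom_scm.h>
#include <linux/types.h>

/* Hypothetical offset; real offsets come from the SoC-specific data. */
#define EXAMPLE_TBU_PWR_STATUS          0x2204

static void example_dump_tbu_pwr_status(phys_addr_t tbu_base)
{
        u32 val;

        /*
         * Some of these registers fault when read directly from the
         * non-secure kernel, so go through an SCM call instead.
         */
        if (qcom_scm_io_readl(tbu_base + EXAMPLE_TBU_PWR_STATUS, &val))
                pr_err("TLB sync timeout: SCM read of TBU_PWR_STATUS failed\n");
        else
                pr_err("TLB sync timeout: TBU_PWR_STATUS = 0x%08x\n", val);
}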

Signed-off-by: Sai Prakash Ranjan 
---

Changes in v2:
  * Use scm call consistently so that it works on older chipsets where
    some of these regs are secure registers.
  * Add device specific data to get the implementation defined register
    offsets.

---
  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++---
  drivers/iommu/arm/arm-smmu/arm-smmu.c  |   2 +
  drivers/iommu/arm/arm-smmu/arm-smmu.h  |   1 +
  3 files changed, 146 insertions(+), 18 deletions(-)

Hi Sai, and thanks for this patch !

I've encountered TLB sync timeouts with the msm8939 SoC recently.
What would need to be added to this patch so that this SoC is supported?
For instance, where could one check the values to be used in an equivalent
of qcom_smmu_impl0_reg_offset for this SoC (if any change is needed)?
The current values are not found by simply grepping the downstream/vendor
dtsi/dts files...


These are implementation-defined registers; some might not be present on
older SoCs, and sometimes this support is not added in downstream kernels
even if the registers are present.

I looked up the IP doc for msm8939 and could find only the TBU_PWR_STATUS
register; you can use the same offset for it as given in this patch.

Thanks,
Sai

Re: [PATCH v3 3/4] scsi: core: Cap shost max_sectors according to DMA optimum mapping limits

2022-06-09 Thread John Garry via iommu

On 08/06/2022 22:07, Bart Van Assche wrote:

On 6/8/22 10:50, John Garry wrote:
Please note that this limit only applies if we have an IOMMU enabled 
for the scsi host dma device. Otherwise we are limited by dma direct 
or swiotlb max mapping size, as before.


SCSI host bus adapters that support 64-bit DMA may support much larger 
transfer sizes than 128 KiB.


Indeed, and that is my problem today, as my storage controller is
generating DMA mapping lengths which exceed 128K and they slow
everything down.


If you say that SRP enjoys the best performance with larger transfers, then
can you please test this with an IOMMU enabled (iommu group type DMA or
DMA-FQ)?


Thanks,
John


[PATCH v3 1/6] dt-bindings: iommu: mediatek: Add mediatek,infracfg phandle

2022-06-09 Thread AngeloGioacchino Del Regno
Add the "mediatek,infracfg" property to let the mtk_iommu driver retrieve
a phandle to the infracfg syscon instead of performing a per-SoC
compatible lookup in the entire devicetree, and set it as a required
property for MT2712 and MT8173.

Signed-off-by: AngeloGioacchino Del Regno 

---
 .../bindings/iommu/mediatek,iommu.yaml   | 16 
 1 file changed, 16 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
index 2ae3bbad7f1a..4142a568b293 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -101,6 +101,10 @@ properties:
 items:
   - const: bclk
 
+  mediatek,infracfg:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: The phandle to the mediatek infracfg syscon
+
   mediatek,larbs:
 $ref: /schemas/types.yaml#/definitions/phandle-array
 minItems: 1
@@ -167,6 +171,18 @@ allOf:
   required:
 - power-domains
 
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - mediatek,mt2712-m4u
+  - mediatek,mt8173-m4u
+
+then:
+  required:
+- mediatek,infracfg
+
   - if: # The IOMMUs don't have larbs.
   not:
 properties:
-- 
2.35.1



[PATCH v3 0/6] mtk_iommu: Specify phandles to infracfg and pericfg

2022-06-09 Thread AngeloGioacchino Del Regno
The IOMMU has registers in the infracfg and/or pericfg iospaces: as
for the currently supported SoCs, MT2712 and MT8173 need a phandle to
infracfg, while MT8195 needs one to pericfg.

Before this change, the driver was checking for a SoC-specific infra/peri
compatible but, sooner or later, these lists are going to grow a lot...
...and this is why it was chosen to add phandles (as was already done in
some other drivers - look at mtk-pm-domains and mt8192-afe, for example).

Please note that, while it was necessary to update the devicetrees for
MT8173 and MT2712e, there was no update for MT8195 because there is no
IOMMU node in there yet.

Changes in v3:
 - Different squashing of dt-bindings patches (sorry for misunderstanding!)
 - Removed legacy devicetree print

Changes in v2:
 - Squashed dt-bindings patches as suggested by Matthias
 - Removed quotes from infra/peri phandle refs
 - Changed dev_warn to dev_info in patches [2/7], [3/7]

AngeloGioacchino Del Regno (6):
  dt-bindings: iommu: mediatek: Add mediatek,infracfg phandle
  iommu: mtk_iommu: Lookup phandle to retrieve syscon to infracfg
  arm64: dts: mediatek: mt8173: Add mediatek,infracfg phandle for IOMMU
  arm64: dts: mediatek: mt2712e: Add mediatek,infracfg phandle for IOMMU
  dt-bindings: iommu: mediatek: Add mediatek,pericfg phandle
  iommu: mtk_iommu: Lookup phandle to retrieve syscon to pericfg

 .../bindings/iommu/mediatek,iommu.yaml| 30 +
 arch/arm64/boot/dts/mediatek/mt2712e.dtsi |  2 +
 arch/arm64/boot/dts/mediatek/mt8173.dtsi  |  1 +
 drivers/iommu/mtk_iommu.c | 61 +++
 4 files changed, 70 insertions(+), 24 deletions(-)

-- 
2.35.1



[PATCH v3 6/6] iommu: mtk_iommu: Lookup phandle to retrieve syscon to pericfg

2022-06-09 Thread AngeloGioacchino Del Regno
On some SoCs (of which only MT8195 is supported at the time of writing),
the "R" and "W" (I/O) enable bits for the IOMMUs are in the pericfg_ao
register space and not in the IOMMU space: as already happened with
infracfg, it is expected that this list will grow.

Instead of specifying pericfg compatibles on a per-SoC basis, following
what was done with infracfg, let's look up the syscon by phandle instead.

Signed-off-by: AngeloGioacchino Del Regno 

---
 drivers/iommu/mtk_iommu.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 90685946fcbe..0ea0848581e9 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -138,6 +138,8 @@
 /* PM and clock always on. e.g. infra iommu */
 #define PM_CLK_AO  BIT(15)
 #define IFA_IOMMU_PCIE_SUPPORT BIT(16)
+/* IOMMU I/O (r/w) is enabled using PERICFG_IOMMU_1 register */
+#define HAS_PERI_IOMMU1_REG    BIT(17)
 
 #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask)   \
                                ((((pdata)->flags) & (mask)) == (_x))
@@ -187,7 +189,6 @@ struct mtk_iommu_plat_data {
u32 flags;
u32 inv_sel_reg;
 
-   char*pericfg_comp_str;
struct list_head*hw_list;
unsigned intiova_region_nr;
const struct mtk_iommu_iova_region  *iova_region;
@@ -1218,14 +1219,16 @@ static int mtk_iommu_probe(struct platform_device *pdev)
goto out_runtime_disable;
}
} else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) &&
-  data->plat_data->pericfg_comp_str) {
-   infracfg = syscon_regmap_lookup_by_compatible(data->plat_data->pericfg_comp_str);
-   if (IS_ERR(infracfg)) {
-   ret = PTR_ERR(infracfg);
-   goto out_runtime_disable;
+  MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_PERI_IOMMU1_REG)) {
+   data->pericfg = syscon_regmap_lookup_by_phandle(dev->of_node, "mediatek,pericfg");
+   if (IS_ERR(data->pericfg)) {
+   p = "mediatek,mt8195-pericfg_ao";
+   data->pericfg = syscon_regmap_lookup_by_compatible(p);
+   if (IS_ERR(data->pericfg)) {
+   ret = PTR_ERR(data->pericfg);
+   goto out_runtime_disable;
+   }
}
-
-   data->pericfg = infracfg;
}
 
platform_set_drvdata(pdev, data);
@@ -1484,8 +1487,8 @@ static const struct mtk_iommu_plat_data mt8192_data = {
 static const struct mtk_iommu_plat_data mt8195_data_infra = {
.m4u_plat = M4U_MT8195,
.flags= WR_THROT_EN | DCM_DISABLE | STD_AXI_MODE | 
PM_CLK_AO |
-   MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT,
-   .pericfg_comp_str = "mediatek,mt8195-pericfg_ao",
+   HAS_PERI_IOMMU1_REG | MTK_IOMMU_TYPE_INFRA |
+   IFA_IOMMU_PCIE_SUPPORT,
.inv_sel_reg  = REG_MMU_INV_SEL_GEN2,
.banks_num= 5,
.banks_enable = {true, false, false, false, true},
-- 
2.35.1



[PATCH v3 3/6] arm64: dts: mediatek: mt8173: Add mediatek,infracfg phandle for IOMMU

2022-06-09 Thread AngeloGioacchino Del Regno
The IOMMU driver now looks for the "mediatek,infracfg" phandle as the
new way to retrieve a syscon to infracfg: even though the old way is
retained, it has been deprecated, and the driver will write a message
to kmsg advising to use the phandle instead.

For this reason, assign the right phandle to mediatek,infracfg in
the iommu node.

Signed-off-by: AngeloGioacchino Del Regno 

---
 arch/arm64/boot/dts/mediatek/mt8173.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index 40d7b47fc52e..825a3c670373 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -588,6 +588,7 @@ iommu: iommu@10205000 {
interrupts = ;
clocks = <&infracfg CLK_INFRA_M4U>;
clock-names = "bclk";
+   mediatek,infracfg = <&infracfg>;
mediatek,larbs = <&larb0>, <&larb1>, <&larb2>,
 <&larb3>, <&larb4>, <&larb5>;
#iommu-cells = <1>;
-- 
2.35.1



[PATCH v3 2/6] iommu: mtk_iommu: Lookup phandle to retrieve syscon to infracfg

2022-06-09 Thread AngeloGioacchino Del Regno
This driver will get support for more SoCs and the list of infracfg
compatibles is expected to grow: in order to prevent this situation
from getting out of control and avoid a long list of compatible strings,
add support for retrieving a handle to infracfg's regmap through a
new "mediatek,infracfg" phandle.

In order to keep backwards compatibility with older devicetrees, the old
way is kept in place.

Signed-off-by: AngeloGioacchino Del Regno 

---
 drivers/iommu/mtk_iommu.c | 38 --
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index bb9dd92c9898..90685946fcbe 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1140,22 +1140,32 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
 
if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) {
-   switch (data->plat_data->m4u_plat) {
-   case M4U_MT2712:
-   p = "mediatek,mt2712-infracfg";
-   break;
-   case M4U_MT8173:
-   p = "mediatek,mt8173-infracfg";
-   break;
-   default:
-   p = NULL;
+   infracfg = syscon_regmap_lookup_by_phandle(dev->of_node, "mediatek,infracfg");
+   if (IS_ERR(infracfg)) {
+   /*
+* Legacy devicetrees will not specify a phandle to
+* mediatek,infracfg: in that case, we use the older
+* way to retrieve a syscon to infra.
+*
+* This is for retrocompatibility purposes only, hence
+* no more compatibles shall be added to this.
+*/
+   switch (data->plat_data->m4u_plat) {
+   case M4U_MT2712:
+   p = "mediatek,mt2712-infracfg";
+   break;
+   case M4U_MT8173:
+   p = "mediatek,mt8173-infracfg";
+   break;
+   default:
+   p = NULL;
+   }
+
+   infracfg = syscon_regmap_lookup_by_compatible(p);
+   if (IS_ERR(infracfg))
+   return PTR_ERR(infracfg);
}
 
-   infracfg = syscon_regmap_lookup_by_compatible(p);
-
-   if (IS_ERR(infracfg))
-   return PTR_ERR(infracfg);
-
ret = regmap_read(infracfg, REG_INFRA_MISC, &val);
if (ret)
return ret;
-- 
2.35.1



[PATCH v3 5/6] dt-bindings: iommu: mediatek: Add mediatek,pericfg phandle

2022-06-09 Thread AngeloGioacchino Del Regno
Add the "mediatek,pericfg" property to let the mtk_iommu driver retrieve
a phandle to the pericfg syscon instead of performing a per-SoC
compatible lookup in the entire devicetree, and set it as a required
property for MT8195's infra IOMMU.

Signed-off-by: AngeloGioacchino Del Regno 

---
 .../devicetree/bindings/iommu/mediatek,iommu.yaml  | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
index 4142a568b293..d5e3272a54e8 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -116,6 +116,10 @@ properties:
   Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort
   according to the local arbiter index, like larb0, larb1, larb2...
 
+  mediatek,pericfg:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: The phandle to the mediatek pericfg syscon
+
   '#iommu-cells':
 const: 1
 description: |
@@ -183,6 +187,16 @@ allOf:
   required:
 - mediatek,infracfg
 
+  - if:
+  properties:
+compatible:
+  contains:
+const: mediatek,mt8195-iommu-infra
+
+then:
+  required:
+- mediatek,pericfg
+
   - if: # The IOMMUs don't have larbs.
   not:
 properties:
-- 
2.35.1



[PATCH v3 4/6] arm64: dts: mediatek: mt2712e: Add mediatek,infracfg phandle for IOMMU

2022-06-09 Thread AngeloGioacchino Del Regno
The IOMMU driver now looks for the "mediatek,infracfg" phandle as the
new way to retrieve a syscon to infracfg: even though the old way is
retained, it has been deprecated, and the driver will write a message
to kmsg advising to use the phandle instead.

For this reason, assign the right phandle to mediatek,infracfg in
the iommu node.

Signed-off-by: AngeloGioacchino Del Regno 

---
 arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi 
b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
index 623eb3beabf2..4797537cb368 100644
--- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
@@ -329,6 +329,7 @@ iommu0: iommu@10205000 {
interrupts = ;
clocks = <&infracfg CLK_INFRA_M4U>;
clock-names = "bclk";
+   mediatek,infracfg = <&infracfg>;
mediatek,larbs = <&larb0>, <&larb1>, <&larb2>,
 <&larb3>, <&larb6>;
#iommu-cells = <1>;
@@ -346,6 +347,7 @@ iommu1: iommu@1020a000 {
interrupts = ;
clocks = <&infracfg CLK_INFRA_M4U>;
clock-names = "bclk";
+   mediatek,infracfg = <&infracfg>;
mediatek,larbs = <&larb4>, <&larb5>, <&larb7>;
#iommu-cells = <1>;
};
-- 
2.35.1



[PATCH v3 0/3] MediaTek Helio X10 MT6795 - M4U/IOMMU Support

2022-06-09 Thread AngeloGioacchino Del Regno
In an effort to give some love to the apparently forgotten MT6795 SoC,
I am upstreaming more of the components that are necessary to support
platforms powered by it beyond a simple boot to serial console.

This series introduces support for the IOMMUs found on this SoC.

Tested on a MT6795 Sony Xperia M5 (codename "Holly") smartphone.

Changes in v3:
 - Added new flag as suggested by Yong Wu
 - Rebased on top of 
https://patchwork.kernel.org/project/linux-mediatek/list/?series=648784

Changes in v2:
 - Rebased on top of 
https://patchwork.kernel.org/project/linux-mediatek/list/?series=642681

AngeloGioacchino Del Regno (3):
  dt-bindings: mediatek: Add bindings for MT6795 M4U
  iommu: mtk_iommu: Introduce new flag TF_PORT_TO_ADDR_MT8173
  iommu: mtk_iommu: Add support for MT6795 Helio X10 M4Us

 .../bindings/iommu/mediatek,iommu.yaml|  4 +
 drivers/iommu/mtk_iommu.c | 21 +++-
 include/dt-bindings/memory/mt6795-larb-port.h | 96 +++
 3 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 include/dt-bindings/memory/mt6795-larb-port.h

-- 
2.35.1



[PATCH v3 3/3] iommu: mtk_iommu: Add support for MT6795 Helio X10 M4Us

2022-06-09 Thread AngeloGioacchino Del Regno
Add support for the M4Us found in the MT6795 Helio X10 SoC.

Signed-off-by: AngeloGioacchino Del Regno 

---
 drivers/iommu/mtk_iommu.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 8611cf8e4bd5..beca1c5a6c10 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -160,6 +160,7 @@
 enum mtk_iommu_plat {
M4U_MT2712,
M4U_MT6779,
+   M4U_MT6795,
M4U_MT8167,
M4U_MT8173,
M4U_MT8183,
@@ -1424,6 +1425,19 @@ static const struct mtk_iommu_plat_data mt6779_data = {
.larbid_remap  = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}},
 };
 
+static const struct mtk_iommu_plat_data mt6795_data = {
+   .m4u_plat = M4U_MT6795,
+   .flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI |
+   HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM |
+   TF_PORT_TO_ADDR_MT8173,
+   .inv_sel_reg  = REG_MMU_INV_SEL_GEN1,
+   .banks_num= 1,
+   .banks_enable = {true},
+   .iova_region  = single_domain,
+   .iova_region_nr = ARRAY_SIZE(single_domain),
+   .larbid_remap = {{0}, {1}, {2}, {3}, {4}}, /* Linear mapping. */
+};
+
 static const struct mtk_iommu_plat_data mt8167_data = {
.m4u_plat = M4U_MT8167,
.flags= RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM,
@@ -1536,6 +1550,7 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = 
{
 static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
{ .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
+   { .compatible = "mediatek,mt6795-m4u", .data = &mt6795_data},
{ .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data},
{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
-- 
2.35.1



[PATCH v3 1/3] dt-bindings: mediatek: Add bindings for MT6795 M4U

2022-06-09 Thread AngeloGioacchino Del Regno
Add bindings for the MediaTek Helio X10 (MT6795) IOMMU/M4U.

Signed-off-by: AngeloGioacchino Del Regno 

Acked-by: Rob Herring 
---
 .../bindings/iommu/mediatek,iommu.yaml|  4 +
 include/dt-bindings/memory/mt6795-larb-port.h | 96 +++
 2 files changed, 100 insertions(+)
 create mode 100644 include/dt-bindings/memory/mt6795-larb-port.h

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
index d5e3272a54e8..20902c387520 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -73,6 +73,7 @@ properties:
   - mediatek,mt2701-m4u  # generation one
   - mediatek,mt2712-m4u  # generation two
   - mediatek,mt6779-m4u  # generation two
+  - mediatek,mt6795-m4u  # generation two
   - mediatek,mt8167-m4u  # generation two
   - mediatek,mt8173-m4u  # generation two
   - mediatek,mt8183-m4u  # generation two
@@ -128,6 +129,7 @@ properties:
   dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623,
   dt-binding/memory/mt2712-larb-port.h for mt2712,
   dt-binding/memory/mt6779-larb-port.h for mt6779,
+  dt-binding/memory/mt6795-larb-port.h for mt6795,
   dt-binding/memory/mt8167-larb-port.h for mt8167,
   dt-binding/memory/mt8173-larb-port.h for mt8173,
   dt-binding/memory/mt8183-larb-port.h for mt8183,
@@ -152,6 +154,7 @@ allOf:
 enum:
   - mediatek,mt2701-m4u
   - mediatek,mt2712-m4u
+  - mediatek,mt6795-m4u
   - mediatek,mt8173-m4u
   - mediatek,mt8186-iommu-mm
   - mediatek,mt8192-m4u
@@ -181,6 +184,7 @@ allOf:
   contains:
 enum:
   - mediatek,mt2712-m4u
+  - mediatek,mt6795-m4u
   - mediatek,mt8173-m4u
 
 then:
diff --git a/include/dt-bindings/memory/mt6795-larb-port.h 
b/include/dt-bindings/memory/mt6795-larb-port.h
new file mode 100644
index ..223aca8fd350
--- /dev/null
+++ b/include/dt-bindings/memory/mt6795-larb-port.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2022 Collabora Ltd.
+ * Author: AngeloGioacchino Del Regno 
+ */
+
+#ifndef _DT_BINDINGS_MEMORY_MT6795_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT6795_LARB_PORT_H_
+
+#include 
+
+#define M4U_LARB0_ID   0
+#define M4U_LARB1_ID   1
+#define M4U_LARB2_ID   2
+#define M4U_LARB3_ID   3
+#define M4U_LARB4_ID   4
+#define M4U_LARB5_ID   5
+
+/* larb0 */
+#define M4U_PORT_DISP_OVL0 MTK_M4U_ID(M4U_LARB0_ID, 0)
+#define M4U_PORT_DISP_RDMA0MTK_M4U_ID(M4U_LARB0_ID, 1)
+#define M4U_PORT_DISP_RDMA1MTK_M4U_ID(M4U_LARB0_ID, 2)
+#define M4U_PORT_DISP_WDMA0MTK_M4U_ID(M4U_LARB0_ID, 3)
+#define M4U_PORT_DISP_OVL1 MTK_M4U_ID(M4U_LARB0_ID, 4)
+#define M4U_PORT_DISP_RDMA2MTK_M4U_ID(M4U_LARB0_ID, 5)
+#define M4U_PORT_DISP_WDMA1MTK_M4U_ID(M4U_LARB0_ID, 6)
+#define M4U_PORT_DISP_OD_R MTK_M4U_ID(M4U_LARB0_ID, 7)
+#define M4U_PORT_DISP_OD_W MTK_M4U_ID(M4U_LARB0_ID, 8)
+#define M4U_PORT_MDP_RDMA0 MTK_M4U_ID(M4U_LARB0_ID, 9)
+#define M4U_PORT_MDP_RDMA1 MTK_M4U_ID(M4U_LARB0_ID, 10)
+#define M4U_PORT_MDP_WDMA  MTK_M4U_ID(M4U_LARB0_ID, 11)
+#define M4U_PORT_MDP_WROT0 MTK_M4U_ID(M4U_LARB0_ID, 12)
+#define M4U_PORT_MDP_WROT1 MTK_M4U_ID(M4U_LARB0_ID, 13)
+
+/* larb1 */
+#define M4U_PORT_VDEC_MC   MTK_M4U_ID(M4U_LARB1_ID, 0)
+#define M4U_PORT_VDEC_PP   MTK_M4U_ID(M4U_LARB1_ID, 1)
+#define M4U_PORT_VDEC_UFO  MTK_M4U_ID(M4U_LARB1_ID, 2)
+#define M4U_PORT_VDEC_VLD  MTK_M4U_ID(M4U_LARB1_ID, 3)
+#define M4U_PORT_VDEC_VLD2 MTK_M4U_ID(M4U_LARB1_ID, 4)
+#define M4U_PORT_VDEC_AVC_MV   MTK_M4U_ID(M4U_LARB1_ID, 5)
+#define M4U_PORT_VDEC_PRED_RD  MTK_M4U_ID(M4U_LARB1_ID, 6)
+#define M4U_PORT_VDEC_PRED_WR  MTK_M4U_ID(M4U_LARB1_ID, 7)
+#define M4U_PORT_VDEC_PPWRAP   MTK_M4U_ID(M4U_LARB1_ID, 8)
+
+/* larb2 */
+#define M4U_PORT_CAM_IMGO  MTK_M4U_ID(M4U_LARB2_ID, 0)
+#define M4U_PORT_CAM_RRZO  MTK_M4U_ID(M4U_LARB2_ID, 1)
+#define M4U_PORT_CAM_AAO   MTK_M4U_ID(M4U_LARB2_ID, 2)
+#define M4U_PORT_CAM_LCSO  MTK_M4U_ID(M4U_LARB2_ID, 3)
+#define M4U_PORT_CAM_ESFKO MTK_M4U_ID(M4U_LARB2_ID, 4)
+#define M4U_PORT_CAM_IMGO_S    MTK_M4U_ID(M4U_LARB2_ID, 5)
+#define M4U_PORT_CAM_LSCI  MTK_M4U_ID(M4U_LARB2_ID, 6)
+#define M4U_PORT_CAM_LSCI_D    MTK_M4U_ID(M4U_LARB2_ID, 7)
+#define M4U_PORT_CAM_BPCI  MTK_M4U_ID(M4U_LARB2_ID, 8)
+#define M4U_PORT_CAM_BPCI_DMTK_M4U_ID(M4U_LA

[PATCH v3 2/3] iommu: mtk_iommu: Introduce new flag TF_PORT_TO_ADDR_MT8173

2022-06-09 Thread AngeloGioacchino Del Regno
In preparation for adding support for MT6795, add a new flag named
TF_PORT_TO_ADDR_MT8173 and use that instead of checking for m4u_plat
type in mtk_iommu_hw_init() to avoid seeing a long list of m4u_plat
checks there in the future.

Signed-off-by: AngeloGioacchino Del Regno 

---
 drivers/iommu/mtk_iommu.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 0ea0848581e9..8611cf8e4bd5 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -140,6 +140,7 @@
 #define IFA_IOMMU_PCIE_SUPPORT BIT(16)
 /* IOMMU I/O (r/w) is enabled using PERICFG_IOMMU_1 register */
 #define HAS_PERI_IOMMU1_REG    BIT(17)
+#define TF_PORT_TO_ADDR_MT8173 BIT(18)
 
 #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask)   \
                                ((((pdata)->flags) & (mask)) == (_x))
@@ -960,7 +961,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data 
*data, unsigned int ban
 * Global control settings are in bank0. May re-init these global 
registers
 * since no sure if there is bank0 consumers.
 */
-   if (data->plat_data->m4u_plat == M4U_MT8173) {
+   if (MTK_IOMMU_HAS_FLAG(data->plat_data, TF_PORT_TO_ADDR_MT8173)) {
regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
 F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173;
} else {
@@ -1437,7 +1438,8 @@ static const struct mtk_iommu_plat_data mt8167_data = {
 static const struct mtk_iommu_plat_data mt8173_data = {
.m4u_plat = M4U_MT8173,
.flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI |
-   HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM,
+   HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM |
+   TF_PORT_TO_ADDR_MT8173,
.inv_sel_reg  = REG_MMU_INV_SEL_GEN1,
.banks_num= 1,
.banks_enable = {true},
-- 
2.35.1



Re: [PATCH v2 1/9] PM: domains: Delete usage of driver_deferred_probe_check_state()

2022-06-09 Thread Ulf Hansson
On Wed, 1 Jun 2022 at 09:07, Saravana Kannan  wrote:
>
> Now that fw_devlink=on by default and fw_devlink supports
> "power-domains" property, the execution will never get to the point
> where driver_deferred_probe_check_state() is called before the supplier
> has probed successfully or before deferred probe timeout has expired.
>
> So, delete the call and replace it with -ENODEV.

With fw_devlink=on by default - does that mean that the parameter
can't be changed?

Or perhaps the point is that we don't want to go back, but rather drop
the fw_devlink parameter altogether when moving forward?

>
> Signed-off-by: Saravana Kannan 

Just a minor nitpick below. Nevertheless, feel free to add:

Reviewed-by: Ulf Hansson 

> ---
>  drivers/base/power/domain.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
> index 739e52cd4aba..3e86772d5fac 100644
> --- a/drivers/base/power/domain.c
> +++ b/drivers/base/power/domain.c
> @@ -2730,7 +2730,7 @@ static int __genpd_dev_pm_attach(struct device *dev, 
> struct device *base_dev,
> mutex_unlock(&gpd_list_lock);
> dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
> __func__, PTR_ERR(pd));
> -   return driver_deferred_probe_check_state(base_dev);

Adding a brief comment about why -EPROBE_DEFER doesn't make sense
here, would be nice.

> +   return -ENODEV;
> }
>
> dev_dbg(dev, "adding to PM domain %s\n", pd->name);
> --
> 2.36.1.255.ge46751e96f-goog
>

Kind regards
Uffe


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Jason Gunthorpe via iommu
On Thu, Jun 09, 2022 at 03:08:10PM +0800, Lu Baolu wrote:
> The IOMMU page tables are updated using iommu_map/unmap() interfaces.
> Currently, there is no mandatory requirement for drivers to use locks
> to ensure concurrent updates to page tables, because it's assumed that
> overlapping IOVA ranges do not have concurrent updates. Therefore the
> IOMMU drivers only need to take care of concurrent updates to level
> page table entries.
> 
> But enabling new features challenges this assumption. For example, the
> hardware assisted dirty page tracking feature requires scanning page
> tables in interfaces other than mapping and unmapping. This might result
> in a use-after-free scenario in which a level page table has been freed
> by the unmap() interface, while another thread is scanning the next level
> page table.
> 
> This adds RCU-protected page free support so that the pages are really
> freed and reused after a RCU grace period. Hence, the page tables are
> safe for scanning within a rcu_read_lock critical region. Considering
> that scanning the page table is a rare case, this also adds a domain
> flag and the RCU-protected page free is only used when this flat is set.
> 
> Signed-off-by: Lu Baolu 
> ---
>  include/linux/iommu.h |  9 +
>  drivers/iommu/iommu.c | 23 +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 5e1afe169549..6f68eabb8567 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -95,6 +95,7 @@ struct iommu_domain {
>   void *handler_token;
>   struct iommu_domain_geometry geometry;
>   struct iommu_dma_cookie *iova_cookie;
> + unsigned long concurrent_traversal:1;
>  };
>  
>  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> @@ -657,6 +658,12 @@ static inline void dev_iommu_priv_set(struct device 
> *dev, void *priv)
>   dev->iommu->priv = priv;
>  }
>  
> +static inline void domain_set_concurrent_traversal(struct iommu_domain 
> *domain,
> +bool value)
> +{
> + domain->concurrent_traversal = value;
> +}

?? If you want it to be a driver opt-in, I would just add a flags field to
the domain ops, e.g. "DOMAIN_FLAG_RCU_FREE_PAGES".
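
For illustration, that kind of opt-in could look roughly like the sketch
below; the flags field and flag name are hypothetical and do not exist in
the posted patch:

#define DOMAIN_FLAG_RCU_FREE_PAGES      BIT(0)

struct iommu_domain_ops {
        unsigned long flags;            /* e.g. DOMAIN_FLAG_RCU_FREE_PAGES */
        /* ... existing callbacks ... */
};

void iommu_free_pgtbl_pages(struct iommu_domain *domain,
                            struct list_head *pages)
{
        if (!(domain->ops->flags & DOMAIN_FLAG_RCU_FREE_PAGES)) {
                put_pages_list(pages);
                return;
        }
        /* ... RCU-deferred freeing as in the patch ... */
}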

> +void iommu_free_pgtbl_pages(struct iommu_domain *domain,
> + struct list_head *pages)
> +{
> + struct page *page, *next;
> +
> + if (!domain->concurrent_traversal) {
> + put_pages_list(pages);
> + return;
> + }
> +
> + list_for_each_entry_safe(page, next, pages, lru) {
> + list_del(&page->lru);
> + call_rcu(&page->rcu_head, pgtble_page_free_rcu);
> + }

It seems OK, but I wonder if there is benefit to using
put_pages_list() from the RCU callback.
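
One way to do that, as a rough sketch (the batch structure and helper names
are made up for illustration; on allocation failure it falls back to the
per-page call_rcu() from the patch):

struct pgtbl_free_batch {
        struct rcu_head rcu;
        struct list_head pages;
};

static void pgtbl_page_free_rcu(struct rcu_head *rcu)
{
        __free_pages(container_of(rcu, struct page, rcu_head), 0);
}

static void pgtbl_free_batch_rcu(struct rcu_head *rcu)
{
        struct pgtbl_free_batch *batch =
                container_of(rcu, struct pgtbl_free_batch, rcu);

        put_pages_list(&batch->pages);  /* frees the whole list in one go */
        kfree(batch);
}

static void queue_pgtbl_pages_free(struct list_head *pages)
{
        struct pgtbl_free_batch *batch = kmalloc(sizeof(*batch), GFP_ATOMIC);
        struct page *page, *next;

        if (batch) {
                INIT_LIST_HEAD(&batch->pages);
                list_splice_init(pages, &batch->pages);
                call_rcu(&batch->rcu, pgtbl_free_batch_rcu);
                return;
        }

        /* Fallback: queue each page individually, as the patch does. */
        list_for_each_entry_safe(page, next, pages, lru) {
                list_del(&page->lru);
                call_rcu(&page->rcu_head, pgtbl_page_free_rcu);
        }
}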

Jason


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Robin Murphy

On 2022-06-09 13:49, Jason Gunthorpe wrote:

On Thu, Jun 09, 2022 at 03:08:10PM +0800, Lu Baolu wrote:

The IOMMU page tables are updated using iommu_map/unmap() interfaces.
Currently, there is no mandatory requirement for drivers to use locks
to ensure concurrent updates to page tables, because it's assumed that
overlapping IOVA ranges do not have concurrent updates. Therefore the
IOMMU drivers only need to take care of concurrent updates to level
page table entries.

But enabling new features challenges this assumption. For example, the
hardware assisted dirty page tracking feature requires scanning page
tables in interfaces other than mapping and unmapping. This might result
in a use-after-free scenario in which a level page table has been freed
by the unmap() interface, while another thread is scanning the next level
page table.

This adds RCU-protected page free support so that the pages are really
freed and reused after a RCU grace period. Hence, the page tables are
safe for scanning within a rcu_read_lock critical region. Considering
that scanning the page table is a rare case, this also adds a domain
flag and the RCU-protected page free is only used when this flat is set.

Signed-off-by: Lu Baolu 
---
  include/linux/iommu.h |  9 +
  drivers/iommu/iommu.c | 23 +++
  2 files changed, 32 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e1afe169549..6f68eabb8567 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -95,6 +95,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   unsigned long concurrent_traversal:1;
  };
  
  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)

@@ -657,6 +658,12 @@ static inline void dev_iommu_priv_set(struct device *dev, 
void *priv)
dev->iommu->priv = priv;
  }
  
+static inline void domain_set_concurrent_traversal(struct iommu_domain *domain,

+  bool value)
+{
+   domain->concurrent_traversal = value;
+}


?? If you want it to be a driver opt in I would just add a flags to
the domain ops. "DOMAIN_FLAG_RCU_FREE_PAGES"


Is there a significant benefit to keeping both paths, or could we get 
away with just always using RCU? Realistically, pagetable pages aren't 
likely to be freed all that frequently, except perhaps at domain 
teardown, but that shouldn't really be performance-critical, and I guess 
we could stick an RCU sync point in iommu_domain_free() if we're really 
worried about releasing larger quantities of pages back to the allocator 
ASAP?
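
Such a sync point might look roughly like the sketch below (rcu_barrier()
waits for all pending call_rcu() callbacks, so any pagetable pages queued
for RCU-deferred freeing are back in the allocator before teardown
continues; the placement is only illustrative):

void iommu_domain_free(struct iommu_domain *domain)
{
        if (domain->concurrent_traversal)
                rcu_barrier();  /* flush pending pagetable page frees */

        /* ... existing domain teardown ... */
}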


It's already a driver opt-in to use the iommu_iotlb_gather freelist in 
the first place, and right now the ones that do are also the ones that 
do lock-free table walks so will ultimately all want this as well.


Robin.


+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(&page->lru);
+   call_rcu(&page->rcu_head, pgtble_page_free_rcu);
+   }


It seems OK, but I wonder if there is benifit to using
put_pages_list() from the rcu callback

Jason



Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Jason Gunthorpe via iommu
On Thu, Jun 09, 2022 at 02:19:06PM +0100, Robin Murphy wrote:

> Is there a significant benefit to keeping both paths, or could we get away
> with just always using RCU? Realistically, pagetable pages aren't likely to
> be freed all that frequently, except perhaps at domain teardown, but that
> shouldn't really be performance-critical, and I guess we could stick an RCU
> sync point in iommu_domain_free() if we're really worried about releasing
> larger quantities of pages back to the allocator ASAP?

I think you are right, anything that uses the iommu_iotlb_gather may
as well use RCU too.

IIRC the allocators already know that RCU is often sitting on
freed-memory and have some contigency to flush it out before OOMing,
so nothing special should be needed.

Jason


Re: [PATCH v3 1/6] dt-bindings: iommu: mediatek: Add mediatek,infracfg phandle

2022-06-09 Thread Rob Herring
On Thu, 09 Jun 2022 12:07:57 +0200, AngeloGioacchino Del Regno wrote:
> Add property "mediatek,infracfg" to let the mtk_iommu driver retrieve
> a phandle to the infracfg syscon instead of performing a per-soc
> compatible lookup in the entire devicetree and set it as a required
> property for MT2712 and MT8173.
> 
> Signed-off-by: AngeloGioacchino Del Regno 
> 
> ---
>  .../bindings/iommu/mediatek,iommu.yaml   | 16 
>  1 file changed, 16 insertions(+)
> 

My bot found errors running 'make DT_CHECKER_FLAGS=-m dt_binding_check'
on your patch (DT_CHECKER_FLAGS is new in v5.13):

yamllint warnings/errors:

dtschema/dtc warnings/errors:
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/iommu/mediatek,iommu.example.dtb:
 iommu@10205000: 'mediatek,infracfg' is a required property
From schema: 
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml

doc reference errors (make refcheckdocs):

See https://patchwork.ozlabs.org/patch/

This check can fail if there are any dependencies. The base for a patch
series is generally the most recent rc1.

If you already ran 'make dt_binding_check' and didn't see the above
error(s), then make sure 'yamllint' is installed and dt-schema is up to
date:

pip3 install dtschema --upgrade

Please check and re-submit.



[PATCH v2] iommu/dma: Add config for PCI SAC address trick

2022-06-09 Thread Robin Murphy
For devices stuck behind a conventional PCI bus, saving extra cycles at
33MHz is probably fairly significant. However since native PCI Express
is now the norm for high-performance devices, the optimisation to always
prefer 32-bit addresses for the sake of avoiding DAC is starting to look
rather anachronistic. Technically 32-bit addresses do have shorter TLPs
on PCIe, but unless the device is saturating its link bandwidth with
small transfers it seems unlikely that the difference is appreciable.

What definitely is appreciable, however, is that the IOVA allocator
doesn't behave all that well once the 32-bit space starts getting full.
As DMA working sets get bigger, this optimisation increasingly backfires
and adds considerable overhead to the dma_map path for use-cases like
high-bandwidth networking. We've increasingly bandaged the allocator
in attempts to mitigate this, but it remains fundamentally at odds with
other valid requirements to try as hard as possible to satisfy a request
within the given limit; what we really need is to just avoid this odd
notion of a speculative allocation when it isn't beneficial anyway.

Unfortunately that's where things get awkward... Having been present on
x86 for 15 years or so now, it turns out there are systems which fail to
properly define the upper limit of usable IOVA space for certain devices
and this trick was the only thing letting them work OK. I had a similar
ulterior motive for a couple of early arm64 systems when originally
adding it to iommu-dma, but those really should be fixed with proper
firmware bindings by now. Let's be brave and default it to off in the
hope that CI systems and developers will find and fix those bugs, but
expect that desktop-focused distro configs are likely to want to turn
it back on for maximum compatibility.
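
For reference, the trick in question is the speculative 32-bit allocation in
the dma-iommu IOVA allocation path, roughly like the simplified sketch below
(not the literal upstream code):

        /* Try to get PCI devices a SAC address first */
        if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
                iova = alloc_iova_fast(iovad, iova_len,
                                       DMA_BIT_MASK(32) >> shift, false);

        /* Otherwise, or on failure, allocate from the full usable range */
        if (!iova)
                iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);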

Signed-off-by: Robin Murphy 
---

v2: Tweak wording to clarify that it's not really an optimisation in
general, remove "default X86".

 drivers/iommu/Kconfig | 26 ++
 drivers/iommu/dma-iommu.c |  2 +-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c79a0df090c0..5a225b48dd00 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,6 +144,32 @@ config IOMMU_DMA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
 
+config IOMMU_DMA_PCI_SAC
+   bool "Enable 64-bit legacy PCI optimisation by default"
+   depends on IOMMU_DMA
+   help
+ Enable by default an IOMMU optimisation for 64-bit legacy PCI devices,
+ wherein the DMA API layer will always first try to allocate a 32-bit
+ DMA address suitable for a single address cycle, before falling back
+ to allocating from the device's full usable address range. If your
+ system has 64-bit legacy PCI devices in 32-bit slots where using dual
+ address cycles reduces DMA throughput significantly, this may be
+ beneficial to overall performance.
+
+ If you have a modern PCI Express based system, this feature mostly 
just
+ represents extra overhead in the allocation path for no practical
+ benefit, and it should usually be preferable to say "n" here.
+
+ However, beware that this feature has also historically papered over
+ bugs where the IOMMU address width and/or device DMA mask is not set
+ correctly. If device DMA problems and IOMMU faults start occurring
+ after disabling this option, it is almost certainly indicative of a
+ latent driver or firmware/BIOS bug, which would previously have only
+ manifested with several gigabytes worth of concurrent DMA mappings.
+
+ If this option is not set, the feature can still be re-enabled at
+ boot time with the "iommu.forcedac=0" command-line argument.
+
 # Shared Virtual Addressing
 config IOMMU_SVA
bool
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f90251572a5d..9f9d9ba7f376 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -67,7 +67,7 @@ struct iommu_dma_cookie {
 };
 
 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
-bool iommu_dma_forcedac __read_mostly;
+bool iommu_dma_forcedac __read_mostly = !IS_ENABLED(CONFIG_IOMMU_DMA_PCI_SAC);
 
 static int __init iommu_dma_forcedac_setup(char *str)
 {
-- 
2.36.1.dirty



Re: [PATCH v12 6/9] iommu/arm-smmu-v3: Introduce strtab init helper

2022-06-09 Thread Will Deacon
On Tue, May 03, 2022 at 05:33:27PM +0100, Shameer Kolothum wrote:
> Introduce a helper to check the sid range and to init the l2 strtab
> entries(bypass). This will be useful when we have to initialize the
> l2 strtab with bypass for RMR SIDs.
> 
> Signed-off-by: Shameer Kolothum 
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 28 +++--
>  1 file changed, 15 insertions(+), 13 deletions(-)

Acked-by: Will Deacon 

Will


Re: [PATCH] iommu/dma: Fix race condition during iova_domain initialization

2022-06-09 Thread Miles Chen via iommu
Hi YF,

>When many devices share the same iova domain, iommu_dma_init_domain()
>may be called at the same time. The checking of iovad->start_pfn will
>all get false in iommu_dma_init_domain() and both enter init_iova_domain()
>to do iovad initialization.

After reading this patch, my understanding is that we use iovad->start_pfn
as a key to tell if an iovad is already initialized, but we do not protect
iovad->start_pfn from concurrent access.

So what's happening is like:

          cpu0                                  cpu1
of_dma_configure_id()                 of_dma_configure_id()
  arch_setup_dma_ops()                  arch_setup_dma_ops()
    iommu_setup_dma_ops()                 iommu_setup_dma_ops()
      iommu_dma_init_domain()               iommu_dma_init_domain()
        if (iovad->start_pfn) {               if (iovad->start_pfn) {
        }                                     }
        init_iova_domain()                    init_iova_domain()


init_iova_domain() is called at the same time.

>Fix this by protecting init_iova_domain() with iommu_dma_cookie->mutex.

Reviewed-by: Miles Chen  

>Exception backtrace:
>rb_insert_color(param1=0xFF80CD2BDB40, param3=1) + 64
>init_iova_domain() + 180
>iommu_setup_dma_ops() + 260
>arch_setup_dma_ops() + 132
>of_dma_configure_id() + 468
>platform_dma_configure() + 32
>really_probe() + 1168
>driver_probe_device() + 268
>__device_attach_driver() + 524
>__device_attach() + 524
>bus_probe_device() + 64
>deferred_probe_work_func() + 260
>process_one_work() + 580
>worker_thread() + 1076
>kthread() + 332
>ret_from_fork() + 16
>
>Signed-off-by: Ning Li 
>Signed-off-by: Yunfei Wang 
>---
> drivers/iommu/dma-iommu.c | 17 +
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>index 09f6e1c0f9c0..b38c5041eeab 100644
>--- a/drivers/iommu/dma-iommu.c
>+++ b/drivers/iommu/dma-iommu.c
>@@ -63,6 +63,7 @@ struct iommu_dma_cookie {
> 
>   /* Domain for flush queue callback; NULL if flush queue not in use */
>   struct iommu_domain *fq_domain;
>+  struct mutex mutex;
> };
> 
> static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
>@@ -309,6 +310,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
>   if (!domain->iova_cookie)
>   return -ENOMEM;
> 
>+  mutex_init(&domain->iova_cookie->mutex);
>   return 0;
> }
> 
>@@ -549,26 +551,33 @@ static int iommu_dma_init_domain(struct iommu_domain 
>*domain, dma_addr_t base,
>   }
> 
>   /* start_pfn is always nonzero for an already-initialised domain */
>+  mutex_lock(&cookie->mutex);
>
>   if (iovad->start_pfn) {
>   if (1UL << order != iovad->granule ||
>   base_pfn != iovad->start_pfn) {
>   pr_warn("Incompatible range for DMA domain\n");
>-  return -EFAULT;
>+  ret = -EFAULT;
>+  goto done_unlock;
>   }
> 
>-  return 0;
>+  ret = 0;
>+  goto done_unlock;
>   }
> 
>   init_iova_domain(iovad, 1UL << order, base_pfn);
>   ret = iova_domain_init_rcaches(iovad);
>   if (ret)
>-  return ret;
>+  goto done_unlock;
> 
>   /* If the FQ fails we can simply fall back to strict mode */
>   if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
>   domain->type = IOMMU_DOMAIN_DMA;
> 
>-  return iova_reserve_iommu_regions(dev, domain);
>+  ret = iova_reserve_iommu_regions(dev, domain);
>+
>+done_unlock:
>+  mutex_unlock(&cookie->mutex);
>+  return ret;
> }
> 
> /**
>-- 
>2.18.0
>
>
>


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Raj, Ashok
On Thu, Jun 09, 2022 at 03:08:10PM +0800, Lu Baolu wrote:
> The IOMMU page tables are updated using iommu_map/unmap() interfaces.
> Currently, there is no mandatory requirement for drivers to use locks
> to ensure concurrent updates to page tables, because it's assumed that
> overlapping IOVA ranges do not have concurrent updates. Therefore the
> IOMMU drivers only need to take care of concurrent updates to level
> page table entries.

The last part doesn't read well.. 
s/updates to level page table entries/ updates to page-table entries at the
same level

> 
> But enabling new features challenges this assumption. For example, the
> hardware assisted dirty page tracking feature requires scanning page
> tables in interfaces other than mapping and unmapping. This might result
> in a use-after-free scenario in which a level page table has been freed
> by the unmap() interface, while another thread is scanning the next level
> page table.
> 
> This adds RCU-protected page free support so that the pages are really
> freed and reused after a RCU grace period. Hence, the page tables are
> safe for scanning within a rcu_read_lock critical region. Considering
> that scanning the page table is a rare case, this also adds a domain
> flag and the RCU-protected page free is only used when this flat is set.

s/flat/flag

> 
> Signed-off-by: Lu Baolu 
> ---
>  include/linux/iommu.h |  9 +
>  drivers/iommu/iommu.c | 23 +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 5e1afe169549..6f68eabb8567 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -95,6 +95,7 @@ struct iommu_domain {
>   void *handler_token;
>   struct iommu_domain_geometry geometry;
>   struct iommu_dma_cookie *iova_cookie;
> + unsigned long concurrent_traversal:1;

Does this need to be a bitfield? Even though you only need one bit
now, you could probably make it a flags field with mask bits?


>  };
>  
>  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> @@ -657,6 +658,12 @@ static inline void dev_iommu_priv_set(struct device 
> *dev, void *priv)
>   dev->iommu->priv = priv;
>  }
>  
> +static inline void domain_set_concurrent_traversal(struct iommu_domain 
> *domain,
> +bool value)
> +{
> + domain->concurrent_traversal = value;
> +}
> +
>  int iommu_probe_device(struct device *dev);
>  void iommu_release_device(struct device *dev);
>  
> @@ -677,6 +684,8 @@ int iommu_group_claim_dma_owner(struct iommu_group 
> *group, void *owner);
>  void iommu_group_release_dma_owner(struct iommu_group *group);
>  bool iommu_group_dma_owner_claimed(struct iommu_group *group);
>  
> +void iommu_free_pgtbl_pages(struct iommu_domain *domain,
> + struct list_head *pages);
>  #else /* CONFIG_IOMMU_API */
>  
>  struct iommu_ops {};
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 847ad47a2dfd..ceeb97ebe3e2 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3252,3 +3252,26 @@ bool iommu_group_dma_owner_claimed(struct iommu_group 
> *group)
>   return user;
>  }
>  EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
> +
> +static void pgtble_page_free_rcu(struct rcu_head *rcu)

maybe the names can be consistent? pgtble_ vs pgtbl below.

vote to drop the 'e' :-)

> +{
> + struct page *page = container_of(rcu, struct page, rcu_head);
> +
> + __free_pages(page, 0);
> +}
> +
> +void iommu_free_pgtbl_pages(struct iommu_domain *domain,
> + struct list_head *pages)
> +{
> + struct page *page, *next;
> +
> + if (!domain->concurrent_traversal) {
> + put_pages_list(pages);
> + return;
> + }
> +
> + list_for_each_entry_safe(page, next, pages, lru) {
> + list_del(&page->lru);
> + call_rcu(&page->rcu_head, pgtble_page_free_rcu);
> + }
> +}
> -- 
> 2.25.1
> 


Re: [PATCH v3 3/4] scsi: core: Cap shost max_sectors according to DMA optimum mapping limits

2022-06-09 Thread Bart Van Assche

On 6/9/22 01:00, John Garry wrote:

On 08/06/2022 22:07, Bart Van Assche wrote:

On 6/8/22 10:50, John Garry wrote:
Please note that this limit only applies if we have an IOMMU enabled 
for the scsi host dma device. Otherwise we are limited by dma direct 
or swiotlb max mapping size, as before.


SCSI host bus adapters that support 64-bit DMA may support much larger 
transfer sizes than 128 KiB.


Indeed, and that is my problem today, as my storage controller is 
generating DMA mapping lengths which exceeds 128K and they slow 
everything down.


If you say that SRP enjoys best peformance with larger transfers then 
can you please test this with an IOMMU enabled (iommu group type DMA or 
DMA-FQ)?


Hmm ... what exactly do you want me to test? Do you perhaps want me to 
measure how much performance drops with an IOMMU enabled? I don't have 
access anymore to the SRP setup I referred to in my previous email. But 
I do have access to devices that boot from UFS storage. For these 
devices we need to transfer 2 MiB per request to achieve full bandwidth.


Thanks,

Bart.


Re: [PATCH v8 01/11] iommu: Add max_pasids field in struct iommu_device

2022-06-09 Thread Raj, Ashok


On Tue, Jun 07, 2022 at 09:49:32AM +0800, Lu Baolu wrote:
> Use this field to keep the number of supported PASIDs that an IOMMU
> hardware is able to support. This is a generic attribute of an IOMMU
> and lifting it into the per-IOMMU device structure makes it possible

There is also a per-device attribute that tells what width is supported on
each device. When a device enables SVM, for simplicity we were proposing to
disable SVM on devices that don't support the full width, since it adds
additional complexity on the allocation interface. Is that factored into
this?

> to allocate a PASID for device without calls into the IOMMU drivers.
> Any iommu driver which supports PASID related features should set this
> field before enabling them on the devices.
> 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Jean-Philippe Brucker 
> ---
>  include/linux/intel-iommu.h | 3 ++-
>  include/linux/iommu.h   | 2 ++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 +
>  drivers/iommu/intel/dmar.c  | 7 +++
>  4 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 4f29139bbfc3..e065cbe3c857 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -479,7 +479,6 @@ enum {
>  #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED   (1 << 1)
>  #define VTD_FLAG_SVM_CAPABLE (1 << 2)
>  
> -extern int intel_iommu_sm;
>  extern spinlock_t device_domain_lock;
>  
>  #define sm_supported(iommu)  (intel_iommu_sm && ecap_smts((iommu)->ecap))
> @@ -786,6 +785,7 @@ struct context_entry *iommu_context_addr(struct 
> intel_iommu *iommu, u8 bus,
>  extern const struct iommu_ops intel_iommu_ops;
>  
>  #ifdef CONFIG_INTEL_IOMMU
> +extern int intel_iommu_sm;
>  extern int iommu_calculate_agaw(struct intel_iommu *iommu);
>  extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
>  extern int dmar_disabled;
> @@ -802,6 +802,7 @@ static inline int iommu_calculate_max_sagaw(struct 
> intel_iommu *iommu)
>  }
>  #define dmar_disabled(1)
>  #define intel_iommu_enabled (0)
> +#define intel_iommu_sm (0)

Is the above part of this patch? Or should it be moved up somewhere?
>  #endif
>  
>  static inline const char *decode_prq_descriptor(char *str, size_t size,
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 5e1afe169549..03fbb1b71536 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -318,12 +318,14 @@ struct iommu_domain_ops {
>   * @list: Used by the iommu-core to keep a list of registered iommus
>   * @ops: iommu-ops for talking to this iommu
>   * @dev: struct device for sysfs handling
> + * @max_pasids: number of supported PASIDs
>   */
>  struct iommu_device {
>   struct list_head list;
>   const struct iommu_ops *ops;
>   struct fwnode_handle *fwnode;
>   struct device *dev;
> + u32 max_pasids;
>  };
>  
>  /**
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 88817a3376ef..ae8ec8df47c1 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -3546,6 +3546,7 @@ static int arm_smmu_device_hw_probe(struct 
> arm_smmu_device *smmu)
>   /* SID/SSID sizes */
>   smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
>   smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
> + smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
>  
>   /*
>* If the SMMU supports fewer bits than would fill a single L2 stream
> diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
> index 592c1e1a5d4b..6c33061a 100644
> --- a/drivers/iommu/intel/dmar.c
> +++ b/drivers/iommu/intel/dmar.c
> @@ -1123,6 +1123,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
>  
>   raw_spin_lock_init(&iommu->register_lock);
>  
> + /*
> +  * A value of N in PSS field of eCap register indicates hardware
> +  * supports PASID field of N+1 bits.
> +  */
> + if (pasid_supported(iommu))
> + iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap);
> +
>   /*
>* This is only for hotplug; at boot time intel_iommu_enabled won't
>* be set yet. When intel_iommu_init() runs, it registers the units
> -- 
> 2.25.1
> 
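
[Editor's note] As a worked example of the PSS encoding noted in the dmar.c hunk above (the PSS value is illustrative):

/* eCap.PSS = N  =>  PASID field is N + 1 bits wide  =>  2^(N+1) PASIDs. */
u32 pss = 19;			/* example: a 20-bit PASID implementation */
u32 max_pasids = 2UL << pss;	/* 2 << 19 == 1 << 20 == 1048576 */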


Re: [PATCH v3 2/6] iommu: mtk_iommu: Lookup phandle to retrieve syscon to infracfg

2022-06-09 Thread Miles Chen via iommu
> This driver will get support for more SoCs and the list of infracfg
> compatibles is expected to grow: in order to keep this situation under
> control and avoid a long list of compatible strings, add support to
> retrieve a handle to infracfg's regmap through a new
> "mediatek,infracfg" phandle.

I also like the idea of using mediatek,infracfg instead of a list of
platform compatible strings.

Reviewed-by: Miles Chen  

> 
> In order to keep retrocompatibility with older devicetrees, the old
> way is kept in place.
> 
> Signed-off-by: AngeloGioacchino Del Regno 
> 
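[Editor's note] For illustration, the phandle lookup described above typically reduces to something like the following in the driver, with a fallback for older devicetrees. This is a sketch only; the compatible string, message and error handling are placeholders, not the actual mtk_iommu code:

/* Sketch: prefer the new "mediatek,infracfg" phandle, fall back to the old way. */
static struct regmap *mtk_get_infracfg(struct device *dev)
{
	struct regmap *infracfg;

	infracfg = syscon_regmap_lookup_by_phandle(dev->of_node,
						   "mediatek,infracfg");
	if (!IS_ERR(infracfg))
		return infracfg;

	dev_info(dev, "legacy infracfg lookup, consider adding a mediatek,infracfg phandle\n");
	/* Old, deprecated path: look the syscon up by a per-SoC compatible. */
	return syscon_regmap_lookup_by_compatible("mediatek,mt8173-infracfg");
}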


Re: [PATCH v3 3/4] scsi: core: Cap shost max_sectors according to DMA optimum mapping limits

2022-06-09 Thread John Garry via iommu

On 09/06/2022 18:18, Bart Van Assche wrote:


SCSI host bus adapters that support 64-bit DMA may support much 
larger transfer sizes than 128 KiB.


Indeed, and that is my problem today: my storage controller is 
generating DMA mapping lengths which exceed 128K and they slow 
everything down.


If you say that SRP enjoys best performance with larger transfers then 
can you please test this with an IOMMU enabled (iommu group type DMA 
or DMA-FQ)?


Hmm ... what exactly do you want me to test? Do you perhaps want me to 
measure how much performance drops with an IOMMU enabled? 


Yes, I would like to know of any performance change with an IOMMU 
enabled, and then with an IOMMU enabled plus my series applied.


I don't have 
access anymore to the SRP setup I referred to in my previous email. But 
I do have access to devices that boot from UFS storage. For these 
devices we need to transfer 2 MiB per request to achieve full bandwidth.


ok, but do you have a system where the UFS host controller is behind an 
IOMMU? I had the impression that UFS controllers would be mostly found 
in embedded systems and IOMMUs are not as common there.


Thanks,
John



Re: [PATCH v3 3/6] arm64: dts: mediatek: mt8173: Add mediatek, infracfg phandle for IOMMU

2022-06-09 Thread Miles Chen via iommu
> The IOMMU driver now looks for the "mediatek,infracfg" phandle as a
> new way to retrieve the infracfg syscon regmap:
> even though the old way is retained, it has been deprecated and the
> driver will write a message to kmsg advising users to switch to the
> phandle instead.
> 
> For this reason, assign the right phandle to mediatek,infracfg in
> the iommu node.
> 
> Signed-off-by: AngeloGioacchino Del Regno 
> 

Reviewed-by: Miles Chen  

> ---
>  arch/arm64/boot/dts/mediatek/mt8173.dtsi | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi 
> b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
> index 40d7b47fc52e..825a3c670373 100644
> --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
> +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
> @@ -588,6 +588,7 @@ iommu: iommu@10205000 {
>   interrupts = ;
>   clocks = <&infracfg CLK_INFRA_M4U>;
>   clock-names = "bclk";
> + mediatek,infracfg = <&infracfg>;
>   mediatek,larbs = <&larb0>, <&larb1>, <&larb2>,
><&larb3>, <&larb4>, <&larb5>;
>   #iommu-cells = <1>;
> -- 
> 2.35.1
> 
> 


Re: [PATCH v3 4/6] arm64: dts: mediatek: mt2712e: Add mediatek, infracfg phandle for IOMMU

2022-06-09 Thread Miles Chen via iommu
> The IOMMU driver now looks for the "mediatek,infracfg" phandle as a
> new way to retrieve the infracfg syscon regmap:
> even though the old way is retained, it has been deprecated and the
> driver will write a message to kmsg advising users to switch to the
> phandle instead.
> 
> For this reason, assign the right phandle to mediatek,infracfg in
> the iommu node.
> 
> Signed-off-by: AngeloGioacchino Del Regno 
> 

Reviewed-by: Miles Chen  

> ---
>  arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi 
> b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
> index 623eb3beabf2..4797537cb368 100644
> --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
> +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
> @@ -329,6 +329,7 @@ iommu0: iommu@10205000 {
>   interrupts = ;
>   clocks = <&infracfg CLK_INFRA_M4U>;
>   clock-names = "bclk";
> + mediatek,infracfg = <&infracfg>;
>   mediatek,larbs = <&larb0>, <&larb1>, <&larb2>,
><&larb3>, <&larb6>;
>   #iommu-cells = <1>;
> @@ -346,6 +347,7 @@ iommu1: iommu@1020a000 {
>   interrupts = ;
>   clocks = <&infracfg CLK_INFRA_M4U>;
>   clock-names = "bclk";
> + mediatek,infracfg = <&infracfg>;
>   mediatek,larbs = <&larb4>, <&larb5>, <&larb7>;
>   #iommu-cells = <1>;
>   };
> -- 
> 2.35.1
> 
> 


Re: [PATCH v2] iommu/dma: Add config for PCI SAC address trick

2022-06-09 Thread John Garry via iommu

On 09/06/2022 16:12, Robin Murphy wrote:

For devices stuck behind a conventional PCI bus, saving extra cycles at
33MHz is probably fairly significant. However since native PCI Express
is now the norm for high-performance devices, the optimisation to always
prefer 32-bit addresses for the sake of avoiding DAC is starting to look
rather anachronistic. Technically 32-bit addresses do have shorter TLPs
on PCIe, but unless the device is saturating its link bandwidth with
small transfers it seems unlikely that the difference is appreciable.

What definitely is appreciable, however, is that the IOVA allocator
doesn't behave all that well once the 32-bit space starts getting full.
As DMA working sets get bigger, this optimisation increasingly backfires
and adds considerable overhead to the dma_map path for use-cases like
high-bandwidth networking. We've increasingly bandaged the allocator
in attempts to mitigate this, but it remains fundamentally at odds with
other valid requirements to try as hard as possible to satisfy a request
within the given limit; what we really need is to just avoid this odd
notion of a speculative allocation when it isn't beneficial anyway.

Unfortunately that's where things get awkward... Having been present on
x86 for 15 years or so now, it turns out there are systems which fail to
properly define the upper limit of usable IOVA space for certain devices
and this trick was the only thing letting them work OK. I had a similar
ulterior motive for a couple of early arm64 systems when originally
adding it to iommu-dma, but those really should be fixed with proper
firmware bindings by now. Let's be brave and default it to off in the
hope that CI systems and developers will find and fix those bugs, but
expect that desktop-focused distro configs are likely to want to turn
it back on for maximum compatibility.

Signed-off-by: Robin Murphy 


FWIW,
Reviewed-by: John Garry 

If we're not enabling by default for x86 then doesn't Joerg have some 
XHCI issue which we would now need to quirk? I don't remember which 
device exactly. Or, alternatively, simply ask him to enable this new config.




---

v2: Tweak wording to clarify that it's not really an optimisation in
 general, remove "default X86".

  drivers/iommu/Kconfig | 26 ++
  drivers/iommu/dma-iommu.c |  2 +-
  2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c79a0df090c0..5a225b48dd00 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,6 +144,32 @@ config IOMMU_DMA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
  
+config IOMMU_DMA_PCI_SAC

+   bool "Enable 64-bit legacy PCI optimisation by default"
+   depends on IOMMU_DMA
+   help
+ Enable by default an IOMMU optimisation for 64-bit legacy PCI devices,
+ wherein the DMA API layer will always first try to allocate a 32-bit
+ DMA address suitable for a single address cycle, before falling back
+ to allocating from the device's full usable address range. If your
+ system has 64-bit legacy PCI devices in 32-bit slots where using dual
+ address cycles reduces DMA throughput significantly, this may be
+ beneficial to overall performance.
+
+ If you have a modern PCI Express based system, this feature mostly just
+ represents extra overhead in the allocation path for no practical
+ benefit, and it should usually be preferable to say "n" here.
+
+ However, beware that this feature has also historically papered over
+ bugs where the IOMMU address width and/or device DMA mask is not set
+ correctly. If device DMA problems and IOMMU faults start occurring
+ after disabling this option, it is almost certainly indicative of a
+ latent driver or firmware/BIOS bug, which would previously have only
+ manifested with several gigabytes worth of concurrent DMA mappings.
+
+ If this option is not set, the feature can still be re-enabled at
+ boot time with the "iommu.forcedac=0" command-line argument.
+
  # Shared Virtual Addressing
  config IOMMU_SVA
bool
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f90251572a5d..9f9d9ba7f376 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -67,7 +67,7 @@ struct iommu_dma_cookie {
  };
  
  static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);

-bool iommu_dma_forcedac __read_mostly;
+bool iommu_dma_forcedac __read_mostly = !IS_ENABLED(CONFIG_IOMMU_DMA_PCI_SAC);
  
  static int __init iommu_dma_forcedac_setup(char *str)

  {
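
[Editor's note] To recap the behaviour the new option gates: when forcedac is off, dma-iommu first tries to place the mapping below 4GB before retrying with the device's full DMA limit. A paraphrased sketch of that allocation order (not the verbatim upstream code; the helper name and parameters are illustrative):

/* Paraphrased sketch of the "SAC first" allocation order guarded by forcedac. */
static unsigned long sketch_alloc_iova(struct iova_domain *iovad, size_t iova_len,
				       u64 dma_limit, unsigned long shift)
{
	unsigned long iova = 0;

	/* Speculatively try a 32-bit IOVA so legacy PCI can use single address cycles. */
	if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac)
		iova = alloc_iova_fast(iovad, iova_len,
				       DMA_BIT_MASK(32) >> shift, false);

	/* Fall back to the device's full usable range (DAC on legacy PCI). */
	if (!iova)
		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);

	return iova;
}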




Re: [PATCH v8 02/11] iommu: Add max_pasids field in struct dev_iommu

2022-06-09 Thread Raj, Ashok
On Tue, Jun 07, 2022 at 09:49:33AM +0800, Lu Baolu wrote:
> Use this field to save the number of PASIDs that a device is able to
> consume. It is a generic attribute of a device and lifting it into the
> per-device dev_iommu struct could help to avoid the boilerplate code
> in various IOMMU drivers.
> 
> Signed-off-by: Lu Baolu 
> ---
>  include/linux/iommu.h |  2 ++
>  drivers/iommu/iommu.c | 26 ++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 03fbb1b71536..d50afb2c9a09 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -364,6 +364,7 @@ struct iommu_fault_param {
>   * @fwspec:   IOMMU fwspec data
>   * @iommu_dev:IOMMU device this device is linked to
>   * @priv: IOMMU Driver private data
> + * @max_pasids:  number of PASIDs device can consume
>   *
>   * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
>   *   struct iommu_group  *iommu_group;
> @@ -375,6 +376,7 @@ struct dev_iommu {
>   struct iommu_fwspec *fwspec;
>   struct iommu_device *iommu_dev;
>   void*priv;
> + u32 max_pasids;
>  };
>  
>  int iommu_device_register(struct iommu_device *iommu,
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 847ad47a2dfd..adac85ccde73 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -20,6 +20,7 @@
>  #include 
>  #include 
>  #include 
> +#include 

Is this needed for this patch?

>  #include 
>  #include 
>  #include 
> @@ -218,6 +219,30 @@ static void dev_iommu_free(struct device *dev)
>   kfree(param);
>  }
>  
> +static u32 dev_iommu_get_max_pasids(struct device *dev)
> +{
> + u32 max_pasids = dev->iommu->iommu_dev->max_pasids;
> + u32 num_bits;
> + int ret;
> +
> + if (!max_pasids)
> + return 0;
> +
> + if (dev_is_pci(dev)) {
> + ret = pci_max_pasids(to_pci_dev(dev));
> + if (ret < 0)
> + return 0;
> +
> + return min_t(u32, max_pasids, ret);

Ah.. that answers my other question about considering the device's PASID max.
I guess if we need any enforcement of restricting devices that don't support
the full PASID width, that will be done by some higher layer?

Too many returns in this function; maybe routing all the returns to the end of
the function would be more elegant? (A single-exit sketch follows the quoted
patch below.)

> + }
> +
> + ret = device_property_read_u32(dev, "pasid-num-bits", &num_bits);
> + if (ret)
> + return 0;
> +
> + return min_t(u32, max_pasids, 1UL << num_bits);
> +}
> +
>  static int __iommu_probe_device(struct device *dev, struct list_head 
> *group_list)
>  {
>   const struct iommu_ops *ops = dev->bus->iommu_ops;
> @@ -243,6 +268,7 @@ static int __iommu_probe_device(struct device *dev, 
> struct list_head *group_list
>   }
>  
>   dev->iommu->iommu_dev = iommu_dev;
> + dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
>  
>   group = iommu_group_get_for_dev(dev);
>   if (IS_ERR(group)) {
> -- 
> 2.25.1
> 
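[Editor's note] A sketch of the single-exit structure suggested above, keeping the same logic as the quoted dev_iommu_get_max_pasids(); illustrative only, not the final patch:

static u32 dev_iommu_get_max_pasids(struct device *dev)
{
	u32 max_pasids = dev->iommu->iommu_dev->max_pasids;
	u32 num_pasids = 0;
	u32 num_bits;

	if (dev_is_pci(dev)) {
		int ret = pci_max_pasids(to_pci_dev(dev));

		if (ret > 0)
			num_pasids = ret;
	} else if (!device_property_read_u32(dev, "pasid-num-bits", &num_bits)) {
		num_pasids = 1UL << num_bits;
	}

	/* Single exit: 0 when either side reports no PASID support. */
	return min_t(u32, max_pasids, num_pasids);
}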


Re: [PATCH v2 1/9] PM: domains: Delete usage of driver_deferred_probe_check_state()

2022-06-09 Thread Saravana Kannan via iommu
On Thu, Jun 9, 2022 at 4:45 AM Ulf Hansson  wrote:
>
> On Wed, 1 Jun 2022 at 09:07, Saravana Kannan  wrote:
> >
> > Now that fw_devlink=on by default and fw_devlink supports
> > "power-domains" property, the execution will never get to the point
> > where driver_deferred_probe_check_state() is called before the supplier
> > has probed successfully or before deferred probe timeout has expired.
> >
> > So, delete the call and replace it with -ENODEV.
>
> With fw_devlink=on by default - does that mean that the parameter
> can't be changed?
>
> Or perhaps the point is that we don't want to go back, but rather drop
> the fw_devlink parameter altogether when moving forward?

Good question. For now, I'm keeping fw_devlink=off and
fw_devlink=permissive as debugging options that I can ask people to
try if some probe is getting blocked.

Or maybe for some ultra-low-memory use case that wants to avoid creating
device links, fwnode links, etc. and can build everything in and have
init/probe happen in the right order.

But in the long run, I see a strong possibility for
fw_devlink=off/permissive being removed. I'd still want to keep it for
implementing =rpm where it'd also automatically enable PM runtime
tracking, but I don't understand that well enough yet to do it by
default.

> >
> > Signed-off-by: Saravana Kannan 
>
> Just a minor nitpick below. Nevertheless, feel free to add:
>
> Reviewed-by: Ulf Hansson 

Thanks!

>
> > ---
> >  drivers/base/power/domain.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
> > index 739e52cd4aba..3e86772d5fac 100644
> > --- a/drivers/base/power/domain.c
> > +++ b/drivers/base/power/domain.c
> > @@ -2730,7 +2730,7 @@ static int __genpd_dev_pm_attach(struct device *dev, 
> > struct device *base_dev,
> > mutex_unlock(&gpd_list_lock);
> > dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
> > __func__, PTR_ERR(pd));
> > -   return driver_deferred_probe_check_state(base_dev);
>
> Adding a brief comment about why -EPROBE_DEFER doesn't make sense
> here, would be nice.

Will do once all the reviews comeout/when I send v3.

I'm thinking something like:
/* fw_devlink will take care of retrying for missing suppliers */

-Saravana

>
> > +   return -ENODEV;
> > }
> >
> > dev_dbg(dev, "adding to PM domain %s\n", pd->name);
> > --
> > 2.36.1.255.ge46751e96f-goog
> >
>
> Kind regards
> Uffe


Re: [PATCH v8 04/11] iommu: Add sva iommu_domain support

2022-06-09 Thread Raj, Ashok
Hi Baolu

some minor nits.

On Tue, Jun 07, 2022 at 09:49:35AM +0800, Lu Baolu wrote:
> The sva iommu_domain represents a hardware pagetable that the IOMMU
> hardware could use for SVA translation. This adds some infrastructure
> to support SVA domain in the iommu common layer. It includes:
> 
> - Extend the iommu_domain to support a new IOMMU_DOMAIN_SVA domain
>   type. The IOMMU drivers that support SVA should provide the sva
>   domain specific iommu_domain_ops.
> - Add a helper to allocate an SVA domain. The iommu_domain_free()
>   is still used to free an SVA domain.
> - Add helpers to attach an SVA domain to a device and the reverse
>   operation.
> 
> Some buses, like PCI, route packets without considering the PASID value.
> Thus a DMA target address with PASID might be treated as P2P if the
> address falls into the MMIO BAR of other devices in the group. To make
> things simple, the attach/detach interfaces only apply to devices
> belonging to the singleton groups, and the singleton is immutable in
> fabric i.e. not affected by hotplug.
> 
> The iommu_attach/detach_device_pasid() can be used for other purposes,
> such as kernel DMA with pasid, mediation device, etc.
> 
> Suggested-by: Jean-Philippe Brucker 
> Suggested-by: Jason Gunthorpe 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Jean-Philippe Brucker 
> ---
>  include/linux/iommu.h | 45 -
>  drivers/iommu/iommu.c | 93 +++
>  2 files changed, 136 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 3fbad42c0bf8..9173c5741447 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -64,6 +64,9 @@ struct iommu_domain_geometry {
>  #define __IOMMU_DOMAIN_PT(1U << 2)  /* Domain is identity mapped   */
>  #define __IOMMU_DOMAIN_DMA_FQ(1U << 3)  /* DMA-API uses flush queue  
>   */
>  
> +#define __IOMMU_DOMAIN_SHARED(1U << 4)  /* Page table shared from 
> CPU  */

s/from CPU/with CPU

> +#define __IOMMU_DOMAIN_HOST_VA   (1U << 5)  /* Host CPU virtual address 
> */

Do you mean a general CPU VA, or a host CPU VA? I'm reading the latter as 2nd
stage.

> +
>  /*
>   * This are the possible domain-types
>   *
> @@ -86,15 +89,24 @@ struct iommu_domain_geometry {
>  #define IOMMU_DOMAIN_DMA_FQ  (__IOMMU_DOMAIN_PAGING |\
>__IOMMU_DOMAIN_DMA_API |   \
>__IOMMU_DOMAIN_DMA_FQ)
> +#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SHARED |\
> +  __IOMMU_DOMAIN_HOST_VA)

Doesn't shared automatically mean CPU VA? Do we need another flag?

>  
>  struct iommu_domain {
>   unsigned type;
>   const struct iommu_domain_ops *ops;
>   unsigned long pgsize_bitmap;/* Bitmap of page sizes in use */
> - iommu_fault_handler_t handler;
> - void *handler_token;
>   struct iommu_domain_geometry geometry;
>   struct iommu_dma_cookie *iova_cookie;
> + union {
> + struct {/* IOMMU_DOMAIN_DMA */
> + iommu_fault_handler_t handler;
> + void *handler_token;
> + };
> + struct {/* IOMMU_DOMAIN_SVA */
> + struct mm_struct *mm;
> + };
> + };
>  };
>  
>  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> @@ -262,6 +274,8 @@ struct iommu_ops {
>   * struct iommu_domain_ops - domain specific operations
>   * @attach_dev: attach an iommu domain to a device
>   * @detach_dev: detach an iommu domain from a device
> + * @set_dev_pasid: set an iommu domain to a pasid of device
> + * @block_dev_pasid: block pasid of device from using iommu domain
>   * @map: map a physically contiguous memory region to an iommu domain
>   * @map_pages: map a physically contiguous set of pages of the same size to
>   * an iommu domain.
> @@ -282,6 +296,10 @@ struct iommu_ops {
>  struct iommu_domain_ops {
>   int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
>   void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
> + int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
> +  ioasid_t pasid);
> + void (*block_dev_pasid)(struct iommu_domain *domain, struct device *dev,
> + ioasid_t pasid);
>  
>   int (*map)(struct iommu_domain *domain, unsigned long iova,
>  phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
> @@ -679,6 +697,12 @@ int iommu_group_claim_dma_owner(struct iommu_group 
> *group, void *owner);
>  void iommu_group_release_dma_owner(struct iommu_group *group);
>  bool iommu_group_dma_owner_claimed(struct iommu_group *group);
>  
> +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
> + struct mm_struct *mm);
> +int iommu_attach_device_pasid(struct iommu_domain *domain, struct de
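
[Editor's note] As a usage sketch of the interfaces this patch describes. The helper names come from the commit message and the quoted header; the dev/pasid variables, the (domain, dev, pasid) argument order of the attach/detach helpers and the error convention of iommu_sva_domain_alloc() are assumptions:

/* Sketch: bind current->mm to a device PASID through an SVA domain. */
struct iommu_domain *domain;
int ret;

domain = iommu_sva_domain_alloc(dev, current->mm);
if (!domain)
	return -ENOMEM;		/* error convention assumed */

ret = iommu_attach_device_pasid(domain, dev, pasid);
if (ret) {
	iommu_domain_free(domain);
	return ret;
}

/* ... device DMA tagged with this PASID now translates via current->mm ... */

iommu_detach_device_pasid(domain, dev, pasid);
iommu_domain_free(domain);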

Re: [PATCH v3 3/4] scsi: core: Cap shost max_sectors according to DMA optimum mapping limits

2022-06-09 Thread Bart Van Assche

On 6/9/22 10:54, John Garry wrote:
ok, but do you have a system where the UFS host controller is behind an 
IOMMU? I had the impression that UFS controllers would be mostly found 
in embedded systems and IOMMUs are not as common there.


Modern phones have an IOMMU. Below one can find an example from a Pixel 
6 phone. The UFS storage controller is not controlled by the IOMMU as 
far as I can see, but I wouldn't be surprised if the security team asked 
us one day to enable the IOMMU for the UFS controller.


# (cd /sys/class/iommu && ls */devices)
1a09.sysmmu/devices:
1900.aoc

1a51.sysmmu/devices:
1a44.lwis_csi

1a54.sysmmu/devices:
1aa4.lwis_pdp

1a88.sysmmu/devices:
1a84.lwis_g3aa

1ad0.sysmmu/devices:
1ac4.lwis_ipp  1ac8.lwis_gtnr_align

1b08.sysmmu/devices:
1b45.lwis_itp

1b78.sysmmu/devices:

1b7b.sysmmu/devices:
1b76.lwis_mcsc

1b7e.sysmmu/devices:

1baa.sysmmu/devices:
1a4e.lwis_votf  1ba4.lwis_gdc

1bad.sysmmu/devices:
1ba6.lwis_gdc

1bb0.sysmmu/devices:
1ba8.lwis_scsc

1bc7.sysmmu/devices:
1bc4.lwis_gtnr_merge

1bca.sysmmu/devices:

1bcd.sysmmu/devices:

1bd0.sysmmu/devices:

1bd3.sysmmu/devices:

1c10.sysmmu/devices:
1c30.drmdecon  1c302000.drmdecon

1c11.sysmmu/devices:

1c12.sysmmu/devices:

1c66.sysmmu/devices:
1c64.g2d

1c69.sysmmu/devices:

1c71.sysmmu/devices:
1c70.smfc

1c87.sysmmu/devices:
1c8d.MFC-0  mfc

1c8a.sysmmu/devices:

1ca4.sysmmu/devices:
1cb0.bigocean

1cc4.sysmmu/devices:
1ce0.abrolhos

Bart.


Re: [PATCH v8 01/11] iommu: Add max_pasids field in struct iommu_device

2022-06-09 Thread Jason Gunthorpe via iommu
On Thu, Jun 09, 2022 at 05:25:42PM +, Raj, Ashok wrote:
> 
> On Tue, Jun 07, 2022 at 09:49:32AM +0800, Lu Baolu wrote:
> > Use this field to keep the number of supported PASIDs that an IOMMU
> > hardware is able to support. This is a generic attribute of an IOMMU
> > and lifting it into the per-IOMMU device structure makes it possible
> 
> There is also a per-device attribute that tells what width is supported on
> each device. When a device enables SVM, for simplicity we were proposing to
> disable SVM on devices that don't support the full width, since it adds
> additional complexity on the allocation interface. Is that factored into
> this?

I would like to see the concept of a 'global PASID' and this is the
only place we'd union all the per-device limits together. SVM can
optionally use a global PASID and ENQCMD requires it, but I don't want
to see the core code assuming everything is ENQCMD.

Jason
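
[Editor's note] For what it's worth, one possible shape of such a "global PASID" allocation, capped by the per-device limit introduced in this series. The names and the allocator choice are purely illustrative, not a proposal from the thread:

/* Sketch: one system-wide PASID space, bounded by the requesting device's limit. */
static DEFINE_IDA(global_pasid_ida);

static int hypothetical_alloc_global_pasid(struct device *dev)
{
	u32 max = dev->iommu->max_pasids;	/* per-device limit from this series */

	if (!max)
		return -EOPNOTSUPP;

	/* PASID 0 is reserved; allocate from the shared space below the limit. */
	return ida_alloc_range(&global_pasid_ida, 1, max - 1, GFP_KERNEL);
}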


Re: [PATCH 1/2] vfio/type1: Simplify bus_type determination

2022-06-09 Thread Jason Gunthorpe via iommu
On Wed, Jun 08, 2022 at 03:25:49PM +0100, Robin Murphy wrote:
> Since IOMMU groups are mandatory for drivers to support, it stands to
> reason that any device which has been successfully added to a group
> must be on a bus supported by that IOMMU driver, and therefore a domain
> viable for any device in the group must be viable for all devices in
> the group. This already has to be the case for the IOMMU API's internal
> default domain, for instance. Thus even if the group contains devices
> on different buses, that can only mean that the IOMMU driver actually
> supports such an odd topology, and so without loss of generality we can
> expect the bus type of any arbitrary device in a group to be suitable
> for IOMMU API calls.
> 
> Replace vfio_bus_type() with a trivial callback that simply returns any
> device from which to then derive a usable bus type. This is also a step
> towards removing the vague bus-based interfaces from the IOMMU API.
> 
> Furthermore, scrutiny reveals a lack of protection for the bus and/or
> device being removed while .attach_group is inspecting them; the
> reference we hold on the iommu_group ensures that data remains valid,
> but does not prevent the group's membership changing underfoot. Holding
> the vfio_group's device_lock should be sufficient to block any relevant
> device's VFIO driver from unregistering, and thus block unbinding and
> any further stages of removal for the duration of the attach operation.

The device_lock only protects devices that are on the device_list from
concurrent unregistration; the device returned by
iommu_group_for_each_dev() is not guaranteed to be on the device
list.

> @@ -760,8 +760,11 @@ static int __vfio_container_attach_groups(struct 
> vfio_container *container,
>   int ret = -ENODEV;
>  
>   list_for_each_entry(group, &container->group_list, container_next) {
> + /* Prevent devices unregistering during attach */
> + mutex_lock(&group->device_lock);
>   ret = driver->ops->attach_group(data, group->iommu_group,
>   group->type);
> + mutex_unlock(&group->device_lock);

I still prefer the version where we pass in an arbitrary vfio_device
from the list the group maintains:

   list_first_entry(group->device_list)

And don't call iommu_group_for_each_dev(); it is much simpler to
reason about how it works.

Jason
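
[Editor's note] For clarity, the two approaches being compared look roughly like the sketches below; the vfio structure and member names are written from memory and may not match the tree exactly:

/* Option A (the patch): derive a device via iommu_group_for_each_dev(). */
static int vfio_get_any_dev(struct device *dev, void *data)
{
	*(struct device **)data = dev;
	return 1;	/* non-zero stops the iteration after the first device */
}

/* Option B (preferred above): take the first vfio_device the group tracks. */
static struct vfio_device *vfio_first_device(struct vfio_group *group)
{
	lockdep_assert_held(&group->device_lock);
	return list_first_entry(&group->device_list,
				struct vfio_device, group_next);
}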


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Baolu Lu

On 2022/6/9 20:49, Jason Gunthorpe wrote:

+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(&page->lru);
+   call_rcu(&page->rcu_head, pgtble_page_free_rcu);
+   }

It seems OK, but I wonder if there is benefit to using
put_pages_list() from the rcu callback


The price is that we need to allocate a "struct list_head" and free it
in the rcu callback as well. Currently the list_head is sitting on the
stack.

Best regards,
baolu


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Baolu Lu

On 2022/6/9 21:32, Jason Gunthorpe wrote:

On Thu, Jun 09, 2022 at 02:19:06PM +0100, Robin Murphy wrote:


Is there a significant benefit to keeping both paths, or could we get away
with just always using RCU? Realistically, pagetable pages aren't likely to
be freed all that frequently, except perhaps at domain teardown, but that
shouldn't really be performance-critical, and I guess we could stick an RCU
sync point in iommu_domain_free() if we're really worried about releasing
larger quantities of pages back to the allocator ASAP?


I think you are right, anything that uses the iommu_iotlb_gather may
as well use RCU too.

IIRC the allocators already know that RCU is often sitting on
freed memory and have some contingency to flush it out before OOMing,
so nothing special should be needed.


Fair enough. How about the code below?

static void pgtble_page_free_rcu(struct rcu_head *rcu)
{
struct page *page = container_of(rcu, struct page, rcu_head);

__free_pages(page, 0);
}

/*
 * Free pages gathered in the freelist of iommu_iotlb_gather. Free them via
 * RCU so that lock-free page table walks remain safe.
 */
void iommu_free_iotlb_gather_pages(struct iommu_iotlb_gather *iotlb_gather)
{
struct page *page, *next;

list_for_each_entry_safe(page, next, &iotlb_gather->freelist, lru) {
list_del(&page->lru);
call_rcu(&page->rcu_head, pgtble_page_free_rcu);
}
}

Best regards,
baolu
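
[Editor's note] A rough sketch of where such a gather-based helper could be called from on the core sync path. The placement is hypothetical; the real integration point was still being discussed in this thread:

/* Sketch: free gathered page-table pages once the IOTLB has been synced. */
void iommu_iotlb_sync(struct iommu_domain *domain,
		      struct iommu_iotlb_gather *iotlb_gather)
{
	if (domain->ops->iotlb_sync)
		domain->ops->iotlb_sync(domain, iotlb_gather);

	/* Pages on the freelist are only reused after an RCU grace period. */
	iommu_free_iotlb_gather_pages(iotlb_gather);

	iommu_iotlb_gather_init(iotlb_gather);
}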


Re: [PATCH v2] iommu/dma: Add config for PCI SAC address trick

2022-06-09 Thread Christoph Hellwig
On Thu, Jun 09, 2022 at 04:12:10PM +0100, Robin Murphy wrote:
> +   If you have a modern PCI Express based system, this feature mostly just

Overly long line here.

Otherwise looks good:

Reviewed-by: Christoph Hellwig 


Re: [RFC PATCHES 1/2] iommu: Add RCU-protected page free support

2022-06-09 Thread Baolu Lu

On 2022/6/10 01:06, Raj, Ashok wrote:

On Thu, Jun 09, 2022 at 03:08:10PM +0800, Lu Baolu wrote:

The IOMMU page tables are updated using iommu_map/unmap() interfaces.
Currently, there is no mandatory requirement for drivers to use locks
to ensure concurrent updates to page tables, because it's assumed that
overlapping IOVA ranges do not have concurrent updates. Therefore the
IOMMU drivers only need to take care of concurrent updates to level
page table entries.


The last part doesn't read well..
s/updates to level page table entries/ updates to page-table entries at the
same level



But enabling new features challenges this assumption. For example, the
hardware assisted dirty page tracking feature requires scanning page
tables in interfaces other than mapping and unmapping. This might result
in a use-after-free scenario in which a level page table has been freed
by the unmap() interface, while another thread is scanning the next level
page table.

This adds RCU-protected page free support so that the pages are really
freed and reused after a RCU grace period. Hence, the page tables are
safe for scanning within a rcu_read_lock critical region. Considering
that scanning the page table is a rare case, this also adds a domain
flag and the RCU-protected page free is only used when this flat is set.


s/flat/flag


Above updated. Thank you!



Signed-off-by: Lu Baolu 
---
  include/linux/iommu.h |  9 +
  drivers/iommu/iommu.c | 23 +++
  2 files changed, 32 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e1afe169549..6f68eabb8567 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -95,6 +95,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+   unsigned long concurrent_traversal:1;


Does this need to be a bitfield? Even though you need just one bit
now, you could probably make it a mask with room for more bits?



As discussed in another reply, I am about to drop the driver opt-in and
wrap the helper around the iommu_iotlb_gather.




  };
  
  static inline bool iommu_is_dma_domain(struct iommu_domain *domain)

@@ -657,6 +658,12 @@ static inline void dev_iommu_priv_set(struct device *dev, 
void *priv)
dev->iommu->priv = priv;
  }
  
+static inline void domain_set_concurrent_traversal(struct iommu_domain *domain,

+  bool value)
+{
+   domain->concurrent_traversal = value;
+}
+
  int iommu_probe_device(struct device *dev);
  void iommu_release_device(struct device *dev);
  
@@ -677,6 +684,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);

  void iommu_group_release_dma_owner(struct iommu_group *group);
  bool iommu_group_dma_owner_claimed(struct iommu_group *group);
  
+void iommu_free_pgtbl_pages(struct iommu_domain *domain,

+   struct list_head *pages);
  #else /* CONFIG_IOMMU_API */
  
  struct iommu_ops {};

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..ceeb97ebe3e2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3252,3 +3252,26 @@ bool iommu_group_dma_owner_claimed(struct iommu_group 
*group)
return user;
  }
  EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+
+static void pgtble_page_free_rcu(struct rcu_head *rcu)


maybe the names can be consistent? pgtble_ vs pgtbl below.

vote to drop the 'e' :-)


Updated.




+{
+   struct page *page = container_of(rcu, struct page, rcu_head);
+
+   __free_pages(page, 0);
+}
+
+void iommu_free_pgtbl_pages(struct iommu_domain *domain,
+   struct list_head *pages)
+{
+   struct page *page, *next;
+
+   if (!domain->concurrent_traversal) {
+   put_pages_list(pages);
+   return;
+   }
+
+   list_for_each_entry_safe(page, next, pages, lru) {
+   list_del(&page->lru);
+   call_rcu(&page->rcu_head, pgtble_page_free_rcu);
+   }
+}
--
2.25.1



Best regards,
baolu


Re: [PATCH v8 01/11] iommu: Add max_pasids field in struct iommu_device

2022-06-09 Thread Baolu Lu

On 2022/6/10 01:25, Raj, Ashok wrote:

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4f29139bbfc3..e065cbe3c857 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -479,7 +479,6 @@ enum {
  #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED(1 << 1)
  #define VTD_FLAG_SVM_CAPABLE  (1 << 2)
  
-extern int intel_iommu_sm;

  extern spinlock_t device_domain_lock;
  
  #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))

@@ -786,6 +785,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu 
*iommu, u8 bus,
  extern const struct iommu_ops intel_iommu_ops;
  
  #ifdef CONFIG_INTEL_IOMMU

+extern int intel_iommu_sm;
  extern int iommu_calculate_agaw(struct intel_iommu *iommu);
  extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
  extern int dmar_disabled;
@@ -802,6 +802,7 @@ static inline int iommu_calculate_max_sagaw(struct 
intel_iommu *iommu)
  }
  #define dmar_disabled (1)
  #define intel_iommu_enabled (0)
+#define intel_iommu_sm (0)

Is the above part of this patch? Or should it be moved up somewhere?


This is to make pasid_supported() usable in dmar.c. It's only needed by
the change in this patch. I should make this clear in the commit
message. :-)

Best regards,
baolu


Re: [PATCH v8 02/11] iommu: Add max_pasids field in struct dev_iommu

2022-06-09 Thread Baolu Lu

On 2022/6/10 03:01, Raj, Ashok wrote:

On Tue, Jun 07, 2022 at 09:49:33AM +0800, Lu Baolu wrote:

Use this field to save the number of PASIDs that a device is able to
consume. It is a generic attribute of a device and lifting it into the
per-device dev_iommu struct could help to avoid the boilerplate code
in various IOMMU drivers.

Signed-off-by: Lu Baolu 
---
  include/linux/iommu.h |  2 ++
  drivers/iommu/iommu.c | 26 ++
  2 files changed, 28 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 03fbb1b71536..d50afb2c9a09 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -364,6 +364,7 @@ struct iommu_fault_param {
   * @fwspec:IOMMU fwspec data
   * @iommu_dev: IOMMU device this device is linked to
   * @priv:  IOMMU Driver private data
+ * @max_pasids:  number of PASIDs device can consume
   *
   * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
   *struct iommu_group  *iommu_group;
@@ -375,6 +376,7 @@ struct dev_iommu {
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void*priv;
+   u32 max_pasids;
  };
  
  int iommu_device_register(struct iommu_device *iommu,

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..adac85ccde73 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -20,6 +20,7 @@
  #include 
  #include 
  #include 
+#include 


Is this needed for this patch?


Yes. It's for pci_max_pasids().




  #include 
  #include 
  #include 
@@ -218,6 +219,30 @@ static void dev_iommu_free(struct device *dev)
kfree(param);
  }
  
+static u32 dev_iommu_get_max_pasids(struct device *dev)

+{
+   u32 max_pasids = dev->iommu->iommu_dev->max_pasids;
+   u32 num_bits;
+   int ret;
+
+   if (!max_pasids)
+   return 0;
+
+   if (dev_is_pci(dev)) {
+   ret = pci_max_pasids(to_pci_dev(dev));
+   if (ret < 0)
+   return 0;
+
+   return min_t(u32, max_pasids, ret);


Ah.. that answers my other question about considering the device's PASID max.
I guess if we need any enforcement of restricting devices that don't support
the full PASID width, that will be done by some higher layer?


The mm->pasid style of SVA is explicitly enabled through
iommu_dev_enable_feature(IOMMU_DEV_FEAT_SVA). The IOMMU driver specific
restriction might be put there?



Too many returns in this function; maybe routing all the returns to the end of
the function would be more elegant?


I didn't find room for cleanup after a quick scan of the code. But sure, let
me go through the code again offline.




+   }
+
+   ret = device_property_read_u32(dev, "pasid-num-bits", &num_bits);
+   if (ret)
+   return 0;
+
+   return min_t(u32, max_pasids, 1UL << num_bits);
+}
+
  static int __iommu_probe_device(struct device *dev, struct list_head 
*group_list)
  {
const struct iommu_ops *ops = dev->bus->iommu_ops;
@@ -243,6 +268,7 @@ static int __iommu_probe_device(struct device *dev, struct 
list_head *group_list
}
  
  	dev->iommu->iommu_dev = iommu_dev;

+   dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
  
  	group = iommu_group_get_for_dev(dev);

if (IS_ERR(group)) {
--
2.25.1



Best regards,
Baolu