[PATCHv10 8/9] iommu: arm-smmu-impl: Use table to list QCOM implementations

2020-11-24 Thread Sai Prakash Ranjan
Use a table and of_match_node() to match the QCOM implementation
instead of multiple of_device_is_compatible() calls for each
QCOM SMMU implementation.

Signed-off-by: Sai Prakash Ranjan 
Acked-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  9 +
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 21 -
 drivers/iommu/arm/arm-smmu/arm-smmu.h  |  1 -
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 7fed89c9d18a..26e2734eb4d7 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -214,14 +214,7 @@ struct arm_smmu_device *arm_smmu_impl_init(struct 
arm_smmu_device *smmu)
if (of_device_is_compatible(np, "nvidia,tegra194-smmu"))
return nvidia_smmu_impl_init(smmu);
 
-   if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") ||
-   of_device_is_compatible(np, "qcom,sc7180-smmu-500") ||
-   of_device_is_compatible(np, "qcom,sm8150-smmu-500") ||
-   of_device_is_compatible(np, "qcom,sm8250-smmu-500"))
-   return qcom_smmu_impl_init(smmu);
-
-   if (of_device_is_compatible(smmu->dev->of_node, "qcom,adreno-smmu"))
-   return qcom_adreno_smmu_impl_init(smmu);
+   smmu = qcom_smmu_impl_init(smmu);
 
if (of_device_is_compatible(np, "marvell,ap806-smmu-500"))
smmu->impl = &mrvl_mmu500_impl;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index d0636c803a36..add1859b2899 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -318,12 +318,23 @@ static struct arm_smmu_device *qcom_smmu_create(struct 
arm_smmu_device *smmu,
return &qsmmu->smmu;
 }
 
+static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
+   { .compatible = "qcom,sc7180-smmu-500" },
+   { .compatible = "qcom,sdm845-smmu-500" },
+   { .compatible = "qcom,sm8150-smmu-500" },
+   { .compatible = "qcom,sm8250-smmu-500" },
+   { }
+};
+
 struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 {
-   return qcom_smmu_create(smmu, &qcom_smmu_impl);
-}
+   const struct device_node *np = smmu->dev->of_node;
 
-struct arm_smmu_device *qcom_adreno_smmu_impl_init(struct arm_smmu_device *smmu)
-{
-   return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
+   if (of_match_node(qcom_smmu_impl_of_match, np))
+   return qcom_smmu_create(smmu, &qcom_smmu_impl);
+
+   if (of_device_is_compatible(np, "qcom,adreno-smmu"))
+   return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
+
+   return smmu;
 }
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index cb7ca3a444c9..d2a2d1bc58ba 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -523,7 +523,6 @@ static inline void arm_smmu_writeq(struct arm_smmu_device 
*smmu, int page,
 struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu);
 struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu);
-struct arm_smmu_device *qcom_adreno_smmu_impl_init(struct arm_smmu_device *smmu);
 
 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx);
 int arm_mmu500_reset(struct arm_smmu_device *smmu);
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 7/9] drm/msm/a6xx: Add support for using system cache on MMU500 based targets

2020-11-24 Thread Sai Prakash Ranjan
From: Jordan Crouse 

GPU targets with an MMU-500 attached have a slightly different process for
enabling system cache. Use the compatible string on the IOMMU phandle
to see if an MMU-500 is attached and modify the programming sequence
accordingly.

Signed-off-by: Jordan Crouse 
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 46 +--
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  1 +
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 95c98c642876..3f8b92da8cba 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1042,6 +1042,8 @@ static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
 
 static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
 {
+   struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+   struct msm_gpu *gpu = &adreno_gpu->base;
u32 cntl1_regval = 0;
 
if (IS_ERR(a6xx_gpu->llc_mmio))
@@ -1055,11 +1057,17 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
   (gpu_scid << 15) | (gpu_scid << 20);
}
 
+   /*
+* For targets with a MMU500, activate the slice but don't program the
+* register.  The XBL will take care of that.
+*/
if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
-   u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
+   if (!a6xx_gpu->have_mmu500) {
+   u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
 
-   gpuhtw_scid &= 0x1f;
-   cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
+   gpuhtw_scid &= 0x1f;
+   cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
+   }
}
 
if (cntl1_regval) {
@@ -1067,13 +1075,20 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
 * Program the slice IDs for the various GPU blocks and GPU MMU
 * pagetables
 */
-   a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
-
-   /*
-* Program cacheability overrides to not allocate cache lines on
-* a write miss
-*/
-   a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
+   if (a6xx_gpu->have_mmu500)
+   gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0),
+   cntl1_regval);
+   else {
+   a6xx_llc_write(a6xx_gpu,
+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
+
+   /*
+* Program cacheability overrides to not allocate cache
+* lines on a write miss
+*/
+   a6xx_llc_rmw(a6xx_gpu,
+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
+   }
}
 }
 
@@ -1086,10 +1101,21 @@ static void a6xx_llc_slices_destroy(struct a6xx_gpu 
*a6xx_gpu)
 static void a6xx_llc_slices_init(struct platform_device *pdev,
struct a6xx_gpu *a6xx_gpu)
 {
+   struct device_node *phandle;
+
a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
if (IS_ERR(a6xx_gpu->llc_mmio))
return;
 
+   /*
+* There is a different programming path for targets with an mmu500
+* attached, so detect if that is the case
+*/
+   phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
+   a6xx_gpu->have_mmu500 = (phandle &&
+   of_device_is_compatible(phandle, "arm,mmu-500"));
+   of_node_put(phandle);
+
a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
 
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 9e6079af679c..e793d329e77b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -32,6 +32,7 @@ struct a6xx_gpu {
void __iomem *llc_mmio;
void *llc_slice;
void *htw_llc_slice;
+   bool have_mmu500;
 };
 
 #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 9/9] iommu: arm-smmu-impl: Add a space before open parenthesis

2020-11-24 Thread Sai Prakash Ranjan
Fix the checkpatch warning for space required before the open
parenthesis.

Signed-off-by: Sai Prakash Ranjan 
Acked-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 26e2734eb4d7..136872e77195 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -12,7 +12,7 @@
 
 static int arm_smmu_gr0_ns(int offset)
 {
-   switch(offset) {
+   switch (offset) {
case ARM_SMMU_GR0_sCR0:
case ARM_SMMU_GR0_sACR:
case ARM_SMMU_GR0_sGFSR:
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 0/9] System Cache support for GPU and required SMMU support

2020-11-24 Thread Sai Prakash Ranjan
Some hardware variants contain a system cache, also called the last level
cache (LLC). This cache is typically a large block which is shared
by multiple clients on the SoC. The GPU uses the system cache to cache
both GPU data buffers (like textures) as well as the SMMU pagetables.
This helps improve render performance as well as lower power
consumption by reducing bus traffic to system memory.

The system cache architecture allows the cache to be split into slices
which can then be used by multiple SoC clients. This patch series is an
effort to enable and use two of those slices preallocated for the GPU,
one for the GPU data buffers and another for the GPU SMMU hardware
pagetables.
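
For reference, the LLCC slice interface used for this follows a
get/activate/deactivate/put lifecycle. Below is a minimal, hypothetical
sketch of that flow (example function names, error handling trimmed); the
real usage is in the a6xx_gpu.c patches in this series:

#include <linux/err.h>
#include <linux/soc/qcom/llcc-qcom.h>

static struct llcc_slice_desc *gpu_slice;

static void example_llc_init(void)
{
        /* Acquire the slice preallocated for GPU data buffers */
        gpu_slice = llcc_slice_getd(LLCC_GPU);
        if (IS_ERR(gpu_slice))
                return;

        /* Activate it (e.g. on GPU power-up) and fetch its SCID */
        if (!llcc_slice_activate(gpu_slice)) {
                u32 gpu_scid = llcc_get_slice_id(gpu_slice);

                /* ... program gpu_scid into the GPU cache control registers ... */
        }
}

static void example_llc_fini(void)
{
        /* Deactivate on power collapse, release the slice on teardown */
        llcc_slice_deactivate(gpu_slice);
        llcc_slice_putd(gpu_slice);
}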

Patches 1-7 add system cache support in the SMMU and GPU drivers.
Patches 8 and 9 are minor cleanups for the arm-smmu impl code.

Changes in v10:
 * Fix non-strict mode domain attr handling (Will)
 * Split the domain attribute patch into two (Will)

Changes in v9:
 * Change name from domain_attr_io_pgtbl_cfg to io_pgtable_domain_attr (Will)
 * Modify comment for the quirk as suggested (Will)
 * Compare with IO_PGTABLE_QUIRK_NON_STRICT for non-strict mode (Will)

Changes in v8:
 * Introduce a generic domain attribute for pagetable config (Will)
 * Rename quirk to more generic IO_PGTABLE_QUIRK_ARM_OUTER_WBWA (Will)
 * Move non-strict mode to use new struct domain_attr_io_pgtbl_config (Will)

Changes in v7:
 * Squash Jordan's patch to support MMU500 targets
 * Rebase on top of for-joerg/arm-smmu/updates and Jordan's short series for 
adreno-smmu impl

Changes in v6:
 * Move table to arm-smmu-qcom (Robin)

Changes in v5:
 * Drop cleanup of blank lines since it was intentional (Robin)
 * Rebase again on top of msm-next-pgtables as it moves pretty fast

Changes in v4:
 * Drop IOMMU_SYS_CACHE prot flag
 * Rebase on top of 
https://gitlab.freedesktop.org/drm/msm/-/tree/msm-next-pgtables

Changes in v3:
 * Fix domain attribute setting to before iommu_attach_device()
 * Fix few code style and checkpatch warnings
 * Rebase on top of Jordan's latest split pagetables and per-instance
   pagetables support

Changes in v2:
 * Addressed review comments and rebased on top of Jordan's split
   pagetables series

Jordan Crouse (1):
  drm/msm/a6xx: Add support for using system cache on MMU500 based
targets

Sai Prakash Ranjan (6):
  iommu/io-pgtable: Add a domain attribute for pagetable configuration
  iommu/io-pgtable-arm: Add support to use system cache
  iommu/arm-smmu: Add support for pagetable config domain attribute
  iommu/arm-smmu: Move non-strict mode to use io_pgtable_domain_attr
  iommu: arm-smmu-impl: Use table to list QCOM implementations
  iommu: arm-smmu-impl: Add a space before open parenthesis

Sharat Masetty (2):
  drm/msm: rearrange the gpu_rmw() function
  drm/msm/a6xx: Add support for using system cache(LLC)

 drivers/gpu/drm/msm/adreno/a6xx_gpu.c  | 109 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h  |   5 +
 drivers/gpu/drm/msm/adreno/adreno_gpu.c|  17 
 drivers/gpu/drm/msm/msm_drv.c  |   8 ++
 drivers/gpu/drm/msm/msm_drv.h  |   1 +
 drivers/gpu/drm/msm/msm_gpu.h  |   5 +-
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  11 +--
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c |  21 +++-
 drivers/iommu/arm/arm-smmu/arm-smmu.c  |  33 ++-
 drivers/iommu/arm/arm-smmu/arm-smmu.h  |   3 +-
 drivers/iommu/io-pgtable-arm.c |  10 +-
 include/linux/io-pgtable.h |   8 ++
 include/linux/iommu.h  |   1 +
 13 files changed, 205 insertions(+), 27 deletions(-)


base-commit: a29bbb0861f487a5e144dc997a9f71a36c7a2404
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 5/9] drm/msm: rearrange the gpu_rmw() function

2020-11-24 Thread Sai Prakash Ranjan
From: Sharat Masetty 

The register read-modify-write construct is generic enough
that it can be used by other subsystems as needed. Create
a more generic rmw() function and have gpu_rmw() use
this new function.

Signed-off-by: Sharat Masetty 
Reviewed-by: Jordan Crouse 
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/gpu/drm/msm/msm_drv.c | 8 
 drivers/gpu/drm/msm/msm_drv.h | 1 +
 drivers/gpu/drm/msm/msm_gpu.h | 5 +
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 49685571dc0e..a1e22b974b77 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -180,6 +180,14 @@ u32 msm_readl(const void __iomem *addr)
return val;
 }
 
+void msm_rmw(void __iomem *addr, u32 mask, u32 or)
+{
+   u32 val = msm_readl(addr);
+
+   val &= ~mask;
+   msm_writel(val | or, addr);
+}
+
 struct msm_vblank_work {
struct work_struct work;
int crtc_id;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b9dd8f8f4887..655b3b0424a1 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -478,6 +478,7 @@ void __iomem *msm_ioremap_quiet(struct platform_device 
*pdev, const char *name,
const char *dbgname);
 void msm_writel(u32 data, void __iomem *addr);
 u32 msm_readl(const void __iomem *addr);
+void msm_rmw(void __iomem *addr, u32 mask, u32 or);
 
 struct msm_gpu_submitqueue;
 int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 6c9e1fdc1a76..b2b419277953 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -246,10 +246,7 @@ static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
 
 static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
 {
-   uint32_t val = gpu_read(gpu, reg);
-
-   val &= ~mask;
-   gpu_write(gpu, reg, val | or);
+   msm_rmw(gpu->mmio + (reg << 2), mask, or);
 }
 
 static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 6/9] drm/msm/a6xx: Add support for using system cache(LLC)

2020-11-24 Thread Sai Prakash Ranjan
From: Sharat Masetty 

The last level system cache can be partitioned into 32 different
slices, of which the GPU has two slices preallocated. One slice is
used for caching GPU buffers and the other is used for caching the
GPU SMMU pagetables. This patch talks to the core system cache
driver to acquire the slice handles, configures the SCIDs for those
slices, and activates and deactivates the slices upon GPU power
collapse and restore.

Some support from the IOMMU driver is also needed to set the right
TCR attributes in order to make use of the system cache. The GPU can
then override a few cacheability parameters, which it does to change
write-allocate to write-no-allocate, as the GPU hardware does not
benefit much from it.

DOMAIN_ATTR_IO_PGTABLE_CFG is another domain-level attribute used
by the IOMMU driver for pagetable configuration; it will initially
be used to set a quirk that applies the right attributes for caching
the hardware pagetables in the system cache.
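
For illustration, the client-side sequence this describes (set the
pagetable config attribute on the domain, then attach the device) might
look roughly like the sketch below. This is a hypothetical helper, not
part of the diff; the corresponding change in this patch is in
adreno_gpu.c per the diffstat:

#include <linux/iommu.h>
#include <linux/io-pgtable.h>

/*
 * Hypothetical example: request outer write-back/write-allocate pagetable
 * walks for this domain before attaching the device, so the quirk is
 * visible when the io-pgtable is allocated.
 */
static int example_use_llc_for_pagetables(struct iommu_domain *domain,
                                          struct device *dev)
{
        struct io_pgtable_domain_attr pgtbl_cfg = {
                .quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA,
        };
        int ret;

        ret = iommu_domain_set_attr(domain, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
        if (ret)
                return ret;

        return iommu_attach_device(domain, dev);
}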

Signed-off-by: Sharat Masetty 
[saiprakash.ranjan: fix to set attr before device attach to iommu and rebase]
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 83 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |  4 ++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 17 +
 3 files changed, 104 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 948f3656c20c..95c98c642876 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -8,7 +8,9 @@
 #include "a6xx_gpu.h"
 #include "a6xx_gmu.xml.h"
 
+#include 
 #include 
+#include 
 
 #define GPU_PAS_ID 13
 
@@ -1022,6 +1024,79 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
return IRQ_HANDLED;
 }
 
+static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
+{
+   return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
+{
+   return msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
+}
+
+static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
+{
+   llcc_slice_deactivate(a6xx_gpu->llc_slice);
+   llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
+{
+   u32 cntl1_regval = 0;
+
+   if (IS_ERR(a6xx_gpu->llc_mmio))
+   return;
+
+   if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
+   u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
+
+   gpu_scid &= 0x1f;
+   cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
+  (gpu_scid << 15) | (gpu_scid << 20);
+   }
+
+   if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
+   u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
+
+   gpuhtw_scid &= 0x1f;
+   cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
+   }
+
+   if (cntl1_regval) {
+   /*
+* Program the slice IDs for the various GPU blocks and GPU MMU
+* pagetables
+*/
+   a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
+
+   /*
+* Program cacheability overrides to not allocate cache lines on
+* a write miss
+*/
+   a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
+   }
+}
+
+static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
+{
+   llcc_slice_putd(a6xx_gpu->llc_slice);
+   llcc_slice_putd(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_slices_init(struct platform_device *pdev,
+   struct a6xx_gpu *a6xx_gpu)
+{
+   a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+   if (IS_ERR(a6xx_gpu->llc_mmio))
+   return;
+
+   a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
+   a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+
+   if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
+   a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
+}
+
 static int a6xx_pm_resume(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -1038,6 +1113,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
 
msm_gpu_resume_devfreq(gpu);
 
+   a6xx_llc_activate(a6xx_gpu);
+
return 0;
 }
 
@@ -1048,6 +1125,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
 
trace_msm_gpu_suspend(0);
 
+   a6xx_llc_deactivate(a6xx_gpu);
+
devfreq_suspend_device(gpu->devfreq.devfreq);
 
return a6xx_gmu_stop(a6xx_gpu);
@@ -1091,6 +1170,8 @@ static void a6xx_destroy(struct msm_gpu *gpu)
drm_gem_object_put(a6xx_gpu->shadow_bo);
}
 
+   a6xx_llc_slices_destroy(a6xx_gpu);
+
a6xx_gmu_remove(a6xx_gpu);
 

[PATCHv10 2/9] iommu/io-pgtable-arm: Add support to use system cache

2020-11-24 Thread Sai Prakash Ranjan
Add a quirk IO_PGTABLE_QUIRK_ARM_OUTER_WBWA to override
the outer-cacheability attributes set in the TCR for a
non-coherent page table walker when using system cache.

Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/io-pgtable-arm.c | 10 --
 include/linux/io-pgtable.h |  4 
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index a7a9bc08dcd1..7c9ea9d7874a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -761,7 +761,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
 
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NON_STRICT |
-   IO_PGTABLE_QUIRK_ARM_TTBR1))
+   IO_PGTABLE_QUIRK_ARM_TTBR1 |
+   IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
return NULL;
 
data = arm_lpae_alloc_pgtable(cfg);
@@ -773,10 +774,15 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, 
void *cookie)
tcr->sh = ARM_LPAE_TCR_SH_IS;
tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
+   goto out_free_data;
} else {
tcr->sh = ARM_LPAE_TCR_SH_OS;
tcr->irgn = ARM_LPAE_TCR_RGN_NC;
-   tcr->orgn = ARM_LPAE_TCR_RGN_NC;
+   if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
+   tcr->orgn = ARM_LPAE_TCR_RGN_NC;
+   else
+   tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
}
 
tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 215fd9d69540..fb4d5a763e0c 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -86,6 +86,9 @@ struct io_pgtable_cfg {
 *
 * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table
 *  for use in the upper half of a split address space.
+*
+* IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
+*  attributes set in the TCR for a non-coherent page-table walker.
 */
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
#define IO_PGTABLE_QUIRK_NO_PERMS   BIT(1)
@@ -93,6 +96,7 @@ struct io_pgtable_cfg {
#define IO_PGTABLE_QUIRK_ARM_MTK_EXTBIT(3)
#define IO_PGTABLE_QUIRK_NON_STRICT BIT(4)
#define IO_PGTABLE_QUIRK_ARM_TTBR1  BIT(5)
+   #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
unsigned long   quirks;
unsigned long   pgsize_bitmap;
unsigned intias;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 4/9] iommu/arm-smmu: Move non-strict mode to use io_pgtable_domain_attr

2020-11-24 Thread Sai Prakash Ranjan
Now that we have a struct io_pgtable_domain_attr with quirks,
use that for non_strict mode as well, thereby removing the need
for more members of arm_smmu_domain in the future.

Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 15 +--
 drivers/iommu/arm/arm-smmu/arm-smmu.h |  1 -
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 4b9b10fe50ed..d8979bb71fc0 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -786,9 +786,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
goto out_clear_smmu;
}
 
-   if (smmu_domain->non_strict)
-   pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
-
if (smmu_domain->pgtbl_cfg.quirks)
pgtbl_cfg.quirks |= smmu_domain->pgtbl_cfg.quirks;
 
@@ -1526,9 +1523,12 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
break;
case IOMMU_DOMAIN_DMA:
switch (attr) {
-   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   *(int *)data = smmu_domain->non_strict;
+   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: {
+   bool non_strict = smmu_domain->pgtbl_cfg.quirks &
+ IO_PGTABLE_QUIRK_NON_STRICT;
+   *(int *)data = non_strict;
return 0;
+   }
default:
return -ENODEV;
}
@@ -1578,7 +1578,10 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
case IOMMU_DOMAIN_DMA:
switch (attr) {
case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   smmu_domain->non_strict = *(int *)data;
+   if (*(int *)data)
+   smmu_domain->pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+   else
+   smmu_domain->pgtbl_cfg.quirks &= ~IO_PGTABLE_QUIRK_NON_STRICT;
break;
default:
ret = -ENODEV;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index bb5a419f240f..cb7ca3a444c9 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -368,7 +368,6 @@ struct arm_smmu_domain {
const struct iommu_flush_ops*flush_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage  stage;
-   boolnon_strict;
struct mutexinit_mutex; /* Protects smmu pointer */
spinlock_t  cb_lock; /* Serialises ATS1* ops and TLB syncs */
struct iommu_domain domain;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 3/9] iommu/arm-smmu: Add support for pagetable config domain attribute

2020-11-24 Thread Sai Prakash Ranjan
Add support for the domain attribute DOMAIN_ATTR_IO_PGTABLE_CFG
to get/set pagetable configuration data, which initially will
be used to set quirks and later can be extended to include
other pagetable configuration data.

Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 20 
 drivers/iommu/arm/arm-smmu/arm-smmu.h |  1 +
 2 files changed, 21 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 0f28a8614da3..4b9b10fe50ed 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -789,6 +789,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
if (smmu_domain->non_strict)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
+   if (smmu_domain->pgtbl_cfg.quirks)
+   pgtbl_cfg.quirks |= smmu_domain->pgtbl_cfg.quirks;
+
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops) {
ret = -ENOMEM;
@@ -1511,6 +1514,12 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
return 0;
+   case DOMAIN_ATTR_IO_PGTABLE_CFG: {
+   struct io_pgtable_domain_attr *pgtbl_cfg = data;
+   *pgtbl_cfg = smmu_domain->pgtbl_cfg;
+
+   return 0;
+   }
default:
return -ENODEV;
}
@@ -1551,6 +1560,17 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+   case DOMAIN_ATTR_IO_PGTABLE_CFG: {
+   struct io_pgtable_domain_attr *pgtbl_cfg = data;
+
+   if (smmu_domain->smmu) {
+   ret = -EPERM;
+   goto out_unlock;
+   }
+
+   smmu_domain->pgtbl_cfg = *pgtbl_cfg;
+   break;
+   }
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 04288b6fc619..bb5a419f240f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -364,6 +364,7 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
struct arm_smmu_device  *smmu;
struct io_pgtable_ops   *pgtbl_ops;
+   struct io_pgtable_domain_attr   pgtbl_cfg;
const struct iommu_flush_ops*flush_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage  stage;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



[PATCHv10 1/9] iommu/io-pgtable: Add a domain attribute for pagetable configuration

2020-11-24 Thread Sai Prakash Ranjan
Add a new iommu domain attribute DOMAIN_ATTR_IO_PGTABLE_CFG for
pagetable configuration. Initially it will be used to set quirks,
such as the system cache (aka last level cache) quirk used by
client drivers like the GPU to set the right attributes for caching
the hardware pagetables in the system cache, and later it can be
extended to include other page table configuration data.

Signed-off-by: Sai Prakash Ranjan 
---
 include/linux/io-pgtable.h | 4 
 include/linux/iommu.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 4cde111e425b..215fd9d69540 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -208,6 +208,10 @@ struct io_pgtable {
 
 #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
 
+struct io_pgtable_domain_attr {
+   unsigned long quirks;
+};
+
 static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
 {
iop->cfg.tlb->tlb_flush_all(iop->cookie);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b95a6f8db6ff..ffaa389ea128 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -118,6 +118,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+   DOMAIN_ATTR_IO_PGTABLE_CFG,
DOMAIN_ATTR_MAX,
 };
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



Re: [PATCHv9 2/8] iommu/arm-smmu: Add domain attribute for pagetable configuration

2020-11-24 Thread Sai Prakash Ranjan

On 2020-11-25 03:11, Will Deacon wrote:

On Mon, Nov 23, 2020 at 10:35:55PM +0530, Sai Prakash Ranjan wrote:

Add iommu domain attribute for pagetable configuration which
initially will be used to set quirks like for system cache aka
last level cache to be used by client drivers like GPU to set
right attributes for caching the hardware pagetables into the
system cache and later can be extended to include other page
table configuration data.

Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 20 
 drivers/iommu/arm/arm-smmu/arm-smmu.h |  1 +
 include/linux/io-pgtable.h|  4 
 include/linux/iommu.h |  1 +
 4 files changed, 26 insertions(+)


Given that we're heading for a v10 to address my comments on patch 3,
then I guess you may as well split this into two patches so that I can
share just the attribute with Rob rather than the driver parts.

Please keep it all as one series though, with the common parts at the
beginning, and I'll figure it out.



Ok I will split up and send v10.

Thanks,
Sai

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as broken

2020-11-24 Thread Merger, Edgar [AUTOSOL/MAS/AUGS]
I see that problem only on systems that use an R1305G APU.

sudo cat /sys/kernel/debug/dri/0/amdgpu_firmware_info

shows

VCE feature version: 0, firmware version: 0x
UVD feature version: 0, firmware version: 0x
MC feature version: 0, firmware version: 0x
ME feature version: 50, firmware version: 0x00a3
PFP feature version: 50, firmware version: 0x00bb
CE feature version: 50, firmware version: 0x004f
RLC feature version: 1, firmware version: 0x0049
RLC SRLC feature version: 1, firmware version: 0x0001
RLC SRLG feature version: 1, firmware version: 0x0001
RLC SRLS feature version: 1, firmware version: 0x0001
MEC feature version: 50, firmware version: 0x01b5
MEC2 feature version: 50, firmware version: 0x01b5
SOS feature version: 0, firmware version: 0x
ASD feature version: 0, firmware version: 0x2130
TA XGMI feature version: 0, firmware version: 0x
TA RAS feature version: 0, firmware version: 0x
SMC feature version: 0, firmware version: 0x2527
SDMA0 feature version: 41, firmware version: 0x00a9
VCN feature version: 0, firmware version: 0x0110901c
DMCU feature version: 0, firmware version: 0x0001
VBIOS version: 113-RAVEN2-117

We are also using a V1404I APU on the same boards and I haven't seen the issue on
those boards.

These boards give me slightly different info: sudo cat 
/sys/kernel/debug/dri/0/amdgpu_firmware_info
 
VCE feature version: 0, firmware version: 0x
UVD feature version: 0, firmware version: 0x
MC feature version: 0, firmware version: 0x
ME feature version: 47, firmware version: 0x00a2
PFP feature version: 47, firmware version: 0x00b9
CE feature version: 47, firmware version: 0x004e
RLC feature version: 1, firmware version: 0x0213
RLC SRLC feature version: 1, firmware version: 0x0001
RLC SRLG feature version: 1, firmware version: 0x0001
RLC SRLS feature version: 1, firmware version: 0x0001
MEC feature version: 47, firmware version: 0x01ab
MEC2 feature version: 47, firmware version: 0x01ab
SOS feature version: 0, firmware version: 0x
ASD feature version: 0, firmware version: 0x2113
TA XGMI feature version: 0, firmware version: 0x
TA RAS feature version: 0, firmware version: 0x
SMC feature version: 0, firmware version: 0x1e5b
SDMA0 feature version: 41, firmware version: 0x00a9
VCN feature version: 0, firmware version: 0x0110901c
DMCU feature version: 0, firmware version: 0x
VBIOS version: 113-RAVEN-116




00:00.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Root 
Complex
00:00.2 IOMMU: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 IOMMU
00:01.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Family 17h (Models 
00h-1fh) PCIe Dummy Host Bridge
00:01.1 PCI bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 PCIe GPP 
Bridge [6:0]
00:01.2 PCI bridge: Advanced Micro Devices, Inc. [AMD] Zeppelin Switch Upstream 
(PCIE SW.US)
00:01.4 PCI bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 PCIe GPP 
Bridge [6:0]
00:01.5 PCI bridge: Advanced Micro Devices, Inc. [AMD] Zeppelin Switch Upstream 
(PCIE SW.US)
00:08.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Family 17h (Models 
00h-1fh) PCIe Dummy Host Bridge
00:08.1 PCI bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Internal 
PCIe GPP Bridge 0 to Bus A
00:08.2 PCI bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Internal 
PCIe GPP Bridge 0 to Bus B
00:14.0 SMBus: Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller (rev 61)
00:14.3 ISA bridge: Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge (rev 51)
00:18.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 0
00:18.1 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 1
00:18.2 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 2
00:18.3 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 3
00:18.4 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 4
00:18.5 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 5
00:18.6 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 6
00:18.7 Host bridge: Advanced Micro Devices, Inc. [AMD] Raven/Raven2 Device 24: 
Function 7
01:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 
PCI Express Gigabit Ethernet Controller (rev 0e)
01:00.1 Serial controller: Realtek Semiconductor Co., Ltd. Device 816a (rev 0e)
01:00.2 Serial controller: Realtek Semiconductor Co., Ltd. Device 816b (rev 0e)
01:00.3 IPMI Interface: Realtek Semiconductor Co., Ltd. Device 816c (rev 0e)
01:00.4 USB controller: Realtek Semiconductor Co., Ltd. Device 816d (rev 0e)
02:00.0 Ethernet controller: Intel Corporation I210 Gigabit Network Connection 
(rev 03)
03:00.0 PCI bridge: Pericom Semiconductor PI7C9X2G608GP PCIe2 

Re: [PATCHv9 3/8] iommu/arm-smmu: Move non-strict mode to use io_pgtable_domain_attr

2020-11-24 Thread Sai Prakash Ranjan

On 2020-11-25 03:09, Will Deacon wrote:

On Mon, Nov 23, 2020 at 10:35:56PM +0530, Sai Prakash Ranjan wrote:

Now that we have a struct io_pgtable_domain_attr with quirks,
use that for non_strict mode as well thereby removing the need
for more members of arm_smmu_domain in the future.

Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 8 +++-
 drivers/iommu/arm/arm-smmu/arm-smmu.h | 1 -
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c

index 4b9b10fe50ed..f56f266ebdf7 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -786,9 +786,6 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,

goto out_clear_smmu;
}

-   if (smmu_domain->non_strict)
-   pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
-
if (smmu_domain->pgtbl_cfg.quirks)
pgtbl_cfg.quirks |= smmu_domain->pgtbl_cfg.quirks;

@@ -1527,7 +1524,8 @@ static int arm_smmu_domain_get_attr(struct 
iommu_domain *domain,

case IOMMU_DOMAIN_DMA:
switch (attr) {
case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   *(int *)data = smmu_domain->non_strict;
+   if (smmu_domain->pgtbl_cfg.quirks & 
IO_PGTABLE_QUIRK_NON_STRICT)
+   *(int *)data = smmu_domain->pgtbl_cfg.quirks;


I still don't think this is right :(
We need to set *data to 1 or 0 depending on whether or not the 
non-strict

quirk is set, i.e:

	bool non_strict = smmu_domain->pgtbl_cfg.quirks & 
IO_PGTABLE_QUIRK_NON_STRICT;

*(int *)data = non_strict;

Your code above leaves *data uninitialised if non_strict is not set.


Ugh sorry, I should have looked at this some more before hurrying up
to post, will fix it.




return 0;
default:
return -ENODEV;
@@ -1578,7 +1576,7 @@ static int arm_smmu_domain_set_attr(struct 
iommu_domain *domain,

case IOMMU_DOMAIN_DMA:
switch (attr) {
case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-   smmu_domain->non_strict = *(int *)data;
+   smmu_domain->pgtbl_cfg.quirks |= 
IO_PGTABLE_QUIRK_NON_STRICT;


And this is broken because if *data is 0, then you _set_ the quirk, 
which is

the opposite of what we should be doing.

In other words, although the implementation has changed, the semantics 
have

not.



Will fix this to have quirk set only when *data = 1 and unset in case of 
0.


Thanks,
Sai

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


Re: [PATCH] iommu: fix return error code in iommu_probe_device()

2020-11-24 Thread Yang Yingliang


On 2020/11/18 6:41, Will Deacon wrote:

On Tue, Nov 17, 2020 at 07:11:28PM +0800, Yang Yingliang wrote:

On 2020/11/17 17:40, Lu Baolu wrote:

On 2020/11/17 10:52, Yang Yingliang wrote:

If iommu_group_get() fails, it needs to return an error code
in iommu_probe_device().

Fixes: cf193888bfbd ("iommu: Move new probe_device path...")
Reported-by: Hulk Robot 
Signed-off-by: Yang Yingliang 
---
   drivers/iommu/iommu.c | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b53446bb8c6b..6f4a32df90f6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -253,8 +253,10 @@ int iommu_probe_device(struct device *dev)
   goto err_out;
     group = iommu_group_get(dev);
-    if (!group)
+    if (!group) {
+    ret = -ENODEV;

Can you please explain why you use -ENODEV here?

Before 79659190ee97 ("iommu: Don't take group reference in
iommu_alloc_default_domain()"), in iommu_alloc_default_domain(),
if group is NULL, it will return -ENODEV.

Hmm. While I think the patch is ok, I'm not sure it qualifies as a fix.
Has iommu_probe_device() ever propagated this error? The commit you
identify in the 'Fixes:' tag doesn't seem to change this afaict.


I think after this commit 439945e74a4b ("iommu: Move default domain
allocation to iommu_probe_device()"), iommu_probe_device() won't return
an error code if group is NULL. I can add this Fixes tag in v2.





Will
.


[PATCH 1/1] x86/tboot: Don't disable swiotlb when iommu is forced on

2020-11-24 Thread Lu Baolu
After commit 327d5b2fee91c ("iommu/vt-d: Allow 32bit devices to uses DMA
domain"), swiotlb could also be used for direct memory access if IOMMU
is enabled but a device is configured to pass through the DMA translation.
Keep swiotlb enabled when the IOMMU is forced on; otherwise, some devices
won't work if the "iommu=pt" kernel parameter is used.

Fixes: 327d5b2fee91c ("iommu/vt-d: Allow 32bit devices to uses DMA domain")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210237
Reported-and-tested-by: Adrian Huang 
Signed-off-by: Lu Baolu 
---
 arch/x86/kernel/tboot.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 420be871d9d4..ae64f98ec2ab 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,13 +514,10 @@ int tboot_force_iommu(void)
if (!tboot_enabled())
return 0;
 
-   if (no_iommu || swiotlb || dmar_disabled)
+   if (no_iommu || dmar_disabled)
pr_warn("Forcing Intel-IOMMU to enabled\n");
 
dmar_disabled = 0;
-#ifdef CONFIG_SWIOTLB
-   swiotlb = 0;
-#endif
no_iommu = 0;
 
return 1;
-- 
2.25.1



Re: [PATCH v10 12/13] iommu/arm-smmu-v3: Implement iommu_sva_bind/unbind()

2020-11-24 Thread Jason Gunthorpe
On Fri, Sep 18, 2020 at 12:18:52PM +0200, Jean-Philippe Brucker wrote:

> +/* Allocate or get existing MMU notifier for this {domain, mm} pair */
> +static struct arm_smmu_mmu_notifier *
> +arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
> +   struct mm_struct *mm)
> +{
> + int ret;
> + struct arm_smmu_ctx_desc *cd;
> + struct arm_smmu_mmu_notifier *smmu_mn;
> +
> + list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
> + if (smmu_mn->mn.mm == mm) {
> + refcount_inc(&smmu_mn->refs);
> + return smmu_mn;
> + }
> + }
> +
> + cd = arm_smmu_alloc_shared_cd(mm);
> + if (IS_ERR(cd))
> + return ERR_CAST(cd);
> +
> + smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL);
> + if (!smmu_mn) {
> + ret = -ENOMEM;
> + goto err_free_cd;
> + }
> +
> + refcount_set(&smmu_mn->refs, 1);
> + smmu_mn->cd = cd;
> + smmu_mn->domain = smmu_domain;
> + smmu_mn->mn.ops = &arm_smmu_mmu_notifier_ops;
> +
> + ret = mmu_notifier_register(&smmu_mn->mn, mm);
> + if (ret) {
> + kfree(smmu_mn);
> + goto err_free_cd;
> + }

I suppose this hasn't been applied yet, but someone asked me to look
at this series..

Why did you drop the change to mmu_notifier_get here? I'm strongly
trying to discourage static lists matching mm's like smmu_mn is
doing. This is handled by the core code, don't open code it..
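
For reference, the core-managed pattern being referred to is
mmu_notifier_get()/mmu_notifier_put(), where the {ops, mm} lookup and
refcounting live in the core via the alloc_notifier/free_notifier
callbacks. A rough sketch of that shape, assuming the struct
arm_smmu_mmu_notifier from the quoted patch and leaving per-domain state
handling aside (hypothetical helper names, not the actual SMMUv3 code):

#include <linux/err.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>

static struct mmu_notifier *arm_smmu_alloc_notifier(struct mm_struct *mm)
{
        struct arm_smmu_mmu_notifier *smmu_mn;

        smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL);
        if (!smmu_mn)
                return ERR_PTR(-ENOMEM);
        return &smmu_mn->mn;
}

static void arm_smmu_free_notifier(struct mmu_notifier *mn)
{
        kfree(container_of(mn, struct arm_smmu_mmu_notifier, mn));
}

static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = {
        .alloc_notifier = arm_smmu_alloc_notifier,
        .free_notifier  = arm_smmu_free_notifier,
        /* ... invalidate/release callbacks as in the patch ... */
};

/* Lookup-or-create then collapses to a single call; the core refcounts it. */
static struct arm_smmu_mmu_notifier *
example_mmu_notifier_get(struct mm_struct *mm)
{
        struct mmu_notifier *mn;

        mn = mmu_notifier_get(&arm_smmu_mmu_notifier_ops, mm);
        if (IS_ERR(mn))
                return ERR_CAST(mn);
        return container_of(mn, struct arm_smmu_mmu_notifier, mn);
}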

Thanks,
Jason


Re: [PATCH v6] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-11-24 Thread Ashish Kalra
Hello Konrad, 

On Mon, Nov 23, 2020 at 10:56:31PM +, Ashish Kalra wrote:
> Hello Konrad,
> 
> On Mon, Nov 23, 2020 at 12:56:32PM -0500, Konrad Rzeszutek Wilk wrote:
> > On Mon, Nov 23, 2020 at 06:06:47PM +0100, Borislav Petkov wrote:
> > > On Thu, Nov 19, 2020 at 09:42:05PM +, Ashish Kalra wrote:
> > > > From: Ashish Kalra 
> > > > 
> > > > For SEV, all DMA to and from guest has to use shared (un-encrypted) 
> > > > pages.
> > > > SEV uses SWIOTLB to make this happen without requiring changes to device
> > > > drivers.  However, depending on workload being run, the default 64MB of
> > > > SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> > > > for DMA, resulting in I/O errors and/or performance degradation for
> > > > high I/O workloads.
> > > > 
> > > > Increase the default size of SWIOTLB for SEV guests using a minimum
> > > > value of 128MB and a maximum value of 512MB, determining on amount
> > > > of provisioned guest memory.
> > > 
> > > That sentence needs massaging.
> > > 
> > > > Using late_initcall() interface to invoke swiotlb_adjust() does not
> > > > work as the size adjustment needs to be done before mem_encrypt_init()
> > > > and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> > > > hence calling it explicitly from setup_arch().
> > > 
> > > "hence call it ... "
> > > 
> > > > 
> > > > The SWIOTLB default size adjustment is added as an architecture specific
> > > 
> > > "... is added... " needs to be "Add ..."
> > > 
> > > > interface/callback to allow architectures such as those supporting 
> > > > memory
> > > > encryption to adjust/expand SWIOTLB size for their use.
> > > > 
> > > > v5 fixed build errors and warnings as
> > > > Reported-by: kbuild test robot 
> > > > 
> > > > Signed-off-by: Ashish Kalra 
> > > > ---
> > > >  arch/x86/kernel/setup.c   |  2 ++
> > > >  arch/x86/mm/mem_encrypt.c | 32 
> > > >  include/linux/swiotlb.h   |  6 ++
> > > >  kernel/dma/swiotlb.c  | 24 
> > > >  4 files changed, 64 insertions(+)
> > > > 
> > > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > > > index 3511736fbc74..b073d58dd4a3 100644
> > > > --- a/arch/x86/kernel/setup.c
> > > > +++ b/arch/x86/kernel/setup.c
> > > > @@ -1166,6 +1166,8 @@ void __init setup_arch(char **cmdline_p)
> > > > if (boot_cpu_has(X86_FEATURE_GBPAGES))
> > > > hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
> > > >  
> > > > +   swiotlb_adjust();
> > > > +
> > > > /*
> > > >  * Reserve memory for crash kernel after SRAT is parsed so that 
> > > > it
> > > >  * won't consume hotpluggable memory.
> > > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> > > > index 3f248f0d0e07..c79a0d761db5 100644
> > > > --- a/arch/x86/mm/mem_encrypt.c
> > > > +++ b/arch/x86/mm/mem_encrypt.c
> > > > @@ -490,6 +490,38 @@ static void print_mem_encrypt_feature_info(void)
> > > >  }
> > > >  
> > > >  /* Architecture __weak replacement functions */
> > > > +unsigned long __init arch_swiotlb_adjust(unsigned long 
> > > > iotlb_default_size)
> > > > +{
> > > > +   unsigned long size = 0;
> > > 
> > >   unsigned long size = iotlb_default_size;
> > > 
> > > > +
> > > > +   /*
> > > > +* For SEV, all DMA has to occur via shared/unencrypted pages.
> > > > +* SEV uses SWOTLB to make this happen without changing device
> > > > +* drivers. However, depending on the workload being run, the
> > > > +* default 64MB of SWIOTLB may not be enough & SWIOTLB may
> > >^
> > > 
> > > Use words pls, not "&".
> > > 
> > > 
> > > > +* run out of buffers for DMA, resulting in I/O errors and/or
> > > > +* performance degradation especially with high I/O workloads.
> > > > +* Increase the default size of SWIOTLB for SEV guests using
> > > > +* a minimum value of 128MB and a maximum value of 512MB,
> > > > +* depending on amount of provisioned guest memory.
> > > > +*/
> > > > +   if (sev_active()) {
> > > > +   phys_addr_t total_mem = memblock_phys_mem_size();
> > > > +
> > > > +   if (total_mem <= SZ_1G)
> > > > +   size = max(iotlb_default_size, (unsigned long) 
> > > > SZ_128M);
> > > > +   else if (total_mem <= SZ_4G)
> > > > +   size = max(iotlb_default_size, (unsigned long) 
> > > > SZ_256M);
> > 
> > That is eating 128MB for 1GB, aka 12% of the guest memory allocated 
> > statically for this.
> > 
> > And for guests that are 2GB, that is 12% until it gets to 3GB when it is 8%
> > and then 6% at 4GB.
> > 
> > I would prefer this to be based on your memory count, that is 6% of total
> > memory. And then going forward we can allocate memory _after_ boot and then 
> > stich
> > the late SWIOTLB pool and allocate on demand.
> > 
> > 
> Ok. 
> 
> As i 

Re: [PATCH RESEND 0/5] iommu/tegra-smmu: Some pending reviewed changes

2020-11-24 Thread Nicolin Chen
On Wed, Nov 25, 2020 at 02:05:14AM +0300, Dmitry Osipenko wrote:
> 25.11.2020 00:21, Nicolin Chen wrote:
> > Hi Joerg,
> > 
> > These five patches were acked by Thierry and acked-n-tested by
> > Dmitry a while ago. Would it be possible for you to apply them?
> > 
> > Thanks!
> 
> Hi,
> 
> You probably should try to ping Will Deacon.
> 
> https://lkml.org/lkml/2020/11/17/243

Thank you, Dmitry.
--

Will, would it be possible for you to take these changes?

I sent them on Nov 11 to the following lists:
linux-ker...@vger.kernel.org
iommu@lists.linux-foundation.org

If you need me to resend it again by adding you in To line,
please kindly let me know.

Thanks

Re: [PATCH RESEND 0/5] iommu/tegra-smmu: Some pending reviewed changes

2020-11-24 Thread Dmitry Osipenko
25.11.2020 00:21, Nicolin Chen wrote:
> Hi Joerg,
> 
> These five patches were acked by Thierry and acked-n-tested by
> Dmitry a while ago. Would it be possible for you to apply them?
> 
> Thanks!

Hi,

You probably should try to ping Will Deacon.

https://lkml.org/lkml/2020/11/17/243

Re: [PATCHv8 0/8] System Cache support for GPU and required SMMU support

2020-11-24 Thread Rob Clark
On Tue, Nov 24, 2020 at 1:43 PM Will Deacon  wrote:
>
> On Tue, Nov 24, 2020 at 11:05:39AM -0800, Rob Clark wrote:
> > On Tue, Nov 24, 2020 at 3:10 AM Will Deacon  wrote:
> > > On Tue, Nov 24, 2020 at 09:32:54AM +0530, Sai Prakash Ranjan wrote:
> > > > On 2020-11-24 00:52, Rob Clark wrote:
> > > > > On Mon, Nov 23, 2020 at 9:01 AM Sai Prakash Ranjan
> > > > >  wrote:
> > > > > > On 2020-11-23 20:51, Will Deacon wrote:
> > > > > > > Modulo some minor comments I've made, this looks good to me. What 
> > > > > > > is
> > > > > > > the
> > > > > > > plan for merging it? I can take the IOMMU parts, but patches 4-6 
> > > > > > > touch
> > > > > > > the
> > > > > > > MSM GPU driver and I'd like to avoid conflicts with that.
> > > > > > >
> > > > > >
> > > > > > SMMU bits are pretty much independent and GPU relies on the domain
> > > > > > attribute
> > > > > > and the quirk exposed, so as long as SMMU changes go in first it
> > > > > > should
> > > > > > be good.
> > > > > > Rob?
> > > > >
> > > > > I suppose one option would be to split out the patch that adds the
> > > > > attribute into it's own patch, and merge that both thru drm and iommu?
> > > > >
> > > >
> > > > Ok I can split out domain attr and quirk into its own patch if Will is
> > > > fine with that approach.
> > >
> > > Why don't I just queue the first two patches on their own branch and we
> > > both pull that?
> >
> > Ok, that works for me.  I normally base msm-next on -rc1 but I guess
> > as long as we base the branch on the older or our two -next branches,
> > that should work out nicely
>
> Turns out we're getting a v10 of Sai's stuff, so I've asked him to split
> patch two up anyway. Then I'll make a branch based on -rc1 that we can
> both pull.

Sounds good, thx

BR,
-R


Re: [PATCH] [PATCH] Adding offset keeping option when mapping data via SWIOTLB.

2020-11-24 Thread Konrad Rzeszutek Wilk
On Mon, Nov 23, 2020 at 02:18:07PM -0800, Jianxiong Gao wrote:
> NVMe driver and other applications may depend on the data offset
> to operate correctly. Currently when unaligned data is mapped via
> SWIOTLB, the data is mapped as slab aligned with the SWIOTLB. When
> booting with --swiotlb=force option and using NVMe as interface,
> running mkfs.xfs on Rhel fails because of the unalignment issue.

RHEL? So a specific RHEL kernel. Is there a Red Hat bug created
for this that can be linked to this patch to make it easier
for folks to figure this?

Why would you be using swiotlb=force?
Ah, you are using AMD SEV!

> This patch adds an option to make sure the mapped data preserves
> its offset of the orginal addrss. Tested on latest kernel that

s/addrss/address/
> this patch fixes the issue.
> 
> Signed-off-by: Jianxiong Gao 
> Acked-by: David Rientjes 
> ---
>  drivers/nvme/host/pci.c |  3 ++-
>  include/linux/dma-mapping.h |  8 
>  kernel/dma/swiotlb.c| 13 +
>  3 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 0578ff253c47..a366fb8a1ff0 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -833,7 +833,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, 
> struct request *req,
>   iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
>   else
>   nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
> -  rq_dma_dir(req), DMA_ATTR_NO_WARN);
> + rq_dma_dir(req),
> + DMA_ATTR_NO_WARN|DMA_ATTR_SWIOTLB_KEEP_OFFSET);
>   if (!nr_mapped)
>   goto out;
>  
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 956151052d45..e46d23d9fa20 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -61,6 +61,14 @@
>   */
>  #define DMA_ATTR_PRIVILEGED  (1UL << 9)
>  
> +/*
> + * DMA_ATTR_SWIOTLB_KEEP_OFFSET: used to indicate that the buffer has to keep
> + * its offset when mapped via SWIOTLB. Some application functionality depends
> + * on the address offset, thus when buffers are mapped via SWIOTLB, the 
> offset
> + * needs to be preserved.
> + */
> +#define DMA_ATTR_SWIOTLB_KEEP_OFFSET (1UL << 10)
> +
>  /*
>   * A dma_addr_t can hold any valid DMA or bus address for the platform.  It 
> can
>   * be given to a device to use as a DMA source or target.  It is specific to 
> a
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 781b9dca197c..f43d7be1342d 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -483,6 +483,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
> phys_addr_t orig_addr,
>   max_slots = mask + 1
>   ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
>   : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
> + 
> + /*
> +  * If we need to keep the offset when mapping, we need to add the offset
> +  * to the total set we need to allocate in SWIOTLB
> +  */
> + if (attrs & DMA_ATTR_SWIOTLB_KEEP_OFFSET)
> + alloc_size += offset_in_page(orig_addr);
>  
>   /*
>* For mappings greater than or equal to a page, we limit the stride
> @@ -567,6 +574,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, 
> phys_addr_t orig_addr,
>*/
>   for (i = 0; i < nslots; i++)
>   io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
> + /*
> +  * When keeping the offset of the original data, we need to advance
> +  * the tlb_addr by the offset of orig_addr.
> +  */
> + if (attrs & DMA_ATTR_SWIOTLB_KEEP_OFFSET)
> + tlb_addr += orig_addr & (PAGE_SIZE - 1);
>   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
>   (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
>   swiotlb_bounce(orig_addr, tlb_addr, mapping_size, 
> DMA_TO_DEVICE);
> -- 
> 2.27.0
> 
> 


Re: [PATCHv8 0/8] System Cache support for GPU and required SMMU support

2020-11-24 Thread Will Deacon
On Tue, Nov 24, 2020 at 11:05:39AM -0800, Rob Clark wrote:
> On Tue, Nov 24, 2020 at 3:10 AM Will Deacon  wrote:
> > On Tue, Nov 24, 2020 at 09:32:54AM +0530, Sai Prakash Ranjan wrote:
> > > On 2020-11-24 00:52, Rob Clark wrote:
> > > > On Mon, Nov 23, 2020 at 9:01 AM Sai Prakash Ranjan
> > > >  wrote:
> > > > > On 2020-11-23 20:51, Will Deacon wrote:
> > > > > > Modulo some minor comments I've made, this looks good to me. What is
> > > > > > the
> > > > > > plan for merging it? I can take the IOMMU parts, but patches 4-6 
> > > > > > touch
> > > > > > the
> > > > > > MSM GPU driver and I'd like to avoid conflicts with that.
> > > > > >
> > > > >
> > > > > SMMU bits are pretty much independent and GPU relies on the domain
> > > > > attribute
> > > > > and the quirk exposed, so as long as SMMU changes go in first it
> > > > > should
> > > > > be good.
> > > > > Rob?
> > > >
> > > > I suppose one option would be to split out the patch that adds the
> > > > attribute into it's own patch, and merge that both thru drm and iommu?
> > > >
> > >
> > > Ok I can split out domain attr and quirk into its own patch if Will is
> > > fine with that approach.
> >
> > Why don't I just queue the first two patches on their own branch and we
> > both pull that?
> 
> Ok, that works for me.  I normally base msm-next on -rc1 but I guess
> as long as we base the branch on the older or our two -next branches,
> that should work out nicely

Turns out we're getting a v10 of Sai's stuff, so I've asked him to split
patch two up anyway. Then I'll make a branch based on -rc1 that we can
both pull.

Will


Re: [PATCHv9 2/8] iommu/arm-smmu: Add domain attribute for pagetable configuration

2020-11-24 Thread Will Deacon
On Mon, Nov 23, 2020 at 10:35:55PM +0530, Sai Prakash Ranjan wrote:
> Add an iommu domain attribute for pagetable configuration. Initially
> it will be used to set quirks such as the system cache (aka last level
> cache) attribute, which client drivers like the GPU need in order to
> cache the hardware pagetables in the system cache; later it can be
> extended to carry other page table configuration data.
> 
> Signed-off-by: Sai Prakash Ranjan 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c | 20 
>  drivers/iommu/arm/arm-smmu/arm-smmu.h |  1 +
>  include/linux/io-pgtable.h|  4 
>  include/linux/iommu.h |  1 +
>  4 files changed, 26 insertions(+)

Given that we're heading for a v10 to address my comments on patch 3,
then I guess you may as well split this into two patches so that I can
share just the attribute with Rob rather than the driver parts.

Please keep it all as one series though, with the common parts at the
beginning, and I'll figure it out.

Will


Re: [PATCHv9 3/8] iommu/arm-smmu: Move non-strict mode to use io_pgtable_domain_attr

2020-11-24 Thread Will Deacon
On Mon, Nov 23, 2020 at 10:35:56PM +0530, Sai Prakash Ranjan wrote:
> Now that we have a struct io_pgtable_domain_attr with quirks,
> use that for non_strict mode as well thereby removing the need
> for more members of arm_smmu_domain in the future.
> 
> Signed-off-by: Sai Prakash Ranjan 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c | 8 +++-
>  drivers/iommu/arm/arm-smmu/arm-smmu.h | 1 -
>  2 files changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
> b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 4b9b10fe50ed..f56f266ebdf7 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -786,9 +786,6 @@ static int arm_smmu_init_domain_context(struct 
> iommu_domain *domain,
>   goto out_clear_smmu;
>   }
>  
> - if (smmu_domain->non_strict)
> - pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
> -
>   if (smmu_domain->pgtbl_cfg.quirks)
>   pgtbl_cfg.quirks |= smmu_domain->pgtbl_cfg.quirks;
>  
> @@ -1527,7 +1524,8 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
> *domain,
>   case IOMMU_DOMAIN_DMA:
>   switch (attr) {
>   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
> - *(int *)data = smmu_domain->non_strict;
> + if (smmu_domain->pgtbl_cfg.quirks & 
> IO_PGTABLE_QUIRK_NON_STRICT)
> + *(int *)data = smmu_domain->pgtbl_cfg.quirks;

I still don't think this is right :(
We need to set *data to 1 or 0 depending on whether or not the non-strict
quirk is set, i.e:

bool non_strict = smmu_domain->pgtbl_cfg.quirks & 
IO_PGTABLE_QUIRK_NON_STRICT;
*(int *)data = non_strict;

Your code above leaves *data uninitialised if non_strict is not set.

>   return 0;
>   default:
>   return -ENODEV;
> @@ -1578,7 +1576,7 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
> *domain,
>   case IOMMU_DOMAIN_DMA:
>   switch (attr) {
>   case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
> - smmu_domain->non_strict = *(int *)data;
> + smmu_domain->pgtbl_cfg.quirks |= 
> IO_PGTABLE_QUIRK_NON_STRICT;

And this is broken because if *data is 0, then you _set_ the quirk, which is
the opposite of what we should be doing.

In other words, although the implementation has changed, the semantics have
not.
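
A minimal sketch of the set-side semantics being asked for, mirroring the
get-side snippet above (field names are taken from the quoted patch; this is
not the actual v10 code):

	case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
		if (*(int *)data)
			smmu_domain->pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
		else
			smmu_domain->pgtbl_cfg.quirks &= ~IO_PGTABLE_QUIRK_NON_STRICT;
		break;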

Will


Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

2020-11-24 Thread Alex Williamson
On Mon, 16 Nov 2020 12:00:18 +0100
Eric Auger  wrote:

> From: "Liu, Yi L" 
> 
> This patch adds a VFIO_IOMMU_SET_PASID_TABLE ioctl
> which aims to pass the virtual iommu guest configuration
> to the host. The latter takes the form of the so-called
> PASID table.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Eric Auger 
> 
> ---
> v11 -> v12:
> - use iommu_uapi_set_pasid_table
> - check SET and UNSET are not set simultaneously (Zenghui)
> 
> v8 -> v9:
> - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
>   VFIO_IOMMU_SET_PASID_TABLE ioctl.
> 
> v6 -> v7:
> - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE
> 
> v3 -> v4:
> - restore ATTACH/DETACH
> - add unwind on failure
> 
> v2 -> v3:
> - s/BIND_PASID_TABLE/SET_PASID_TABLE
> 
> v1 -> v2:
> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
> - remove the struct device arg
> ---
>  drivers/vfio/vfio_iommu_type1.c | 65 +
>  include/uapi/linux/vfio.h   | 19 ++
>  2 files changed, 84 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 67e827638995..87ddd9e882dc 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct 
> vfio_iommu *iommu,
>   return ret;
>  }
>  
> +static void
> +vfio_detach_pasid_table(struct vfio_iommu *iommu)
> +{
> + struct vfio_domain *d;
> +
> + mutex_lock(&iommu->lock);
> + list_for_each_entry(d, &iommu->domain_list, next)
> + iommu_detach_pasid_table(d->domain);
> +
> + mutex_unlock(&iommu->lock);
> +}
> +
> +static int
> +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
> +{
> + struct vfio_domain *d;
> + int ret = 0;
> +
> + mutex_lock(&iommu->lock);
> +
> + list_for_each_entry(d, &iommu->domain_list, next) {
> + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user 
> *)arg);
> + if (ret)
> + goto unwind;
> + }
> + goto unlock;
> +unwind:
> + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
> + iommu_detach_pasid_table(d->domain);
> + }
> +unlock:

This goto leap frog could be avoided with just:

list_for_each_entry(d, &iommu->domain_list, next) {
ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
if (ret) {
list_for_each_entry_continue_reverse(d, &iommu->domain_list, 
next) {
iommu_detach_pasid_table(d->domain);
}
break;
}
}

> + mutex_unlock(&iommu->lock);
> + return ret;
> +}
> +
>  static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
>  struct vfio_info_cap *caps)
>  {
> @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct 
> vfio_iommu *iommu,
>   -EFAULT : 0;
>  }
>  
> +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
> + unsigned long arg)
> +{
> + struct vfio_iommu_type1_set_pasid_table spt;
> + unsigned long minsz;
> + int ret = -EINVAL;
> +
> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
> +
> + if (copy_from_user(&spt, (void __user *)arg, minsz))
> + return -EFAULT;
> +
> + if (spt.argsz < minsz)
> + return -EINVAL;
> +
> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
> + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
> + return -EINVAL;
> +
> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
> + ret = vfio_attach_pasid_table(iommu, arg + minsz);
> + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
> + vfio_detach_pasid_table(iommu);
> + ret = 0;
> + }

This doesn't really validate that the other flag bits are zero, ex.
user could pass flags = (1 << 8) | VFIO_PASID_TABLE_FLAG_SET and we'd
just ignore the extra bit.  So this probably needs to be:

if (spt.flags == VFIO_PASID_TABLE_FLAG_SET)
ret = vfio_attach_pasid_table(iommu, arg + minsz);
else if (spt.flags == VFIO_PASID_TABLE_FLAG_UNSET)
vfio_detach_pasid_table(iommu);

Or otherwise validate that none of the other bits are set.  It also
seems cleaner to me to set the initial value of ret = 0 and end this
with:

else
ret = -EINVAL;
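
Folding both comments together, the flag handling could then look roughly
like this (an untested sketch, with ret initialised to 0 at the top of the
function):

	if (spt.argsz < minsz)
		return -EINVAL;

	if (spt.flags == VFIO_PASID_TABLE_FLAG_SET)
		ret = vfio_attach_pasid_table(iommu, arg + minsz);
	else if (spt.flags == VFIO_PASID_TABLE_FLAG_UNSET)
		vfio_detach_pasid_table(iommu);
	else
		ret = -EINVAL;

	return ret;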


> + return ret;
> +}
> +
>  static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>   unsigned long arg)
>  {
> @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>   return vfio_iommu_type1_unmap_dma(iommu, arg);
>   case VFIO_IOMMU_DIRTY_PAGES:
>   return vfio_iommu_type1_dirty_pages(iommu, arg);
> + case VFIO_IOMMU_SET_PASID_TABLE:
> + return vfio_iommu_type1_set_pasid_table(iommu, arg);
>   default:
>   return -ENOTTY;
>   }
> diff --git 

Re: [PATCH RESEND 0/5] iommu/tegra-smmu: Some pending reviewed changes

2020-11-24 Thread Nicolin Chen
Hi Joerg,

These five patches were acked by Thierry and acked-n-tested by
Dmitry a while ago. Would it be possible for you to apply them?

Thanks!

On Wed, Nov 11, 2020 at 02:21:24PM -0800, Nicolin Chen wrote:
> This is a merged set of resend for previously two series of patches
> that were reviewed/acked a month ago yet have not got applied.
> 
> Series-1: https://lkml.org/lkml/2020/9/29/73
> "[PATCH v4 0/2] iommu/tegra-smmu: Two followup changes"
> 
> Series-2: https://lkml.org/lkml/2020/10/9/808
> "[PATCH v7 0/3] iommu/tegra-smmu: Add PCI support"
> 
> Nicolin Chen (5):
>   iommu/tegra-smmu: Unwrap tegra_smmu_group_get
>   iommu/tegra-smmu: Expand mutex protection range
>   iommu/tegra-smmu: Use fwspec in tegra_smmu_(de)attach_dev
>   iommu/tegra-smmu: Rework tegra_smmu_probe_device()
>   iommu/tegra-smmu: Add PCI support
> 
>  drivers/iommu/tegra-smmu.c | 240 ++---
>  1 file changed, 88 insertions(+), 152 deletions(-)
> 
> -- 
> 2.17.1
> 


[PATCH v2 3/3] drm/msm: Improve the a6xx page fault handler

2020-11-24 Thread Jordan Crouse
Use the new adreno-smmu-priv fault info function to get more SMMU
debug registers and print the current TTBR0 to debug per-instance
pagetables and figure out which GPU block generated the request.

Signed-off-by: Jordan Crouse 
---

 drivers/gpu/drm/msm/adreno/a5xx_gpu.c |  4 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 76 +--
 drivers/gpu/drm/msm/msm_iommu.c   | 11 +++-
 drivers/gpu/drm/msm/msm_mmu.h |  4 +-
 4 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index d6804a802355..ed4cb81af874 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -933,7 +933,7 @@ bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer 
*ring)
return true;
 }
 
-static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
+static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void 
*data)
 {
struct msm_gpu *gpu = arg;
pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d 
(%u,%u,%u,%u)\n",
@@ -943,7 +943,7 @@ static int a5xx_fault_handler(void *arg, unsigned long 
iova, int flags)
gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 
-   return -EFAULT;
+   return 0;
 }
 
 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 948f3656c20c..ac6e8cd5cf1a 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -905,18 +905,88 @@ static void a6xx_recover(struct msm_gpu *gpu)
msm_gpu_hw_init(gpu);
 }
 
-static int a6xx_fault_handler(void *arg, unsigned long iova, int flags)
+static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
+{
+   static const char *uche_clients[7] = {
+   "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
+   };
+   u32 val;
+
+   if (mid < 1 || mid > 3)
+   return "UNKNOWN";
+
+   /*
+* The source of the data depends on the mid ID read from FSYNR1.
+* and the client ID read from the UCHE block
+*/
+   val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
+
+   /* mid = 3 is most precise and refers to only one block per client */
+   if (mid == 3)
+   return uche_clients[val & 7];
+
+   /* For mid=2 the source is TP or VFD except when the client id is 0 */
+   if (mid == 2)
+   return ((val & 7) == 0) ? "TP" : "TP|VFD";
+
+   /* For mid=1 just return "UCHE" as a catchall for everything else */
+   return "UCHE";
+}
+
+static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
+{
+   if (id == 0)
+   return "CP";
+   else if (id == 4)
+   return "CCU";
+   else if (id == 6)
+   return "CDP Prefetch";
+
+   return a6xx_uche_fault_block(gpu, id);
+}
+
+#define ARM_SMMU_FSR_TF BIT(1)
+#define ARM_SMMU_FSR_PF BIT(3)
+#define ARM_SMMU_FSR_EF BIT(4)
+
+static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void 
*data)
 {
struct msm_gpu *gpu = arg;
+   struct adreno_smmu_fault_info *info = data;
+   const char *type = "UNKNOWN";
 
-   pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d 
(%u,%u,%u,%u)\n",
+   /*
+* Print a default message if we couldn't get the data from the
+* adreno-smmu-priv
+*/
+   if (!info) {
+   pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d 
(%u,%u,%u,%u)\n",
iova, flags,
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
 
-   return -EFAULT;
+   return 0;
+   }
+
+   if (info->fsr & ARM_SMMU_FSR_TF)
+   type = "TRANSLATION";
+   else if (info->fsr & ARM_SMMU_FSR_PF)
+   type = "PERMISSION";
+   else if (info->fsr & ARM_SMMU_FSR_EF)
+   type = "EXTERNAL";
+
+   pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s 
type=%s source=%s (%u,%u,%u,%u)\n",
+   info->ttbr0, iova,
+   flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", type,
+   a6xx_fault_block(gpu, info->fsynr1 & 0xff),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
+
+   return 0;
 }
 
 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)

[PATCH v2 1/3] iommu/arm-smmu: Add support for driver IOMMU fault handlers

2020-11-24 Thread Jordan Crouse
Call report_iommu_fault() to allow upper-level drivers to register their
own fault handlers.
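
For context, a minimal sketch of the consumer side this enables: a client
driver installs a handler on its domain with iommu_set_fault_handler(), and
the report_iommu_fault() call added below invokes it from the context fault
IRQ. The handler body and names here are illustrative only:

	static int my_fault_handler(struct iommu_domain *domain, struct device *dev,
				    unsigned long iova, int flags, void *token)
	{
		dev_warn_ratelimited(dev, "iommu fault at 0x%lx (%s)\n", iova,
				     flags & IOMMU_FAULT_WRITE ? "write" : "read");
		/* returning 0 lets the SMMU driver clear the fault and resume */
		return 0;
	}

	/* at attach/init time */
	iommu_set_fault_handler(domain, my_fault_handler, token);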

Signed-off-by: Jordan Crouse 
---

 drivers/iommu/arm/arm-smmu/arm-smmu.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 0f28a8614da3..7fd18bbda8f5 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -427,6 +427,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
int idx = smmu_domain->cfg.cbndx;
+   int ret;
 
fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
if (!(fsr & ARM_SMMU_FSR_FAULT))
@@ -436,11 +437,20 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
 
-   dev_err_ratelimited(smmu->dev,
-   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cbfrsynra=0x%x, cb=%d\n",
+   ret = report_iommu_fault(domain, dev, iova,
+   fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : 
IOMMU_FAULT_READ);
+
+   if (ret == -ENOSYS)
+   dev_err_ratelimited(smmu->dev,
+   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cbfrsynra=0x%x, cb=%d\n",
fsr, iova, fsynr, cbfrsynra, idx);
 
-   arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
+   /*
+* If the iommu fault returns an error (except -ENOSYS) then assume that
+* they will handle resuming on their own
+*/
+   if (!ret || ret == -ENOSYS)
+   arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
return IRQ_HANDLED;
 }
 
-- 
2.25.1



[PATCH v2 2/3] drm/msm: Add an adreno-smmu-priv callback to get pagefault info

2020-11-24 Thread Jordan Crouse
Add a callback in adreno-smmu-priv to read interesting SMMU
registers to provide an opportunity for a richer debug experience
in the GPU driver.
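
For context, a rough sketch of how the GPU side might consume the callback
added below; how the priv pointer is looked up here (dev_get_drvdata() on an
assumed smmu_dev) is illustrative and not part of this patch:

	struct adreno_smmu_priv *priv = dev_get_drvdata(smmu_dev);
	struct adreno_smmu_fault_info info = {};

	if (priv->get_fault_info)
		priv->get_fault_info(priv->cookie, &info);

	pr_warn_ratelimited("gpu fault: fsr=0x%x fsynr1=0x%x ttbr0=%.16llx\n",
			    info.fsr, info.fsynr1, info.ttbr0);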

Signed-off-by: Jordan Crouse 
---

 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 19 +
 drivers/iommu/arm/arm-smmu/arm-smmu.h  |  2 ++
 include/linux/adreno-smmu-priv.h   | 31 +-
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index d0636c803a36..367a267324a2 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -32,6 +32,24 @@ static void qcom_adreno_smmu_write_sctlr(struct 
arm_smmu_device *smmu, int idx,
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
 }
 
+static void qcom_adreno_smmu_get_fault_info(const void *cookie,
+   struct adreno_smmu_fault_info *info)
+{
+   struct arm_smmu_domain *smmu_domain = (void *)cookie;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+   info->fsr = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_FSR);
+   /* FIXME: return error here if we aren't really in a fault? */
+
+   info->fsynr0 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_FSYNR0);
+   info->fsynr1 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_FSYNR1);
+   info->far = arm_smmu_cb_readq(smmu, cfg->cbndx, ARM_SMMU_CB_FAR);
+   info->cbfrsynra = arm_smmu_gr1_read(smmu, 
ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
+   info->ttbr0 = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_TTBR0);
+   info->contextidr = arm_smmu_cb_read(smmu, cfg->cbndx, 
ARM_SMMU_CB_CONTEXTIDR);
+}
+
 #define QCOM_ADRENO_SMMU_GPU_SID 0
 
 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -156,6 +174,7 @@ static int qcom_adreno_smmu_init_context(struct 
arm_smmu_domain *smmu_domain,
priv->cookie = smmu_domain;
priv->get_ttbr1_cfg = qcom_adreno_smmu_get_ttbr1_cfg;
priv->set_ttbr0_cfg = qcom_adreno_smmu_set_ttbr0_cfg;
+   priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
 
return 0;
 }
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 04288b6fc619..fe511540a6bf 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -224,6 +224,8 @@ enum arm_smmu_cbar_type {
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_FSYNR0_WNR BIT(4)
 
+#define ARM_SMMU_CB_FSYNR1 0x6c
+
 #define ARM_SMMU_CB_S1_TLBIVA  0x600
 #define ARM_SMMU_CB_S1_TLBIASID0x610
 #define ARM_SMMU_CB_S1_TLBIVAL 0x620
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index a889f28afb42..53fe32fb9214 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -8,6 +8,32 @@
 
 #include 
 
+/**
+ * struct adreno_smmu_fault_info - container for key fault information
+ *
+ * @far: The faulting IOVA from ARM_SMMU_CB_FAR
+ * @ttbr0: The current TTBR0 pagetable from ARM_SMMU_CB_TTBR0
+ * @contextidr: The value of ARM_SMMU_CB_CONTEXTIDR
+ * @fsr: The fault status from ARM_SMMU_CB_FSR
+ * @fsynr0: The value of FSYNR0 from ARM_SMMU_CB_FSYNR0
+ * @fsynr1: The value of FSYNR1 from ARM_SMMU_CB_FSYNR1
+ * @cbfrsynra: The value of CBFRSYNRA from ARM_SMMU_GR1_CBFRSYNRA(idx)
+ *
+ * This struct passes back key page fault information to the GPU driver
+ * through the get_fault_info function pointer.
+ * The GPU driver can use this information to print informative
+ * log messages and provide deeper GPU specific insight into the fault.
+ */
+struct adreno_smmu_fault_info {
+   u64 far;
+   u64 ttbr0;
+   u32 contextidr;
+   u32 fsr;
+   u32 fsynr0;
+   u32 fsynr1;
+   u32 cbfrsynra;
+};
+
 /**
  * struct adreno_smmu_priv - private interface between adreno-smmu and GPU
  *
@@ -17,6 +43,8 @@
  * @set_ttbr0_cfg: Set the TTBR0 config for the GPUs context bank.  A
  * NULL config disables TTBR0 translation, otherwise
  * TTBR0 translation is enabled with the specified cfg
+ * @get_fault_info: Called by the GPU fault handler to get information about
+ *  the fault
  *
  * The GPU driver (drm/msm) and adreno-smmu work together for controlling
  * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
@@ -31,6 +59,7 @@ struct adreno_smmu_priv {
 const void *cookie;
 const struct io_pgtable_cfg *(*get_ttbr1_cfg)(const void *cookie);
 int (*set_ttbr0_cfg)(const void *cookie, const struct io_pgtable_cfg *cfg);
+void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info 
*info);
 };
 
-#endif /* __ADRENO_SMMU_PRIV_H */
\ No newline at end of file
+#endif /* __ADRENO_SMMU_PRIV_H */
-- 
2.25.1


[PATCH v2 0/3] iommu/arm-smmu: adreno-smmu page fault handling

2020-11-24 Thread Jordan Crouse
This is a stack to add an Adreno GPU specific handler for pagefaults. The first
patch starts by wiring up report_iommu_fault for arm-smmu. The next patch adds
an adreno-smmu-priv function hook to capture a handful of important debugging
registers such as TTBR0, CONTEXTIDR, FSYNR0 and others. This is used by the
third patch to print more detailed information on page faults, such as the TTBR0
for the pagetable that caused the fault and the source of the fault as
determined by a combination of the FSYNR1 register and an internal GPU
register.

This code provides a solid base that we can expand on later for even more
extensive GPU side page fault debugging capabilities.

v2: Fix comment wording and function pointer check per Rob Clark

Jordan Crouse (3):
  iommu/arm-smmu: Add support for driver IOMMU fault handlers
  drm/msm: Add an adreno-smmu-priv callback to get pagefault info
  drm/msm: Improve the a6xx page fault handler

 drivers/gpu/drm/msm/adreno/a5xx_gpu.c  |  4 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c  | 76 +-
 drivers/gpu/drm/msm/msm_iommu.c| 11 +++-
 drivers/gpu/drm/msm/msm_mmu.h  |  4 +-
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 19 ++
 drivers/iommu/arm/arm-smmu/arm-smmu.c  | 16 -
 drivers/iommu/arm/arm-smmu/arm-smmu.h  |  2 +
 include/linux/adreno-smmu-priv.h   | 31 -
 8 files changed, 151 insertions(+), 12 deletions(-)

-- 
2.25.1



Re: [PATCHv8 0/8] System Cache support for GPU and required SMMU support

2020-11-24 Thread Rob Clark
On Tue, Nov 24, 2020 at 3:10 AM Will Deacon  wrote:
>
> On Tue, Nov 24, 2020 at 09:32:54AM +0530, Sai Prakash Ranjan wrote:
> > On 2020-11-24 00:52, Rob Clark wrote:
> > > On Mon, Nov 23, 2020 at 9:01 AM Sai Prakash Ranjan
> > >  wrote:
> > > >
> > > > On 2020-11-23 20:51, Will Deacon wrote:
> > > > > On Tue, Nov 17, 2020 at 08:00:39PM +0530, Sai Prakash Ranjan wrote:
> > > > >> Some hardware variants contain a system cache or the last level
> > > > >> cache(llc). This cache is typically a large block which is shared
> > > > >> by multiple clients on the SOC. GPU uses the system cache to cache
> > > > >> both the GPU data buffers(like textures) as well the SMMU pagetables.
> > > > >> This helps with improved render performance as well as lower power
> > > > >> consumption by reducing the bus traffic to the system memory.
> > > > >>
> > > > >> The system cache architecture allows the cache to be split into 
> > > > >> slices
> > > > >> which then be used by multiple SOC clients. This patch series is an
> > > > >> effort to enable and use two of those slices preallocated for the 
> > > > >> GPU,
> > > > >> one for the GPU data buffers and another for the GPU SMMU hardware
> > > > >> pagetables.
> > > > >>
> > > > >> Patch 1 - Patch 6 adds system cache support in SMMU and GPU driver.
> > > > >> Patch 7 and 8 are minor cleanups for arm-smmu impl.
> > > > >>
> > > > >> Changes in v8:
> > > > >>  * Introduce a generic domain attribute for pagetable config (Will)
> > > > >>  * Rename quirk to more generic IO_PGTABLE_QUIRK_ARM_OUTER_WBWA 
> > > > >> (Will)
> > > > >>  * Move non-strict mode to use new struct domain_attr_io_pgtbl_config
> > > > >> (Will)
> > > > >
> > > > > Modulo some minor comments I've made, this looks good to me. What is
> > > > > the
> > > > > plan for merging it? I can take the IOMMU parts, but patches 4-6 touch
> > > > > the
> > > > > MSM GPU driver and I'd like to avoid conflicts with that.
> > > > >
> > > >
> > > > SMMU bits are pretty much independent and GPU relies on the domain
> > > > attribute
> > > > and the quirk exposed, so as long as SMMU changes go in first it
> > > > should
> > > > be good.
> > > > Rob?
> > >
> > > I suppose one option would be to split out the patch that adds the
> > > attribute into its own patch, and merge that both thru drm and iommu?
> > >
> >
> > Ok I can split out domain attr and quirk into its own patch if Will is
> > fine with that approach.
>
> Why don't I just queue the first two patches on their own branch and we
> both pull that?

Ok, that works for me.  I normally base msm-next on -rc1 but I guess
> as long as we base the branch on the older of our two -next branches,
that should work out nicely

BR,
-R


Re: [PATCH 5/6] media: uvcvideo: Use dma_alloc_noncontiguos API

2020-11-24 Thread Ricardo Ribalda
Hi Robin

On Tue, Nov 24, 2020 at 5:29 PM Robin Murphy  wrote:
>
> On 2020-11-24 15:38, Ricardo Ribalda wrote:
> > On architectures where there is no coherent caching, such as ARM, use the
> > dma_alloc_noncontiguous API and handle the cache flushing manually using
> > dma_sync_single().
> >
> > With this patch on the affected architectures we can measure up to 20x
> > performance improvement in uvc_video_copy_data_work().
> >
> > Signed-off-by: Ricardo Ribalda 
> > ---
> >   drivers/media/usb/uvc/uvc_video.c | 74 ++-
> >   drivers/media/usb/uvc/uvcvideo.h  |  1 +
> >   2 files changed, 63 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/media/usb/uvc/uvc_video.c 
> > b/drivers/media/usb/uvc/uvc_video.c
> > index a6a441d92b94..9e90b261428a 100644
> > --- a/drivers/media/usb/uvc/uvc_video.c
> > +++ b/drivers/media/usb/uvc/uvc_video.c
> > @@ -1490,6 +1490,11 @@ static void uvc_video_encode_bulk(struct uvc_urb 
> > *uvc_urb,
> >   urb->transfer_buffer_length = stream->urb_size - len;
> >   }
> >
> > +static inline struct device *stream_to_dmadev(struct uvc_streaming *stream)
> > +{
> > + return stream->dev->udev->bus->controller->parent;
> > +}
> > +
> >   static void uvc_video_complete(struct urb *urb)
> >   {
> >   struct uvc_urb *uvc_urb = urb->context;
> > @@ -1539,6 +1544,11 @@ static void uvc_video_complete(struct urb *urb)
> >* Process the URB headers, and optionally queue expensive memcpy 
> > tasks
> >* to be deferred to a work queue.
> >*/
> > + if (uvc_urb->pages)
> > + dma_sync_single_for_cpu(stream_to_dmadev(stream),
> > + urb->transfer_dma,
> > + urb->transfer_buffer_length,
> > + DMA_FROM_DEVICE);
>
> This doesn't work. Even in iommu-dma, the streaming API still expects to
> work on physically-contiguous memory that could have been passed to
> dma_map_single() in the first place. As-is, this will invalidate
> transfer_buffer_length bytes from the start of the *first* physical
> page, and thus destroy random other data if lines from subsequent
> unrelated pages are dirty in caches.
>
> The only feasible way to do a DMA sync on disjoint pages in a single
> call is with a scatterlist.

Thanks for pointing this out. I guess I was lucky on my hardware and
the areas were always  contiguous.

Will rework and send back to the list.

Thanks again.

>
> Robin.
>
> >   stream->decode(uvc_urb, buf, buf_meta);
> >
> >   /* If no async work is needed, resubmit the URB immediately. */
> > @@ -1566,8 +1576,15 @@ static void uvc_free_urb_buffers(struct 
> > uvc_streaming *stream)
> >   continue;
> >
> >   #ifndef CONFIG_DMA_NONCOHERENT
> > - usb_free_coherent(stream->dev->udev, stream->urb_size,
> > -   uvc_urb->buffer, uvc_urb->dma);
> > + if (uvc_urb->pages) {
> > + vunmap(uvc_urb->buffer);
> > + dma_free_noncontiguous(stream_to_dmadev(stream),
> > +stream->urb_size,
> > +uvc_urb->pages, uvc_urb->dma);
> > + } else {
> > + usb_free_coherent(stream->dev->udev, stream->urb_size,
> > +   uvc_urb->buffer, uvc_urb->dma);
> > + }
> >   #else
> >   kfree(uvc_urb->buffer);
> >   #endif
> > @@ -1577,6 +1594,47 @@ static void uvc_free_urb_buffers(struct 
> > uvc_streaming *stream)
> >   stream->urb_size = 0;
> >   }
> >
> > +#ifndef CONFIG_DMA_NONCOHERENT
> > +static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
> > +  struct uvc_urb *uvc_urb, gfp_t gfp_flags)
> > +{
> > + struct device *dma_dev = stream_to_dmadev(stream);
> > +
> > + if (!dma_can_alloc_noncontiguous(dma_dev)) {
> > + uvc_urb->buffer = usb_alloc_coherent(stream->dev->udev,
> > +  stream->urb_size,
> > +  gfp_flags | __GFP_NOWARN,
> > +  &uvc_urb->dma);
> > + return uvc_urb->buffer != NULL;
> > + }
> > +
> > + uvc_urb->pages = dma_alloc_noncontiguous(dma_dev, stream->urb_size,
> > +  &uvc_urb->dma,
> > +  gfp_flags | __GFP_NOWARN, 0);
> > + if (!uvc_urb->pages)
> > + return false;
> > +
> > + uvc_urb->buffer = vmap(uvc_urb->pages,
> > +PAGE_ALIGN(stream->urb_size) >> PAGE_SHIFT,
> > +VM_DMA_COHERENT, PAGE_KERNEL);
> > + if (!uvc_urb->buffer) {
> > + dma_free_noncontiguous(dma_dev, stream->urb_size,
> > +uvc_urb->pages, 

[PATCH v3 04/17] iommu/hyperv: don't setup IRQ remapping when running as root

2020-11-24 Thread Wei Liu
The IOMMU code needs more work. We're sure for now the IRQ remapping
hooks are not applicable when Linux is the root partition.

Signed-off-by: Wei Liu 
Acked-by: Joerg Roedel 
Reviewed-by: Vitaly Kuznetsov 
---
 drivers/iommu/hyperv-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index e09e2d734c57..8d3ce3add57d 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "irq_remapping.h"
 
@@ -143,7 +144,7 @@ static int __init hyperv_prepare_irq_remapping(void)
int i;
 
if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) ||
-   !x2apic_supported())
+   !x2apic_supported() || hv_root_partition)
return -ENODEV;
 
fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0);
-- 
2.20.1



Re: [PATCH 5/6] media: uvcvideo: Use dma_alloc_noncontiguos API

2020-11-24 Thread Robin Murphy

On 2020-11-24 15:38, Ricardo Ribalda wrote:

On architectures where there is no coherent caching, such as ARM, use the
dma_alloc_noncontiguous API and handle the cache flushing manually using
dma_sync_single().

With this patch on the affected architectures we can measure up to 20x
performance improvement in uvc_video_copy_data_work().

Signed-off-by: Ricardo Ribalda 
---
  drivers/media/usb/uvc/uvc_video.c | 74 ++-
  drivers/media/usb/uvc/uvcvideo.h  |  1 +
  2 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/drivers/media/usb/uvc/uvc_video.c 
b/drivers/media/usb/uvc/uvc_video.c
index a6a441d92b94..9e90b261428a 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1490,6 +1490,11 @@ static void uvc_video_encode_bulk(struct uvc_urb 
*uvc_urb,
urb->transfer_buffer_length = stream->urb_size - len;
  }
  
+static inline struct device *stream_to_dmadev(struct uvc_streaming *stream)

+{
+   return stream->dev->udev->bus->controller->parent;
+}
+
  static void uvc_video_complete(struct urb *urb)
  {
struct uvc_urb *uvc_urb = urb->context;
@@ -1539,6 +1544,11 @@ static void uvc_video_complete(struct urb *urb)
 * Process the URB headers, and optionally queue expensive memcpy tasks
 * to be deferred to a work queue.
 */
+   if (uvc_urb->pages)
+   dma_sync_single_for_cpu(stream_to_dmadev(stream),
+   urb->transfer_dma,
+   urb->transfer_buffer_length,
+   DMA_FROM_DEVICE);


This doesn't work. Even in iommu-dma, the streaming API still expects to 
work on physically-contiguous memory that could have been passed to 
dma_map_single() in the first place. As-is, this will invalidate 
transfer_buffer_length bytes from the start of the *first* physical 
page, and thus destroy random other data if lines from subsequent 
unrelated pages are dirty in caches.


The only feasible way to do a DMA sync on disjoint pages in a single 
call is with a scatterlist.
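
For illustration, if the allocation side handed back an sg_table describing
those pages, the completion handler could then do the whole-buffer sync in a
single call; uvc_urb->sgt below is hypothetical and not in the posted patch:

	if (uvc_urb->sgt)
		dma_sync_sgtable_for_cpu(stream_to_dmadev(stream), uvc_urb->sgt,
					 DMA_FROM_DEVICE);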


Robin.


stream->decode(uvc_urb, buf, buf_meta);
  
  	/* If no async work is needed, resubmit the URB immediately. */

@@ -1566,8 +1576,15 @@ static void uvc_free_urb_buffers(struct uvc_streaming 
*stream)
continue;
  
  #ifndef CONFIG_DMA_NONCOHERENT

-   usb_free_coherent(stream->dev->udev, stream->urb_size,
- uvc_urb->buffer, uvc_urb->dma);
+   if (uvc_urb->pages) {
+   vunmap(uvc_urb->buffer);
+   dma_free_noncontiguous(stream_to_dmadev(stream),
+  stream->urb_size,
+  uvc_urb->pages, uvc_urb->dma);
+   } else {
+   usb_free_coherent(stream->dev->udev, stream->urb_size,
+ uvc_urb->buffer, uvc_urb->dma);
+   }
  #else
kfree(uvc_urb->buffer);
  #endif
@@ -1577,6 +1594,47 @@ static void uvc_free_urb_buffers(struct uvc_streaming 
*stream)
stream->urb_size = 0;
  }
  
+#ifndef CONFIG_DMA_NONCOHERENT

+static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
+struct uvc_urb *uvc_urb, gfp_t gfp_flags)
+{
+   struct device *dma_dev = stream_to_dmadev(stream);
+
+   if (!dma_can_alloc_noncontiguous(dma_dev)) {
+   uvc_urb->buffer = usb_alloc_coherent(stream->dev->udev,
+stream->urb_size,
+gfp_flags | __GFP_NOWARN,
+&uvc_urb->dma);
+   return uvc_urb->buffer != NULL;
+   }
+
+   uvc_urb->pages = dma_alloc_noncontiguous(dma_dev, stream->urb_size,
+&uvc_urb->dma,
+gfp_flags | __GFP_NOWARN, 0);
+   if (!uvc_urb->pages)
+   return false;
+
+   uvc_urb->buffer = vmap(uvc_urb->pages,
+  PAGE_ALIGN(stream->urb_size) >> PAGE_SHIFT,
+  VM_DMA_COHERENT, PAGE_KERNEL);
+   if (!uvc_urb->buffer) {
+   dma_free_noncontiguous(dma_dev, stream->urb_size,
+  uvc_urb->pages, uvc_urb->dma);
+   return false;
+   }
+
+   return true;
+}
+#else
+static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
+struct uvc_urb *uvc_urb, gfp_t gfp_flags)
+{
+   uvc_urb->buffer = kmalloc(stream->urb_size, gfp_flags | __GFP_NOWARN);
+
+   return uvc_urb->buffer != NULL;
+}
+#endif
+
  /*
   * Allocate transfer buffers. This function can be called with buffers
   * already allocated when resuming from suspend, in which case it will
@@ 

[PATCH 6/6] TEST-ONLY: media: uvcvideo: Add statistics for measuring performance

2020-11-24 Thread Ricardo Ribalda
From: Shik Chen 

Majorly based on [1], with the following tweaks:

* Use div_u64 for u64 divisions
* Calculate standard deviation
* Fix an uninitialized |min| field for header
* Apply clang-format

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/kbingham/rcar.git/commit/?h=uvc/async-ml&id=cebbd1b629bbe5f856ec5dc7591478c003f5a944
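
The standard-deviation tweak above relies on accumulating both the sum of
durations and the sum of squared durations (presumably the duration2 field),
so each statistic only needs two running counters; a sketch of the identity
used by the dump code below:

	\sigma = \sqrt{\tfrac{1}{n}\sum_i x_i^2 - \bigl(\tfrac{1}{n}\sum_i x_i\bigr)^2}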

Signed-off-by: Shik Chen 
---
 drivers/media/usb/uvc/uvc_video.c | 163 +-
 drivers/media/usb/uvc/uvcvideo.h  |  21 
 2 files changed, 181 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/uvc/uvc_video.c 
b/drivers/media/usb/uvc/uvc_video.c
index 9e90b261428a..d3a515015003 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -906,12 +906,61 @@ static void uvc_video_stats_update(struct uvc_streaming 
*stream)
memset(&stream->stats.frame, 0, sizeof(stream->stats.frame));
 }
 
+size_t uvc_video_dump_time_stats(char *buf, size_t size,
+struct uvc_stats_time *stat, const char *pfx)
+{
+   unsigned int avg = 0;
+   unsigned int std = 0;
+
+   if (stat->qty) {
+   avg = div_u64(stat->duration, stat->qty);
+   std = int_sqrt64(div_u64(stat->duration2, stat->qty) -
+avg * avg);
+   }
+
+   /* Stat durations are in nanoseconds, we present in micro-seconds */
+   return scnprintf(
+   buf, size,
+   "%s: %llu/%u uS/qty: %u.%03u avg %u.%03u std %u.%03u min 
%u.%03u max (uS)\n",
+   pfx, div_u64(stat->duration, 1000), stat->qty, avg / 1000,
+   avg % 1000, std / 1000, std % 1000, stat->min / 1000,
+   stat->min % 1000, stat->max / 1000, stat->max % 1000);
+}
+
+size_t uvc_video_dump_speed(char *buf, size_t size, const char *pfx, u64 bytes,
+   u64 milliseconds)
+{
+   unsigned int rate = 0;
+   bool gbit = false;
+
+   if (milliseconds)
+   rate = div_u64(bytes * 8, milliseconds);
+
+   if (rate >= 100) {
+   gbit = true;
+   rate /= 1000;
+   }
+
+   /*
+* bits/milliseconds == kilobits/seconds,
+* presented here as Mbits/s (or Gbit/s) with 3 decimal places
+*/
+   return scnprintf(buf, size, "%s: %d.%03d %sbits/s\n", pfx, rate / 1000,
+rate % 1000, gbit ? "G" : "M");
+}
+
 size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf,
size_t size)
 {
+   u64 bytes = stream->stats.stream.bytes; /* Single sample */
+   unsigned int empty_ratio = 0;
unsigned int scr_sof_freq;
unsigned int duration;
+   unsigned int fps = 0;
size_t count = 0;
+   u64 cpu = 0;
+   u64 cpu_q = 0;
+   u32 cpu_r = 0;
 
/* Compute the SCR.SOF frequency estimate. At the nominal 1kHz SOF
 * frequency this will not overflow before more than 1h.
@@ -924,12 +973,19 @@ size_t uvc_video_stats_dump(struct uvc_streaming *stream, 
char *buf,
else
scr_sof_freq = 0;
 
+   if (stream->stats.stream.nb_packets)
+   empty_ratio = stream->stats.stream.nb_empty * 100 /
+ stream->stats.stream.nb_packets;
+
count += scnprintf(buf + count, size - count,
-  "frames:  %u\npackets: %u\nempty:   %u\n"
-  "errors:  %u\ninvalid: %u\n",
+  "frames:  %u\n"
+  "packets: %u\n"
+  "empty:   %u (%u %%)\n"
+  "errors:  %u\n"
+  "invalid: %u\n",
   stream->stats.stream.nb_frames,
   stream->stats.stream.nb_packets,
-  stream->stats.stream.nb_empty,
+  stream->stats.stream.nb_empty, empty_ratio,
   stream->stats.stream.nb_errors,
   stream->stats.stream.nb_invalid);
count += scnprintf(buf + count, size - count,
@@ -946,6 +1002,55 @@ size_t uvc_video_stats_dump(struct uvc_streaming *stream, 
char *buf,
   stream->stats.stream.min_sof,
   stream->stats.stream.max_sof,
   scr_sof_freq / 1000, scr_sof_freq % 1000);
+   count += scnprintf(buf + count, size - count,
+  "bytes %lld : duration %d\n", bytes, duration);
+
+   if (duration != 0) {
+   /* Duration is in milliseconds, * 100 to gain 2 dp precision */
+   fps = stream->stats.stream.nb_frames * 1000 * 100 / duration;
+   /* CPU usage as a % with 6 decimal places */
+   cpu = div_u64(stream->stats.urbstat.decode.duration, duration) *
+ 100;
+   }
+
+   count += scnprintf(buf + count, size - count, "FPS: %u.%02u\n",
+  fps / 100, 

[PATCH 5/6] media: uvcvideo: Use dma_alloc_noncontiguos API

2020-11-24 Thread Ricardo Ribalda
On architectures where there is no coherent caching, such as ARM, use the
dma_alloc_noncontiguous API and handle the cache flushing manually using
dma_sync_single().

With this patch on the affected architectures we can measure up to 20x
performance improvement in uvc_video_copy_data_work().

Signed-off-by: Ricardo Ribalda 
---
 drivers/media/usb/uvc/uvc_video.c | 74 ++-
 drivers/media/usb/uvc/uvcvideo.h  |  1 +
 2 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/drivers/media/usb/uvc/uvc_video.c 
b/drivers/media/usb/uvc/uvc_video.c
index a6a441d92b94..9e90b261428a 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1490,6 +1490,11 @@ static void uvc_video_encode_bulk(struct uvc_urb 
*uvc_urb,
urb->transfer_buffer_length = stream->urb_size - len;
 }
 
+static inline struct device *stream_to_dmadev(struct uvc_streaming *stream)
+{
+   return stream->dev->udev->bus->controller->parent;
+}
+
 static void uvc_video_complete(struct urb *urb)
 {
struct uvc_urb *uvc_urb = urb->context;
@@ -1539,6 +1544,11 @@ static void uvc_video_complete(struct urb *urb)
 * Process the URB headers, and optionally queue expensive memcpy tasks
 * to be deferred to a work queue.
 */
+   if (uvc_urb->pages)
+   dma_sync_single_for_cpu(stream_to_dmadev(stream),
+   urb->transfer_dma,
+   urb->transfer_buffer_length,
+   DMA_FROM_DEVICE);
stream->decode(uvc_urb, buf, buf_meta);
 
/* If no async work is needed, resubmit the URB immediately. */
@@ -1566,8 +1576,15 @@ static void uvc_free_urb_buffers(struct uvc_streaming 
*stream)
continue;
 
 #ifndef CONFIG_DMA_NONCOHERENT
-   usb_free_coherent(stream->dev->udev, stream->urb_size,
- uvc_urb->buffer, uvc_urb->dma);
+   if (uvc_urb->pages) {
+   vunmap(uvc_urb->buffer);
+   dma_free_noncontiguous(stream_to_dmadev(stream),
+  stream->urb_size,
+  uvc_urb->pages, uvc_urb->dma);
+   } else {
+   usb_free_coherent(stream->dev->udev, stream->urb_size,
+ uvc_urb->buffer, uvc_urb->dma);
+   }
 #else
kfree(uvc_urb->buffer);
 #endif
@@ -1577,6 +1594,47 @@ static void uvc_free_urb_buffers(struct uvc_streaming 
*stream)
stream->urb_size = 0;
 }
 
+#ifndef CONFIG_DMA_NONCOHERENT
+static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
+struct uvc_urb *uvc_urb, gfp_t gfp_flags)
+{
+   struct device *dma_dev = stream_to_dmadev(stream);
+
+   if (!dma_can_alloc_noncontiguous(dma_dev)) {
+   uvc_urb->buffer = usb_alloc_coherent(stream->dev->udev,
+stream->urb_size,
+gfp_flags | __GFP_NOWARN,
+&uvc_urb->dma);
+   return uvc_urb->buffer != NULL;
+   }
+
+   uvc_urb->pages = dma_alloc_noncontiguous(dma_dev, stream->urb_size,
+&uvc_urb->dma,
+gfp_flags | __GFP_NOWARN, 0);
+   if (!uvc_urb->pages)
+   return false;
+
+   uvc_urb->buffer = vmap(uvc_urb->pages,
+  PAGE_ALIGN(stream->urb_size) >> PAGE_SHIFT,
+  VM_DMA_COHERENT, PAGE_KERNEL);
+   if (!uvc_urb->buffer) {
+   dma_free_noncontiguous(dma_dev, stream->urb_size,
+  uvc_urb->pages, uvc_urb->dma);
+   return false;
+   }
+
+   return true;
+}
+#else
+static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
+struct uvc_urb *uvc_urb, gfp_t gfp_flags)
+{
+   uvc_urb->buffer = kmalloc(stream->urb_size, gfp_flags | __GFP_NOWARN);
+
+   return uvc_urb->buffer != NULL;
+}
+#endif
+
 /*
  * Allocate transfer buffers. This function can be called with buffers
  * already allocated when resuming from suspend, in which case it will
@@ -1607,19 +1665,11 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming 
*stream,
 
/* Retry allocations until one succeed. */
for (; npackets > 1; npackets /= 2) {
+   stream->urb_size = psize * npackets;
for (i = 0; i < UVC_URBS; ++i) {
struct uvc_urb *uvc_urb = >uvc_urb[i];
 
-   stream->urb_size = psize * npackets;
-#ifndef CONFIG_DMA_NONCOHERENT
-   uvc_urb->buffer = usb_alloc_coherent(
-   stream->dev->udev, 

[PATCH 3/6] dma-iommu: remove __iommu_dma_mmap

2020-11-24 Thread Ricardo Ribalda
From: Christoph Hellwig 

The function has a single caller, so open code it there and take
advantage of the precalculated page count variable.

Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 73249732afd3..a2fb92de7e3d 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -655,21 +655,6 @@ static void *iommu_dma_alloc_remap(struct device *dev, 
size_t size,
return NULL;
 }
 
-/**
- * __iommu_dma_mmap - Map a buffer into provided user VMA
- * @pages: Array representing buffer from __iommu_dma_alloc()
- * @size: Size of buffer in bytes
- * @vma: VMA describing requested userspace mapping
- *
- * Maps the pages of the buffer in @pages into @vma. The caller is responsible
- * for verifying the correct size and protection of @vma beforehand.
- */
-static int __iommu_dma_mmap(struct page **pages, size_t size,
-   struct vm_area_struct *vma)
-{
-   return vm_map_pages(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
-}
-
 static void iommu_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 {
@@ -1074,7 +1059,7 @@ static int iommu_dma_mmap(struct device *dev, struct 
vm_area_struct *vma,
struct page **pages = dma_common_find_pages(cpu_addr);
 
if (pages)
-   return __iommu_dma_mmap(pages, size, vma);
+   return vm_map_pages(vma, pages, nr_pages);
pfn = vmalloc_to_pfn(cpu_addr);
} else {
pfn = page_to_pfn(virt_to_page(cpu_addr));
-- 
2.29.2.454.gaff20da3a2-goog



[PATCH 4/6] WIP: add a dma_alloc_contiguous API

2020-11-24 Thread Ricardo Ribalda
From: Christoph Hellwig 

Add a new API that returns a virtually non-contiguous array of pages
and dma address.  This API is only implemented for dma-iommu and will
not be implemented for non-iommu DMA API instances that have to allocate
contiguous memory.  It is up to the caller to check if the API is
available.

The intent is that media drivers can use this API if either:

 - no kernel mapping or only temporary kernel mappings are required.
   That is as a better replacement for DMA_ATTR_NO_KERNEL_MAPPING
 - a kernel mapping is required for cached and DMA mapped pages, but
   the driver also needs the pages to e.g. map them to userspace.
   In that sense it is a replacement for some aspects of the recently
   removed and never fully implemented DMA_ATTR_NON_CONSISTENT
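
A rough caller-side sketch of the flow this enables, using the signatures as
they appear in the uvcvideo patch later in this series (dev and size are
placeholders, and the vmap() step is optional):

	struct page **pages;
	dma_addr_t dma;
	void *vaddr;

	if (!dma_can_alloc_noncontiguous(dev))
		return -EOPNOTSUPP;	/* caller falls back to a contiguous allocation */

	pages = dma_alloc_noncontiguous(dev, size, &dma, GFP_KERNEL, 0);
	if (!pages)
		return -ENOMEM;

	/* optional kernel mapping, only if the caller actually needs one */
	vaddr = vmap(pages, PAGE_ALIGN(size) >> PAGE_SHIFT, VM_MAP, PAGE_KERNEL);
	...
	vunmap(vaddr);
	dma_free_noncontiguous(dev, size, pages, dma);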

Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c   | 73 +
 include/linux/dma-map-ops.h |  4 ++
 include/linux/dma-mapping.h |  5 +++
 kernel/dma/mapping.c| 35 ++
 4 files changed, 93 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a2fb92de7e3d..2e72fe1b9c3b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -564,23 +564,12 @@ static struct page **__iommu_dma_alloc_pages(struct 
device *dev,
return pages;
 }
 
-/**
- * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
- * @dev: Device to allocate memory for. Must be a real device
- *  attached to an iommu_dma_domain
- * @size: Size of buffer in bytes
- * @dma_handle: Out argument for allocated DMA handle
- * @gfp: Allocation flags
- * @prot: pgprot_t to use for the remapped mapping
- * @attrs: DMA attributes for this allocation
- *
- * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+/*
+ * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
  * but an IOMMU which supports smaller pages might not map the whole thing.
- *
- * Return: Mapped virtual address, or NULL on failure.
  */
-static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
-   dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
+static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
+   size_t size, dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
unsigned long attrs)
 {
struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -592,7 +581,6 @@ static void *iommu_dma_alloc_remap(struct device *dev, 
size_t size,
struct page **pages;
struct sg_table sgt;
dma_addr_t iova;
-   void *vaddr;
 
*dma_handle = DMA_MAPPING_ERROR;
 
@@ -635,17 +623,10 @@ static void *iommu_dma_alloc_remap(struct device *dev, 
size_t size,
< size)
goto out_free_sg;
 
-   vaddr = dma_common_pages_remap(pages, size, prot,
-   __builtin_return_address(0));
-   if (!vaddr)
-   goto out_unmap;
-
*dma_handle = iova;
sg_free_table(&sgt);
-   return vaddr;
+   return pages;
 
-out_unmap:
-   __iommu_dma_unmap(dev, iova, size);
 out_free_sg:
sg_free_table(&sgt);
 out_free_iova:
@@ -655,6 +636,46 @@ static void *iommu_dma_alloc_remap(struct device *dev, 
size_t size,
return NULL;
 }
 
+static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
+   dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
+   unsigned long attrs)
+{
+   struct page **pages;
+   void *vaddr;
+
+   pages = __iommu_dma_alloc_noncontiguous(dev, size, dma_handle, gfp,
+   prot, attrs);
+   if (!pages)
+   return NULL;
+   vaddr = dma_common_pages_remap(pages, size, prot,
+   __builtin_return_address(0));
+   if (!vaddr)
+   goto out_unmap;
+   return vaddr;
+
+out_unmap:
+   __iommu_dma_unmap(dev, *dma_handle, size);
+   __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
+   return NULL;
+}
+
+#ifdef CONFIG_DMA_REMAP
+static struct page **iommu_dma_alloc_noncontiguous(struct device *dev,
+   size_t size, dma_addr_t *dma_handle, gfp_t gfp,
+   unsigned long attrs)
+{
+   return __iommu_dma_alloc_noncontiguous(dev, size, dma_handle, gfp,
+  PAGE_KERNEL, attrs);
+}
+
+static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
+   struct page **pages, dma_addr_t dma_handle)
+{
+   __iommu_dma_unmap(dev, dma_handle, size);
+   __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
+}
+#endif
+
 static void iommu_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 {
@@ -1109,6 +1130,10 @@ static const struct dma_map_ops iommu_dma_ops = {
.free   = 

[PATCH 2/6] dma-direct: use __GFP_ZERO in dma_direct_alloc_pages

2020-11-24 Thread Ricardo Ribalda
From: Christoph Hellwig 

Prepare for supporting the DMA_ATTR_NO_KERNEL_MAPPING flag in
dma_alloc_pages.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/direct.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 06c111544f61..76c741e610fc 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -280,13 +280,12 @@ struct page *dma_direct_alloc_pages(struct device *dev, 
size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
 {
struct page *page;
-   void *ret;
 
if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
-   page = __dma_direct_alloc_pages(dev, size, gfp);
+   page = __dma_direct_alloc_pages(dev, size, gfp | __GFP_ZERO);
if (!page)
return NULL;
if (PageHighMem(page)) {
@@ -300,13 +299,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, 
size_t size,
goto out_free_pages;
}
 
-   ret = page_address(page);
if (force_dma_unencrypted(dev)) {
-   if (set_memory_decrypted((unsigned long)ret,
+   if (set_memory_decrypted((unsigned long) page_address(page),
1 << get_order(size)))
goto out_free_pages;
}
-   memset(ret, 0, size);
*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return page;
 out_free_pages:
-- 
2.29.2.454.gaff20da3a2-goog



[PATCH 1/6] dma-mapping: remove the {alloc,free}_noncoherent methods

2020-11-24 Thread Ricardo Ribalda
From: Christoph Hellwig 

It turns out allowing non-contiguous allocations here was a rather bad
idea, as we'll now need to define ways to get the pages for mmapping
or dma_buf sharing.  Revert this change and stick to the original
concept.  A different API for the use case of non-contiguous allocations
will be added back later.
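
For reference, a minimal caller-side sketch of the (contiguous-only) API that
remains after this revert; dev, size and the direction are illustrative, and
the caller still owns cache maintenance via dma_sync_single_for_{cpu,device}():

	void *vaddr;
	dma_addr_t dma;

	vaddr = dma_alloc_noncoherent(dev, size, &dma, DMA_BIDIRECTIONAL, GFP_KERNEL);
	if (!vaddr)
		return -ENOMEM;

	/* CPU fills the buffer, then hands ownership to the device */
	dma_sync_single_for_device(dev, dma, size, DMA_TO_DEVICE);
	...
	dma_free_noncoherent(dev, size, vaddr, dma, DMA_BIDIRECTIONAL);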

Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c   | 30 --
 include/linux/dma-map-ops.h |  5 -
 kernel/dma/mapping.c| 33 ++---
 3 files changed, 6 insertions(+), 62 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 0cbcd3fc3e7e..73249732afd3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1054,34 +1054,6 @@ static void *iommu_dma_alloc(struct device *dev, size_t 
size,
return cpu_addr;
 }
 
-#ifdef CONFIG_DMA_REMAP
-static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size,
-   dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp)
-{
-   if (!gfpflags_allow_blocking(gfp)) {
-   struct page *page;
-
-   page = dma_common_alloc_pages(dev, size, handle, dir, gfp);
-   if (!page)
-   return NULL;
-   return page_address(page);
-   }
-
-   return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO,
-PAGE_KERNEL, 0);
-}
-
-static void iommu_dma_free_noncoherent(struct device *dev, size_t size,
-   void *cpu_addr, dma_addr_t handle, enum dma_data_direction dir)
-{
-   __iommu_dma_unmap(dev, handle, size);
-   __iommu_dma_free(dev, size, cpu_addr);
-}
-#else
-#define iommu_dma_alloc_noncoherent NULL
-#define iommu_dma_free_noncoherent NULL
-#endif /* CONFIG_DMA_REMAP */
-
 static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
@@ -1152,8 +1124,6 @@ static const struct dma_map_ops iommu_dma_ops = {
.free   = iommu_dma_free,
.alloc_pages= dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
-   .alloc_noncoherent  = iommu_dma_alloc_noncoherent,
-   .free_noncoherent   = iommu_dma_free_noncoherent,
.mmap   = iommu_dma_mmap,
.get_sgtable= iommu_dma_get_sgtable,
.map_page   = iommu_dma_map_page,
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index a5f89fc4d6df..3d1f91464bcf 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -22,11 +22,6 @@ struct dma_map_ops {
gfp_t gfp);
void (*free_pages)(struct device *dev, size_t size, struct page *vaddr,
dma_addr_t dma_handle, enum dma_data_direction dir);
-   void *(*alloc_noncoherent)(struct device *dev, size_t size,
-   dma_addr_t *dma_handle, enum dma_data_direction dir,
-   gfp_t gfp);
-   void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr,
-   dma_addr_t dma_handle, enum dma_data_direction dir);
int (*mmap)(struct device *, struct vm_area_struct *,
void *, dma_addr_t, size_t, unsigned long attrs);
 
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 51bb8fa8eb89..d3032513c54b 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -514,40 +514,19 @@ EXPORT_SYMBOL_GPL(dma_free_pages);
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
 {
-   const struct dma_map_ops *ops = get_dma_ops(dev);
-   void *vaddr;
-
-   if (!ops || !ops->alloc_noncoherent) {
-   struct page *page;
-
-   page = dma_alloc_pages(dev, size, dma_handle, dir, gfp);
-   if (!page)
-   return NULL;
-   return page_address(page);
-   }
+   struct page *page;
 
-   size = PAGE_ALIGN(size);
-   vaddr = ops->alloc_noncoherent(dev, size, dma_handle, dir, gfp);
-   if (vaddr)
-   debug_dma_map_page(dev, virt_to_page(vaddr), 0, size, dir,
-  *dma_handle);
-   return vaddr;
+   page = dma_alloc_pages(dev, size, dma_handle, dir, gfp);
+   if (!page)
+   return NULL;
+   return page_address(page);
 }
 EXPORT_SYMBOL_GPL(dma_alloc_noncoherent);
 
 void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, enum dma_data_direction dir)
 {
-   const struct dma_map_ops *ops = get_dma_ops(dev);
-
-   if (!ops || !ops->free_noncoherent) {
-   dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir);
- 

RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as broken

2020-11-24 Thread Deucher, Alexander
[AMD Public Use]

> -Original Message-
> From: Merger, Edgar [AUTOSOL/MAS/AUGS]
> 
> Sent: Tuesday, November 24, 2020 2:29 AM
> To: Huang, Ray ; Kuehling, Felix
> 
> Cc: Will Deacon ; Deucher, Alexander
> ; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org; iommu@lists.linux-foundation.org; Bjorn Helgaas
> ; Joerg Roedel ; Zhu, Changfeng
> 
> Subject: RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as
> broken
> 
> Module Version : PiccasoCpu 10
> AGESA Version   : PiccasoPI 100A
> 
> I did not try to enter the system in any other way (like via ssh) than via
> Desktop.

You can get this information from the amdgpu driver.  E.g., sudo cat 
/sys/kernel/debug/dri/0/amdgpu_firmware_info .  Also what is the PCI revision 
id of your chip (from lspci)?  Also are you just seeing this on specific 
versions of the sbios?

Thanks,

Alex


> 
> -Original Message-
> From: Huang Rui 
> Sent: Dienstag, 24. November 2020 07:43
> To: Kuehling, Felix 
> Cc: Will Deacon ; Deucher, Alexander
> ; linux-ker...@vger.kernel.org; linux-
> p...@vger.kernel.org; iommu@lists.linux-foundation.org; Bjorn Helgaas
> ; Merger, Edgar [AUTOSOL/MAS/AUGS]
> ; Joerg Roedel ;
> Changfeng Zhu 
> Subject: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as broken
> 
> On Tue, Nov 24, 2020 at 06:51:11AM +0800, Kuehling, Felix wrote:
> > On 2020-11-23 5:33 p.m., Will Deacon wrote:
> > > On Mon, Nov 23, 2020 at 09:04:14PM +, Deucher, Alexander wrote:
> > >> [AMD Public Use]
> > >>
> > >>> -Original Message-
> > >>> From: Will Deacon 
> > >>> Sent: Monday, November 23, 2020 8:44 AM
> > >>> To: linux-ker...@vger.kernel.org
> > >>> Cc: linux-...@vger.kernel.org; iommu@lists.linux-foundation.org;
> > >>> Will Deacon ; Bjorn Helgaas
> > >>> ; Deucher, Alexander
> > >>> ; Edgar Merger
> > >>> ; Joerg Roedel 
> > >>> Subject: [PATCH] PCI: Mark AMD Raven iGPU ATS as broken
> > >>>
> > >>> Edgar Merger reports that the AMD Raven GPU does not work reliably
> > >>> on his system when the IOMMU is enabled:
> > >>>
> > >>>| [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx timeout,
> > >>> signaled seq=1, emitted seq=3
> > >>>| [...]
> > >>>| amdgpu :0b:00.0: GPU reset begin!
> > >>>| AMD-Vi: Completion-Wait loop timed out
> > >>>| iommu ivhd0: AMD-Vi: Event logged [IOTLB_INV_TIMEOUT
> > >>> device=0b:00.0 address=0x38edc0970]
> > >>>
> > >>> This is indicative of a hardware/platform configuration issue so,
> > >>> since disabling ATS has been shown to resolve the problem, add a
> > >>> quirk to match this particular device while Edgar follows-up with AMD
> for more information.
> > >>>
> > >>> Cc: Bjorn Helgaas 
> > >>> Cc: Alex Deucher 
> > >>> Reported-by: Edgar Merger 
> > >>> Suggested-by: Joerg Roedel 
> > >>> Link: https://lore.kernel.org/linux-iommu/MWHPR10MB1310F042A30661D4158520B589FC0@MWHPR10MB1310.namprd10.prod.outlook.com
> > >>> Signed-off-by: Will Deacon 
> > >>> ---
> > >>>
> > >>> Hi all,
> > >>>
> > >>> Since Joerg is away at the moment, I'm posting this to try to make
> > >>> some progress with the thread in the Link: tag.
> > >> + Felix
> > >>
> > >> What system is this?  Can you provide more details?  Does an sbios
> > >> update fix this?  Disabling ATS for all Ravens will break GPU
> > >> compute for a lot of people.  I'd prefer to just blacklist this
> > >> particular system (e.g., just SSIDs or revision) if possible.
> >
> > +Ray
> >
> > There are already many systems where the IOMMU is disabled in the
> > BIOS, or the CRAT table reporting the APU compute capabilities is
> > broken. Ray has been working on a fallback to make APUs behave like
> > dGPUs on such systems. That should also cover this case where ATS is
> > blacklisted. That said, it affects the programming model, because we
> > don't support the unified and coherent memory model on dGPUs like we
> > do on APUs with IOMMUv2. So it would be good to make the conditions
> > for this 

Re: [PATCH v4 0/2] dma-mapping: provide a benchmark for streaming DMA mapping

2020-11-24 Thread Christoph Hellwig
Thanks,

applied to the dma-mapping tree with two trivial cleanups to
map_benchmark_ioctl().


Re: [PATCH] WIP! media: uvcvideo: Use dma_alloc_noncontiguos API

2020-11-24 Thread Christoph Hellwig
On Tue, Nov 24, 2020 at 01:01:33PM +0100, Ricardo Ribalda wrote:
> I was hoping that you could answer that question :).
> 
> Do you have other use-cases than linux-media in mind?
> 
> I think Sergey wants to experiment also with vb2, to figure out how
> much it affects it.
> His change will be much more complicated than mine though; there are
> more corner cases there.

I don't have anything urgent lined up, although I think there are plenty
of other potential use cases.

> > Can you respost a combined series to get started?
> 
> Sure. Shall I also include the profiling patch?

That is in the media code, right?  I don't really care too much.


[PATCH v10 3/4] iommu: Take lock before reading iommu group default domain type

2020-11-24 Thread Lu Baolu
From: Sai Praneeth Prakhya 

"/sys/kernel/iommu_groups//type" file could be read to find out the
default domain type of an iommu group. The default domain of an iommu group
doesn't change after booting and hence could be read directly. But,
after addding support to dynamically change iommu group default domain, the
above assumption no longer stays valid.

iommu group default domain type could be changed at any time by writing to
"/sys/kernel/iommu_groups//type". So, take group mutex before
reading iommu group default domain type so that the user wouldn't see stale
values or iommu_group_show_type() doesn't try to derefernce stale pointers.

Cc: Christoph Hellwig 
Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Will Deacon 
Cc: Sohil Mehta 
Cc: Robin Murphy 
Cc: Jacob Pan 
Signed-off-by: Sai Praneeth Prakhya 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5243b358b4b8..b4f69fb5e62b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -503,6 +503,7 @@ static ssize_t iommu_group_show_type(struct iommu_group 
*group,
 {
char *type = "unknown\n";
 
+   mutex_lock(&group->mutex);
if (group->default_domain) {
switch (group->default_domain->type) {
case IOMMU_DOMAIN_BLOCKED:
@@ -519,6 +520,7 @@ static ssize_t iommu_group_show_type(struct iommu_group 
*group,
break;
}
}
+   mutex_unlock(&group->mutex);
strcpy(buf, type);
 
return strlen(type);
-- 
2.25.1



[PATCH v10 4/4] iommu: Document usage of "/sys/kernel/iommu_groups//type" file

2020-11-24 Thread Lu Baolu
From: Sai Praneeth Prakhya 

The default domain type of an iommu group can be changed by writing to
the "/sys/kernel/iommu_groups//type" file. Hence, document its usage
and, more importantly, spell out its limitations.

Cc: Christoph Hellwig 
Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Will Deacon 
Cc: Sohil Mehta 
Cc: Robin Murphy 
Cc: Jacob Pan 
Signed-off-by: Sai Praneeth Prakhya 
Signed-off-by: Lu Baolu 
---
 .../ABI/testing/sysfs-kernel-iommu_groups | 29 +++
 1 file changed, 29 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups 
b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index 017f5bc3920c..407b1628d7fd 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -33,3 +33,32 @@ Description:In case an RMRR is used only by graphics or 
USB devices
it is now exposed as "direct-relaxable" instead of "direct".
In device assignment use case, for instance, those RMRR
are considered to be relaxable and safe.
+
+What:  /sys/kernel/iommu_groups//type
+Date:  November 2020
+KernelVersion: v5.11
+Contact:   Sai Praneeth Prakhya 
+Description:   /sys/kernel/iommu_groups//type shows the type of default
+   domain in use by iommu for this group. See include/linux/iommu.h
+   for possible values. A privileged user could request kernel to
+   change the group type by writing to this file. Presently, only
+   three types of request are supported:
+   1. DMA: All the DMA transactions from the device in this group
+   are translated by the iommu.
+   2. identity: All the DMA transactions from the device in this
+group are *not* translated by the iommu.
+   3. auto: Change to the type the device was booted with.
+   Note:
+   -
+   The default domain type of a group may be modified only when
+   1. The group has *only* one device
+   2. The device in the group is not bound to any device driver.
+  So, the users must unbind the appropriate driver before
+  changing the default domain type.
+   Caution:
+   
+   Unbinding a device driver will take away the driver's control
+   over the device and if done on devices that host root file
+   system could lead to catastrophic effects (the users might
+   need to reboot the machine to get it to normal state). So, it's
+   expected that the users understand what they're doing.
-- 
2.25.1
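(Editorial illustration of the procedure documented above; it is not part of the patch. The PCI address "0000:00:14.0", the driver name "xhci_hcd" and group number 7 are made-up examples.)

/* Sketch only: unbind the sole device in a group, switch the group's
 * default domain to "identity", then rebind the driver. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < (ssize_t)strlen(val) ? -1 : 0;
}

int main(void)
{
	if (write_str("/sys/bus/pci/drivers/xhci_hcd/unbind", "0000:00:14.0"))
		return 1;
	if (write_str("/sys/kernel/iommu_groups/7/type", "identity"))
		return 1;
	return write_str("/sys/bus/pci/drivers/xhci_hcd/bind", "0000:00:14.0");
}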



[PATCH v10 2/4] iommu: Add support to change default domain of an iommu group

2020-11-24 Thread Lu Baolu
From: Sai Praneeth Prakhya 

Presently, the default domain of an iommu group is allocated at boot time
and cannot be changed later. So the device would typically be either in
identity (also known as pass-through) mode or in DMA mode for as long as
the machine is up and running. There is no way to change the default domain
type dynamically, i.e. after booting a device cannot switch between
identity mode and DMA mode.

But assume a use case wherein the user trusts the device, believes that the
OS is secure enough, and hence wants *only* this device to bypass the IOMMU
(so that it can perform well) while all the other devices go through the
IOMMU (so that the system is protected). Presently, this use case is not
supported. It would be helpful if there were some way to change the default
domain of an iommu group dynamically. Hence, add such support.

A privileged user could request the kernel to change the default domain
type of an iommu group by writing to
"/sys/kernel/iommu_groups//type" file. Presently, only three values
are supported
1. identity: all the DMA transactions from the device in this group are
 *not* translated by the iommu
2. DMA: all the DMA transactions from the device in this group are
translated by the iommu
3. auto: change to the type the device was booted with

Note:
1. Default domain of an iommu group with two or more devices cannot be
   changed.
2. The device in the iommu group shouldn't be bound to any driver.
3. The device shouldn't be assigned to user for direct access.
4. The change request will fail if any device in the group has a mandatory
   default domain type and the requested one conflicts with that.

Please see "Documentation/ABI/testing/sysfs-kernel-iommu_groups" for more
information.

Cc: Christoph Hellwig 
Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Will Deacon 
Cc: Sohil Mehta 
Cc: Robin Murphy 
Cc: Jacob Pan 
Signed-off-by: Sai Praneeth Prakhya 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/iommu.c | 230 +-
 1 file changed, 229 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f8ee0e86b7fb..5243b358b4b8 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -93,6 +93,8 @@ static void __iommu_detach_group(struct iommu_domain *domain,
 static int iommu_create_device_direct_mappings(struct iommu_group *group,
   struct device *dev);
 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
+static ssize_t iommu_group_store_type(struct iommu_group *group,
+ const char *buf, size_t count);
 
 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)  \
 struct iommu_group_attribute iommu_group_attr_##_name =\
@@ -527,7 +529,8 @@ static IOMMU_GROUP_ATTR(name, S_IRUGO, 
iommu_group_show_name, NULL);
 static IOMMU_GROUP_ATTR(reserved_regions, 0444,
iommu_group_show_resv_regions, NULL);
 
-static IOMMU_GROUP_ATTR(type, 0444, iommu_group_show_type, NULL);
+static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
+   iommu_group_store_type);
 
 static void iommu_group_release(struct kobject *kobj)
 {
@@ -3027,3 +3030,228 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle)
return ops->sva_get_pasid(handle);
 }
 EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
+
+/*
+ * Changes the default domain of an iommu group that has *only* one device
+ *
+ * @group: The group for which the default domain should be changed
+ * @prev_dev: The device in the group (this is used to make sure that the 
device
+ *  hasn't changed after the caller has called this function)
+ * @type: The type of the new default domain that gets associated with the 
group
+ *
+ * Returns 0 on success and error code on failure
+ *
+ * Note:
+ * 1. Presently, this function is called only when user requests to change the
+ *group's default domain type through 
/sys/kernel/iommu_groups//type
+ *Please take a closer look if intended to use for other purposes.
+ */
+static int iommu_change_dev_def_domain(struct iommu_group *group,
+  struct device *prev_dev, int type)
+{
+   struct iommu_domain *prev_dom;
+   struct group_device *grp_dev;
+   int ret, dev_def_dom;
+   struct device *dev;
+
+   if (!group)
+   return -EINVAL;
+
+   mutex_lock(&group->mutex);
+
+   if (group->default_domain != group->domain) {
+   dev_err_ratelimited(prev_dev, "Group not assigned to default 
domain\n");
+   ret = -EBUSY;
+   goto out;
+   }
+
+   /*
+* iommu group wasn't locked while acquiring device lock in
+* iommu_group_store_type(). So, make sure that the device count hasn't
+* changed while acquiring device lock.
+*
+* Changing default domain of an iommu group with two or more 

[PATCH v10 1/4] iommu: Move def_domain type check for untrusted device into core

2020-11-24 Thread Lu Baolu
So that the vendor iommu drivers are no longer required to provide the
def_domain_type callback just to always isolate untrusted devices.

Link: 
https://lore.kernel.org/linux-iommu/243ce89c33fe4b9da4c56ba35aceb...@huawei.com/
Cc: Shameerali Kolothum Thodi 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel/iommu.c |  7 ---
 drivers/iommu/iommu.c   | 16 +++-
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 0233d2438c44..8f51980023a2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2916,13 +2916,6 @@ static int device_def_domain_type(struct device *dev)
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
 
-   /*
-* Prevent any device marked as untrusted from getting
-* placed into the statically identity mapping domain.
-*/
-   if (pdev->untrusted)
-   return IOMMU_DOMAIN_DMA;
-
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && 
IS_AZALIA(pdev))
return IOMMU_DOMAIN_IDENTITY;
 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5236c4e4dd4c..f8ee0e86b7fb 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1462,12 +1462,14 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group);
 static int iommu_get_def_domain_type(struct device *dev)
 {
const struct iommu_ops *ops = dev->bus->iommu_ops;
-   unsigned int type = 0;
+
+   if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
+   return IOMMU_DOMAIN_DMA;
 
if (ops->def_domain_type)
-   type = ops->def_domain_type(dev);
+   return ops->def_domain_type(dev);
 
-   return (type == 0) ? iommu_def_domain_type : type;
+   return 0;
 }
 
 static int iommu_group_alloc_default_domain(struct bus_type *bus,
@@ -1509,7 +1511,7 @@ static int iommu_alloc_default_domain(struct iommu_group 
*group,
if (group->default_domain)
return 0;
 
-   type = iommu_get_def_domain_type(dev);
+   type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
 
return iommu_group_alloc_default_domain(dev->bus, group, type);
 }
@@ -1647,12 +1649,8 @@ struct __group_domain_type {
 
 static int probe_get_default_domain_type(struct device *dev, void *data)
 {
-   const struct iommu_ops *ops = dev->bus->iommu_ops;
struct __group_domain_type *gtype = data;
-   unsigned int type = 0;
-
-   if (ops->def_domain_type)
-   type = ops->def_domain_type(dev);
+   unsigned int type = iommu_get_def_domain_type(dev);
 
if (type) {
if (gtype->type && gtype->type != type) {
-- 
2.25.1



[PATCH v10 0/4] iommu: Add support to change default domain of an iommu group

2020-11-24 Thread Lu Baolu
Hi,

The last post of this series:

https://lore.kernel.org/linux-iommu/20201121135620.3496419-1-baolu...@linux.intel.com/

Change log in this series:
 1. Changes according to comments at
https://lore.kernel.org/linux-iommu/20201123120449.GB10233@willie-the-truck/
- Remove the unnecessary iommu_get_mandatory_def_domain_type()

Best regards,
baolu

Lu Baolu (1):
  iommu: Move def_domain type check for untrusted device into core

Sai Praneeth Prakhya (3):
  iommu: Add support to change default domain of an iommu group
  iommu: Take lock before reading iommu group default domain type
  iommu: Document usage of "/sys/kernel/iommu_groups//type" file

 .../ABI/testing/sysfs-kernel-iommu_groups |  29 ++
 drivers/iommu/intel/iommu.c   |   7 -
 drivers/iommu/iommu.c | 248 +-
 3 files changed, 267 insertions(+), 17 deletions(-)

-- 
2.25.1



Re: [PATCH] WIP! media: uvcvideo: Use dma_alloc_noncontiguos API

2020-11-24 Thread Ricardo Ribalda
HI Christoph

On Tue, Nov 24, 2020 at 12:35 PM Christoph Hellwig  wrote:
>
> On Wed, Nov 18, 2020 at 03:25:46PM +0100, Ricardo Ribalda wrote:
> > On architectures where there is no coherent caching, such as ARM, use the
> > dma_alloc_noncontiguous API and handle the cache flushing manually using
> > dma_sync_single().
> >
> > With this patch on the affected architectures we can measure up to 20x
> > performance improvement in uvc_video_copy_data_work().
>
> This has a bunch of crazy long lines, but otherwise looks fine to me.

That is easy to solve :)

https://github.com/ribalda/linux/commit/17ab65a08302e845ad7ae7775ce54b387a58a887

>
> >
> > Signed-off-by: Ricardo Ribalda 
> > ---
> >
> > This patch depends on dma_alloc_contiguous API1315351diffmboxseries
>
> How do we want to proceed?  Do the media maintainers want to pick up
> that patch?  Should I pick up the media patch in the dma-mapping tree?

I was hoping that you could answer that question :).

Do you have other use-cases than linux-media in mind?

I think Sergey wants to experiment also with vb2, to figure out how
much it affects it.
His change will be much more complicated than mine though; there are
more corner cases there.

>
> Can you respost a combined series to get started?

Sure. Shall I also include the profiling patch?


Best regards
-- 
Ricardo Ribalda

Re: [PATCH] [PATCH] Adding offset keeping option when mapping data via SWIOTLB.

2020-11-24 Thread Christoph Hellwig
On Mon, Nov 23, 2020 at 02:18:07PM -0800, Jianxiong Gao wrote:
> The NVMe driver and other applications may depend on the data offset
> to operate correctly. Currently, when unaligned data is mapped via
> SWIOTLB, the data is mapped slab-aligned with the SWIOTLB. When
> booting with the swiotlb=force option and using NVMe as the interface,
> running mkfs.xfs on RHEL fails because of this misalignment.
> This patch adds an option to make sure the mapped data preserves
> the offset of the original address. Tested on the latest kernel;
> this patch fixes the issue.
> 
> Signed-off-by: Jianxiong Gao 
> Acked-by: David Rientjes 

I think we actually need to do this by default.  There are plenty
of other hardware designs that rely on dma mapping not adding
offsets that did not exist, e.g. ahci and various RDMA NICs.
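(To make the point concrete, an editorial sketch rather than code from the patch: preserving the offset means the bounce address keeps the original buffer's low-order bits within a slab. bounce_addr_keep_offset() is an illustrative helper, not an existing swiotlb function.)

/* Sketch only: pick a bounce address that preserves the original buffer's
 * offset inside a 2KB swiotlb slab (IO_TLB_SHIFT == 11), so devices that
 * depend on (dma_addr % alignment) keep working when bounced. */
#include <linux/swiotlb.h>
#include <linux/types.h>

static phys_addr_t bounce_addr_keep_offset(phys_addr_t slot_base,
					    phys_addr_t orig_addr)
{
	return slot_base + (orig_addr & ((1UL << IO_TLB_SHIFT) - 1));
}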


Re: [PATCH] WIP! media: uvcvideo: Use dma_alloc_noncontiguos API

2020-11-24 Thread Christoph Hellwig
On Wed, Nov 18, 2020 at 03:25:46PM +0100, Ricardo Ribalda wrote:
> On architectures where there is no coherent caching, such as ARM, use the
> dma_alloc_noncontiguous API and handle the cache flushing manually using
> dma_sync_single().
> 
> With this patch on the affected architectures we can measure up to 20x
> performance improvement in uvc_video_copy_data_work().

This has a bunch of crazy long lines, but otherwise looks fine to me.

> 
> Signed-off-by: Ricardo Ribalda 
> ---
> 
> This patch depends on dma_alloc_contiguous API1315351diffmboxseries

How do we want to proceed?  Do the media maintainers want to pick up
that patch?  Should I pick up the media patch in the dma-mapping tree?

Can you respost a combined series to get started?

Re: [PATCHv8 0/8] System Cache support for GPU and required SMMU support

2020-11-24 Thread Will Deacon
On Tue, Nov 24, 2020 at 09:32:54AM +0530, Sai Prakash Ranjan wrote:
> On 2020-11-24 00:52, Rob Clark wrote:
> > On Mon, Nov 23, 2020 at 9:01 AM Sai Prakash Ranjan
> >  wrote:
> > > 
> > > On 2020-11-23 20:51, Will Deacon wrote:
> > > > On Tue, Nov 17, 2020 at 08:00:39PM +0530, Sai Prakash Ranjan wrote:
> > > >> Some hardware variants contain a system cache or the last level
> > > >> cache(llc). This cache is typically a large block which is shared
> > > >> by multiple clients on the SOC. GPU uses the system cache to cache
> > > >> both the GPU data buffers(like textures) as well the SMMU pagetables.
> > > >> This helps with improved render performance as well as lower power
> > > >> consumption by reducing the bus traffic to the system memory.
> > > >>
> > > >> The system cache architecture allows the cache to be split into slices
> > > >> which then be used by multiple SOC clients. This patch series is an
> > > >> effort to enable and use two of those slices preallocated for the GPU,
> > > >> one for the GPU data buffers and another for the GPU SMMU hardware
> > > >> pagetables.
> > > >>
> > > >> Patch 1 - Patch 6 adds system cache support in SMMU and GPU driver.
> > > >> Patch 7 and 8 are minor cleanups for arm-smmu impl.
> > > >>
> > > >> Changes in v8:
> > > >>  * Introduce a generic domain attribute for pagetable config (Will)
> > > >>  * Rename quirk to more generic IO_PGTABLE_QUIRK_ARM_OUTER_WBWA (Will)
> > > >>  * Move non-strict mode to use new struct domain_attr_io_pgtbl_config
> > > >> (Will)
> > > >
> > > > Modulo some minor comments I've made, this looks good to me. What is
> > > > the
> > > > plan for merging it? I can take the IOMMU parts, but patches 4-6 touch
> > > > the
> > > > MSM GPU driver and I'd like to avoid conflicts with that.
> > > >
> > > 
> > > SMMU bits are pretty much independent and GPU relies on the domain
> > > attribute
> > > and the quirk exposed, so as long as SMMU changes go in first it
> > > should
> > > be good.
> > > Rob?
> > 
> > I suppose one option would be to split out the patch that adds the
> > attribute into its own patch, and merge that both through drm and iommu?
> > 
> 
> Ok I can split out domain attr and quirk into its own patch if Will is
> fine with that approach.

Why don't I just queue the first two patches on their own branch and we
both pull that?

Will


Re: [PATCH] iommu: Improve the performance for direct_mapping

2020-11-24 Thread Will Deacon
On Tue, Nov 24, 2020 at 05:24:44PM +0800, Yong Wu wrote:
> On Mon, 2020-11-23 at 12:32 +, Will Deacon wrote:
> > On Fri, Nov 20, 2020 at 05:06:28PM +0800, Yong Wu wrote:
> > > + unmapped_sz = 0;
> > > + }
> > > + start += pg_size;
> > > + }
> > > + if (unmapped_sz) {
> > > + ret = iommu_map(domain, start, start, unmapped_sz,
> > > + entry->prot);
> > 
> > Can you avoid this hunk by changing your loop check to something like:
> > 
> > if (!phys_addr) {
> > map_size += pg_size;
> > if (addr + pg_size < end)
> > continue;
> > }
> 
> Thanks for your quick review. I have fixed and tested it. the patch is
> simple. I copy it here. Is this readable for you now?
> 
> 
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -737,6 +737,7 @@ static int
> iommu_create_device_direct_mappings(struct iommu_group *group,
>   /* We need to consider overlapping regions for different devices */
>   list_for_each_entry(entry, &mappings, list) {
>   dma_addr_t start, end, addr;
> + size_t map_size = 0;
>  
>   if (domain->ops->apply_resv_region)
>   domain->ops->apply_resv_region(dev, domain, entry);
> @@ -752,12 +753,21 @@ static int
> iommu_create_device_direct_mappings(struct iommu_group *group,
>   phys_addr_t phys_addr;
>  
>   phys_addr = iommu_iova_to_phys(domain, addr);
> - if (phys_addr)
> - continue;
> + if (!phys_addr) {
> + map_size += pg_size;
> + if (addr + pg_size < end)
> + continue;
> + else

You don't need the 'else' here  ^^^

> + addr += pg_size; /*Point to End */

addr = end ?

That said, maybe we could simplify this further by changing the loop bounds
to be:

for (addr = start; addr <= end; addr += pg_size)

and checking:

if (!phys_addr && addr != end) {
map_size += pg_size;
continue;
}

does that work?

Will
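(Editorial illustration, not the patch that was eventually posted: with the bounds and check suggested above, the whole direct-mapping loop would look roughly like this, reusing the variables already in iommu_create_device_direct_mappings().)

for (addr = start; addr <= end; addr += pg_size) {
	phys_addr_t phys_addr;

	phys_addr = iommu_iova_to_phys(domain, addr);
	if (!phys_addr && addr != end) {
		map_size += pg_size;	/* extend the pending run */
		continue;
	}

	if (map_size) {
		/* Map the accumulated run that ends at 'addr'. */
		ret = iommu_map(domain, addr - map_size, addr - map_size,
				map_size, entry->prot);
		if (ret)
			goto out;
		map_size = 0;
	}
}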


Re: [PATCH v6] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-11-24 Thread Kalra, Ashish


> On Nov 24, 2020, at 3:04 AM, Borislav Petkov  wrote:
> 
> On Mon, Nov 23, 2020 at 10:56:31PM +, Ashish Kalra wrote:
>> As i mentioned earlier, the patch was initially based on using a % of
>> guest memory,
> 
> Can you figure out how much the guest memory is and then allocate a
> percentage?

But what will be the criteria to figure out this percentage?

As I mentioned earlier, this can be made as complicated as possible by adding
all kinds of heuristics, but without any predictable performance gain.

Or it can be kept simple by using a static percentage value.

Thanks,
Ashish

> -- 
> Regards/Gruss,
>Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH v6] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-11-24 Thread Kalra, Ashish


> On Nov 24, 2020, at 3:38 AM, Borislav Petkov  wrote:
> 
> On Tue, Nov 24, 2020 at 09:25:06AM +, Kalra, Ashish wrote:
>> But what will be the criteria to figure out this percentage?
>> 
>> As I mentioned earlier, this can be made as complicated as possible by
>> adding all kind of heuristics but without any predictable performance
>> gain.
>> 
>> Or it can be kept simple by using a static percentage value.
> 
> Yes, static percentage number based on the guest memory. X% of the guest
> memory is used for SWIOTLB.
> 
> Since you use sev_active(), it means the size computation is done in the
> guest so that SWIOTLB size is per-guest. Yes?

Yes

> 
> If so, you can simply take, say, 5% of the guest memory's size and use
> that for SWIOTLB buffers. Or 6 or X or whatever.
> 
> Makes sense?

Sure it does.

Thanks,
Ashish

> 
> -- 
> Regards/Gruss,
>Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH v6] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-11-24 Thread Borislav Petkov
On Tue, Nov 24, 2020 at 09:25:06AM +, Kalra, Ashish wrote:
> But what will be the criteria to figure out this percentage?
>
> As I mentioned earlier, this can be made as complicated as possible by
> adding all kind of heuristics but without any predictable performance
> gain.
>
> Or it can be kept simple by using a static percentage value.

Yes, static percentage number based on the guest memory. X% of the guest
memory is used for SWIOTLB.

Since you use sev_active(), it means the size computation is done in the
guest so that SWIOTLB size is per-guest. Yes?

If so, you can simply take, say, 5% of the guest memory's size and use
that for SWIOTLB buffers. Or 6 or X or whatever.

Makes sense?

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette
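(Editorial sketch of that suggestion; sev_swiotlb_size() and the 6% figure are placeholders, not an existing kernel interface.)

/* Sketch only: size the SWIOTLB as a fixed percentage of guest memory when
 * running as an SEV guest; the result would feed the early SWIOTLB setup. */
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mem_encrypt.h>

static phys_addr_t __init sev_swiotlb_size(void)
{
	phys_addr_t total = memblock_phys_mem_size();

	if (!sev_active())
		return 0;		/* keep the 64MB default */

	return total * 6 / 100;		/* X% of guest memory */
}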


Re: [PATCH] iommu: Improve the performance for direct_mapping

2020-11-24 Thread Yong Wu
On Mon, 2020-11-23 at 12:32 +, Will Deacon wrote:
> On Fri, Nov 20, 2020 at 05:06:28PM +0800, Yong Wu wrote:
> > Currently direct_mapping always use the smallest pgsize which is SZ_4K
> > normally to mapping. This is unnecessary. we could gather the size, and
> > call iommu_map then, iommu_map could decide how to map better with the
> > just right pgsize.
> > 
> > From the original comment, we should take care overlap, otherwise,
> > iommu_map may return -EEXIST. In this overlap case, we should map the
> > previous region before overlap firstly. then map the left part.
> > 
> > Each a iommu device will call this direct_mapping when its iommu
> > initialize, This patch is effective to improve the boot/initialization
> > time especially while it only needs level 1 mapping.
> > 
> > Signed-off-by: Anan Sun 
> > Signed-off-by: Yong Wu 
> > ---
> >  drivers/iommu/iommu.c | 20 ++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index df87c8e825f7..854a8fcb928d 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -737,6 +737,7 @@ static int iommu_create_device_direct_mappings(struct 
> > iommu_group *group,
> > /* We need to consider overlapping regions for different devices */
> > list_for_each_entry(entry, , list) {
> > dma_addr_t start, end, addr;
> > +   size_t unmapped_sz = 0;
> 
> I think "unmapped" is the wrong word here, as this variable actually
> represents the amount we want to map! I suggest "map_size" instead.
> 
> > if (domain->ops->apply_resv_region)
> > domain->ops->apply_resv_region(dev, domain, entry);
> > @@ -752,10 +753,25 @@ static int iommu_create_device_direct_mappings(struct 
> > iommu_group *group,
> > phys_addr_t phys_addr;
> >  
> > phys_addr = iommu_iova_to_phys(domain, addr);
> > -   if (phys_addr)
> > +   if (phys_addr == 0) {
> > +   unmapped_sz += pg_size; /* Gather the size. */
> > continue;
> > +   }
> >  
> > -   ret = iommu_map(domain, addr, addr, pg_size, 
> > entry->prot);
> > +   if (unmapped_sz) {
> > +   /* Map the region before the overlap. */
> > +   ret = iommu_map(domain, start, start,
> > +   unmapped_sz, entry->prot);
> > +   if (ret)
> > +   goto out;
> > +   start += unmapped_sz;
> 
> I think it's a bit confusing to update start like this. Can we call
> iommu_map(domain, addr - map_size, addr - map_size, map_size, entry->prot)
> instead?
> 
> > +   unmapped_sz = 0;
> > +   }
> > +   start += pg_size;
> > +   }
> > +   if (unmapped_sz) {
> > +   ret = iommu_map(domain, start, start, unmapped_sz,
> > +   entry->prot);
> 
> Can you avoid this hunk by changing your loop check to something like:
> 
>   if (!phys_addr) {
>   map_size += pg_size;
>   if (addr + pg_size < end)
>   continue;
>   }

Thanks for your quick review. I have fixed and tested it. the patch is
simple. I copy it here. Is this readable for you now?


--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -737,6 +737,7 @@ static int
iommu_create_device_direct_mappings(struct iommu_group *group,
/* We need to consider overlapping regions for different devices */
list_for_each_entry(entry, &mappings, list) {
dma_addr_t start, end, addr;
+   size_t map_size = 0;
 
if (domain->ops->apply_resv_region)
domain->ops->apply_resv_region(dev, domain, entry);
@@ -752,12 +753,21 @@ static int
iommu_create_device_direct_mappings(struct iommu_group *group,
phys_addr_t phys_addr;
 
phys_addr = iommu_iova_to_phys(domain, addr);
-   if (phys_addr)
-   continue;
+   if (!phys_addr) {
+   map_size += pg_size;
+   if (addr + pg_size < end)
+   continue;
+   else
+   addr += pg_size; /*Point to End */
+   }
 
-   ret = iommu_map(domain, addr, addr, pg_size, 
entry->prot);
-   if (ret)
-   goto out;
+   if (map_size) {
+   ret = iommu_map(domain, addr - map_size, addr - 
map_size,
+   map_size, entry->prot);
+

Re: [PATCH v6] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-11-24 Thread Borislav Petkov
On Mon, Nov 23, 2020 at 01:43:27PM -0500, Konrad Rzeszutek Wilk wrote:
> I am assuming that TDX is going to have the same exact issue that 
> AMD SEV will have.
> 
> Are you recommending to have an unified x86 specific callback
> where we check if it:
> 
>  - CPUID_AMD_SEV or CPUID_INTEL_TDX is set, and
>  - No vIOMMU present, then we adjust the size?

I'm thinking we do it correctly right now, and when TDX appears on the horizon
requesting this be adjusted for TDX, then change it. Like we always do.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH v11 08/13] vfio/pci: Add framework for custom interrupt indices

2020-11-24 Thread Auger Eric
Hi Shameer, Qubingbing
On 11/23/20 1:51 PM, Shameerali Kolothum Thodi wrote:
> Hi Eric,
> 
>> -Original Message-
>> From: Eric Auger [mailto:eric.au...@redhat.com]
>> Sent: 16 November 2020 11:00
>> To: eric.auger@gmail.com; eric.au...@redhat.com;
>> iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org;
>> k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; w...@kernel.org;
>> j...@8bytes.org; m...@kernel.org; robin.mur...@arm.com;
>> alex.william...@redhat.com
>> Cc: jean-phili...@linaro.org; zhangfei@linaro.org;
>> zhangfei@gmail.com; vivek.gau...@arm.com; Shameerali Kolothum
>> Thodi ;
>> jacob.jun@linux.intel.com; yi.l@intel.com; t...@semihalf.com;
>> nicoleots...@gmail.com; yuzenghui 
>> Subject: [PATCH v11 08/13] vfio/pci: Add framework for custom interrupt
>> indices
>>
>> Implement IRQ capability chain infrastructure. All interrupt
>> indexes beyond VFIO_PCI_NUM_IRQS are handled as extended
>> interrupts. They are registered with a specific type/subtype
>> and supported flags.
>>
>> Signed-off-by: Eric Auger 
>> ---
>>  drivers/vfio/pci/vfio_pci.c | 99 +++--
>>  drivers/vfio/pci/vfio_pci_intrs.c   | 62 ++
>>  drivers/vfio/pci/vfio_pci_private.h | 14 
>>  3 files changed, 157 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>> index 2a6cc1a87323..93e03a4a5f32 100644
>> --- a/drivers/vfio/pci/vfio_pci.c
>> +++ b/drivers/vfio/pci/vfio_pci.c
>> @@ -608,6 +608,14 @@ static void vfio_pci_disable(struct vfio_pci_device
>> *vdev)
>>
>>  WARN_ON(iommu_unregister_device_fault_handler(>pdev->dev));
>>
>> +for (i = 0; i < vdev->num_ext_irqs; i++)
>> +vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
>> +VFIO_IRQ_SET_ACTION_TRIGGER,
>> +VFIO_PCI_NUM_IRQS + i, 0, 0, NULL);
>> +vdev->num_ext_irqs = 0;
>> +kfree(vdev->ext_irqs);
>> +vdev->ext_irqs = NULL;
>> +
>>  /* Device closed, don't need mutex here */
>>  list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
>>   &vdev->ioeventfds_list, next) {
>> @@ -823,6 +831,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device
>> *vdev, int irq_type)
>>  return 1;
>>  } else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
>>  return 1;
>> +} else if (irq_type >= VFIO_PCI_NUM_IRQS &&
>> +   irq_type < VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs) {
>> +return 1;
>>  }
>>
>>  return 0;
>> @@ -1008,7 +1019,7 @@ static long vfio_pci_ioctl(void *device_data,
>>  info.flags |= VFIO_DEVICE_FLAGS_RESET;
>>
>>  info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
>> -info.num_irqs = VFIO_PCI_NUM_IRQS;
>> +info.num_irqs = VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs;
>>
>>  if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
>>  int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
>> @@ -1187,36 +1198,87 @@ static long vfio_pci_ioctl(void *device_data,
>>
>>  } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
>>  struct vfio_irq_info info;
>> +struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
>> +unsigned long capsz;
>>
>>  minsz = offsetofend(struct vfio_irq_info, count);
>>
>> +/* For backward compatibility, cannot require this */
>> +capsz = offsetofend(struct vfio_irq_info, cap_offset);
>> +
>>  if (copy_from_user(, (void __user *)arg, minsz))
>>  return -EFAULT;
>>
>> -if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
>> +if (info.argsz < minsz ||
>> +info.index >= VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs)
>>  return -EINVAL;
>>
>> -switch (info.index) {
>> -case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
>> -case VFIO_PCI_REQ_IRQ_INDEX:
>> -break;
>> -case VFIO_PCI_ERR_IRQ_INDEX:
>> -if (pci_is_pcie(vdev->pdev))
>> -break;
>> -fallthrough;
>> -default:
>> -return -EINVAL;
>> -}
>> +if (info.argsz >= capsz)
>> +minsz = capsz;
>>
>>  info.flags = VFIO_IRQ_INFO_EVENTFD;
>>
>> -info.count = vfio_pci_get_irq_count(vdev, info.index);
>> -
>> -if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
>> +switch (info.index) {
>> +case VFIO_PCI_INTX_IRQ_INDEX:
>>  info.flags |= (VFIO_IRQ_INFO_MASKABLE |
>> VFIO_IRQ_INFO_AUTOMASKED);
>> -else
>> +break;
>> +case VFIO_PCI_MSI_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
>> +

[PATCH v6 7/7] iommu/vt-d: Cleanup after converting to dma-iommu ops

2020-11-24 Thread Lu Baolu
Some cleanups after converting the driver to use dma-iommu ops.
- Remove nobounce option;
- Cleanup and simplify the path in domain mapping.

Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 .../admin-guide/kernel-parameters.txt |  5 --
 drivers/iommu/intel/iommu.c   | 90 ++-
 2 files changed, 28 insertions(+), 67 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 526d65d8573a..76b2a2063fd0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1883,11 +1883,6 @@
Note that using this option lowers the security
provided by tboot because it makes the system
vulnerable to DMA attacks.
-   nobounce [Default off]
-   Disable bounce buffer for untrusted devices such as
-   the Thunderbolt devices. This will treat the untrusted
-   devices as the trusted ones, hence might expose security
-   risks of DMA attacks.
 
intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
0   disables intel_idle and fall back on acpi_idle.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 09003abf3bbb..6d336e59851b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -355,7 +355,6 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
-static int intel_no_bounce;
 static int iommu_skip_te_disable;
 
 #define IDENTMAP_GFX   2
@@ -457,9 +456,6 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "tboot_noforce", 13)) {
pr_info("Intel-IOMMU: not forcing on after tboot. This 
could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
-   } else if (!strncmp(str, "nobounce", 8)) {
-   pr_info("Intel-IOMMU: No bounce buffer. This could 
expose security risks of DMA attacks\n");
-   intel_no_bounce = 1;
}
 
str += strcspn(str, ",");
@@ -2277,15 +2273,14 @@ static inline int hardware_largepage_caps(struct 
dmar_domain *domain,
return level;
 }
 
-static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-   struct scatterlist *sg, unsigned long phys_pfn,
-   unsigned long nr_pages, int prot)
+static int
+__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+unsigned long phys_pfn, unsigned long nr_pages, int prot)
 {
struct dma_pte *first_pte = NULL, *pte = NULL;
-   phys_addr_t pteval;
-   unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+   phys_addr_t pteval;
u64 attr;
 
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
@@ -2297,26 +2292,14 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
if (domain_use_first_level(domain))
attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
 
-   if (!sg) {
-   sg_res = nr_pages;
-   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
-   }
+   pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
 
while (nr_pages > 0) {
uint64_t tmp;
 
-   if (!sg_res) {
-   unsigned int pgoff = sg->offset & ~PAGE_MASK;
-
-   sg_res = aligned_nrpages(sg->offset, sg->length);
-   sg->dma_address = ((dma_addr_t)iov_pfn << 
VTD_PAGE_SHIFT) + pgoff;
-   sg->dma_length = sg->length;
-   pteval = (sg_phys(sg) - pgoff) | attr;
-   phys_pfn = pteval >> VTD_PAGE_SHIFT;
-   }
-
if (!pte) {
-   largepage_lvl = hardware_largepage_caps(domain, 
iov_pfn, phys_pfn, sg_res);
+   largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
+   phys_pfn, nr_pages);
 
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, 
_lvl);
if (!pte)
@@ -2328,7 +2311,7 @@ static int __domain_mapping(struct dmar_domain *domain, 
unsigned long iov_pfn,
pteval |= DMA_PTE_LARGE_PAGE;
lvl_pages = lvl_to_nr_pages(largepage_lvl);
 
-   nr_superpages = sg_res / lvl_pages;
+   nr_superpages = nr_pages / lvl_pages;
end_pfn = iov_pfn + nr_superpages * lvl_pages - 
1;
 
/*

[PATCH v6 6/7] iommu/vt-d: Convert intel iommu driver to the iommu ops

2020-11-24 Thread Lu Baolu
From: Tom Murphy 

Convert the intel iommu driver to the dma-iommu api. Remove the iova
handling and reserve region code from the intel iommu driver.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 drivers/iommu/intel/Kconfig |   1 +
 drivers/iommu/intel/iommu.c | 745 ++--
 2 files changed, 43 insertions(+), 703 deletions(-)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 5337ee1584b0..28a3d1596c76 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -13,6 +13,7 @@ config INTEL_IOMMU
select DMAR_TABLE
select SWIOTLB
select IOASID
+   select IOMMU_DMA
help
  DMA remapping (DMAR) devices support enables independent address
  translations for Direct Memory Access (DMA) from devices.
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5f7d8148f1d6..09003abf3bbb 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,7 +42,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -382,9 +382,6 @@ struct device_domain_info *get_domain_info(struct device 
*dev)
 DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
-#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&   \
-   to_pci_dev(d)->untrusted)
-
 /*
  * Iterate over elements in device_domain_list and call the specified
  * callback @fn against each element.
@@ -1289,13 +1286,6 @@ static void dma_free_pagelist(struct page *freelist)
}
 }
 
-static void iova_entry_free(unsigned long data)
-{
-   struct page *freelist = (struct page *)data;
-
-   dma_free_pagelist(freelist);
-}
-
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
@@ -1660,19 +1650,17 @@ static inline void __mapping_notify_one(struct 
intel_iommu *iommu,
iommu_flush_write_buffer(iommu);
 }
 
-static void iommu_flush_iova(struct iova_domain *iovad)
+static void intel_flush_iotlb_all(struct iommu_domain *domain)
 {
-   struct dmar_domain *domain;
+   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
int idx;
 
-   domain = container_of(iovad, struct dmar_domain, iovad);
-
-   for_each_domain_iommu(idx, domain) {
+   for_each_domain_iommu(idx, dmar_domain) {
struct intel_iommu *iommu = g_iommus[idx];
-   u16 did = domain->iommu_did[iommu->seq_id];
+   u16 did = dmar_domain->iommu_did[iommu->seq_id];
 
-   if (domain_use_first_level(domain))
-   domain_flush_piotlb(iommu, domain, 0, -1, 0);
+   if (domain_use_first_level(dmar_domain))
+   domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
else
iommu->flush.flush_iotlb(iommu, did, 0, 0,
 DMA_TLB_DSI_FLUSH);
@@ -1954,48 +1942,6 @@ static int domain_detach_iommu(struct dmar_domain 
*domain,
return count;
 }
 
-static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_rbtree_key;
-
-static int dmar_init_reserved_ranges(void)
-{
-   struct pci_dev *pdev = NULL;
-   struct iova *iova;
-   int i;
-
-   init_iova_domain(_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
-
-   lockdep_set_class(_iova_list.iova_rbtree_lock,
-   _rbtree_key);
-
-   /* IOAPIC ranges shouldn't be accessed by DMA */
-   iova = reserve_iova(_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
-   IOVA_PFN(IOAPIC_RANGE_END));
-   if (!iova) {
-   pr_err("Reserve IOAPIC range failed\n");
-   return -ENODEV;
-   }
-
-   /* Reserve all PCI MMIO to avoid peer-to-peer access */
-   for_each_pci_dev(pdev) {
-   struct resource *r;
-
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   r = >resource[i];
-   if (!r->flags || !(r->flags & IORESOURCE_MEM))
-   continue;
-   iova = reserve_iova(_iova_list,
-   IOVA_PFN(r->start),
-   IOVA_PFN(r->end));
-   if (!iova) {
-   pci_err(pdev, "Reserve iova for %pR failed\n", 
r);
-   return -ENODEV;
-   }
-   }
-   }
-   return 0;
-}
-
 static inline int guestwidth_to_adjustwidth(int gaw)
 {
int agaw;
@@ -2018,7 +1964,7 @@ static void domain_exit(struct dmar_domain *domain)
 
/* destroy iovas */
if (domain->domain.type == IOMMU_DOMAIN_DMA)
-   put_iova_domain(&domain->iovad);
+   

[PATCH v6 5/7] iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev

2020-11-24 Thread Lu Baolu
The iommu-dma layer constrains IOVA allocation based on the domain geometry
that the driver reports. Update the domain geometry every time a domain is
attached to or detached from a device.

Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 drivers/iommu/intel/iommu.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 414b4321fb61..5f7d8148f1d6 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -67,8 +67,8 @@
 #define MAX_AGAW_WIDTH 64
 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
 
-#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
+#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
+#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
 
 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
to match. That way, we can use 'unsigned long' for PFNs with impunity. */
@@ -739,6 +739,18 @@ static void domain_update_iommu_cap(struct dmar_domain 
*domain)
 */
if (domain->nid == NUMA_NO_NODE)
domain->nid = domain_update_device_node(domain);
+
+   /*
+* First-level translation restricts the input-address to a
+* canonical address (i.e., address bits 63:N have the same
+* value as address bit [N-1], where N is 48-bits with 4-level
+* paging and 57-bits with 5-level paging). Hence, skip bit
+* [N-1].
+*/
+   if (domain_use_first_level(domain))
+   domain->domain.geometry.aperture_end = 
__DOMAIN_MAX_ADDR(domain->gaw - 1);
+   else
+   domain->domain.geometry.aperture_end = 
__DOMAIN_MAX_ADDR(domain->gaw);
 }
 
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
-- 
2.25.1
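(Editorial worked example of the effect, not part of the patch: with a 48-bit guest address width, first-level translation loses the top bit of the aperture.)

/* gaw == 48 */
u64 full  = __DOMAIN_MAX_ADDR(48);      /* (1ULL << 48) - 1 == 0x0000ffffffffffff */
u64 canon = __DOMAIN_MAX_ADDR(48 - 1);  /* (1ULL << 47) - 1 == 0x00007fffffffffff */
/* i.e. iommu-dma will never hand out an IOVA with bit 47 set. */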



[PATCH v6 4/7] iommu: Add quirk for Intel graphic devices in map_sg

2020-11-24 Thread Lu Baolu
Combining the sg segments exposes a bug in the Intel i915 driver which
causes visual artifacts and the screen to freeze. This is most likely
because of how the i915 handles the returned list. It probably doesn't
respect the returned value specifying the number of elements in the list
and instead depends on the previous behaviour of the Intel iommu driver
which would return the same number of elements in the output list as in
the input list.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 drivers/iommu/dma-iommu.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 83eb99bfb990..5f49ed653f98 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -878,6 +878,33 @@ static int __finalise_sg(struct device *dev, struct 
scatterlist *sg, int nents,
unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
int i, count = 0;
 
+   /*
+* The Intel graphics driver assumes that the returned
+* sg list is not combined. This blocks the effort of converting the
+* Intel IOMMU driver to the dma-iommu APIs. Add this quirk to make the
+* device driver work; it should be removed once this is fixed in the
+* i915 driver.
+*/
+   if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) &&
+   to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
+   (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+   for_each_sg(sg, s, nents, i) {
+   unsigned int s_iova_off = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_iova_len = s->length;
+
+   s->offset += s_iova_off;
+   s->length = s_length;
+   sg_dma_address(s) = dma_addr + s_iova_off;
+   sg_dma_len(s) = s_length;
+   dma_addr += s_iova_len;
+
+   pr_info_once("sg combining disabled due to i915 
driver\n");
+   }
+
+   return nents;
+   }
+
for_each_sg(sg, s, nents, i) {
/* Restore this segment's original unaligned fields first */
unsigned int s_iova_off = sg_dma_address(s);
-- 
2.25.1
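(Editorial note for context: the behaviour i915 apparently relies on is stronger than what the DMA API guarantees. A well-behaved consumer of dma_map_sg() only walks the returned number of segments; the sketch below illustrates that, with program_hw_segment() as a made-up stand-in for a driver's hardware programming.)

/* Sketch only: portable way to consume a mapped scatterlist. */
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

static void program_hw_segment(dma_addr_t addr, unsigned int len); /* made up */

static int example_map_and_program(struct device *dev, struct sg_table *sgt)
{
	struct scatterlist *sg;
	int i, nents;

	nents = dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
	if (!nents)
		return -ENOMEM;

	/* Segments may have been combined: use 'nents', not sgt->nents. */
	for_each_sg(sgt->sgl, sg, nents, i)
		program_hw_segment(sg_dma_address(sg), sg_dma_len(sg));

	return 0;
}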



[PATCH v6 3/7] iommu: Allow the dma-iommu api to use bounce buffers

2020-11-24 Thread Lu Baolu
From: Tom Murphy 

Allow the dma-iommu api to use bounce buffers for untrusted devices.
This is a copy of the intel bounce buffer code.

Signed-off-by: Tom Murphy 
Co-developed-by: Lu Baolu 
Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 drivers/iommu/dma-iommu.c | 162 +++---
 1 file changed, 149 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index de521e22bafb..83eb99bfb990 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -20,9 +20,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 struct iommu_dma_msi_page {
struct list_headlist;
@@ -499,6 +501,31 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
+static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
+   size_t size, enum dma_data_direction dir,
+   unsigned long attrs)
+{
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   phys_addr_t phys;
+
+   phys = iommu_iova_to_phys(domain, dma_addr);
+   if (WARN_ON(!phys))
+   return;
+
+   __iommu_dma_unmap(dev, dma_addr, size);
+
+   if (unlikely(is_swiotlb_buffer(phys)))
+   swiotlb_tbl_unmap_single(dev, phys, size,
+   iova_align(iovad, size), dir, attrs);
+}
+
+static bool dev_is_untrusted(struct device *dev)
+{
+   return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -524,6 +551,54 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
+static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+   size_t org_size, dma_addr_t dma_mask, bool coherent,
+   enum dma_data_direction dir, unsigned long attrs)
+{
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = org_size;
+   void *padding_start;
+   size_t padding_size;
+   dma_addr_t iova;
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | org_size)) {
+   aligned_size = iova_align(iovad, org_size);
+   phys = swiotlb_tbl_map_single(dev, phys, org_size,
+ aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   return DMA_MAPPING_ERROR;
+
+   /* Cleanup the padding area. */
+   padding_start = phys_to_virt(phys);
+   padding_size = aligned_size;
+
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+   (dir == DMA_TO_DEVICE ||
+dir == DMA_BIDIRECTIONAL)) {
+   padding_start += org_size;
+   padding_size -= org_size;
+   }
+
+   memset(padding_start, 0, padding_size);
+   }
+
+   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+   swiotlb_tbl_unmap_single(dev, phys, org_size,
+   aligned_size, dir, attrs);
+
+   return iova;
+}
+
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -697,11 +772,15 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   arch_sync_dma_for_cpu(phys, size, dir);
+   if (!dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
+   if (is_swiotlb_buffer(phys))
+   swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -709,11 +788,15 @@ static void iommu_dma_sync_single_for_device(struct 
device *dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev))
+   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   

[PATCH v6 2/7] iommu: Add iommu_dma_free_cpu_cached_iovas()

2020-11-24 Thread Lu Baolu
From: Tom Murphy 

Add an iommu_dma_free_cpu_cached_iovas() function to allow drivers which
use the dma-iommu ops to free their cached CPU IOVAs.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 drivers/iommu/dma-iommu.c | 9 +
 include/linux/dma-iommu.h | 8 
 2 files changed, 17 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9c827a4d2207..de521e22bafb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -49,6 +49,15 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+
+   free_cpu_cached_iovas(cpu, iovad);
+}
+
 static void iommu_dma_entry_dtor(unsigned long data)
 {
struct page *freelist = (struct page *)data;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 2112f21f73d8..706b68d1359b 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc,
 
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
+void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain);
+
 #else /* CONFIG_IOMMU_DMA */
 
 struct iommu_domain;
@@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device 
*dev, struct list_he
 {
 }
 
+static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
+   struct iommu_domain *domain)
+{
+}
+
 #endif /* CONFIG_IOMMU_DMA */
 #endif /* __DMA_IOMMU_H */
-- 
2.25.1
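(Editorial sketch of the intended caller, not part of this patch: a driver's CPU-hotplug "dead" callback can flush the per-CPU IOVA caches of its DMA domains when a CPU goes offline. example_get_dma_domain() and the hotplug state named in the comment are made up for the example.)

#include <linux/dma-iommu.h>
#include <linux/iommu.h>

static struct iommu_domain *example_get_dma_domain(void); /* made-up helper */

static int example_iommu_cpu_dead(unsigned int cpu)
{
	struct iommu_domain *domain = example_get_dma_domain();

	if (domain)
		iommu_dma_free_cpu_cached_iovas(cpu, domain);
	return 0;
}

/* Registered once at init time with a dedicated CPUHP_*_DEAD state, e.g.:
 *   cpuhp_setup_state(CPUHP_EXAMPLE_IOMMU_DEAD, "example/iommu:dead",
 *                     NULL, example_iommu_cpu_dead);
 */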



[PATCH v6 1/7] iommu: Handle freelists when using deferred flushing in iommu drivers

2020-11-24 Thread Lu Baolu
From: Tom Murphy 

Allow iommu_unmap_fast() to return newly freed page table pages and
pass the freelist to queue_iova() in the dma-iommu ops path.

This is useful for iommu drivers (in this case the Intel iommu driver)
which need to wait for the IOTLB to be flushed before newly unmapped
page table pages can be freed. This way we can still batch IOTLB flush
operations and handle the freelists.

Signed-off-by: Tom Murphy 
Signed-off-by: Lu Baolu 
Tested-by: Logan Gunthorpe 
---
 drivers/iommu/dma-iommu.c   | 29 +--
 drivers/iommu/intel/iommu.c | 55 -
 include/linux/iommu.h   |  1 +
 3 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 0cbcd3fc3e7e..9c827a4d2207 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -49,6 +49,18 @@ struct iommu_dma_cookie {
struct iommu_domain *fq_domain;
 };
 
+static void iommu_dma_entry_dtor(unsigned long data)
+{
+   struct page *freelist = (struct page *)data;
+
+   while (freelist) {
+   unsigned long p = (unsigned long)page_address(freelist);
+
+   freelist = freelist->freelist;
+   free_page(p);
+   }
+}
+
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 {
if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@@ -343,7 +355,7 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
-   NULL))
+ iommu_dma_entry_dtor))
pr_warn("iova flush queue initialization failed\n");
else
cookie->fq_domain = domain;
@@ -440,7 +452,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-   dma_addr_t iova, size_t size)
+   dma_addr_t iova, size_t size, struct page *freelist)
 {
struct iova_domain *iovad = &cookie->iovad;
 
@@ -449,7 +461,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
cookie->msi_iova -= size;
else if (cookie->fq_domain) /* non-strict mode */
queue_iova(iovad, iova_pfn(iovad, iova),
-   size >> iova_shift(iovad), 0);
+   size >> iova_shift(iovad),
+   (unsigned long)freelist);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
@@ -474,7 +487,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 
if (!cookie->fq_domain)
iommu_iotlb_sync(domain, &iotlb_gather);
-   iommu_dma_free_iova(cookie, dma_addr, size);
+   iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -496,7 +509,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return DMA_MAPPING_ERROR;
 
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -649,7 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 out_free_sg:
sg_free_table(&sgt);
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -900,7 +913,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, iova_len);
+   iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@@ -1228,7 +1241,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
return msi_page;
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size);
+   iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_page:
kfree(msi_page);
return NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 0233d2438c44..414b4321fb61 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1243,17 +1243,17 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
pages can only be freed after the IOTLB flush has been 

[PATCH v6 0/7] Convert the intel iommu driver to the dma-iommu api

2020-11-24 Thread Lu Baolu
Hi Will,

The v5 of this series can be found here:

https://lore.kernel.org/linux-iommu/20201120101719.3172693-1-baolu...@linux.intel.com/

Changes in this version:
- Rebase the series to the latest iommu/next branch.
  https://lore.kernel.org/linux-iommu/20201123100816.ga26...@infradead.org/ 

Please review and consider it for iommu/next.

Best regards,
baolu


Lu Baolu (3):
  iommu: Add quirk for Intel graphic devices in map_sg
  iommu/vt-d: Update domain geometry in iommu_ops.at(de)tach_dev
  iommu/vt-d: Cleanup after converting to dma-iommu ops

Tom Murphy (4):
  iommu: Handle freelists when using deferred flushing in iommu drivers
  iommu: Add iommu_dma_free_cpu_cached_iovas()
  iommu: Allow the dma-iommu api to use bounce buffers
  iommu/vt-d: Convert intel iommu driver to the iommu ops

 .../admin-guide/kernel-parameters.txt |   5 -
 drivers/iommu/dma-iommu.c | 227 -
 drivers/iommu/intel/Kconfig   |   1 +
 drivers/iommu/intel/iommu.c   | 904 +++---
 include/linux/dma-iommu.h |   8 +
 include/linux/iommu.h |   1 +
 6 files changed, 335 insertions(+), 811 deletions(-)

-- 
2.25.1
