[RFC PATCH v5 10/15] iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()

2021-04-08 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 75 ++
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index ea66b10c04c4..1b690911995a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -46,6 +46,9 @@
 #define ARM_LPAE_PGD_SIZE(d)   \
(sizeof(arm_lpae_iopte) << (d)->pgd_bits)
 
+#define ARM_LPAE_PTES_PER_TABLE(d) \
+   (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
+
 /*
  * Calculate the index at level l used to map virtual address a using the
  * pagetable in d.
@@ -253,8 +256,8 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep);
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
@@ -298,7 +301,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz,
+   if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
 lvl, tblp) != sz) {
WARN_ON(1);
return -EINVAL;
@@ -526,14 +529,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
   unsigned long iova, size_t size,
   arm_lpae_iopte blk_pte, int lvl,
-  arm_lpae_iopte *ptep)
+  arm_lpae_iopte *ptep, size_t pgcount)
 {
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-   int i, unmap_idx = -1;
+   int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
+   int i, unmap_idx_start = -1, num_entries = 0, max_entries;
 
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
@@ -542,15 +546,18 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
if (!tablep)
return 0; /* Bytes unmapped */
 
-   if (size == split_sz)
-   unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   if (size == split_sz) {
+   unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   max_entries = ptes_per_table - unmap_idx_start;
+   num_entries = min_t(int, pgcount, max_entries);
+   }
 
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
 
-   for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
+   for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
/* Unmap! */
-   if (i == unmap_idx)
+   if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
continue;
 
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
@@ -568,38 +575,44 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
return 0;
 
tablep = iopte_deref(pte, data);
-   } else if (unmap_idx >= 0) {
-   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-   return size;
+   } else if (unmap_idx_start >= 0) {
+   for (i = 0; i < num_entries; i++)
+   io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
+
+   return num_entries * size;
}
 
-   return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
+   return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep)
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep)
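
For reference, the new ARM_LPAE_PTES_PER_TABLE() macro is just the
translation granule divided by the PTE size. A minimal userspace model of
that arithmetic (the 4 KB granule is an assumed example value, not taken
from the patch):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t arm_lpae_iopte;

int main(void)
{
	size_t granule = 4096;	/* assumed 4 KB translation granule */

	/* Mirrors ARM_LPAE_PTES_PER_TABLE(): granule >> ilog2(sizeof(pte)) */
	size_t ptes_per_table = granule / sizeof(arm_lpae_iopte);

	printf("PTEs per table: %zu\n", ptes_per_table);	/* 512 */
	return 0;
}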

[RFC PATCH v5 13/15] iommu/io-pgtable-arm-v7s: Implement arm_v7s_map_pages()

2021-04-08 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM v7s io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 1af060686985..3331caafb273 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -519,11 +519,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
 }
 
-static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
-   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
-   int ret;
+   int ret = -EINVAL;
 
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
paddr >= (1ULL << data->iop.cfg.oas)))
@@ -533,7 +534,17 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
 
-   ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
+   while (pgcount--) {
+   ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1, data->pgd,
+   gfp);
+   if (ret)
+   break;
+
+   iova += pgsize;
+   paddr += pgsize;
+   if (mapped)
+   *mapped += pgsize;
+   }
/*
 * Synchronise all PTE updates for the new mapping before there's
 * a chance for anything to kick off a table walk for the new iova.
@@ -543,6 +554,12 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
 }
 
+static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
+   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+   return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL);
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -797,6 +814,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_v7s_map,
+   .map_pages  = arm_v7s_map_pages,
.unmap  = arm_v7s_unmap,
.unmap_pages= arm_v7s_unmap_pages,
.iova_to_phys   = arm_v7s_iova_to_phys,
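
The map loop above reports partial progress through *mapped even when a
later page fails, so the core code can unwind exactly what was installed.
A minimal userspace sketch of that contract, with a hypothetical
map_one() standing in for __arm_v7s_map():

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-in for __arm_v7s_map(): fails on the third page */
static int map_one(int *calls)
{
	return (++(*calls) == 3) ? -12 /* -ENOMEM */ : 0;
}

static int map_pages(unsigned long iova, size_t pgsize, size_t pgcount,
		     size_t *mapped)
{
	int calls = 0, ret = 0;

	while (pgcount--) {
		ret = map_one(&calls);
		if (ret)
			break;
		iova += pgsize;
		if (mapped)
			*mapped += pgsize;	/* only successful pages */
	}
	return ret;
}

int main(void)
{
	size_t mapped = 0;
	int ret = map_pages(0x1000, 0x1000, 5, &mapped);

	/* ret = -12, mapped = 0x2000: two pages for the caller to unwind */
	printf("ret=%d mapped=0x%zx\n", ret, mapped);
	return 0;
}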


[RFC PATCH v5 14/15] iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback

2021-04-08 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM SMMU driver
to allow calls from iommu_unmap to unmap multiple pages of
the same size in one call. Also, remove the unmap() callback
for the SMMU driver, as it will no longer be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfde6a61..188e506d75e1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,8 +1208,9 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
 }
 
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+  size_t pgsize, size_t pgcount,
+  struct iommu_iotlb_gather *iotlb_gather)
 {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1219,7 +1220,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return 0;
 
arm_smmu_rpm_get(smmu);
-   ret = ops->unmap(ops, iova, size, gather);
+   ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
arm_smmu_rpm_put(smmu);
 
return ret;
@@ -1624,7 +1625,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
-   .unmap  = arm_smmu_unmap,
+   .unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys   = arm_smmu_iova_to_phys,


[RFC PATCH v5 09/15] iommu/io-pgtable-arm: Prepare PTE methods for handling multiple entries

2021-04-08 Thread Isaac J. Manjarres
The PTE methods currently operate on a single entry. In preparation
for manipulating multiple PTEs in one map or unmap call, allow them
to handle multiple PTEs.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm.c | 78 +++---
 1 file changed, 44 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..ea66b10c04c4 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -232,20 +232,23 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
free_pages((unsigned long)pages, get_order(size));
 }
 
-static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
struct io_pgtable_cfg *cfg)
 {
dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
-  sizeof(*ptep), DMA_TO_DEVICE);
+  sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
 }
 
 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
-  struct io_pgtable_cfg *cfg)
+  int num_entries, struct io_pgtable_cfg *cfg)
 {
-   *ptep = pte;
+   int i;
+
+   for (i = 0; i < num_entries; i++)
+   ptep[i] = pte;
 
if (!cfg->coherent_walk)
-   __arm_lpae_sync_pte(ptep, cfg);
+   __arm_lpae_sync_pte(ptep, num_entries, cfg);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -255,47 +258,54 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
-   int lvl, arm_lpae_iopte *ptep)
+   int lvl, int num_entries, arm_lpae_iopte *ptep)
 {
arm_lpae_iopte pte = prot;
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+   int i;
 
if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
pte |= ARM_LPAE_PTE_TYPE_PAGE;
else
pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
-   pte |= paddr_to_iopte(paddr, data);
+   for (i = 0; i < num_entries; i++)
+   ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
 
-   __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+   if (!cfg->coherent_walk)
+   __arm_lpae_sync_pte(ptep, num_entries, cfg);
 }
 
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 unsigned long iova, phys_addr_t paddr,
-arm_lpae_iopte prot, int lvl,
+arm_lpae_iopte prot, int lvl, int num_entries,
 arm_lpae_iopte *ptep)
 {
-   arm_lpae_iopte pte = *ptep;
-
-   if (iopte_leaf(pte, lvl, data->iop.fmt)) {
-   /* We require an unmap first */
-   WARN_ON(!selftest_running);
-   return -EEXIST;
-   } else if (iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE) {
-   /*
-* We need to unmap and free the old table before
-* overwriting it with a block entry.
-*/
-   arm_lpae_iopte *tblp;
-   size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-
-   tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
-   WARN_ON(1);
-   return -EINVAL;
+   int i;
+
+   for (i = 0; i < num_entries; i++)
+   if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
+   /* We require an unmap first */
+   WARN_ON(!selftest_running);
+   return -EEXIST;
+   } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
+   /*
+* We need to unmap and free the old table before
+* overwriting it with a block entry.
+*/
+   arm_lpae_iopte *tblp;
+   size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+
+   tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
+   if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz,
+lvl, tblp) != sz) {
+   WARN_ON(1);
+   return -EINVAL;
+   }
}
-   }
 
-   __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+   __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
return 0;
 }
 
@@ -323,7 +333,
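
The net effect of the reworked helpers is that N identical leaf entries
are written in one pass and, for non-coherent table walks, synced to the
device with a single DMA call covering sizeof(pte) * N bytes instead of
N separate syncs. A userspace model of the new __arm_lpae_set_pte()
shape, with a hypothetical sync_range() standing in for
dma_sync_single_for_device():

#include <stdio.h>
#include <stdint.h>

typedef uint64_t arm_lpae_iopte;

static void sync_range(arm_lpae_iopte *ptep, size_t bytes)
{
	printf("one sync of %zu bytes at %p\n", bytes, (void *)ptep);
}

static void set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
		    int num_entries, int coherent_walk)
{
	for (int i = 0; i < num_entries; i++)
		ptep[i] = pte;

	if (!coherent_walk)	/* one sync covers every entry written */
		sync_range(ptep, sizeof(*ptep) * num_entries);
}

int main(void)
{
	arm_lpae_iopte table[512] = { 0 };

	/* write 8 entries starting at index 4, then sync 64 bytes once */
	set_pte(&table[4], 0x40000000000703ULL, 8, 0);
	return 0;
}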

[RFC PATCH v5 06/15] iommu: Split 'addr_merge' argument to iommu_pgsize() into separate parts

2021-04-08 Thread Isaac J. Manjarres
From: Will Deacon 

The 'addr_merge' parameter to iommu_pgsize() is a fabricated address
intended to describe the alignment requirements to consider when
choosing an appropriate page size. On the iommu_map() path, this address
is the logical OR of the virtual and physical addresses.

Subsequent improvements to iommu_pgsize() will need to check the
alignment of the virtual and physical components of 'addr_merge'
independently, so pass them in as separate parameters and reconstruct
'addr_merge' locally.

No functional change.

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index bcd623862bf9..624ce3c7ae33 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2357,12 +2357,13 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t paddr, size_t size)
 {
unsigned int pgsize_idx;
unsigned long pgsizes;
size_t pgsize;
+   unsigned long addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
@@ -2415,7 +2416,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2503,8 +2504,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
+   size_t pgsize;
 
+   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;


[RFC PATCH v5 07/15] iommu: Hook up '->unmap_pages' driver callback

2021-04-08 Thread Isaac J. Manjarres
From: Will Deacon 

Extend iommu_pgsize() to populate an optional 'count' parameter so that
we can direct the unmapping operation to the ->unmap_pages callback if it
has been provided by the driver.

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 60 ---
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 624ce3c7ae33..1fc919ea95ac 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2358,11 +2358,11 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
-  phys_addr_t paddr, size_t size)
+  phys_addr_t paddr, size_t size, size_t *count)
 {
-   unsigned int pgsize_idx;
+   unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
-   size_t pgsize;
+   size_t offset, pgsize, pgsize_next;
unsigned long addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
@@ -2378,7 +2378,37 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
/* Pick the biggest page size remaining */
pgsize_idx = __fls(pgsizes);
pgsize = BIT(pgsize_idx);
+   if (!count)
+   return pgsize;
 
+
+   /* Find the next biggest supported page size, if it exists */
+   pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+   if (!pgsizes)
+   goto out_set_count;
+
+   pgsize_idx_next = __ffs(pgsizes);
+   pgsize_next = BIT(pgsize_idx_next);
+
+   /*
+* There's no point trying a bigger page size unless the virtual
+* and physical addresses are similarly offset within the larger page.
+*/
+   if ((iova ^ paddr) & (pgsize_next - 1))
+   goto out_set_count;
+
+   /* Calculate the offset to the next page size alignment boundary */
+   offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+   /*
+* If size is big enough to accommodate the larger page, reduce
+* the number of smaller pages.
+*/
+   if (offset + pgsize_next <= size)
+   size = offset;
+
+out_set_count:
+   *count = size >> pgsize_idx;
return pgsize;
 }
 
@@ -2416,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2467,6 +2497,19 @@ int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *iotlb_gather)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+
+   pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+   return ops->unmap_pages ?
+  ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+  ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2476,7 +2519,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
 
-   if (unlikely(ops->unmap == NULL ||
+   if (unlikely(!(ops->unmap || ops->unmap_pages) ||
 domain->pgsize_bitmap == 0UL))
return 0;
 
@@ -2504,10 +2547,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize;
-
-   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
-   unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+   unmapped_page = __iommu_unmap_pages(domain, iova,
+   size - unmapped,
+   iotlb_gather);
if (!unmapped_page)
break;
 

[RFC PATCH v5 04/15] iommu: Add a map_pages() op for IOMMU drivers

2021-04-08 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can
call into the io-pgtable code, to map a physically contiguous
range of pages of the same size.

For IOMMU drivers that do not specify a map_pages() callback,
the existing logic of mapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Acked-by: Lu Baolu 
---
 include/linux/iommu.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9cf81242581a..528d6a58479e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ * an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
@@ -244,6 +246,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,


[RFC PATCH v5 08/15] iommu: Add support for the map_pages() callback

2021-04-08 Thread Isaac J. Manjarres
Since iommu_pgsize can calculate how many pages of the
same size can be mapped/unmapped before the next largest
page size boundary, add support for invoking an IOMMU
driver's map_pages() callback, if it provides one.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/iommu.c | 43 +++
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 1fc919ea95ac..c94b6b3198b6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2412,6 +2412,30 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
return pgsize;
 }
 
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t size, int prot,
+gfp_t gfp, size_t *mapped)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+   int ret;
+
+   pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %ld\n",
+iova, &paddr, pgsize, count);
+
+   if (ops->map_pages) {
+   ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+gfp, mapped);
+   } else {
+   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   *mapped = ret ? 0 : pgsize;
+   }
+
+   return ret;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2422,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
 
-   if (unlikely(ops->map == NULL ||
+   if (unlikely(!(ops->map || ops->map_pages) ||
 domain->pgsize_bitmap == 0UL))
return -ENODEV;
 
@@ -2446,18 +2470,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
+   size_t mapped = 0;
 
-   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
-iova, &paddr, pgsize);
-   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+   &mapped);
+   /*
+* Some pages may have been mapped, even if an error occurred,
+* so we should account for those so they can be unmapped.
+*/
+   size -= mapped;
 
if (ret)
break;
 
-   iova += pgsize;
-   paddr += pgsize;
-   size -= pgsize;
+   iova += mapped;
+   paddr += mapped;
}
 
/* unroll mapping in case something went wrong */
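
To see how the loop carves up a buffer, here is a simplified userspace
trace of the resulting call sequence; it assumes ideally aligned
addresses so the page count always runs to the next size boundary. A
4 MB + 8 KB buffer becomes two map_pages() calls (2 x 2M, then 2 x 4K)
where the old loop would have made four ->map() calls:

#include <stdio.h>

int main(void)
{
	/* assumed supported page sizes: 4K, 2M, 1G */
	unsigned long bitmap = (1UL << 12) | (1UL << 21) | (1UL << 30);
	unsigned long size = (4UL << 20) + (8UL << 10);
	unsigned long iova = 0;
	int calls = 0;

	while (size) {
		/* biggest supported size that fits in the remainder */
		unsigned int fls = 63 - __builtin_clzl(size);
		unsigned long avail = bitmap & ((2UL << fls) - 1);
		unsigned long pgsize = 1UL << (63 - __builtin_clzl(avail));
		unsigned long count = size / pgsize; /* ideal alignment */

		printf("call %d: map_pages(iova=0x%lx, pgsize=0x%lx, count=%lu)\n",
		       ++calls, iova, pgsize, count);
		iova += pgsize * count;
		size -= pgsize * count;
	}
	return 0;
}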


[RFC PATCH v5 15/15] iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

2021-04-08 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM SMMU driver
to allow calls from iommu_map to map multiple pages of
the same size in one call. Also, remove the map() callback
for the ARM SMMU driver, as it will no longer be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 188e506d75e1..8fcc422e2f2f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1191,8 +1191,9 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return ret;
 }
 
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
-   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
 {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1202,7 +1203,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
return -ENODEV;
 
arm_smmu_rpm_get(smmu);
-   ret = ops->map(ops, iova, paddr, size, prot, gfp);
+   ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
arm_smmu_rpm_put(smmu);
 
return ret;
@@ -1624,7 +1625,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_alloc   = arm_smmu_domain_alloc,
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
-   .map= arm_smmu_map,
+   .map_pages  = arm_smmu_map_pages,
.unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,


[RFC PATCH v5 12/15] iommu/io-pgtable-arm-v7s: Implement arm_v7s_unmap_pages()

2021-04-08 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM v7s io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index d4004bcf333a..1af060686985 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -710,15 +710,32 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
-static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-   size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   size_t unmapped = 0, ret;
 
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
 
-   return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
+   while (pgcount--) {
+   ret = __arm_v7s_unmap(data, gather, iova, pgsize, 1, data->pgd);
+   if (!ret)
+   break;
+
+   unmapped += pgsize;
+   iova += pgsize;
+   }
+
+   return unmapped;
+}
+
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+   size_t size, struct iommu_iotlb_gather *gather)
+{
+   return arm_v7s_unmap_pages(ops, iova, size, 1, gather);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -781,6 +798,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_v7s_map,
.unmap  = arm_v7s_unmap,
+   .unmap_pages= arm_v7s_unmap_pages,
.iova_to_phys   = arm_v7s_iova_to_phys,
};
 


[RFC PATCH v5 11/15] iommu/io-pgtable-arm: Implement arm_lpae_map_pages()

2021-04-08 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm.c | 42 ++
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 1b690911995a..92978dd9c885 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -344,20 +344,30 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
- phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t pgcount,
+ arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+ gfp_t gfp, size_t *mapped)
 {
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
size_t tblsz = ARM_LPAE_GRANULE(data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   int ret = 0, num_entries, max_entries, map_idx_start;
 
/* Find our entry at the current level */
-   ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+   map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   ptep += map_idx_start;
 
/* If we can install a leaf entry at this level, then do so */
-   if (size == block_size)
-   return arm_lpae_init_pte(data, iova, paddr, prot, lvl, 1, ptep);
+   if (size == block_size) {
+   max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
+   num_entries = min_t(int, pgcount, max_entries);
+   ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
+   if (!ret && mapped)
+   *mapped += num_entries * size;
+
+   return ret;
+   }
 
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -386,7 +396,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
}
 
/* Rinse, repeat */
-   return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
+   return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
+ cptep, gfp, mapped);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -453,8 +464,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
return pte;
 }
 
-static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
-   phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
 {
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
@@ -463,7 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
arm_lpae_iopte prot;
long iaext = (s64)iova >> cfg->ias;
 
-   if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+   if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
return -EINVAL;
 
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -476,7 +488,8 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0;
 
prot = arm_lpae_prot_to_pte(data, iommu_prot);
-   ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+   ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
+ptep, gfp, NULL);
/*
 * Synchronise all PTE updates for the new mapping before there's
 * a chance for anything to kick off a table walk for the new iova.
@@ -486,6 +499,14 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
 }
 
+
+static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
+   phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+{
+   return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
+ NULL);
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -782,6 +803,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_lpae_map,
+   .map_pages  = arm_lpae_map_pages,
.unmap  = arm_lpae_unmap,
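
The min_t() clamp in __arm_lpae_map() keeps a single call from crossing
a page-table boundary: the call maps at most up to the end of the
current table, and the core loop comes back for the rest. A worked
example with assumed values (4 KB granule, hence 512 PTEs per table):

#include <stdio.h>

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	int ptes_per_table = 512;	/* assumed 4 KB granule, 8-byte PTEs */
	int map_idx_start = 510;	/* iova lands near the end of a table */
	int pgcount = 8;		/* pages requested by the caller */

	int max_entries = ptes_per_table - map_idx_start;
	int num_entries = min_int(pgcount, max_entries);

	/* only 2 of the 8 pages are mapped here; the caller retries for
	 * the remaining 6, which land in the next table */
	printf("num_entries=%d\n", num_entries);
	return 0;
}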

[RFC PATCH v5 05/15] iommu: Use bitmap to calculate page size in iommu_pgsize()

2021-04-08 Thread Isaac J. Manjarres
From: Will Deacon 

Avoid the potential for shifting values by amounts greater than the
width of their type by using a bitmap to compute page size in
iommu_pgsize().

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15dba84..bcd623862bf9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2360,30 +2361,22 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
   unsigned long addr_merge, size_t size)
 {
unsigned int pgsize_idx;
+   unsigned long pgsizes;
size_t pgsize;
 
-   /* Max page size that still fits into 'size' */
-   pgsize_idx = __fls(size);
+   /* Page sizes supported by the hardware and small enough for @size */
+   pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
 
-   /* need to consider alignment requirements ? */
-   if (likely(addr_merge)) {
-   /* Max page size allowed by address */
-   unsigned int align_pgsize_idx = __ffs(addr_merge);
-   pgsize_idx = min(pgsize_idx, align_pgsize_idx);
-   }
-
-   /* build a mask of acceptable page sizes */
-   pgsize = (1UL << (pgsize_idx + 1)) - 1;
-
-   /* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
+   /* Constrain the page sizes further based on the maximum alignment */
+   if (likely(addr_merge))
+   pgsizes &= GENMASK(__ffs(addr_merge), 0);
 
-   /* make sure we're still sane */
-   BUG_ON(!pgsize);
+   /* Make sure we have at least one suitable page size */
+   BUG_ON(!pgsizes);
 
-   /* pick the biggest page */
-   pgsize_idx = __fls(pgsize);
-   pgsize = 1UL << pgsize_idx;
+   /* Pick the biggest page size remaining */
+   pgsize_idx = __fls(pgsizes);
+   pgsize = BIT(pgsize_idx);
 
return pgsize;
 }
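
A userspace rendering of the new bitmap selection, with compiler
builtins standing in for __fls()/__ffs() and GENMASK() assuming a 64-bit
unsigned long; for a 192 KB region whose addresses are 64 KB aligned and
assumed hardware page sizes of 4K/64K/2M, it picks the 64 KB size:

#include <stdio.h>
#include <stddef.h>

#define BIT(n)		(1UL << (n))
#define GENMASK(h, l)	(((~0UL) << (l)) & (~0UL >> (63 - (h))))

int main(void)
{
	unsigned long bitmap = BIT(12) | BIT(16) | BIT(21);
	unsigned long addr_merge = 0x10000;	/* iova | paddr */
	size_t size = 0x30000;			/* 192 KB */

	/* sizes supported by the hardware and small enough for @size */
	unsigned long pgsizes = bitmap &
			GENMASK(63 - __builtin_clzl(size), 0);

	/* constrain further by the alignment of the addresses */
	if (addr_merge)
		pgsizes &= GENMASK(__builtin_ctzl(addr_merge), 0);

	/* pick the biggest remaining size: 64K here */
	printf("pgsize=0x%lx\n", BIT(63 - __builtin_clzl(pgsizes)));
	return 0;
}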


[RFC PATCH v5 03/15] iommu/io-pgtable: Introduce map_pages() as a page table op

2021-04-08 Thread Isaac J. Manjarres
Mapping memory into io-pgtables follows the same semantics
that unmapping memory used to follow (i.e. a buffer will be
mapped one page block per call to the io-pgtable code). This
means that it can be optimized in the same way that unmapping
memory was, so add a map_pages() callback to the io-pgtable
ops structure, so that a range of pages of the same size
can be mapped within the same call.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 2ed0c057d9e7..019149b204b8 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -143,6 +143,7 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_pages:Map a physically contiguous range of pages of the same size.
  * @unmap:Unmap a physically contiguous memory region.
  * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
@@ -153,6 +154,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,


[RFC PATCH v5 01/15] iommu/io-pgtable: Introduce unmap_pages() as a page table op

2021-04-08 Thread Isaac J. Manjarres
The io-pgtable code expects to operate on a single block or
granule of memory that is supported by the IOMMU hardware when
unmapping memory.

This means that when a large buffer that consists of multiple
such blocks is unmapped, the io-pgtable code will walk the page
tables to the correct level to unmap each block, even for blocks
that are virtually contiguous and at the same level, which can
incur a performance overhead.

Introduce the unmap_pages() page table op to express to the
io-pgtable code that it should unmap a number of blocks of
the same size, instead of a single block. Doing so allows
multiple blocks to be unmapped in one call to the io-pgtable
code, reducing the number of page table walks and indirect
calls.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..2ed0c057d9e7 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -144,6 +144,7 @@ struct io_pgtable_cfg {
  *
  * @map:  Map a physically contiguous memory region.
  * @unmap:Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -154,6 +155,9 @@ struct io_pgtable_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
+   size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
 };


[RFC PATCH v5 02/15] iommu: Add an unmap_pages() op for IOMMU drivers

2021-04-08 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can call
into the io-pgtable code, to unmap a virtually contiguous
range of pages of the same size.

For IOMMU drivers that do not specify an unmap_pages() callback,
the existing logic of unmapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
Acked-by: Lu Baolu 
---
 include/linux/iommu.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe519430a..9cf81242581a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -193,6 +193,7 @@ struct iommu_iotlb_gather {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -245,6 +246,9 @@ struct iommu_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
+   size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
   size_t size);


[RFC PATCH v5 00/15] Optimizing iommu_[map/unmap] performance

2021-04-08 Thread Isaac J. Manjarres
When unmapping a buffer from an IOMMU domain, the IOMMU framework unmaps
the buffer at a granule of the largest page size that is supported by
the IOMMU hardware and fits within the buffer. For every block that
is unmapped, the IOMMU framework will call into the IOMMU driver, and
then the io-pgtable framework to walk the page tables to find the entry
that corresponds to the IOVA, and then unmaps the entry.

This can be suboptimal in scenarios where a buffer or a piece of a
buffer can be split into several contiguous page blocks of the same size.
For example, consider an IOMMU that supports 4 KB page blocks, 2 MB page
blocks, and 1 GB page blocks, and a buffer that is 4 MB in size is being
unmapped at IOVA 0. The current call-flow will result in 4 indirect calls,
and 2 page table walks, to unmap 2 entries that are next to each other in
the page-tables, when both entries could have been unmapped in one shot
by clearing both page table entries in the same call.

The same optimization applies to mapping buffers as well, so these
patches add a pair of callbacks, map_pages() and unmap_pages(), to the
io-pgtable code and IOMMU drivers. Each call maps or unmaps an IOVA
range consisting of a number of pages of the same hardware-supported
page size, manipulating multiple page table entries within a single set
of indirect calls. New callbacks are introduced, rather than changing
the existing ones, to give other IOMMU drivers and io-pgtable formats
time to move over, so that the transition can be done piecemeal.
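
As a concrete sketch of the 4 MB example, counting indirect calls into a
single layer (the four calls above count both the driver and io-pgtable
hops); unmap() and unmap_pages() here are hypothetical stand-ins for the
respective ops:

#include <stdio.h>
#include <stddef.h>

#define SZ_2M	(2UL * 1024 * 1024)

static int calls;

static size_t unmap(unsigned long iova, size_t size)
{
	(void)iova;
	calls++;		/* one indirect call per 2 MB block */
	return size;
}

static size_t unmap_pages(unsigned long iova, size_t pgsize, size_t pgcount)
{
	(void)iova;
	calls++;		/* one indirect call for the whole range */
	return pgsize * pgcount;
}

int main(void)
{
	calls = 0;
	unmap(0, SZ_2M);
	unmap(SZ_2M, SZ_2M);
	printf("old path: %d calls into this layer\n", calls);	/* 2 */

	calls = 0;
	unmap_pages(0, SZ_2M, 2);
	printf("new path: %d call\n", calls);			/* 1 */
	return 0;
}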

Changes since V4:

* Fixed type for addr_merge from phys_addr_t to unsigned long so
  that GENMASK() can be used.
* Hooked up arm_v7s_[unmap/map]_pages to the io-pgtable ops.
* Introduced a macro for calculating the number of page table entries
  for the ARM LPAE io-pgtable format.

Changes since V3:

* Removed usage of ULL variants of bitops from Will's patches, as
  they were not needed.
* Instead of unmapping/mapping pgcount pages, unmap_pages() and
  map_pages() will at most unmap and map pgcount pages, allowing
  for part of the pages in pgcount to be mapped and unmapped. This
  was done to simplify the handling in the io-pgtable layer.
* Extended the existing PTE manipulation methods in io-pgtable-arm
  to handle multiple entries, per Robin's suggestion, eliminating
  the need to add functions to clear multiple PTEs.
* Implemented a naive form of [map/unmap]_pages() for ARM v7s io-pgtable
  format.
* arm_[v7s/lpae]_[map/unmap] will call
  arm_[v7s/lpae]_[map_pages/unmap_pages] with an argument of 1 page.
* The arm_smmu_[map/unmap] functions have been removed, since they
  have been replaced by arm_smmu_[map/unmap]_pages.

Changes since V2:

* Added a check in __iommu_map() to check for the existence
  of either the map or map_pages callback as per Lu's suggestion.

Changes since V1:

* Implemented the map_pages() callbacks
* Integrated Will's patches into this series which
  address several concerns about how iommu_pgsize() partitioned a
  buffer (I made a minor change to the patch which changes
  iommu_pgsize() to use bitmaps by using the ULL variants of
  the bitops)

Isaac J. Manjarres (12):
  iommu/io-pgtable: Introduce unmap_pages() as a page table op
  iommu: Add an unmap_pages() op for IOMMU drivers
  iommu/io-pgtable: Introduce map_pages() as a page table op
  iommu: Add a map_pages() op for IOMMU drivers
  iommu: Add support for the map_pages() callback
  iommu/io-pgtable-arm: Prepare PTE methods for handling multiple
entries
  iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()
  iommu/io-pgtable-arm: Implement arm_lpae_map_pages()
  iommu/io-pgtable-arm-v7s: Implement arm_v7s_unmap_pages()
  iommu/io-pgtable-arm-v7s: Implement arm_v7s_map_pages()
  iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback
  iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

Will Deacon (3):
  iommu: Use bitmap to calculate page size in iommu_pgsize()
  iommu: Split 'addr_merge' argument to iommu_pgsize() into separate
parts
  iommu: Hook up '->unmap_pages' driver callback

 drivers/iommu/arm/arm-smmu/arm-smmu.c |  18 +--
 drivers/iommu/io-pgtable-arm-v7s.c|  50 ++-
 drivers/iommu/io-pgtable-arm.c| 189 +-
 drivers/iommu/iommu.c | 130 +-
 include/linux/io-pgtable.h|   8 ++
 include/linux/iommu.h |   9 ++
 6 files changed, 289 insertions(+), 115 deletions(-)



[RFC PATCH v4 15/15] iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

2021-04-07 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM SMMU driver
to allow calls from iommu_map to map multiple pages of
the same size in one call. Also, remove the map() callback
for the ARM SMMU driver, as it will no longer be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 188e506d75e1..8fcc422e2f2f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1191,8 +1191,9 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
return ret;
 }
 
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
-   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
 {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1202,7 +1203,7 @@ static int arm_smmu_map(struct iommu_domain *domain, 
unsigned long iova,
return -ENODEV;
 
arm_smmu_rpm_get(smmu);
-   ret = ops->map(ops, iova, paddr, size, prot, gfp);
+   ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, 
mapped);
arm_smmu_rpm_put(smmu);
 
return ret;
@@ -1624,7 +1625,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_alloc   = arm_smmu_domain_alloc,
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
-   .map= arm_smmu_map,
+   .map_pages  = arm_smmu_map_pages,
.unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC PATCH v4 09/15] iommu/io-pgtable-arm: Prepare PTE methods for handling multiple entries

2021-04-07 Thread Isaac J. Manjarres
The PTE methods currently operate on a single entry. In preparation
for manipulating multiple PTEs in one map or unmap call, allow them
to handle multiple PTEs.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm.c | 78 +++---
 1 file changed, 44 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..ea66b10c04c4 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -232,20 +232,23 @@ static void __arm_lpae_free_pages(void *pages, size_t 
size,
free_pages((unsigned long)pages, get_order(size));
 }
 
-static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
struct io_pgtable_cfg *cfg)
 {
dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
-  sizeof(*ptep), DMA_TO_DEVICE);
+  sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
 }
 
 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
-  struct io_pgtable_cfg *cfg)
+  int num_entries, struct io_pgtable_cfg *cfg)
 {
-   *ptep = pte;
+   int i;
+
+   for (i = 0; i < num_entries; i++)
+   ptep[i] = pte;
 
if (!cfg->coherent_walk)
-   __arm_lpae_sync_pte(ptep, cfg);
+   __arm_lpae_sync_pte(ptep, num_entries, cfg);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -255,47 +258,54 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable 
*data,
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
-   int lvl, arm_lpae_iopte *ptep)
+   int lvl, int num_entries, arm_lpae_iopte *ptep)
 {
arm_lpae_iopte pte = prot;
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+   int i;
 
if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
pte |= ARM_LPAE_PTE_TYPE_PAGE;
else
pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
-   pte |= paddr_to_iopte(paddr, data);
+   for (i = 0; i < num_entries; i++)
+   ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
 
-   __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+   if (!cfg->coherent_walk)
+   __arm_lpae_sync_pte(ptep, num_entries, cfg);
 }
 
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 unsigned long iova, phys_addr_t paddr,
-arm_lpae_iopte prot, int lvl,
+arm_lpae_iopte prot, int lvl, int num_entries,
 arm_lpae_iopte *ptep)
 {
-   arm_lpae_iopte pte = *ptep;
-
-   if (iopte_leaf(pte, lvl, data->iop.fmt)) {
-   /* We require an unmap first */
-   WARN_ON(!selftest_running);
-   return -EEXIST;
-   } else if (iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE) {
-   /*
-* We need to unmap and free the old table before
-* overwriting it with a block entry.
-*/
-   arm_lpae_iopte *tblp;
-   size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-
-   tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
-   WARN_ON(1);
-   return -EINVAL;
+   int i;
+
+   for (i = 0; i < num_entries; i++)
+   if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
+   /* We require an unmap first */
+   WARN_ON(!selftest_running);
+   return -EEXIST;
+   } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
+   /*
+* We need to unmap and free the old table before
+* overwriting it with a block entry.
+*/
+   arm_lpae_iopte *tblp;
+   size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+
+   tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
+   if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz,
+lvl, tblp) != sz) {
+   WARN_ON(1);
+   return -EINVAL;
+   }
}
-   }
 
-   __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+   __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
return 0;
 }
 
@@ -323,7 +333,

[RFC PATCH v4 14/15] iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback

2021-04-07 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM SMMU driver
to allow calls from iommu_unmap to unmap multiple pages of
the same size in one call. Also, remove the unmap() callback
for the SMMU driver, as it will no longer be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfde6a61..188e506d75e1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,8 +1208,9 @@ static int arm_smmu_map(struct iommu_domain *domain, 
unsigned long iova,
return ret;
 }
 
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long 
iova,
+  size_t pgsize, size_t pgcount,
+  struct iommu_iotlb_gather *iotlb_gather)
 {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1219,7 +1220,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, 
unsigned long iova,
return 0;
 
arm_smmu_rpm_get(smmu);
-   ret = ops->unmap(ops, iova, size, gather);
+   ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
arm_smmu_rpm_put(smmu);
 
return ret;
@@ -1624,7 +1625,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
-   .unmap  = arm_smmu_unmap,
+   .unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys   = arm_smmu_iova_to_phys,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC PATCH v4 11/15] iommu/io-pgtable-arm: Implement arm_lpae_map_pages()

2021-04-07 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm.c | 42 ++
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6700685f81d4..834481d3c7f3 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -341,20 +341,30 @@ static arm_lpae_iopte 
arm_lpae_install_table(arm_lpae_iopte *table,
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
- phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t pgcount,
+ arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+ gfp_t gfp, size_t *mapped)
 {
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
size_t tblsz = ARM_LPAE_GRANULE(data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   int ret = 0, num_entries, max_entries, map_idx_start;
 
/* Find our entry at the current level */
-   ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+   map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   ptep += map_idx_start;
 
/* If we can install a leaf entry at this level, then do so */
-   if (size == block_size)
-   return arm_lpae_init_pte(data, iova, paddr, prot, lvl, 1, ptep);
+   if (size == block_size) {
+   max_entries = (tblsz >> ilog2(sizeof(pte))) - map_idx_start;
+   num_entries = min_t(int, pgcount, max_entries);
+   ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, 
num_entries, ptep);
+   if (!ret && mapped)
+   *mapped += num_entries * size;
+
+   return ret;
+   }
 
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -383,7 +393,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, 
unsigned long iova,
}
 
/* Rinse, repeat */
-   return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, 
gfp);
+   return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
+ cptep, gfp, mapped);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -450,8 +461,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
return pte;
 }
 
-static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
-   phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
 {
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
@@ -460,7 +472,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
arm_lpae_iopte prot;
long iaext = (s64)iova >> cfg->ias;
 
-   if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+   if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
return -EINVAL;
 
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -473,7 +485,8 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0;
 
prot = arm_lpae_prot_to_pte(data, iommu_prot);
-   ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+   ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
+ptep, gfp, mapped);
/*
 * Synchronise all PTE updates for the new mapping before there's
 * a chance for anything to kick off a table walk for the new iova.
@@ -483,6 +496,14 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
 }
 
+
+static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
+   phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+{
+   return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
+ NULL);
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -779,6 +800,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_lpae_map,
+   .map_pages  = arm_lpae_map_pages,
.unmap

[RFC PATCH v4 05/15] iommu: Use bitmap to calculate page size in iommu_pgsize()

2021-04-07 Thread Isaac J. Manjarres
From: Will Deacon 

Avoid the potential for shifting values by amounts greater than the
width of their type by using a bitmap to compute page size in
iommu_pgsize().
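
A worked example, with values assumed purely for illustration, shows
both the hazard and the fix:

/*
 * Assume a 32-bit unsigned long, pgsize_bitmap = SZ_4K | SZ_2M | SZ_1G
 * and size = 0x80000000, so __fls(size) == 31.  The old code computed
 * "pgsize = (1UL << (pgsize_idx + 1)) - 1", i.e. a shift by 32, which
 * is undefined behaviour for a 32-bit type.  The new code builds the
 * same mask as "pgsize_bitmap & GENMASK(31, 0)" with no out-of-range
 * shift, and (with no alignment constraint from addr_merge) returns
 * BIT(__fls(pgsizes)) == SZ_1G.
 */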

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15dba84..bcd623862bf9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2360,30 +2361,22 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
   unsigned long addr_merge, size_t size)
 {
unsigned int pgsize_idx;
+   unsigned long pgsizes;
size_t pgsize;
 
-   /* Max page size that still fits into 'size' */
-   pgsize_idx = __fls(size);
+   /* Page sizes supported by the hardware and small enough for @size */
+   pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
 
-   /* need to consider alignment requirements ? */
-   if (likely(addr_merge)) {
-   /* Max page size allowed by address */
-   unsigned int align_pgsize_idx = __ffs(addr_merge);
-   pgsize_idx = min(pgsize_idx, align_pgsize_idx);
-   }
-
-   /* build a mask of acceptable page sizes */
-   pgsize = (1UL << (pgsize_idx + 1)) - 1;
-
-   /* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
+   /* Constrain the page sizes further based on the maximum alignment */
+   if (likely(addr_merge))
+   pgsizes &= GENMASK(__ffs(addr_merge), 0);
 
-   /* make sure we're still sane */
-   BUG_ON(!pgsize);
+   /* Make sure we have at least one suitable page size */
+   BUG_ON(!pgsizes);
 
-   /* pick the biggest page */
-   pgsize_idx = __fls(pgsize);
-   pgsize = 1UL << pgsize_idx;
+   /* Pick the biggest page size remaining */
+   pgsize_idx = __fls(pgsizes);
+   pgsize = BIT(pgsize_idx);
 
return pgsize;
 }

[RFC PATCH v4 13/15] iommu/io-pgtable-arm-v7s: Implement arm_v7s_map_pages()

2021-04-07 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM v7s io-pgtable
format.
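
Even this naive per-page loop is a win; a rough sketch of the savings,
with numbers assumed for illustration:

/*
 * Mapping 512 x 4K pages through the old arm_v7s_map() path costs one
 * io_pgtable_ops indirect call and one wmb() barrier per page.  With
 * this patch the loop runs inside a single arm_v7s_map_pages() call,
 * so the whole range costs one indirect call, and the synchronising
 * barrier noted in the diff below is issued once after the loop rather
 * than 512 times.
 */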

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 25 +
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 5e203e03c352..0d49f0e8cf61 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -519,11 +519,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
 }
 
-static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
-   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
-   int ret;
+   int ret = -EINVAL;
 
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
paddr >= (1ULL << data->iop.cfg.oas)))
@@ -533,7 +534,17 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
 
-   ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
+   while (pgcount--) {
+   ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1, data->pgd,
+   gfp);
+   if (ret)
+   break;
+
+   iova += pgsize;
+   paddr += pgsize;
+   if (mapped)
+   *mapped += pgsize;
+   }
/*
 * Synchronise all PTE updates for the new mapping before there's
 * a chance for anything to kick off a table walk for the new iova.
@@ -543,6 +554,12 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
 }
 
+static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
+   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+   return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL);
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);

[RFC PATCH v4 03/15] iommu/io-pgtable: Introduce map_pages() as a page table op

2021-04-07 Thread Isaac J. Manjarres
Mapping memory into io-pgtables follows the same semantics
that unmapping memory used to follow (i.e. a buffer will be
mapped one page block per call to the io-pgtable code). This
means that it can be optimized in the same way that unmapping
memory was, so add a map_pages() callback to the io-pgtable
ops structure, so that a range of pages of the same size
can be mapped within the same call.
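
For illustration only, a minimal sketch of a caller (the helper name
map_run() and the non-NULL 'mapped' pointer are assumptions, not part
of this patch) shows how the new op collapses a loop of per-page map()
calls into a single indirect call:

static int map_run(struct io_pgtable_ops *ops, unsigned long iova,
                   phys_addr_t paddr, size_t pgsize, size_t pgcount,
                   int prot, gfp_t gfp, size_t *mapped)
{
        int ret = 0;

        if (ops->map_pages)     /* one call covers the whole run */
                return ops->map_pages(ops, iova, paddr, pgsize, pgcount,
                                      prot, gfp, mapped);

        while (pgcount--) {     /* legacy path: one call per page */
                ret = ops->map(ops, iova, paddr, pgsize, prot, gfp);
                if (ret)
                        break;
                iova += pgsize;
                paddr += pgsize;
                *mapped += pgsize;
        }
        return ret;
}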

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 2ed0c057d9e7..019149b204b8 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -143,6 +143,7 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_pages:Map a physically contiguous range of pages of the same size.
  * @unmap:Unmap a physically contiguous memory region.
  * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
@@ -153,6 +154,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,

[RFC PATCH v4 12/15] iommu/io-pgtable-arm-v7s: Implement arm_v7s_unmap_pages()

2021-04-07 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM v7s io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index d4004bcf333a..5e203e03c352 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -710,15 +710,32 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
-static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-   size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   size_t unmapped = 0, ret;
 
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
 
-   return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
+   while (pgcount--) {
+   ret = __arm_v7s_unmap(data, gather, iova, pgsize, 1, data->pgd);
+   if (!ret)
+   break;
+
+   unmapped += pgsize;
+   iova += pgsize;
+   }
+
+   return unmapped;
+}
+
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+   size_t size, struct iommu_iotlb_gather *gather)
+{
+   return arm_v7s_unmap_pages(ops, iova, size, 1, gather);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,

[RFC PATCH v4 10/15] iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()

2021-04-07 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 70 ++
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index ea66b10c04c4..6700685f81d4 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -253,8 +253,8 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep);
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
@@ -298,7 +298,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz,
+   if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
 lvl, tblp) != sz) {
WARN_ON(1);
return -EINVAL;
@@ -526,14 +526,14 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
   unsigned long iova, size_t size,
   arm_lpae_iopte blk_pte, int lvl,
-  arm_lpae_iopte *ptep)
+  arm_lpae_iopte *ptep, size_t pgcount)
 {
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-   int i, unmap_idx = -1;
+   int i, unmap_idx_start = -1, num_entries = 0, max_entries;
 
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
@@ -542,15 +542,18 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
if (!tablep)
return 0; /* Bytes unmapped */
 
-   if (size == split_sz)
-   unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   if (size == split_sz) {
+   unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   max_entries = (tablesz >> ilog2(sizeof(pte))) - unmap_idx_start;
+   num_entries = min_t(int, pgcount, max_entries);
+   }
 
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
 
for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
/* Unmap! */
-   if (i == unmap_idx)
+   if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
continue;
 
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
@@ -568,38 +571,45 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
return 0;
 
tablep = iopte_deref(pte, data);
-   } else if (unmap_idx >= 0) {
-   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-   return size;
+   } else if (unmap_idx_start >= 0) {
+   for (i = 0; i < num_entries; i++)
+   io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
+
+   return num_entries * size;
}
 
-   return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
+   return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep)
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep)
 {
arm_lpae_iopte pte;
struct io_pgtable *iop = &data->iop;
+   size_t tblsz = ARM_LPAE_GRANULE(data);
+   int i, num_entries, max_entries, unmap_idx_start;
 
/* Something went horribly wrong and we ran out of page table */
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
 
-   ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);

[RFC PATCH v4 07/15] iommu: Hook up '->unmap_pages' driver callback

2021-04-07 Thread Isaac J. Manjarres
From: Will Deacon 

Extend iommu_pgsize() to populate an optional 'count' parameter so that
we can direct the unmapping operation to the ->unmap_pages callback if it
has been provided by the driver.
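
To see how 'count' falls out, here is a worked example with assumed
values:

/*
 * Assume pgsize_bitmap = SZ_4K | SZ_2M, iova = paddr = 0x1ff000 and
 * size = 0x402000.  Alignment limits the current chunk to pgsize =
 * SZ_4K.  The next bigger supported size is pgsize_next = SZ_2M; iova
 * and paddr are equally offset within it ((iova ^ paddr) == 0 here),
 * and the distance to the next 2M boundary is offset = 0x1000, so size
 * is clamped to 0x1000 and count = 0x1000 >> 12 = 1.  After that
 * single 4K page the address is 2M aligned, and the next call returns
 * pgsize = SZ_2M with count = 2, covering the next 4 MB.
 */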

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 60 ---
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ab689611a03b..d5d551754556 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2358,11 +2358,11 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
-  phys_addr_t paddr, size_t size)
+  phys_addr_t paddr, size_t size, size_t *count)
 {
-   unsigned int pgsize_idx;
+   unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
-   size_t pgsize;
+   size_t offset, pgsize, pgsize_next;
phys_addr_t addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
@@ -2378,7 +2378,37 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
/* Pick the biggest page size remaining */
pgsize_idx = __fls(pgsizes);
pgsize = BIT(pgsize_idx);
+   if (!count)
+   return pgsize;
 
+
+   /* Find the next biggest supported page size, if it exists */
+   pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+   if (!pgsizes)
+   goto out_set_count;
+
+   pgsize_idx_next = __ffs(pgsizes);
+   pgsize_next = BIT(pgsize_idx_next);
+
+   /*
+* There's no point trying a bigger page size unless the virtual
+* and physical addresses are similarly offset within the larger page.
+*/
+   if ((iova ^ paddr) & (pgsize_next - 1))
+   goto out_set_count;
+
+   /* Calculate the offset to the next page size alignment boundary */
+   offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+   /*
+* If size is big enough to accommodate the larger page, reduce
+* the number of smaller pages.
+*/
+   if (offset + pgsize_next <= size)
+   size = offset;
+
+out_set_count:
+   *count = size >> pgsize_idx;
return pgsize;
 }
 
@@ -2416,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2467,6 +2497,19 @@ int iommu_map_atomic(struct iommu_domain *domain, 
unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *iotlb_gather)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+
+   pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+   return ops->unmap_pages ?
+  ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+  ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2476,7 +2519,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
 
-   if (unlikely(ops->unmap == NULL ||
+   if (unlikely(!(ops->unmap || ops->unmap_pages) ||
 domain->pgsize_bitmap == 0UL))
return 0;
 
@@ -2504,10 +2547,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize;
-
-   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
-   unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+   unmapped_page = __iommu_unmap_pages(domain, iova,
+   size - unmapped,
+   iotlb_gather);
if (!unmapped_page)
break;
 

[RFC PATCH v4 08/15] iommu: Add support for the map_pages() callback

2021-04-07 Thread Isaac J. Manjarres
Since iommu_pgsize can calculate how many pages of the
same size can be mapped/unmapped before the next largest
page size boundary, add support for invoking an IOMMU
driver's map_pages() callback, if it provides one.
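
One subtlety is worth spelling out (the numbers here are assumed for
illustration):

/*
 * Suppose a map_pages() call covering 8 pages fails after installing
 * 3 of them: it returns an error with *mapped == 3 * pgsize.
 * __iommu_map() subtracts 'mapped' from 'size' before acting on the
 * error, so the existing "unroll mapping" error path at the end of the
 * function unmaps exactly the bytes that made it into the page tables.
 */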

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/iommu.c | 43 +++
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d5d551754556..df55761932fd 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2412,6 +2412,30 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
return pgsize;
 }
 
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t size, int prot,
+gfp_t gfp, size_t *mapped)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+   int ret;
+
+   pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %ld\n",
+iova, &paddr, pgsize, count);
+
+   if (ops->map_pages) {
+   ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+gfp, mapped);
+   } else {
+   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   *mapped = ret ? 0 : pgsize;
+   }
+
+   return ret;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2422,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
 
-   if (unlikely(ops->map == NULL ||
+   if (unlikely(!(ops->map || ops->map_pages) ||
 domain->pgsize_bitmap == 0UL))
return -ENODEV;
 
@@ -2446,18 +2470,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
+   size_t mapped = 0;
 
-   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
-iova, &paddr, pgsize);
-   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+   &mapped);
+   /*
+* Some pages may have been mapped, even if an error occurred,
+* so we should account for those so they can be unmapped.
+*/
+   size -= mapped;
 
if (ret)
break;
 
-   iova += pgsize;
-   paddr += pgsize;
-   size -= pgsize;
+   iova += mapped;
+   paddr += mapped;
}
 
/* unroll mapping in case something went wrong */

[RFC PATCH v4 04/15] iommu: Add a map_pages() op for IOMMU drivers

2021-04-07 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can
call into the io-pgtable code, to map a physically contiguous
range of pages of the same size.

For IOMMU drivers that do not specify a map_pages() callback,
the existing logic of mapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Acked-by: Lu Baolu 
---
 include/linux/iommu.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9cf81242581a..528d6a58479e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ * an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
@@ -244,6 +246,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,

[RFC PATCH v4 06/15] iommu: Split 'addr_merge' argument to iommu_pgsize() into separate parts

2021-04-07 Thread Isaac J. Manjarres
From: Will Deacon 

The 'addr_merge' parameter to iommu_pgsize() is a fabricated address
intended to describe the alignment requirements to consider when
choosing an appropriate page size. On the iommu_map() path, this address
is the logical OR of the virtual and physical addresses.

Subsequent improvements to iommu_pgsize() will need to check the
alignment of the virtual and physical components of 'addr_merge'
independently, so pass them in as separate parameters and reconstruct
'addr_merge' locally.

No functional change.
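
A small example, with addresses assumed for illustration, shows why the
components matter individually:

/*
 * iova = 0x200000 and paddr = 0x201000 are each 4K aligned, so
 * addr_merge = 0x201000 has __ffs() == 12 -- indistinguishable from a
 * pair that merely happens to be 4K aligned inside the same 2M page.
 * Only a check on the separate values, such as the follow-up patch's
 * "(iova ^ paddr) & (pgsize_next - 1)", can tell that this pair can
 * never share a 2M mapping.
 */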

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index bcd623862bf9..ab689611a03b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2357,12 +2357,13 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t paddr, size_t size)
 {
unsigned int pgsize_idx;
unsigned long pgsizes;
size_t pgsize;
+   phys_addr_t addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
@@ -2415,7 +2416,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2503,8 +2504,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
+   size_t pgsize;
 
+   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;

[RFC PATCH v4 02/15] iommu: Add an unmap_pages() op for IOMMU drivers

2021-04-07 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can call
into the io-pgtable code, to unmap a virtually contiguous
range of pages of the same size.

For IOMMU drivers that do not specify an unmap_pages() callback,
the existing logic of unmapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
Acked-by: Lu Baolu 
---
 include/linux/iommu.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe519430a..9cf81242581a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -193,6 +193,7 @@ struct iommu_iotlb_gather {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -245,6 +246,9 @@ struct iommu_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
+   size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
   size_t size);

[RFC PATCH v4 01/15] iommu/io-pgtable: Introduce unmap_pages() as a page table op

2021-04-07 Thread Isaac J. Manjarres
The io-pgtable code expects to operate on a single block or
granule of memory that is supported by the IOMMU hardware when
unmapping memory.

This means that when a large buffer that consists of multiple
such blocks is unmapped, the io-pgtable code will walk the page
tables to the correct level to unmap each block, even for blocks
that are virtually contiguous and at the same level, which can
incur a performance overhead.

Introduce the unmap_pages() page table op to express to the
io-pgtable code that it should unmap a number of blocks of
the same size, instead of a single block. Doing so allows
multiple blocks to be unmapped in one call to the io-pgtable
code, reducing the number of page table walks and indirect
calls.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..2ed0c057d9e7 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -144,6 +144,7 @@ struct io_pgtable_cfg {
  *
  * @map:  Map a physically contiguous memory region.
  * @unmap:Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -154,6 +155,9 @@ struct io_pgtable_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
+   size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
 };

[RFC PATCH v4 00/15] Optimizing iommu_[map/unmap] performance

2021-04-07 Thread Isaac J. Manjarres
When unmapping a buffer from an IOMMU domain, the IOMMU framework unmaps
the buffer at a granule of the largest page size that is supported by
the IOMMU hardware and fits within the buffer. For every block that
is unmapped, the IOMMU framework will call into the IOMMU driver, and
then the io-pgtable framework to walk the page tables to find the entry
that corresponds to the IOVA, and then unmaps the entry.

This can be suboptimal in scenarios where a buffer or a piece of a
buffer can be split into several contiguous page blocks of the same size.
For example, consider an IOMMU that supports 4 KB page blocks, 2 MB page
blocks, and 1 GB page blocks, and a buffer that is 4 MB in size is being
unmapped at IOVA 0. The current call-flow will result in 4 indirect calls,
and 2 page table walks, to unmap 2 entries that are next to each other in
the page-tables, when both entries could have been unmapped in one shot
by clearing both page table entries in the same call.
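
In rough numbers for that example:

  old path: 2 x iommu_ops->unmap() -> 2 x io-pgtable unmap()
            = 4 indirect calls and 2 page table walks
  new path: 1 x iommu_ops->unmap_pages(pgsize = 2 MB, pgcount = 2)
            -> 1 x io-pgtable unmap_pages()
            = 2 indirect calls and 1 walk that clears both PTEs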

The same optimization is applicable to mapping buffers as well, so
these patches add a pair of callbacks, map_pages() and unmap_pages(),
to the io-pgtable code and IOMMU drivers. Each callback maps or unmaps
an IOVA range consisting of a number of pages of one page size that is
supported by the IOMMU hardware, manipulating multiple page table
entries within a single set of indirect calls. The reason for
introducing these callbacks, rather than converting everything at once,
is to give other IOMMU drivers and io-pgtable formats time to move to
the new interface, so that the transition can be done piecemeal.

Changes since V3:

* Removed usage of ULL variants of bitops from Will's patches, as
  they were not needed.
* unmap_pages() and map_pages() now unmap/map at most pgcount pages,
  rather than being required to handle all pgcount pages, so a call
  may make partial progress. This was done to simplify the handling
  in the io-pgtable layer.
* Extended the existing PTE manipulation methods in io-pgtable-arm
  to handle multiple entries, per Robin's suggestion, eliminating
  the need to add functions to clear multiple PTEs.
* Implemented a naive form of [map/unmap]_pages() for ARM v7s io-pgtable
  format.
* arm_[v7s/lpae]_[map/unmap] will call
  arm_[v7s/lpae]_[map_pages/unmap_pages] with an argument of 1 page.
* The arm_smmu_[map/unmap] functions have been removed, since they
  have been replaced by arm_smmu_[map/unmap]_pages.

Changes since V2:

* Added a check in __iommu_map() to check for the existence
  of either the map or map_pages callback as per Lu's suggestion.

Changes since V1:

* Implemented the map_pages() callbacks
* Integrated Will's patches into this series which
  address several concerns about how iommu_pgsize() partitioned a
  buffer (I made a minor change to the patch which changes
  iommu_pgsize() to use bitmaps by using the ULL variants of
  the bitops)

Isaac J. Manjarres (12):
  iommu/io-pgtable: Introduce unmap_pages() as a page table op
  iommu: Add an unmap_pages() op for IOMMU drivers
  iommu/io-pgtable: Introduce map_pages() as a page table op
  iommu: Add a map_pages() op for IOMMU drivers
  iommu: Add support for the map_pages() callback
  iommu/io-pgtable-arm: Prepare PTE methods for handling multiple
entries
  iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()
  iommu/io-pgtable-arm: Implement arm_lpae_map_pages()
  iommu/io-pgtable-arm-v7s: Implement arm_v7s_unmap_pages()
  iommu/io-pgtable-arm-v7s: Implement arm_v7s_map_pages()
  iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback
  iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

Will Deacon (3):
  iommu: Use bitmap to calculate page size in iommu_pgsize()
  iommu: Split 'addr_merge' argument to iommu_pgsize() into separate
parts
  iommu: Hook up '->unmap_pages' driver callback

 drivers/iommu/arm/arm-smmu/arm-smmu.c |  18 +--
 drivers/iommu/io-pgtable-arm-v7s.c|  48 ++-
 drivers/iommu/io-pgtable-arm.c| 184 +-
 drivers/iommu/iommu.c | 130 +-
 include/linux/io-pgtable.h|   8 ++
 include/linux/iommu.h |   9 ++
 6 files changed, 283 insertions(+), 114 deletions(-)


[RFC PATCH v3 09/12] iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()

2021-04-05 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 124 +++--
 1 file changed, 104 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..fc63d57b8037 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -60,6 +60,14 @@
 /* Calculate the block/page mapping size at level l for pagetable in d. */
 #define ARM_LPAE_BLOCK_SIZE(l,d)   (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
 
+/*
+ * Calculate the level that corresponds to the block/page mapping for pagetable
+ * in d.
+ */
+#define ARM_LPAE_BLOCK_SIZE_LVL(s, d)  \
+   ((ARM_LPAE_MAX_LEVELS - \
+ ((ilog2((s)) - ilog2(sizeof(arm_lpae_iopte))) / (d)->bits_per_level)))
+
 /* Page table bits */
 #define ARM_LPAE_PTE_TYPE_SHIFT0
 #define ARM_LPAE_PTE_TYPE_MASK 0x3
@@ -248,10 +256,26 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
__arm_lpae_sync_pte(ptep, cfg);
 }
 
+static void __arm_lpae_sync_ptes(arm_lpae_iopte *ptep, size_t num_ptes,
+struct io_pgtable_cfg *cfg)
+{
+   dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
+  sizeof(*ptep) * num_ptes, DMA_TO_DEVICE);
+}
+
+static void __arm_lpae_clear_ptes(arm_lpae_iopte *ptep, size_t num_ptes,
+ struct io_pgtable_cfg *cfg)
+{
+   memset(ptep, 0, sizeof(*ptep) * num_ptes);
+
+   if (!cfg->coherent_walk)
+   __arm_lpae_sync_ptes(ptep, num_ptes, cfg);
+}
+
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep);
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
@@ -289,7 +313,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
+   if (__arm_lpae_unmap(data, NULL, iova, sz, 1, lvl, tblp) != sz) {
WARN_ON(1);
return -EINVAL;
}
@@ -516,14 +540,14 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
   unsigned long iova, size_t size,
   arm_lpae_iopte blk_pte, int lvl,
-  arm_lpae_iopte *ptep)
+  arm_lpae_iopte *ptep, size_t pgcount)
 {
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-   int i, unmap_idx = -1;
+   int i, unmap_idx_start = -1;
 
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
@@ -533,14 +557,14 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
return 0; /* Bytes unmapped */
 
if (size == split_sz)
-   unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
 
for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
/* Unmap! */
-   if (i == unmap_idx)
+   if (i >= unmap_idx_start && i < (unmap_idx_start + pgcount))
continue;
 
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
@@ -558,20 +582,24 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
return 0;
 
tablep = iopte_deref(pte, data);
-   } else if (unmap_idx >= 0) {
-   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-   return size;
+   } else if (unmap_idx_start >= 0) {
+   for (i = 0; i < pgcount; i++) {
+   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
+   iova += size;
+   }
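
As a sanity check of the new ARM_LPAE_BLOCK_SIZE_LVL() macro above,
with parameters assumed for a 4 KB granule:

/*
 * With a 4 KB granule: bits_per_level == 9, sizeof(arm_lpae_iopte) == 8
 * and ARM_LPAE_MAX_LEVELS == 4.  For a 2 MB block (s == SZ_2M):
 *   (ilog2(SZ_2M) - ilog2(8)) / 9 == (21 - 3) / 9 == 2
 *   level == 4 - 2 == 2
 * which is the level of a 2 MB block descriptor; a 4 KB page
 * ((12 - 3) / 9 == 1) lands at level 3, the last level, as expected.
 */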

[RFC PATCH v3 12/12] iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

2021-04-05 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM SMMU driver
to allow calls from iommu_map to map multiple pages of
the same size in one call.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index f29f1fb109f8..fe7a452ce24e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,6 +1208,24 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
 }
 
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   int ret;
+
+   if (!ops)
+   return -ENODEV;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *gather)
 {
@@ -1642,6 +1660,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
+   .map_pages  = arm_smmu_map_pages,
.unmap  = arm_smmu_unmap,
.unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,

[RFC PATCH v3 05/12] iommu: Use bitmap to calculate page size in iommu_pgsize()

2021-04-05 Thread Isaac J. Manjarres
From: Will Deacon 

Avoid the potential for shifting values by amounts greater than the
width of their type by using a bitmap to compute page size in
iommu_pgsize().

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15dba84..9006397b6604 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2360,30 +2361,22 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
   unsigned long addr_merge, size_t size)
 {
unsigned int pgsize_idx;
+   unsigned long pgsizes;
size_t pgsize;
 
-   /* Max page size that still fits into 'size' */
-   pgsize_idx = __fls(size);
+   /* Page sizes supported by the hardware and small enough for @size */
+   pgsizes = domain->pgsize_bitmap & GENMASK_ULL(__fls(size), 0);
 
-   /* need to consider alignment requirements ? */
-   if (likely(addr_merge)) {
-   /* Max page size allowed by address */
-   unsigned int align_pgsize_idx = __ffs(addr_merge);
-   pgsize_idx = min(pgsize_idx, align_pgsize_idx);
-   }
-
-   /* build a mask of acceptable page sizes */
-   pgsize = (1UL << (pgsize_idx + 1)) - 1;
-
-   /* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
+   /* Constrain the page sizes further based on the maximum alignment */
+   if (likely(addr_merge))
+   pgsizes &= GENMASK_ULL(__ffs(addr_merge), 0);
 
-   /* make sure we're still sane */
-   BUG_ON(!pgsize);
+   /* Make sure we have at least one suitable page size */
+   BUG_ON(!pgsizes);
 
-   /* pick the biggest page */
-   pgsize_idx = __fls(pgsize);
-   pgsize = 1UL << pgsize_idx;
+   /* Pick the biggest page size remaining */
+   pgsize_idx = __fls(pgsizes);
+   pgsize = BIT_ULL(pgsize_idx);
 
return pgsize;
 }

[RFC PATCH v3 02/12] iommu: Add an unmap_pages() op for IOMMU drivers

2021-04-05 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can call
into the io-pgtable code, to unmap a virtually contiguous
range of pages of the same size.

For IOMMU drivers that do not specify an unmap_pages() callback,
the existing logic of unmapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
---
 include/linux/iommu.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe519430a..9cf81242581a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -193,6 +193,7 @@ struct iommu_iotlb_gather {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -245,6 +246,9 @@ struct iommu_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
+   size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
   size_t size);

[RFC PATCH v3 08/12] iommu: Add support for the map_pages() callback

2021-04-05 Thread Isaac J. Manjarres
Since iommu_pgsize can calculate how many pages of the
same size can be mapped/unmapped before the next largest
page size boundary, add support for invoking an IOMMU
driver's map_pages() callback, if it provides one.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/iommu.c | 43 +++
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b3aa9548a38e..dfe7bb39e00d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2412,6 +2412,30 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
return pgsize;
 }
 
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t size, int prot,
+gfp_t gfp, size_t *mapped)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+   int ret;
+
+   pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %ld\n",
+iova, &paddr, pgsize, count);
+
+   if (ops->map_pages) {
+   ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+gfp, mapped);
+   } else {
+   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   *mapped = ret ? 0 : pgsize;
+   }
+
+   return ret;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2422,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
 
-   if (unlikely(ops->map == NULL ||
+   if (unlikely(!(ops->map || ops->map_pages) ||
 domain->pgsize_bitmap == 0UL))
return -ENODEV;
 
@@ -2446,18 +2470,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
+   size_t mapped = 0;
 
-   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
-iova, &paddr, pgsize);
-   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+   &mapped);
+   /*
+* Some pages may have been mapped, even if an error occurred,
+* so we should account for those so they can be unmapped.
+*/
+   size -= mapped;
 
if (ret)
break;
 
-   iova += pgsize;
-   paddr += pgsize;
-   size -= pgsize;
+   iova += mapped;
+   paddr += mapped;
}
 
/* unroll mapping in case something went wrong */

[RFC PATCH v3 10/12] iommu/io-pgtable-arm: Implement arm_lpae_map_pages()

2021-04-05 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 95 +++---
 1 file changed, 88 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index fc63d57b8037..b8464305f1c2 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -355,20 +355,35 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
- phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t pgcount,
+ arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+ gfp_t gfp, size_t *mapped)
 {
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
size_t tblsz = ARM_LPAE_GRANULE(data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   int ret = 0;
 
/* Find our entry at the current level */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
 
-   /* If we can install a leaf entry at this level, then do so */
-   if (size == block_size)
-   return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+   /* If we can install leaf entries at this level, then do so */
+   if (size == block_size) {
+   while (pgcount--) {
+   ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+   if (ret)
+   return ret;
+
+   iova += size;
+   paddr += size;
+   ptep++;
+   if (mapped)
+   *mapped += size;
+   }
+
+   return ret;
+   }
 
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -397,7 +412,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
}
 
/* Rinse, repeat */
-   return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
+   return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1, cptep,
+ gfp, mapped);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -487,7 +503,71 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0;
 
prot = arm_lpae_prot_to_pte(data, iommu_prot);
-   ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+   ret = __arm_lpae_map(data, iova, paddr, size, 1, prot, lvl, ptep, gfp,
+NULL);
+   /*
+* Synchronise all PTE updates for the new mapping before there's
+* a chance for anything to kick off a table walk for the new iova.
+*/
+   wmb();
+
+   return ret;
+}
+
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   arm_lpae_iopte *ptep = data->pgd;
+   int ret, lvl = data->start_level, last_lvl;
+   arm_lpae_iopte prot;
+   long iaext = (s64)iova >> cfg->ias;
+   size_t table_size, pages, tbl_offset, max_entries;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
+   return -EINVAL;
+
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+   iaext = ~iaext;
+   if (WARN_ON(iaext || paddr >> cfg->oas))
+   return -ERANGE;
+
+   prot = arm_lpae_prot_to_pte(data, iommu_prot);
+
+   /*
+* Calculating the page table size here helps avoid situations where
+* a page range that is being mapped may be mapped at the same level
+* but not mapped by the same tables. Allowing such a scenario to
+* occur can complicate the logic in __arm_lpae_map().
+*/
+   last_lvl = ARM_LPAE_BLOCK_SIZE_LVL(pgsize, data);
+
+   if (last_lvl == data->start_level)
+   table_size = ARM_LPAE_PGD_SIZE(data);
+   else
+   table_size = ARM_LPAE_GRANULE(data);
+
+   max_entries = table_size / sizeof(*ptep);
+
+   while (pgcount) {
+   tbl_offset = ARM_LPAE_LVL_IDX(iova, last_lvl, data);

[RFC PATCH v3 11/12] iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback

2021-04-05 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM SMMU driver
to allow calls from iommu_unmap to unmap multiple pages of
the same size in one call.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfde6a61..f29f1fb109f8 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1225,6 +1225,24 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret;
 }
 
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+  size_t pgsize, size_t pgcount,
+  struct iommu_iotlb_gather *iotlb_gather)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   size_t ret;
+
+   if (!ops)
+   return 0;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 {
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1625,6 +1643,7 @@ static struct iommu_ops arm_smmu_ops = {
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
.unmap  = arm_smmu_unmap,
+   .unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys   = arm_smmu_iova_to_phys,

[RFC PATCH v3 01/12] iommu/io-pgtable: Introduce unmap_pages() as a page table op

2021-04-05 Thread Isaac J. Manjarres
The io-pgtable code expects to operate on a single block or
granule of memory that is supported by the IOMMU hardware when
unmapping memory.

This means that when a large buffer that consists of multiple
such blocks is unmapped, the io-pgtable code will walk the page
tables to the correct level to unmap each block, even for blocks
that are virtually contiguous and at the same level, which can
incur a performance overhead.

Introduce the unmap_pages() page table op to express to the
io-pgtable code that it should unmap a number of blocks of
the same size, instead of a single block. Doing so allows
multiple blocks to be unmapped in one call to the io-pgtable
code, reducing the number of page table walks and indirect
calls.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..2ed0c057d9e7 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -144,6 +144,7 @@ struct io_pgtable_cfg {
  *
  * @map:  Map a physically contiguous memory region.
  * @unmap:Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -154,6 +155,9 @@ struct io_pgtable_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
+   size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
 };

[RFC PATCH v3 07/12] iommu: Hook up '->unmap_pages' driver callback

2021-04-05 Thread Isaac J. Manjarres
From: Will Deacon 

Extend iommu_pgsize() to populate an optional 'count' parameter so that
we can direct the unmapping operation to the ->unmap_pages callback if it
has been provided by the driver.

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 60 ---
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a3bbf7e310b0..b3aa9548a38e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2358,11 +2358,11 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
-  phys_addr_t paddr, size_t size)
+  phys_addr_t paddr, size_t size, size_t *count)
 {
-   unsigned int pgsize_idx;
+   unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
-   size_t pgsize;
+   size_t offset, pgsize, pgsize_next;
phys_addr_t addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
@@ -2378,7 +2378,37 @@ static size_t iommu_pgsize(struct iommu_domain *domain, 
unsigned long iova,
/* Pick the biggest page size remaining */
pgsize_idx = __fls(pgsizes);
pgsize = BIT_ULL(pgsize_idx);
+   if (!count)
+   return pgsize;
 
+
+   /* Find the next biggest supported page size, if it exists */
+   pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+   if (!pgsizes)
+   goto out_set_count;
+
+   pgsize_idx_next = __ffs(pgsizes);
+   pgsize_next = BIT(pgsize_idx_next);
+
+   /*
+* There's no point trying a bigger page size unless the virtual
+* and physical addresses are similarly offset within the larger page.
+*/
+   if ((iova ^ paddr) & (pgsize_next - 1))
+   goto out_set_count;
+
+   /* Calculate the offset to the next page size alignment boundary */
+   offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+   /*
+* If size is big enough to accommodate the larger page, reduce
+* the number of smaller pages.
+*/
+   if (offset + pgsize_next <= size)
+   size = offset;
+
+out_set_count:
+   *count = size >> pgsize_idx;
return pgsize;
 }
 
@@ -2416,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2467,6 +2497,19 @@ int iommu_map_atomic(struct iommu_domain *domain, 
unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *iotlb_gather)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+
+   pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+   return ops->unmap_pages ?
+  ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+  ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2476,7 +2519,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
 
-   if (unlikely(ops->unmap == NULL ||
+   if (unlikely(!(ops->unmap || ops->unmap_pages) ||
 domain->pgsize_bitmap == 0UL))
return 0;
 
@@ -2504,10 +2547,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize;
-
-   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
-   unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+   unmapped_page = __iommu_unmap_pages(domain, iova,
+   size - unmapped,
+   iotlb_gather);
if (!unmapped_page)
break;
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[RFC PATCH v3 06/12] iommu: Split 'addr_merge' argument to iommu_pgsize() into separate parts

2021-04-05 Thread Isaac J. Manjarres
From: Will Deacon 

The 'addr_merge' parameter to iommu_pgsize() is a fabricated address
intended to describe the alignment requirements to consider when
choosing an appropriate page size. On the iommu_map() path, this address
is the logical OR of the virtual and physical addresses.

Subsequent improvements to iommu_pgsize() will need to check the
alignment of the virtual and physical components of 'addr_merge'
independently, so pass them in as separate parameters and reconstruct
'addr_merge' locally.

No functional change.
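
For instance (values chosen purely for illustration), iova = 0x200000 with
paddr = 0x201000 gives addr_merge = 0x201000, which correctly limits the
page size chosen now to 4 KB; but deciding later whether a larger page size
is worth attempting needs (iova ^ paddr) = 0x1000, which cannot be
recovered from addr_merge alone.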

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9006397b6604..a3bbf7e310b0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2357,12 +2357,13 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t paddr, size_t size)
 {
unsigned int pgsize_idx;
unsigned long pgsizes;
size_t pgsize;
+   phys_addr_t addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
pgsizes = domain->pgsize_bitmap & GENMASK_ULL(__fls(size), 0);
@@ -2415,7 +2416,7 @@ static int __iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2503,8 +2504,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
+   size_t pgsize;
 
+   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[RFC PATCH v3 04/12] iommu: Add a map_pages() op for IOMMU drivers

2021-04-05 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can
call into the io-pgtable code, to map a physically contiguous
range of pages of the same size.

For IOMMU drivers that do not specify a map_pages() callback,
the existing logic of mapping memory one page block at a time
will be used.
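
As an illustration of the intended driver-side contract (a sketch, not code
from this series; foo_map_one() is a made-up helper standing in for whatever
a driver already uses to map a single page block), note that @mapped must
report progress even on failure so the core can unwind:

	static int foo_map_pages(struct iommu_domain *domain, unsigned long iova,
				 phys_addr_t paddr, size_t pgsize, size_t pgcount,
				 int prot, gfp_t gfp, size_t *mapped)
	{
		int ret = 0;

		*mapped = 0;
		while (pgcount--) {
			/* foo_map_one() is hypothetical: maps one block. */
			ret = foo_map_one(domain, iova, paddr, pgsize, prot, gfp);
			if (ret)
				break;	/* *mapped keeps the successful total */
			iova += pgsize;
			paddr += pgsize;
			*mapped += pgsize;
		}

		return ret;
	}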

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/iommu.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9cf81242581a..528d6a58479e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ * an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
@@ -244,6 +246,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[RFC PATCH v3 03/12] iommu/io-pgtable: Introduce map_pages() as a page table op

2021-04-05 Thread Isaac J. Manjarres
Mapping memory into io-pgtables follows the same semantics
that unmapping memory used to follow (i.e. a buffer will be
mapped one page block per call to the io-pgtable code). This
means that it can be optimized in the same way that unmapping
memory was, so add a map_pages() callback to the io-pgtable
ops structure, so that a range of pages of the same size
can be mapped within the same call.
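
Schematically (a caller-side sketch under the same illustrative names used
earlier), this turns a loop of per-block map calls into one call whose
'mapped' out-parameter reports progress:

	size_t mapped = 0;
	int i, ret = 0;

	/* Before: one indirect call per 2 MB block. */
	for (i = 0; i < 8 && !ret; i++)
		ret = ops->map(ops, iova + i * SZ_2M, paddr + i * SZ_2M,
			       SZ_2M, prot, GFP_KERNEL);

	/* After: one call; 'mapped' lets the caller unwind on failure. */
	ret = ops->map_pages(ops, iova, paddr, SZ_2M, 8, prot, GFP_KERNEL,
			     &mapped);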

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 2ed0c057d9e7..019149b204b8 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -143,6 +143,7 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_pages:Map a physically contiguous range of pages of the same size.
  * @unmap:Unmap a physically contiguous memory region.
  * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
@@ -153,6 +154,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[RFC PATCH v3 00/12] Optimizing iommu_[map/unmap] performance

2021-04-05 Thread Isaac J. Manjarres
When unmapping a buffer from an IOMMU domain, the IOMMU framework unmaps
the buffer at a granule of the largest page size that is supported by
the IOMMU hardware and fits within the buffer. For every block that
is unmapped, the IOMMU framework will call into the IOMMU driver, and
then the io-pgtable framework to walk the page tables to find the entry
that corresponds to the IOVA, and then unmaps the entry.

This can be suboptimal in scenarios where a buffer or a piece of a
buffer can be split into several contiguous page blocks of the same size.
For example, consider an IOMMU that supports 4 KB page blocks, 2 MB page
blocks, and 1 GB page blocks, and a buffer that is 4 MB in size is being
unmapped at IOVA 0. The current call-flow will result in 4 indirect calls,
and 2 page table walks, to unmap 2 entries that are next to each other in
the page-tables, when both entries could have been unmapped in one shot
by clearing both page table entries in the same call.
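
To make the savings concrete, the call-flow for that 4 MB example (two
2 MB mappings) changes roughly as follows:

	before:	iommu_unmap()
		  -> ops->unmap(iova,         SZ_2M) -> io-pgtable walk #1
		  -> ops->unmap(iova + SZ_2M, SZ_2M) -> io-pgtable walk #2
		(4 indirect calls, 2 walks)

	after:	iommu_unmap()
		  -> ops->unmap_pages(iova, SZ_2M, 2) -> one walk clearing
							 both PTEs
		(2 indirect calls, 1 walk)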

The same optimization is applicable to mapping buffers as well, so
these patches add a pair of callbacks, map_pages() and unmap_pages(),
to the io-pgtable code and IOMMU drivers. These callbacks map or unmap
an IOVA range that consists of a number of pages of the same page size
that is supported by the IOMMU hardware, allowing multiple page table
entries to be manipulated in one set of indirect calls. The reason for
introducing these callbacks is to give other IOMMU drivers and
io-pgtable formats time to convert to the new callbacks, so that the
transition can be done piecemeal.

Changes since V2:

* Added a check in __iommu_map() to check for the existence
  of either the map or map_pages callback as per Lu's suggestion.

Changes since V1:

* Implemented the map_pages() callbacks
* Integrated Will's patches into this series which
  address several concerns about how iommu_pgsize() partitioned a
  buffer (I made a minor change to the patch which changes
  iommu_pgsize() to use bitmaps by using the ULL variants of
  the bitops)

Isaac J. Manjarres (9):
  iommu/io-pgtable: Introduce unmap_pages() as a page table op
  iommu: Add an unmap_pages() op for IOMMU drivers
  iommu/io-pgtable: Introduce map_pages() as a page table op
  iommu: Add a map_pages() op for IOMMU drivers
  iommu: Add support for the map_pages() callback
  iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()
  iommu/io-pgtable-arm: Implement arm_lpae_map_pages()
  iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback
  iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

Will Deacon (3):
  iommu: Use bitmap to calculate page size in iommu_pgsize()
  iommu: Split 'addr_merge' argument to iommu_pgsize() into separate
parts
  iommu: Hook up '->unmap_pages' driver callback

 drivers/iommu/arm/arm-smmu/arm-smmu.c |  38 +
 drivers/iommu/io-pgtable-arm.c| 219 ++
 drivers/iommu/iommu.c | 130 +++
 include/linux/io-pgtable.h|   8 +
 include/linux/iommu.h |   9 ++
 5 files changed, 344 insertions(+), 60 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 01/12] iommu/io-pgtable: Introduce unmap_pages() as a page table op

2021-04-01 Thread Isaac J. Manjarres
The io-pgtable code expects to operate on a single block or
granule of memory that is supported by the IOMMU hardware when
unmapping memory.

This means that when a large buffer that consists of multiple
such blocks is unmapped, the io-pgtable code will walk the page
tables to the correct level to unmap each block, even for blocks
that are virtually contiguous and at the same level, which can
incur a performance overhead.

Introduce the unmap_pages() page table op to express to the
io-pgtable code that it should unmap a number of blocks of
the same size, instead of a single block. Doing so allows
multiple blocks to be unmapped in one call to the io-pgtable
code, reducing the number of page table walks and indirect calls.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..2ed0c057d9e7 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -144,6 +144,7 @@ struct io_pgtable_cfg {
  *
  * @map:  Map a physically contiguous memory region.
  * @unmap:Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -154,6 +155,9 @@ struct io_pgtable_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
+   size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
 };
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 10/12] iommu/io-pgtable-arm: Implement arm_lpae_map_pages()

2021-04-01 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 95 +++---
 1 file changed, 88 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index fc63d57b8037..b8464305f1c2 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -355,20 +355,35 @@ static arm_lpae_iopte 
arm_lpae_install_table(arm_lpae_iopte *table,
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
- phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t pgcount,
+ arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+ gfp_t gfp, size_t *mapped)
 {
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
size_t tblsz = ARM_LPAE_GRANULE(data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   int ret = 0;
 
/* Find our entry at the current level */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
 
-   /* If we can install a leaf entry at this level, then do so */
-   if (size == block_size)
-   return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+   /* If we can install leaf entries at this level, then do so */
+   if (size == block_size) {
+   while (pgcount--) {
+   ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, 
ptep);
+   if (ret)
+   return ret;
+
+   iova += size;
+   paddr += size;
+   ptep++;
+   if (mapped)
+   *mapped += size;
+   }
+
+   return ret;
+   }
 
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -397,7 +412,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, 
unsigned long iova,
}
 
/* Rinse, repeat */
-   return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, 
gfp);
+   return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1, 
cptep,
+ gfp, mapped);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -487,7 +503,71 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return 0;
 
prot = arm_lpae_prot_to_pte(data, iommu_prot);
-   ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+   ret = __arm_lpae_map(data, iova, paddr, size, 1, prot, lvl, ptep, gfp,
+NULL);
+   /*
+* Synchronise all PTE updates for the new mapping before there's
+* a chance for anything to kick off a table walk for the new iova.
+*/
+   wmb();
+
+   return ret;
+}
+
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   arm_lpae_iopte *ptep = data->pgd;
+   int ret, lvl = data->start_level, last_lvl;
+   arm_lpae_iopte prot;
+   long iaext = (s64)iova >> cfg->ias;
+   size_t table_size, pages, tbl_offset, max_entries;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
+   return -EINVAL;
+
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+   iaext = ~iaext;
+   if (WARN_ON(iaext || paddr >> cfg->oas))
+   return -ERANGE;
+
+   prot = arm_lpae_prot_to_pte(data, iommu_prot);
+
+   /*
+* Calculating the page table size here helps avoid situations where
+* a page range that is being mapped may be mapped at the same level
+* but not mapped by the same tables. Allowing such a scenario to
+* occur can complicate the logic in __arm_lpae_map().
+*/
+   last_lvl = ARM_LPAE_BLOCK_SIZE_LVL(pgsize, data);
+
+   if (last_lvl == data->start_level)
+   table_size = ARM_LPAE_PGD_SIZE(data);
+   else
+   table_size = ARM_LPAE_GRANULE(data);
+
+   max_entries = table_size / sizeof(*ptep);
+
+   while (pgcount) {
+   tbl_offset = ARM_LPAE_LVL_IDX(iova, last_lvl, data);

[PATCH v2 09/12] iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()

2021-04-01 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 124 +++--
 1 file changed, 104 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..fc63d57b8037 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -60,6 +60,14 @@
 /* Calculate the block/page mapping size at level l for pagetable in d. */
 #define ARM_LPAE_BLOCK_SIZE(l,d)   (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
 
+/*
+ * Calculate the level that corresponds to the block/page mapping for pagetable
+ * in d.
+ */
+#define ARM_LPAE_BLOCK_SIZE_LVL(s, d)  \
+   ((ARM_LPAE_MAX_LEVELS - \
+ ((ilog2((s)) - ilog2(sizeof(arm_lpae_iopte))) / (d)->bits_per_level)))
+
 /* Page table bits */
 #define ARM_LPAE_PTE_TYPE_SHIFT0
 #define ARM_LPAE_PTE_TYPE_MASK 0x3
@@ -248,10 +256,26 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, 
arm_lpae_iopte pte,
__arm_lpae_sync_pte(ptep, cfg);
 }
 
+static void __arm_lpae_sync_ptes(arm_lpae_iopte *ptep, size_t num_ptes,
+struct io_pgtable_cfg *cfg)
+{
+   dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
+  sizeof(*ptep) * num_ptes, DMA_TO_DEVICE);
+}
+
+static void __arm_lpae_clear_ptes(arm_lpae_iopte *ptep, size_t num_ptes,
+ struct io_pgtable_cfg *cfg)
+{
+   memset(ptep, 0, sizeof(*ptep) * num_ptes);
+
+   if (!cfg->coherent_walk)
+   __arm_lpae_sync_ptes(ptep, num_ptes, cfg);
+}
+
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep);
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
@@ -289,7 +313,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable 
*data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
+   if (__arm_lpae_unmap(data, NULL, iova, sz, 1, lvl, tblp) != sz) 
{
WARN_ON(1);
return -EINVAL;
}
@@ -516,14 +540,14 @@ static size_t arm_lpae_split_blk_unmap(struct 
arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
   unsigned long iova, size_t size,
   arm_lpae_iopte blk_pte, int lvl,
-  arm_lpae_iopte *ptep)
+  arm_lpae_iopte *ptep, size_t pgcount)
 {
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-   int i, unmap_idx = -1;
+   int i, unmap_idx_start = -1;
 
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
@@ -533,14 +557,14 @@ static size_t arm_lpae_split_blk_unmap(struct 
arm_lpae_io_pgtable *data,
return 0; /* Bytes unmapped */
 
if (size == split_sz)
-   unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
 
for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
/* Unmap! */
-   if (i == unmap_idx)
+   if (i >= unmap_idx_start && i < (unmap_idx_start + pgcount))
continue;
 
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
@@ -558,20 +582,24 @@ static size_t arm_lpae_split_blk_unmap(struct 
arm_lpae_io_pgtable *data,
return 0;
 
tablep = iopte_deref(pte, data);
-   } else if (unmap_idx >= 0) {
-   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-   return size;
+   } else if (unmap_idx_start >= 0) {
+   for (i = 0; i < pgcount; i++) {
+   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
+   iova += size;
+   }

[PATCH v2 07/12] iommu: Hook up '->unmap_pages' driver callback

2021-04-01 Thread Isaac J. Manjarres
From: Will Deacon 

Extend iommu_pgsize() to populate an optional 'count' parameter so that
we can direct the unmapping operation to the ->unmap_pages callback if it
has been provided by the driver.

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 60 ---
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a3bbf7e310b0..5cae2a29fdc9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2358,11 +2358,11 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
-  phys_addr_t paddr, size_t size)
+  phys_addr_t paddr, size_t size, size_t *count)
 {
-   unsigned int pgsize_idx;
+   unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
-   size_t pgsize;
+   size_t offset, pgsize, pgsize_next;
phys_addr_t addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
@@ -2378,7 +2378,37 @@ static size_t iommu_pgsize(struct iommu_domain *domain, 
unsigned long iova,
/* Pick the biggest page size remaining */
pgsize_idx = __fls(pgsizes);
pgsize = BIT_ULL(pgsize_idx);
+   if (!count)
+   return pgsize;
 
+
+   /* Find the next biggest supported page size, if it exists */
+   pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+   if (!pgsizes)
+   goto out_set_count;
+
+   pgsize_idx_next = __ffs(pgsizes);
+   pgsize_next = BIT(pgsize_idx_next);
+
+   /*
+* There's no point trying a bigger page size unless the virtual
+* and physical addresses are similarly offset within the larger page.
+*/
+   if ((iova ^ paddr) & (pgsize_next - 1))
+   goto out_set_count;
+
+   /* Calculate the offset to the next page size alignment boundary */
+   offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+   /*
+* If size is big enough to accommodate the larger page, reduce
+* the number of smaller pages.
+*/
+   if (offset + pgsize_next <= size)
+   size = offset;
+
+out_set_count:
+   *count = size >> pgsize_idx;
return pgsize;
 }
 
@@ -2416,7 +2446,7 @@ static int __iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2467,6 +2497,19 @@ int iommu_map_atomic(struct iommu_domain *domain, 
unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *iotlb_gather)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+
+   pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+   return ops->unmap_pages ?
+  ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+  ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2476,7 +2519,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
 
-   if (unlikely(ops->unmap == NULL ||
+   if (unlikely((ops->unmap == NULL && ops->unmap_pages == NULL) ||
 domain->pgsize_bitmap == 0UL))
return 0;
 
@@ -2504,10 +2547,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize;
-
-   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
-   unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+   unmapped_page = __iommu_unmap_pages(domain, iova,
+   size - unmapped,
+   iotlb_gather);
if (!unmapped_page)
break;
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[PATCH v2 12/12] iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

2021-04-01 Thread Isaac J. Manjarres
Implement the map_pages() callback for the ARM SMMU driver
to allow calls from iommu_map to map multiple pages of
the same size in one call.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index f29f1fb109f8..fe7a452ce24e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,6 +1208,24 @@ static int arm_smmu_map(struct iommu_domain *domain, 
unsigned long iova,
return ret;
 }
 
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   int ret;
+
+   if (!ops)
+   return -ENODEV;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, 
mapped);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *gather)
 {
@@ -1642,6 +1660,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
+   .map_pages  = arm_smmu_map_pages,
.unmap  = arm_smmu_unmap,
.unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 06/12] iommu: Split 'addr_merge' argument to iommu_pgsize() into separate parts

2021-04-01 Thread Isaac J. Manjarres
From: Will Deacon 

The 'addr_merge' parameter to iommu_pgsize() is a fabricated address
intended to describe the alignment requirements to consider when
choosing an appropriate page size. On the iommu_map() path, this address
is the logical OR of the virtual and physical addresses.

Subsequent improvements to iommu_pgsize() will need to check the
alignment of the virtual and physical components of 'addr_merge'
independently, so pass them in as separate parameters and reconstruct
'addr_merge' locally.

No functional change.

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9006397b6604..a3bbf7e310b0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2357,12 +2357,13 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t paddr, size_t size)
 {
unsigned int pgsize_idx;
unsigned long pgsizes;
size_t pgsize;
+   phys_addr_t addr_merge = paddr | iova;
 
/* Page sizes supported by the hardware and small enough for @size */
pgsizes = domain->pgsize_bitmap & GENMASK_ULL(__fls(size), 0);
@@ -2415,7 +2416,7 @@ static int __iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+   size_t pgsize = iommu_pgsize(domain, iova, paddr, size);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2503,8 +2504,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
+   size_t pgsize;
 
+   pgsize = iommu_pgsize(domain, iova, iova, size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
break;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[RFC PATCH v2 00/12] Optimizing iommu_[map/unmap] performance

2021-04-01 Thread Isaac J. Manjarres
When unmapping a buffer from an IOMMU domain, the IOMMU framework unmaps
the buffer at a granule of the largest page size that is supported by
the IOMMU hardware and fits within the buffer. For every block that
is unmapped, the IOMMU framework will call into the IOMMU driver, and
then the io-pgtable framework to walk the page tables to find the entry
that corresponds to the IOVA, and then unmaps the entry.

This can be suboptimal in scenarios where a buffer or a piece of a
buffer can be split into several contiguous page blocks of the same size.
For example, consider an IOMMU that supports 4 KB page blocks, 2 MB page
blocks, and 1 GB page blocks, and a buffer that is 4 MB in size is being
unmapped at IOVA 0. The current call-flow will result in 4 indirect calls,
and 2 page table walks, to unmap 2 entries that are next to each other in
the page-tables, when both entries could have been unmapped in one shot
by clearing both page table entries in the same call.

The same optimization is applicable to mapping buffers as well, so
these patches add a pair of callbacks, map_pages() and unmap_pages(),
to the io-pgtable code and IOMMU drivers. These callbacks map or unmap
an IOVA range that consists of a number of pages of the same page size
that is supported by the IOMMU hardware, allowing multiple page table
entries to be manipulated in one set of indirect calls. The reason for
introducing these callbacks is to give other IOMMU drivers and
io-pgtable formats time to convert to the new callbacks, so that the
transition can be done piecemeal.

Changes since V1:

* Implemented the map_pages() callbacks
* Integrated Will's patches into this series which
  address several concerns about how iommu_pgsize() partitioned a
  buffer (I made a minor change to the patch which changes
  iommu_pgsize() to use bitmaps by using the ULL variants of
  the bitops)

Any feedback is very much appreciated.

Isaac J. Manjarres (9):
  iommu/io-pgtable: Introduce unmap_pages() as a page table op
  iommu: Add an unmap_pages() op for IOMMU drivers
  iommu/io-pgtable: Introduce map_pages() as a page table op
  iommu: Add a map_pages() op for IOMMU drivers
  iommu: Add support for the map_pages() callback
  iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()
  iommu/io-pgtable-arm: Implement arm_lpae_map_pages()
  iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback
  iommu/arm-smmu: Implement the map_pages() IOMMU driver callback

Will Deacon (3):
  iommu: Use bitmap to calculate page size in iommu_pgsize()
  iommu: Split 'addr_merge' argument to iommu_pgsize() into separate
parts
  iommu: Hook up '->unmap_pages' driver callback

 drivers/iommu/arm/arm-smmu/arm-smmu.c |  38 +
 drivers/iommu/io-pgtable-arm.c| 219 ++
 drivers/iommu/iommu.c | 128 +++
 include/linux/io-pgtable.h|   8 +
 include/linux/iommu.h |   9 ++
 5 files changed, 343 insertions(+), 59 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 08/12] iommu: Add support for the map_pages() callback

2021-04-01 Thread Isaac J. Manjarres
Since iommu_pgsize can calculate how many pages of the
same size can be mapped/unmapped before the next largest
page size boundary, add support for invoking an IOMMU
driver's map_pages() callback, if it provides one.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/iommu.c | 41 ++---
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5cae2a29fdc9..167983195858 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2412,6 +2412,30 @@ static size_t iommu_pgsize(struct iommu_domain *domain, 
unsigned long iova,
return pgsize;
 }
 
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t size, int prot,
+gfp_t gfp, size_t *mapped)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, count;
+   int ret;
+
+   pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
+iova, &paddr, pgsize, count);
+
+   if (ops->map_pages) {
+   ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+gfp, mapped);
+   } else {
+   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   *mapped = ret ? 0 : pgsize;
+   }
+
+   return ret;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2446,18 +2470,21 @@ static int __iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova, paddr, size, NULL);
+   size_t mapped = 0;
 
-   pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
-iova, &paddr, pgsize);
-   ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+   ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+   &mapped);
+   /*
+* Some pages may have been mapped, even if an error occurred,
+* so we should account for those so they can be unmapped.
+*/
+   size -= mapped;
 
if (ret)
break;
 
-   iova += pgsize;
-   paddr += pgsize;
-   size -= pgsize;
+   iova += mapped;
+   paddr += mapped;
}
 
/* unroll mapping in case something went wrong */
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 11/12] iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback

2021-04-01 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM SMMU driver
to allow calls from iommu_unmap to unmap multiple pages of
the same size in one call.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfde6a61..f29f1fb109f8 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1225,6 +1225,24 @@ static size_t arm_smmu_unmap(struct iommu_domain 
*domain, unsigned long iova,
return ret;
 }
 
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long 
iova,
+  size_t pgsize, size_t pgcount,
+  struct iommu_iotlb_gather *iotlb_gather)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   size_t ret;
+
+   if (!ops)
+   return 0;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 {
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1625,6 +1643,7 @@ static struct iommu_ops arm_smmu_ops = {
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
.unmap  = arm_smmu_unmap,
+   .unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys   = arm_smmu_iova_to_phys,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 03/12] iommu/io-pgtable: Introduce map_pages() as a page table op

2021-04-01 Thread Isaac J. Manjarres
Mapping memory into io-pgtables follows the same semantics
that unmapping memory used to follow (i.e. a buffer will be
mapped one page block per call to the io-pgtable code). This
means that it can be optimized in the same way that unmapping
memory was, so add a map_pages() callback to the io-pgtable
ops structure, so that a range of pages of the same size
can be mapped within the same call.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 2ed0c057d9e7..019149b204b8 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -143,6 +143,7 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_pages:Map a physically contiguous range of pages of the same size.
  * @unmap:Unmap a physically contiguous memory region.
  * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
@@ -153,6 +154,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 05/12] iommu: Use bitmap to calculate page size in iommu_pgsize()

2021-04-01 Thread Isaac J. Manjarres
From: Will Deacon 

Avoid the potential for shifting values by amounts greater than the
width of their type by using a bitmap to compute page size in
iommu_pgsize().
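
Concretely (a sketch of the hazard, not code from this patch): on a 32-bit
build, an unmap size with bit 31 set makes __fls(size) return 31, so the
old mask computation shifts a 32-bit value by 32 bits, which is undefined
behaviour:

	/* size = 0x80000000 (2 GB) on 32-bit: pgsize_idx == 31 */
	pgsize = (1UL << (pgsize_idx + 1)) - 1;	/* old: shift by 32 is UB */
	pgsizes = GENMASK_ULL(pgsize_idx, 0);	/* new: 64-bit-safe mask  */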

Signed-off-by: Will Deacon 
Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15dba84..9006397b6604 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2360,30 +2361,22 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
   unsigned long addr_merge, size_t size)
 {
unsigned int pgsize_idx;
+   unsigned long pgsizes;
size_t pgsize;
 
-   /* Max page size that still fits into 'size' */
-   pgsize_idx = __fls(size);
+   /* Page sizes supported by the hardware and small enough for @size */
+   pgsizes = domain->pgsize_bitmap & GENMASK_ULL(__fls(size), 0);
 
-   /* need to consider alignment requirements ? */
-   if (likely(addr_merge)) {
-   /* Max page size allowed by address */
-   unsigned int align_pgsize_idx = __ffs(addr_merge);
-   pgsize_idx = min(pgsize_idx, align_pgsize_idx);
-   }
-
-   /* build a mask of acceptable page sizes */
-   pgsize = (1UL << (pgsize_idx + 1)) - 1;
-
-   /* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
+   /* Constrain the page sizes further based on the maximum alignment */
+   if (likely(addr_merge))
+   pgsizes &= GENMASK_ULL(__ffs(addr_merge), 0);
 
-   /* make sure we're still sane */
-   BUG_ON(!pgsize);
+   /* Make sure we have at least one suitable page size */
+   BUG_ON(!pgsizes);
 
-   /* pick the biggest page */
-   pgsize_idx = __fls(pgsize);
-   pgsize = 1UL << pgsize_idx;
+   /* Pick the biggest page size remaining */
+   pgsize_idx = __fls(pgsizes);
+   pgsize = BIT_ULL(pgsize_idx);
 
return pgsize;
 }
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 04/12] iommu: Add a map_pages() op for IOMMU drivers

2021-04-01 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can
call into the io-pgtable code, to map a physically contiguous
range of pages of the same size.

For IOMMU drivers that do not specify a map_pages() callback,
the existing logic of mapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/iommu.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9cf81242581a..528d6a58479e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ * an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
@@ -244,6 +246,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 02/12] iommu: Add an unmap_pages() op for IOMMU drivers

2021-04-01 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can call
into the io-pgtable code, to unmap a virtually contiguous
range of pages of the same size.

For IOMMU drivers that do not specify an unmap_pages() callback,
the existing logic of unmapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
Signed-off-by: Will Deacon 
---
 include/linux/iommu.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe519430a..9cf81242581a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -193,6 +193,7 @@ struct iommu_iotlb_gather {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -245,6 +246,9 @@ struct iommu_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
+   size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
   size_t size);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[RFC PATCH 4/5] iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()

2021-03-30 Thread Isaac J. Manjarres
Implement the unmap_pages() callback for the ARM LPAE io-pgtable
format.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/io-pgtable-arm.c | 114 +++--
 1 file changed, 94 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..6eccebf1744d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -248,10 +248,26 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, 
arm_lpae_iopte pte,
__arm_lpae_sync_pte(ptep, cfg);
 }
 
+static void __arm_lpae_sync_ptes(arm_lpae_iopte *ptep, size_t num_ptes,
+struct io_pgtable_cfg *cfg)
+{
+   dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
+  sizeof(*ptep) * num_ptes, DMA_TO_DEVICE);
+}
+
+static void __arm_lpae_clear_ptes(arm_lpae_iopte *ptep, size_t num_ptes,
+ struct io_pgtable_cfg *cfg)
+{
+   memset(ptep, 0, sizeof(*ptep) * num_ptes);
+
+   if (!cfg->coherent_walk)
+   __arm_lpae_sync_ptes(ptep, num_ptes, cfg);
+}
+
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep);
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
@@ -289,7 +305,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable 
*data,
size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-   if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
+   if (__arm_lpae_unmap(data, NULL, iova, sz, 1, lvl, tblp) != sz) 
{
WARN_ON(1);
return -EINVAL;
}
@@ -516,14 +532,14 @@ static size_t arm_lpae_split_blk_unmap(struct 
arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
   unsigned long iova, size_t size,
   arm_lpae_iopte blk_pte, int lvl,
-  arm_lpae_iopte *ptep)
+  arm_lpae_iopte *ptep, size_t pgcount)
 {
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-   int i, unmap_idx = -1;
+   int i, unmap_idx_start = -1;
 
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
@@ -533,14 +549,14 @@ static size_t arm_lpae_split_blk_unmap(struct 
arm_lpae_io_pgtable *data,
return 0; /* Bytes unmapped */
 
if (size == split_sz)
-   unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+   unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
 
for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
/* Unmap! */
-   if (i == unmap_idx)
+   if (i >= unmap_idx_start && i < (unmap_idx_start + pgcount))
continue;
 
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
@@ -558,20 +574,24 @@ static size_t arm_lpae_split_blk_unmap(struct 
arm_lpae_io_pgtable *data,
return 0;
 
tablep = iopte_deref(pte, data);
-   } else if (unmap_idx >= 0) {
-   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-   return size;
+   } else if (unmap_idx_start >= 0) {
+   for (i = 0; i < pgcount; i++) {
+   io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
+   iova += size;
+   }
+   return pgcount * size;
}
 
-   return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
+   return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
   struct iommu_iotlb_gather *gather,
-  unsigned long iova, size_t size, int lvl,
-  arm_lpae_iopte *ptep)
+  unsigned long iova, size_t size, size_t pgcount,
+  int lvl, arm_lpae_iopte *ptep)
 {
arm_lpae_io

[RFC PATCH 1/5] iommu/io-pgtable: Introduce unmap_pages() as a page table op

2021-03-30 Thread Isaac J. Manjarres
The io-pgtable code expects to operate on a single block or
granule of memory that is supported by the IOMMU hardware when
unmapping memory.

This means that when a large buffer that consists of multiple
such blocks is unmapped, the io-pgtable code will walk the page
tables to the correct level to unmap each block, even for blocks
that are virtually contiguous and at the same level, which can
incur a performance overhead.

Introduce the unmap_pages() page table op to express to the
io-pgtable code that it should unmap a number of blocks of
the same size, instead of a single block. Doing so allows
multiple blocks to be unmapped in one call to the io-pgtable
code, reducing the number of page table walks and indirect calls.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/io-pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a4c9ca2c31f1..2ed0c057d9e7 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -144,6 +144,7 @@ struct io_pgtable_cfg {
  *
  * @map:  Map a physically contiguous memory region.
  * @unmap:Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -154,6 +155,9 @@ struct io_pgtable_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
+   size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
 };
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[RFC PATCH 2/5] iommu: Add an unmap_pages() op for IOMMU drivers

2021-03-30 Thread Isaac J. Manjarres
Add a callback for IOMMU drivers to provide a path for the
IOMMU framework to call into an IOMMU driver, which can call
into the io-pgtable code, to unmap a virtually contiguous
range of pages of the same size.

For IOMMU drivers that do not specify an unmap_pages() callback,
the existing logic of unmapping memory one page block at a time
will be used.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 include/linux/iommu.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe519430a..9cf81242581a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -193,6 +193,7 @@ struct iommu_iotlb_gather {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -245,6 +246,9 @@ struct iommu_ops {
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
+   size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
   size_t size);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC PATCH 0/5] Optimization for unmapping iommu mapped buffers

2021-03-30 Thread Isaac J. Manjarres
When unmapping a buffer from an IOMMU domain, the IOMMU framework unmaps
the buffer at a granule of the largest page size that is supported by
the IOMMU hardware and fits within the buffer. For every block that
is unmapped, the IOMMU framework will call into the IOMMU driver, and
then into the io-pgtable framework, which walks the page tables to find
the entry that corresponds to the IOVA and then unmaps it.

This can be suboptimal in scenarios where a buffer or a piece of a
buffer can be split into several contiguous page blocks of the same size.
For example, consider an IOMMU that supports 4 KB page blocks, 2 MB page
blocks, and 1 GB page blocks, and a 4 MB buffer that is being unmapped
at IOVA 0. The current call-flow will result in 4 indirect calls,
and 2 page table walks, to unmap 2 entries that are next to each other in
the page-tables, when both entries could have been unmapped in one shot
by clearing both page table entries in the same call.
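
As a sketch of that example (illustrative only; SZ_2M and the gather
argument stand in for the real values at the call site):

	/* Current path: two walks, four indirect calls in total. */
	ops->unmap(domain, 0,     SZ_2M, gather); /* driver + io-pgtable */
	ops->unmap(domain, SZ_2M, SZ_2M, gather); /* driver + io-pgtable */

	/* With unmap_pages: one walk, two indirect calls. */
	ops->unmap_pages(domain, 0, SZ_2M, 2, gather);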

These patches add an unmap_pages callback to the io-pgtable code and
IOMMU drivers which unmaps an IOVA range consisting of a number of
pages of the same IOMMU-supported page size, allowing multiple entries
to be cleared within the same set of indirect calls. unmap_pages is
introduced as a new callback, rather than a change to the existing
unmap callback, to give other IOMMU drivers and io-pgtable formats
time to migrate, so that the transition to this approach can be done
piecemeal.

The same optimization is applicable to mapping buffers; however, errors
in the io-pgtable layer could not be handled cleanly, as we would need
to invoke iommu_unmap to tear down the parts of the buffer that had
been mapped, and then do any TLB maintenance, which seemed like a
layering violation.

Any feedback is very much appreciated.

Thanks,
Isaac

Isaac J. Manjarres (5):
  iommu/io-pgtable: Introduce unmap_pages() as a page table op
  iommu: Add an unmap_pages() op for IOMMU drivers
  iommu: Add support for the unmap_pages IOMMU callback
  iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()
  iommu/arm-smmu: Implement the unmap_pages IOMMU driver callback

 drivers/iommu/arm/arm-smmu/arm-smmu.c |  19 +
 drivers/iommu/io-pgtable-arm.c| 114 +-
 drivers/iommu/iommu.c |  44 --
 include/linux/io-pgtable.h|   4 +
 include/linux/iommu.h |   4 +
 5 files changed, 159 insertions(+), 26 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC PATCH 3/5] iommu: Add support for the unmap_pages IOMMU callback

2021-03-30 Thread Isaac J. Manjarres
The IOMMU framework currently unmaps memory one page block at a time,
per the page block sizes that are supported by the IOMMU hardware.
Now that IOMMU drivers can supply a callback for unmapping multiple
mappings in one call, add support in the IOMMU framework to calculate
how many
page mappings of the same size can be unmapped in one shot, and invoke the
IOMMU driver's unmap_pages callback if it has one. Otherwise, the
existing behavior will be used.
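
As a worked example of the new size/count computation (values are
hypothetical): with pgsize_bitmap = SZ_4K | SZ_2M | SZ_1G, iova = 0 and
size = 4 MB, __iommu_pgsize() picks the 2 MB page size, and the counting
loop in iommu_pgsize() then finds two consecutive 2 MB chunks:

	size_t pgsize, pgcount;

	pgsize = iommu_pgsize(domain, iova, size, &pgcount);
	/* pgsize == SZ_2M, pgcount == 2; one unmap_pages() call suffices. */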

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/iommu.c | 44 +--
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15dba84..dc4295f6bc7f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2356,8 +2356,8 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+static size_t __iommu_pgsize(struct iommu_domain *domain,
+unsigned long addr_merge, size_t size)
 {
unsigned int pgsize_idx;
size_t pgsize;
@@ -2388,6 +2388,24 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
return pgsize;
 }
 
+static size_t iommu_pgsize(struct iommu_domain *domain,
+  unsigned long addr_merge, size_t size,
+  size_t *pgcount)
+{
+   size_t pgsize = __iommu_pgsize(domain, addr_merge, size);
+   size_t pgs = 0;
+
+   do {
+   pgs++;
+   size -= pgsize;
+   addr_merge += pgsize;
+   } while (size && __iommu_pgsize(domain, addr_merge, size) == pgsize);
+
+   *pgcount = pgs;
+
+   return pgsize;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2422,7 +2440,7 @@ static int __iommu_map(struct iommu_domain *domain, 
unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
while (size) {
-   size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+   size_t pgsize = __iommu_pgsize(domain, iova | paddr, size);
 
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 iova, &paddr, pgsize);
@@ -2473,6 +2491,21 @@ int iommu_map_atomic(struct iommu_domain *domain, 
unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain, unsigned long 
iova,
+ size_t size, struct iommu_iotlb_gather 
*iotlb_gather)
+{
+   const struct iommu_ops *ops = domain->ops;
+   size_t pgsize, pgcount;
+
+   if (ops->unmap_pages) {
+   pgsize = iommu_pgsize(domain, iova, size, &pgcount);
+   return ops->unmap_pages(domain, iova, pgsize, pgcount, 
iotlb_gather);
+   }
+
+   pgsize = __iommu_pgsize(domain, iova, size);
+   return ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2510,9 +2543,8 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 * or we hit an area that isn't mapped.
 */
while (unmapped < size) {
-   size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
-
-   unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+   unmapped_page = __iommu_unmap_pages(domain, iova, size - 
unmapped,
+   iotlb_gather);
if (!unmapped_page)
break;
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC PATCH 5/5] iommu/arm-smmu: Implement the unmap_pages IOMMU driver callback

2021-03-30 Thread Isaac J. Manjarres
Implement the unmap_pages IOMMU driver callback for the ARM
SMMU driver.

Signed-off-by: Isaac J. Manjarres 
Suggested-by: Will Deacon 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfde6a61..a65ff92cb6e3 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1225,6 +1225,24 @@ static size_t arm_smmu_unmap(struct iommu_domain 
*domain, unsigned long iova,
return ret;
 }
 
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long 
iova,
+  size_t pgsize, size_t pgcount,
+  struct iommu_iotlb_gather *gather)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   size_t ret;
+
+   if (!ops)
+   return 0;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 {
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1625,6 +1643,7 @@ static struct iommu_ops arm_smmu_ops = {
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
.unmap  = arm_smmu_unmap,
+   .unmap_pages= arm_smmu_unmap_pages,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys   = arm_smmu_iova_to_phys,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/arm-smmu-qcom: Fix mask extraction for bootloader programmed SMRs

2021-01-25 Thread Isaac J. Manjarres
When extracting the mask for a SMR that was programmed by the
bootloader, the SMR's valid bit is also extracted and is treated
as part of the mask, which is not correct. Consider the scenario
where an SMMU master whose context is determined by a bootloader
programmed SMR is removed (omitting parts of device/driver core):

->iommu_release_device()
 -> arm_smmu_release_device()
  -> arm_smmu_master_free_smes()
   -> arm_smmu_free_sme() /* Assume that the SME is now free */
   -> arm_smmu_write_sme()
-> arm_smmu_write_smr() /* Construct SMR value using mask and SID */

Since the valid bit was considered as part of the mask, the SMR will
be programmed as valid.

Fix the SMR mask extraction step for bootloader programmed SMRs
by masking out the valid bit when we know that we're already
working with a valid SMR.
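
A sketch of the failure, using the field definitions from arm-smmu.h
(ARM_SMMU_SMR_VALID is BIT(31) and ARM_SMMU_SMR_MASK is GENMASK(31, 16),
so the mask field overlaps the valid bit); the values are illustrative:

	u32 smr = ARM_SMMU_SMR_VALID | FIELD_PREP(ARM_SMMU_SMR_MASK, 0x7fff);
	u16 mask;

	/* Before the fix: the valid bit leaks into the extracted mask. */
	mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);	/* 0xffff */

	/* With the fix: clear the valid bit before extracting. */
	smr &= ~ARM_SMMU_SMR_VALID;
	mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);	/* 0x7fff */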

Fixes: 07a7f2caaa5a ("iommu/arm-smmu-qcom: Read back stream mappings")
Signed-off-by: Isaac J. Manjarres 
Cc: sta...@vger.kernel.org
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index bcda170..abb1d2f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -206,6 +206,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
 
if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) {
+   /* Ignore valid bit for SMR mask extraction. */
+   smr &= ~ARM_SMMU_SMR_VALID;
smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr);
smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
smmu->smrs[i].valid = true;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 4/5] iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers

2021-01-11 Thread Isaac J. Manjarres
Add support for IOMMU drivers to have their own map_sg() callbacks.
This completes the path for having iommu_map_sg() invoke an IOMMU
driver's map_sg() callback, which can then invoke the io-pgtable
map_sg() callback with the entire scatter-gather list, so that it
can be processed entirely in the io-pgtable layer.

For IOMMU drivers that do not provide a callback, the default
implementation, which iterates through the scatter-gather list while
calling iommu_map(), will be used.

Signed-off-by: Isaac J. Manjarres 
Tested-by: Sai Prakash Ranjan 
---
 drivers/iommu/iommu.c | 13 +
 include/linux/iommu.h |  5 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0da0687..46acd5c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2535,11 +2535,24 @@ static size_t __iommu_map_sg(struct iommu_domain 
*domain, unsigned long iova,
 struct scatterlist *sg, unsigned int nents, int 
prot,
 gfp_t gfp)
 {
+   const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
 
+   if (ops->map_sg) {
+   ret = ops->map_sg(domain, iova, sg, nents, prot, gfp, &mapped);
+
+   if (ops->iotlb_sync_map)
+   ops->iotlb_sync_map(domain);
+
+   if (ret)
+   goto out_err;
+
+   return mapped;
+   }
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0e40a38..bac7681 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_sg: map a scatter-gather list of physically contiguous chunks to
+ *  an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
@@ -243,6 +245,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_sg)(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 1/5] iommu/io-pgtable: Introduce map_sg() as a page table op

2021-01-11 Thread Isaac J. Manjarres
While mapping a scatter-gather list, iommu_map_sg() calls
into the IOMMU driver through an indirect call, which can
call into the io-pgtable code through another indirect call.

This sequence of going through the IOMMU core code, the IOMMU
driver, and finally the io-pgtable code occurs, in the worst case,
for every element in the scatter-gather list, which is not
optimal.

Introduce a map_sg callback in the io-pgtable ops so that
IOMMU drivers can invoke it with the complete scatter-gather
list, so that it can be processed within the io-pgtable
code entirely, reducing the number of indirect calls, and
boosting overall iommu_map_sg() performance.

Signed-off-by: Isaac J. Manjarres 
Tested-by: Sai Prakash Ranjan 
---
 include/linux/io-pgtable.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ea727eb..6d0e731 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,9 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_sg:   Map a scatter-gather list of physically contiguous memory
+ *chunks. The mapped pointer argument is used to store how
+ *many bytes are mapped.
  * @unmap:Unmap a physically contiguous memory region.
  * @iova_to_phys: Translate iova to physical address.
  *
@@ -156,6 +159,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_sg)(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 3/5] iommu/io-pgtable-arm-v7s: Hook up map_sg()

2021-01-11 Thread Isaac J. Manjarres
Implement the map_sg io-pgtable op for the ARMv7s io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.

Signed-off-by: Isaac J. Manjarres 
Tested-by: Sai Prakash Ranjan 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 90 ++
 1 file changed, 90 insertions(+)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac9..8665dab 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -545,6 +545,95 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_v7s_map_by_pgsize(struct io_pgtable_ops *ops,
+unsigned long iova, phys_addr_t paddr,
+size_t size, int prot, gfp_t gfp,
+size_t *mapped)
+{
+   struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable *iop = &data->iop;
+   struct io_pgtable_cfg *cfg = &iop->cfg;
+   unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+   int ret;
+   size_t pgsize;
+
+   if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+   pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 
0x%x\n",
+  iova, &paddr, size, min_pagesz);
+   return -EINVAL;
+   }
+
+   if (WARN_ON((iova + size - 1) >= (1ULL << cfg->ias) ||
+   (paddr + size - 1) >= (1ULL << cfg->oas)))
+   return -ERANGE;
+
+   while (size) {
+   pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+   ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1,
+   data->pgd, gfp);
+
+   if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
+   io_pgtable_tlb_flush_walk(&data->iop, iova, size,
+ ARM_V7S_BLOCK_SIZE(2));
+   } else {
+   wmb();
+   }
+
+   if (ret)
+   return ret;
+
+   iova += pgsize;
+   paddr += pgsize;
+   *mapped += pgsize;
+   size -= pgsize;
+   }
+
+   return 0;
+}
+
+static int arm_v7s_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+   size_t len = 0;
+   unsigned int i = 0;
+   int ret;
+   phys_addr_t start;
+
+   *mapped = 0;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   while (i <= nents) {
+   phys_addr_t s_phys = sg_phys(sg);
+
+   if (len && s_phys != start + len) {
+   ret = arm_v7s_map_by_pgsize(ops, iova + *mapped, start,
+   len, iommu_prot, gfp,
+   mapped);
+
+   if (ret)
+   return ret;
+
+   len = 0;
+   }
+
+   if (len) {
+   len += sg->length;
+   } else {
+   len = sg->length;
+   start = s_phys;
+   }
+
+   if (++i < nents)
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -783,6 +872,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_v7s_map,
+   .map_sg = arm_v7s_map_sg,
.unmap  = arm_v7s_unmap,
.iova_to_phys   = arm_v7s_iova_to_phys,
};
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 0/5] Optimize iommu_map_sg() performance

2021-01-11 Thread Isaac J. Manjarres
The iommu_map_sg() code currently iterates through the given
scatter-gather list, and in the worst case, invokes iommu_map()
for each element in the scatter-gather list, which calls into
the IOMMU driver through an indirect call. For an IOMMU driver
that uses a format supported by the io-pgtable code, the IOMMU
driver will then call into the io-pgtable code to map the chunk.

Jumping between the IOMMU core code, the IOMMU driver, and the
io-pgtable code and back for each element in a scatter-gather list
is not efficient.

Instead, add a map_sg() hook in both the IOMMU driver ops and the
io-pgtable ops. iommu_map_sg() can then call into the IOMMU driver's
map_sg() hook with the entire scatter-gather list, which can call
into the io-pgtable map_sg() hook, which can process the entire
scatter-gather list, significantly reducing the number of indirect
calls and jumps between these layers, boosting performance.
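
The resulting call chain, sketched with the ARM SMMU and LPAE hooks from
this series (one traversal of the list instead of one round trip per
element):

	/*
	 * iommu_map_sg()
	 *   -> ops->map_sg()           e.g. arm_smmu_map_sg()
	 *     -> pgtbl_ops->map_sg()   e.g. arm_lpae_map_sg()
	 */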

On a system that uses the ARM SMMU driver, and the ARM LPAE format,
the current implementation of iommu_map_sg() yields the following
latencies for mapping scatter-gather lists of various sizes. These
latencies are calculated by repeating the mapping operation 10 times:

sizeiommu_map_sg latency
  4K0.624 us
 64K9.468 us
  1M  122.557 us
  2M  239.807 us
 12M 1435.979 us
 24M 2884.968 us
 32M 3832.979 us

On the same system, the proposed modifications yield the following
results:

sizeiommu_map_sg latency
  4K3.645 us
 64K4.198 us
  1M   11.010 us
  2M   17.125 us
 12M   82.416 us
 24M  158.677 us
 32M  210.468 us

The procedure for collecting the iommu_map_sg latencies is
the same in both experiments. Clearly, reducing the jumps
between the different layers in the IOMMU code offers a
significant performance boost in iommu_map_sg() latency (roughly
18x for the 32 MB case).

Changes since v1:

-Fixed an off-by-one error in arm_[lpae/v7s]_map_by_pgsize
when checking if the IOVA and physical address ranges being
mapped are within the appropriate limits.
-Added Sai Prakash Ranjan's "Tested-by" tag.

Thanks,
Isaac

Isaac J. Manjarres (5):
  iommu/io-pgtable: Introduce map_sg() as a page table op
  iommu/io-pgtable-arm: Hook up map_sg()
  iommu/io-pgtable-arm-v7s: Hook up map_sg()
  iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers
  iommu/arm-smmu: Hook up map_sg()

 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 
 drivers/iommu/io-pgtable-arm-v7s.c| 90 +++
 drivers/iommu/io-pgtable-arm.c| 86 +
 drivers/iommu/iommu.c | 25 --
 include/linux/io-pgtable.h|  6 +++
 include/linux/iommu.h | 13 +
 6 files changed, 234 insertions(+), 5 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 5/5] iommu/arm-smmu: Hook up map_sg()

2021-01-11 Thread Isaac J. Manjarres
Now that everything is in place for iommu_map_sg() to defer
mapping a scatter-gather list to the io-pgtable layer, implement
the map_sg() callback in the SMMU driver, so that iommu_map_sg()
can invoke it with the entire scatter-gather list that will be
mapped.

Signed-off-by: Isaac J. Manjarres 
Tested-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..52acc68 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,6 +1208,24 @@ static int arm_smmu_map(struct iommu_domain *domain, 
unsigned long iova,
return ret;
 }
 
+static int arm_smmu_map_sg(struct iommu_domain *domain, unsigned long iova,
+  struct scatterlist *sg, unsigned int nents, int prot,
+  gfp_t gfp, size_t *mapped)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   int ret;
+
+   if (!ops)
+   return -ENODEV;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->map_sg(ops, iova, sg, nents, prot, gfp, mapped);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *gather)
 {
@@ -1624,6 +1642,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
+   .map_sg = arm_smmu_map_sg,
.unmap  = arm_smmu_unmap,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 2/5] iommu/io-pgtable-arm: Hook up map_sg()

2021-01-11 Thread Isaac J. Manjarres
Implement the map_sg io-pgtable op for the ARM LPAE io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.

Signed-off-by: Isaac J. Manjarres 
Tested-by: Sai Prakash Ranjan 
---
 drivers/iommu/io-pgtable-arm.c | 86 ++
 drivers/iommu/iommu.c  | 12 +++---
 include/linux/iommu.h  |  8 
 3 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58..0c11529 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -473,6 +473,91 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_lpae_map_by_pgsize(struct io_pgtable_ops *ops,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int iommu_prot, gfp_t gfp,
+ size_t *mapped)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   arm_lpae_iopte *ptep = data->pgd;
+   int ret, lvl = data->start_level;
+   arm_lpae_iopte prot = arm_lpae_prot_to_pte(data, iommu_prot);
+   unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+   long iaext = (s64)(iova + size - 1) >> cfg->ias;
+   size_t pgsize;
+
+   if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+   pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 
0x%x\n",
+  iova, &paddr, size, min_pagesz);
+   return -EINVAL;
+   }
+
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+   iaext = ~iaext;
+   if (WARN_ON(iaext || (paddr + size - 1) >> cfg->oas))
+   return -ERANGE;
+
+   while (size) {
+   pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+   ret = __arm_lpae_map(data, iova, paddr, pgsize, prot, lvl, ptep,
+gfp);
+   if (ret)
+   return ret;
+
+   iova += pgsize;
+   paddr += pgsize;
+   *mapped += pgsize;
+   size -= pgsize;
+   }
+
+   return 0;
+}
+
+static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+  struct scatterlist *sg, unsigned int nents,
+  int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+
+   size_t len = 0;
+   unsigned int i = 0;
+   int ret;
+   phys_addr_t start;
+
+   *mapped = 0;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   while (i <= nents) {
+   phys_addr_t s_phys = sg_phys(sg);
+
+   if (len && s_phys != start + len) {
+   ret = arm_lpae_map_by_pgsize(ops, iova + *mapped, start,
+len, iommu_prot, gfp,
+mapped);
+
+   if (ret)
+   return ret;
+
+   len = 0;
+   }
+
+   if (len) {
+   len += sg->length;
+   } else {
+   len = sg->length;
+   start = s_phys;
+   }
+
+   if (++i < nents)
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -750,6 +835,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_lpae_map,
+   .map_sg = arm_lpae_map_sg,
.unmap  = arm_lpae_unmap,
.iova_to_phys   = arm_lpae_iova_to_phys,
};
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffeebda..0da0687 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2346,8 +2346,8 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long addr_merge,
+   size_t size)
 {
unsigned int pgsize_idx;
size_t pgsize;
@@ -2366,7 +2366,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
pgsize = (1UL << (pgsize_idx + 1)) - 1;
 
/* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
pgsize &= pgsize_bitmap;

[PATCH 5/5] iommu/arm-smmu: Hook up map_sg()

2021-01-08 Thread Isaac J. Manjarres
Now that everything is in place for iommu_map_sg() to defer
mapping a scatter-gather list to the io-pgtable layer, implement
the map_sg() callback in the SMMU driver, so that iommu_map_sg()
can invoke it with the entire scatter-gather list that will be
mapped.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..52acc68 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,6 +1208,24 @@ static int arm_smmu_map(struct iommu_domain *domain, 
unsigned long iova,
return ret;
 }
 
+static int arm_smmu_map_sg(struct iommu_domain *domain, unsigned long iova,
+  struct scatterlist *sg, unsigned int nents, int prot,
+  gfp_t gfp, size_t *mapped)
+{
+   struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+   int ret;
+
+   if (!ops)
+   return -ENODEV;
+
+   arm_smmu_rpm_get(smmu);
+   ret = ops->map_sg(ops, iova, sg, nents, prot, gfp, mapped);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
+}
+
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *gather)
 {
@@ -1624,6 +1642,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free= arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map= arm_smmu_map,
+   .map_sg = arm_smmu_map_sg,
.unmap  = arm_smmu_unmap,
.flush_iotlb_all= arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/5] iommu/io-pgtable-arm: Hook up map_sg()

2021-01-08 Thread Isaac J. Manjarres
Implement the map_sg io-pgtable op for the ARM LPAE io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm.c | 86 ++
 drivers/iommu/iommu.c  | 12 +++---
 include/linux/iommu.h  |  8 
 3 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58..9c17d9d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -473,6 +473,91 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_lpae_map_by_pgsize(struct io_pgtable_ops *ops,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int iommu_prot, gfp_t gfp,
+ size_t *mapped)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable_cfg *cfg = &data->iop.cfg;
+   arm_lpae_iopte *ptep = data->pgd;
+   int ret, lvl = data->start_level;
+   arm_lpae_iopte prot = arm_lpae_prot_to_pte(data, iommu_prot);
+   unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+   long iaext = (s64)(iova + size) >> cfg->ias;
+   size_t pgsize;
+
+   if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+   pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 
0x%x\n",
+  iova, &paddr, size, min_pagesz);
+   return -EINVAL;
+   }
+
+   if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+   iaext = ~iaext;
+   if (WARN_ON(iaext || (paddr + size) >> cfg->oas))
+   return -ERANGE;
+
+   while (size) {
+   pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+   ret = __arm_lpae_map(data, iova, paddr, pgsize, prot, lvl, ptep,
+gfp);
+   if (ret)
+   return ret;
+
+   iova += pgsize;
+   paddr += pgsize;
+   *mapped += pgsize;
+   size -= pgsize;
+   }
+
+   return 0;
+}
+
+static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+  struct scatterlist *sg, unsigned int nents,
+  int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+
+   size_t len = 0;
+   unsigned int i = 0;
+   int ret;
+   phys_addr_t start;
+
+   *mapped = 0;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   while (i <= nents) {
+   phys_addr_t s_phys = sg_phys(sg);
+
+   if (len && s_phys != start + len) {
+   ret = arm_lpae_map_by_pgsize(ops, iova + *mapped, start,
+len, iommu_prot, gfp,
+mapped);
+
+   if (ret)
+   return ret;
+
+   len = 0;
+   }
+
+   if (len) {
+   len += sg->length;
+   } else {
+   len = sg->length;
+   start = s_phys;
+   }
+
+   if (++i < nents)
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -750,6 +835,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_lpae_map,
+   .map_sg = arm_lpae_map_sg,
.unmap  = arm_lpae_unmap,
.iova_to_phys   = arm_lpae_iova_to_phys,
};
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffeebda..0da0687 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2346,8 +2346,8 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain 
*domain, dma_addr_t iova)
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-  unsigned long addr_merge, size_t size)
+size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long addr_merge,
+   size_t size)
 {
unsigned int pgsize_idx;
size_t pgsize;
@@ -2366,7 +2366,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
pgsize = (1UL << (pgsize_idx + 1)) - 1;
 
/* throw away page sizes not supported by the hardware */
-   pgsize &= domain->pgsize_bitmap;
+   pgsize &= pgsize_bitmap;
 

[PATCH 4/5] iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers

2021-01-08 Thread Isaac J. Manjarres
Add support for IOMMU drivers to have their own map_sg() callbacks.
This completes the path for having iommu_map_sg() invoke an IOMMU
driver's map_sg() callback, which can then invoke the io-pgtable
map_sg() callback with the entire scatter-gather list, so that it
can be processed entirely in the io-pgtable layer.

For IOMMU drivers that do not provide a callback, the default
implementation, which iterates through the scatter-gather list while
calling iommu_map(), will be used.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/iommu.c | 13 +
 include/linux/iommu.h |  5 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0da0687..46acd5c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2535,11 +2535,24 @@ static size_t __iommu_map_sg(struct iommu_domain 
*domain, unsigned long iova,
 struct scatterlist *sg, unsigned int nents, int 
prot,
 gfp_t gfp)
 {
+   const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
 
+   if (ops->map_sg) {
+   ret = ops->map_sg(domain, iova, sg, nents, prot, gfp, &mapped);
+
+   if (ops->iotlb_sync_map)
+   ops->iotlb_sync_map(domain);
+
+   if (ret)
+   goto out_err;
+
+   return mapped;
+   }
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0e40a38..bac7681 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_sg: map a scatter-gather list of physically contiguous chunks to
+ *  an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
@@ -243,6 +245,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_sg)(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 size_t size, struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/5] iommu/io-pgtable-arm-v7s: Hook up map_sg()

2021-01-08 Thread Isaac J. Manjarres
Implement the map_sg io-pgtable op for the ARMv7s io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 90 ++
 1 file changed, 90 insertions(+)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac9..40d96d2 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -545,6 +545,95 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_v7s_map_by_pgsize(struct io_pgtable_ops *ops,
+unsigned long iova, phys_addr_t paddr,
+size_t size, int prot, gfp_t gfp,
+size_t *mapped)
+{
+   struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   struct io_pgtable *iop = &data->iop;
+   struct io_pgtable_cfg *cfg = &iop->cfg;
+   unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+   int ret;
+   size_t pgsize;
+
+   if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+   pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 
0x%x\n",
+  iova, &paddr, size, min_pagesz);
+   return -EINVAL;
+   }
+
+   if (WARN_ON((iova + size) >= (1ULL << cfg->ias) ||
+   (paddr + size) >= (1ULL << cfg->oas)))
+   return -ERANGE;
+
+   while (size) {
+   pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+   ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1,
+   data->pgd, gfp);
+
+   if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
+   io_pgtable_tlb_flush_walk(&data->iop, iova, size,
+ ARM_V7S_BLOCK_SIZE(2));
+   } else {
+   wmb();
+   }
+
+   if (ret)
+   return ret;
+
+   iova += pgsize;
+   paddr += pgsize;
+   *mapped += pgsize;
+   size -= pgsize;
+   }
+
+   return 0;
+}
+
+static int arm_v7s_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+   size_t len = 0;
+   unsigned int i = 0;
+   int ret;
+   phys_addr_t start;
+
+   *mapped = 0;
+
+   /* If no access, then nothing to do */
+   if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+   return 0;
+
+   while (i <= nents) {
+   phys_addr_t s_phys = sg_phys(sg);
+
+   if (len && s_phys != start + len) {
+   ret = arm_v7s_map_by_pgsize(ops, iova + *mapped, start,
+   len, iommu_prot, gfp,
+   mapped);
+
+   if (ret)
+   return ret;
+
+   len = 0;
+   }
+
+   if (len) {
+   len += sg->length;
+   } else {
+   len = sg->length;
+   start = s_phys;
+   }
+
+   if (++i < nents)
+   sg = sg_next(sg);
+   }
+
+   return 0;
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -783,6 +872,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 
data->iop.ops = (struct io_pgtable_ops) {
.map= arm_v7s_map,
+   .map_sg = arm_v7s_map_sg,
.unmap  = arm_v7s_unmap,
.iova_to_phys   = arm_v7s_iova_to_phys,
};
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/5] iommu/io-pgtable: Introduce map_sg() as a page table op

2021-01-08 Thread Isaac J. Manjarres
While mapping a scatter-gather list, iommu_map_sg() calls
into the IOMMU driver through an indirect call, which can
call into the io-pgtable code through another indirect call.

This sequence of going through the IOMMU core code, the IOMMU
driver, and finally the io-pgtable code occurs, in the worst case,
for every element in the scatter-gather list, which is not
optimal.

Introduce a map_sg callback in the io-pgtable ops so that
IOMMU drivers can invoke it with the complete scatter-gather
list, so that it can be processed within the io-pgtable
code entirely, reducing the number of indirect calls, and
boosting overall iommu_map_sg() performance.

Signed-off-by: Isaac J. Manjarres 
---
 include/linux/io-pgtable.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ea727eb..6d0e731 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,9 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:  Map a physically contiguous memory region.
+ * @map_sg:   Map a scatter-gather list of physically contiguous memory
+ *chunks. The mapped pointer argument is used to store how
+ *many bytes are mapped.
  * @unmap:Unmap a physically contiguous memory region.
  * @iova_to_phys: Translate iova to physical address.
  *
@@ -156,6 +159,9 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+   int (*map_sg)(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/5] Optimize iommu_map_sg() performance

2021-01-08 Thread Isaac J. Manjarres
The iommu_map_sg() code currently iterates through the given
scatter-gather list, and in the worst case, invokes iommu_map()
for each element in the scatter-gather list, which calls into
the IOMMU driver through an indirect call. For an IOMMU driver
that uses a format supported by the io-pgtable code, the IOMMU
driver will then call into the io-pgtable code to map the chunk.

Jumping between the IOMMU core code, the IOMMU driver, and the
io-pgtable code and back for each element in a scatter-gather list
is not efficient.

Instead, add a map_sg() hook in both the IOMMU driver ops and the
io-pgtable ops. iommu_map_sg() can then call into the IOMMU driver's
map_sg() hook with the entire scatter-gather list, which can call
into the io-pgtable map_sg() hook, which can process the entire
scatter-gather list, significantly reducing the number of indirect
calls and jumps between these layers, boosting performance.

On a system that uses the ARM SMMU driver, and the ARM LPAE format,
the current implementation of iommu_map_sg() yields the following
latencies for mapping scatter-gather lists of various sizes. These
latencies are calculated by repeating the mapping operation 10 times:

sizeiommu_map_sg latency
  4K0.624 us
 64K9.468 us
  1M  122.557 us
  2M  239.807 us
 12M 1435.979 us
 24M 2884.968 us
 32M 3832.979 us

On the same system, the proposed modifications yield the following
results:

sizeiommu_map_sg latency
  4K3.645 us
 64K4.198 us
  1M   11.010 us
  2M   17.125 us
 12M   82.416 us
 24M  158.677 us
 32M  210.468 us

The procedure for collecting the iommu_map_sg latencies is
the same in both experiments. Clearly, reducing the jumps
between the different layers in the IOMMU code offers a
significant performance boost in iommu_map_sg() latency.

Thanks,
Isaac

Isaac J. Manjarres (5):
  iommu/io-pgtable: Introduce map_sg() as a page table op
  iommu/io-pgtable-arm: Hook up map_sg()
  iommu/io-pgtable-arm-v7s: Hook up map_sg()
  iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers
  iommu/arm-smmu: Hook up map_sg()

 drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 
 drivers/iommu/io-pgtable-arm-v7s.c| 90 +++
 drivers/iommu/io-pgtable-arm.c| 86 +
 drivers/iommu/iommu.c | 25 --
 include/linux/io-pgtable.h|  6 +++
 include/linux/iommu.h | 13 +
 6 files changed, 234 insertions(+), 5 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 4/7] iommu/arm-smmu-v3: Add dependency on io-pgtable-arm format module

2021-01-04 Thread Isaac J. Manjarres
The SMMUv3 driver depends on the availability of the ARM LPAE io-pgtable
format code to work properly. In preparation for having the io-pgtable
formats as modules, add a "pre" dependency with MODULE_SOFTDEP() to
ensure that the io-pgtable-arm format module is loaded before loading
the ARM SMMUv3 driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8ca7415..c498ac8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3650,3 +3650,4 @@ MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 
implementations");
 MODULE_AUTHOR("Will Deacon ");
 MODULE_ALIAS("platform:arm-smmu-v3");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 6/7] drm/panfrost: Add dependency on io-pgtable-arm format module

2021-01-04 Thread Isaac J. Manjarres
The Panfrost DRM driver depends on the availability of the ARM LPAE
io-pgtable format code to work properly. In preparation for having the
io-pgtable formats as modules, add a "pre" dependency with
MODULE_SOFTDEP() to ensure that the io-pgtable-arm format module is loaded
before loading the Panfrost DRM driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 83a461b..7294622 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -704,3 +704,4 @@ module_platform_driver(panfrost_driver);
 MODULE_AUTHOR("Panfrost Project Developers");
 MODULE_DESCRIPTION("Panfrost DRM Driver");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 7/7] iommu/io-pgtable-arm: Allow building modular io-pgtable formats

2021-01-04 Thread Isaac J. Manjarres
Now that everything is in place for modular io-pgtable formats,
allow the ARM LPAE and ARMV7S io-pgtable formats to be built
as modules, and allow the io-pgtable framework to be enabled,
without having to explicitly enable an io-pgtable format.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/Kconfig  | 11 +++
 drivers/iommu/io-pgtable-arm-v7s.c |  2 ++
 drivers/iommu/io-pgtable-arm.c |  2 ++
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 192ef8f..d3c4e9a 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -25,12 +25,15 @@ if IOMMU_SUPPORT
 
 menu "Generic IOMMU Pagetable Support"
 
-# Selected by the actual pagetable implementations
 config IOMMU_IO_PGTABLE
-   bool
+   bool "IOMMU Pagetable support"
+   help
+ Enable support for using IOMMU pagetables. This option enables
+ the generic IOMMU pagetable framework for registering IOMMU
+ pagetable formats, as well as managing IOMMU pagetable instances.
 
 config IOMMU_IO_PGTABLE_LPAE
-   bool "ARMv7/v8 Long Descriptor Format"
+   tristate "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
help
@@ -49,7 +52,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
  If unsure, say N here.
 
 config IOMMU_IO_PGTABLE_ARMV7S
-   bool "ARMv7/v8 Short Descriptor Format"
+   tristate "ARMv7/v8 Short Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || COMPILE_TEST
help
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 7e81135..69dbf86 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -1014,3 +1014,5 @@ static void __exit arm_v7s_exit(void)
io_pgtable_ops_unregister(ARM_V7S);
 }
 module_exit(arm_v7s_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8ed52a0..8d4805f 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1306,3 +1306,5 @@ static void __exit arm_lpae_exit(void)
io_pgtable_ops_unregister(arm_lpae_init_fns_table[i].fmt);
 }
 module_exit(arm_lpae_exit);
+
+MODULE_LICENSE("GPL v2");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 5/7] drm/msm: Add dependency on io-pgtable-arm format module

2021-01-04 Thread Isaac J. Manjarres
The MSM DRM driver depends on the availability of the ARM LPAE io-pgtable
format code to work properly. In preparation for having the io-pgtable
formats as modules, add a "pre" dependency with MODULE_SOFTDEP() to
ensure that the io-pgtable-arm format module is loaded before loading
the MSM DRM driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/gpu/drm/msm/msm_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 535a026..8be3506 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1369,3 +1369,4 @@ module_exit(msm_drm_unregister);
 MODULE_AUTHOR("Rob Clark https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 2/7] iommu/io-pgtable: Add refcounting for io-pgtable format modules

2021-01-04 Thread Isaac J. Manjarres
In preparation for modularizing io-pgtable formats, add support
for reference counting the io-pgtable format modules to ensure
that the modules are not unloaded while they are in use.
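
The invariant the patch establishes, in sketch form (alloc takes a
reference on the format module, free drops it):

	/*
	 * alloc_io_pgtable_ops() -> try_module_get(fns->owner)
	 * free_io_pgtable_ops()  -> module_put(fns->owner)
	 *
	 * A format module therefore cannot be unloaded while any page
	 * table it allocated is still live.
	 */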

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c |  1 +
 drivers/iommu/io-pgtable-arm.c |  5 +
 drivers/iommu/io-pgtable.c | 12 ++--
 include/linux/io-pgtable.h |  2 ++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 080881b..7e81135 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -839,6 +839,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 static struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
.alloc  = arm_v7s_alloc_pgtable,
.free   = arm_v7s_free_pgtable,
+   .owner  = THIS_MODULE,
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index e1f8d54..8ed52a0 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1054,26 +1054,31 @@ static struct arm_lpae_io_pgtable_init_fns 
arm_lpae_init_fns_table[] = {
.fmt= ARM_32_LPAE_S1,
.init_fns.alloc = arm_32_lpae_alloc_pgtable_s1,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_32_LPAE_S2,
.init_fns.alloc = arm_32_lpae_alloc_pgtable_s2,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_64_LPAE_S1,
.init_fns.alloc = arm_64_lpae_alloc_pgtable_s1,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_64_LPAE_S2,
.init_fns.alloc = arm_64_lpae_alloc_pgtable_s2,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_MALI_LPAE,
.init_fns.alloc = arm_mali_lpae_alloc_pgtable,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
 };
 
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 95e872d..9792e25 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 static struct io_pgtable_init_fns *io_pgtable_init_table[IO_PGTABLE_NUM_FMTS];
@@ -28,9 +29,14 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum 
io_pgtable_fmt fmt,
if (!fns)
return NULL;
 
+   if (!try_module_get(fns->owner))
+   return NULL;
+
iop = fns->alloc(cfg, cookie);
-   if (!iop)
+   if (!iop) {
+   module_put(fns->owner);
return NULL;
+   }
 
iop->fmt= fmt;
iop->cookie = cookie;
@@ -55,8 +61,10 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)
iop = io_pgtable_ops_to_pgtable(ops);
io_pgtable_tlb_flush_all(iop);
fns = io_pgtable_init_table[iop->fmt];
-   if (fns)
+   if (fns) {
fns->free(iop);
+   module_put(fns->owner);
+   }
 }
 EXPORT_SYMBOL_GPL(free_io_pgtable_ops);
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a460ae1..bdf0a01 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -168,10 +168,12 @@ struct io_pgtable_ops {
  *
  * @alloc: Allocate a set of page tables described by cfg.
  * @free:  Free the page tables associated with iop.
+ * @owner: Driver module providing these ops.
  */
 struct io_pgtable_init_fns {
struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
void (*free)(struct io_pgtable *iop);
+   struct module *owner;
 };
 
 /**
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 1/7] iommu/io-pgtable: Introduce dynamic io-pgtable format registration

2021-01-04 Thread Isaac J. Manjarres
The io-pgtable code constructs an array of init functions for each
page table format at compile time. This is not ideal, as it prevents
io-pgtable formats from being built as kernel modules.

In preparation for modularizing the io-pgtable formats, switch to a
dynamic registration scheme, where each io-pgtable format can register
its init functions with the io-pgtable code at boot or module
insertion time.
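
A sketch of what registration looks like for a format module under this
scheme (MY_FMT and the init_fns names are placeholders; the
io_pgtable_ops_register()/unregister() calls are the ones this patch
introduces):

	static struct io_pgtable_init_fns my_fmt_init_fns = {
		.alloc	= my_fmt_alloc_pgtable,
		.free	= my_fmt_free_pgtable,
	};

	static int __init my_fmt_init(void)
	{
		return io_pgtable_ops_register(MY_FMT, &my_fmt_init_fns);
	}
	core_initcall(my_fmt_init);

	static void __exit my_fmt_exit(void)
	{
		io_pgtable_ops_unregister(MY_FMT);
	}
	module_exit(my_fmt_exit);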

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 33 -
 drivers/iommu/io-pgtable-arm.c | 97 --
 drivers/iommu/io-pgtable.c | 44 +++--
 include/linux/io-pgtable.h | 50 
 4 files changed, 164 insertions(+), 60 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac9..080881b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -835,7 +836,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
return NULL;
 }
 
-struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
+static struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
.alloc  = arm_v7s_alloc_pgtable,
.free   = arm_v7s_free_pgtable,
 };
@@ -982,5 +983,33 @@ static int __init arm_v7s_do_selftests(void)
pr_info("self test ok\n");
return 0;
 }
-subsys_initcall(arm_v7s_do_selftests);
+#else
+static int arm_v7s_do_selftests(void)
+{
+   return 0;
+}
 #endif
+
+static int __init arm_v7s_init(void)
+{
+   int ret;
+
+   ret = io_pgtable_ops_register(ARM_V7S, &io_pgtable_arm_v7s_init_fns);
+   if (ret < 0) {
+   pr_err("Failed to register ARM v7s fmt ret = %d\n", ret);
+   return ret;
+   }
+
+   ret = arm_v7s_do_selftests();
+   if (ret < 0)
+   io_pgtable_ops_unregister(ARM_V7S);
+
+   return ret;
+}
+core_initcall(arm_v7s_init);
+
+static void __exit arm_v7s_exit(void)
+{
+   io_pgtable_ops_unregister(ARM_V7S);
+}
+module_exit(arm_v7s_exit);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58..e1f8d54 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -145,6 +146,11 @@ struct arm_lpae_io_pgtable {
void*pgd;
 };
 
+struct arm_lpae_io_pgtable_init_fns {
+   enum io_pgtable_fmt fmt;
+   struct io_pgtable_init_fns init_fns;
+};
+
 typedef u64 arm_lpae_iopte;
 
 static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
@@ -1043,29 +1049,32 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, 
void *cookie)
return NULL;
 }
 
-struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
-   .alloc  = arm_64_lpae_alloc_pgtable_s1,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
-   .alloc  = arm_64_lpae_alloc_pgtable_s2,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
-   .alloc  = arm_32_lpae_alloc_pgtable_s1,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
-   .alloc  = arm_32_lpae_alloc_pgtable_s2,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
-   .alloc  = arm_mali_lpae_alloc_pgtable,
-   .free   = arm_lpae_free_pgtable,
+static struct arm_lpae_io_pgtable_init_fns arm_lpae_init_fns_table[] = {
+   {
+   .fmt= ARM_32_LPAE_S1,
+   .init_fns.alloc = arm_32_lpae_alloc_pgtable_s1,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_32_LPAE_S2,
+   .init_fns.alloc = arm_32_lpae_alloc_pgtable_s2,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_64_LPAE_S1,
+   .init_fns.alloc = arm_64_lpae_alloc_pgtable_s1,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_64_LPAE_S2,
+   .init_fns.alloc = arm_64_lpae_alloc_pgtable_s2,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_MALI_LPAE,
+   .init_fns.alloc = arm_mali_lpae_alloc_pgtable,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
@@ -1250,5 +1259,45 @@ static int __init arm_lpae_do_selftests(void)
pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
	return fail ? -EFAULT : 0;
 }

[PATCH RESEND 3/7] iommu/arm-smmu: Add dependency on io-pgtable format modules

2021-01-04 Thread Isaac J. Manjarres
The SMMU driver depends on the availability of the ARM LPAE
io-pgtable format code to work properly. In preparation
for having the io-pgtable formats as modules, add a "pre"
dependency with MODULE_SOFTDEP() to ensure that the ARM LPAE
io-pgtable format module is loaded before loading the ARM SMMU
driver module. Also, add a dependency on the ARMv7 short descriptor
io-pgtable format, so that it can be loaded before the SMMU driver
module, if available.
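
For context, MODULE_SOFTDEP() only emits a modinfo tag; modprobe reads
the tag and loads the listed modules before the dependent module, so it
is a hint to userspace rather than a hard symbol dependency (insmod,
for instance, ignores it). A minimal sketch of the consumer side, where
the names must match the .ko file names of the format modules:

    #include <linux/module.h>

    MODULE_LICENSE("GPL v2");
    /* Ask modprobe to load the page-table format modules first. */
    MODULE_SOFTDEP("pre: io-pgtable-arm io-pgtable-arm-v7s");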

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..a72649f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -2351,3 +2351,4 @@ MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU 
implementations");
 MODULE_AUTHOR("Will Deacon ");
 MODULE_ALIAS("platform:arm-smmu");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm io-pgtable-arm-v7s");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH RESEND 0/7] iommu: Permit modular builds of io-pgtable drivers

2021-01-04 Thread Isaac J. Manjarres
The goal of the Generic Kernel Image (GKI) effort is to have a common
kernel image that works across multiple Android devices. This involves
generating a kernel image that has core features integrated into it,
while SoC specific functionality can be added to the kernel for the
device as a module.

Along with modularizing IOMMU drivers, this also means building the
io-pgtable code as modules, which allows SoC vendors to include only
the io-pgtable implementations that they use. For example, GKI for
arm64 must currently include support for both the ARM LPAE and ARM V7S
io-pgtable formats. Having the code for both formats as modules allows
SoC vendors to ship only the page table format that they use, along
with their IOMMU driver.

The patches are split into 4 parts:

1) Modularizing io-pgtable-arm[-v7s].c, while leaving the io-pgtable.c
code as part of the core kernel, requires removing the references to
the ARM LPAE and ARM V7S io-pgtable init functions, and using a
dynamic method for formats to register their io-pgtable init functions.

2) Taking references to the io-pgtable format drivers to ensure that they
cannot be unloaded while in use.

3) Adding "pre" MODULE_SOFTDEP() dependencies to tristate drivers in
the kernel that invoke [alloc/free]_io_pgtable_ops(), so that the
io-pgtable format drivers are loaded before the drivers that need them.

4) Changing the Kconfig options for the ARM LPAE and ARM V7S formats to
tristate, and allowing the io-pgtable code to be enabled without having
to select either page table format. This makes it possible to build a
kernel that only provides the interface for io-pgtable formats to be
registered as modules, as would be the case for the GKI.

Thanks,
Isaac

Isaac J. Manjarres (7):
  iommu/io-pgtable: Introduce dynamic io-pgtable format registration
  iommu/io-pgtable: Add refcounting for io-pgtable format modules
  iommu/arm-smmu: Add dependency on io-pgtable format modules
  iommu/arm-smmu-v3: Add dependency on io-pgtable-arm format module
  drm/msm: Add dependency on io-pgtable-arm format module
  drm/panfrost: Add dependency on io-pgtable-arm format module
  iommu/io-pgtable-arm: Allow building modular io-pgtable formats

 drivers/gpu/drm/msm/msm_drv.c   |   1 +
 drivers/gpu/drm/panfrost/panfrost_drv.c |   1 +
 drivers/iommu/Kconfig   |  11 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   1 +
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   1 +
 drivers/iommu/io-pgtable-arm-v7s.c  |  36 +-
 drivers/iommu/io-pgtable-arm.c  | 104 +---
 drivers/iommu/io-pgtable.c  |  54 ++-
 include/linux/io-pgtable.h  |  52 +-
 9 files changed, 196 insertions(+), 65 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 5/7] drm/msm: Add dependency on io-pgtable-arm format module

2020-12-28 Thread Isaac J. Manjarres
The MSM DRM driver depends on the availability of the ARM LPAE io-pgtable
format code to work properly. In preparation for having the io-pgtable
formats as modules, add a "pre" dependency with MODULE_SOFTDEP() to
ensure that the io-pgtable-arm format module is loaded before loading
the MSM DRM driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/gpu/drm/msm/msm_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 535a026..8be3506 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1369,3 +1369,4 @@ module_exit(msm_drm_unregister);
 MODULE_AUTHOR("Rob Clark https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 6/7] drm/panfrost: Add dependency on io-pgtable-arm format module

2020-12-28 Thread Isaac J. Manjarres
The Panfrost DRM driver depends on the availability of the ARM LPAE
io-pgtable format code to work properly. In preparation for having the
io-pgtable formats as modules, add a "pre" dependency with
MODULE_SOFTDEP() to ensure that the io-pgtable-arm format module is loaded
before loading the Panfrost DRM driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 83a461b..7294622 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -704,3 +704,4 @@ module_platform_driver(panfrost_driver);
 MODULE_AUTHOR("Panfrost Project Developers");
 MODULE_DESCRIPTION("Panfrost DRM Driver");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 7/7] iommu/io-pgtable-arm: Allow building modular io-pgtable formats

2020-12-28 Thread Isaac J. Manjarres
Now that everything is in place for modular io-pgtable formats,
allow the ARM LPAE and ARMV7S io-pgtable formats to be built
as modules, and allow the io-pgtable framework to be enabled,
without having to explicitly enable an io-pgtable format.
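
As a concrete (hypothetical) example, a GKI-style configuration that
builds the framework in while shipping both formats as modules would
contain:

    CONFIG_IOMMU_IO_PGTABLE=y
    CONFIG_IOMMU_IO_PGTABLE_LPAE=m
    CONFIG_IOMMU_IO_PGTABLE_ARMV7S=m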

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/Kconfig  | 11 +++
 drivers/iommu/io-pgtable-arm-v7s.c |  2 ++
 drivers/iommu/io-pgtable-arm.c |  2 ++
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 192ef8f..d3c4e9a 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -25,12 +25,15 @@ if IOMMU_SUPPORT
 
 menu "Generic IOMMU Pagetable Support"
 
-# Selected by the actual pagetable implementations
 config IOMMU_IO_PGTABLE
-   bool
+   bool "IOMMU Pagetable support"
+   help
+ Enable support for using IOMMU pagetables. This option enables
+ the generic IOMMU pagetable framework for registering IOMMU
+ pagetable formats, as well as managing IOMMU pagetable instances.
 
 config IOMMU_IO_PGTABLE_LPAE
-   bool "ARMv7/v8 Long Descriptor Format"
+   tristate "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
help
@@ -49,7 +52,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
  If unsure, say N here.
 
 config IOMMU_IO_PGTABLE_ARMV7S
-   bool "ARMv7/v8 Short Descriptor Format"
+   tristate "ARMv7/v8 Short Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || COMPILE_TEST
help
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 7e81135..69dbf86 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -1014,3 +1014,5 @@ static void __exit arm_v7s_exit(void)
io_pgtable_ops_unregister(ARM_V7S);
 }
 module_exit(arm_v7s_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8ed52a0..8d4805f 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1306,3 +1306,5 @@ static void __exit arm_lpae_exit(void)
io_pgtable_ops_unregister(arm_lpae_init_fns_table[i].fmt);
 }
 module_exit(arm_lpae_exit);
+
+MODULE_LICENSE("GPL v2");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/7] iommu/io-pgtable: Add refcounting for io-pgtable format modules

2020-12-28 Thread Isaac J. Manjarres
In preparation for modularizing io-pgtable formats, add support
for reference counting the io-pgtable format modules to ensure
that the modules are not unloaded while they are in use.
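
One detail worth noting: try_module_get(NULL) returns true and
module_put(NULL) is a no-op, so the scheme also covers built-in
formats, whose .owner (THIS_MODULE) is NULL. A minimal illustration of
the idiom, with hypothetical helper names:

    #include <linux/module.h>

    static bool pin_format(struct module *owner)
    {
            /* Succeeds for owner == NULL, i.e. built-in formats. */
            return try_module_get(owner);
    }

    static void unpin_format(struct module *owner)
    {
            module_put(owner);      /* module_put(NULL) is a no-op */
    }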

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c |  1 +
 drivers/iommu/io-pgtable-arm.c |  5 +
 drivers/iommu/io-pgtable.c | 12 ++--
 include/linux/io-pgtable.h |  2 ++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 080881b..7e81135 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -839,6 +839,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 static struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
.alloc  = arm_v7s_alloc_pgtable,
.free   = arm_v7s_free_pgtable,
+   .owner  = THIS_MODULE,
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index e1f8d54..8ed52a0 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1054,26 +1054,31 @@ static struct arm_lpae_io_pgtable_init_fns 
arm_lpae_init_fns_table[] = {
.fmt= ARM_32_LPAE_S1,
.init_fns.alloc = arm_32_lpae_alloc_pgtable_s1,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_32_LPAE_S2,
.init_fns.alloc = arm_32_lpae_alloc_pgtable_s2,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_64_LPAE_S1,
.init_fns.alloc = arm_64_lpae_alloc_pgtable_s1,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_64_LPAE_S2,
.init_fns.alloc = arm_64_lpae_alloc_pgtable_s2,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
{
.fmt= ARM_MALI_LPAE,
.init_fns.alloc = arm_mali_lpae_alloc_pgtable,
.init_fns.free  = arm_lpae_free_pgtable,
+   .init_fns.owner = THIS_MODULE,
},
 };
 
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 95e872d..9792e25 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 static struct io_pgtable_init_fns *io_pgtable_init_table[IO_PGTABLE_NUM_FMTS];
@@ -28,9 +29,14 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum 
io_pgtable_fmt fmt,
if (!fns)
return NULL;
 
+   if (!try_module_get(fns->owner))
+   return NULL;
+
iop = fns->alloc(cfg, cookie);
-   if (!iop)
+   if (!iop) {
+   module_put(fns->owner);
return NULL;
+   }
 
iop->fmt= fmt;
iop->cookie = cookie;
@@ -55,8 +61,10 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)
iop = io_pgtable_ops_to_pgtable(ops);
io_pgtable_tlb_flush_all(iop);
fns = io_pgtable_init_table[iop->fmt];
-   if (fns)
+   if (fns) {
fns->free(iop);
+   module_put(fns->owner);
+   }
 }
 EXPORT_SYMBOL_GPL(free_io_pgtable_ops);
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index a460ae1..bdf0a01 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -168,10 +168,12 @@ struct io_pgtable_ops {
  *
  * @alloc: Allocate a set of page tables described by cfg.
  * @free:  Free the page tables associated with iop.
+ * @owner: Driver module providing these ops.
  */
 struct io_pgtable_init_fns {
struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
void (*free)(struct io_pgtable *iop);
+   struct module *owner;
 };
 
 /**
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 3/7] iommu/arm-smmu: Add dependency on io-pgtable format modules

2020-12-28 Thread Isaac J. Manjarres
The SMMU driver depends on the availability of the ARM LPAE
io-pgtable format code to work properly. In preparation
for having the io-pgtable formats as modules, add a "pre"
dependency with MODULE_SOFTDEP() to ensure that the ARM LPAE
io-pgtable format module is loaded before loading the ARM SMMU
driver module. Also, add a dependency on the ARMv7 short descriptor
io-pgtable format, so that it can be loaded before the SMMU driver
module, if available.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..a72649f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -2351,3 +2351,4 @@ MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU 
implementations");
 MODULE_AUTHOR("Will Deacon ");
 MODULE_ALIAS("platform:arm-smmu");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm io-pgtable-arm-v7s");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/7] iommu/io-pgtable: Introduce dynamic io-pgtable format registration

2020-12-28 Thread Isaac J. Manjarres
The io-pgtable code constructs an array of init functions for each
page table format at compile time. This is not ideal, as it prevents
io-pgtable formats from being built as kernel modules.

In preparation for modularizing the io-pgtable formats, switch to a
dynamic registration scheme, where each io-pgtable format can register
its init functions with the io-pgtable code at boot or module
insertion time.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 33 -
 drivers/iommu/io-pgtable-arm.c | 97 --
 drivers/iommu/io-pgtable.c | 44 +++--
 include/linux/io-pgtable.h | 50 
 4 files changed, 164 insertions(+), 60 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac9..080881b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -835,7 +836,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
return NULL;
 }
 
-struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
+static struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
.alloc  = arm_v7s_alloc_pgtable,
.free   = arm_v7s_free_pgtable,
 };
@@ -982,5 +983,33 @@ static int __init arm_v7s_do_selftests(void)
pr_info("self test ok\n");
return 0;
 }
-subsys_initcall(arm_v7s_do_selftests);
+#else
+static int arm_v7s_do_selftests(void)
+{
+   return 0;
+}
 #endif
+
+static int __init arm_v7s_init(void)
+{
+   int ret;
+
+   ret = io_pgtable_ops_register(ARM_V7S, &io_pgtable_arm_v7s_init_fns);
+   if (ret < 0) {
+   pr_err("Failed to register ARM v7s fmt ret = %d\n", ret);
+   return ret;
+   }
+
+   ret = arm_v7s_do_selftests();
+   if (ret < 0)
+   io_pgtable_ops_unregister(ARM_V7S);
+
+   return ret;
+}
+core_initcall(arm_v7s_init);
+
+static void __exit arm_v7s_exit(void)
+{
+   io_pgtable_ops_unregister(ARM_V7S);
+}
+module_exit(arm_v7s_exit);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58..e1f8d54 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -145,6 +146,11 @@ struct arm_lpae_io_pgtable {
void*pgd;
 };
 
+struct arm_lpae_io_pgtable_init_fns {
+   enum io_pgtable_fmt fmt;
+   struct io_pgtable_init_fns init_fns;
+};
+
 typedef u64 arm_lpae_iopte;
 
 static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
@@ -1043,29 +1049,32 @@ arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, 
void *cookie)
return NULL;
 }
 
-struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
-   .alloc  = arm_64_lpae_alloc_pgtable_s1,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
-   .alloc  = arm_64_lpae_alloc_pgtable_s2,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
-   .alloc  = arm_32_lpae_alloc_pgtable_s1,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
-   .alloc  = arm_32_lpae_alloc_pgtable_s2,
-   .free   = arm_lpae_free_pgtable,
-};
-
-struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
-   .alloc  = arm_mali_lpae_alloc_pgtable,
-   .free   = arm_lpae_free_pgtable,
+static struct arm_lpae_io_pgtable_init_fns arm_lpae_init_fns_table[] = {
+   {
+   .fmt= ARM_32_LPAE_S1,
+   .init_fns.alloc = arm_32_lpae_alloc_pgtable_s1,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_32_LPAE_S2,
+   .init_fns.alloc = arm_32_lpae_alloc_pgtable_s2,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_64_LPAE_S1,
+   .init_fns.alloc = arm_64_lpae_alloc_pgtable_s1,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_64_LPAE_S2,
+   .init_fns.alloc = arm_64_lpae_alloc_pgtable_s2,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
+   {
+   .fmt= ARM_MALI_LPAE,
+   .init_fns.alloc = arm_mali_lpae_alloc_pgtable,
+   .init_fns.free  = arm_lpae_free_pgtable,
+   },
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
@@ -1250,5 +1259,45 @@ static int __init arm_lpae_do_selftests(void)
pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
	return fail ? -EFAULT : 0;
 }

[PATCH 4/7] iommu/arm-smmu-v3: Add dependency on io-pgtable-arm format module

2020-12-28 Thread Isaac J. Manjarres
The SMMUv3 driver depends on the availability of the ARM LPAE io-pgtable
format code to work properly. In preparation for having the io-pgtable
formats as modules, add a "pre" dependency with MODULE_SOFTDEP() to
ensure that the io-pgtable-arm format module is loaded before loading
the ARM SMMUv3 driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8ca7415..c498ac8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3650,3 +3650,4 @@ MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 
implementations");
 MODULE_AUTHOR("Will Deacon ");
 MODULE_ALIAS("platform:arm-smmu-v3");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 0/7] iommu: Permit modular builds of io-pgtable drivers

2020-12-28 Thread Isaac J. Manjarres
The goal of the Generic Kernel Image (GKI) effort is to have a common
kernel image that works across multiple Android devices. This involves
generating a kernel image that has core features integrated into it,
while SoC specific functionality can be added to the kernel for the
device as a module.

Along with modularizing IOMMU drivers, this also means building the
io-pgtable code as modules, which allows SoC vendors to include only
the io-pgtable implementations that they use. For example, GKI for
arm64 must currently include support for both the ARM LPAE and ARM V7S
io-pgtable formats. Having the code for both formats as modules allows
SoC vendors to ship only the page table format that they use, along
with their IOMMU driver.

The patches are split into 4 parts:

1) Modularizing io-pgtable-arm[-v7s].c, while leaving the io-pgtable.c
code as part of the core kernel, requires removing the references to
the ARM LPAE and ARM V7S io-pgtable init functions, and using a
dynamic method for formats to register their io-pgtable init functions.

2) Taking references to the io-pgtable format drivers to ensure that they
cannot be unloaded while in use.

3) Adding "pre" MODULE_SOFTDEP() dependencies to tristate drivers in
the kernel that invoke [alloc/free]_io_pgtable_ops(), so that the
io-pgtable format drivers are loaded before the drivers that need them.

4) Changing the Kconfig options for the ARM LPAE and ARM V7S formats to
tristate, and allowing the io-pgtable code to be enabled without having
to select either page table format. This makes it possible to build a
kernel that only provides the interface for io-pgtable formats to be
registered as modules, as would be the case for the GKI.

Thanks,
Isaac

Isaac J. Manjarres (7):
  iommu/io-pgtable: Introduce dynamic io-pgtable format registration
  iommu/io-pgtable: Add refcounting for io-pgtable format modules
  iommu/arm-smmu: Add dependency on io-pgtable format modules
  iommu/arm-smmu-v3: Add dependency on io-pgtable-arm format module
  drm/msm: Add dependency on io-pgtable-arm format module
  drm/panfrost: Add dependency on io-pgtable-arm format module
  iommu/io-pgtable-arm: Allow building modular io-pgtable formats

 drivers/gpu/drm/msm/msm_drv.c   |   1 +
 drivers/gpu/drm/panfrost/panfrost_drv.c |   1 +
 drivers/iommu/Kconfig   |  11 +--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   1 +
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   1 +
 drivers/iommu/io-pgtable-arm-v7s.c  |  36 +-
 drivers/iommu/io-pgtable-arm.c  | 104 +---
 drivers/iommu/io-pgtable.c  |  54 ++-
 include/linux/io-pgtable.h  |  52 +-
 9 files changed, 196 insertions(+), 65 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 3/7] iommu/arm-smmu: Add dependency on io-pgtable format modules

2020-12-21 Thread Isaac J. Manjarres
The SMMU driver depends on the availability of the ARM LPAE and
ARM V7S io-pgtable format code to work properly. In preparation
for having the io-pgtable formats as modules, add a "pre"
dependency with MODULE_SOFTDEP() to ensure that the io-pgtable
format modules are loaded before loading the ARM SMMU driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu/arm-smmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..a72649f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -2351,3 +2351,4 @@ MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU 
implementations");
 MODULE_AUTHOR("Will Deacon ");
 MODULE_ALIAS("platform:arm-smmu");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm io-pgtable-arm-v7s");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 7/7] iommu/io-pgtable-arm: Allow building modular io-pgtable fmts

2020-12-21 Thread Isaac J. Manjarres
Now that everything is in place for modular io-pgtable formats,
allow the ARM LPAE and ARMV7S io-pgtable formats to be built
as modules.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/Kconfig  | 4 ++--
 drivers/iommu/io-pgtable-arm-v7s.c | 2 ++
 drivers/iommu/io-pgtable-arm.c | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 192ef8f..7e4f44f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -30,7 +30,7 @@ config IOMMU_IO_PGTABLE
bool
 
 config IOMMU_IO_PGTABLE_LPAE
-   bool "ARMv7/v8 Long Descriptor Format"
+   tristate "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
help
@@ -49,7 +49,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
  If unsure, say N here.
 
 config IOMMU_IO_PGTABLE_ARMV7S
-   bool "ARMv7/v8 Short Descriptor Format"
+   tristate "ARMv7/v8 Short Descriptor Format"
select IOMMU_IO_PGTABLE
depends on ARM || ARM64 || COMPILE_TEST
help
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index a5cb755a..9d9f08f 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -1015,3 +1015,5 @@ static void __exit arm_v7s_exit(void)
io_pgtable_ops_unregister(&io_pgtable_arm_v7s_init_fns);
 }
 module_exit(arm_v7s_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index e8b1e34..e0de4ad 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1299,3 +1299,5 @@ static void __exit arm_lpae_exit(void)
io_pgtable_ops_unregister(&io_pgtable_arm_lpae_init_fns[i]);
 }
 module_exit(arm_lpae_exit);
+
+MODULE_LICENSE("GPL v2");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 2/7] iommu/io-pgtable: Add refcounting for io-pgtable format modules

2020-12-21 Thread Isaac J. Manjarres
In preparation for modularizing io-pgtable formats, add support
for reference counting the io-pgtable format modules to ensure
that the modules are not unloaded while they are in use.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/io-pgtable-arm-v7s.c |  1 +
 drivers/iommu/io-pgtable-arm.c |  5 +
 drivers/iommu/io-pgtable.c | 12 ++--
 include/linux/io-pgtable.h |  2 ++
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 89aad2f..a5cb755a 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -840,6 +840,7 @@ static struct io_pgtable_init_fns 
io_pgtable_arm_v7s_init_fns = {
.fmt= ARM_V7S,
.alloc  = arm_v7s_alloc_pgtable,
.free   = arm_v7s_free_pgtable,
+   .owner  = THIS_MODULE,
 };
 
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index ff0ea2f..e8b1e34 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1049,26 +1049,31 @@ static struct io_pgtable_init_fns 
io_pgtable_arm_lpae_init_fns[] = {
.fmt= ARM_32_LPAE_S1,
.alloc  = arm_32_lpae_alloc_pgtable_s1,
.free   = arm_lpae_free_pgtable,
+   .owner  = THIS_MODULE,
},
{
.fmt= ARM_32_LPAE_S2,
.alloc  = arm_32_lpae_alloc_pgtable_s2,
.free   = arm_lpae_free_pgtable,
+   .owner  = THIS_MODULE,
},
{
.fmt= ARM_64_LPAE_S1,
.alloc  = arm_64_lpae_alloc_pgtable_s1,
.free   = arm_lpae_free_pgtable,
+   .owner  = THIS_MODULE,
},
{
.fmt= ARM_64_LPAE_S2,
.alloc  = arm_64_lpae_alloc_pgtable_s2,
.free   = arm_lpae_free_pgtable,
+   .owner  = THIS_MODULE,
},
{
.fmt= ARM_MALI_LPAE,
.alloc  = arm_mali_lpae_alloc_pgtable,
.free   = arm_lpae_free_pgtable,
+   .owner  = THIS_MODULE,
},
 };
 
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 2c6eb2e..cc83542 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -52,9 +53,14 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum 
io_pgtable_fmt fmt,
if (!fns)
return NULL;
 
+   if (!try_module_get(fns->owner))
+   return NULL;
+
iop = fns->alloc(cfg, cookie);
-   if (!iop)
+   if (!iop) {
+   module_put(fns->owner);
return NULL;
+   }
 
iop->fmt= fmt;
iop->cookie = cookie;
@@ -79,8 +85,10 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)
iop = io_pgtable_ops_to_pgtable(ops);
io_pgtable_tlb_flush_all(iop);
fns = io_pgtable_get_init_fns(iop->fmt);
-   if (fns)
+   if (fns) {
fns->free(iop);
+   module_put(fns->owner);
+   }
 }
 EXPORT_SYMBOL_GPL(free_io_pgtable_ops);
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 45b367ce..a03b262 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -169,11 +169,13 @@ struct io_pgtable_ops {
  * @fmt:   The page table format.
  * @alloc: Allocate a set of page tables described by cfg.
  * @free:  Free the page tables associated with iop.
+ * @owner: Driver module providing these ops.
  */
 struct io_pgtable_init_fns {
enum io_pgtable_fmt fmt;
struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
void (*free)(struct io_pgtable *iop);
+   struct module *owner;
 };
 
 /**
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 6/7] drm/panfrost: Add dependency on io-pgtable-arm format module

2020-12-21 Thread Isaac J. Manjarres
The Panfrost DRM driver depends on the availability of the ARM LPAE
io-pgtable format code to work properly. In preparation for having the
io-pgtable formats as modules, add a "pre" dependency with
MODULE_SOFTDEP() to ensure that the io-pgtable-arm format module is loaded
before loading the Panfrost DRM driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 83a461b..7294622 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -704,3 +704,4 @@ module_platform_driver(panfrost_driver);
 MODULE_AUTHOR("Panfrost Project Developers");
 MODULE_DESCRIPTION("Panfrost DRM Driver");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 4/7] iommu/arm-smmu-v3: Add dependency on io-pgtable-arm format module

2020-12-21 Thread Isaac J. Manjarres
The SMMUv3 driver depends on the availability of the ARM LPAE io-pgtable
format code to work properly. In preparation for having the io-pgtable
formats as modules, add a "pre" dependency with MODULE_SOFTDEP() to
ensure that the io-pgtable-arm format module is loaded before loading
the ARM SMMUv3 driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 8ca7415..c498ac8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3650,3 +3650,4 @@ MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 
implementations");
 MODULE_AUTHOR("Will Deacon ");
 MODULE_ALIAS("platform:arm-smmu-v3");
 MODULE_LICENSE("GPL v2");
+MODULE_SOFTDEP("pre: io-pgtable-arm");
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[RFC PATCH v2 0/7] iommu: Permit modular builds of io-pgtable drivers

2020-12-21 Thread Isaac J. Manjarres
The goal of the Generic Kernel Image (GKI) effort is to have a common
kernel image that works across multiple Android devices. This involves
generating a kernel image that has core features integrated into it,
while SoC specific functionality can be added to the kernel for the
device as a module.

Along with modularizing IOMMU drivers, this also means building the
io-pgtable code as modules, which allows SoC vendors to include only
the io-pgtable implementations that they use. For example, GKI for
arm64 must currently include support for both the ARM LPAE and ARM V7S
io-pgtable formats. Having the code for both formats as modules allows
SoC vendors to ship only the page table format that they use, along
with their IOMMU driver.

Main changes since v1:

1) Retain io-pgtable.c as part of the core kernel

The patches are split into 4 parts:

1) Modularizing io-pgtable-arm[-v7s].c, while leaving the io-pgtable.c
code as part of the core kernel, requires removing the references to
the ARM LPAE and ARM V7S io-pgtable init functions, and using a
dynamic method for formats to register their io-pgtable init functions.

The reason for defining an io_pgtable_init_fns_node structure is to
keep the data structures used to store the init functions from leaking
into the io-pgtable format drivers. This allows the internal data
structure used to track the init functions to change without impacting
the client data structures.
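
A minimal sketch of what such a wrapper could look like, assuming
list-based bookkeeping inside io-pgtable.c (the structure layout here
is illustrative, not lifted from the series):

    #include <linux/list.h>
    #include <linux/io-pgtable.h>

    /*
     * Private to io-pgtable.c: wraps the init functions supplied by a
     * format driver in a list node, so the list machinery never leaks
     * into the format drivers themselves.
     */
    struct io_pgtable_init_fns_node {
            struct io_pgtable_init_fns *fns;  /* from the format driver */
            struct list_head node;            /* core-internal only */
    };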

2) Taking references to the io-pgtable format drivers to ensure that they
cannot be unloaded while in use.

3) Adding "pre" MODULE_SOFTDEP() dependencies to tristate drivers in
the kernel that invoke [alloc/free]_io_pgtable_ops(), so that the
io-pgtable format drivers are loaded before the drivers that need them.

4) Changing the Kconfig options for the ARM LPAE and ARM V7S formats to
tristate.

Thanks in advance for the feedback,

Isaac J. Manjarres

Isaac J. Manjarres (7):
  iommu/io-pgtable: Introduce dynamic io-pgtable fmt registration
  iommu/io-pgtable: Add refcounting for io-pgtable format modules
  iommu/arm-smmu: Add dependency on io-pgtable format modules
  iommu/arm-smmu-v3: Add dependency on io-pgtable-arm format module
  drm/msm: Add dependency on io-pgtable-arm format module
  drm/panfrost: Add dependency on io-pgtable-arm format module
  iommu/io-pgtable-arm: Allow building modular io-pgtable fmts

 drivers/gpu/drm/msm/msm_drv.c   |   1 +
 drivers/gpu/drm/panfrost/panfrost_drv.c |   1 +
 drivers/iommu/Kconfig   |   4 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |   1 +
 drivers/iommu/arm/arm-smmu/arm-smmu.c   |   1 +
 drivers/iommu/io-pgtable-arm-v7s.c  |  37 +-
 drivers/iommu/io-pgtable-arm.c  |  97 +++---
 drivers/iommu/io-pgtable.c  | 104 +++-
 include/linux/io-pgtable.h  |  53 +-
 9 files changed, 236 insertions(+), 63 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 5/7] drm/msm: Add dependency on io-pgtable-arm format module

2020-12-21 Thread Isaac J. Manjarres
The MSM DRM driver depends on the availability of the ARM LPAE io-pgtable
format code to work properly. In preparation for having the io-pgtable
formats as modules, add a "pre" dependency with MODULE_SOFTDEP() to
ensure that the io-pgtable-arm format module is loaded before loading
the MSM DRM driver module.

Signed-off-by: Isaac J. Manjarres 
---
 drivers/gpu/drm/msm/msm_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 535a026..8be3506 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1369,3 +1369,4 @@ module_exit(msm_drm_unregister);
 MODULE_AUTHOR("Rob Clark https://lists.linuxfoundation.org/mailman/listinfo/iommu

