[PATCH v4 5/6] iommu/iova: Extend rbtree node caching

2017-09-19 Thread Robin Murphy
The cached node mechanism provides a significant performance benefit for
allocations using a 32-bit DMA mask, but in the case of non-PCI devices
or where the 32-bit space is full, the loss of this benefit can be
significant - on large systems there can be many thousands of entries in
the tree, such that walking all the way down to find free space every
time becomes increasingly awful.

Maintain a similar cached node for the whole IOVA space as a superset of
the 32-bit space so that performance can remain much more consistent.

Inspired by work by Zhen Lei .

Tested-by: Ard Biesheuvel 
Tested-by: Zhen Lei 
Tested-by: Nate Watterson 
Signed-off-by: Robin Murphy 
---

v4:
 - Adjust to simplified __get_cached_rbnode() behaviour
 - Cosmetic tweaks

 drivers/iommu/iova.c | 43 +--
 include/linux/iova.h |  3 ++-
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index c93a6c46bcb1..a125a5786dbf 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -51,6 +51,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 
spin_lock_init(&iovad->iova_rbtree_lock);
iovad->rbroot = RB_ROOT;
+   iovad->cached_node = NULL;
iovad->cached32_node = NULL;
iovad->granule = granule;
iovad->start_pfn = start_pfn;
@@ -119,39 +120,38 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
if (limit_pfn <= iovad->dma_32bit_pfn && iovad->cached32_node)
return iovad->cached32_node;
 
+   if (iovad->cached_node)
+   return iovad->cached_node;
+
return &iovad->anchor.node;
 }
 
 static void
-__cached_rbnode_insert_update(struct iova_domain *iovad,
-   unsigned long limit_pfn, struct iova *new)
+__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 {
-   if (limit_pfn != iovad->dma_32bit_pfn)
-   return;
-   iovad->cached32_node = &new->node;
+   if (new->pfn_hi < iovad->dma_32bit_pfn)
+   iovad->cached32_node = &new->node;
+   else
+   iovad->cached_node = &new->node;
 }
 
 static void
 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 {
struct iova *cached_iova;
-   struct rb_node *curr;
+   struct rb_node **curr;
 
-   if (!iovad->cached32_node)
+   if (free->pfn_hi < iovad->dma_32bit_pfn)
+   curr = &iovad->cached32_node;
+   else
+   curr = &iovad->cached_node;
+
+   if (!*curr)
return;
-   curr = iovad->cached32_node;
-   cached_iova = rb_entry(curr, struct iova, node);
 
-   if (free->pfn_lo >= cached_iova->pfn_lo) {
-   struct rb_node *node = rb_next(&free->node);
-   struct iova *iova = rb_entry(node, struct iova, node);
-
-   /* only cache if it's below 32bit pfn */
-   if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
-   iovad->cached32_node = node;
-   else
-   iovad->cached32_node = NULL;
-   }
+   cached_iova = rb_entry(*curr, struct iova, node);
+   if (free->pfn_lo >= cached_iova->pfn_lo)
+   *curr = rb_next(&free->node);
 }
 
 /* Insert the iova into domain rbtree by holding writer lock */
@@ -189,7 +189,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
struct rb_node *curr, *prev;
struct iova *curr_iova;
unsigned long flags;
-   unsigned long saved_pfn, new_pfn;
+   unsigned long new_pfn;
unsigned long align_mask = ~0UL;
 
if (size_aligned)
@@ -197,7 +197,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 
/* Walk the tree backwards */
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-   saved_pfn = limit_pfn;
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
do {
@@ -218,7 +217,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 
/* If we have 'prev', it's a valid place to start the insertion. */
iova_insert_rbtree(&iovad->rbroot, new, prev);
-   __cached_rbnode_insert_update(iovad, saved_pfn, new);
+   __cached_rbnode_insert_update(iovad, new);
 
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 22dc30a28387..5eaedf77b152 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -70,7 +70,8 @@ struct iova_fq {
 struct iova_domain {
spinlock_t  iova_rbtree_lock; /* Lock to protect update of rbtree */
struct rb_root  rbroot; /* iova domain rbtree root */
-   struct rb_node  *cached32_node; /* Save last alloced node */
+   struct rb_node  *cached_node;   /* Save last alloced node */
+   struct rb_node  *cached32_node; /* Save last 32-bit alloced node */

[PATCH v4 4/6] iommu/iova: Simplify cached node logic

2017-09-19 Thread Robin Murphy
The logic of __get_cached_rbnode() is a little obtuse, but then
__get_prev_node_of_cached_rbnode_or_last_node_and_update_limit_pfn()
wouldn't exactly roll off the tongue...

Now that we have the invariant that there is always a valid node to
start searching downwards from, everything gets a bit easier to follow
if we simplify that function to do what it says on the tin and return
the cached node (or anchor node as appropriate) directly. In turn, we
can then deduplicate the rb_prev() and limit_pfn logic into the main
loop itself, further reduce the amount of code under the lock, and
generally make the inner workings a bit less subtle.

Signed-off-by: Robin Murphy 
---

v4: New

 drivers/iommu/iova.c | 42 ++
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 03b677afb109..c93a6c46bcb1 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -114,18 +114,12 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 
 static struct rb_node *
-__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
+__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
 {
-   if ((*limit_pfn > iovad->dma_32bit_pfn) ||
-   (iovad->cached32_node == NULL))
-   return rb_prev(&iovad->anchor.node);
-   else {
-   struct rb_node *prev_node = rb_prev(iovad->cached32_node);
-   struct iova *curr_iova =
-   rb_entry(iovad->cached32_node, struct iova, node);
-   *limit_pfn = curr_iova->pfn_lo;
-   return prev_node;
-   }
+   if (limit_pfn <= iovad->dma_32bit_pfn && iovad->cached32_node)
+   return iovad->cached32_node;
+
+   return &iovad->anchor.node;
 }
 
 static void
@@ -192,7 +186,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
unsigned long size, unsigned long limit_pfn,
struct iova *new, bool size_aligned)
 {
-   struct rb_node *prev, *curr = NULL;
+   struct rb_node *curr, *prev;
+   struct iova *curr_iova;
unsigned long flags;
unsigned long saved_pfn, new_pfn;
unsigned long align_mask = ~0UL;
@@ -203,29 +198,20 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
/* Walk the tree backwards */
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
saved_pfn = limit_pfn;
-   curr = __get_cached_rbnode(iovad, &limit_pfn);
-   prev = curr;
-   while (curr) {
-   struct iova *curr_iova = rb_entry(curr, struct iova, node);
-
-   if (limit_pfn <= curr_iova->pfn_lo)
-   goto move_left;
-
-   if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi)
-   break;  /* found a free slot */
-
-   limit_pfn = curr_iova->pfn_lo;
-move_left:
+   curr = __get_cached_rbnode(iovad, limit_pfn);
+   curr_iova = rb_entry(curr, struct iova, node);
+   do {
+   limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
+   new_pfn = (limit_pfn - size) & align_mask;
prev = curr;
curr = rb_prev(curr);
-   }
+   curr_iova = rb_entry(curr, struct iova, node);
+   } while (curr && new_pfn <= curr_iova->pfn_hi);
 
-   new_pfn = (limit_pfn - size) & align_mask;
if (limit_pfn < size || new_pfn < iovad->start_pfn) {
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
return -ENOMEM;
}
-
/* pfn_lo will point to size aligned address if size_aligned is set */
new->pfn_lo = new_pfn;
new->pfn_hi = new->pfn_lo + size - 1;
-- 
2.13.4.dirty



[PATCH v4 6/6] iommu/iova: Make dma_32bit_pfn implicit

2017-09-19 Thread Robin Murphy
From: Zhen Lei 

Now that the cached node optimisation can apply to all allocations, the
couple of users which were playing tricks with dma_32bit_pfn in order to
benefit from it can stop doing so. Conversely, there is also no need for
all the other users to explicitly calculate a 'real' 32-bit PFN, when
init_iova_domain() can happily do that itself from the page granularity.

CC: Thierry Reding 
CC: Jonathan Hunter 
CC: David Airlie 
CC: Sudeep Dutt 
CC: Ashutosh Dixit 
Signed-off-by: Zhen Lei 
Tested-by: Ard Biesheuvel 
Tested-by: Zhen Lei 
Tested-by: Nate Watterson 
[rm: use iova_shift(), rewrote commit message]
Signed-off-by: Robin Murphy 
---

v4: No change
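
A note on the iova.c side, which is cut off in this archive copy: the idea
is that init_iova_domain() now derives the 32-bit boundary itself, roughly
along these lines (a sketch inferred from the description above, not quoted
verbatim from the patch):

        /* in init_iova_domain(), instead of taking a pfn_32bit argument */
        iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));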

 drivers/gpu/drm/tegra/drm.c  |  3 +--
 drivers/gpu/host1x/dev.c |  3 +--
 drivers/iommu/amd_iommu.c|  7 ++-
 drivers/iommu/dma-iommu.c| 18 +-
 drivers/iommu/intel-iommu.c  | 11 +++
 drivers/iommu/iova.c |  4 ++--
 drivers/misc/mic/scif/scif_rma.c |  3 +--
 include/linux/iova.h |  5 ++---
 8 files changed, 13 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 597d563d636a..b822e484b7e5 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 
order = __ffs(tegra->domain->pgsize_bitmap);
init_iova_domain(&tegra->carveout.domain, 1UL << order,
-carveout_start >> order,
-carveout_end >> order);
+carveout_start >> order);
 
tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7f22c5c37660..5267c62e8896 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev)
 
order = __ffs(host->domain->pgsize_bitmap);
init_iova_domain(&host->iova, 1UL << order,
-geometry->aperture_start >> order,
-geometry->aperture_end >> order);
+geometry->aperture_start >> order);
host->iova_end = geometry->aperture_end;
}
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 51f8215877f5..647ab7691aee 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -63,7 +63,6 @@
 /* IO virtual address start page frame number */
 #define IOVA_START_PFN (1)
 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN  IOVA_PFN(DMA_BIT_MASK(32))
 
 /* Reserved IOVA ranges */
 #define MSI_RANGE_START		(0xfee00000)
@@ -1788,8 +1787,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
 
-   init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
-IOVA_START_PFN, DMA_32BIT_PFN);
+   init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
 
if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
goto free_dma_dom;
@@ -2696,8 +2694,7 @@ static int init_reserved_iova_ranges(void)
struct pci_dev *pdev = NULL;
struct iova *val;
 
-   init_iova_domain(&reserved_iova_ranges, PAGE_SIZE,
-IOVA_START_PFN, DMA_32BIT_PFN);
+   init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
 
lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
  &reserved_rbtree_key);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9d1cebe7f6cb..191be9c80a8a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
/* ...then finally give it a kicking to make sure it fits */
base_pfn = max_t(unsigned long, base_pfn,
domain->geometry.aperture_start >> order);
-   end_pfn = min_t(unsigned long, end_pfn,
-   domain->geometry.aperture_end >> order);
}
-   /*
-* PCI devices may have larger DMA masks, but still prefer allocating
-* within a 32-bit mask to avoid DAC addressing. Such limitations don't
-* apply to the typical platform device, so for those we may as well
-* leave the cache limit at the top of their range to save an rb_last()
-* traversal on every allocation.
-*/
-   if (dev && dev_is_pci(dev))
-   end_pfn &= DMA_BIT_MASK(32) >> order;
 
/* start_pfn is always nonzero for an already-initialised domain */
  

[PATCH v4 3/6] iommu/iova: Add rbtree anchor node

2017-09-19 Thread Robin Murphy
Add a permanent dummy IOVA reservation to the rbtree, such that we can
always access the top of the address space instantly. The immediate
benefit is that we remove the overhead of the rb_last() traversal when
not using the cached node, but it also paves the way for further
simplifications.

Signed-off-by: Robin Murphy 
---

v4: New

 drivers/iommu/iova.c | 15 +--
 include/linux/iova.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 20be9a8b3188..03b677afb109 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -24,6 +24,9 @@
 #include 
 #include 
 
+/* The anchor node sits above the top of the usable address space */
+#define IOVA_ANCHOR		~0UL
+
 static bool iova_rcache_insert(struct iova_domain *iovad,
   unsigned long pfn,
   unsigned long size);
@@ -54,6 +57,9 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
iovad->dma_32bit_pfn = pfn_32bit + 1;
iovad->flush_cb = NULL;
iovad->fq = NULL;
+   iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
+   rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
+   rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -112,7 +118,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 {
if ((*limit_pfn > iovad->dma_32bit_pfn) ||
(iovad->cached32_node == NULL))
-   return rb_last(&iovad->rbroot);
+   return rb_prev(&iovad->anchor.node);
else {
struct rb_node *prev_node = rb_prev(iovad->cached32_node);
struct iova *curr_iova =
@@ -246,7 +252,8 @@ EXPORT_SYMBOL(alloc_iova_mem);
 
 void free_iova_mem(struct iova *iova)
 {
-   kmem_cache_free(iova_cache, iova);
+   if (iova->pfn_lo != IOVA_ANCHOR)
+   kmem_cache_free(iova_cache, iova);
 }
 EXPORT_SYMBOL(free_iova_mem);
 
@@ -680,6 +687,10 @@ reserve_iova(struct iova_domain *iovad,
struct iova *iova;
unsigned int overlap = 0;
 
+   /* Don't allow nonsensical pfns */
+   if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
+   return NULL;
+
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
diff --git a/include/linux/iova.h b/include/linux/iova.h
index d179b9bf7814..22dc30a28387 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -74,6 +74,7 @@ struct iova_domain {
unsigned long   granule;/* pfn granularity for this domain */
unsigned long   start_pfn;  /* Lower limit for this domain */
unsigned long   dma_32bit_pfn;
+   struct iova anchor; /* rbtree lookup anchor */
struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];  /* IOVA range 
caches */
 
iova_flush_cb   flush_cb;   /* Call-Back function to flush IOMMU
-- 
2.13.4.dirty



[PATCH v4 2/6] iommu/iova: Optimise the padding calculation

2017-09-19 Thread Robin Murphy
From: Zhen Lei 

The mask for calculating the padding size doesn't change, so there's no
need to recalculate it every loop iteration. Furthermore, once we've
done that, it becomes clear that we don't actually need to calculate a
padding size at all - by flipping the arithmetic around, we can just
combine the upper limit, size, and mask directly to check against the
lower limit.

For an arm64 build, this alone knocks 20% off the object code size of
the entire alloc_iova() function!

Signed-off-by: Zhen Lei 
Tested-by: Ard Biesheuvel 
Tested-by: Zhen Lei 
Tested-by: Nate Watterson 
[rm: simplified more of the arithmetic, rewrote commit message]
Signed-off-by: Robin Murphy 
---

v4:
 - Round align_mask up instead of down (oops!)
 - Remove redundant !curr check
 - Introduce new_pfn variable here to reduce churn in later patches
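
As a worked example of the equivalence (illustration only, not part of the
patch; fls_long() and __roundup_pow_of_two() are the existing kernel helpers):

        unsigned long limit_pfn = 0x100000, size = 0x30, pfn_hi = 0xfff00;
        unsigned long align_mask = ~0UL << fls_long(size - 1);  /* ~0x3fUL */

        /* old: pad (limit_pfn - size) for natural alignment, then compare */
        unsigned long pad = (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
        bool old_ok = (pfn_hi + size + pad) < limit_pfn;        /* 0xfff40 < 0x100000 */

        /* new: compute the candidate start directly and compare */
        unsigned long new_pfn = (limit_pfn - size) & align_mask; /* 0xfffc0 */
        bool new_ok = new_pfn > pfn_hi;                          /* 0xfffc0 > 0xfff00 */

        /* both succeed, and both place the allocation at pfn_lo == 0xfffc0 */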

 drivers/iommu/iova.c | 42 +++---
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index f129ff4f5c89..20be9a8b3188 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -182,24 +182,17 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
rb_insert_color(&iova->node, root);
 }
 
-/*
- * Computes the padding size required, to make the start address
- * naturally aligned on the power-of-two order of its size
- */
-static unsigned int
-iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
-{
-   return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
-}
-
 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
unsigned long size, unsigned long limit_pfn,
struct iova *new, bool size_aligned)
 {
struct rb_node *prev, *curr = NULL;
unsigned long flags;
-   unsigned long saved_pfn;
-   unsigned int pad_size = 0;
+   unsigned long saved_pfn, new_pfn;
+   unsigned long align_mask = ~0UL;
+
+   if (size_aligned)
+   align_mask <<= fls_long(size - 1);
 
/* Walk the tree backwards */
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
@@ -209,31 +202,26 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
while (curr) {
struct iova *curr_iova = rb_entry(curr, struct iova, node);
 
-   if (limit_pfn <= curr_iova->pfn_lo) {
+   if (limit_pfn <= curr_iova->pfn_lo)
goto move_left;
-   } else if (limit_pfn > curr_iova->pfn_hi) {
-   if (size_aligned)
-   pad_size = iova_get_pad_size(size, limit_pfn);
-   if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
-   break;  /* found a free slot */
-   }
+
+   if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi)
+   break;  /* found a free slot */
+
limit_pfn = curr_iova->pfn_lo;
 move_left:
prev = curr;
curr = rb_prev(curr);
}
 
-   if (!curr) {
-   if (size_aligned)
-   pad_size = iova_get_pad_size(size, limit_pfn);
-   if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
-   spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-   return -ENOMEM;
-   }
+   new_pfn = (limit_pfn - size) & align_mask;
+   if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+   spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+   return -ENOMEM;
}
 
/* pfn_lo will point to size aligned address if size_aligned is set */
-   new->pfn_lo = limit_pfn - (size + pad_size);
+   new->pfn_lo = new_pfn;
new->pfn_hi = new->pfn_lo + size - 1;
 
/* If we have 'prev', it's a valid place to start the insertion. */
-- 
2.13.4.dirty



[PATCH v4 0/6] Optimise 64-bit IOVA allocations

2017-09-19 Thread Robin Murphy
v3: https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg19694.html

This was supposed to be just a rebase and repost, but in the meantime I
kept scratching the itch and ended up with two extra patches to simplify
the end result even more - the series was least churny with them added
in the middle, but #5 and #6 remain essentially unchanged from v3, such
that I'm happy the tested-bys all remain valid.

Robin.

Robin Murphy (3):
  iommu/iova: Add rbtree anchor node
  iommu/iova: Simplify cached node logic
  iommu/iova: Extend rbtree node caching

Zhen Lei (3):
  iommu/iova: Optimise rbtree searching
  iommu/iova: Optimise the padding calculation
  iommu/iova: Make dma_32bit_pfn implicit

 drivers/gpu/drm/tegra/drm.c  |   3 +-
 drivers/gpu/host1x/dev.c |   3 +-
 drivers/iommu/amd_iommu.c|   7 +-
 drivers/iommu/dma-iommu.c|  18 +
 drivers/iommu/intel-iommu.c  |  11 +---
 drivers/iommu/iova.c | 139 +--
 drivers/misc/mic/scif/scif_rma.c |   3 +-
 include/linux/iova.h |   9 +--
 8 files changed, 74 insertions(+), 119 deletions(-)

-- 
2.13.4.dirty



[PATCH v4 1/6] iommu/iova: Optimise rbtree searching

2017-09-19 Thread Robin Murphy
From: Zhen Lei 

Checking the IOVA bounds separately before deciding which direction to
continue the search (if necessary) results in redundantly comparing both
pfns twice each. GCC can already determine that the final comparison op
is redundant and optimise it down to 3 in total, but we can go one
further with a little tweak of the ordering (which makes the intent of
the code that much cleaner as a bonus).

Signed-off-by: Zhen Lei 
Tested-by: Ard Biesheuvel 
Tested-by: Zhen Lei 
Tested-by: Nate Watterson 
[rm: rewrote commit message to clarify]
Signed-off-by: Robin Murphy 
---

v4: No change

 drivers/iommu/iova.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 33edfa794ae9..f129ff4f5c89 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -342,15 +342,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
while (node) {
struct iova *iova = rb_entry(node, struct iova, node);
 
-   /* If pfn falls within iova's range, return iova */
-   if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
-   return iova;
-   }
-
if (pfn < iova->pfn_lo)
node = node->rb_left;
-   else if (pfn > iova->pfn_lo)
+   else if (pfn > iova->pfn_hi)
node = node->rb_right;
+   else
+   return iova;/* pfn falls within iova's range */
}
 
return NULL;
-- 
2.13.4.dirty



RE: [PATCH v7 1/5] Doc: iommu/arm-smmu-v3: Add workaround for HiSilicon erratum 161010801

2017-09-19 Thread Shameerali Kolothum Thodi


> -Original Message-
> From: Rob Herring [mailto:r...@kernel.org]
> Sent: Tuesday, September 19, 2017 3:53 PM
> To: Shameerali Kolothum Thodi 
> Cc: lorenzo.pieral...@arm.com; marc.zyng...@arm.com;
> sudeep.ho...@arm.com; will.dea...@arm.com; robin.mur...@arm.com;
> j...@8bytes.org; mark.rutl...@arm.com; hanjun@linaro.org; Gabriele
> Paoloni ; John Garry
> ; iommu@lists.linux-foundation.org; linux-arm-
> ker...@lists.infradead.org; linux-a...@vger.kernel.org;
> devicet...@vger.kernel.org; de...@acpica.org; Linuxarm
> ; Wangzhou (B) ;
> Guohanjun (Hanjun Guo) 
> Subject: Re: [PATCH v7 1/5] Doc: iommu/arm-smmu-v3: Add workaround for
> HiSilicon erratum 161010801
> 
> On Thu, Sep 14, 2017 at 01:57:52PM +0100, Shameer Kolothum wrote:
> > From: John Garry 
> >
> > The HiSilicon erratum 161010801 describes the limitation of HiSilicon
> platforms
> > hip06/hip07 to support the SMMU mappings for MSI transactions.
> >
> > On these platforms, GICv3 ITS translator is presented with the deviceID
> > by extending the MSI payload data to 64 bits to include the deviceID.
> > Hence, the PCIe controller on this platforms has to differentiate the MSI
> > payload against other DMA payload and has to modify the MSI payload.
> > This basically makes it difficult for this platforms to have a SMMU
> > translation for MSI.
> >
> > This patch adds a SMMUv3 binding to flag that the SMMU breaks msi
> > translation at ITS.
> >
> > Also, the arm64 silicon errata is updated with this same erratum.
> >
> > Signed-off-by: John Garry 
> > Signed-off-by: Shameer Kolothum
> 
[...]
> > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> > @@ -55,6 +55,9 @@ the PCIe specification.
> >  - hisilicon,broken-prefetch-cmd
> >  : Avoid sending CMD_PREFETCH_* commands to the SMMU.
> >
> > +- hisilicon,broken-untranslated-msi
> > +: Reserve ITS HW region to avoid translating msi.
> > +
> 
> This should be determined from the compatible string. Continuing to add
> properties for each errata doesn't scale.

Ok. I think the suggestion here is to follow the arm-smmu.c (SMMUv1/v2)
driver's way of implementing errata. As you might have noticed, the
SMMUv3 driver's DT errata framework currently depends on properties, so
this will change the way errata are implemented in the driver.
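
For illustration, one way a compatible-string keyed quirk could look; this
is only a rough sketch, and the "hisilicon,hi161x-smmu-v3" compatible, the
option flag and the helper below are invented for the example rather than
existing code or an agreed binding:

        struct arm_smmu_match_data {
                u32 options;
        };

        static const struct arm_smmu_match_data hi161x_data = {
                .options = ARM_SMMU_OPT_RESERVE_ITS_REGION,    /* hypothetical flag */
        };

        static const struct of_device_id arm_smmu_of_match[] = {
                { .compatible = "arm,smmu-v3" },
                { .compatible = "hisilicon,hi161x-smmu-v3", .data = &hi161x_data },
                { },
        };

        static void arm_smmu_apply_match_options(struct arm_smmu_device *smmu,
                                                 struct device *dev)
        {
                const struct arm_smmu_match_data *data = of_device_get_match_data(dev);

                if (data)
                        smmu->options |= data->options;
        }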

Hi Will/Robin,
Could you please take a look and let us know your thoughts on changing
the SMMUv3 dt errata implementation to version/model/compatible string
framework for this quirk.

Thanks,
Shameer


Re: [PATCH v7 1/5] Doc: iommu/arm-smmu-v3: Add workaround for HiSilicon erratum 161010801

2017-09-19 Thread Rob Herring
On Thu, Sep 14, 2017 at 01:57:52PM +0100, Shameer Kolothum wrote:
> From: John Garry 
> 
> The HiSilicon erratum 161010801 describes the limitation of HiSilicon 
> platforms
> hip06/hip07 to support the SMMU mappings for MSI transactions.
> 
> On these platforms, GICv3 ITS translator is presented with the deviceID
> by extending the MSI payload data to 64 bits to include the deviceID.
> Hence, the PCIe controller on this platforms has to differentiate the MSI
> payload against other DMA payload and has to modify the MSI payload.
> This basically makes it difficult for this platforms to have a SMMU
> translation for MSI.
> 
> This patch adds a SMMUv3 binding to flag that the SMMU breaks msi
> translation at ITS.
> 
> Also, the arm64 silicon errata is updated with this same erratum.
> 
> Signed-off-by: John Garry 
> Signed-off-by: Shameer Kolothum 
> ---
>  Documentation/arm64/silicon-errata.txt  | 1 +
>  Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 3 +++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
> index 66e8ce1..02816b1 100644
> --- a/Documentation/arm64/silicon-errata.txt
> +++ b/Documentation/arm64/silicon-errata.txt
> @@ -70,6 +70,7 @@ stable kernels.
>  |                |                 |                 |                             |
>  | Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
>  | Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
> +| Hisilicon      | Hip0{6,7}       | #161010801      | N/A                         |
>  |                |                 |                 |                             |
>  | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
>  | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
> diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> index c9abbf3..1f5f7f9 100644
> --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
> @@ -55,6 +55,9 @@ the PCIe specification.
>  - hisilicon,broken-prefetch-cmd
>  : Avoid sending CMD_PREFETCH_* commands to the SMMU.
>  
> +- hisilicon,broken-untranslated-msi
> +: Reserve ITS HW region to avoid translating msi.
> +

This should be determined from the compatible string. Continuing to add 
properties for each errata doesn't scale.

>  - cavium,cn9900-broken-page1-regspace
>  : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
> PRIQ_PROD/CONS register access with page 0 offsets.
> -- 
> 1.9.1
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe devicetree" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] iommu: arm-smmu: stall support

2017-09-19 Thread Rob Clark
On Tue, Sep 19, 2017 at 8:30 AM, Joerg Roedel  wrote:
> Hi Rob,
>
> thanks for the RFC patch. I have some comments about the interface to
> the IOMMU-API below.
>
> On Thu, Sep 14, 2017 at 03:44:33PM -0400, Rob Clark wrote:
>> +/**
>> + * iommu_domain_resume - Resume translations for a domain after a fault.
>> + *
>> + * This can be called at some point after the fault handler is called,
>> + * allowing the user of the IOMMU to (for example) handle the fault
>> + * from a task context.  It is illegal to call this if
>> + * iommu_domain_set_attr(STALL) failed.
>> + *
>> + * @domain:the domain to resume
>> + * @terminate: if true, the translation that triggered the fault should
>> + *be terminated, else it should be retried.
>> + */
>> +void iommu_domain_resume(struct iommu_domain *domain, bool terminate)
>> +{
>> + /* invalid to call if iommu_domain_set_attr(STALL) failed: */
>> + if (WARN_ON(!domain->ops->domain_resume))
>> + return;
>> + domain->ops->domain_resume(domain, terminate);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_domain_resume);
>
> So this function is being called by the device driver owning the domain,
> right?

yes, this was my plan

> I don't think that the resume call-back you added needs to be exposed
> like this. It is better to do the page-fault handling completly in the
> iommu-code, including calling the resume call-back and just let the
> device-driver provide a per-domain call-back to let it handle the fault
> and map in the required pages.

I would like to decide in the IRQ handler whether or not to queue work,
because when we get a gpu fault, we tend to get 1000's of gpu faults
all at once (and I really only need to handle the first one).  I
suppose that could also be achieved by having a special return value
from the fault handler to say "call me again from a wq".

Note that in the drm driver I already have a suitable wq to queue the
work, so it really doesn't buy me anything to have the iommu driver
toss things off to a wq for me.  Might be a different situation for
other drivers (but I guess mostly other drivers are using iommu API
indirectly via dma-mapping?)

> The interface could look like this:
>
> * New function iommu_domain_enable_stalls(domain) - When
>   this function returns the domain is in stall-handling mode. A
>   iommu_domain_disable_stalls() might make sense too, not sure
>   about that.

I don't particularly see a use-case for disabling stalls, fwiw

BR,
-R

> * When stalls are enabled for a domain, report_iommu_fault()
>   queues the fault to a workqueue (so that its handler can
>   block) and in the workqueue you call ->resume() based on the
>   return value of the handler.
>
> As a side-note, as there has been discussion on this: For now it doesn't
> make sense to merge this with the SVM page-fault handling efforts, as
> this path is different enough (SVM will call handle_mm_fault() as the
> handler, for example).
>
>
> Regards,
>
> Joerg
>


Re: [RFC PATCH 1/1] iommu/arm-smmu: Add support for multiple TBU child devices

2017-09-19 Thread Rob Herring
On Tue, Sep 12, 2017 at 05:31:07PM +0530, Vivek Gautam wrote:
> ARM MMU-500 implements a TBU (uTLB) for each connected master
> besides a single TCU which controls and manages the address
> translations. Each of these TBUs can either be in the same
> power domain as the master, or they can have a independent
> power switch.
> This design addresses the challenges to control TBU power.
> Adding child devices for each TBUs present in the configuration
> lets us control the power and clocks to TLBs having individual
> power domains. The device link between master devices (such as,
> display, and GPU) and TBU devices ensures that the master takes
> care of powering the smmu as long as it's available.
> When the master is not available, the TBUs are identified with
> sid and powered on.
> 
> Signed-off-by: Vivek Gautam 
> ---
> 
>  - The idea behind this patch is to handle the distributed smmu
>architectures, similar to MMU-500.
>  - Untested yet.
>  - There are still few instances where the correct tbu device has
>to be referenced and thus powered on to handle TLB maintenance
>operations.
> 
>  .../devicetree/bindings/iommu/arm,smmu.txt |  27 +++
>  drivers/iommu/arm-smmu.c   | 191 
> +++--
>  2 files changed, 205 insertions(+), 13 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt 
> b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
> index d97a6bc8e608..7cf67e75022e 100644
> --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
> +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
> @@ -98,6 +98,18 @@ conditions.
>accessed before master gets enabled and linked to its
>SMMU.
>  
> +- child nodes:ARM MMU-500 implements a TBU (page table cache or TLB) for
> +  each connected master besides a single TCU that controls
> +  and manages the address translations.
> +  Each of the child nodes represents a TBU that is attached 
> to
> +  the master. This child node will have following property:
> +
> +  - compatibe:   must be "arm,mmu-500-tbu" for TBU child nodes of 
> arm,mmu-500
> + smmu.
> +  - stream-id-range: array representing the starting stream id and the number
> + of supported stream-ids. This gives information about
> + the range of stream-ids that are supported by this TBU.

Needs a vendor prefix.

Also need to document reg property. What does reg represent? If just an 
index with no correlation to h/w numbering, then perhaps stream ids 
could be put into reg instead.

Rob


[PATCH 2/3] iommu/iova: Make rcache limit_pfn handling more robust

2017-09-19 Thread Robin Murphy
When popping a pfn from an rcache, we are currently checking it directly
against limit_pfn for viability. Since this represents iova->pfn_lo, it
is technically possible for the corresponding iova->pfn_hi to be greater
than limit_pfn. Although we generally get away with it in practice since
limit_pfn is typically a power-of-two boundary and the IOVAs are
size-aligned, it's pretty trivial to make the iova_rcache_get() path
take the allocation size into account for complete safety.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/iova.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 35dde0fc7793..8f8b436afd81 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -411,7 +411,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
unsigned long iova_pfn;
struct iova *new_iova;
 
-   iova_pfn = iova_rcache_get(iovad, size, limit_pfn);
+   iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
if (iova_pfn)
return iova_pfn;
 
@@ -828,7 +828,7 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 {
BUG_ON(iova_magazine_empty(mag));
 
-   if (mag->pfns[mag->size - 1] >= limit_pfn)
+   if (mag->pfns[mag->size - 1] > limit_pfn)
return 0;
 
return mag->pfns[--mag->size];
@@ -982,7 +982,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
return 0;
 
-   return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn);
+   return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
 }
 
 /*
-- 
2.13.4.dirty



[PATCH 3/3] iommu/iova: Try harder to allocate from rcache magazine

2017-09-19 Thread Robin Murphy
When devices with different DMA masks are using the same domain, or for
PCI devices where we usually try a speculative 32-bit allocation first,
there is a fair possibility that the top PFN of the rcache stack at any
given time may be unsuitable for the lower limit, prompting a fallback
to allocating anew from the rbtree. Consequently, we may end up
artificially increasing pressure on the 32-bit IOVA space as unused IOVAs
accumulate lower down in the rcache stacks, while callers with 32-bit
masks also impose unnecessary rbtree overhead.

In such cases, let's try a bit harder to satisfy the allocation locally
first - scanning the whole stack should still be relatively inexpensive,
and even rotating an entry up from the very bottom probably has less
overall impact than going to the rbtree.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/iova.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 8f8b436afd81..a7af8273fa98 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -826,12 +826,25 @@ static bool iova_magazine_empty(struct iova_magazine *mag)
 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
   unsigned long limit_pfn)
 {
+   int i;
+   unsigned long pfn;
+
BUG_ON(iova_magazine_empty(mag));
 
-   if (mag->pfns[mag->size - 1] > limit_pfn)
-   return 0;
+   /*
+* If we can pull a suitable pfn from anywhere in the stack, that's
+* still probably preferable to falling back to the rbtree.
+*/
+   for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
+   if (i == 0)
+   return 0;
 
-   return mag->pfns[--mag->size];
+   pfn = mag->pfns[i];
+   mag->size--;
+   for (; i < mag->size; i++)
+   mag->pfns[i] = mag->pfns[i + 1];
+
+   return pfn;
 }
 
 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
-- 
2.13.4.dirty



[PATCH 0/3] Misc IOVA tweaks

2017-09-19 Thread Robin Murphy
While I was elbow-deep in the IOVA code, a few other things came to
light which don't really fit into the rbtree optimisation series.
Patches #1 and #2 are more or less just cleanup, while patch #3
complements Tomasz' recent PCI allocation patch as it aims to
potentially improve the same situation.

Last time I checked, these should all apply independently and without
major conflicts against any other in-flight IOVA patches.

Robin.

Robin Murphy (3):
  iommu/iova: Simplify domain destruction
  iommu/iova: Make rcache limit_pfn handling more robust
  iommu/iova: Try harder to allocate from rcache magazine

 drivers/iommu/iova.c | 73 
 1 file changed, 28 insertions(+), 45 deletions(-)

-- 
2.13.4.dirty



[PATCH 1/3] iommu/iova: Simplify domain destruction

2017-09-19 Thread Robin Murphy
All put_iova_domain() should have to worry about is freeing memory - by
that point the domain must no longer be live, so the act of cleaning up
doesn't need to be concurrency-safe or maintain the rbtree in a
self-consistent state. There's no need to waste time with locking or
emptying the rcache magazines, and we can just use the postorder
traversal helper to clear out the remaining rbtree entries in-place.

Signed-off-by: Robin Murphy 
---
 drivers/iommu/iova.c | 50 ++
 1 file changed, 10 insertions(+), 40 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index a6cf775f75e0..35dde0fc7793 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -588,21 +588,12 @@ EXPORT_SYMBOL_GPL(queue_iova);
  */
 void put_iova_domain(struct iova_domain *iovad)
 {
-   struct rb_node *node;
-   unsigned long flags;
+   struct iova *iova, *tmp;
 
free_iova_flush_queue(iovad);
free_iova_rcaches(iovad);
-   spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-   node = rb_first(&iovad->rbroot);
-   while (node) {
-   struct iova *iova = rb_entry(node, struct iova, node);
-
-   rb_erase(node, &iovad->rbroot);
+   rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
free_iova_mem(iova);
-   node = rb_first(&iovad->rbroot);
-   }
-   spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 }
 EXPORT_SYMBOL_GPL(put_iova_domain);
 
@@ -995,46 +986,25 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 }
 
 /*
- * Free a cpu's rcache.
- */
-static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
-struct iova_rcache *rcache)
-{
-   struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
-   unsigned long flags;
-
-   spin_lock_irqsave(&cpu_rcache->lock, flags);
-
-   iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
-   iova_magazine_free(cpu_rcache->loaded);
-
-   iova_magazine_free_pfns(cpu_rcache->prev, iovad);
-   iova_magazine_free(cpu_rcache->prev);
-
-   spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-}
-
-/*
  * free rcache data structures.
  */
 static void free_iova_rcaches(struct iova_domain *iovad)
 {
struct iova_rcache *rcache;
-   unsigned long flags;
+   struct iova_cpu_rcache *cpu_rcache;
unsigned int cpu;
int i, j;
 
for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
rcache = &iovad->rcaches[i];
-   for_each_possible_cpu(cpu)
-   free_cpu_iova_rcache(cpu, iovad, rcache);
-   spin_lock_irqsave(&rcache->lock, flags);
-   free_percpu(rcache->cpu_rcaches);
-   for (j = 0; j < rcache->depot_size; ++j) {
-   iova_magazine_free_pfns(rcache->depot[j], iovad);
-   iova_magazine_free(rcache->depot[j]);
+   for_each_possible_cpu(cpu) {
+   cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
+   iova_magazine_free(cpu_rcache->loaded);
+   iova_magazine_free(cpu_rcache->prev);
}
-   spin_unlock_irqrestore(&rcache->lock, flags);
+   free_percpu(rcache->cpu_rcaches);
+   for (j = 0; j < rcache->depot_size; ++j)
+   iova_magazine_free(rcache->depot[j]);
}
 }
 
-- 
2.13.4.dirty



Re: [PATCH] iommu: QCOM_IOMMU should depend on HAS_DMA

2017-09-19 Thread Joerg Roedel
On Mon, Sep 11, 2017 at 02:34:34PM +0200, Geert Uytterhoeven wrote:
> 
> Fixes: 0ae349a0f33fb040 ("iommu/qcom: Add qcom_iommu")
> Signed-off-by: Geert Uytterhoeven 
> ---
>  drivers/iommu/Kconfig | 1 +
>  1 file changed, 1 insertion(+)

Applied, thanks.



Re: [PATCH] iommu/exynos: Rework runtime PM links management

2017-09-19 Thread Joerg Roedel
On Fri, Sep 15, 2017 at 01:05:08PM +0200, Marek Szyprowski wrote:
> add_device is a bit more suitable for establishing runtime PM links than
> the xlate callback. This change also makes it possible to implement proper
> cleanup - in remove_device callback.
> 
> Signed-off-by: Marek Szyprowski 
> ---
>  drivers/iommu/exynos-iommu.c | 23 ---
>  1 file changed, 16 insertions(+), 7 deletions(-)

Applied, thanks.



Re: [RFC] iommu: arm-smmu: stall support

2017-09-19 Thread Joerg Roedel
Hi Rob,

thanks for the RFC patch. I have some comments about the interface to
the IOMMU-API below.

On Thu, Sep 14, 2017 at 03:44:33PM -0400, Rob Clark wrote:
> +/**
> + * iommu_domain_resume - Resume translations for a domain after a fault.
> + *
> + * This can be called at some point after the fault handler is called,
> + * allowing the user of the IOMMU to (for example) handle the fault
> + * from a task context.  It is illegal to call this if
> + * iommu_domain_set_attr(STALL) failed.
> + *
> + * @domain:the domain to resume
> + * @terminate: if true, the translation that triggered the fault should
> + *be terminated, else it should be retried.
> + */
> +void iommu_domain_resume(struct iommu_domain *domain, bool terminate)
> +{
> + /* invalid to call if iommu_domain_set_attr(STALL) failed: */
> + if (WARN_ON(!domain->ops->domain_resume))
> + return;
> + domain->ops->domain_resume(domain, terminate);
> +}
> +EXPORT_SYMBOL_GPL(iommu_domain_resume);

So this function is being called by the device driver owning the domain,
right?

I don't think that the resume call-back you added needs to be exposed
like this. It is better to do the page-fault handling completly in the
iommu-code, including calling the resume call-back and just let the
device-driver provide a per-domain call-back to let it handle the fault
and map in the required pages.

The interface could look like this:

* New function iommu_domain_enable_stalls(domain) - When
  this function returns the domain is in stall-handling mode. A
  iommu_domain_disable_stalls() might make sense too, not sure
  about that.

* When stalls are enabled for a domain, report_iommu_fault()
  queues the fault to a workqueue (so that its handler can
  block) and in the workqueue you call ->resume() based on the
  return value of the handler.
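
To make that a bit more concrete, a loose sketch of how the workqueue side
could hang together; apart from iommu_domain_enable_stalls(),
report_iommu_fault() and the ->domain_resume() callback discussed above,
all names and fields here are invented for illustration:

        struct iommu_stall_fault {
                struct work_struct work;
                struct iommu_domain *domain;
                unsigned long iova;
                int flags;
        };

        static void iommu_stall_fault_work(struct work_struct *work)
        {
                struct iommu_stall_fault *fault =
                        container_of(work, struct iommu_stall_fault, work);
                struct iommu_domain *domain = fault->domain;
                int ret;

                /* the per-domain handler may sleep, e.g. to map in the faulting pages */
                ret = domain->stall_handler(domain, fault->iova, fault->flags,
                                            domain->stall_handler_token);

                /* retry the stalled transaction on success, terminate it otherwise */
                domain->ops->domain_resume(domain, ret != 0);
                kfree(fault);
        }

        int iommu_domain_enable_stalls(struct iommu_domain *domain)
        {
                if (!domain->ops->domain_resume)
                        return -ENODEV;

                domain->stall_wq = alloc_workqueue("iommu-stalls", WQ_UNBOUND, 0);
                return domain->stall_wq ? 0 : -ENOMEM;
        }

        /*
         * report_iommu_fault() would then allocate an iommu_stall_fault and
         * queue_work(domain->stall_wq, &fault->work) instead of resuming
         * directly from IRQ context.
         */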

As a side-note, as there has been discussion on this: For now it doesn't
make sense to merge this with the SVM page-fault handling efforts, as
this path is different enough (SVM will call handle_mm_fault() as the
handler, for example).


Regards,

Joerg



Re: [PATCH] iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD

2017-09-19 Thread Jean-Philippe Brucker
On 14/09/17 06:08, Yisheng Xie wrote:
> According to Spec, it is ILLEGAL to set STE.S1STALLD if STALL_MODEL
> is not 0b00, which means we should not disable stall mode if stall
> or terminate mode is not configuable.
> 
> As Jean-Philippe's suggestion, this patch introduce a feature bit
> ARM_SMMU_FEAT_STALL_FORCE, which means smmu only supports stall force.
> Therefore, we can avoid the ILLEGAL setting of STE.S1STALLD.by checking
> ARM_SMMU_FEAT_STALL_FORCE.
> 
> This patch keeps the ARM_SMMU_FEAT_STALLS as the meaning of stall supported
> (force or configuable) to easy to expand the future function, i.e. we can
> only use ARM_SMMU_FEAT_STALLS to check whether we should register fault
> handle or enable master can_stall, etc to supporte platform SVM.
> 
> After apply this patch, the feature bit and S1STALLD setting will be like:
> STALL_MODEL   FEATURE                                                S1STALLD
> 0b00          ARM_SMMU_FEAT_STALLS                                   0b1
> 0b01          !ARM_SMMU_FEAT_STALLS && !ARM_SMMU_FEAT_STALL_FORCE    0b0
> 0b10          ARM_SMMU_FEAT_STALLS && ARM_SMMU_FEAT_STALL_FORCE      0b0

Thanks for the patch. Since it's the same problem, could you also fix the
context descriptor value? The spec says, in 5.5 Fault configuration:

"A CD (Stage 1 translation enabled) is considered ILLEGAL if one of the
following applies:
* SMMU_(S_)IDR0.STALL_MODEL == 0b10 and CD.S == 0."

So I think we should always set CD.S if the SMMU has STALL_FORCE.
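
In arm_smmu_write_ctx_desc() that would be something along these lines,
assuming a CTXDESC_CD_0_S bit definition next to the existing
CTXDESC_CD_0_* fields (sketch only, untested):

        if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
                val |= CTXDESC_CD_0_S;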

As Will pointed out, more work is needed for STALL_FORCE. We can't enable
translation at all for devices that don't support stalling (e.g. PCI). We
should force them into bypass or abort mode depending on the config. Maybe
we can fix that later, after the devicetree property is added.

> Signed-off-by: Yisheng Xie 
> ---
>  drivers/iommu/arm-smmu-v3.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index e67ba6c..d2a3627 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -603,7 +603,8 @@ struct arm_smmu_device {
>  #define ARM_SMMU_FEAT_TRANS_S1   (1 << 9)
>  #define ARM_SMMU_FEAT_TRANS_S2   (1 << 10)
>  #define ARM_SMMU_FEAT_STALLS (1 << 11)
> -#define ARM_SMMU_FEAT_HYP          (1 << 12)
> +#define ARM_SMMU_FEAT_STALL_FORCE  (1 << 12)
> +#define ARM_SMMU_FEAT_HYP          (1 << 13)

We probably should keep the feature bits backward compatible and only add
new ones at the end. It's not ABI, but it's printed at boot time and I
sometimes use them when inspecting the kernel output to see what an SMMU
supports.

Thanks,
Jean

>   u32 features;
>  
>  #define ARM_SMMU_OPT_SKIP_PREFETCH   (1 << 0)
> @@ -1112,7 +1113,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>  #endif
>STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
>  
> - if (smmu->features & ARM_SMMU_FEAT_STALLS)
> + if (smmu->features & ARM_SMMU_FEAT_STALLS &&
> +!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
>   dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
>  
>   val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
> @@ -2536,9 +2538,10 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>coherent ? "true" : "false");
>  
>   switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
> - case IDR0_STALL_MODEL_STALL:
> - /* Fallthrough */
>   case IDR0_STALL_MODEL_FORCE:
> + smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
> + /* Fallthrough */
> + case IDR0_STALL_MODEL_STALL:
>   smmu->features |= ARM_SMMU_FEAT_STALLS;
>   }
>  
> 



Re: [PATCH] iommu/ipmmu-vmsa: Do not replace bus IOMMU ops on driver init.

2017-09-19 Thread Robin Murphy
Hi Laurent,

On 19/09/17 08:07, Laurent Pinchart wrote:
> Hi Liviu,
> 
> Thank you for the patch.
> 
> On Monday, 18 September 2017 13:04:44 EEST Liviu Dudau wrote:
>> If the IPMMU driver is compiled in the kernel it will replace the
>> platform bus IOMMU ops on running the ipmmu_init() function, regardless
>> if there is any IPMMU hardware present or not. This screws up systems
>> that just want to build a generic kernel that runs on multiple platforms
>> and use a different IOMMU implementation.
>>
>> Move the bus_set_iommu() call at the end of the ipmmu_probe() function
>> when we know that hardware is present. With current IOMMU framework it
>> should be safe (at least for OF case).
> 
> If I recall correctly the issue is that the IPMMU might be probed after bus 
> master devices when using the legacy IOMMU DT integration, which the ipmmu-
> vmsa driver does on ARM32. Calling bus_set_iommu() from the probe function 
> will then result in some devices losing IOMMU support.

Yes, that used to be the problem, but since IPMMU uses the generic
bindings and iommu_device_register(), it should now benefit from client
probe-deferral even when it doesn't support of_xlate().

Robin.

>> Now that the ipmmu_init() and ipmmu_exit() functions are simple calls to
>> platform_driver_register() and platform_driver_unregister(), replace
>> them with the module_platform_driver() macro call.
>>
>> Signed-off-by: Liviu Dudau 
>> Cc: Laurent Pinchart 
>> ---
>>  drivers/iommu/ipmmu-vmsa.c | 29 +
>>  1 file changed, 5 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
>> index 2a38aa15be17d..c60569c74678d 100644
>> --- a/drivers/iommu/ipmmu-vmsa.c
>> +++ b/drivers/iommu/ipmmu-vmsa.c
>> @@ -1055,10 +1055,11 @@ static int ipmmu_probe(struct platform_device *pdev)
>> ipmmu_device_reset(mmu);
>>
>>  /*
>> - * We can't create the ARM mapping here as it requires the bus to have
>> - * an IOMMU, which only happens when bus_set_iommu() is called in
>> - * ipmmu_init() after the probe function returns.
>> + * Now that we have validated the presence of the hardware, set
>> + * the bus IOMMU ops to enable future domain and device setup.
>>   */
>> +if (!iommu_present(&platform_bus_type))
>> +bus_set_iommu(&platform_bus_type, &ipmmu_ops);
>>
>>  spin_lock(&ipmmu_devices_lock);
>>  list_add(&mmu->list, &ipmmu_devices);
> 
> What branch is this patch based on ? ipmmu_devices_lock isn't in mainline.
> 
>> @@ -1100,27 +1101,7 @@ static struct platform_driver ipmmu_driver = {
>>  .remove = ipmmu_remove,
>>  };
>>
>> -static int __init ipmmu_init(void)
>> -{
>> -int ret;
>> -
>> -ret = platform_driver_register(&ipmmu_driver);
>> -if (ret < 0)
>> -return ret;
>> -
>> -if (!iommu_present(&platform_bus_type))
>> -bus_set_iommu(&platform_bus_type, &ipmmu_ops);
>> -
>> -return 0;
>> -}
>> -
>> -static void __exit ipmmu_exit(void)
>> -{
>> -return platform_driver_unregister(&ipmmu_driver);
>> -}
>> -
>> -subsys_initcall(ipmmu_init);
>> -module_exit(ipmmu_exit);
>> +module_platform_driver(ipmmu_driver);
>>
>>  MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
>>  MODULE_AUTHOR("Laurent Pinchart ");
> 



Re: [RFC] virtio-iommu version 0.4

2017-09-19 Thread Jean-Philippe Brucker
Hi Eric,

On 12/09/17 18:13, Auger Eric wrote:
> 2.6.7
> - As I am currently integrating v0.4 in QEMU here are some other comments:
> At the moment struct virtio_iommu_req_probe flags is missing in your
> header. As such I understood the ACK protocol was not implemented by the
> driver in your branch.

Uh indeed. And yet I could swear I've written that code... somewhere. I
will add it to the batch of v0.5 changes, it shouldn't be too invasive.

> - VIRTIO_IOMMU_PROBE_PROPERTY_TYPE_MASK is VIRTIO_IOMMU_T_MASK in your
> header too.

Yes, keeping VIRTIO_IOMMU_PROBE_PROPERTY_TYPE_MASK is probably best
(though it is a mouthful).

> 2.6.8.2:
> - I am really confused about what the device should report as resv
> regions depending on the PE nature (VFIO or not VFIO)
>
> In other iommu drivers, the resv regions are populated by the iommu
> driver through its get_resv_regions callback. They are usually composed
> of an iommu specific MSI region (mapped or bypassed) and non IOMMU
> specific (device specific) reserved regions:
> iommu_dma_get_resv_regions(). In the case of virtio-iommu driver, those
> are the guest reserved regions.
>
> First in the current virtio-iommu driver I don't see the
> iommu_dma_get_resv_regions call. Do you agree that the virtio-iommu
> driver should compute the non IOMMU specific MSI regions. ie. this is
> not the responsability of the virtio-iommu device.

For SW_MSI, certainly. The driver allocates a fixed IOVA region for
mapping the MSI doorbell. But the driver has to know whether the doorbell
region is translated or bypassed.

> Then why is it more the job of the device to return the guest iommu
> specific region rather than the driver itself?

The MSI region is architectural on x86 IOMMUs, but
implementation-defined on virtio-iommu. It depends which platform the host
is emulating. In Linux, x86 IOMMU drivers register the bypass region
because there always is an IOAPIC on the other end, with a fixed MSI
address. But virtio-iommu may be either behind a GIC, an APIC or some
other IRQ chip.

The driver *could* go over all the irqchips/platforms it knows and try to
guess if there is a fixed doorbell or if it needs to reserve an IOVA for
them, but it would look horrible. I much prefer having a well-defined way
of doing this, so a description from the device.

> Then I understand this is the responsability of the virtio-iommu device
> to gather information about the host resv regions in case of VFIO EP.
> Typically the host PCIe host bridge windows cannot be used for IOVA.
> Also the host MSI reserved IOVA window cannot be used. Do you agree.

Yes, all regions reported in sysfs reserved_regions in the host would be
reported as RESV_T_RESERVED by virtio-iommu.

> I really think the spec should clarify what exact resv regions the
> device should return in case of VFIO device and non VFIO device.

Agreed. I will add something about RESV_T_RESERVED with the PCI bridge
example in Implementation Notes. Do you think the MSI examples at the end
need improvement as well? I can try to explain that RESV_MSI regions in
virtio-iommu are only those of the emulated platform, not the HW or SW MSI
regions from the host.

Thanks,
Jean


Re: [PATCH v2 3/4] iommu/iova: Extend rbtree node caching

2017-09-19 Thread Robin Murphy
On 19/09/17 10:42, Leizhen (ThunderTown) wrote:
[...]
>>  static void
>>  __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
>>  {
>>          struct iova *cached_iova;
>> -        struct rb_node *curr;
>> +        struct rb_node **curr = NULL;
>> -        if (!iovad->cached32_node)
>> -                return;
>> -        curr = iovad->cached32_node;
>> -        cached_iova = rb_entry(curr, struct iova, node);
>>
>> -
>> +        if (free->pfn_hi < iovad->dma_32bit_pfn)
>> +                curr = &iovad->cached32_node;
>> +        if (!curr)
>> +                curr = &iovad->cached_node;
>>
>> +        if (!*curr)
>> +                return;
> Is it necessary for us to try the following adjustment?
> +        if (free->pfn_hi < iovad->dma_32bit_pfn)
> +                curr = &iovad->cached32_node;
> +        else
> +                curr = &iovad->cached_node;
> +
> +        if (!*curr) {
> +                *curr = rb_next(&free->node);
> +                return;
> +        }

Yeah, I spotted that this looked a bit wonky after I posted it. It's
already cleaned up in v3, which I'll be posting shortly after I write up
some cover letters.

Thanks,
Robin.


Re: [PATCH] iommu/ipmmu-vmsa: Do not replace bus IOMMU ops on driver init.

2017-09-19 Thread Liviu Dudau
On Tue, Sep 19, 2017 at 10:07:58AM +0300, Laurent Pinchart wrote:
> Hi Liviu,
> 
> Thank you for the patch.
> 
> On Monday, 18 September 2017 13:04:44 EEST Liviu Dudau wrote:
> > If the IPMMU driver is compiled in the kernel it will replace the
> > platform bus IOMMU ops on running the ipmmu_init() function, regardless
> > if there is any IPMMU hardware present or not. This screws up systems
> > that just want to build a generic kernel that runs on multiple platforms
> > and use a different IOMMU implementation.
> > 
> > Move the bus_set_iommu() call at the end of the ipmmu_probe() function
> > when we know that hardware is present. With current IOMMU framework it
> > should be safe (at least for OF case).
> 
> If I recall correctly the issue is that the IPMMU might be probed after bus 
> master devices when using the legacy IOMMU DT integration, which the ipmmu-
> vmsa driver does on ARM32. Calling bus_set_iommu() from the probe function 
> will then result in some devices losing IOMMU support.

This is on arm64, on the Arm Juno board.

> 
> > Now that the ipmmu_init() and ipmmu_exit() functions are simple calls to
> > platform_driver_register() and platform_driver_unregister(), replace
> > them with the module_platform_driver() macro call.
> > 
> > Signed-off-by: Liviu Dudau 
> > Cc: Laurent Pinchart 
> > ---
> >  drivers/iommu/ipmmu-vmsa.c | 29 +
> >  1 file changed, 5 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
> > index 2a38aa15be17d..c60569c74678d 100644
> > --- a/drivers/iommu/ipmmu-vmsa.c
> > +++ b/drivers/iommu/ipmmu-vmsa.c
> > @@ -1055,10 +1055,11 @@ static int ipmmu_probe(struct platform_device *pdev)
> > ipmmu_device_reset(mmu);
> > 
> > /*
> > -* We can't create the ARM mapping here as it requires the bus to have
> > -* an IOMMU, which only happens when bus_set_iommu() is called in
> > -* ipmmu_init() after the probe function returns.
> > +* Now that we have validated the presence of the hardware, set
> > +* the bus IOMMU ops to enable future domain and device setup.
> >  */
> > +   if (!iommu_present(&platform_bus_type))
> > +   bus_set_iommu(&platform_bus_type, &ipmmu_ops);
> > 
> > spin_lock(&ipmmu_devices_lock);
> > list_add(&mmu->list, &ipmmu_devices);
> 
> What branch is this patch based on ? ipmmu_devices_lock isn't in mainline.

drm-next. :)
Looks like things have moved forward since I made the patch before
LPC; I will update and resend.

Best regards,
Liviu

> 
> > @@ -1100,27 +1101,7 @@ static struct platform_driver ipmmu_driver = {
> > .remove = ipmmu_remove,
> >  };
> > 
> > -static int __init ipmmu_init(void)
> > -{
> > -   int ret;
> > -
> > -   ret = platform_driver_register(&ipmmu_driver);
> > -   if (ret < 0)
> > -   return ret;
> > -
> > -   if (!iommu_present(&platform_bus_type))
> > -   bus_set_iommu(&platform_bus_type, &ipmmu_ops);
> > -
> > -   return 0;
> > -}
> > -
> > -static void __exit ipmmu_exit(void)
> > -{
> > -   return platform_driver_unregister(&ipmmu_driver);
> > -}
> > -
> > -subsys_initcall(ipmmu_init);
> > -module_exit(ipmmu_exit);
> > +module_platform_driver(ipmmu_driver);
> > 
> >  MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
> >  MODULE_AUTHOR("Laurent Pinchart ");
> 
> -- 
> Regards,
> 
> Laurent Pinchart
> 

-- 

| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---
¯\_(ツ)_/¯

Re: [PATCH] iommu: dmar: fix harmless section mismatch warning

2017-09-19 Thread Joerg Roedel
On Tue, Sep 12, 2017 at 10:10:21PM +0200, Arnd Bergmann wrote:
> Building with gcc-4.6 results in this warning due to
> dmar_table_print_dmar_entry being inlined as in newer compiler versions:
> 
> WARNING: vmlinux.o(.text+0x5c8bee): Section mismatch in reference from the 
> function dmar_walk_remapping_entries() to the function 
> .init.text:dmar_table_print_dmar_entry()
> The function dmar_walk_remapping_entries() references
> the function __init dmar_table_print_dmar_entry().
> This is often because dmar_walk_remapping_entries lacks a __init
> annotation or the annotation of dmar_table_print_dmar_entry is wrong.
> 
> This removes the __init annotation to avoid the warning. On compilers
> that don't show the warning today, this should have no impact since the
> function gets inlined anyway.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/iommu/dmar.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied, thanks.
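
The pattern behind this class of warning is straightforward; a schematic
example (not the actual dmar.c code) looks like this:

#include <linux/init.h>
#include <linux/printk.h>

/* __init code is placed in .init.text and discarded once boot finishes. */
static void __init print_entry(int type)
{
	pr_info("DMAR entry type %d\n", type);
}

/* Not __init, so it stays in .text and may run after .init.text is freed.
 * Unless the compiler inlines print_entry(), modpost flags the reference.
 */
int walk_entries(int type)
{
	print_entry(type);
	return 0;
}

Dropping the __init annotation, as the patch does, removes the cross-section
reference at the cost of keeping a few bytes of code after boot whenever the
function is not inlined.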


Re: [PATCH v2 3/4] iommu/iova: Extend rbtree node caching

2017-09-19 Thread Leizhen (ThunderTown)


On 2017/7/31 19:42, Robin Murphy wrote:
> Hi Nate,
> 
> On 29/07/17 04:57, Nate Watterson wrote:
>> Hi Robin,
>> I am seeing a crash when performing very basic testing on this series
>> with a Mellanox CX4 NIC. I dug into the crash a bit, and think this
>> patch is the culprit, but this rcache business is still mostly
>> witchcraft to me.
>>
>> # ifconfig eth5 up
>> # ifconfig eth5 down
>> Unable to handle kernel NULL pointer dereference at virtual address
>> 0020
>> user pgtable: 64k pages, 48-bit VAs, pgd = 8007dbf47c00
>> [0020] *pgd=0006efab0003, *pud=0006efab0003,
>> *pmd=0007d8720003, *pte=
>> Internal error: Oops: 9607 [#1] SMP
>> Modules linked in:
>> CPU: 47 PID: 5082 Comm: ifconfig Not tainted 4.13.0-rtp-enablement+ #3
>> task: 8007da1e5780 task.stack: 8007ddcb8000
>> PC is at __cached_rbnode_delete_update+0x2c/0x58
>> LR is at private_free_iova+0x2c/0x60
>> pc : [] lr : [] pstate: 204001c5
>> sp : 8007ddcbba00
>> x29: 8007ddcbba00 x28: 8007c8350210
>> x27: 8007d1a8 x26: 8007dcc20800
>> x25: 0140 x24: 8007c98f0008
>> x23: fe4e x22: 0140
>> x21: 8007c98f0008 x20: 8007c9adb240
>> x19: 8007c98f0018 x18: 0010
>> x17:  x16: 
>> x15: 4000 x14: 
>> x13:  x12: 0001
>> x11: dead0200 x10: 
>> x9 :  x8 : 8007c9adb1c0
>> x7 : 40002000 x6 : 00210d00
>> x5 :  x4 : c57e
>> x3 : ffcf x2 : ffcf
>> x1 : 8007c9adb240 x0 : 
>> [...]
>> [] __cached_rbnode_delete_update+0x2c/0x58
>> [] private_free_iova+0x2c/0x60
>> [] iova_magazine_free_pfns+0x4c/0xa0
>> [] free_iova_fast+0x1b0/0x230
>> [] iommu_dma_free_iova+0x5c/0x80
>> [] __iommu_dma_unmap+0x5c/0x98
>> [] iommu_dma_unmap_resource+0x24/0x30
>> [] iommu_dma_unmap_page+0xc/0x18
>> [] __iommu_unmap_page+0x40/0x60
>> [] mlx5e_page_release+0xbc/0x128
>> [] mlx5e_dealloc_rx_wqe+0x30/0x40
>> [] mlx5e_close_channel+0x70/0x1f8
>> [] mlx5e_close_channels+0x2c/0x50
>> [] mlx5e_close_locked+0x54/0x68
>> [] mlx5e_close+0x30/0x58
>> [...]
>>
>> ** Disassembly for __cached_rbnode_delete_update() near the fault **
>>   92|if (free->pfn_hi < iovad->dma_32bit_pfn)
>> 0852C6C4|ldr x3,[x1,#0x18]; x3,[free,#24]
>> 0852C6C8|ldr x2,[x0,#0x30]; x2,[iovad,#48]
>> 0852C6CC|cmp x3,x2
>> 0852C6D0|b.cs0x0852C708
>> |curr = &iovad->cached32_node;
>>   94|if (!curr)
>> 0852C6D4|addsx19,x0,#0x18 ; x19,iovad,#24
>> 0852C6D8|b.eq0x0852C708
>> |
>> |cached_iova = rb_entry(*curr, struct iova, node);
>> |
>>   99|if (free->pfn_lo >= cached_iova->pfn_lo)
>> 0852C6DC|ldr x0,[x19] ; xiovad,[curr]
>> 0852C6E0|ldr x2,[x1,#0x20]; x2,[free,#32]
>> 0852C6E4|ldr x0,[x0,#0x20]; x0,[x0,#32]
>> Apparently cached_iova was NULL so the pfn_lo access faulted.
>>
>> 0852C6E8|cmp x2,x0
>> 0852C6EC|b.cc0x0852C6FC
>> 0852C6F0|mov x0,x1; x0,free
>>  100|*curr = rb_next(&free->node);
>> After instrumenting the code a bit, this seems to be the culprit. In the
>> previous call, free->pfn_lo was 0x_ which is actually the
>> dma_limit for the domain so rb_next() returns NULL.
>>
>> Let me know if you have any questions or would like additional tests
>> run. I also applied your "DMA domain debug info" patches and dumped the
>> contents of the domain at each of the steps above in case that would be
>> useful. If nothing else, they reinforce how thirsty the CX4 NIC is
>> especially when using 64k pages and many CPUs.
> 
> Thanks for the report - I somehow managed to reason myself out of
> keeping the "no cached node" check in __cached_rbnode_delete_update() on
> the assumption that it must always be set by a previous allocation.
> However, there is indeed just one case for which that fails: when
> you free any IOVA immediately after freeing the very topmost one. Which
> is something that freeing an entire magazine's worth of IOVAs back to
> the tree all at once has a very real chance of doing...
> 
> The obvious straightforward fix is inline below, but I'm now starting to
> understand the appeal of reserving a sentinel node to ensure the tree
> can never be empty, so I might have a quick go at that to see if it
> r

Re: [PATCH] iommu: Add missing dependencies

2017-09-19 Thread Joerg Roedel
On Sun, Sep 10, 2017 at 01:43:37PM -0700, Guenter Roeck wrote:
> parisc:allmodconfig, xtensa:allmodconfig, and possibly others generate
> the following Kconfig warning.
> 
> warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects
> IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT &&
> HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64))
> 
> IOMMU_IO_PGTABLE_LPAE depends on (COMPILE_TEST && !GENERIC_ATOMIC64),
> so any configuration option selecting it needs to have the same dependencies.
> 
> Signed-off-by: Guenter Roeck 
> ---
>  drivers/iommu/Kconfig | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Applied, thanks.


Re: [PATCH] iommu/amd: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->flush_queue

2017-09-19 Thread Joerg Roedel
Hi Sebastian,

On Wed, Sep 06, 2017 at 12:34:59PM +0200, Sebastian Andrzej Siewior wrote:
> get_cpu_ptr() disables preemption and returns the ->flush_queue object
> of the current CPU. raw_cpu_ptr() does the same except that it does not
> disable preemption, which means the scheduler can move the task to another
> CPU after it has obtained the per-CPU object.
> In this case this is not bad because the data structure itself is
> protected with a spin_lock. This change shouldn't matter in general
> but on RT it does because the sleeping lock can't be accessed with
> disabled preemption.

I moved the flushing to driver/iommu/iova.c to share it with the Intel
IOMMU and possibly other drivers too, so this patch does no longer apply
to v4.14-rc1. Can you update the patch to these changes?

Regards,

Joerg
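
For background, the difference the patch relies on can be sketched as follows
(a schematic contrast, not the actual amd_iommu code; the structure is reduced
to the spinlock mentioned above):

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct flush_queue {
	spinlock_t lock;
	/* ... queue entries ... */
};

/* get_cpu_ptr() pins us to the CPU by disabling preemption, which on
 * PREEMPT_RT clashes with taking the (sleeping) spinlock afterwards.
 */
static void queue_add_pinned(struct flush_queue __percpu *queues)
{
	struct flush_queue *q = get_cpu_ptr(queues);	/* preemption off */

	spin_lock(&q->lock);
	/* ... add entry ... */
	spin_unlock(&q->lock);
	put_cpu_ptr(queues);				/* preemption on again */
}

/* raw_cpu_ptr() only computes the per-CPU address; the task may migrate
 * right after, but that is harmless here because q->lock, not preemption,
 * protects the queue contents.
 */
static void queue_add_unpinned(struct flush_queue __percpu *queues)
{
	struct flush_queue *q = raw_cpu_ptr(queues);

	spin_lock(&q->lock);
	/* ... add entry ... */
	spin_unlock(&q->lock);
}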



Re: [PATCH v2 0/2] Dual MMU support for TI DRA7xx DSPs

2017-09-19 Thread Joerg Roedel
On Tue, Sep 05, 2017 at 05:56:16PM -0500, Suman Anna wrote:
> Suman Anna (2):
>   iommu/omap: Change the attach detection logic
>   iommu/omap: Add support to program multiple iommus
> 
>  drivers/iommu/omap-iommu.c | 375 
> ++---
>  drivers/iommu/omap-iommu.h |  30 ++--
>  2 files changed, 296 insertions(+), 109 deletions(-)

Applied, thanks.



Re: [PATCH 1/1] iommu/iova: Make rcache flush optional on IOVA allocation failure

2017-09-19 Thread Tomasz Nowicki

Hi Nate,

On 19.09.2017 04:57, Nate Watterson wrote:

Hi Tomasz,

On 9/18/2017 12:02 PM, Robin Murphy wrote:

Hi Tomasz,

On 18/09/17 11:56, Tomasz Nowicki wrote:

Since IOVA allocation failure is not unusual case we need to flush
CPUs' rcache in hope we will succeed in next round.

However, it is useful to decide whether we need rcache flush step because
of two reasons:
- Not scalability. On large system with ~100 CPUs iterating and flushing
   rcache for each CPU becomes serious bottleneck so we may want to deffer it.

s/deffer/defer

- free_cpu_cached_iovas() does not care about max PFN we are interested in.
   Thus we may flush our rcaches and still get no new IOVA like in the
   commonly used scenario:

 if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
 iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);


 if (!iova)
 iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);

1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get
   PCI devices a SAC address
2. alloc_iova() fails due to full 32-bit space
3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas()
   throws entries away for nothing and alloc_iova() fails again
4. Next alloc_iova_fast() call cannot take advantage of rcache since we
   have just defeated caches. In this case we pick the slowest option
   to proceed.

This patch reworks flushed_rcache local flag to be additional function
argument instead and control rcache flush step. Also, it updates all users
to do the flush as the last chance.


Looks like you've run into the same thing Nate found[1] - I came up with
almost the exact same patch, only with separate alloc_iova_fast() and
alloc_iova_fast_noretry() wrapper functions, but on reflection, just
exposing the bool to callers is probably simpler. One nit, can you
document it in the kerneldoc comment too? With that:

Reviewed-by: Robin Murphy 

Thanks,
Robin.

[1]:https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg19758.html 


This patch completely resolves the issue I reported in [1]!!


I somehow missed your observations in [1] :/
Anyway, it's great it fixes performance for you too.


Tested-by: Nate Watterson 


Thanks!
Tomasz


Re: [PATCH 1/1] iommu/iova: Make rcache flush optional on IOVA allocation failure

2017-09-19 Thread Tomasz Nowicki

Hi Robin,

On 18.09.2017 18:02, Robin Murphy wrote:

Hi Tomasz,

On 18/09/17 11:56, Tomasz Nowicki wrote:

Since IOVA allocation failure is not unusual case we need to flush
CPUs' rcache in hope we will succeed in next round.

However, it is useful to decide whether we need rcache flush step because
of two reasons:
- Not scalability. On large system with ~100 CPUs iterating and flushing
   rcache for each CPU becomes serious bottleneck so we may want to deffer it.
- free_cpu_cached_iovas() does not care about max PFN we are interested in.
   Thus we may flush our rcaches and still get no new IOVA like in the
   commonly used scenario:

 if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
 iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);

 if (!iova)
 iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);

1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get
   PCI devices a SAC address
2. alloc_iova() fails due to full 32-bit space
3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas()
   throws entries away for nothing and alloc_iova() fails again
4. Next alloc_iova_fast() call cannot take advantage of rcache since we
   have just defeated caches. In this case we pick the slowest option
   to proceed.

This patch reworks flushed_rcache local flag to be additional function
argument instead and control rcache flush step. Also, it updates all users
to do the flush as the last chance.


Looks like you've run into the same thing Nate found[1] - I came up with
almost the exact same patch, only with separate alloc_iova_fast() and
alloc_iova_fast_noretry() wrapper functions, but on reflection, just
exposing the bool to callers is probably simpler. One nit, can you
document it in the kerneldoc comment too? With that:

Reviewed-by: Robin Murphy 


Thanks! I will add missing comment.

Tomasz
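
For illustration, the caller pattern the patch aims for would look roughly
like this; the name of the extra flush_rcache argument is an assumption, and
the two-step call sequence simply mirrors the scenario quoted above:

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/iova.h>
#include <linux/pci.h>

static unsigned long sketch_alloc_iova(struct iova_domain *iovad,
				       struct device *dev,
				       unsigned long iova_len,
				       u64 dma_limit, unsigned long shift)
{
	unsigned long iova = 0;

	/* Try hard without defeating the rcaches first... */
	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
		iova = alloc_iova_fast(iovad, iova_len,
				       DMA_BIT_MASK(32) >> shift, false);

	/* ...and only flush them as the last chance. */
	if (!iova)
		iova = alloc_iova_fast(iovad, iova_len,
				       dma_limit >> shift, true);

	return iova;
}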


Re: Seeing conflict with IPMMU driver under ACPI

2017-09-19 Thread Laurent Pinchart
Hi Ananth,

On Tuesday, 19 September 2017 09:54:49 EEST Jasty, Ananth wrote:
> On Sep 18, 2017, at 11:49 PM, Laurent Pinchart wrote:
> > On Tuesday, 19 September 2017 03:43:05 EEST Jasty, Ananth wrote:
> >> Hi, with your IPMMU driver enabled under 4.13 we’re seeing a crash on
> >> boot:
> >> 
> >> [ 13.785164] Unable to handle kernel NULL pointer dereference at virtual
> >> address 0018
> >> [ 13.793254] [0018] user address but active_mm is swapper
> >> [ 13.799600] Internal error: Oops: 9604 [#1] SMP
> >> [ 13.804466] Modules linked in: aes_neon_bs aes_neon_blk crypto_simd
> >> cryptd
> >> [ 13.811334] CPU: 152 PID: 1529 Comm: kworker/152:1 Not tainted
> >> 4.13.0-9-generic #10-Ubuntu
> >> [ 13.819584] Hardware name: Default string Cavium ThunderX2/Default
> >> string, BIOS 5.13 07/20/2017
> >> [ 13.828285] Workqueue: events deferred_probe_work_func
> >> [ 13.833410] task: 80bee93d task.stack: 80bee93dc000
> >> [ 13.839330] PC is at iommu_ops_from_fwnode+0x4c/0x90
> >> [ 13.844282] LR is at iommu_ops_from_fwnode+0x28/0x90
> >> 
> >> The ARM SMMUv3 driver (which our platform implements) seems to be losing
> >> iommu_ops to the IPMMU driver.
> > 
> > You seem not to be the first one to notice:
> > 
> > https://patchwork.kernel.org/patch/9956449/
> 
> Wow, he did not beat me by much.
> 
> >> Note: our platform uses ACPI for device enumeration.
> >> 
> >> I have no way to test this, but is there a reason the set_iommu isn’t in
> >> _probe?
> > 
> > I can't recall what the reason was I'm afraid.
> 
> arm-smmu-v3.c does do the set_iommu in probe, and thus far there have been
> no confirmed fatalities, but I can't speak authoritatively.
> 
> I'll leave you all to it then.

After digging a bit I realized that calling bus_set_iommu() from the init 
function is needed to support ARM32 (the same driver supports both ARM32 and 
ARM64). Let's discuss the matter in a reply to
https://patchwork.kernel.org/patch/9956449/.

-- 
Regards,

Laurent Pinchart

Re: [PATCH] iommu/ipmmu-vmsa: Do not replace bus IOMMU ops on driver init.

2017-09-19 Thread Laurent Pinchart
Hi Liviu,

Thank you for the patch.

On Monday, 18 September 2017 13:04:44 EEST Liviu Dudau wrote:
> If the IPMMU driver is compiled in the kernel it will replace the
> platform bus IOMMU ops on running the ipmmu_init() function, regardless
> if there is any IPMMU hardware present or not. This screws up systems
> that just want to build a generic kernel that runs on multiple platforms
> and use a different IOMMU implementation.
> 
> Move the bus_set_iommu() call at the end of the ipmmu_probe() function
> when we know that hardware is present. With current IOMMU framework it
> should be safe (at least for OF case).

If I recall correctly the issue is that the IPMMU might be probed after bus 
master devices when using the legacy IOMMU DT integration, which the ipmmu-
vmsa driver does on ARM32. Calling bus_set_iommu() from the probe function 
will then result in some devices losing IOMMU support.

> Now that the ipmmu_init() and ipmmu_exit() functions are simple calls to
> platform_driver_register() and platform_driver_unregister(), replace
> them with the module_platform_driver() macro call.
> 
> Signed-off-by: Liviu Dudau 
> Cc: Laurent Pinchart 
> ---
>  drivers/iommu/ipmmu-vmsa.c | 29 +
>  1 file changed, 5 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
> index 2a38aa15be17d..c60569c74678d 100644
> --- a/drivers/iommu/ipmmu-vmsa.c
> +++ b/drivers/iommu/ipmmu-vmsa.c
> @@ -1055,10 +1055,11 @@ static int ipmmu_probe(struct platform_device *pdev)
> ipmmu_device_reset(mmu);
> 
>   /*
> -  * We can't create the ARM mapping here as it requires the bus to have
> -  * an IOMMU, which only happens when bus_set_iommu() is called in
> -  * ipmmu_init() after the probe function returns.
> +  * Now that we have validated the presence of the hardware, set
> +  * the bus IOMMU ops to enable future domain and device setup.
>*/
> + if (!iommu_present(&platform_bus_type))
> + bus_set_iommu(&platform_bus_type, &ipmmu_ops);
> 
>   spin_lock(&ipmmu_devices_lock);
>   list_add(&mmu->list, &ipmmu_devices);

What branch is this patch based on ? ipmmu_devices_lock isn't in mainline.

> @@ -1100,27 +1101,7 @@ static struct platform_driver ipmmu_driver = {
>   .remove = ipmmu_remove,
>  };
> 
> -static int __init ipmmu_init(void)
> -{
> - int ret;
> -
> - ret = platform_driver_register(&ipmmu_driver);
> - if (ret < 0)
> - return ret;
> -
> - if (!iommu_present(&platform_bus_type))
> - bus_set_iommu(&platform_bus_type, &ipmmu_ops);
> -
> - return 0;
> -}
> -
> -static void __exit ipmmu_exit(void)
> -{
> - return platform_driver_unregister(&ipmmu_driver);
> -}
> -
> -subsys_initcall(ipmmu_init);
> -module_exit(ipmmu_exit);
> +module_platform_driver(ipmmu_driver);
> 
>  MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
>  MODULE_AUTHOR("Laurent Pinchart ");

-- 
Regards,

Laurent Pinchart
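
As background for the module_platform_driver() change discussed above: the
macro is shorthand for exactly the init/exit boilerplate being deleted,
roughly (schematic; see module_driver() in include/linux/device.h for the
real definition):

static int __init ipmmu_driver_init(void)
{
	return platform_driver_register(&ipmmu_driver);
}
module_init(ipmmu_driver_init);

static void __exit ipmmu_driver_exit(void)
{
	platform_driver_unregister(&ipmmu_driver);
}
module_exit(ipmmu_driver_exit);

The visible differences from the removed code are that the bus_set_iommu()
call no longer happens at registration time, which is exactly what the patch
moves into ipmmu_probe(), and that module_init() registers at device initcall
level rather than subsys_initcall().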
