[PATCH v4 5/6] iommu/iova: Extend rbtree node caching
The cached node mechanism provides a significant performance benefit for allocations using a 32-bit DMA mask, but in the case of non-PCI devices or where the 32-bit space is full, the loss of this benefit can be significant - on large systems there can be many thousands of entries in the tree, such that walking all the way down to find free space every time becomes increasingly awful. Maintain a similar cached node for the whole IOVA space as a superset of the 32-bit space so that performance can remain much more consistent. Inspired by work by Zhen Lei . Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson Signed-off-by: Robin Murphy --- v4: - Adjust to simplified __get_cached_rbnode() behaviour - Cosmetic tweaks drivers/iommu/iova.c | 43 +-- include/linux/iova.h | 3 ++- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index c93a6c46bcb1..a125a5786dbf 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -51,6 +51,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; + iovad->cached_node = NULL; iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; @@ -119,39 +120,38 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) if (limit_pfn <= iovad->dma_32bit_pfn && iovad->cached32_node) return iovad->cached32_node; + if (iovad->cached_node) + return iovad->cached_node; + return &iovad->anchor.node; } static void -__cached_rbnode_insert_update(struct iova_domain *iovad, - unsigned long limit_pfn, struct iova *new) +__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) { - if (limit_pfn != iovad->dma_32bit_pfn) - return; - iovad->cached32_node = &new->node; + if (new->pfn_hi < iovad->dma_32bit_pfn) + iovad->cached32_node = &new->node; + else + iovad->cached_node = &new->node; } static void __cached_rbnode_delete_update(struct 
iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - struct rb_node *curr; + struct rb_node **curr; - if (!iovad->cached32_node) + if (free->pfn_hi < iovad->dma_32bit_pfn) + curr = &iovad->cached32_node; + else + curr = &iovad->cached_node; + + if (!*curr) return; - curr = iovad->cached32_node; - cached_iova = rb_entry(curr, struct iova, node); - if (free->pfn_lo >= cached_iova->pfn_lo) { - struct rb_node *node = rb_next(&free->node); - struct iova *iova = rb_entry(node, struct iova, node); - - /* only cache if it's below 32bit pfn */ - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) - iovad->cached32_node = node; - else - iovad->cached32_node = NULL; - } + cached_iova = rb_entry(*curr, struct iova, node); + if (free->pfn_lo >= cached_iova->pfn_lo) + *curr = rb_next(&free->node); } /* Insert the iova into domain rbtree by holding writer lock */ @@ -189,7 +189,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, struct rb_node *curr, *prev; struct iova *curr_iova; unsigned long flags; - unsigned long saved_pfn, new_pfn; + unsigned long new_pfn; unsigned long align_mask = ~0UL; if (size_aligned) @@ -197,7 +197,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = rb_entry(curr, struct iova, node); do { @@ -218,7 +217,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* If we have 'prev', it's a valid place to start the insertion. 
*/ iova_insert_rbtree(&iovad->rbroot, new, prev); - __cached_rbnode_insert_update(iovad, saved_pfn, new); + __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); diff --git a/include/linux/iova.h b/include/linux/iova.h index 22dc30a28387..5eaedf77b152 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -70,7 +70,8 @@ struct iova_fq { struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ + struct rb_node *cached_node; /* Save last alloced node */ + struct rb_node *cached32_node; /* Save last 3
[PATCH v4 4/6] iommu/iova: Simplify cached node logic
The logic of __get_cached_rbnode() is a little obtuse, but then __get_prev_node_of_cached_rbnode_or_last_node_and_update_limit_pfn() wouldn't exactly roll off the tongue... Now that we have the invariant that there is always a valid node to start searching downwards from, everything gets a bit easier to follow if we simplify that function to do what it says on the tin and return the cached node (or anchor node as appropriate) directly. In turn, we can then deduplicate the rb_prev() and limit_pfn logic into the main loop itself, further reduce the amount of code under the lock, and generally make the inner workings a bit less subtle. Signed-off-by: Robin Murphy --- v4: New drivers/iommu/iova.c | 42 ++ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 03b677afb109..c93a6c46bcb1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -114,18 +114,12 @@ int init_iova_flush_queue(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) { - if ((*limit_pfn > iovad->dma_32bit_pfn) || - (iovad->cached32_node == NULL)) - return rb_prev(&iovad->anchor.node); - else { - struct rb_node *prev_node = rb_prev(iovad->cached32_node); - struct iova *curr_iova = - rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo; - return prev_node; - } + if (limit_pfn <= iovad->dma_32bit_pfn && iovad->cached32_node) + return iovad->cached32_node; + + return &iovad->anchor.node; } static void @@ -192,7 +186,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { - struct rb_node *prev, *curr = NULL; + struct rb_node *curr, *prev; + struct iova *curr_iova; unsigned long flags; unsigned long saved_pfn, new_pfn; unsigned 
long align_mask = ~0UL; @@ -203,29 +198,20 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); saved_pfn = limit_pfn; - curr = __get_cached_rbnode(iovad, &limit_pfn); - prev = curr; - while (curr) { - struct iova *curr_iova = rb_entry(curr, struct iova, node); - - if (limit_pfn <= curr_iova->pfn_lo) - goto move_left; - - if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi) - break; /* found a free slot */ - - limit_pfn = curr_iova->pfn_lo; -move_left: + curr = __get_cached_rbnode(iovad, limit_pfn); + curr_iova = rb_entry(curr, struct iova, node); + do { + limit_pfn = min(limit_pfn, curr_iova->pfn_lo); + new_pfn = (limit_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); - } + curr_iova = rb_entry(curr, struct iova, node); + } while (curr && new_pfn <= curr_iova->pfn_hi); - new_pfn = (limit_pfn - size) & align_mask; if (limit_pfn < size || new_pfn < iovad->start_pfn) { spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return -ENOMEM; } - /* pfn_lo will point to size aligned address if size_aligned is set */ new->pfn_lo = new_pfn; new->pfn_hi = new->pfn_lo + size - 1; -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 6/6] iommu/iova: Make dma_32bit_pfn implicit
From: Zhen Lei Now that the cached node optimisation can apply to all allocations, the couple of users which were playing tricks with dma_32bit_pfn in order to benefit from it can stop doing so. Conversely, there is also no need for all the other users to explicitly calculate a 'real' 32-bit PFN, when init_iova_domain() can happily do that itself from the page granularity. CC: Thierry Reding CC: Jonathan Hunter CC: David Airlie CC: Sudeep Dutt CC: Ashutosh Dixit Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: use iova_shift(), rewrote commit message] Signed-off-by: Robin Murphy --- v4: No change drivers/gpu/drm/tegra/drm.c | 3 +-- drivers/gpu/host1x/dev.c | 3 +-- drivers/iommu/amd_iommu.c| 7 ++- drivers/iommu/dma-iommu.c| 18 +- drivers/iommu/intel-iommu.c | 11 +++ drivers/iommu/iova.c | 4 ++-- drivers/misc/mic/scif/scif_rma.c | 3 +-- include/linux/iova.h | 5 ++--- 8 files changed, 13 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 597d563d636a..b822e484b7e5 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) order = __ffs(tegra->domain->pgsize_bitmap); init_iova_domain(&tegra->carveout.domain, 1UL << order, -carveout_start >> order, -carveout_end >> order); +carveout_start >> order); tegra->carveout.shift = iova_shift(&tegra->carveout.domain); tegra->carveout.limit = carveout_end >> tegra->carveout.shift; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7f22c5c37660..5267c62e8896 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev) order = __ffs(host->domain->pgsize_bitmap); init_iova_domain(&host->iova, 1UL << order, -geometry->aperture_start >> order, -geometry->aperture_end >> order); +geometry->aperture_start >> order); 
host->iova_end = geometry->aperture_end; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215877f5..647ab7691aee 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -63,7 +63,6 @@ /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) /* Reserved IOVA ranges */ #define MSI_RANGE_START(0xfee0) @@ -1788,8 +1787,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, -IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN); if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; @@ -2696,8 +2694,7 @@ static int init_reserved_iova_ranges(void) struct pci_dev *pdev = NULL; struct iova *val; - init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, -IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9d1cebe7f6cb..191be9c80a8a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* ...then finally give it a kicking to make sure it fits */ base_pfn = max_t(unsigned long, base_pfn, domain->geometry.aperture_start >> order); - end_pfn = min_t(unsigned long, end_pfn, - domain->geometry.aperture_end >> order); } - /* -* PCI devices may have larger DMA masks, but still prefer allocating -* within a 32-bit mask to avoid DAC addressing. Such limitations don't -* apply to the typical platform device, so for those we may as well -* leave the cache limit at the top of their range to save an rb_last() -* traversal on every allocation. 
-*/ - if (dev && dev_is_pci(dev)) - end_pfn &= DMA_BIT_MASK(32) >> order; /* start_pfn is always nonzero for an already-initialised domain */
[PATCH v4 3/6] iommu/iova: Add rbtree anchor node
Add a permanent dummy IOVA reservation to the rbtree, such that we can always access the top of the address space instantly. The immediate benefit is that we remove the overhead of the rb_last() traversal when not using the cached node, but it also paves the way for further simplifications. Signed-off-by: Robin Murphy --- v4: New drivers/iommu/iova.c | 15 +-- include/linux/iova.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 20be9a8b3188..03b677afb109 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -24,6 +24,9 @@ #include #include +/* The anchor node sits above the top of the usable address space */ +#define IOVA_ANCHOR~0UL + static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, unsigned long size); @@ -54,6 +57,9 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->dma_32bit_pfn = pfn_32bit + 1; iovad->flush_cb = NULL; iovad->fq = NULL; + iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; + rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); + rb_insert_color(&iovad->anchor.node, &iovad->rbroot); init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -112,7 +118,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { if ((*limit_pfn > iovad->dma_32bit_pfn) || (iovad->cached32_node == NULL)) - return rb_last(&iovad->rbroot); + return rb_prev(&iovad->anchor.node); else { struct rb_node *prev_node = rb_prev(iovad->cached32_node); struct iova *curr_iova = @@ -246,7 +252,8 @@ EXPORT_SYMBOL(alloc_iova_mem); void free_iova_mem(struct iova *iova) { - kmem_cache_free(iova_cache, iova); + if (iova->pfn_lo != IOVA_ANCHOR) + kmem_cache_free(iova_cache, iova); } EXPORT_SYMBOL(free_iova_mem); @@ -680,6 +687,10 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; + /* Don't allow nonsensical pfns */ + if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> 
iova_shift(iovad + return NULL; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { diff --git a/include/linux/iova.h b/include/linux/iova.h index d179b9bf7814..22dc30a28387 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -74,6 +74,7 @@ struct iova_domain { unsigned long granule;/* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 2/6] iommu/iova: Optimise the padding calculation
From: Zhen Lei The mask for calculating the padding size doesn't change, so there's no need to recalculate it every loop iteration. Furthermore, once we've done that, it becomes clear that we don't actually need to calculate a padding size at all - by flipping the arithmetic around, we can just combine the upper limit, size, and mask directly to check against the lower limit. For an arm64 build, this alone knocks 20% off the object code size of the entire alloc_iova() function! Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: simplified more of the arithmetic, rewrote commit message] Signed-off-by: Robin Murphy --- v4: - Round align_mask up instead of down (oops!) - Remove redundant !curr check - Introduce new_pfn variable here to reduce churn in later patches drivers/iommu/iova.c | 42 +++--- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index f129ff4f5c89..20be9a8b3188 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -182,24 +182,17 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, rb_insert_color(&iova->node, root); } -/- * Computes the padding size required, to make the start address - * naturally aligned on the power-of-two order of its size - */ -static unsigned int -iova_get_pad_size(unsigned int size, unsigned int limit_pfn) -{ - return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); -} - static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { struct rb_node *prev, *curr = NULL; unsigned long flags; - unsigned long saved_pfn; - unsigned int pad_size = 0; + unsigned long saved_pfn, new_pfn; + unsigned long align_mask = ~0UL; + + if (size_aligned) + align_mask <<= fls_long(size - 1); /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); @@ -209,31 +202,26 @@ static int 
__alloc_and_insert_iova_range(struct iova_domain *iovad, while (curr) { struct iova *curr_iova = rb_entry(curr, struct iova, node); - if (limit_pfn <= curr_iova->pfn_lo) { + if (limit_pfn <= curr_iova->pfn_lo) goto move_left; - } else if (limit_pfn > curr_iova->pfn_hi) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) - break; /* found a free slot */ - } + + if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi) + break; /* found a free slot */ + limit_pfn = curr_iova->pfn_lo; move_left: prev = curr; curr = rb_prev(curr); } - if (!curr) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; - } + new_pfn = (limit_pfn - size) & align_mask; + if (limit_pfn < size || new_pfn < iovad->start_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size); + new->pfn_lo = new_pfn; new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 0/6] Optimise 64-bit IOVA allocations
v3: https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg19694.html This was supposed to be just a rebase and repost, but in the meantime I kept scratching the itch and ended up with two extra patches to simplify the end result even more - the series was least churny with them added in the middle, but #5 and #6 remain essentially unchanged from v3, such that I'm happy the tested-bys all remain valid. Robin. Robin Murphy (3): iommu/iova: Add rbtree anchor node iommu/iova: Simplify cached node logic iommu/iova: Extend rbtree node caching Zhen Lei (3): iommu/iova: Optimise rbtree searching iommu/iova: Optimise the padding calculation iommu/iova: Make dma_32bit_pfn implicit drivers/gpu/drm/tegra/drm.c | 3 +- drivers/gpu/host1x/dev.c | 3 +- drivers/iommu/amd_iommu.c| 7 +- drivers/iommu/dma-iommu.c| 18 + drivers/iommu/intel-iommu.c | 11 +--- drivers/iommu/iova.c | 139 +-- drivers/misc/mic/scif/scif_rma.c | 3 +- include/linux/iova.h | 9 +-- 8 files changed, 74 insertions(+), 119 deletions(-) -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v4 1/6] iommu/iova: Optimise rbtree searching
From: Zhen Lei Checking the IOVA bounds separately before deciding which direction to continue the search (if necessary) results in redundantly comparing both pfns twice each. GCC can already determine that the final comparison op is redundant and optimise it down to 3 in total, but we can go one further with a little tweak of the ordering (which makes the intent of the code that much cleaner as a bonus). Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: rewrote commit message to clarify] Signed-off-by: Robin Murphy --- v4: No change drivers/iommu/iova.c | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 33edfa794ae9..f129ff4f5c89 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -342,15 +342,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) while (node) { struct iova *iova = rb_entry(node, struct iova, node); - /* If pfn falls within iova's range, return iova */ - if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - return iova; - } - if (pfn < iova->pfn_lo) node = node->rb_left; - else if (pfn > iova->pfn_lo) + else if (pfn > iova->pfn_hi) node = node->rb_right; + else + return iova;/* pfn falls within iova's range */ } return NULL; -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v7 1/5] Doc: iommu/arm-smmu-v3: Add workaround for HiSilicon erratum 161010801
> -Original Message- > From: Rob Herring [mailto:r...@kernel.org] > Sent: Tuesday, September 19, 2017 3:53 PM > To: Shameerali Kolothum Thodi > Cc: lorenzo.pieral...@arm.com; marc.zyng...@arm.com; > sudeep.ho...@arm.com; will.dea...@arm.com; robin.mur...@arm.com; > j...@8bytes.org; mark.rutl...@arm.com; hanjun@linaro.org; Gabriele > Paoloni ; John Garry > ; iommu@lists.linux-foundation.org; linux-arm- > ker...@lists.infradead.org; linux-a...@vger.kernel.org; > devicet...@vger.kernel.org; de...@acpica.org; Linuxarm > ; Wangzhou (B) ; > Guohanjun (Hanjun Guo) > Subject: Re: [PATCH v7 1/5] Doc: iommu/arm-smmu-v3: Add workaround for > HiSilicon erratum 161010801 > > On Thu, Sep 14, 2017 at 01:57:52PM +0100, Shameer Kolothum wrote: > > From: John Garry > > > > The HiSilicon erratum 161010801 describes the limitation of HiSilicon > platforms > > hip06/hip07 to support the SMMU mappings for MSI transactions. > > > > On these platforms, GICv3 ITS translator is presented with the deviceID > > by extending the MSI payload data to 64 bits to include the deviceID. > > Hence, the PCIe controller on this platforms has to differentiate the MSI > > payload against other DMA payload and has to modify the MSI payload. > > This basically makes it difficult for this platforms to have a SMMU > > translation for MSI. > > > > This patch adds a SMMUv3 binding to flag that the SMMU breaks msi > > translation at ITS. > > > > Also, the arm64 silicon errata is updated with this same erratum. > > > > Signed-off-by: John Garry > > Signed-off-by: Shameer Kolothum > [...] > > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > > @@ -55,6 +55,9 @@ the PCIe specification. > > - hisilicon,broken-prefetch-cmd > > : Avoid sending CMD_PREFETCH_* commands to the SMMU. > > > > +- hisilicon,broken-untranslated-msi > > +: Reserve ITS HW region to avoid translating msi. 
> > + > > This should be determined from the compatible string. Continuing to add > properties for each errata doesn't scale. Ok. I think the suggestion here is to follow the arm-smmu.c (SMMUv1/v2) driver way of implementing the errata. As you might have noticed, the SMMUv3 driver dt errata framework depends on properties and this will change the way errata is implemented in the driver now. Hi Will/Robin, Could you please take a look and let us know your thoughts on changing the SMMUv3 dt errata implementation to version/model/compatible string framework for this quirk. Thanks, Shameer ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v7 1/5] Doc: iommu/arm-smmu-v3: Add workaround for HiSilicon erratum 161010801
On Thu, Sep 14, 2017 at 01:57:52PM +0100, Shameer Kolothum wrote: > From: John Garry > > The HiSilicon erratum 161010801 describes the limitation of HiSilicon > platforms > hip06/hip07 to support the SMMU mappings for MSI transactions. > > On these platforms, GICv3 ITS translator is presented with the deviceID > by extending the MSI payload data to 64 bits to include the deviceID. > Hence, the PCIe controller on this platforms has to differentiate the MSI > payload against other DMA payload and has to modify the MSI payload. > This basically makes it difficult for this platforms to have a SMMU > translation for MSI. > > This patch adds a SMMUv3 binding to flag that the SMMU breaks msi > translation at ITS. > > Also, the arm64 silicon errata is updated with this same erratum. > > Signed-off-by: John Garry > Signed-off-by: Shameer Kolothum > --- > Documentation/arm64/silicon-errata.txt | 1 + > Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt | 3 +++ > 2 files changed, 4 insertions(+) > > diff --git a/Documentation/arm64/silicon-errata.txt > b/Documentation/arm64/silicon-errata.txt > index 66e8ce1..02816b1 100644 > --- a/Documentation/arm64/silicon-errata.txt > +++ b/Documentation/arm64/silicon-errata.txt > @@ -70,6 +70,7 @@ stable kernels. > || | | > | > | Hisilicon | Hip0{5,6,7} | #161010101 | > HISILICON_ERRATUM_161010101 | > | Hisilicon | Hip0{6,7} | #161010701 | N/A > | > +| Hisilicon | Hip0{6,7} | #161010801 | N/A > | > || | | > | > | Qualcomm Tech. | Falkor v1 | E1003 | > QCOM_FALKOR_ERRATUM_1003| > | Qualcomm Tech. | Falkor v1 | E1009 | > QCOM_FALKOR_ERRATUM_1009| > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > index c9abbf3..1f5f7f9 100644 > --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt > @@ -55,6 +55,9 @@ the PCIe specification. 
> - hisilicon,broken-prefetch-cmd > : Avoid sending CMD_PREFETCH_* commands to the SMMU. > > +- hisilicon,broken-untranslated-msi > +: Reserve ITS HW region to avoid translating msi. > + This should be determined from the compatible string. Continuing to add properties for each errata doesn't scale. > - cavium,cn9900-broken-page1-regspace > : Replaces all page 1 offsets used for EVTQ_PROD/CONS, > PRIQ_PROD/CONS register access with page 0 offsets. > -- > 1.9.1 > > > -- > To unsubscribe from this list: send the line "unsubscribe devicetree" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC] iommu: arm-smmu: stall support
On Tue, Sep 19, 2017 at 8:30 AM, Joerg Roedel wrote: > Hi Rob, > > thanks for the RFC patch. I have some comments about the interface to > the IOMMU-API below. > > On Thu, Sep 14, 2017 at 03:44:33PM -0400, Rob Clark wrote: >> +/** >> + * iommu_domain_resume - Resume translations for a domain after a fault. >> + * >> + * This can be called at some point after the fault handler is called, >> + * allowing the user of the IOMMU to (for example) handle the fault >> + * from a task context. It is illegal to call this if >> + * iommu_domain_set_attr(STALL) failed. >> + * >> + * @domain:the domain to resume >> + * @terminate: if true, the translation that triggered the fault should >> + *be terminated, else it should be retried. >> + */ >> +void iommu_domain_resume(struct iommu_domain *domain, bool terminate) >> +{ >> + /* invalid to call if iommu_domain_set_attr(STALL) failed: */ >> + if (WARN_ON(!domain->ops->domain_resume)) >> + return; >> + domain->ops->domain_resume(domain, terminate); >> +} >> +EXPORT_SYMBOL_GPL(iommu_domain_resume); > > So this function is being called by the device driver owning the domain, > right? yes, this was my plan > I don't think that the resume call-back you added needs to be exposed > like this. It is better to do the page-fault handling completly in the > iommu-code, including calling the resume call-back and just let the > device-driver provide a per-domain call-back to let it handle the fault > and map in the required pages. I would like to decide in the IRQ whether or not to queue work or not, because when we get a gpu fault, we tend to get 1000's of gpu faults all at once (and I really only need to handle the first one). I suppose that could also be achieved by having a special return value from the fault handler to say "call me again from a wq".. Note that in the drm driver I already have a suitable wq to queue the work, so it really doesn't buy me anything to have the iommu driver toss things off to a wq for me. 
Might be a different situation for other drivers (but I guess mostly other drivers are using iommu API indirectly via dma-mapping?) > The interface could look like this: > > * New function iommu_domain_enable_stalls(domain) - When > this function returns the domain is in stall-handling mode. A > iommu_domain_disable_stalls() might make sense too, not sure > about that. I don't particularly see a use-case for disabling stalls, fwiw BR, -R > * When stalls are enabled for a domain, report_iommu_fault() > queues the fault to a workqueue (so that its handler can > block) and in the workqueue you call ->resume() based on the > return value of the handler. > > As a side-note, as there has been discussion on this: For now it doesn't > make sense to merge this with the SVM page-fault handling efforts, as > this path is different enough (SVM will call handle_mm_fault() as the > handler, for example). > > > Regards, > > Joerg > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH 1/1] iommu/arm-smmu: Add support for multiple TBU child devices
On Tue, Sep 12, 2017 at 05:31:07PM +0530, Vivek Gautam wrote: > ARM MMU-500 implements a TBU (uTLB) for each connected master > besides a single TCU which controls and manages the address > translations. Each of these TBUs can either be in the same > power domain as the master, or they can have a independent > power switch. > This design addresses the challenges to control TBU power. > Adding child devices for each TBUs present in the configuration > lets us control the power and clocks to TLBs having individual > power domains. The device link between master devices (such as, > display, and GPU) and TBU devices ensures that the master takes > care of powering the smmu as long as it's available. > When the master is not available, the TBUs are identified with > sid and powered on. > > Signed-off-by: Vivek Gautam > --- > > - The idea behind this patch is to handle the distributed smmu >architectures, similar to MMU-500. > - Untested yet. > - There are still few instances where the correct tbu device has >to be referenced and thus powered on to handle TLB maintenance >operations. > > .../devicetree/bindings/iommu/arm,smmu.txt | 27 +++ > drivers/iommu/arm-smmu.c | 191 > +++-- > 2 files changed, 205 insertions(+), 13 deletions(-) > > diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > index d97a6bc8e608..7cf67e75022e 100644 > --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt > +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt > @@ -98,6 +98,18 @@ conditions. >accessed before master gets enabled and linked to its >SMMU. > > +- child nodes:ARM MMU-500 implements a TBU (page table cache or TLB) for > + each connected master besides a single TCU that controls > + and manages the address translations. > + Each of the child nodes represents a TBU that is attached > to > + the master. 
This child node will have following property: + > + - compatible: must be "arm,mmu-500-tbu" for TBU child nodes of > arm,mmu-500 > + smmu. > + - stream-id-range: array representing the starting stream id and the number > + of supported stream-ids. This gives information about > + the range of stream-ids that are supported by this TBU. Needs a vendor prefix. Also need to document reg property. What does reg represent? If just an index with no correlation to h/w numbering, then perhaps stream ids could be put into reg instead. Rob ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 2/3] iommu/iova: Make rcache limit_pfn handling more robust
When popping a pfn from an rcache, we are currently checking it directly against limit_pfn for viability. Since this represents iova->pfn_lo, it is technically possible for the corresponding iova->pfn_hi to be greater than limit_pfn. Although we generally get away with it in practice since limit_pfn is typically a power-of-two boundary and the IOVAs are size-aligned, it's pretty trivial to make the iova_rcache_get() path take the allocation size into account for complete safety. Signed-off-by: Robin Murphy --- drivers/iommu/iova.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 35dde0fc7793..8f8b436afd81 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -411,7 +411,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long iova_pfn; struct iova *new_iova; - iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); if (iova_pfn) return iova_pfn; @@ -828,7 +828,7 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag, { BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] >= limit_pfn) + if (mag->pfns[mag->size - 1] > limit_pfn) return 0; return mag->pfns[--mag->size]; @@ -982,7 +982,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) return 0; - return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); } /* -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 3/3] iommu/iova: Try harder to allocate from rcache magazine
When devices with different DMA masks are using the same domain, or for PCI devices where we usually try a speculative 32-bit allocation first, there is a fair possibility that the top PFN of the rcache stack at any given time may be unsuitable for the lower limit, prompting a fallback to allocating anew from the rbtree. Consequently, we may end up artificially increasing pressure on the 32-bit IOVA space as unused IOVAs accumulate lower down in the rcache stacks, while callers with 32-bit masks also impose unnecessary rbtree overhead. In such cases, let's try a bit harder to satisfy the allocation locally first - scanning the whole stack should still be relatively inexpensive, and even rotating an entry up from the very bottom probably has less overall impact than going to the rbtree. Signed-off-by: Robin Murphy --- drivers/iommu/iova.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 8f8b436afd81..a7af8273fa98 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -826,12 +826,25 @@ static bool iova_magazine_empty(struct iova_magazine *mag) static unsigned long iova_magazine_pop(struct iova_magazine *mag, unsigned long limit_pfn) { + int i; + unsigned long pfn; + BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] > limit_pfn) - return 0; + /* +* If we can pull a suitable pfn from anywhere in the stack, that's +* still probably preferable to falling back to the rbtree. +*/ + for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) + if (i == 0) + return 0; - return mag->pfns[--mag->size]; + pfn = mag->pfns[i]; + mag->size--; + for (; i < mag->size; i++) + mag->pfns[i] = mag->pfns[i + 1]; + + return pfn; } static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 0/3] Misc IOVA tweaks
While I was elbow-deep in the IOVA code, a few other things came to light which don't really fit into the rbtree optimisation series. Patches #1 and #2 are more or less just cleanup, while patch #3 complements Tomasz' recent PCI allocation patch as it aims to potentially improve the same situation. Last time I checked, these should all apply independently and without major conflicts against any other in-flight IOVA patches. Robin. Robin Murphy (3): iommu/iova: Simplify domain destruction iommu/iova: Make rcache limit_pfn handling more robust iommu/iova: Try harder to allocate from rcache magazine drivers/iommu/iova.c | 73 1 file changed, 28 insertions(+), 45 deletions(-) -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/3] iommu/iova: Simplify domain destruction
All put_iova_domain() should have to worry about is freeing memory - by that point the domain must no longer be live, so the act of cleaning up doesn't need to be concurrency-safe or maintain the rbtree in a self-consistent state. There's no need to waste time with locking or emptying the rcache magazines, and we can just use the postorder traversal helper to clear out the remaining rbtree entries in-place. Signed-off-by: Robin Murphy --- drivers/iommu/iova.c | 50 ++ 1 file changed, 10 insertions(+), 40 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a6cf775f75e0..35dde0fc7793 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -588,21 +588,12 @@ EXPORT_SYMBOL_GPL(queue_iova); */ void put_iova_domain(struct iova_domain *iovad) { - struct rb_node *node; - unsigned long flags; + struct iova *iova, *tmp; free_iova_flush_queue(iovad); free_iova_rcaches(iovad); - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = rb_first(&iovad->rbroot); - while (node) { - struct iova *iova = rb_entry(node, struct iova, node); - - rb_erase(node, &iovad->rbroot); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); - node = rb_first(&iovad->rbroot); - } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } EXPORT_SYMBOL_GPL(put_iova_domain); @@ -995,46 +986,25 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, } /* - * Free a cpu's rcache. 
- */ -static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, -struct iova_rcache *rcache) -{ - struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); - unsigned long flags; - - spin_lock_irqsave(&cpu_rcache->lock, flags); - - iova_magazine_free_pfns(cpu_rcache->loaded, iovad); - iova_magazine_free(cpu_rcache->loaded); - - iova_magazine_free_pfns(cpu_rcache->prev, iovad); - iova_magazine_free(cpu_rcache->prev); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); -} - -/* * free rcache data structures. */ static void free_iova_rcaches(struct iova_domain *iovad) { struct iova_rcache *rcache; - unsigned long flags; + struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; int i, j; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; - for_each_possible_cpu(cpu) - free_cpu_iova_rcache(cpu, iovad, rcache); - spin_lock_irqsave(&rcache->lock, flags); - free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + iova_magazine_free(cpu_rcache->loaded); + iova_magazine_free(cpu_rcache->prev); } - spin_unlock_irqrestore(&rcache->lock, flags); + free_percpu(rcache->cpu_rcaches); + for (j = 0; j < rcache->depot_size; ++j) + iova_magazine_free(rcache->depot[j]); } } -- 2.13.4.dirty ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: QCOM_IOMMU should depend on HAS_DMA
On Mon, Sep 11, 2017 at 02:34:34PM +0200, Geert Uytterhoeven wrote: > > Fixes: 0ae349a0f33fb040 ("iommu/qcom: Add qcom_iommu") > Signed-off-by: Geert Uytterhoeven > --- > drivers/iommu/Kconfig | 1 + > 1 file changed, 1 insertion(+) Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/exynos: Rework runtime PM links management
On Fri, Sep 15, 2017 at 01:05:08PM +0200, Marek Szyprowski wrote: > add_device is a bit more suitable for establishing runtime PM links than > the xlate callback. This change also makes it possible to implement proper > cleanup - in remove_device callback. > > Signed-off-by: Marek Szyprowski > --- > drivers/iommu/exynos-iommu.c | 23 --- > 1 file changed, 16 insertions(+), 7 deletions(-) Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC] iommu: arm-smmu: stall support
Hi Rob, thanks for the RFC patch. I have some comments about the interface to the IOMMU-API below. On Thu, Sep 14, 2017 at 03:44:33PM -0400, Rob Clark wrote: > +/** > + * iommu_domain_resume - Resume translations for a domain after a fault. > + * > + * This can be called at some point after the fault handler is called, > + * allowing the user of the IOMMU to (for example) handle the fault > + * from a task context. It is illegal to call this if > + * iommu_domain_set_attr(STALL) failed. > + * > + * @domain:the domain to resume > + * @terminate: if true, the translation that triggered the fault should > + *be terminated, else it should be retried. > + */ > +void iommu_domain_resume(struct iommu_domain *domain, bool terminate) > +{ > + /* invalid to call if iommu_domain_set_attr(STALL) failed: */ > + if (WARN_ON(!domain->ops->domain_resume)) > + return; > + domain->ops->domain_resume(domain, terminate); > +} > +EXPORT_SYMBOL_GPL(iommu_domain_resume); So this function is being called by the device driver owning the domain, right? I don't think that the resume call-back you added needs to be exposed like this. It is better to do the page-fault handling completely in the iommu-code, including calling the resume call-back and just let the device-driver provide a per-domain call-back to let it handle the fault and map in the required pages. The interface could look like this: * New function iommu_domain_enable_stalls(domain) - When this function returns the domain is in stall-handling mode. An iommu_domain_disable_stalls() might make sense too, not sure about that. * When stalls are enabled for a domain, report_iommu_fault() queues the fault to a workqueue (so that its handler can block) and in the workqueue you call ->resume() based on the return value of the handler. 
As a side-note, as there has been discussion on this: For now it doesn't make sense to merge this with the SVM page-fault handling efforts, as this path is different enough (SVM will call handle_mm_fault() as the handler, for example). Regards, Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD
On 14/09/17 06:08, Yisheng Xie wrote: > According to Spec, it is ILLEGAL to set STE.S1STALLD if STALL_MODEL > is not 0b00, which means we should not disable stall mode if stall > or terminate mode is not configurable. > > As Jean-Philippe's suggestion, this patch introduce a feature bit > ARM_SMMU_FEAT_STALL_FORCE, which means smmu only supports stall force. > Therefore, we can avoid the ILLEGAL setting of STE.S1STALLD by checking > ARM_SMMU_FEAT_STALL_FORCE. > > This patch keeps the ARM_SMMU_FEAT_STALLS as the meaning of stall supported > (force or configurable) to be easy to expand the future function, i.e. we can > only use ARM_SMMU_FEAT_STALLS to check whether we should register fault > handle or enable master can_stall, etc to support platform SVM. > > After apply this patch, the feature bit and S1STALLD setting will be like: > STALL_MODEL FEATURE S1STALLD > 0b00 ARM_SMMU_FEAT_STALLS 0b1 > 0b01 !ARM_SMMU_FEAT_STALLS && !ARM_SMMU_FEAT_STALL_FORCE 0b0 > 0b10 ARM_SMMU_FEAT_STALLS && ARM_SMMU_FEAT_STALL_FORCE0b0 Thanks for the patch. Since it's the same problem, could you also fix the context descriptor value? The spec says, in 5.5 Fault configuration: "A CD (Stage 1 translation enabled) is considered ILLEGAL if one of the following applies: * SMMU_(S_)IDR0.STALL_MODEL == 0b10 and CD.S == 0." So I think we should always set CD.S if the SMMU has STALL_FORCE. As Will pointed out, more work is needed for STALL_FORCE. We can't enable translation at all for devices that don't support stalling (e.g. PCI). We should force them into bypass or abort mode depending on the config. Maybe we can fix that later, after the devicetree property is added. 
> Signed-off-by: Yisheng Xie > --- > drivers/iommu/arm-smmu-v3.c | 11 +++ > 1 file changed, 7 insertions(+), 4 deletions(-) > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c > index e67ba6c..d2a3627 100644 > --- a/drivers/iommu/arm-smmu-v3.c > +++ b/drivers/iommu/arm-smmu-v3.c > @@ -603,7 +603,8 @@ struct arm_smmu_device { > #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9) > #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10) > #define ARM_SMMU_FEAT_STALLS (1 << 11) > -#define ARM_SMMU_FEAT_HYP(1 << 12) > +#define ARM_SMMU_FEAT_STALL_FORCE(1 << 12) > +#define ARM_SMMU_FEAT_HYP(1 << 13) We probably should keep the feature bits backward compatible and only add new ones at the end. It's not ABI, but it's printed at boot time and I sometimes use them when inspecting the kernel output to see what an SMMU supports. Thanks, Jean > u32 features; > > #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) > @@ -1112,7 +1113,8 @@ static void arm_smmu_write_strtab_ent(struct > arm_smmu_device *smmu, u32 sid, > #endif >STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); > > - if (smmu->features & ARM_SMMU_FEAT_STALLS) > + if (smmu->features & ARM_SMMU_FEAT_STALLS && > +!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK > @@ -2536,9 +2538,10 @@ static int arm_smmu_device_hw_probe(struct > arm_smmu_device *smmu) >coherent ? "true" : "false"); > > switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { > - case IDR0_STALL_MODEL_STALL: > - /* Fallthrough */ > case IDR0_STALL_MODEL_FORCE: > + smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; > + /* Fallthrough */ > + case IDR0_STALL_MODEL_STALL: > smmu->features |= ARM_SMMU_FEAT_STALLS; > } > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/ipmmu-vmsa: Do not replace bus IOMMU ops on driver init.
Hi Laurent, On 19/09/17 08:07, Laurent Pinchart wrote: > Hi Liviu, > > Thank you for the patch. > > On Monday, 18 September 2017 13:04:44 EEST Liviu Dudau wrote: >> If the IPMMU driver is compiled in the kernel it will replace the >> platform bus IOMMU ops on running the ipmmu_init() function, regardless >> if there is any IPMMU hardware present or not. This screws up systems >> that just want to build a generic kernel that runs on multiple platforms >> and use a different IOMMU implementation. >> >> Move the bus_set_iommu() call at the end of the ipmmu_probe() function >> when we know that hardware is present. With current IOMMU framework it >> should be safe (at least for OF case). > > If I recall correctly the issue is that the IPMMU might be probed after bus > master devices when using the legacy IOMMU DT integration, which the ipmmu- > vmsa driver does on ARM32. Calling bus_set_iommu() from the probe function > will then result in some devices losing IOMMU support. Yes, that used to be the problem, but since IPMMU uses the generic bindings and iommu_device_register(), it should now benefit from client probe-deferral even when it doesn't support of_xlate(). Robin. >> Now that the ipmmu_init() and ipmmu_exit() functions are simple calls to >> platform_driver_register() and platform_driver_unregister(), replace >> them with the module_platform_driver() macro call. 
>> >> Signed-off-by: Liviu Dudau >> Cc: Laurent Pinchart >> --- >> drivers/iommu/ipmmu-vmsa.c | 29 + >> 1 file changed, 5 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c >> index 2a38aa15be17d..c60569c74678d 100644 >> --- a/drivers/iommu/ipmmu-vmsa.c >> +++ b/drivers/iommu/ipmmu-vmsa.c >> @@ -1055,10 +1055,11 @@ static int ipmmu_probe(struct platform_device *pdev) >> ipmmu_device_reset(mmu); >> >> /* >> - * We can't create the ARM mapping here as it requires the bus to have >> - * an IOMMU, which only happens when bus_set_iommu() is called in >> - * ipmmu_init() after the probe function returns. >> + * Now that we have validated the presence of the hardware, set >> + * the bus IOMMU ops to enable future domain and device setup. >> */ >> +if (!iommu_present(&platform_bus_type)) >> +bus_set_iommu(&platform_bus_type, &ipmmu_ops); >> >> spin_lock(&ipmmu_devices_lock); >> list_add(&mmu->list, &ipmmu_devices); > > What branch is this patch based on ? ipmmu_devices_lock isn't in mainline. > >> @@ -1100,27 +1101,7 @@ static struct platform_driver ipmmu_driver = { >> .remove = ipmmu_remove, >> }; >> >> -static int __init ipmmu_init(void) >> -{ >> -int ret; >> - >> -ret = platform_driver_register(&ipmmu_driver); >> -if (ret < 0) >> -return ret; >> - >> -if (!iommu_present(&platform_bus_type)) >> -bus_set_iommu(&platform_bus_type, &ipmmu_ops); >> - >> -return 0; >> -} >> - >> -static void __exit ipmmu_exit(void) >> -{ >> -return platform_driver_unregister(&ipmmu_driver); >> -} >> - >> -subsys_initcall(ipmmu_init); >> -module_exit(ipmmu_exit); >> +module_platform_driver(ipmmu_driver); >> >> MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); >> MODULE_AUTHOR("Laurent Pinchart "); > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC] virtio-iommu version 0.4
Hi Eric, On 12/09/17 18:13, Auger Eric wrote: > 2.6.7 > - As I am currently integrating v0.4 in QEMU here are some other comments: > At the moment struct virtio_iommu_req_probe flags is missing in your > header. As such I understood the ACK protocol was not implemented by the > driver in your branch. Uh indeed. And yet I could swear I've written that code... somewhere. I will add it to the batch of v0.5 changes, it shouldn't be too invasive. > - VIRTIO_IOMMU_PROBE_PROPERTY_TYPE_MASK is VIRTIO_IOMMU_T_MASK in your > header too. Yes, keeping VIRTIO_IOMMU_PROBE_PROPERTY_TYPE_MASK is probably best (though it is a mouthful). > 2.6.8.2: > - I am really confused about what the device should report as resv > regions depending on the PE nature (VFIO or not VFIO) > > In other iommu drivers, the resv regions are populated by the iommu > driver through its get_resv_regions callback. They are usually composed > of an iommu specific MSI region (mapped or bypassed) and non IOMMU > specific (device specific) reserved regions: > iommu_dma_get_resv_regions(). In the case of virtio-iommu driver, those > are the guest reserved regions. > > First in the current virtio-iommu driver I don't see the > iommu_dma_get_resv_regions call. Do you agree that the virtio-iommu > driver should compute the non IOMMU specific MSI regions. ie. this is > not the responsibility of the virtio-iommu device. For SW_MSI, certainly. The driver allocates a fixed IOVA region for mapping the MSI doorbell. But the driver has to know whether the doorbell region is translated or bypassed. > Then why is it more the job of the device to return the guest iommu > specific region rather than the driver itself? The MSI region is architectural on x86 IOMMUs, but implementation-defined on virtio-iommu. It depends which platform the host is emulating. In Linux, x86 IOMMU drivers register the bypass region because there always is an IOAPIC on the other end, with a fixed MSI address. 
But virtio-iommu may be either behind a GIC, an APIC or some other IRQ chip. The driver *could* go over all the irqchips/platforms it knows and try to guess if there is a fixed doorbell or if it needs to reserve an IOVA for them, but it would look horrible. I much prefer having a well-defined way of doing this, so a description from the device. > Then I understand this is the responsibility of the virtio-iommu device > to gather information about the host resv regions in case of VFIO EP. > Typically the host PCIe host bridge windows cannot be used for IOVA. > Also the host MSI reserved IOVA window cannot be used. Do you agree. Yes, all regions reported in sysfs reserved_regions in the host would be reported as RESV_T_RESERVED by virtio-iommu. > I really think the spec should clarify what exact resv regions the > device should return in case of VFIO device and non VFIO device. Agreed. I will add something about RESV_T_RESERVED with the PCI bridge example in Implementation Notes. Do you think the MSI examples at the end need improvement as well? I can try to explain that RESV_MSI regions in virtio-iommu are only those of the emulated platform, not the HW or SW MSI regions from the host. Thanks, Jean ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 3/4] iommu/iova: Extend rbtree node caching
On 19/09/17 10:42, Leizhen (ThunderTown) wrote: [...] static void __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; -struct rb_node *curr; +struct rb_node **curr = NULL; -if (!iovad->cached32_node) -return; -curr = iovad->cached32_node; -cached_iova = rb_entry(curr, struct iova, node); > > - +if (free->pfn_hi < iovad->dma_32bit_pfn) +curr = &iovad->cached32_node; +if (!curr) +curr = &iovad->cached_node; >> >> +if (!*curr) >> +return; > Is it necessary for us to try the following adjustment? > + if (free->pfn_hi < iovad->dma_32bit_pfn) > + curr = &iovad->cached32_node; > + else > + curr = &iovad->cached_node; > + > + if (!*curr) { > + *curr = rb_next(&free->node); > + return; > + } Yeah, I spotted that this looked a bit wonky after I posted it. It's already cleaned up in v3, which I'll be posting shortly after I write up some cover letters. Thanks, Robin. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/ipmmu-vmsa: Do not replace bus IOMMU ops on driver init.
On Tue, Sep 19, 2017 at 10:07:58AM +0300, Laurent Pinchart wrote: > Hi Liviu, > > Thank you for the patch. > > On Monday, 18 September 2017 13:04:44 EEST Liviu Dudau wrote: > > If the IPMMU driver is compiled in the kernel it will replace the > > platform bus IOMMU ops on running the ipmmu_init() function, regardless > > if there is any IPMMU hardware present or not. This screws up systems > > that just want to build a generic kernel that runs on multiple platforms > > and use a different IOMMU implementation. > > > > Move the bus_set_iommu() call at the end of the ipmmu_probe() function > > when we know that hardware is present. With current IOMMU framework it > > should be safe (at least for OF case). > > If I recall correctly the issue is that the IPMMU might be probed after bus > master devices when using the legacy IOMMU DT integration, which the ipmmu- > vmsa driver does on ARM32. Calling bus_set_iommu() from the probe function > will then result in some devices losing IOMMU support. This is on arm64, on the Arm Juno board. > > > Now that the ipmmu_init() and ipmmu_exit() functions are simple calls to > > platform_driver_register() and platform_driver_unregister(), replace > > them with the module_platform_driver() macro call. > > > > Signed-off-by: Liviu Dudau > > Cc: Laurent Pinchart > > --- > > drivers/iommu/ipmmu-vmsa.c | 29 + > > 1 file changed, 5 insertions(+), 24 deletions(-) > > > > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c > > index 2a38aa15be17d..c60569c74678d 100644 > > --- a/drivers/iommu/ipmmu-vmsa.c > > +++ b/drivers/iommu/ipmmu-vmsa.c > > @@ -1055,10 +1055,11 @@ static int ipmmu_probe(struct platform_device *pdev) > > ipmmu_device_reset(mmu); > > > > /* > > -* We can't create the ARM mapping here as it requires the bus to have > > -* an IOMMU, which only happens when bus_set_iommu() is called in > > -* ipmmu_init() after the probe function returns. 
> > +* Now that we have validated the presence of the hardware, set > > +* the bus IOMMU ops to enable future domain and device setup. > > */ > > + if (!iommu_present(&platform_bus_type)) > > + bus_set_iommu(&platform_bus_type, &ipmmu_ops); > > > > spin_lock(&ipmmu_devices_lock); > > list_add(&mmu->list, &ipmmu_devices); > > What branch is this patch based on ? ipmmu_devices_lock isn't in mainline. drm-next. :) Looks like things have move forward since I've made the patch before LPC, will update and resend. Best regards, Liviu > > > @@ -1100,27 +1101,7 @@ static struct platform_driver ipmmu_driver = { > > .remove = ipmmu_remove, > > }; > > > > -static int __init ipmmu_init(void) > > -{ > > - int ret; > > - > > - ret = platform_driver_register(&ipmmu_driver); > > - if (ret < 0) > > - return ret; > > - > > - if (!iommu_present(&platform_bus_type)) > > - bus_set_iommu(&platform_bus_type, &ipmmu_ops); > > - > > - return 0; > > -} > > - > > -static void __exit ipmmu_exit(void) > > -{ > > - return platform_driver_unregister(&ipmmu_driver); > > -} > > - > > -subsys_initcall(ipmmu_init); > > -module_exit(ipmmu_exit); > > +module_platform_driver(ipmmu_driver); > > > > MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); > > MODULE_AUTHOR("Laurent Pinchart "); > > -- > Regards, > > Laurent Pinchart > -- | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --- ¯\_(ツ)_/¯ ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu: dmar: fix harmless section mismatch warning
On Tue, Sep 12, 2017 at 10:10:21PM +0200, Arnd Bergmann wrote: > Building with gcc-4.6 results in this warning due to > dmar_table_print_dmar_entry being inlined as in newer compiler versions: > > WARNING: vmlinux.o(.text+0x5c8bee): Section mismatch in reference from the > function dmar_walk_remapping_entries() to the function > .init.text:dmar_table_print_dmar_entry() > The function dmar_walk_remapping_entries() references > the function __init dmar_table_print_dmar_entry(). > This is often because dmar_walk_remapping_entries lacks a __init > annotation or the annotation of dmar_table_print_dmar_entry is wrong. > > This removes the __init annotation to avoid the warning. On compilers > that don't show the warning today, this should have no impact since the > function gets inlined anyway. > > Signed-off-by: Arnd Bergmann > --- > drivers/iommu/dmar.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 3/4] iommu/iova: Extend rbtree node caching
On 2017/7/31 19:42, Robin Murphy wrote: > Hi Nate, > > On 29/07/17 04:57, Nate Watterson wrote: >> Hi Robin, >> I am seeing a crash when performing very basic testing on this series >> with a Mellanox CX4 NIC. I dug into the crash a bit, and think this >> patch is the culprit, but this rcache business is still mostly >> witchcraft to me. >> >> # ifconfig eth5 up >> # ifconfig eth5 down >> Unable to handle kernel NULL pointer dereference at virtual address >> 0020 >> user pgtable: 64k pages, 48-bit VAs, pgd = 8007dbf47c00 >> [0020] *pgd=0006efab0003, *pud=0006efab0003, >> *pmd=0007d8720003, *pte= >> Internal error: Oops: 9607 [#1] SMP >> Modules linked in: >> CPU: 47 PID: 5082 Comm: ifconfig Not tainted 4.13.0-rtp-enablement+ #3 >> task: 8007da1e5780 task.stack: 8007ddcb8000 >> PC is at __cached_rbnode_delete_update+0x2c/0x58 >> LR is at private_free_iova+0x2c/0x60 >> pc : [] lr : [] pstate: 204001c5 >> sp : 8007ddcbba00 >> x29: 8007ddcbba00 x28: 8007c8350210 >> x27: 8007d1a8 x26: 8007dcc20800 >> x25: 0140 x24: 8007c98f0008 >> x23: fe4e x22: 0140 >> x21: 8007c98f0008 x20: 8007c9adb240 >> x19: 8007c98f0018 x18: 0010 >> x17: x16: >> x15: 4000 x14: >> x13: x12: 0001 >> x11: dead0200 x10: >> x9 : x8 : 8007c9adb1c0 >> x7 : 40002000 x6 : 00210d00 >> x5 : x4 : c57e >> x3 : ffcf x2 : ffcf >> x1 : 8007c9adb240 x0 : >> [...] >> [] __cached_rbnode_delete_update+0x2c/0x58 >> [] private_free_iova+0x2c/0x60 >> [] iova_magazine_free_pfns+0x4c/0xa0 >> [] free_iova_fast+0x1b0/0x230 >> [] iommu_dma_free_iova+0x5c/0x80 >> [] __iommu_dma_unmap+0x5c/0x98 >> [] iommu_dma_unmap_resource+0x24/0x30 >> [] iommu_dma_unmap_page+0xc/0x18 >> [] __iommu_unmap_page+0x40/0x60 >> [] mlx5e_page_release+0xbc/0x128 >> [] mlx5e_dealloc_rx_wqe+0x30/0x40 >> [] mlx5e_close_channel+0x70/0x1f8 >> [] mlx5e_close_channels+0x2c/0x50 >> [] mlx5e_close_locked+0x54/0x68 >> [] mlx5e_close+0x30/0x58 >> [...] 
>> >> ** Disassembly for __cached_rbnode_delete_update() near the fault ** >> 92|if (free->pfn_hi < iovad->dma_32bit_pfn) >> 0852C6C4|ldr x3,[x1,#0x18]; x3,[free,#24] >> 0852C6C8|ldr x2,[x0,#0x30]; x2,[iovad,#48] >> 0852C6CC|cmp x3,x2 >> 0852C6D0|b.cs0x0852C708 >> |curr = &iovad->cached32_node; >> 94|if (!curr) >> 0852C6D4|addsx19,x0,#0x18 ; x19,iovad,#24 >> 0852C6D8|b.eq0x0852C708 >> | >> |cached_iova = rb_entry(*curr, struct iova, node); >> | >> 99|if (free->pfn_lo >= cached_iova->pfn_lo) >> 0852C6DC|ldr x0,[x19] ; xiovad,[curr] >> 0852C6E0|ldr x2,[x1,#0x20]; x2,[free,#32] >> 0852C6E4|ldr x0,[x0,#0x20]; x0,[x0,#32] >> Apparently cached_iova was NULL so the pfn_lo access faulted. >> >> 0852C6E8|cmp x2,x0 >> 0852C6EC|b.cc0x0852C6FC >> 0852C6F0|mov x0,x1; x0,free >> 100|*curr = rb_next(&free->node); >> After instrumenting the code a bit, this seems to be the culprit. In the >> previous call, free->pfn_lo was 0x_ which is actually the >> dma_limit for the domain so rb_next() returns NULL. >> >> Let me know if you have any questions or would like additional tests >> run. I also applied your "DMA domain debug info" patches and dumped the >> contents of the domain at each of the steps above in case that would be >> useful. If nothing else, they reinforce how thirsty the CX4 NIC is >> especially when using 64k pages and many CPUs. > > Thanks for the report - I somehow managed to reason myself out of > keeping the "no cached node" check in __cached_rbnode_delete_update() on > the assumption that it must always be set by a previous allocation. > However, there is indeed just one case case for which that fails: when > you free any IOVA immediately after freeing the very topmost one. Which > is something that freeing an entire magazine's worth of IOVAs back to > the tree all at once has a very real chance of doing... 
> > The obvious straightforward fix is inline below, but I'm now starting to > understand the appeal of reserving a sentinel node to ensure the tree > can never be empty, so I might have a quick go at that to see if it > r
Re: [PATCH] iommu: Add missing dependencies
On Sun, Sep 10, 2017 at 01:43:37PM -0700, Guenter Roeck wrote: > parisc:allmodconfig, xtensa:allmodconfig, and possibly others generate > the following Kconfig warning. > > warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects > IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT && > HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64)) > > IOMMU_IO_PGTABLE_LPAE depends on (COMPILE_TEST && !GENERIC_ATOMIC64), > so any configuration option selecting it needs to have the same dependencies. > > Signed-off-by: Guenter Roeck > --- > drivers/iommu/Kconfig | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/amd: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->flush_queue
Hi Sebastian, On Wed, Sep 06, 2017 at 12:34:59PM +0200, Sebastian Andrzej Siewior wrote: > get_cpu_ptr() disables preemption and returns the ->flush_queue object > of the current CPU. raw_cpu_ptr() does the same except that it not > disable preemption which means the scheduler can move it to another CPU > after it obtained the per-CPU object. > In this case this is not bad because the data structure itself is > protected with a spin_lock. This change shouldn't matter in general > but on RT it does because the sleeping lock can't be accessed with > disabled preemption. I moved the flushing to driver/iommu/iova.c to share it with the Intel IOMMU and possibly other drivers too, so this patch does no longer apply to v4.14-rc1. Can you update the patch to these changes? Regards, Joerg ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 0/2] Dual MMU support for TI DRA7xx DSPs
On Tue, Sep 05, 2017 at 05:56:16PM -0500, Suman Anna wrote: > Suman Anna (2): > iommu/omap: Change the attach detection logic > iommu/omap: Add support to program multiple iommus > > drivers/iommu/omap-iommu.c | 375 > ++--- > drivers/iommu/omap-iommu.h | 30 ++-- > 2 files changed, 296 insertions(+), 109 deletions(-) Applied, thanks. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/iova: Make rcache flush optional on IOVA allocation failure
Hi Nate, On 19.09.2017 04:57, Nate Watterson wrote: Hi Tomasz, On 9/18/2017 12:02 PM, Robin Murphy wrote: Hi Tomasz, On 18/09/17 11:56, Tomasz Nowicki wrote: Since IOVA allocation failure is not unusual case we need to flush CPUs' rcache in hope we will succeed in next round. However, it is useful to decide whether we need rcache flush step because of two reasons: - Not scalability. On large system with ~100 CPUs iterating and flushing rcache for each CPU becomes serious bottleneck so we may want to deffer it. s/deffer/defer - free_cpu_cached_iovas() does not care about max PFN we are interested in. Thus we may flush our rcaches and still get no new IOVA like in the commonly used scenario: if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); if (!iova) iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); 1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get PCI devices a SAC address 2. alloc_iova() fails due to full 32-bit space 3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas() throws entries away for nothing and alloc_iova() fails again 4. Next alloc_iova_fast() call cannot take advantage of rcache since we have just defeated caches. In this case we pick the slowest option to proceed. This patch reworks flushed_rcache local flag to be additional function argument instead and control rcache flush step. Also, it updates all users to do the flush as the last chance. Looks like you've run into the same thing Nate found[1] - I came up with almost the exact same patch, only with separate alloc_iova_fast() and alloc_iova_fast_noretry() wrapper functions, but on reflection, just exposing the bool to callers is probably simpler. One nit, can you document it in the kerneldoc comment too? With that: Reviewed-by: Robin Murphy Thanks, Robin. 
[1]:https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg19758.html This patch completely resolves the issue I reported in [1]!! I somehow missed your observations in [1] :/ Anyway, it's great it fixes performance for you too. Tested-by: Nate Watterson Thanks! Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/iova: Make rcache flush optional on IOVA allocation failure
Hi Robin, On 18.09.2017 18:02, Robin Murphy wrote: Hi Tomasz, On 18/09/17 11:56, Tomasz Nowicki wrote: Since IOVA allocation failure is not unusual case we need to flush CPUs' rcache in hope we will succeed in next round. However, it is useful to decide whether we need rcache flush step because of two reasons: - Not scalability. On large system with ~100 CPUs iterating and flushing rcache for each CPU becomes serious bottleneck so we may want to defer it. - free_cpu_cached_iovas() does not care about max PFN we are interested in. Thus we may flush our rcaches and still get no new IOVA like in the commonly used scenario: if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); if (!iova) iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); 1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get PCI devices a SAC address 2. alloc_iova() fails due to full 32-bit space 3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas() throws entries away for nothing and alloc_iova() fails again 4. Next alloc_iova_fast() call cannot take advantage of rcache since we have just defeated caches. In this case we pick the slowest option to proceed. This patch reworks flushed_rcache local flag to be additional function argument instead and control rcache flush step. Also, it updates all users to do the flush as the last chance. Looks like you've run into the same thing Nate found[1] - I came up with almost the exact same patch, only with separate alloc_iova_fast() and alloc_iova_fast_noretry() wrapper functions, but on reflection, just exposing the bool to callers is probably simpler. One nit, can you document it in the kerneldoc comment too? With that: Reviewed-by: Robin Murphy Thanks! I will add missing comment. Tomasz ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: Seeing conflict with IPMMU driver under ACPI
Hi Ananth, On Tuesday, 19 September 2017 09:54:49 EEST Jasty, Ananth wrote: > On Sep 18, 2017, at 11:49 PM, Laurent Pinchart wrote: > > On Tuesday, 19 September 2017 03:43:05 EEST Jasty, Ananth wrote: > >> Hi, with your IPMMU driver enabled under 4.13 we’re seeing a crash on > >> boot: > >> > >> [ 13.785164] Unable to handle kernel NULL pointer dereference at virtual > >> address 0018 > >> [ 13.793254] [0018] user address but active_mm is swapper > >> [ 13.799600] Internal error: Oops: 9604 [#1] SMP > >> [ 13.804466] Modules linked in: aes_neon_bs aes_neon_blk crypto_simd > >> cryptd > >> [ 13.811334] CPU: 152 PID: 1529 Comm: kworker/152:1 Not tainted > >> 4.13.0-9-generic #10-Ubuntu > >> [ 13.819584] Hardware name: Default string Cavium ThunderX2/Default > >> string, BIOS 5.13 07/20/2017 > >> [ 13.828285] Workqueue: events deferred_probe_work_func > >> [ 13.833410] task: 80bee93d task.stack: 80bee93dc000 > >> [ 13.839330] PC is at iommu_ops_from_fwnode+0x4c/0x90 > >> [ 13.844282] LR is at iommu_ops_from_fwnode+0x28/0x90 > >> > >> The ARM SMMUv3 driver (which our platform implements) seems to be losing > >> iommu_ops to the IPMMU driver. > > > > You seem not to be the first one to notice: > > > > https://patchwork.kernel.org/patch/9956449/ > > Wow, he did not beat me by much. > > >> Note: our platform uses ACPI for device enumeration. > >> > >> I have no way to test this, but is there a reason the set_iommu isn’t in > >> _probe? > > > > I can't recall what the reason was I'm afraid. > > arm-smmu-v3.c does do the set_iommu in probe, and thus far there have been > no confirmed fatalities, but I can't speak authoritatively. > > I'll leave you all to it then. After digging a bit I realized that calling bus_set_iommu() from the init function is needed to support ARM32 (the same driver supports both ARM32 and ARM64). Let's discuss the matter in a reply to https://patchwork.kernel.org/ patch/9956449/ . 
-- Regards, Laurent Pinchart ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/ipmmu-vmsa: Do not replace bus IOMMU ops on driver init.
Hi Liviu, Thank you for the patch. On Monday, 18 September 2017 13:04:44 EEST Liviu Dudau wrote: > If the IPMMU driver is compiled in the kernel it will replace the > platform bus IOMMU ops on running the ipmmu_init() function, regardless > if there is any IPMMU hardware present or not. This screws up systems > that just want to build a generic kernel that runs on multiple platforms > and use a different IOMMU implementation. > > Move the bus_set_iommu() call at the end of the ipmmu_probe() function > when we know that hardware is present. With current IOMMU framework it > should be safe (at least for OF case). If I recall correctly the issue is that the IPMMU might be probed after bus master devices when using the legacy IOMMU DT integration, which the ipmmu-vmsa driver does on ARM32. Calling bus_set_iommu() from the probe function will then result in some devices losing IOMMU support. > Now that the ipmmu_init() and ipmmu_exit() functions are simple calls to > platform_driver_register() and platform_driver_unregister(), replace > them with the module_platform_driver() macro call. > > Signed-off-by: Liviu Dudau > Cc: Laurent Pinchart > --- > drivers/iommu/ipmmu-vmsa.c | 29 + > 1 file changed, 5 insertions(+), 24 deletions(-) > > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c > index 2a38aa15be17d..c60569c74678d 100644 > --- a/drivers/iommu/ipmmu-vmsa.c > +++ b/drivers/iommu/ipmmu-vmsa.c > @@ -1055,10 +1055,11 @@ static int ipmmu_probe(struct platform_device *pdev) > ipmmu_device_reset(mmu); > > /* > - * We can't create the ARM mapping here as it requires the bus to have > - * an IOMMU, which only happens when bus_set_iommu() is called in > - * ipmmu_init() after the probe function returns. > + * Now that we have validated the presence of the hardware, set > + * the bus IOMMU ops to enable future domain and device setup.
>*/ > + if (!iommu_present(&platform_bus_type)) > + bus_set_iommu(&platform_bus_type, &ipmmu_ops); > > spin_lock(&ipmmu_devices_lock); > list_add(&mmu->list, &ipmmu_devices); What branch is this patch based on ? ipmmu_devices_lock isn't in mainline. > @@ -1100,27 +1101,7 @@ static struct platform_driver ipmmu_driver = { > .remove = ipmmu_remove, > }; > > -static int __init ipmmu_init(void) > -{ > - int ret; > - > - ret = platform_driver_register(&ipmmu_driver); > - if (ret < 0) > - return ret; > - > - if (!iommu_present(&platform_bus_type)) > - bus_set_iommu(&platform_bus_type, &ipmmu_ops); > - > - return 0; > -} > - > -static void __exit ipmmu_exit(void) > -{ > - return platform_driver_unregister(&ipmmu_driver); > -} > - > -subsys_initcall(ipmmu_init); > -module_exit(ipmmu_exit); > +module_platform_driver(ipmmu_driver); > > MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); > MODULE_AUTHOR("Laurent Pinchart "); -- Regards, Laurent Pinchart ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu