Re: [PATCH v2 0/9] Add dynamic iommu backed bounce buffers

2022-06-05 Thread David Stevens
On Fri, Jun 3, 2022 at 11:53 PM Niklas Schnelle  wrote:
>
> On Fri, 2022-05-27 at 10:25 +0900, David Stevens wrote:
> > On Tue, May 24, 2022 at 9:27 PM Niklas Schnelle  
> > wrote:
> > > On Fri, 2021-08-06 at 19:34 +0900, David Stevens wrote:
> > > > From: David Stevens 
> > > >
> > > > This patch series adds support for per-domain dynamic pools of iommu
> > > > bounce buffers to the dma-iommu API. This allows iommu mappings to be
> > > > reused while still maintaining strict iommu protection.
> > > >
> > > > This bounce buffer support is used to add a new config option that, when
> > > > enabled, causes all non-direct streaming mappings below a configurable
> > > > size to go through the bounce buffers. This serves as an optimization on
> > > > systems where manipulating iommu mappings is very expensive. For
> > > > example, virtio-iommu operations in a guest on a linux host require a
> > > > vmexit, involvement of the VMM, and a VFIO syscall. For relatively small
> > > > DMA operations, memcpy can be significantly faster.
> > > >
> > > > As a performance comparison, on a device with an i5-10210U, I ran fio
> > > > with a VFIO passthrough NVMe drive and virtio-iommu with '--direct=1
> > > > --rw=read --ioengine=libaio --iodepth=64' and block sizes 4k, 16k, 64k,
> > > > and 128k. Test throughput increased by 2.8x, 4.7x, 3.6x, and 3.6x. Time
> > > > spent in iommu_dma_unmap_(page|sg) per GB processed decreased by 97%,
> > > > 94%, 90%, and 87%. Time spent in iommu_dma_map_(page|sg) decreased
> > > > by >99%, as bounce buffers don't require syncing here in the read case.
> > > > Running with multiple jobs doesn't serve as a useful performance
> > > > comparison because virtio-iommu and vfio_iommu_type1 both have big
> > > > locks that significantly limit multithreaded DMA performance.
> > > >
> > > > These pooled bounce buffers are also used for subgranule mappings with
> > > > untrusted devices, replacing the single use bounce buffers used
> > > > currently. The biggest difference here is that the new implementation
> > > > maps a whole sglist using a single bounce buffer. The new implementation
> > > > does not support using bounce buffers for only some segments of the
> > > > sglist, so it may require more copying. However, the current
> > > > implementation requires per-segment iommu map/unmap operations for all
> > > > untrusted sglist mappings (fully aligned sglists included). On an
> > > > i5-10210U laptop with the internal NVMe drive made to appear untrusted,
> > > > fio --direct=1 --rw=read --ioengine=libaio --iodepth=64 --bs=64k showed
> > > > a statistically significant decrease in CPU load from 2.28% -> 2.17%
> > > > with the new iommu bounce buffer optimization enabled.
> > > >
> > > > Each domain's buffer pool is split into multiple power-of-2 size
> > > > classes. Each class allocates a fixed number of buffer slot metadata entries. A
> > > > large iova range is allocated, and each slot is assigned an iova from
> > > > the range. This allows the iova to be easily mapped back to the slot,
> > > > and allows the critical section of most pool operations to be constant
> > > > time. The one exception is finding a cached buffer to reuse. These are
> > > > only separated according to R/W permissions - the use of other
> > > > permissions such as IOMMU_PRIV may require a linear search through the
> > > > cache. However, these other permissions are rare and likely exhibit high
> > > > locality, so they should not be a bottleneck in practice.
> > > >
> > > > Since untrusted devices may require bounce buffers, each domain has a
> > > > fallback rbtree to manage single use buffers. This may be necessary if a
> > > > very large number of DMA operations are simultaneously in-flight, or for
> > > > very large individual DMA operations.
> > > >
> > > > This patch set does not use swiotlb. There are two primary ways in which
> > > > swiotlb isn't compatible with per-domain buffer pools. First, swiotlb
> > > > allocates buffers to be compatible with a single device, whereas
> > > > per-domain buffer pools don't handle that during buffer allocation as a
> > > > single buffer may end up being used by multiple devices. Second, swiotlb
> > > > allocation establishes the original to 

Re: [PATCH v2 0/9] Add dynamic iommu backed bounce buffers

2022-05-26 Thread David Stevens
On Tue, May 24, 2022 at 9:27 PM Niklas Schnelle  wrote:
>
> On Fri, 2021-08-06 at 19:34 +0900, David Stevens wrote:
> > From: David Stevens 
> >
> > This patch series adds support for per-domain dynamic pools of iommu
> > bounce buffers to the dma-iommu API. This allows iommu mappings to be
> > reused while still maintaining strict iommu protection.
> >
> > This bounce buffer support is used to add a new config option that, when
> > enabled, causes all non-direct streaming mappings below a configurable
> > size to go through the bounce buffers. This serves as an optimization on
> > systems where manipulating iommu mappings is very expensive. For
> > example, virtio-iommu operations in a guest on a linux host require a
> > vmexit, involvement of the VMM, and a VFIO syscall. For relatively small
> > DMA operations, memcpy can be significantly faster.
> >
> > As a performance comparison, on a device with an i5-10210U, I ran fio
> > with a VFIO passthrough NVMe drive and virtio-iommu with '--direct=1
> > --rw=read --ioengine=libaio --iodepth=64' and block sizes 4k, 16k, 64k,
> > and 128k. Test throughput increased by 2.8x, 4.7x, 3.6x, and 3.6x. Time
> > spent in iommu_dma_unmap_(page|sg) per GB processed decreased by 97%,
> > 94%, 90%, and 87%. Time spent in iommu_dma_map_(page|sg) decreased
> > by >99%, as bounce buffers don't require syncing here in the read case.
> > Running with multiple jobs doesn't serve as a useful performance
> > comparison because virtio-iommu and vfio_iommu_type1 both have big
> > locks that significantly limit multithreaded DMA performance.
> >
> > These pooled bounce buffers are also used for subgranule mappings with
> > untrusted devices, replacing the single use bounce buffers used
> > currently. The biggest difference here is that the new implementation
> > maps a whole sglist using a single bounce buffer. The new implementation
> > does not support using bounce buffers for only some segments of the
> > sglist, so it may require more copying. However, the current
> > implementation requires per-segment iommu map/unmap operations for all
> > untrusted sglist mappings (fully aligned sglists included). On an
> > i5-10210U laptop with the internal NVMe drive made to appear untrusted,
> > fio --direct=1 --rw=read --ioengine=libaio --iodepth=64 --bs=64k showed
> > a statistically significant decrease in CPU load from 2.28% -> 2.17%
> > with the new iommu bounce buffer optimization enabled.
> >
> > Each domain's buffer pool is split into multiple power-of-2 size
> > classes. Each class allocates a fixed number of buffer slot metadata entries. A
> > large iova range is allocated, and each slot is assigned an iova from
> > the range. This allows the iova to be easily mapped back to the slot,
> > and allows the critical section of most pool operations to be constant
> > time. The one exception is finding a cached buffer to reuse. These are
> > only separated according to R/W permissions - the use of other
> > permissions such as IOMMU_PRIV may require a linear search through the
> > cache. However, these other permissions are rare and likely exhibit high
> > locality, so they should not be a bottleneck in practice.
> >
> > Since untrusted devices may require bounce buffers, each domain has a
> > fallback rbtree to manage single use buffers. This may be necessary if a
> > very large number of DMA operations are simultaneously in-flight, or for
> > very large individual DMA operations.
> >
> > This patch set does not use swiotlb. There are two primary ways in which
> > swiotlb isn't compatible with per-domain buffer pools. First, swiotlb
> > allocates buffers to be compatible with a single device, whereas
> > per-domain buffer pools don't handle that during buffer allocation as a
> > single buffer may end up being used by multiple devices. Second, swiotlb
> > allocation establishes the original to bounce buffer mapping, which
> > again doesn't work if buffers can be reused. Effectively the only code
> > that can be shared between the two use cases is allocating slots from
> > the swiotlb's memory. However, given that we're going to be allocating
> > memory for use with an iommu, allocating memory from a block of memory
> > explicitly set aside to deal with a lack of iommu seems kind of
> > contradictory. At best there might be a small performance improvement if
> > swiotlb allocation is faster than regular page allocation, but buffer
> > allocation isn't on the hot path anyway.
> >
> > Not using the swiotlb has the benefit that memory doesn't have to be
> > preallocated. 
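
For reference, a minimal sketch of the constant-time IOVA-to-slot lookup
described in the cover letter quoted above. The structure names and the
slot count are illustrative assumptions, not the actual patch code:

#include <stddef.h>

#define SLOTS_PER_CLASS 256

struct pool_slot {
	void *vaddr;		/* bounce buffer backing this slot */
	int prot;		/* cached R/W permissions */
};

struct size_class {
	unsigned long iova_base;	/* start of this class's contiguous IOVA range */
	unsigned long slot_size;	/* power-of-2 buffer size for this class */
	struct pool_slot slots[SLOTS_PER_CLASS];
};

/*
 * Because every slot owns a fixed IOVA carved out of one large range,
 * mapping an IOVA back to its slot is a subtraction and a divide, so
 * unmap does not need to search any data structure.
 */
static struct pool_slot *iova_to_slot(struct size_class *c, unsigned long iova)
{
	unsigned long idx = (iova - c->iova_base) / c->slot_size;

	return idx < SLOTS_PER_CLASS ? &c->slots[idx] : NULL;
}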

[PATCH v3] iommu/vt-d: calculate mask for non-aligned flushes

2022-03-31 Thread David Stevens
From: David Stevens 

Calculate the appropriate mask for non-size-aligned page selective
invalidation. Since psi uses the mask value to mask out the lower order
bits of the target address, properly flushing the iotlb requires using a
mask value such that [pfn, pfn+pages) all lie within the flushed
size-aligned region.  This is not normally an issue because iova.c
always allocates iovas that are aligned to their size. However, iovas
which come from other sources (e.g. userspace via VFIO) may not be
aligned.

To properly flush the IOTLB, both the start and end pfns need to be
equal after applying the mask. That means that the most efficient mask
to use is the index of the lowest bit that is equal where all higher
bits are also equal. For example, if pfn=0x17f and pages=3, then
end_pfn=0x181, so the smallest mask we can use is 8. Any differences
above the highest bit of pages are due to carrying, so by xnor'ing pfn
and end_pfn and then masking out the lower order bits based on pages, we
get 0xff00, where the first set bit is the mask we want to use.
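
For illustration, a small user-space sketch of that computation using the
example values above (this is not the kernel patch itself):

#include <stdio.h>

int main(void)
{
	unsigned long pfn = 0x17f, pages = 3;
	unsigned long aligned_pages = 4;		/* __roundup_pow_of_two(pages) */
	unsigned long bitmask = aligned_pages - 1;
	unsigned long end_pfn = pfn + pages - 1;	/* 0x181 */

	/*
	 * Bits above bitmask can only differ between pfn and end_pfn via a
	 * carry, so xnor them and drop the low bits covered by bitmask.
	 */
	unsigned long shared_bits = ~(pfn ^ end_pfn) & ~bitmask;

	/*
	 * The index of the lowest set bit is the mask to use: 8 here, which
	 * flushes [0x100, 0x200) and covers 0x17f..0x181.
	 */
	unsigned int mask = shared_bits ? __builtin_ctzl(shared_bits) : 64;

	printf("mask = %u\n", mask);
	return 0;
}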

Fixes: 6fe1010d6d9c ("vfio/type1: DMA unmap chunking")
Cc: sta...@vger.kernel.org
Signed-off-by: David Stevens 
Reviewed-by: Kevin Tian 
---
The seeds of the bug were introduced by f76aec76ec7f6, which
simultaneously added the alignment requirement to the iommu driver and
made the iova allocator return aligned iovas. However, I don't think
there was any way to trigger the bug at that time. The tagged VFIO
change is one that actually introduced a code path that could trigger
the bug. There may also be other ways to trigger the bug that I am not
aware of.

v1 -> v2:
 - Calculate an appropriate mask for non-size-aligned iovas instead
   of falling back to domain selective flush.
v2 -> v3:
 - Add more detail to commit message.

 drivers/iommu/intel/iommu.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cfe9ed2..ab2273300346 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1717,7 +1717,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu 
*iommu,
  unsigned long pfn, unsigned int pages,
  int ih, int map)
 {
-   unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+   unsigned int aligned_pages = __roundup_pow_of_two(pages);
+   unsigned int mask = ilog2(aligned_pages);
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
u16 did = domain->iommu_did[iommu->seq_id];
 
@@ -1729,10 +1730,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu 
*iommu,
if (domain_use_first_level(domain)) {
domain_flush_piotlb(iommu, domain, addr, pages, ih);
} else {
+   unsigned long bitmask = aligned_pages - 1;
+
+   /*
+* PSI masks the low order bits of the base address. If the
+* address isn't aligned to the mask, then compute a mask value
+* needed to ensure the target range is flushed.
+*/
+   if (unlikely(bitmask & pfn)) {
+   unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+   /*
+* Since end_pfn <= pfn + bitmask, the only way bits
+* higher than bitmask can differ in pfn and end_pfn is
+* by carrying. This means after masking out bitmask,
+* high bits starting with the first set bit in
+* shared_bits are all equal in both pfn and end_pfn.
+*/
+   shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+   mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+   }
+
/*
 * Fallback to domain selective flush if no PSI support or
-* the size is too big. PSI requires page size to be 2 ^ x,
-* and the base address is naturally aligned to the size.
+* the size is too big.
 */
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
-- 
2.35.1.1094.g7c7d902a7c-goog



Re: [PATCH v2] iommu/vt-d: calculate mask for non-aligned flushes

2022-03-25 Thread David Stevens
On Fri, Mar 25, 2022 at 4:15 PM Zhang, Tina  wrote:
>
>
>
> > -Original Message-
> > From: iommu  On Behalf Of
> > Tian, Kevin
> > Sent: Friday, March 25, 2022 2:14 PM
> > To: David Stevens ; Lu Baolu
> > 
> > Cc: iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org
> > Subject: RE: [PATCH v2] iommu/vt-d: calculate mask for non-aligned flushes
> >
> > > From: David Stevens
> > > Sent: Tuesday, March 22, 2022 2:36 PM
> > >
> > > From: David Stevens 
> > >
> > > Calculate the appropriate mask for non-size-aligned page selective
> > > invalidation. Since psi uses the mask value to mask out the lower
> > > order bits of the target address, properly flushing the iotlb requires
> > > using a mask value such that [pfn, pfn+pages) all lie within the
> > > flushed size-aligned region.  This is not normally an issue because
> > > iova.c always allocates iovas that are aligned to their size. However,
> > > iovas which come from other sources (e.g. userspace via VFIO) may not
> > > be aligned.
> > >
> > > Signed-off-by: David Stevens 
> > > ---
> > > v1 -> v2:
> > >  - Calculate an appropriate mask for non-size-aligned iovas instead
> > >of falling back to domain selective flush.
> > >
> > >  drivers/iommu/intel/iommu.c | 27 ---
> > >  1 file changed, 24 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> > > index 5b196cfe9ed2..ab2273300346 100644
> > > --- a/drivers/iommu/intel/iommu.c
> > > +++ b/drivers/iommu/intel/iommu.c
> > > @@ -1717,7 +1717,8 @@ static void iommu_flush_iotlb_psi(struct
> > > intel_iommu *iommu,
> > >   unsigned long pfn, unsigned int pages,
> > >   int ih, int map)
> > >  {
> > > -   unsigned int mask = ilog2(__roundup_pow_of_two(pages));
> > > +   unsigned int aligned_pages = __roundup_pow_of_two(pages);
> > > +   unsigned int mask = ilog2(aligned_pages);
> > > uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
> > > u16 did = domain->iommu_did[iommu->seq_id];
> > >
> > > @@ -1729,10 +1730,30 @@ static void iommu_flush_iotlb_psi(struct
> > > intel_iommu *iommu,
> > > if (domain_use_first_level(domain)) {
> > > domain_flush_piotlb(iommu, domain, addr, pages, ih);
> > > } else {
> > > +   unsigned long bitmask = aligned_pages - 1;
> > > +
> > > +   /*
> > > +* PSI masks the low order bits of the base address. If the
> > > +* address isn't aligned to the mask, then compute a mask
> > > value
> > > +* needed to ensure the target range is flushed.
> > > +*/
> > > +   if (unlikely(bitmask & pfn)) {
> > > +   unsigned long end_pfn = pfn + pages - 1, shared_bits;
> > > +
> > > +   /*
> > > +* Since end_pfn <= pfn + bitmask, the only way bits
> > > +* higher than bitmask can differ in pfn and end_pfn
> > > is
> > > +* by carrying. This means after masking out bitmask,
> > > +* high bits starting with the first set bit in
> > > +* shared_bits are all equal in both pfn and end_pfn.
> > > +*/
> > > +   shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
> > > +   mask = shared_bits ? __ffs(shared_bits) :
> > > BITS_PER_LONG;
> > > +   }
> >
> > While it works, I wonder whether the below is simpler regarding readability:
> >
> >   } else {
> > + /*
> > +  * PSI masks the low order bits of the base address. If the
> > +  * address isn't aligned to the mask and [pfn, pfn+pages)
> > +  * don't all lie within the flushed size-aligned region,
> > +  * simply increment the mask by one to cover the trailing pages.
> > +  */
> > + if (unlikely((pfn & (aligned_pages - 1)) &&
> > +  (pfn + pages - 1 >= ALIGN(pfn, aligned_pages))))
> > + mask++;
>
> According to the vt-d spec, increasing mask means more bits of the pfn would 
> be masked out. So simply increasing the mask number might not be correct.
> This second version does give more consideration to that.
>

Right, this is what the more complicated code handles. For a concrete
example, if pfn=0x17f and pages=2, just doing mask+1 would only flush
[0x17c, 0x17f], which still misses 0x180. To ensure 0x180 is flushed,
mask needs to be 8.
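
A quick user-space check of those numbers (not kernel code), comparing the
mask+1 suggestion with the mask computed by this patch:

#include <stdio.h>

int main(void)
{
	unsigned long pfn = 0x17f, pages = 2, end_pfn = pfn + pages - 1;	/* 0x180 */

	/* mask+1 approach: ilog2(2) + 1 = 2, flushing only [0x17c, 0x180) */
	printf("mask+1 flushes [%#lx, %#lx)\n", pfn & ~3UL, (pfn & ~3UL) + 4);

	/* v2 computation: lowest shared high bit is bit 8, flushing [0x100, 0x200) */
	unsigned long shared_bits = ~(pfn ^ end_pfn) & ~1UL;	/* bitmask = aligned_pages - 1 = 1 */
	printf("v2 mask = %d\n", __builtin_ctzl(shared_bits));
	return 0;
}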

-David




[PATCH v2] iommu/vt-d: calculate mask for non-aligned flushes

2022-03-22 Thread David Stevens
From: David Stevens 

Calculate the appropriate mask for non-size-aligned page selective
invalidation. Since psi uses the mask value to mask out the lower order
bits of the target address, properly flushing the iotlb requires using a
mask value such that [pfn, pfn+pages) all lie within the flushed
size-aligned region.  This is not normally an issue because iova.c
always allocates iovas that are aligned to their size. However, iovas
which come from other sources (e.g. userspace via VFIO) may not be
aligned.

Signed-off-by: David Stevens 
---
v1 -> v2:
 - Calculate an appropriate mask for non-size-aligned iovas instead
   of falling back to domain selective flush.

 drivers/iommu/intel/iommu.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cfe9ed2..ab2273300346 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1717,7 +1717,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu 
*iommu,
  unsigned long pfn, unsigned int pages,
  int ih, int map)
 {
-   unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+   unsigned int aligned_pages = __roundup_pow_of_two(pages);
+   unsigned int mask = ilog2(aligned_pages);
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
u16 did = domain->iommu_did[iommu->seq_id];
 
@@ -1729,10 +1730,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu 
*iommu,
if (domain_use_first_level(domain)) {
domain_flush_piotlb(iommu, domain, addr, pages, ih);
} else {
+   unsigned long bitmask = aligned_pages - 1;
+
+   /*
+* PSI masks the low order bits of the base address. If the
+* address isn't aligned to the mask, then compute a mask value
+* needed to ensure the target range is flushed.
+*/
+   if (unlikely(bitmask & pfn)) {
+   unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+   /*
+* Since end_pfn <= pfn + bitmask, the only way bits
+* higher than bitmask can differ in pfn and end_pfn is
+* by carrying. This means after masking out bitmask,
+* high bits starting with the first set bit in
+* shared_bits are all equal in both pfn and end_pfn.
+*/
+   shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+   mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+   }
+
/*
 * Fallback to domain selective flush if no PSI support or
-* the size is too big. PSI requires page size to be 2 ^ x,
-* and the base address is naturally aligned to the size.
+* the size is too big.
 */
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
-- 
2.35.1.894.gb6a874cedc-goog



[PATCH] iommu/vt-d: check alignment before using psi

2022-03-15 Thread David Stevens
From: David Stevens 

Fall back to domain selective flush if the target address is not aligned
to the mask being used for invalidation. This is necessary because page
selective invalidation masks out the lower order bits of the target
address based on the mask value, so if a non-aligned address is targeted
for psi, then mappings at the end of [pfn, pfn+pages) may not properly
be flushed from the iotlb.

This is not normally an issue because iova.c always allocates iovas that
are aligned to their size. However, iovas which come from other sources
(e.g. userspace via VFIO) may not be aligned.

Signed-off-by: David Stevens 
---
 drivers/iommu/intel/iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cfe9ed2..c122686e0a5c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1735,7 +1735,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu 
*iommu,
 * and the base address is naturally aligned to the size.
 */
if (!cap_pgsel_inv(iommu->cap) ||
-   mask > cap_max_amask_val(iommu->cap))
+   mask > cap_max_amask_val(iommu->cap) ||
+   unlikely(((1 << mask) - 1) & pfn))
iommu->flush.flush_iotlb(iommu, did, 0, 0,
DMA_TLB_DSI_FLUSH);
else
-- 
2.35.1.723.g4982287a31-goog



[PATCH v8 7/7] dma-iommu: account for min_align_mask w/swiotlb

2021-09-28 Thread David Stevens
From: David Stevens 

Pass the non-aligned size to __iommu_dma_map when using swiotlb bounce
buffers in iommu_dma_map_page, to account for min_align_mask.

To deal with granule alignment, __iommu_dma_map maps iova_align(size +
iova_off) bytes starting at phys - iova_off. If iommu_dma_map_page
passes aligned size when using swiotlb, then this becomes
iova_align(iova_align(orig_size) + iova_off). Normally iova_off will be
zero when using swiotlb. However, this is not the case for devices that
set min_align_mask. When iova_off is non-zero, __iommu_dma_map ends up
mapping an extra page at the end of the buffer. Beyond just being a
security issue, the extra page is not cleaned up by __iommu_dma_unmap.
This causes problems when the IOVA is reused, due to collisions in the
iommu driver.  Just passing the original size is sufficient, since
__iommu_dma_map will take care of granule alignment.
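
As a standalone illustration of the double alignment (assumed example
values: a 4 KiB IOVA granule, a 512-byte mapping, and a 512-byte offset
preserved by min_align_mask):

#include <stdio.h>

#define GRANULE 4096UL
#define iova_align(sz) (((sz) + GRANULE - 1) & ~(GRANULE - 1))

int main(void)
{
	unsigned long size = 512, iova_off = 512;

	/* before: aligned_size is passed down, so alignment happens twice */
	unsigned long old_mapped = iova_align(iova_align(size) + iova_off);
	/* after: the original size is passed and __iommu_dma_map aligns once */
	unsigned long new_mapped = iova_align(size + iova_off);

	/* prints "old: 8192, new: 4096" - the old path maps an extra page */
	printf("old: %lu, new: %lu\n", old_mapped, new_mapped);
	return 0;
}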

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 289c49ead01a..342359727a59 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -806,7 +806,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = size;
dma_addr_t iova, dma_mask = dma_get_mask(dev);
 
/*
@@ -815,7 +814,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 */
if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
void *padding_start;
-   size_t padding_size;
+   size_t padding_size, aligned_size;
 
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
@@ -840,7 +839,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
 
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
-- 
2.33.0.685.g46640cef36-goog



[PATCH v8 6/7] swiotlb: support aligned swiotlb buffers

2021-09-28 Thread David Stevens
From: David Stevens 

Add an argument to swiotlb_tbl_map_single that specifies the desired
alignment of the allocated buffer. This is used by dma-iommu to ensure
the buffer is aligned to the iova granule size when using swiotlb with
untrusted sub-granule mappings. This addresses an issue where adjacent
slots could be exposed to the untrusted device if IO_TLB_SIZE < iova
granule < PAGE_SIZE.
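
A simplified sketch of how the new argument feeds into the slot search
(condensed from the swiotlb_find_slots change below; the shift values
assume 2 KiB slots and 16 KiB pages and are illustrative only):

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* IO_TLB_SHIFT = 11 (2 KiB slots), PAGE_SHIFT = 14 (16 KiB pages) assumed */
static unsigned int slot_stride(unsigned long alloc_size,
				unsigned int iotlb_align_mask,
				unsigned int alloc_align_mask)
{
	unsigned int stride = (iotlb_align_mask >> 11) + 1;

	if (alloc_size >= (1UL << 14))
		stride = MAX(stride, stride << (14 - 11));

	/*
	 * New in this patch: honour the caller's alignment, e.g. a 4 KiB IOVA
	 * granule (mask 0xfff) forces a stride of two slots, so a sub-granule
	 * bounce buffer never shares an IOVA granule with an adjacent allocation.
	 */
	return MAX(stride, (alloc_align_mask >> 11) + 1);
}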

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c |  4 ++--
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  3 ++-
 kernel/dma/swiotlb.c  | 13 -
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 85a005b268f6..289c49ead01a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -818,8 +818,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
size_t padding_size;
 
aligned_size = iova_align(iovad, size);
-   phys = swiotlb_tbl_map_single(dev, phys, size,
- aligned_size, dir, attrs);
+   phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+ iova_mask(iovad), dir, attrs);
 
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e56a5faac395..cbdff8979980 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -380,7 +380,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, 
struct page *page,
 */
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-   map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
+   map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b0cb2a9973f4..569272871375 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs);
+   unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+   unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 87c40517e822..019672b3da1d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -459,7 +459,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
unsigned int index)
  * allocate a buffer from that IO TLB pool.
  */
 static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
- size_t alloc_size)
+ size_t alloc_size, unsigned int alloc_align_mask)
 {
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -483,6 +483,7 @@ static int swiotlb_find_slots(struct device *dev, 
phys_addr_t orig_addr,
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+   stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
spin_lock_irqsave(&mem->lock, flags);
if (unlikely(nslots > mem->nslabs - mem->used))
@@ -541,7 +542,8 @@ static int swiotlb_find_slots(struct device *dev, 
phys_addr_t orig_addr,
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs)
+   unsigned int alloc_align_mask, enum dma_data_direction dir,
+   unsigned long attrs)
 {
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -561,7 +563,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
 
-   index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset);
+   index = swiotlb_find_slots(dev, orig_addr,
+  alloc_size + offset, alloc_align_mask);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
@@ -675,7 +678,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t 
paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), 

[PATCH v8 5/7] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-09-28 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb related code can be removed more
aggressively.

Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4f77c44eaf14..85a005b268f6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /* sysfs updates are serialised by the mutex of the group owning @domain */
 int iommu_dma_init_fq(struct iommu_domain *domain)
 {
@@ -731,7 +736,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -747,7 +752,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -765,7 +770,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -781,7 +786,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -808,8 +813,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | size)) {
+   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size;
 
@@ -994,7 +998,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
goto out;
}
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1072,7 +1076,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.33.0.685.g46640cef36-goog



[PATCH v8 4/7] dma-iommu: fold _swiotlb helpers into callers

2021-09-28 Thread David Stevens
From: David Stevens 

Fold the _swiotlb helper functions into the respective _page functions,
since recent fixes have moved all logic from the _page functions to the
_swiotlb functions.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 135 +-
 1 file changed, 59 insertions(+), 76 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 027b489714b7..4f77c44eaf14 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -510,26 +510,6 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   phys_addr_t phys;
-
-   phys = iommu_iova_to_phys(domain, dma_addr);
-   if (WARN_ON(!phys))
-   return;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
-   arch_sync_dma_for_cpu(phys, size, dir);
-
-   __iommu_dma_unmap(dev, dma_addr, size);
-
-   if (unlikely(is_swiotlb_buffer(dev, phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -556,55 +536,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
-{
-   int prot = dma_info_to_prot(dir, coherent, attrs);
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   struct iommu_dma_cookie *cookie = domain->iova_cookie;
-   struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
-   dma_addr_t iova;
-
-   /*
-* If both the physical buffer start address and size are
-* page aligned, we don't need to use a bounce page.
-*/
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
-   aligned_size = iova_align(iovad, org_size);
-   phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
-   if (phys == DMA_MAPPING_ERROR)
-   return DMA_MAPPING_ERROR;
-
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE ||
-dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
-   }
-
-   memset(padding_start, 0, padding_size);
-   }
-
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   arch_sync_dma_for_device(phys, org_size, dir);
-
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
-   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-   return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -866,15 +797,68 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = size;
+   dma_addr_t iova, dma_mask = dma_get_mask(dev);
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | size)) {
+   void *padding_start;
+   size_t padding_size;
+
+   aligned_size = iova_align(iovad, size);
+   phys = swiotlb_tbl_map_single(dev, phys, size,
+ aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   

[PATCH v8 3/7] dma-iommu: skip extra sync during unmap w/swiotlb

2021-09-28 Thread David Stevens
From: David Stevens 

Calling the iommu_dma_sync_*_for_cpu functions during unmap can cause
two copies out of the swiotlb buffer. Do the arch sync directly in
__iommu_dma_unmap_swiotlb instead to avoid this. This makes the call to
iommu_dma_sync_sg_for_cpu for untrusted devices in iommu_dma_unmap_sg no
longer necessary, so move that invocation later in the function.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 19bebacbf178..027b489714b7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -521,6 +521,9 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
if (WARN_ON(!phys))
return;
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
__iommu_dma_unmap(dev, dma_addr, size);
 
if (unlikely(is_swiotlb_buffer(dev, phys)))
@@ -871,8 +874,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
 }
 
@@ -1088,14 +1089,14 @@ static void iommu_dma_unmap_sg(struct device *dev, 
struct scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
/*
 * The scatterlist segments are mapped into a single
 * contiguous IOVA allocation, so this is incredibly easy.
-- 
2.33.0.685.g46640cef36-goog



[PATCH v8 2/7] dma-iommu: fix arch_sync_dma for map

2021-09-28 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
helper, so it can use the bounce buffer address if necessary.

Now that iommu_dma_map_sg delegates to a function which takes care of
architectural syncing in the untrusted device case, the call to
iommu_dma_sync_sg_for_device can be moved so it only occurs for trusted
devices. Doing the sync for untrusted devices before mapping never
really worked, since it needs to be able to target swiotlb buffers.

This also moves the architectural sync to before the call to
__iommu_dma_map, to guarantee that untrusted devices can't see stale
data they shouldn't see.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index c4d205b63c58..19bebacbf178 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -593,6 +593,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
memset(padding_start, 0, padding_size);
}
 
+   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   arch_sync_dma_for_device(phys, org_size, dir);
+
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
@@ -860,14 +863,9 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
 
-   dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
coherent, dir, attrs);
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   dma_handle != DMA_MAPPING_ERROR)
-   arch_sync_dma_for_device(phys, size, dir);
-   return dma_handle;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -1012,12 +1010,12 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
goto out;
}
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
 * Work out how much IOVA space we need, and align the segments to
 * IOVA granules for the IOMMU driver to handle. With some clever
-- 
2.33.0.685.g46640cef36-goog



[PATCH v8 1/7] dma-iommu: fix sync_sg with swiotlb

2021-09-28 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 896bea04c347..c4d205b63c58 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -828,17 +828,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(dev, sg_phys(sg)))
-   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-   sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -848,17 +844,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(dev, sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.33.0.685.g46640cef36-goog



[PATCH v8 0/7] Fixes for dma-iommu swiotlb bounce buffers

2021-09-28 Thread David Stevens
From: David Stevens 

This patch set includes various fixes for dma-iommu's swiotlb bounce
buffers for untrusted devices.

The min_align_mask issue was found when running fio on an untrusted nvme
device with bs=512. The other issues were found via code inspection, so
I don't have any specific use cases where things were not working, nor
any concrete performance numbers.

There are two issues related to min_align_mask that this patch series
does not attempt to fix. First, it does not address the case where
min_align_mask is larger than the IOVA granule. Doing so requires
changes to IOVA allocation, and is not specific to when swiotlb bounce
buffers are used. This is not a problem in practice today, since the
only driver which uses min_align_mask is nvme, which sets it to 4096.

The second issue this series does not address is the fact that extra
swiotlb slots adjacent to a bounce buffer can be exposed to untrusted
devices whose drivers use min_align_mask. Fixing this requires being
able to allocate padding slots at the beginning of a swiotlb allocation.
This is a rather significant change that I am not comfortable making.
Without being able to handle this, there is also little point to
clearing the padding at the start of such a buffer, since we can only
clear based on (IO_TLB_SIZE - 1) instead of iova_mask.

v7 -> v8:
 - Rebase on v5.15-rc3 and resolve conflicts with restricted dma

v6 -> v7:
 - Remove unsafe attempt to clear padding at start of swiotlb buffer
 - Rewrite commit message for min_align_mask commit to better explain
   the problem it's fixing
 - Rebase on iommu/core
 - Acknowledge unsolved issues in cover letter

v5 -> v6:
 - Remove unnecessary line break
 - Remove redundant config check

v4 -> v5:
 - Fix xen build error
 - Move _swiotlb refactor into its own patch

v3 -> v4:
 - Fold _swiotlb functions into _page functions
 - Add patch to align swiotlb buffer to iovad granule
 - Combine if checks in iommu_dma_sync_sg_* functions

v2 -> v3:
 - Add new patch to address min_align_mask bug
 - Set SKIP_CPU_SYNC flag after syncing in map/unmap
 - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (7):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map
  dma-iommu: skip extra sync during unmap w/swiotlb
  dma-iommu: fold _swiotlb helpers into callers
  dma-iommu: Check CONFIG_SWIOTLB more broadly
  swiotlb: support aligned swiotlb buffers
  dma-iommu: account for min_align_mask w/swiotlb

 drivers/iommu/dma-iommu.c | 188 +-
 drivers/xen/swiotlb-xen.c |   2 +-
 include/linux/swiotlb.h   |   3 +-
 kernel/dma/swiotlb.c  |  13 ++-
 4 files changed, 94 insertions(+), 112 deletions(-)

-- 
2.33.0.685.g46640cef36-goog



Re: [PATCH v7 0/7] Fixes for dma-iommu swiotlb bounce buffers

2021-09-13 Thread David Stevens
Is there further feedback on these patches? Only patch 7 is still
pending review.

-David

On Mon, Aug 30, 2021 at 2:00 PM David Stevens  wrote:
>
> This patch set includes various fixes for dma-iommu's swiotlb bounce
> buffers for untrusted devices.
>
> The min_align_mask issue was found when running fio on an untrusted nvme
> device with bs=512. The other issues were found via code inspection, so
> I don't have any specific use cases where things were not working, nor
> any concrete performance numbers.
>
> There are two issues related to min_align_mask that this patch series
> does not attempt to fix. First, it does not address the case where
> min_align_mask is larger than the IOVA granule. Doing so requires
> changes to IOVA allocation, and is not specific to when swiotlb bounce
> buffers are used. This is not a problem in practice today, since the
> only driver which uses min_align_mask is nvme, which sets it to 4096.
>
> The second issue this series does not address is the fact that extra
> swiotlb slots adjacent to a bounce buffer can be exposed to untrusted
> devices whose drivers use min_align_mask. Fixing this requires being
> able to allocate padding slots at the beginning of a swiotlb allocation.
> This is a rather significant change that I am not comfortable making.
> Without being able to handle this, there is also little point to
> clearing the padding at the start of such a buffer, since we can only
> clear based on (IO_TLB_SIZE - 1) instead of iova_mask.
>
> v6 -> v7:
>  - Remove unsafe attempt to clear padding at start of swiotlb buffer
>  - Rewrite commit message for min_align_mask commit to better explain
>the problem it's fixing
>  - Rebase on iommu/core
>  - Acknowledge unsolved issues in cover letter
>
> v5 -> v6:
>  - Remove unnecessary line break
>  - Remove redundant config check
>
> v4 -> v5:
>  - Fix xen build error
>  - Move _swiotlb refactor into its own patch
>
> v3 -> v4:
>  - Fold _swiotlb functions into _page functions
>  - Add patch to align swiotlb buffer to iovad granule
>  - Combine if checks in iommu_dma_sync_sg_* functions
>
> v2 -> v3:
>  - Add new patch to address min_align_mask bug
>  - Set SKIP_CPU_SYNC flag after syncing in map/unmap
>  - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu
>
> v1 -> v2:
>  - Split fixes into dedicated patches
>  - Less invasive changes to fix arch_sync when mapping
>  - Leave dev_is_untrusted check for strict iommu
>
> David Stevens (7):
>   dma-iommu: fix sync_sg with swiotlb
>   dma-iommu: fix arch_sync_dma for map
>   dma-iommu: skip extra sync during unmap w/swiotlb
>   dma-iommu: fold _swiotlb helpers into callers
>   dma-iommu: Check CONFIG_SWIOTLB more broadly
>   swiotlb: support aligned swiotlb buffers
>   dma-iommu: account for min_align_mask w/swiotlb
>
>  drivers/iommu/dma-iommu.c | 188 +-
>  drivers/xen/swiotlb-xen.c |   2 +-
>  include/linux/swiotlb.h   |   3 +-
>  kernel/dma/swiotlb.c  |  11 ++-
>  4 files changed, 93 insertions(+), 111 deletions(-)
>
> --
> 2.33.0.259.gc128427fd7-goog
>


[PATCH v7 7/7] dma-iommu: account for min_align_mask w/swiotlb

2021-08-29 Thread David Stevens
From: David Stevens 

Pass the non-aligned size to __iommu_dma_map when using swiotlb bounce
buffers in iommu_dma_map_page, to account for min_align_mask.

To deal with granule alignment, __iommu_dma_map maps iova_align(size +
iova_off) bytes starting at phys - iova_off. If iommu_dma_map_page
passes aligned size when using swiotlb, then this becomes
iova_align(iova_align(orig_size) + iova_off). Normally iova_off will be
zero when using swiotlb. However, this is not the case for devices that
set min_align_mask. When iova_off is non-zero, __iommu_dma_map ends up
mapping an extra page at the end of the buffer. Beyond just being a
security issue, the extra page is not cleaned up by __iommu_dma_unmap.
This causes problems when the IOVA is reused, due to collisions in the
iommu driver.  Just passing the original size is sufficient, since
__iommu_dma_map will take care of granule alignment.

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9b8c17c3d29b..addcaa09db12 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -805,7 +805,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = size;
dma_addr_t iova, dma_mask = dma_get_mask(dev);
 
/*
@@ -814,7 +813,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 */
if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
void *padding_start;
-   size_t padding_size;
+   size_t padding_size, aligned_size;
 
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
@@ -839,7 +838,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
 
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
-- 
2.33.0.259.gc128427fd7-goog



[PATCH v7 6/7] swiotlb: support aligned swiotlb buffers

2021-08-29 Thread David Stevens
From: David Stevens 

Add an argument to swiotlb_tbl_map_single that specifies the desired
alignment of the allocated buffer. This is used by dma-iommu to ensure
the buffer is aligned to the iova granule size when using swiotlb with
untrusted sub-granule mappings. This addresses an issue where adjacent
slots could be exposed to the untrusted device if IO_TLB_SIZE < iova
granule < PAGE_SIZE.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c |  4 ++--
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  3 ++-
 kernel/dma/swiotlb.c  | 11 +++
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 714bec7a53c2..9b8c17c3d29b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -817,8 +817,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
size_t padding_size;
 
aligned_size = iova_align(iovad, size);
-   phys = swiotlb_tbl_map_single(dev, phys, size,
- aligned_size, dir, attrs);
+   phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+ iova_mask(iovad), dir, attrs);
 
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 24d11861ac7d..8b03d2c93428 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -382,7 +382,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, 
struct page *page,
 */
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-   map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
+   map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..93d82e43eb3a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -44,7 +44,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs);
+   unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+   unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e50df8d8f87e..d4c45d8cd1fa 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -427,7 +427,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
unsigned int index)
  * allocate a buffer from that IO TLB pool.
  */
 static int find_slots(struct device *dev, phys_addr_t orig_addr,
-   size_t alloc_size)
+   size_t alloc_size, unsigned int alloc_align_mask)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -450,6 +450,7 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+   stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
spin_lock_irqsave(&mem->lock, flags);
if (unlikely(nslots > mem->nslabs - mem->used))
@@ -504,7 +505,8 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs)
+   unsigned int alloc_align_mask, enum dma_data_direction dir,
+   unsigned long attrs)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -524,7 +526,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
 
-   index = find_slots(dev, orig_addr, alloc_size + offset);
+   index = find_slots(dev, orig_addr,
+  alloc_size + offset, alloc_align_mask);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
@@ -636,7 +639,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t 
paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
  swiotlb_force);
 
-   swiotlb_addr = swiotlb_tbl_map_single(dev, p

[PATCH v7 5/7] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-08-29 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb related code can be removed more
aggressively.

Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 95bfa57be488..714bec7a53c2 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /* sysfs updates are serialised by the mutex of the group owning @domain */
 int iommu_dma_init_fq(struct iommu_domain *domain)
 {
@@ -730,7 +735,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -746,7 +751,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -764,7 +769,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -780,7 +785,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -807,8 +812,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | size)) {
+   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size;
 
@@ -991,7 +995,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1063,7 +1067,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.33.0.259.gc128427fd7-goog



[PATCH v7 4/7] dma-iommu: fold _swiotlb helpers into callers

2021-08-29 Thread David Stevens
From: David Stevens 

Fold the _swiotlb helper functions into the respective _page functions,
since recent fixes have moved all logic from the _page functions to the
_swiotlb functions.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 135 +-
 1 file changed, 59 insertions(+), 76 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index abc528ed653c..95bfa57be488 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -510,26 +510,6 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   phys_addr_t phys;
-
-   phys = iommu_iova_to_phys(domain, dma_addr);
-   if (WARN_ON(!phys))
-   return;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
-   arch_sync_dma_for_cpu(phys, size, dir);
-
-   __iommu_dma_unmap(dev, dma_addr, size);
-
-   if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -556,55 +536,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
-{
-   int prot = dma_info_to_prot(dir, coherent, attrs);
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   struct iommu_dma_cookie *cookie = domain->iova_cookie;
-   struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
-   dma_addr_t iova;
-
-   /*
-* If both the physical buffer start address and size are
-* page aligned, we don't need to use a bounce page.
-*/
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
-   aligned_size = iova_align(iovad, org_size);
-   phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
-   if (phys == DMA_MAPPING_ERROR)
-   return DMA_MAPPING_ERROR;
-
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE ||
-dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
-   }
-
-   memset(padding_start, 0, padding_size);
-   }
-
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   arch_sync_dma_for_device(phys, org_size, dir);
-
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
-   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-   return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -865,15 +796,68 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = size;
+   dma_addr_t iova, dma_mask = dma_get_mask(dev);
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | size)) {
+   void *padding_start;
+   size_t padding_size;
+
+   aligned_size = iova_align(iovad, size);
+   phys = swiotlb_tbl_map_single(dev, phys, size,
+ aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+ 

[PATCH v7 3/7] dma-iommu: skip extra sync during unmap w/swiotlb

2021-08-29 Thread David Stevens
From: David Stevens 

Calling the iommu_dma_sync_*_for_cpu functions during unmap can cause
two copies out of the swiotlb buffer. Do the arch sync directly in
__iommu_dma_unmap_swiotlb instead to avoid this. This makes the call to
iommu_dma_sync_sg_for_cpu for untrusted devices in iommu_dma_unmap_sg no
longer necessary, so move that invocation later in the function.
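
A rough sketch of the double copy being avoided, using stub functions that only count bounce-buffer copies (nothing below is kernel code; the names merely mirror the flow described above):

/* Stand-alone sketch: syncing to the CPU and then unmapping a swiotlb
 * bounce buffer copies the data out twice; doing only the arch sync in
 * the unmap helper leaves a single copy to swiotlb_tbl_unmap_single().
 */
#include <stdio.h>

static int copies;

static void swiotlb_sync_single_for_cpu(void) { copies++; } /* bounce -> original */
static void swiotlb_tbl_unmap_single(void)    { copies++; } /* bounce -> original */
static void arch_sync_dma_for_cpu(void)       { }           /* cache maintenance only */

static void unmap_old(void)
{
	swiotlb_sync_single_for_cpu();  /* sync_*_for_cpu called during unmap */
	arch_sync_dma_for_cpu();
	swiotlb_tbl_unmap_single();     /* copies the data out a second time */
}

static void unmap_new(void)
{
	arch_sync_dma_for_cpu();        /* done directly in the unmap helper */
	swiotlb_tbl_unmap_single();     /* single copy out of the bounce buffer */
}

int main(void)
{
	copies = 0; unmap_old(); printf("old unmap path: %d copies\n", copies);
	copies = 0; unmap_new(); printf("new unmap path: %d copies\n", copies);
	return 0;
}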

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 12197fdc3b1c..abc528ed653c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -521,6 +521,9 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
if (WARN_ON(!phys))
return;
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
__iommu_dma_unmap(dev, dma_addr, size);
 
if (unlikely(is_swiotlb_buffer(phys)))
@@ -870,8 +873,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
 }
 
@@ -1079,14 +1080,14 @@ static void iommu_dma_unmap_sg(struct device *dev, 
struct scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
/*
 * The scatterlist segments are mapped into a single
 * contiguous IOVA allocation, so this is incredibly easy.
-- 
2.33.0.259.gc128427fd7-goog



[PATCH v7 2/7] dma-iommu: fix arch_sync_dma for map

2021-08-29 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
helper, so it can use the bounce buffer address if necessary.

Now that iommu_dma_map_sg delegates to a function which takes care of
architectural syncing in the untrusted device case, the call to
iommu_dma_sync_sg_for_device can be moved so it only occurs for trusted
devices. Doing the sync for untrusted devices before mapping never
really worked, since it needs to be able to target swiotlb buffers.

This also moves the architectural sync to before the call to
__iommu_dma_map, to guarantee that untrusted devices can't see stale
data they shouldn't see.
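
A minimal sketch of the resulting map-path ordering, with stub functions and made-up addresses standing in for the real helpers:

/* Stand-alone sketch of the ordering this patch establishes: "phys"
 * switches to the bounce buffer before the cache sync, and the sync
 * happens before the IOMMU mapping is installed.  Stubs only.
 */
#include <stdio.h>

typedef unsigned long phys_addr_t;

static phys_addr_t swiotlb_tbl_map_single(phys_addr_t orig)
{
	printf("1. copy/allocate bounce buffer for %#lx\n", orig);
	return 0x80000000UL;                  /* hypothetical bounce address */
}

static void arch_sync_dma_for_device(phys_addr_t phys)
{
	printf("2. cache clean for %#lx (the bounce buffer, not the original)\n",
	       phys);
}

static void __iommu_dma_map(phys_addr_t phys)
{
	printf("3. install IOMMU mapping for %#lx\n", phys);
}

int main(void)
{
	phys_addr_t phys = 0x1000UL;          /* hypothetical original page */

	phys = swiotlb_tbl_map_single(phys);  /* untrusted, sub-granule case */
	arch_sync_dma_for_device(phys);       /* must target what the device sees */
	__iommu_dma_map(phys);                /* device gains access only now */
	return 0;
}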

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d6ae87212768..12197fdc3b1c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -593,6 +593,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
memset(padding_start, 0, padding_size);
}
 
+   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   arch_sync_dma_for_device(phys, org_size, dir);
+
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
@@ -859,14 +862,9 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
 
-   dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
coherent, dir, attrs);
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   dma_handle != DMA_MAPPING_ERROR)
-   arch_sync_dma_for_device(phys, size, dir);
-   return dma_handle;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -1009,12 +1007,12 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
 * Work out how much IOVA space we need, and align the segments to
 * IOVA granules for the IOMMU driver to handle. With some clever
-- 
2.33.0.259.gc128427fd7-goog



[PATCH v7 1/7] dma-iommu: fix sync_sg with swiotlb

2021-08-29 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index bac7370ead3e..d6ae87212768 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -827,17 +827,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-   sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -847,17 +843,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.33.0.259.gc128427fd7-goog



[PATCH v7 0/7] Fixes for dma-iommu swiotlb bounce buffers

2021-08-29 Thread David Stevens
This patch set includes various fixes for dma-iommu's swiotlb bounce
buffers for untrusted devices.

The min_align_mask issue was found when running fio on an untrusted nvme
device with bs=512. The other issues were found via code inspection, so
I don't have any specific use cases where things were not working, nor
any concrete performance numbers.

There are two issues related to min_align_mask that this patch series
does not attempt to fix. First, it does not address the case where
min_align_mask is larger than the IOVA granule. Doing so requires
changes to IOVA allocation, and is not specific to when swiotlb bounce
buffers are used. This is not a problem in practice today, since the
only driver which uses min_align_mask is nvme, which sets it to 4096.

The second issue this series does not address is the fact that extra
swiotlb slots adjacent to a bounce buffer can be exposed to untrusted
devices whose drivers use min_align_mask. Fixing this requires being
able to allocate padding slots at the beginning of a swiotlb allocation.
This is a rather significant change that I am not comfortable making.
Without being able to handle this, there is also little point to
clearing the padding at the start of such a buffer, since we can only
clear based on (IO_TLB_SIZE - 1) instead of iova_mask.

v6 -> v7:
 - Remove unsafe attempt to clear padding at start of swiotlb buffer
 - Rewrite commit message for min_align_mask commit to better explain
   the problem it's fixing
 - Rebase on iommu/core
 - Acknowledge unsolved issues in cover letter

v5 -> v6:
 - Remove unnecessary line break
 - Remove redundant config check

v4 -> v5:
 - Fix xen build error
 - Move _swiotlb refactor into its own patch

v3 -> v4:
 - Fold _swiotlb functions into _page functions
 - Add patch to align swiotlb buffer to iovad granule
 - Combine if checks in iommu_dma_sync_sg_* functions

v2 -> v3:
 - Add new patch to address min_align_mask bug
 - Set SKIP_CPU_SYNC flag after syncing in map/unmap
 - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (7):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map
  dma-iommu: skip extra sync during unmap w/swiotlb
  dma-iommu: fold _swiotlb helpers into callers
  dma-iommu: Check CONFIG_SWIOTLB more broadly
  swiotlb: support aligned swiotlb buffers
  dma-iommu: account for min_align_mask w/swiotlb

 drivers/iommu/dma-iommu.c | 188 +-
 drivers/xen/swiotlb-xen.c |   2 +-
 include/linux/swiotlb.h   |   3 +-
 kernel/dma/swiotlb.c  |  11 ++-
 4 files changed, 93 insertions(+), 111 deletions(-)

-- 
2.33.0.259.gc128427fd7-goog



Re: [PATCH v6 7/7] dma-iommu: account for min_align_mask

2021-08-19 Thread David Stevens
  On Thu, Aug 19, 2021 at 6:03 PM Robin Murphy  wrote:
>
> On 2021-08-17 02:38, David Stevens wrote:
> > From: David Stevens 
> >
> > For devices which set min_align_mask, swiotlb preserves the offset of
> > the original physical address within that mask. Since __iommu_dma_map
> > accounts for non-aligned addresses, passing a non-aligned swiotlb
> > address with the swiotlb aligned size results in the offset being
> > accounted for twice in the size passed to iommu_map_atomic. The extra
> > page exposed to DMA is also not cleaned up by __iommu_dma_unmap, since
> > that function unmaps with the correct size. This causes mapping failures
> > if the iova gets reused, due to collisions in the iommu page tables.
> >
> > To fix this, pass the original size to __iommu_dma_map, since that
> > function already handles alignment.
> >
> > Additionally, when swiotlb returns non-aligned addresses, there is
> > padding at the start of the bounce buffer that needs to be cleared.
> >
> > Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
> > Signed-off-by: David Stevens 
> > ---
> >   drivers/iommu/dma-iommu.c | 24 +---
> >   1 file changed, 13 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index 6738420fc081..f2fb360c2907 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -788,7 +788,6 @@ static dma_addr_t iommu_dma_map_page(struct device 
> > *dev, struct page *page,
> >   struct iommu_domain *domain = iommu_get_dma_domain(dev);
> >   struct iommu_dma_cookie *cookie = domain->iova_cookie;
> >   struct iova_domain *iovad = &cookie->iovad;
> > - size_t aligned_size = size;
> >   dma_addr_t iova, dma_mask = dma_get_mask(dev);
> >
> >   /*
> > @@ -796,8 +795,8 @@ static dma_addr_t iommu_dma_map_page(struct device 
> > *dev, struct page *page,
> >* page aligned, we don't need to use a bounce page.
> >*/
> >   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
> > - void *padding_start;
> > - size_t padding_size;
> > + void *tlb_start;
> > + size_t aligned_size, iova_off, mapping_end_off;
> >
> >   aligned_size = iova_align(iovad, size);
> >   phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
> > @@ -806,23 +805,26 @@ static dma_addr_t iommu_dma_map_page(struct device 
> > *dev, struct page *page,
> >   if (phys == DMA_MAPPING_ERROR)
> >   return DMA_MAPPING_ERROR;
> >
> > - /* Cleanup the padding area. */
> > - padding_start = phys_to_virt(phys);
> > - padding_size = aligned_size;
> > + iova_off = iova_offset(iovad, phys);
> > + tlb_start = phys_to_virt(phys - iova_off);
> >
> >   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
> >   (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
> > - padding_start += size;
> > - padding_size -= size;
> > + /* Cleanup the padding area. */
> > + mapping_end_off = iova_off + size;
> > + memset(tlb_start, 0, iova_off);
> > + memset(tlb_start + mapping_end_off, 0,
> > +aligned_size - mapping_end_off);
> > + } else {
> > + /* Nothing was sync'ed, so clear the whole buffer. */
> > + memset(tlb_start, 0, aligned_size);
> >   }
> > -
> > - memset(padding_start, 0, padding_size);
> >   }
> >
> >   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
> >   arch_sync_dma_for_device(phys, size, dir);
> >
> > - iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
> > + iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
>
> I still don't see how this preserves min_align_mask if it is larger than
> the IOVA granule

That's a slightly different issue, and not addressed in this series. I
guess the commit message should be 'dma-iommu: account for
min_align_mask w/swiotlb'. At least from my understanding of
min_align_mask, getting min_align_mask larger than the IOVA granule to
work would require changes to IOVA allocation, not anything to do
directly with swiotlb bounce buffers. Also, probably changes to
scatterlist coalescing. That being

[PATCH v6 7/7] dma-iommu: account for min_align_mask

2021-08-16 Thread David Stevens
From: David Stevens 

For devices which set min_align_mask, swiotlb preserves the offset of
the original physical address within that mask. Since __iommu_dma_map
accounts for non-aligned addresses, passing a non-aligned swiotlb
address with the swiotlb aligned size results in the offset being
accounted for twice in the size passed to iommu_map_atomic. The extra
page exposed to DMA is also not cleaned up by __iommu_dma_unmap, since
that function unmaps with the correct size. This causes mapping failures
if the iova gets reused, due to collisions in the iommu page tables.

To fix this, pass the original size to __iommu_dma_map, since that
function already handles alignment.

Additionally, when swiotlb returns non-aligned addresses, there is
padding at the start of the bounce buffer that needs to be cleared.
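
As a worked example (stand-alone, with a made-up 4 KiB granule, bounce address and sizes), this is where the offset gets counted twice when aligned_size is passed down:

/* Sketch of the double-accounting this patch fixes.  The granule,
 * bounce address and mapping size are example assumptions only.
 */
#include <stdio.h>

#define GRANULE 0x1000UL   /* assumed IOVA granule */

static unsigned long iova_offset(unsigned long a) { return a & (GRANULE - 1); }
static unsigned long iova_align(unsigned long s)  { return (s + GRANULE - 1) & ~(GRANULE - 1); }

/* __iommu_dma_map() adds the sub-granule offset and aligns by itself. */
static unsigned long granules_mapped(unsigned long phys, unsigned long size)
{
	return iova_align(size + iova_offset(phys));
}

int main(void)
{
	unsigned long tlb_phys = 0x80000600UL; /* bounce address; offset kept by min_align_mask */
	unsigned long size = 0x400;            /* original mapping size */
	unsigned long aligned_size = iova_align(size);

	/* Old code: offset covered by aligned_size and added again internally. */
	printf("old map size:   %#lx\n", granules_mapped(tlb_phys, aligned_size)); /* 0x2000 */
	/* Fixed code passes the original size, matching what unmap tears down. */
	printf("fixed map size: %#lx\n", granules_mapped(tlb_phys, size));          /* 0x1000 */
	/* Unmap uses the dma_addr offset (same 0x600) plus the original size. */
	printf("unmap size:     %#lx\n", granules_mapped(tlb_phys, size));          /* 0x1000 */
	return 0;  /* old code leaks one mapped granule per such mapping */
}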

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6738420fc081..f2fb360c2907 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -788,7 +788,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = size;
dma_addr_t iova, dma_mask = dma_get_mask(dev);
 
/*
@@ -796,8 +795,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 * page aligned, we don't need to use a bounce page.
 */
if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
-   void *padding_start;
-   size_t padding_size;
+   void *tlb_start;
+   size_t aligned_size, iova_off, mapping_end_off;
 
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
@@ -806,23 +805,26 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
+   iova_off = iova_offset(iovad, phys);
+   tlb_start = phys_to_virt(phys - iova_off);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
-   padding_start += size;
-   padding_size -= size;
+   /* Cleanup the padding area. */
+   mapping_end_off = iova_off + size;
+   memset(tlb_start, 0, iova_off);
+   memset(tlb_start + mapping_end_off, 0,
+  aligned_size - mapping_end_off);
+   } else {
+   /* Nothing was sync'ed, so clear the whole buffer. */
+   memset(tlb_start, 0, aligned_size);
}
-
-   memset(padding_start, 0, padding_size);
}
 
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
 
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v6 6/7] swiotlb: support aligned swiotlb buffers

2021-08-16 Thread David Stevens
From: David Stevens 

Add an argument to swiotlb_tbl_map_single that specifies the desired
alignment of the allocated buffer. This is used by dma-iommu to ensure
the buffer is aligned to the iova granule size when using swiotlb with
untrusted sub-granule mappings. This addresses an issue where adjacent
slots could be exposed to the untrusted device if IO_TLB_SIZE < iova
granule < PAGE_SIZE.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c |  4 ++--
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  3 ++-
 kernel/dma/swiotlb.c  | 11 +++
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 49a0d4de5f6c..6738420fc081 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -800,8 +800,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
size_t padding_size;
 
aligned_size = iova_align(iovad, size);
-   phys = swiotlb_tbl_map_single(dev, phys, size,
- aligned_size, dir, attrs);
+   phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+ iova_mask(iovad), dir, attrs);
 
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 24d11861ac7d..8b03d2c93428 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -382,7 +382,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, 
struct page *page,
 */
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-   map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
+   map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..93d82e43eb3a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -44,7 +44,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs);
+   unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+   unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e50df8d8f87e..d4c45d8cd1fa 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -427,7 +427,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
unsigned int index)
  * allocate a buffer from that IO TLB pool.
  */
 static int find_slots(struct device *dev, phys_addr_t orig_addr,
-   size_t alloc_size)
+   size_t alloc_size, unsigned int alloc_align_mask)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -450,6 +450,7 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+   stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
spin_lock_irqsave(&mem->lock, flags);
if (unlikely(nslots > mem->nslabs - mem->used))
@@ -504,7 +505,8 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs)
+   unsigned int alloc_align_mask, enum dma_data_direction dir,
+   unsigned long attrs)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -524,7 +526,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
 
-   index = find_slots(dev, orig_addr, alloc_size + offset);
+   index = find_slots(dev, orig_addr,
+  alloc_size + offset, alloc_align_mask);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
@@ -636,7 +639,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t 
paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
  swiotlb_force);
 
-   swiotlb_addr = swiotlb_tbl_map_single(dev, p

[PATCH v6 5/7] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-08-16 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb related code can be removed more
aggressively.

Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8152efada8b2..49a0d4de5f6c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -713,7 +718,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -729,7 +734,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -747,7 +752,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -763,7 +768,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -790,8 +795,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | size)) {
+   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size;
 
@@ -974,7 +978,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1046,7 +1050,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v6 4/7] dma-iommu: fold _swiotlb helpers into callers

2021-08-16 Thread David Stevens
From: David Stevens 

Fold the _swiotlb helper functions into the respective _page functions,
since recent fixes have moved all logic from the _page functions to the
_swiotlb functions.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 135 +-
 1 file changed, 59 insertions(+), 76 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dd2c517dbf5..8152efada8b2 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -493,26 +493,6 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   phys_addr_t phys;
-
-   phys = iommu_iova_to_phys(domain, dma_addr);
-   if (WARN_ON(!phys))
-   return;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
-   arch_sync_dma_for_cpu(phys, size, dir);
-
-   __iommu_dma_unmap(dev, dma_addr, size);
-
-   if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -539,55 +519,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
-{
-   int prot = dma_info_to_prot(dir, coherent, attrs);
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   struct iommu_dma_cookie *cookie = domain->iova_cookie;
-   struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
-   dma_addr_t iova;
-
-   /*
-* If both the physical buffer start address and size are
-* page aligned, we don't need to use a bounce page.
-*/
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
-   aligned_size = iova_align(iovad, org_size);
-   phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
-   if (phys == DMA_MAPPING_ERROR)
-   return DMA_MAPPING_ERROR;
-
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE ||
-dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
-   }
-
-   memset(padding_start, 0, padding_size);
-   }
-
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   arch_sync_dma_for_device(phys, org_size, dir);
-
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
-   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-   return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -848,15 +779,68 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = size;
+   dma_addr_t iova, dma_mask = dma_get_mask(dev);
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | size)) {
+   void *padding_start;
+   size_t padding_size;
+
+   aligned_size = iova_align(iovad, size);
+   phys = swiotlb_tbl_map_single(dev, phys, size,
+ aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   return DMA_M

[PATCH v6 1/7] dma-iommu: fix sync_sg with swiotlb

2021-08-16 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927aee1a..968e0150c95e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -810,17 +810,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-   sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -830,17 +826,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v6 3/7] dma-iommu: skip extra sync during unmap w/swiotlb

2021-08-16 Thread David Stevens
From: David Stevens 

Calling the iommu_dma_sync_*_for_cpu functions during unmap can cause
two copies out of the swiotlb buffer. Do the arch sync directly in
__iommu_dma_unmap_swiotlb instead to avoid this. This makes the call to
iommu_dma_sync_sg_for_cpu for untrusted devices in iommu_dma_unmap_sg no
longer necessary, so move that invocation later in the function.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8098ce593640..5dd2c517dbf5 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -504,6 +504,9 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
if (WARN_ON(!phys))
return;
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
__iommu_dma_unmap(dev, dma_addr, size);
 
if (unlikely(is_swiotlb_buffer(phys)))
@@ -853,8 +856,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
 }
 
@@ -1062,14 +1063,14 @@ static void iommu_dma_unmap_sg(struct device *dev, 
struct scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
/*
 * The scatterlist segments are mapped into a single
 * contiguous IOVA allocation, so this is incredibly easy.
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v6 2/7] dma-iommu: fix arch_sync_dma for map

2021-08-16 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
helper, so it can use the bounce buffer address if necessary.

Now that iommu_dma_map_sg delegates to a function which takes care of
architectural syncing in the untrusted device case, the call to
iommu_dma_sync_sg_for_device can be moved so it only occurs for trusted
devices. Doing the sync for untrusted devices before mapping never
really worked, since it needs to be able to target swiotlb buffers.

This also moves the architectural sync to before the call to
__iommu_dma_map, to guarantee that untrusted devices can't see stale
data they shouldn't see.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 968e0150c95e..8098ce593640 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -576,6 +576,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
memset(padding_start, 0, padding_size);
}
 
+   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   arch_sync_dma_for_device(phys, org_size, dir);
+
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
@@ -842,14 +845,9 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
 
-   dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
coherent, dir, attrs);
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   dma_handle != DMA_MAPPING_ERROR)
-   arch_sync_dma_for_device(phys, size, dir);
-   return dma_handle;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -992,12 +990,12 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
 * Work out how much IOVA space we need, and align the segments to
 * IOVA granules for the IOMMU driver to handle. With some clever
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v6 0/7] Fixes for dma-iommu swiotlb bounce buffers

2021-08-16 Thread David Stevens
From: David Stevens 

This patch set includes various fixes for dma-iommu's swiotlb bounce
buffers for untrusted devices. There are four fixes for correctness
issues, one for a performance issue, and one for general cleanup.

The min_align_mask issue was found when running fio on an untrusted nvme
device with bs=512. The other issues were found via code inspection, so
I don't have any specific use cases where things were not working, nor
any concrete performance numbers.

v5 -> v6:
 - Remove unnecessary line break
 - Remove redundant config check

v4 -> v5:
 - Fix xen build error
 - Move _swiotlb refactor into its own patch

v3 -> v4:
 - Fold _swiotlb functions into _page functions
 - Add patch to align swiotlb buffer to iovad granule
 - Combine if checks in iommu_dma_sync_sg_* functions

v2 -> v3:
 - Add new patch to address min_align_mask bug
 - Set SKIP_CPU_SYNC flag after syncing in map/unmap
 - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (7):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map
  dma-iommu: skip extra sync during unmap w/swiotlb
  dma-iommu: fold _swiotlb helpers into callers
  dma-iommu: Check CONFIG_SWIOTLB more broadly
  swiotlb: support aligned swiotlb buffers
  dma-iommu: account for min_align_mask

 drivers/iommu/dma-iommu.c | 191 +-
 drivers/xen/swiotlb-xen.c |   2 +-
 include/linux/swiotlb.h   |   3 +-
 kernel/dma/swiotlb.c  |  11 ++-
 4 files changed, 96 insertions(+), 111 deletions(-)

-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v5 7/7] dma-iommu: account for min_align_mask

2021-08-15 Thread David Stevens
From: David Stevens 

For devices which set min_align_mask, swiotlb preserves the offset of
the original physical address within that mask. Since __iommu_dma_map
accounts for non-aligned addresses, passing a non-aligned swiotlb
address with the swiotlb aligned size results in the offset being
accounted for twice in the size passed to iommu_map_atomic. The extra
page exposed to DMA is also not cleaned up by __iommu_dma_unmap, since
that function unmaps with the correct size. This causes mapping failures
if the iova gets reused, due to collisions in the iommu page tables.

To fix this, pass the original size to __iommu_dma_map, since that
function already handles alignment.

Additionally, when swiotlb returns non-aligned addresses, there is
padding at the start of the bounce buffer that needs to be cleared.

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index b1b0327cc2f6..b44b367839c2 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -788,7 +788,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = size;
dma_addr_t iova, dma_mask = dma_get_mask(dev);
 
/*
@@ -797,8 +796,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 */
if (IS_ENABLED(CONFIG_SWIOTLB) && dev_use_swiotlb(dev) &&
iova_offset(iovad, phys | size)) {
-   void *padding_start;
-   size_t padding_size;
+   void *tlb_start;
+   size_t aligned_size, iova_off, mapping_end_off;
 
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
@@ -807,24 +806,27 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
+   iova_off = iova_offset(iovad, phys);
+   tlb_start = phys_to_virt(phys - iova_off);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE ||
 dir == DMA_BIDIRECTIONAL)) {
-   padding_start += size;
-   padding_size -= size;
+   /* Cleanup the padding area. */
+   mapping_end_off = iova_off + size;
+   memset(tlb_start, 0, iova_off);
+   memset(tlb_start + mapping_end_off, 0,
+  aligned_size - mapping_end_off);
+   } else {
+   /* Nothing was sync'ed, so clear the whole buffer. */
+   memset(tlb_start, 0, aligned_size);
}
-
-   memset(padding_start, 0, padding_size);
}
 
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
 
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v5 6/7] swiotlb: support aligned swiotlb buffers

2021-08-15 Thread David Stevens
From: David Stevens 

Add an argument to swiotlb_tbl_map_single that specifies the desired
alignment of the allocated buffer. This is used by dma-iommu to ensure
the buffer is aligned to the iova granule size when using swiotlb with
untrusted sub-granule mappings. This addresses an issue where adjacent
slots could be exposed to the untrusted device if IO_TLB_SIZE < iova
granule < PAGE_SIZE.

Signed-off-by: David Stevens 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c |  4 ++--
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  3 ++-
 kernel/dma/swiotlb.c  | 11 +++
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index bad813d63ea6..b1b0327cc2f6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -801,8 +801,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
size_t padding_size;
 
aligned_size = iova_align(iovad, size);
-   phys = swiotlb_tbl_map_single(dev, phys, size,
- aligned_size, dir, attrs);
+   phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+ iova_mask(iovad), dir, attrs);
 
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 24d11861ac7d..8b03d2c93428 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -382,7 +382,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, 
struct page *page,
 */
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-   map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
+   map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..93d82e43eb3a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -44,7 +44,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs);
+   unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+   unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e50df8d8f87e..d4c45d8cd1fa 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -427,7 +427,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
unsigned int index)
  * allocate a buffer from that IO TLB pool.
  */
 static int find_slots(struct device *dev, phys_addr_t orig_addr,
-   size_t alloc_size)
+   size_t alloc_size, unsigned int alloc_align_mask)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -450,6 +450,7 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+   stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
spin_lock_irqsave(&mem->lock, flags);
if (unlikely(nslots > mem->nslabs - mem->used))
@@ -504,7 +505,8 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs)
+   unsigned int alloc_align_mask, enum dma_data_direction dir,
+   unsigned long attrs)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -524,7 +526,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
 
-   index = find_slots(dev, orig_addr, alloc_size + offset);
+   index = find_slots(dev, orig_addr,
+  alloc_size + offset, alloc_align_mask);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
@@ -636,7 +639,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t 
paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
  swiotlb_force);
 
-   swiotlb_addr = swiotlb_tbl_map_single(dev, p

[PATCH v5 5/7] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-08-15 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb related code can be removed more
aggressively.

Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f7da4934f7e6..bad813d63ea6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -713,7 +718,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -729,7 +734,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -747,7 +752,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -763,7 +768,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -790,7 +795,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_use_swiotlb(dev) &&
iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size;
@@ -975,7 +980,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1047,7 +1052,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v5 4/7] dma-iommu: fold _swiotlb helpers into callers

2021-08-15 Thread David Stevens
From: David Stevens 

Fold the _swiotlb helper functions into the respective _page functions,
since recent fixes have moved all logic from the _page functions to the
_swiotlb functions.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 136 +-
 1 file changed, 60 insertions(+), 76 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dd2c517dbf5..f7da4934f7e6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -493,26 +493,6 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   phys_addr_t phys;
-
-   phys = iommu_iova_to_phys(domain, dma_addr);
-   if (WARN_ON(!phys))
-   return;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
-   arch_sync_dma_for_cpu(phys, size, dir);
-
-   __iommu_dma_unmap(dev, dma_addr, size);
-
-   if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -539,55 +519,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
-{
-   int prot = dma_info_to_prot(dir, coherent, attrs);
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   struct iommu_dma_cookie *cookie = domain->iova_cookie;
-   struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
-   dma_addr_t iova;
-
-   /*
-* If both the physical buffer start address and size are
-* page aligned, we don't need to use a bounce page.
-*/
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
-   aligned_size = iova_align(iovad, org_size);
-   phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
-   if (phys == DMA_MAPPING_ERROR)
-   return DMA_MAPPING_ERROR;
-
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE ||
-dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
-   }
-
-   memset(padding_start, 0, padding_size);
-   }
-
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   arch_sync_dma_for_device(phys, org_size, dir);
-
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
-   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-   return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -848,15 +779,69 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = size;
+   dma_addr_t iova, dma_mask = dma_get_mask(dev);
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | size)) {
+   void *padding_start;
+   size_t padding_size;
 
-   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
-   coherent, dir, attrs);
+   aligned_size = iova_align(iovad, size);
+   phys = swiotlb_tbl_map_single(dev, phys, size,
+ aligned_size, dir,

[PATCH v5 3/7] dma-iommu: skip extra sync during unmap w/swiotlb

2021-08-15 Thread David Stevens
From: David Stevens 

Calling the iommu_dma_sync_*_for_cpu functions during unmap can cause
two copies out of the swiotlb buffer. Do the arch sync directly in
__iommu_dma_unmap_swiotlb instead to avoid this. This makes the call to
iommu_dma_sync_sg_for_cpu for untrusted devices in iommu_dma_unmap_sg no
longer necessary, so move that invocation later in the function.

Signed-off-by: David Stevens 
---
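The double copy comes from iommu_dma_sync_single_for_cpu already bouncing the
data back via swiotlb_sync_single_for_cpu before swiotlb_tbl_unmap_single
bounces it back a second time. In sketch form (simplified, not the literal
diff), the unmap path for a bounced, non-coherent mapping now looks like:

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_addr);

	/* CPU cache maintenance against the bounce buffer, no data copy. */
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	__iommu_dma_unmap(dev, dma_addr, size);

	/* The only copy back to the original buffer happens here. */
	if (is_swiotlb_buffer(phys))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
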
 drivers/iommu/dma-iommu.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 8098ce593640..5dd2c517dbf5 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -504,6 +504,9 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
if (WARN_ON(!phys))
return;
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
__iommu_dma_unmap(dev, dma_addr, size);
 
if (unlikely(is_swiotlb_buffer(phys)))
@@ -853,8 +856,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
 }
 
@@ -1062,14 +1063,14 @@ static void iommu_dma_unmap_sg(struct device *dev, 
struct scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
/*
 * The scatterlist segments are mapped into a single
 * contiguous IOVA allocation, so this is incredibly easy.
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v5 2/7] dma-iommu: fix arch_sync_dma for map

2021-08-15 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
helper, so it can use the bounce buffer address if necessary.

Now that iommu_dma_map_sg delegates to a function which takes care of
architectural syncing in the untrusted device case, the call to
iommu_dma_sync_sg_for_device can be moved so it only occurs for trusted
devices. Doing the sync for untrusted devices before mapping never
really worked, since it needs to be able to target swiotlb buffers.

This also moves the architectural sync to before the call to
__iommu_dma_map, to guarantee that untrusted devices can't see stale
data they shouldn't see.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
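In sketch form (simplified, not the literal diff), the ordering this
establishes in the bounced map path is:

	/* 1. Bounce: phys now refers to the swiotlb slot the device will see. */
	phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, dir, attrs);

	/* 2. Cache maintenance against that same address, before the device
	 * can observe it. */
	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(phys, size, dir);

	/* 3. Only now expose the buffer through the IOMMU mapping. */
	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
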
 drivers/iommu/dma-iommu.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 968e0150c95e..8098ce593640 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -576,6 +576,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
memset(padding_start, 0, padding_size);
}
 
+   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   arch_sync_dma_for_device(phys, org_size, dir);
+
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
@@ -842,14 +845,9 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
 
-   dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
coherent, dir, attrs);
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   dma_handle != DMA_MAPPING_ERROR)
-   arch_sync_dma_for_device(phys, size, dir);
-   return dma_handle;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -992,12 +990,12 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
 * Work out how much IOVA space we need, and align the segments to
 * IOVA granules for the IOMMU driver to handle. With some clever
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v5 1/7] dma-iommu: fix sync_sg with swiotlb

2021-08-15 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
Reviewed-by: Christoph Hellwig 
---
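Per segment, the sync_single helper this delegates to does roughly the
following (sketch, not a literal excerpt), which is what makes the bounce
buffer case work:

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), sg_dma_address(sg));

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, sg->length, dir);

	/* phys is the bounce buffer when one was used, so this check can
	 * actually match - sg_phys(sg) never would. */
	if (is_swiotlb_buffer(phys))
		swiotlb_sync_single_for_cpu(dev, phys, sg->length, dir);
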
 drivers/iommu/dma-iommu.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927aee1a..968e0150c95e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -810,17 +810,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-   sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -830,17 +826,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v5 0/7] Fixes for dma-iommu swiotlb bounce buffers

2021-08-15 Thread David Stevens
From: David Stevens 

This patch set includes various fixes for dma-iommu's swiotlb bounce
buffers for untrusted devices. There are four fixes for correctness
issues, one for a performance issue, and one for general cleanup.

The min_align_mask issue was found when running fio on an untrusted nvme
device with bs=512. The other issues were found via code inspection, so
I don't have any specific use cases where things were not working, nor
any concrete performance numbers.

v4 -> v5:
 - Fix xen build error
 - Move _swiotlb refactor into its own patch

v3 -> v4:
 - Fold _swiotlb functions into _page functions
 - Add patch to align swiotlb buffer to iovad granule
 - Combine if checks in iommu_dma_sync_sg_* functions

v2 -> v3:
 - Add new patch to address min_align_mask bug
 - Set SKIP_CPU_SYNC flag after syncing in map/unmap
 - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (7):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map
  dma-iommu: skip extra sync during unmap w/swiotlb
  dma-iommu: fold _swiotlb helpers into callers
  dma-iommu: Check CONFIG_SWIOTLB more broadly
  swiotlb: support aligned swiotlb buffers
  dma-iommu: account for min_align_mask

 drivers/iommu/dma-iommu.c | 193 +-
 drivers/xen/swiotlb-xen.c |   2 +-
 include/linux/swiotlb.h   |   3 +-
 kernel/dma/swiotlb.c  |  11 ++-
 4 files changed, 98 insertions(+), 111 deletions(-)

-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v4 6/6] dma-iommu: account for min_align_mask

2021-08-13 Thread David Stevens
From: David Stevens 

For devices which set min_align_mask, swiotlb preserves the offset of
the original physical address within that mask. Since __iommu_dma_map
accounts for non-aligned addresses, passing a non-aligned swiotlb
address with the swiotlb aligned size results in the offset being
accounted for twice in the size passed to iommu_map_atomic. The extra
page exposed to DMA is also not cleaned up by __iommu_dma_unmap, since
that function unmaps with the correct size. This causes mapping failures
if the iova gets reused, due to collisions in the iommu page tables.

To fix this, pass the original size to __iommu_dma_map, since that
function already handles alignment.

Additionally, when swiotlb returns non-aligned addresses, there is
padding at the start of the bounce buffer that needs to be cleared.

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: David Stevens 
---
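As a stand-alone illustration of the double accounting (assumed values: 4 KiB
IOVA granule, an NVMe-style min_align_mask of 0xfff, and a 512-byte buffer at
offset 0xa00 within its page):

#include <stdio.h>

#define GRANULE		0x1000UL
#define ALIGN_UP(x)	(((x) + GRANULE - 1) & ~(GRANULE - 1))

int main(void)
{
	unsigned long size = 512, offset = 0xa00;

	/* dma-iommu rounds the bounce allocation up to the granule... */
	unsigned long aligned_size = ALIGN_UP(size);		/* 0x1000 */

	/* ...swiotlb preserves the 0xa00 offset (min_align_mask), and
	 * __iommu_dma_map() adds that offset again when sizing the
	 * IOMMU mapping: */
	unsigned long mapped = ALIGN_UP(offset + aligned_size);	/* 0x2000 */

	/* ...while __iommu_dma_unmap() later only undoes: */
	unsigned long unmapped = ALIGN_UP(offset + size);	/* 0x1000 */

	printf("mapped %#lx, unmapped %#lx, left mapped %#lx\n",
	       mapped, unmapped, mapped - unmapped);
	return 0;
}

Passing the original size to __iommu_dma_map, as below, makes the mapped and
unmapped sizes agree again.
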
 drivers/iommu/dma-iommu.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index b1b0327cc2f6..b44b367839c2 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -788,7 +788,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = size;
dma_addr_t iova, dma_mask = dma_get_mask(dev);
 
/*
@@ -797,8 +796,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 */
if (IS_ENABLED(CONFIG_SWIOTLB) && dev_use_swiotlb(dev) &&
iova_offset(iovad, phys | size)) {
-   void *padding_start;
-   size_t padding_size;
+   void *tlb_start;
+   size_t aligned_size, iova_off, mapping_end_off;
 
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
@@ -807,24 +806,27 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
+   iova_off = iova_offset(iovad, phys);
+   tlb_start = phys_to_virt(phys - iova_off);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE ||
 dir == DMA_BIDIRECTIONAL)) {
-   padding_start += size;
-   padding_size -= size;
+   /* Cleanup the padding area. */
+   mapping_end_off = iova_off + size;
+   memset(tlb_start, 0, iova_off);
+   memset(tlb_start + mapping_end_off, 0,
+  aligned_size - mapping_end_off);
+   } else {
+   /* Nothing was sync'ed, so clear the whole buffer. */
+   memset(tlb_start, 0, aligned_size);
}
-
-   memset(padding_start, 0, padding_size);
}
 
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
 
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v4 5/6] swiotlb: support aligned swiotlb buffers

2021-08-13 Thread David Stevens
From: David Stevens 

Add an argument to swiotlb_tbl_map_single that specifies the desired
alignment of the allocated buffer. This is used by dma-iommu to ensure
the buffer is aligned to the iova granule size when using swiotlb with
untrusted sub-granule mappings. This addresses an issue where adjacent
slots could be exposed to the untrusted device if IO_TLB_SIZE < iova
granule < PAGE_SIZE.

Signed-off-by: David Stevens 
---
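As a stand-alone illustration of what the new mask does inside find_slots()
(assumed sizes: 2 KiB swiotlb slots, i.e. IO_TLB_SHIFT == 11, and a 16 KiB
IOVA granule, so alloc_align_mask == iova_mask(iovad) == 0x3fff):

#include <stdio.h>

#define IO_TLB_SHIFT	11	/* 2 KiB swiotlb slots */

int main(void)
{
	unsigned int alloc_align_mask = 0x3fff;	/* 16 KiB granule - 1 */
	unsigned int stride = (alloc_align_mask >> IO_TLB_SHIFT) + 1;

	/* Only every 8th slot is considered as a starting point, which is
	 * what keeps other allocations' slots out of the granule that gets
	 * mapped for the untrusted device. */
	printf("slot search stride = %u slots\n", stride);
	return 0;
}
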
 drivers/iommu/dma-iommu.c |  4 ++--
 include/linux/swiotlb.h   |  3 ++-
 kernel/dma/swiotlb.c  | 11 +++
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index bad813d63ea6..b1b0327cc2f6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -801,8 +801,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
size_t padding_size;
 
aligned_size = iova_align(iovad, size);
-   phys = swiotlb_tbl_map_single(dev, phys, size,
- aligned_size, dir, attrs);
+   phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+ iova_mask(iovad), dir, attrs);
 
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..93d82e43eb3a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -44,7 +44,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs);
+   unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+   unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e50df8d8f87e..d4c45d8cd1fa 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -427,7 +427,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, 
unsigned int index)
  * allocate a buffer from that IO TLB pool.
  */
 static int find_slots(struct device *dev, phys_addr_t orig_addr,
-   size_t alloc_size)
+   size_t alloc_size, unsigned int alloc_align_mask)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -450,6 +450,7 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+   stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
spin_lock_irqsave(&mem->lock, flags);
if (unlikely(nslots > mem->nslabs - mem->used))
@@ -504,7 +505,8 @@ static int find_slots(struct device *dev, phys_addr_t 
orig_addr,
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
-   enum dma_data_direction dir, unsigned long attrs)
+   unsigned int alloc_align_mask, enum dma_data_direction dir,
+   unsigned long attrs)
 {
struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -524,7 +526,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
 
-   index = find_slots(dev, orig_addr, alloc_size + offset);
+   index = find_slots(dev, orig_addr,
+  alloc_size + offset, alloc_align_mask);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
@@ -636,7 +639,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t 
paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
  swiotlb_force);
 
-   swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
+   swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v4 4/6] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-08-13 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb related code can be removed more
aggressively.

Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f7da4934f7e6..bad813d63ea6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -713,7 +718,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -729,7 +734,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -747,7 +752,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -763,7 +768,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -790,7 +795,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_use_swiotlb(dev) &&
iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size;
@@ -975,7 +980,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1047,7 +1052,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v4 3/6] dma-iommu: skip extra sync during unmap w/swiotlb

2021-08-13 Thread David Stevens
From: David Stevens 

Calling the iommu_dma_sync_*_for_cpu functions during unmap can cause
two copies out of the swiotlb buffer. Fold __iommu_dma_unmap_swiotlb
into iommu_dma_unmap_page, and directly call arch_sync_dma_for_cpu
instead of iommu_dma_sync_single_for_cpu to avoid this double sync. With
this refactor, calling iommu_dma_sync_sg_for_cpu for untrusted devices
in iommu_dma_unmap_sg is also no longer necessary, so move that
invocation later in the function.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 42 +--
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e9119ff93535..f7da4934f7e6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -493,23 +493,6 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   phys_addr_t phys;
-
-   phys = iommu_iova_to_phys(domain, dma_addr);
-   if (WARN_ON(!phys))
-   return;
-
-   __iommu_dma_unmap(dev, dma_addr, size);
-
-   if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -845,9 +828,20 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
-   __iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   phys_addr_t phys;
+
+   phys = iommu_iova_to_phys(domain, dma_handle);
+   if (WARN_ON(!phys))
+   return;
+
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+   arch_sync_dma_for_cpu(phys, size, dir);
+
+   __iommu_dma_unmap(dev, dma_handle, size);
+
+   if (unlikely(is_swiotlb_buffer(phys)))
+   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
 /*
@@ -932,7 +926,7 @@ static void iommu_dma_unmap_sg_swiotlb(struct device *dev, 
struct scatterlist *s
int i;
 
for_each_sg(sg, s, nents, i)
-   __iommu_dma_unmap_swiotlb(dev, sg_dma_address(s),
+   iommu_dma_unmap_page(dev, sg_dma_address(s),
sg_dma_len(s), dir, attrs);
 }
 
@@ -1053,14 +1047,14 @@ static void iommu_dma_unmap_sg(struct device *dev, 
struct scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+
/*
 * The scatterlist segments are mapped into a single
 * contiguous IOVA allocation, so this is incredibly easy.
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v4 2/6] dma-iommu: fix arch_sync_dma for map

2021-08-13 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Fold __iommu_dma_map_swiotlb into iommu_dma_map_page
so it can sync the right phys_addr_t.

Now that iommu_dma_map_sg delegates to a function which takes care of
architectural syncing in the untrusted device case, the call to
iommu_dma_sync_sg_for_device can be moved so it only occurs for
trusted devices. Doing the sync there for untrusted devices never really
worked, since it needs to be able to target swiotlb buffers.

This also moves the architectural sync to before the call to
__iommu_dma_map, to guarantee that untrusted devices can't see stale
data they shouldn't see.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 105 +-
 1 file changed, 47 insertions(+), 58 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 968e0150c95e..e9119ff93535 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -536,52 +536,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
-{
-   int prot = dma_info_to_prot(dir, coherent, attrs);
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   struct iommu_dma_cookie *cookie = domain->iova_cookie;
-   struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
-   dma_addr_t iova;
-
-   /*
-* If both the physical buffer start address and size are
-* page aligned, we don't need to use a bounce page.
-*/
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
-   aligned_size = iova_align(iovad, org_size);
-   phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
-   if (phys == DMA_MAPPING_ERROR)
-   return DMA_MAPPING_ERROR;
-
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE ||
-dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
-   }
-
-   memset(padding_start, 0, padding_size);
-   }
-
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
-   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-   return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -842,14 +796,50 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
+   int prot = dma_info_to_prot(dir, coherent, attrs);
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+   struct iova_domain *iovad = &cookie->iovad;
+   size_t aligned_size = size;
+   dma_addr_t iova, dma_mask = dma_get_mask(dev);
+
+   /*
+* If both the physical buffer start address and size are
+* page aligned, we don't need to use a bounce page.
+*/
+   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
+   iova_offset(iovad, phys | size)) {
+   void *padding_start;
+   size_t padding_size;
+
+   aligned_size = iova_align(iovad, size);
+   phys = swiotlb_tbl_map_single(dev, phys, size,
+ aligned_size, dir, attrs);
+
+   if (phys == DMA_MAPPING_ERROR)
+   return DMA_MAPPING_ERROR;
+
+   /* Cleanup the padding area. */
+   padding_start = phys_to_virt(phys);
+   padding_size = aligned_size;
+
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+   (dir == DMA_TO_DEVICE ||
+dir == DMA_BIDIRECTIONAL)) {
+   padding_start += size;
+   padding_size -= size;
+   }
 
-   dma_handle = __iommu_dma

[PATCH v4 1/6] dma-iommu: fix sync_sg with swiotlb

2021-08-13 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/dma-iommu.c | 33 +
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927aee1a..968e0150c95e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -810,17 +810,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-   sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -830,17 +826,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
-   return;
-
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else if (!dev_is_dma_coherent(dev))
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.33.0.rc1.237.g0d66db33f3-goog



[PATCH v4 0/6] Fixes for dma-iommu swiotlb bounce buffers

2021-08-13 Thread David Stevens
From: David Stevens 

This patch set includes various fixes for dma-iommu's swiotlb bounce
buffers for untrusted devices. There are four fixes for correctness
issues, one for a performance issue, and one for general cleanup.

The min_align_mask issue was found when running fio on an untrusted nvme
device with bs=512. The other issues were found via code inspection, so
I don't have any specific use cases where things were not working, nor
any concrete performance numbers.


v3 -> v4:
 - Fold _swiotlb functions into _page functions
 - Add patch to align swiotlb buffer to iovad granule
 - Combine if checks in iommu_dma_sync_sg_* functions

v2 -> v3:
 - Add new patch to address min_align_mask bug
 - Set SKIP_CPU_SYNC flag after syncing in map/unmap
 - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (6):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map
  dma-iommu: skip extra sync during unmap w/swiotlb
  dma-iommu: Check CONFIG_SWIOTLB more broadly
  swiotlb: support aligned swiotlb buffers
  dma-iommu: account for min_align_mask

 drivers/iommu/dma-iommu.c | 193 +-
 include/linux/swiotlb.h   |   3 +-
 kernel/dma/swiotlb.c  |  11 ++-
 3 files changed, 97 insertions(+), 110 deletions(-)

-- 
2.33.0.rc1.237.g0d66db33f3-goog



Re: [PATCH v3 2/5] dma-iommu: fix arch_sync_dma for map

2021-08-12 Thread David Stevens
On Thu, Aug 12, 2021 at 3:47 AM Robin Murphy  wrote:
>
> On 2021-08-11 03:42, David Stevens wrote:
> > From: David Stevens 
> >
> > When calling arch_sync_dma, we need to pass it the memory that's
> > actually being used for dma. When using swiotlb bounce buffers, this is
> > the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
> > helper, so it can use the bounce buffer address if necessary. This also
> > means it is no longer necessary to call iommu_dma_sync_sg_for_device in
> > iommu_dma_map_sg for untrusted devices.
> >
> > Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
> > Signed-off-by: David Stevens 
> > ---
> >   drivers/iommu/dma-iommu.c | 16 +++-
> >   1 file changed, 7 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index 54e103b989d9..4f0cc4a0a61f 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -576,6 +576,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
> > *dev, phys_addr_t phys,
> >   memset(padding_start, 0, padding_size);
> >   }
> >
> > + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
>
> Make that an "else if" - otherwise you're just reintroducing the same
> thing that the third hunk is trying to clean up.

swiotlb_tbl_map_single only copies into the swiotlb buffer, it doesn't
do any architectural syncing. So this block is necessary in the
swiotlb case as well, for iommu_dma_map_page to work properly.

The third chunk is a separate optimization, so I'll split it out into
its own patch.

-David


Re: [PATCH v3 5/5] dma-iommu: account for min_align_mask

2021-08-11 Thread David Stevens
On Thu, Aug 12, 2021 at 4:12 AM Robin Murphy  wrote:
>
> On 2021-08-11 03:42, David Stevens wrote:
> > From: David Stevens 
> >
> > For devices which set min_align_mask, swiotlb preserves the offset of
> > the original physical address within that mask. Since __iommu_dma_map
> > accounts for non-aligned addresses, passing a non-aligned swiotlb
> > address with the swiotlb aligned size results in the offset being
> > accounted for twice in the size passed to iommu_map_atomic. The extra
> > page exposed to DMA is also not cleaned up by __iommu_dma_unmap, since
> > that function unmaps with the correct size. This causes mapping failures
> > if the iova gets reused, due to collisions in the iommu page tables.
> >
> > To fix this, pass the original size to __iommu_dma_map, since that
> > function already handles alignment.
> >
> > Additionally, when swiotlb returns non-aligned addresses, there is
> > padding at the start of the bounce buffer that needs to be cleared.
> >
> > Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
> > Signed-off-by: David Stevens 
> > ---
> >   drivers/iommu/dma-iommu.c | 23 ---
> >   1 file changed, 12 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index 89b689bf801f..ffa7e8ef5db4 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -549,9 +549,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
> > *dev, phys_addr_t phys,
> >   struct iommu_domain *domain = iommu_get_dma_domain(dev);
> >   struct iommu_dma_cookie *cookie = domain->iova_cookie;
> >   struct iova_domain *iovad = &cookie->iovad;
> > - size_t aligned_size = org_size;
> > - void *padding_start;
> > - size_t padding_size;
> > + void *tlb_start;
> > + size_t aligned_size, iova_off, mapping_end_off;
> >   dma_addr_t iova;
> >
> >   /*
> > @@ -566,24 +565,26 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct 
> > device *dev, phys_addr_t phys,
> >   if (phys == DMA_MAPPING_ERROR)
> >   return DMA_MAPPING_ERROR;
> >
> > - /* Cleanup the padding area. */
> > - padding_start = phys_to_virt(phys);
> > - padding_size = aligned_size;
> > + iova_off = iova_offset(iovad, phys);
> > + tlb_start = phys_to_virt(phys - iova_off);
> >
> > + /* Cleanup the padding area. */
> >   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
> >   (dir == DMA_TO_DEVICE ||
> >dir == DMA_BIDIRECTIONAL)) {
> > - padding_start += org_size;
> > - padding_size -= org_size;
> > + mapping_end_off = iova_off + org_size;
> > + memset(tlb_start, 0, iova_off);
> > + memset(tlb_start + mapping_end_off, 0,
> > +aligned_size - mapping_end_off);
> > + } else {
> > + memset(tlb_start, 0, aligned_size);
> >   }
> > -
> > - memset(padding_start, 0, padding_size);
> >   }
> >
> >   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
> >   arch_sync_dma_for_device(phys, org_size, dir);
> >
> > - iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
> > + iova = __iommu_dma_map(dev, phys, org_size, prot, dma_mask);
>
> This doesn't feel right - what if the IOVA granule was equal to or
> smaller than min_align_mask, wouldn't you potentially end up mapping the
> padding rather than the data?

The phys value returned by swiotlb_tbl_map_single is the address of
the start of the data in the swiotlb buffer, so the range that needs
to be mapped is [phys, phys + org_size). __iommu_dma_map will handle
this the same as it handles a misaligned mapping in the non-swiotlb
case. It might map memory before/after the desired range, but it will
map the entire range and iova will be the mapped address of phys. Is
there something I'm missing there?
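
For reference, the alignment handling in __iommu_dma_map that is being relied
on here is roughly (sketch, error handling omitted):

	size_t iova_off = iova_offset(iovad, phys);

	size = iova_align(iovad, size + iova_off);
	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
	iommu_map_atomic(domain, iova, phys - iova_off, size, prot);
	return iova + iova_off;

i.e. the whole granule containing phys is mapped and the returned handle keeps
the sub-granule offset, so a misaligned bounce address is handled the same way
as any other misaligned physical address.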

That said, considering that memory before phys might be mapped, I
think there is actually still a bug. The buffer allocated by swiotlb
needs to be aligned to the granule size to ensure that preceding
swiotlb slots aren't mapped. The swiotlb does align allocations larger
than a page to PAGE_SIZE, but if IO_TLB_SIZE < IOVA granule <
PAGE_SIZE, then there can be problems. That can't happen if PAGE_SIZE
is 4k, but it can for larger page sizes. I'll add a fix for that to
the next version of this series.
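
As a concrete set of numbers for that window (assuming 2 KiB swiotlb slots, a
16 KiB IOVA granule and 64 KiB pages):

#include <stdio.h>

int main(void)
{
	unsigned int io_tlb_size = 2 * 1024;	/* IO_TLB_SIZE  */
	unsigned int granule     = 16 * 1024;	/* IOVA granule */
	unsigned int page_size   = 64 * 1024;	/* PAGE_SIZE    */

	/* iova_align() of a small untrusted mapping is one granule... */
	unsigned int alloc = granule;

	/* ...and swiotlb only guarantees PAGE_SIZE alignment for
	 * allocations of at least PAGE_SIZE, otherwise slot alignment: */
	unsigned int align = alloc >= page_size ? page_size : io_tlb_size;

	printf("a %u KiB bounce allocation is only %u KiB aligned, so it "
	       "need not start on a granule boundary\n",
	       alloc / 1024, align / 1024);
	return 0;
}

With 4 KiB pages the granule can never fall strictly between IO_TLB_SIZE and
PAGE_SIZE, which is why the problem only shows up with larger page sizes.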

-David

> Robin.
>
> >   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
> >   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
> >   return iova;
> >


[PATCH v3 5/5] dma-iommu: account for min_align_mask

2021-08-10 Thread David Stevens
From: David Stevens 

For devices which set min_align_mask, swiotlb preserves the offset of
the original physical address within that mask. Since __iommu_dma_map
accounts for non-aligned addresses, passing a non-aligned swiotlb
address with the swiotlb aligned size results in the offset being
accounted for twice in the size passed to iommu_map_atomic. The extra
page exposed to DMA is also not cleaned up by __iommu_dma_unmap, since
that function unmaps with the correct size. This causes mapping failures
if the iova gets reused, due to collisions in the iommu page tables.

To fix this, pass the original size to __iommu_dma_map, since that
function already handles alignment.

Additionally, when swiotlb returns non-aligned addresses, there is
padding at the start of the bounce buffer that needs to be cleared.

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 89b689bf801f..ffa7e8ef5db4 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -549,9 +549,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
+   void *tlb_start;
+   size_t aligned_size, iova_off, mapping_end_off;
dma_addr_t iova;
 
/*
@@ -566,24 +565,26 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
 
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
+   iova_off = iova_offset(iovad, phys);
+   tlb_start = phys_to_virt(phys - iova_off);
 
+   /* Cleanup the padding area. */
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE ||
 dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
+   mapping_end_off = iova_off + org_size;
+   memset(tlb_start, 0, iova_off);
+   memset(tlb_start + mapping_end_off, 0,
+  aligned_size - mapping_end_off);
+   } else {
+   memset(tlb_start, 0, aligned_size);
}
-
-   memset(padding_start, 0, padding_size);
}
 
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, org_size, dir);
 
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
+   iova = __iommu_dma_map(dev, phys, org_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
return iova;
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v3 4/5] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-08-10 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb related code can be removed more
aggressively.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index be0214b1455c..89b689bf801f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -317,6 +317,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -553,8 +558,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
+   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | org_size)) {
aligned_size = iova_align(iovad, org_size);
phys = swiotlb_tbl_map_single(dev, phys, org_size,
  aligned_size, dir, attrs);
@@ -779,7 +783,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -795,7 +799,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -813,10 +817,10 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -832,10 +836,10 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -999,7 +1003,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
@@ -1078,7 +1082,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
}
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v3 3/5] dma-iommu: add SKIP_CPU_SYNC after syncing

2021-08-10 Thread David Stevens
From: David Stevens 

After syncing in map/unmap, add the DMA_ATTR_SKIP_CPU_SYNC flag so
anything that uses attrs later on will skip any sync work that has
already been completed. In particular, this skips copying from the
swiotlb twice during unmap.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4f0cc4a0a61f..be0214b1455c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -859,8 +859,11 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
+   attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+   }
+
__iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
 }
 
@@ -999,8 +1002,10 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+   attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+   }
 
/*
 * Work out how much IOVA space we need, and align the segments to
@@ -1068,8 +1073,10 @@ static void iommu_dma_unmap_sg(struct device *dev, 
struct scatterlist *sg,
struct scatterlist *tmp;
int i;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+   attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+   }
 
if (dev_is_untrusted(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v3 2/5] dma-iommu: fix arch_sync_dma for map

2021-08-10 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
helper, so it can use the bounce buffer address if necessary. This also
means it is no longer necessary to call iommu_dma_sync_sg_for_device in
iommu_dma_map_sg for untrusted devices.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 54e103b989d9..4f0cc4a0a61f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -576,6 +576,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
memset(padding_start, 0, padding_size);
}
 
+   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   arch_sync_dma_for_device(phys, org_size, dir);
+
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
@@ -848,14 +851,9 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
 
-   dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
coherent, dir, attrs);
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   dma_handle != DMA_MAPPING_ERROR)
-   arch_sync_dma_for_device(phys, size, dir);
-   return dma_handle;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -998,12 +996,12 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
 * Work out how much IOVA space we need, and align the segments to
 * IOVA granules for the IOMMU driver to handle. With some clever
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v3 1/5] dma-iommu: fix sync_sg with swiotlb

2021-08-10 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.
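
In other words (sketch only; this is essentially what
iommu_dma_sync_single_for_cpu() already does), the per-segment sync has to
translate the IOVA before testing for a bounce buffer:

    /* sg_phys(sg) is the original page, so it can never match the swiotlb
     * pool; the device-visible address has to be translated first.
     */
    phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
                                          sg_dma_address(sg));
    if (is_swiotlb_buffer(phys))
            swiotlb_sync_single_for_cpu(dev, phys, sg->length, dir);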

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927aee1a..54e103b989d9 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -813,14 +813,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
-   sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -833,14 +832,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v3 0/5] Fixes for dma-iommu swiotlb bounce buffers

2021-08-10 Thread David Stevens
From: David Stevens 

This patch set includes various fixes for dma-iommu's swiotlb bounce
buffers for untrusted devices. There are three fixes for correctness
issues, one performance issue, and one general cleanup.

The min_align_mask issue was found when running fio on an untrusted nvme
device with bs=512. The other issues were found via code inspection, so
I don't have any specific use cases where things were not working, nor
any concrete performance numbers.

v2 -> v3:
 - Add new patch to address min_align_mask bug
 - Set SKIP_CPU_SYNC flag after syncing in map/unmap
 - Properly call arch_sync_dma_for_cpu in iommu_dma_sync_sg_for_cpu

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (5):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map
  dma-iommu: add SKIP_CPU_SYNC after syncing
  dma-iommu: Check CONFIG_SWIOTLB more broadly
  dma-iommu: account for min_align_mask

 drivers/iommu/dma-iommu.c | 97 +--
 1 file changed, 53 insertions(+), 44 deletions(-)

-- 
2.32.0.605.g8dce9f2422-goog



Re: [PATCH v2 3/9] dma-iommu: bounce buffers for untrusted devices

2021-08-09 Thread David Stevens
On Tue, Aug 10, 2021 at 10:19 AM Mi, Dapeng1  wrote:
>
> Hi David,
>
> I like this patch set; it is crucial for reducing the significant vIOMMU 
> performance overhead. It looks like you totally rewrite the IOMMU 
> mapping/unmapping part and use dynamically allocated memory from the buddy 
> system as bounce buffers instead of the legacy SWIOTLB bounce buffer. As far 
> as I know, some legacy devices' DMA cannot access memory above the 32-bit 
> address space, and the dynamically allocated memory address could exceed the 
> 32-bit space. Is it a problem?

My understanding is that when devices with that sort of limitation sit
behind an IOMMU, the IOVA is what matters, not the physical address.
The bounce bounce buffers use the same limits for IOVA allocation as
the regular dma-iommu path, so compatible IOVAs will be allocated for
the bounce buffers.
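
Concretely, the IOVA allocator already clamps against the device mask and bus
limit (sketch based on the __iommu_dma_limit() helper added in patch 6/9):

    u64 dma_limit = min_not_zero(dma_get_mask(dev), dev->bus_dma_limit);

    if (domain->geometry.force_aperture)
            dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);

    /* Bounce buffer IOVAs come from this same allocator, so a 32-bit mask
     * yields IOVAs the device can address even if the backing pages sit
     * above 4GB.
     */
    iova = __iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);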

-David

> Thx,
> Dapeng Mi


[PATCH v2 9/9] drm/i915: use DMA_ATTR_PERSISTENT_STREAMING flag

2021-08-06 Thread David Stevens
From: David Stevens 

Use the new DMA_ATTR_PERSISTENT_STREAMING for long lived dma mappings
which directly handle CPU cache coherency instead of using dma_sync_*.

Signed-off-by: David Stevens 
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c| 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 616c3a2f1baf..df982cfb4f34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -48,7 +48,9 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
dma_buf_attachment *attachme
src = sg_next(src);
}
 
-   ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
+   ret = dma_map_sgtable(attachment->dev, st, dir,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_PERSISTENT_STREAMING);
if (ret)
goto err_free_sg;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 36489be4896b..f27a849631f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -33,7 +33,8 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object 
*obj,
 PCI_DMA_BIDIRECTIONAL,
 DMA_ATTR_SKIP_CPU_SYNC |
 DMA_ATTR_NO_KERNEL_MAPPING |
-DMA_ATTR_NO_WARN))
+DMA_ATTR_NO_WARN |
+DMA_ATTR_PERSISTENT_STREAMING))
return 0;
 
/*
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v2 8/9] dma-mapping: add persistent streaming mapping flag

2021-08-06 Thread David Stevens
From: David Stevens 

Add a DMA_ATTR_PERSISTENT_STREAMING flag which indicates that the
streaming mapping is long lived and that the caller will manage
coherency either through the dma_sync_* functions or via some other
use-case specific mechanism. This flag indicates to the platform that
it should optimize for more efficient syncing at the cost of more
expensive mapping and unmapping.

This flag is used to skip optional bounce buffers when
CONFIG_IOMMU_BOUNCE_BUFFERS is enabled. With respect to these bounce
buffers, in most cases the flag is an optimization. However, callers
which do not use the dma_sync_* calls to manage coherency must use this
flag to work properly when CONFIG_IOMMU_BOUNCE_BUFFERS is enabled.
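
For a driver that maps once at setup time and manages coherency itself, usage
would look roughly like this (sketch; it mirrors the i915 change in patch 9/9):

    /* Long-lived mapping, coherency handled by the driver: opt out of the
     * optional bounce buffers so the device sees the real pages.
     */
    ret = dma_map_sgtable(dev, sgt, DMA_BIDIRECTIONAL,
                          DMA_ATTR_SKIP_CPU_SYNC |
                          DMA_ATTR_PERSISTENT_STREAMING);
    if (ret)
            return ret;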

Signed-off-by: David Stevens 
---
 drivers/iommu/io-bounce-buffers.c | 14 +++---
 include/linux/dma-mapping.h   | 11 +++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/io-bounce-buffers.c 
b/drivers/iommu/io-bounce-buffers.c
index af8c2a51eeed..3a0071d5a9ea 100644
--- a/drivers/iommu/io-bounce-buffers.c
+++ b/drivers/iommu/io-bounce-buffers.c
@@ -409,8 +409,16 @@ static bool io_bounce_buffers_map_buffer(struct 
io_bounce_buffers *buffers,
return mapped >= info->size;
 }
 
-static bool use_bounce_buffer(bool force_bounce, size_t size)
+static bool use_bounce_buffer(struct device *dev, unsigned long attrs,
+ bool force_bounce, size_t size)
 {
+   if (attrs & DMA_ATTR_PERSISTENT_STREAMING) {
+   WARN_ONCE(force_bounce,
+ "Skipping bounce buffer for untrusted mapping %s\n",
+ dev_name(dev));
+   return false;
+   }
+
if (IS_ENABLED(CONFIG_IOMMU_BOUNCE_BUFFERS) &&
size <= always_bounce_limit)
return true;
@@ -429,7 +437,7 @@ bool io_bounce_buffers_map_page(struct io_bounce_buffers 
*buffers,
bool force_bounce = buffers->untrusted &&
iova_offset(buffers->iovad, offset | size);
 
-   if (!use_bounce_buffer(force_bounce, size))
+   if (!use_bounce_buffer(dev, attrs, force_bounce, size))
return false;
 
*handle = DMA_MAPPING_ERROR;
@@ -476,7 +484,7 @@ bool io_bounce_buffers_map_sg(struct io_bounce_buffers 
*buffers,
buffers->iovad, iter->offset | iter->length);
}
 
-   if (!use_bounce_buffer(force_bounce, size))
+   if (!use_bounce_buffer(dev, attrs, force_bounce, size))
return false;
 
*out_nents = 0;
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 183e7103a66d..5d318753bb79 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -61,6 +61,17 @@
  */
 #define DMA_ATTR_PRIVILEGED(1UL << 9)
 
+/*
+ * DMA_ATTR_PERSISTENT_STREAMING: Indicates that the streaming mapping is long
+ * lived, so syncing performance should be prioritized over mapping/unmapping
+ * performance. Platform code will establish a mapping which only requires CPU
+ * cache synchronization.
+ *
+ * Callers that create long lived mappings and directly handle CPU cache
+ * management without using the dma_sync_* functions must set this flag.
+ */
+#define DMA_ATTR_PERSISTENT_STREAMING  (1UL << 10)
+
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
  * be given to a device to use as a DMA source or target.  It is specific to a
-- 
2.32.0.605.g8dce9f2422-goog



[PATCH v2 7/9] dma-iommu: support iommu bounce buffer optimization

2021-08-06 Thread David Stevens
From: David Stevens 

Add config that uses IOMMU bounce buffer pools to avoid IOMMU
interactions as much as possible for relatively small streaming DMA
operations. This can lead to significant performance improvements on
systems where IOMMU map/unmap operations are very slow, such as when
running virtualized.

Signed-off-by: David Stevens 
---
 drivers/iommu/Kconfig | 11 +
 drivers/iommu/dma-iommu.c |  5 ++-
 drivers/iommu/io-bounce-buffers.c | 70 +--
 drivers/iommu/io-buffer-manager.c | 17 +---
 drivers/iommu/io-buffer-manager.h |  8 ++--
 include/linux/dma-iommu.h |  2 +
 6 files changed, 81 insertions(+), 32 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 07b7c25cbed8..e573b5c276dc 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -422,4 +422,15 @@ config SPRD_IOMMU
 
  Say Y here if you want to use the multimedia devices listed above.
 
+config IOMMU_BOUNCE_BUFFERS
+   bool "Use IOMMU bounce buffers"
+   depends on IOMMU_DMA
+   default n
+   help
+ Use bounce buffers for small, streaming DMA operations. This may
+ have performance benefits on systems where establishing IOMMU mappings
+ is particularly expensive, such as when running as a guest.
+
+ If unsure, say N here.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 42f85b7a90f0..965bc0a2f140 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -324,7 +324,7 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain 
*iovad)
domain->ops->flush_iotlb_all(domain);
 }
 
-static bool dev_is_untrusted(struct device *dev)
+bool dev_is_untrusted(struct device *dev)
 {
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
@@ -402,7 +402,8 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
 
ret = iova_reserve_iommu_regions(dev, domain);
 
-   if (ret == 0 && dev_is_untrusted(dev)) {
+   if (ret == 0 && (dev_is_untrusted(dev) ||
+IS_ENABLED(CONFIG_IOMMU_BOUNCE_BUFFERS))) {
cookie->bounce_buffers =
io_bounce_buffers_init(dev, domain, iovad);
if (IS_ERR(cookie->bounce_buffers))
diff --git a/drivers/iommu/io-bounce-buffers.c 
b/drivers/iommu/io-bounce-buffers.c
index 8af8e1546d5f..af8c2a51eeed 100644
--- a/drivers/iommu/io-bounce-buffers.c
+++ b/drivers/iommu/io-bounce-buffers.c
@@ -20,10 +20,20 @@
 static unsigned int buffer_pool_size = 1024;
 module_param(buffer_pool_size, uint, 0);
 
+#ifdef CONFIG_IOMMU_BOUNCE_BUFFERS
+// All buffers at most this size will always use bounce buffers if there
+// are slots of the appropriate size available.
+static unsigned int always_bounce_limit = PAGE_SIZE;
+module_param(always_bounce_limit, uint, 0644);
+#else
+static const unsigned int always_bounce_limit;
+#endif
+
 struct io_bounce_buffers {
struct iommu_domain *domain;
struct iova_domain *iovad;
unsigned int nid;
+   bool untrusted;
struct io_buffer_manager manager;
 };
 
@@ -56,6 +66,7 @@ struct io_bounce_buffers *io_bounce_buffers_init(struct 
device *dev,
buffers->domain = domain;
buffers->iovad = iovad;
buffers->nid = dev_to_node(dev);
+   buffers->untrusted = dev_is_untrusted(dev);
 
return buffers;
 }
@@ -201,7 +212,8 @@ bool io_bounce_buffers_sync_single(struct io_bounce_buffers 
*buffers,
void *orig_buffer;
int prot;
 
-   if (!io_buffer_manager_find_buffer(&buffers->manager, dma_handle, &info,
+   if (!io_buffer_manager_find_buffer(&buffers->manager, dma_handle,
+  buffers->untrusted, &info,
   &orig_buffer, &prot))
return false;
 
@@ -237,9 +249,9 @@ bool io_bounce_buffers_sync_sg(struct io_bounce_buffers 
*buffers,
void *orig_buffer;
int prot;
 
-   if (!io_buffer_manager_find_buffer(&buffers->manager,
-  sg_dma_address(sgl), &info,
-  &orig_buffer, &prot))
+   if (!io_buffer_manager_find_buffer(
+   &buffers->manager, sg_dma_address(sgl), buffers->untrusted,
+   &info, &orig_buffer, &prot))
return false;
 
// In the non bounce buffer case, iommu_dma_map_sg syncs before setting
@@ -291,7 +303,7 @@ bool io_bounce_buffers_unmap_page(struct io_bounce_buffers 
*buffers,
 
return io_buffer_manager_release_buffer(
&buffers->manager, buffers->domain, handle, true,
-   io_bounce_buffers_unmap_page_sync, &args);
+   buffers->untrusted, io_bounce_buffers_unmap_page_sync, &args);
 }
 
 static void io_bounce_buffers_unmap_sg_sync(struct io_bounce_buffer_info *info,
@@ -318,7 +330,7 @@ bool io_bounce_buffers_unmap_

[PATCH v2 6/9] dma-iommu: add bounce buffer pools

2021-08-06 Thread David Stevens
From: David Stevens 

Add per-domain pools for IOMMU mapped bounce buffers. Each domain has 8
buffer pools, which hold buffers of size 2^n pages. Buffers are
allocated on demand, and unused buffers are periodically released from
the cache. Single use buffers are still used for mappings that are too
large to use any pool, or if there are too many simultaneously in-use
streaming mappings.
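
As a rough sketch of the size-class selection (the names here are
illustrative, not the exact ones used in the patch):

    /* Pick the pool whose slots are the smallest power-of-2 number of pages
     * covering the request; classes 0..7 hold 1, 2, 4, ... 128 pages.
     */
    unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
    unsigned int class = order_base_2(nr_pages);

    if (class >= 8)
            /* too big for any pool: hypothetical single-use fallback */
            return alloc_single_use_buffer(dev, size, prot);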

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c |  24 +-
 drivers/iommu/io-bounce-buffers.c |  42 ++-
 drivers/iommu/io-bounce-buffers.h |   3 +
 drivers/iommu/io-buffer-manager.c | 417 +-
 drivers/iommu/io-buffer-manager.h |  49 +++-
 include/linux/dma-iommu.h |   2 +
 6 files changed, 520 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 908eb6fb7dc3..42f85b7a90f0 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -378,6 +378,11 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
return -EFAULT;
}
 
+   if (cookie->bounce_buffers &&
+   !io_bounce_buffer_reinit_check(cookie->bounce_buffers,
+  dev, base, limit))
+   return -EFAULT;
+
return 0;
}
 
@@ -436,12 +441,24 @@ static int dma_info_to_prot(enum dma_data_direction dir, 
bool coherent,
}
 }
 
+u64 __iommu_dma_limit(struct iommu_domain *domain, struct device *dev, u64 
mask)
+{
+   u64 dma_limit = mask;
+
+   dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
+   if (domain->geometry.force_aperture)
+   dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
+
+   return dma_limit;
+}
+
 dma_addr_t __iommu_dma_alloc_iova(struct iommu_domain *domain,
-   size_t size, u64 dma_limit, struct device *dev)
+   size_t size, u64 mask, struct device *dev)
 {
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
unsigned long shift, iova_len, iova = 0;
+   u64 dma_limit = __iommu_dma_limit(domain, dev, mask);
 
if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
cookie->msi_iova += size;
@@ -459,11 +476,6 @@ dma_addr_t __iommu_dma_alloc_iova(struct iommu_domain 
*domain,
if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
iova_len = roundup_pow_of_two(iova_len);
 
-   dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
-
-   if (domain->geometry.force_aperture)
-   dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
-
/* Try to get PCI devices a SAC address */
if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && 
dev_is_pci(dev))
iova = alloc_iova_fast(iovad, iova_len,
diff --git a/drivers/iommu/io-bounce-buffers.c 
b/drivers/iommu/io-bounce-buffers.c
index ed05f593a195..8af8e1546d5f 100644
--- a/drivers/iommu/io-bounce-buffers.c
+++ b/drivers/iommu/io-bounce-buffers.c
@@ -9,11 +9,17 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "io-buffer-manager.h"
 #include "io-bounce-buffers.h"
 
+// Specifies the number of slots in each buffer pool. The total amount of
+// preallocated IOVA range per 1024 slots is slightly under 1GB.
+static unsigned int buffer_pool_size = 1024;
+module_param(buffer_pool_size, uint, 0);
+
 struct io_bounce_buffers {
struct iommu_domain *domain;
struct iova_domain *iovad;
@@ -40,7 +46,8 @@ struct io_bounce_buffers *io_bounce_buffers_init(struct 
device *dev,
if (!buffers)
return ERR_PTR(-ENOMEM);
 
-   ret = io_buffer_manager_init(&buffers->manager);
+   ret = io_buffer_manager_init(&buffers->manager, dev, iovad,
+                                buffer_pool_size);
if (ret) {
kfree(buffers);
return ERR_PTR(ret);
@@ -53,8 +60,26 @@ struct io_bounce_buffers *io_bounce_buffers_init(struct 
device *dev,
return buffers;
 }
 
+bool io_bounce_buffer_reinit_check(struct io_bounce_buffers *buffers,
+  struct device *dev, dma_addr_t base,
+  dma_addr_t limit)
+{
+   if (!io_buffer_manager_reinit_check(&buffers->manager, dev,
+   buffers->iovad, base, limit)) {
+   pr_warn("io-buffer-buffers out of range of %s\n",
+   dev_name(dev));
+   return false;
+   }
+
+   if (buffers->nid != dev_to_node(dev))
+   pr_info("node mismatch: buffers=%d dev=%d\n", buffers->nid,
+   dev_to_node(dev));
+   return true;
+}
+
 void io_bounce_buffers_destroy(struct io_bounce_buffer

[PATCH v2 5/9] dma-iommu: clear only necessary bytes

2021-08-06 Thread David Stevens
From: David Stevens 

Only clear the padding bytes in bounce buffers, since syncing from the
original buffer already overwrites the non-padding bytes.
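
A quick worked example of the padding bounds computed below, assuming 4KiB
pages: a 0x1200-byte mapping at offset 0x300 inside a two-page (0x2000-byte)
bounce buffer.

    /* Syncing from the original buffer fills [0x300, 0x1500), so only the
     * head padding [0, 0x300) and the tail padding [0x1500, 0x2000) still
     * need to be zeroed before the buffer is exposed to the device:
     *
     *      pad_hd_end   = offset             = 0x300
     *      pad_tl_start = offset + orig_size = 0x1500
     */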

Signed-off-by: David Stevens 
---
 drivers/iommu/io-bounce-buffers.c | 64 +--
 drivers/iommu/io-buffer-manager.c |  7 +---
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/io-bounce-buffers.c 
b/drivers/iommu/io-bounce-buffers.c
index c7c52a3f8bf7..ed05f593a195 100644
--- a/drivers/iommu/io-bounce-buffers.c
+++ b/drivers/iommu/io-bounce-buffers.c
@@ -296,14 +296,70 @@ bool io_bounce_buffers_unmap_sg(struct io_bounce_buffers 
*buffers,
io_bounce_buffers_unmap_sg_sync, );
 }
 
+static void io_bounce_buffers_clear_padding(struct io_bounce_buffer_info *info,
+   size_t pad_hd_end,
+   size_t pad_tl_start)
+{
+   size_t idx, pad_hd_idx, pad_tl_idx, count;
+
+   count = info->size / PAGE_SIZE;
+   pad_hd_idx = pad_hd_end / PAGE_SIZE;
+   pad_tl_idx = pad_tl_start / PAGE_SIZE;
+
+   if (!IS_ALIGNED(pad_hd_end, PAGE_SIZE)) {
+   struct page *page = info->bounce_buffer[pad_hd_idx];
+   size_t len = offset_in_page(pad_hd_end);
+
+   memset_page(page, 0, 0, len);
+   arch_sync_dma_for_device(page_to_phys(page), 0, len);
+   }
+
+   if (!IS_ALIGNED(pad_tl_start, PAGE_SIZE)) {
+   size_t off = offset_in_page(pad_tl_start);
+   size_t len = PAGE_SIZE - off;
+   struct page *page = info->bounce_buffer[pad_tl_idx];
+
+   memset_page(page, off, 0, len);
+   arch_sync_dma_for_device(page_to_phys(page) + off, 0, len);
+
+   pad_tl_idx++;
+   }
+
+   idx = pad_hd_idx ? 0 : pad_tl_idx;
+   while (idx < count) {
+   struct page *page = info->bounce_buffer[idx++];
+
+   clear_highpage(page);
+   arch_sync_dma_for_device(page_to_phys(page), 0, PAGE_SIZE);
+   if (idx == pad_hd_idx)
+   idx = pad_tl_idx;
+   }
+}
+
 static bool io_bounce_buffers_map_buffer(struct io_bounce_buffers *buffers,
 struct io_bounce_buffer_info *info,
-int prot)
+int prot, bool skiped_sync,
+size_t offset, size_t orig_size)
 {
unsigned int count = info->size >> PAGE_SHIFT;
struct sg_table sgt;
size_t mapped;
 
+   if (offset || offset + orig_size < info->size || skiped_sync) {
+   // Ensure that nothing is leaked to untrusted devices when
+   // mapping the buffer by clearing any part of the bounce buffer
+   // that wasn't already cleared by syncing.
+   size_t pad_hd_end, pad_tl_start;
+
+   if (skiped_sync) {
+   pad_hd_end = pad_tl_start = 0;
+   } else {
+   pad_hd_end = offset;
+   pad_tl_start = offset + orig_size;
+   }
+   io_bounce_buffers_clear_padding(info, pad_hd_end, pad_tl_start);
+   }
+
if (sg_alloc_table_from_pages(&sgt, info->bounce_buffer, count, 0,
  info->size, GFP_ATOMIC))
return false;
@@ -338,7 +394,8 @@ bool io_bounce_buffers_map_page(struct io_bounce_buffers 
*buffers,
io_bounce_buffers_do_sync(buffers, info.bounce_buffer, offset,
  page, offset, size, dir, prot, false);
 
-   if (!io_bounce_buffers_map_buffer(buffers, &info, prot)) {
+   if (!io_bounce_buffers_map_buffer(buffers, &info, prot, skip_cpu_sync,
+ offset, size)) {
io_buffer_manager_release_buffer(&buffers->manager,
 buffers->domain, info.iova,
 false, NULL, NULL);
@@ -381,7 +438,8 @@ bool io_bounce_buffers_map_sg(struct io_bounce_buffers 
*buffers,
info.bounce_buffer, dir, prot,
false);
 
-   if (!io_bounce_buffers_map_buffer(buffers, &info, prot)) {
+   if (!io_bounce_buffers_map_buffer(buffers, &info, prot, skip_cpu_sync,
+ 0, size)) {
io_buffer_manager_release_buffer(&buffers->manager,
 buffers->domain, info.iova,
 false, NULL, NULL);
diff --git a/drivers/iommu/io-buffer-manager.c 
b/drivers/iommu/io-buffer-manager.c
index 79b9759da928..587584fdf26b 100644
--- a/drivers/iommu/io-buffer-manager.c
+++ b/drivers/iommu/io-buffer-manager.c
@@ -37,13 +37,10 @@ static struct

[PATCH v2 4/9] dma-iommu: remove extra buffer search on unmap

2021-08-06 Thread David Stevens
From: David Stevens 

Add callback to buffer manager's removal function so that the buffer can
be sync'ed during unmap without an extra find operation.

Signed-off-by: David Stevens 
---
 drivers/iommu/io-bounce-buffers.c | 87 +--
 drivers/iommu/io-buffer-manager.c |  6 ++-
 drivers/iommu/io-buffer-manager.h |  6 ++-
 3 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/io-bounce-buffers.c 
b/drivers/iommu/io-bounce-buffers.c
index 78b4440b58c8..c7c52a3f8bf7 100644
--- a/drivers/iommu/io-bounce-buffers.c
+++ b/drivers/iommu/io-bounce-buffers.c
@@ -153,6 +153,20 @@ static void io_bounce_buffers_do_sync(struct 
io_bounce_buffers *buffers,
}
 }
 
+static void __io_bounce_buffers_sync_single(struct io_bounce_buffers *buffers,
+   dma_addr_t dma_handle, size_t size,
+   struct io_bounce_buffer_info *info,
+   struct page *orig_buffer, int prot,
+   enum dma_data_direction dir,
+   bool sync_for_cpu)
+{
+   size_t offset = dma_handle - info->iova;
+
+   io_bounce_buffers_do_sync(buffers, info->bounce_buffer, offset,
+ orig_buffer, offset, size, dir, prot,
+ sync_for_cpu);
+}
+
 bool io_bounce_buffers_sync_single(struct io_bounce_buffers *buffers,
   dma_addr_t dma_handle, size_t size,
   enum dma_data_direction dir,
@@ -160,17 +174,14 @@ bool io_bounce_buffers_sync_single(struct 
io_bounce_buffers *buffers,
 {
struct io_bounce_buffer_info info;
void *orig_buffer;
-   size_t offset;
int prot;
 
if (!io_buffer_manager_find_buffer(&buffers->manager, dma_handle, &info,
   &orig_buffer, &prot))
return false;
 
-   offset = dma_handle - info.iova;
-   io_bounce_buffers_do_sync(buffers, info.bounce_buffer, offset,
- orig_buffer, offset, size, dir, prot,
- sync_for_cpu);
+   __io_bounce_buffers_sync_single(buffers, dma_handle, size, &info,
+   orig_buffer, prot, dir, sync_for_cpu);
return true;
 }
 
@@ -219,16 +230,56 @@ bool io_bounce_buffers_sync_sg(struct io_bounce_buffers 
*buffers,
return true;
 }
 
+struct unmap_sync_args {
+   struct io_bounce_buffers *buffers;
+   unsigned long attrs;
+   enum dma_data_direction dir;
+   dma_addr_t handle;
+   size_t size;
+   int nents;
+};
+
+static void
+io_bounce_buffers_unmap_page_sync(struct io_bounce_buffer_info *info, int prot,
+ void *orig_buffer, void *ctx)
+{
+   struct unmap_sync_args *args = ctx;
+
+   if (args->attrs & DMA_ATTR_SKIP_CPU_SYNC)
+   return;
+
+   __io_bounce_buffers_sync_single(args->buffers, args->handle, args->size,
+   info, orig_buffer, prot, args->dir,
+   true);
+}
+
 bool io_bounce_buffers_unmap_page(struct io_bounce_buffers *buffers,
  dma_addr_t handle, size_t size,
  enum dma_data_direction dir,
  unsigned long attrs)
 {
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   io_bounce_buffers_sync_single(buffers, handle, size, dir, true);
+   struct unmap_sync_args args = { .buffers = buffers,
+   .attrs = attrs,
+   .dir = dir,
+   .handle = handle,
+   .size = size };
+
+   return io_buffer_manager_release_buffer(
+   &buffers->manager, buffers->domain, handle, true,
+   io_bounce_buffers_unmap_page_sync, &args);
+}
+
+static void io_bounce_buffers_unmap_sg_sync(struct io_bounce_buffer_info *info,
+   int prot, void *orig_buffer,
+   void *ctx)
+{
+   struct unmap_sync_args *args = ctx;
+
+   if (args->attrs & DMA_ATTR_SKIP_CPU_SYNC)
+   return;
 
-   return io_buffer_manager_release_buffer(&buffers->manager,
-   buffers->domain, handle, true);
+   __io_bounce_buffers_sync_sg(args->buffers, orig_buffer, args->nents,
+   info->bounce_buffer, args->dir, prot, true);
 }
 
 bool io_bounce_buffers_unmap_sg(struct io_bounce_buffers *buffers,
@@ -236,11 +287,13 @@ bool io_bounce_buffers_unmap_sg(struct io_bounce_buffers 
*buffers,
enum dma_data_direction dir,

[PATCH v2 3/9] dma-iommu: bounce buffers for untrusted devices

2021-08-06 Thread David Stevens
From: David Stevens 

Add support for dynamic bounce buffers to the dma-api for use with
subgranule IOMMU mappings with untrusted devices. Bounce buffer
management is split into two parts. First, there is a buffer manager
that is responsible for allocating and tracking buffers. Second, there
is a layer that uses the managed buffers as bounce buffers. It is
responsible for managing the IOMMU mapping and for syncing between the
original and bounce buffers.

For now, buffer management is very simple - every mapping allocates a
new bounce buffer.
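
Condensed sketch of the map_page path through the two layers (function names
are approximate; the buffer manager entry point in particular is paraphrased):

    /*
     * io_bounce_buffers_map_page()
     *   -> buffer manager:               allocate bounce pages plus an IOVA
     *   -> io_bounce_buffers_do_sync():  copy original -> bounce buffer
     *                                    (skipped for DMA_ATTR_SKIP_CPU_SYNC)
     *   -> io_bounce_buffers_map_buffer(): iommu_map() the bounce pages at
     *                                    the allocated IOVA
     */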

Signed-off-by: David Stevens 
---
 drivers/iommu/Makefile|   2 +-
 drivers/iommu/dma-iommu.c |  70 +-
 drivers/iommu/io-bounce-buffers.c | 358 ++
 drivers/iommu/io-bounce-buffers.h |  46 
 drivers/iommu/io-buffer-manager.c | 212 ++
 drivers/iommu/io-buffer-manager.h |  43 
 6 files changed, 728 insertions(+), 3 deletions(-)
 create mode 100644 drivers/iommu/io-bounce-buffers.c
 create mode 100644 drivers/iommu/io-bounce-buffers.h
 create mode 100644 drivers/iommu/io-buffer-manager.c
 create mode 100644 drivers/iommu/io-buffer-manager.h

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c0fb0ba88143..4edaf7adc082 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
 obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
-obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o io-bounce-buffers.o io-buffer-manager.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 055ccda5eba1..908eb6fb7dc3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -24,6 +24,8 @@
 #include 
 #include 
 
+#include "io-bounce-buffers.h"
+
 struct iommu_dma_msi_page {
struct list_headlist;
dma_addr_t  iova;
@@ -44,6 +46,7 @@ struct iommu_dma_cookie {
dma_addr_t  msi_iova;
};
struct list_headmsi_page_list;
+   struct io_bounce_buffers*bounce_buffers;
 
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
@@ -81,6 +84,14 @@ static inline size_t cookie_msi_granule(struct 
iommu_dma_cookie *cookie)
return PAGE_SIZE;
 }
 
+static struct io_bounce_buffers *dev_to_io_bounce_buffers(struct device *dev)
+{
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+
+   return cookie->bounce_buffers;
+}
+
 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 {
struct iommu_dma_cookie *cookie;
@@ -160,6 +171,9 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
if (!cookie)
return;
 
+   if (cookie->bounce_buffers)
+   io_bounce_buffers_destroy(cookie->bounce_buffers);
+
if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
put_iova_domain(&cookie->iovad);
 
@@ -333,6 +347,7 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
unsigned long order, base_pfn;
struct iova_domain *iovad;
+   int ret;
 
if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
return -EINVAL;
@@ -380,7 +395,16 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
if (!dev)
return 0;
 
-   return iova_reserve_iommu_regions(dev, domain);
+   ret = iova_reserve_iommu_regions(dev, domain);
+
+   if (ret == 0 && dev_is_untrusted(dev)) {
+   cookie->bounce_buffers =
+   io_bounce_buffers_init(dev, domain, iovad);
+   if (IS_ERR(cookie->bounce_buffers))
+   ret = PTR_ERR(cookie->bounce_buffers);
+   }
+
+   return ret;
 }
 
 /**
@@ -710,8 +734,13 @@ static void iommu_dma_free_noncontiguous(struct device 
*dev, size_t size,
 static void iommu_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
 {
+   struct io_bounce_buffers *bounce = dev_to_io_bounce_buffers(dev);
phys_addr_t phys;
 
+   if (bounce && io_bounce_buffers_sync_single(bounce, dma_handle,
+   size, dir, true))
+   return;
+
if (dev_is_dma_coherent(dev))
return;
 
@@ -722,8 +751,13 @@ static void iommu_dma_sync_single_for_cpu(struct dev

[PATCH v2 2/9] dma-iommu: expose a few helper functions to module

2021-08-06 Thread David Stevens
From: David Stevens 

Expose a few helper functions from dma-iommu to the rest of the module.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 23 ---
 include/linux/dma-iommu.h |  8 
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1491b5450246..055ccda5eba1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -412,7 +412,7 @@ static int dma_info_to_prot(enum dma_data_direction dir, 
bool coherent,
}
 }
 
-static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
+dma_addr_t __iommu_dma_alloc_iova(struct iommu_domain *domain,
size_t size, u64 dma_limit, struct device *dev)
 {
struct iommu_dma_cookie *cookie = domain->iova_cookie;
@@ -452,7 +452,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain 
*domain,
return (dma_addr_t)iova << shift;
 }
 
-static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
+void __iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
dma_addr_t iova, size_t size, struct page *freelist)
 {
struct iova_domain *iovad = &cookie->iovad;
@@ -488,7 +488,7 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
 
if (!cookie->fq_domain)
iommu_iotlb_sync(domain, &iotlb_gather);
-   iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
+   __iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -506,12 +506,12 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
 
size = iova_align(iovad, size + iova_off);
 
-   iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
+   iova = __iommu_dma_alloc_iova(domain, size, dma_mask, dev);
if (!iova)
return DMA_MAPPING_ERROR;
 
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
-   iommu_dma_free_iova(cookie, iova, size, NULL);
+   __iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -617,7 +617,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct 
device *dev,
return NULL;
 
size = iova_align(iovad, size);
-   iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
+   iova = __iommu_dma_alloc_iova(domain, size,
+ dev->coherent_dma_mask, dev);
if (!iova)
goto out_free_pages;
 
@@ -643,7 +644,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct 
device *dev,
 out_free_sg:
sg_free_table(sgt);
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size, NULL);
+   __iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -923,7 +924,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
prev = s;
}
 
-   iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
+   iova = __iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
if (!iova)
goto out_restore_sg;
 
@@ -937,7 +938,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, iova_len, NULL);
+   __iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@@ -1226,7 +1227,7 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
if (!msi_page)
return NULL;
 
-   iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
+   iova = __iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
if (!iova)
goto out_free_page;
 
@@ -1240,7 +1241,7 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
return msi_page;
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size, NULL);
+   __iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_page:
kfree(msi_page);
return NULL;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 758ca4694257..50f676678318 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -42,6 +42,14 @@ void iommu_dma_free_cpu_cached_iovas(unsigned int cpu,
 
 extern bool iommu_dma_forcedac;
 
+struct iommu_dma_cookie;
+
+dma_addr_t __iommu_dma_alloc_iova(struct iommu_domain *domain,
+ size_t size, dma_addr_t dma_limit,
+ struct device *dev);
+void __iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
+   dm

[PATCH v2 1/9] Revert "iommu: Allow the dma-iommu api to use bounce buffers"

2021-08-06 Thread David Stevens
From: David Stevens 

A new pooled bounce buffer implementation will be added to reduce IOMMU
interactions on platforms with slow IOMMUs. The new implementation can
also support using bounce buffers with untrusted devices, so the current
basic bounce buffer support can be reverted.

This reverts commit 82612d66d51d3bacdd789e31d2e875d2494b7514.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 152 --
 1 file changed, 13 insertions(+), 139 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98ba927aee1a..1491b5450246 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -20,11 +20,9 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-#include 
 
 struct iommu_dma_msi_page {
struct list_headlist;
@@ -493,23 +491,6 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
-static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
-   size_t size, enum dma_data_direction dir,
-   unsigned long attrs)
-{
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   phys_addr_t phys;
-
-   phys = iommu_iova_to_phys(domain, dma_addr);
-   if (WARN_ON(!phys))
-   return;
-
-   __iommu_dma_unmap(dev, dma_addr, size);
-
-   if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
 {
@@ -536,52 +517,6 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
return iova + iova_off;
 }
 
-static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
-{
-   int prot = dma_info_to_prot(dir, coherent, attrs);
-   struct iommu_domain *domain = iommu_get_dma_domain(dev);
-   struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
-   size_t aligned_size = org_size;
-   void *padding_start;
-   size_t padding_size;
-   dma_addr_t iova;
-
-   /*
-* If both the physical buffer start address and size are
-* page aligned, we don't need to use a bounce page.
-*/
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
-   aligned_size = iova_align(iovad, org_size);
-   phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
-   if (phys == DMA_MAPPING_ERROR)
-   return DMA_MAPPING_ERROR;
-
-   /* Cleanup the padding area. */
-   padding_start = phys_to_virt(phys);
-   padding_size = aligned_size;
-
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   (dir == DMA_TO_DEVICE ||
-dir == DMA_BIDIRECTIONAL)) {
-   padding_start += org_size;
-   padding_size -= org_size;
-   }
-
-   memset(padding_start, 0, padding_size);
-   }
-
-   iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-   if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
-   swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
-   return iova;
-}
-
 static void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
@@ -776,15 +711,11 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   if (!dev_is_dma_coherent(dev))
-   arch_sync_dma_for_cpu(phys, size, dir);
-
-   if (is_swiotlb_buffer(phys))
-   swiotlb_sync_single_for_cpu(dev, phys, size, dir);
+   arch_sync_dma_for_cpu(phys, size, dir);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -792,15 +723,11 @@ static void iommu_dma_sync_single_for_device(struct 
device *dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-   if (is_swiotlb_buffer(phys))
-   swiotlb_sync_single_for_device(dev, phys, size, dir);
-
-   if (!dev_is_dma_coherent(dev)

[PATCH v2 0/9] Add dynamic iommu backed bounce buffers

2021-08-06 Thread David Stevens
From: David Stevens 

This patch series adds support for per-domain dynamic pools of iommu
bounce buffers to the dma-iommu API. This allows iommu mappings to be
reused while still maintaining strict iommu protection.

This bounce buffer support is used to add a new config option that, when
enabled, causes all non-direct streaming mappings below a configurable
size to go through the bounce buffers. This serves as an optimization on
systems where manipulating iommu mappings is very expensive. For
example, virtio-iommu operations in a guest on a linux host require a
vmexit, involvement of the VMM, and a VFIO syscall. For relatively small
DMA operations, memcpy can be significantly faster.

As a performance comparison, on a device with an i5-10210U, I ran fio
with a VFIO passthrough NVMe drive and virtio-iommu with '--direct=1
--rw=read --ioengine=libaio --iodepth=64' and block sizes 4k, 16k, 64k,
and 128k. Test throughput increased by 2.8x, 4.7x, 3.6x, and 3.6x. Time
spent in iommu_dma_unmap_(page|sg) per GB processed decreased by 97%,
94%, 90%, and 87%. Time spent in iommu_dma_map_(page|sg) decreased
by >99%, as bounce buffers don't require syncing here in the read case.
Running with multiple jobs doesn't serve as a useful performance
comparison because virtio-iommu and vfio_iommu_type1 both have big
locks that significantly limit multithreaded DMA performance.

These pooled bounce buffers are also used for subgranule mappings with
untrusted devices, replacing the single use bounce buffers used
currently. The biggest difference here is that the new implementation
maps a whole sglist using a single bounce buffer. The new implementation
does not support using bounce buffers for only some segments of the
sglist, so it may require more copying. However, the current
implementation requires per-segment iommu map/unmap operations for all
untrusted sglist mappings (fully aligned sglists included). On an
i5-10210U laptop with the internal NVMe drive made to appear untrusted,
fio --direct=1 --rw=read --ioengine=libaio --iodepth=64 --bs=64k showed
a statistically significant decrease in CPU load from 2.28% -> 2.17%
with the new iommu bounce buffer optimization enabled.

Each domain's buffer pool is split into multiple power-of-2 size
classes. Each class allocates a fixed number of buffer slot metadata. A
large iova range is allocated, and each slot is assigned an iova from
the range. This allows the iova to be easily mapped back to the slot,
and allows the critical section of most pool operations to be constant
time. The one exception is finding a cached buffer to reuse. These are
only separated according to R/W permissions - the use of other
permissions such as IOMMU_PRIV may require a linear search through the
cache. However, these other permissions are rare and likely exhibit high
locality, so they should not be a bottleneck in practice.
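
The iova-to-slot lookup is plain arithmetic, roughly (illustrative variable
names, not the exact code):

    /* One contiguous IOVA range per size class with equally sized slots:
     * recovering the slot from a DMA handle needs no tree walk.
     */
    size_t slot_size = PAGE_SIZE << class_order;
    unsigned int slot = (dma_handle - class_iova_base) / slot_size;
    size_t offset_in_slot = (dma_handle - class_iova_base) % slot_size;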

Since untrusted devices may require bounce buffers, each domain has a
fallback rbtree to manage single use buffers. This may be necessary if a
very large number of DMA operations are simultaneously in-flight, or for
very large individual DMA operations.

This patch set does not use swiotlb. There are two primary ways in which
swiotlb isn't compatible with per-domain buffer pools. First, swiotlb
allocates buffers to be compatible with a single device, whereas
per-domain buffer pools don't handle that during buffer allocation as a
single buffer may end up being used by multiple devices. Second, swiotlb
allocation establishes the original to bounce buffer mapping, which
again doesn't work if buffers can be reused. Effectively the only code
that can be shared between the two use cases is allocating slots from
the swiotlb's memory. However, given that we're going to be allocating
memory for use with an iommu, allocating memory from a block of memory
explicitly set aside to deal with a lack of iommu seems kind of
contradictory. At best there might be a small performance improvement if 
swiotlb allocation is faster than regular page allocation, but buffer
allocation isn't on the hot path anyway.

Not using the swiotlb has the benefit that memory doesn't have to be
preallocated. Instead, bounce buffers consume memory only for in-flight
dma transactions (ignoring temporarily cached buffers), which is the
smallest amount possible. This makes it easier to use bounce buffers as
an optimization on systems with large numbers of devices or in
situations where devices are unknown, since it is not necessary to try
to tune how much memory needs to be set aside to achieve good
performance without costing too much memory.

Finally, this series adds a new DMA_ATTR_PERSISTENT_STREAMING flag. This
is meant to address devices which create long lived streaming mappings
but manage CPU cache coherency without using the dma_sync_* APIs.
Currently, these devices don't function properly with swiotlb=force. The
new flag is used to bypass bounce buffers so such devices will function
when the new bounce 

Re: [PATCH v2 3/4] dma-iommu: pass SKIP_CPU_SYNC to swiotlb unmap

2021-08-04 Thread David Stevens
On Mon, Aug 2, 2021 at 10:54 PM Will Deacon  wrote:
>
> On Fri, Jul 09, 2021 at 12:35:01PM +0900, David Stevens wrote:
> > From: David Stevens 
> >
> > If SKIP_CPU_SYNC isn't already set, then iommu_dma_unmap_(page|sg) has
> > already called iommu_dma_sync_(single|sg)_for_cpu, so there is no need
> > to copy from the bounce buffer again.
> >
> > Signed-off-by: David Stevens 
> > ---
> >  drivers/iommu/dma-iommu.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index e79e274d2dc5..0a9a9a343e64 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device 
> > *dev, dma_addr_t dma_addr,
> >   __iommu_dma_unmap(dev, dma_addr, size);
> >
> >   if (unlikely(is_swiotlb_buffer(phys)))
> > - swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
> > + swiotlb_tbl_unmap_single(dev, phys, size, dir,
> > +  attrs | DMA_ATTR_SKIP_CPU_SYNC);
> >  }
>
> I think it would be cleaner to drop DMA_ATTR_SKIP_CPU_SYNC in the callers
> once they've called iommu_dma_sync_*_for_cpu().

Dropping that flag in iommu_dma_unmap_* would result in always copying
from the swiotlb here, which is the opposite direction of what this
patch is trying to do.

This change is aiming to address the case where DMA_ATTR_SKIP_CPU_SYNC
isn't passed to dma_unmap_*. In that case, there are calls to
swiotlb_sync_single_for_cpu from iommu_dma_sync_*_for_cpu, and calls
to swiotlb_tbl_unmap_single here. That means we copy from the swiotlb
twice. Adding the DMA_ATTR_SKIP_CPU_SYNC flag here skips the second
copy.
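
Sketch of the unmap path without this patch, when the caller did not pass
DMA_ATTR_SKIP_CPU_SYNC (both calls end up copying the bounce buffer back):

    iommu_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
            /* -> swiotlb_sync_single_for_cpu(): copy #1 */
    __iommu_dma_unmap_swiotlb(dev, dma_handle, size, dir, attrs);
            /* -> swiotlb_tbl_unmap_single(): copy #2, which the patch
             *    suppresses by passing DMA_ATTR_SKIP_CPU_SYNC down */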

-David

> Will


Re: [PATCH v2 1/4] dma-iommu: fix sync_sg with swiotlb

2021-08-04 Thread David Stevens
On Mon, Aug 2, 2021 at 10:30 PM Will Deacon  wrote:
>
> On Fri, Jul 09, 2021 at 12:34:59PM +0900, David Stevens wrote:
> > From: David Stevens 
> >
> > The is_swiotlb_buffer function takes the physical address of the swiotlb
> > buffer, not the physical address of the original buffer. The sglist
> > contains the physical addresses of the original buffer, so for the
> > sync_sg functions to work properly when a bounce buffer might have been
> > used, we need to use iommu_iova_to_phys to look up the physical address.
> > This is what sync_single does, so call that function on each sglist
> > segment.
> >
> > The previous code mostly worked because swiotlb does the transfer on map
> > and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
> > sglists or which call sync_sg would not have had anything copied to the
> > bounce buffer.
> >
> > Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
> > Signed-off-by: David Stevens 
> > ---
> >  drivers/iommu/dma-iommu.c | 26 +-
> >  1 file changed, 13 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index 7bcdd1205535..eac65302439e 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -811,14 +811,14 @@ static void iommu_dma_sync_sg_for_cpu(struct device 
> > *dev,
> >   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
> >   return;
> >
> > - for_each_sg(sgl, sg, nelems, i) {
> > - if (!dev_is_dma_coherent(dev))
> > - arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
> > -
> > - if (is_swiotlb_buffer(sg_phys(sg)))
> > + if (dev_is_untrusted(dev))
> > + for_each_sg(sgl, sg, nelems, i)
> > + iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
> > +   sg->length, dir);
> > + else
> > + for_each_sg(sgl, sg, nelems, i)
> >   swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
> >   sg->length, dir);
>
> Doesn't this skip arch_sync_dma_for_cpu() for non-coherent trusted devices?

Whoops, this was supposed to be a call to arch_sync_dma_for_cpu, not
to swiotlb_sync_single_for_cpu. Similar to the sync_sg_for_device
case.

> Why not skip the extra dev_is_untrusted(dev) call here and just call
> iommu_dma_sync_single_for_cpu() for each entry regardless?

iommu_dma_sync_single_for_cpu calls iommu_iova_to_phys to translate
the dma_addr_t to a phys_addr_t. Since the physical address is readily
available, I think it's better to avoid that extra work.
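
For trusted non-coherent devices the original physical address is already in
the sglist, so the cheap path stays as simple as (sketch):

    for_each_sg(sgl, sg, nelems, i)
            arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);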

> Will


[PATCH] iommu/sun50i: fix protection flag check

2021-07-15 Thread David Stevens
From: David Stevens 

Fix RW protection check when making a pte, so that it properly checks
that both R and W flags are set, instead of either R or W.

Signed-off-by: David Stevens 
---
 drivers/iommu/sun50i-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 181bb1c3437c..11cf5af30956 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -271,7 +271,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot)
enum sun50i_iommu_aci aci;
u32 flags = 0;
 
-   if (prot & (IOMMU_READ | IOMMU_WRITE))
+   if ((prot & IOMMU_READ) && (prot & IOMMU_WRITE))
aci = SUN50I_IOMMU_ACI_RD_WR;
else if (prot & IOMMU_READ)
aci = SUN50I_IOMMU_ACI_RD;
-- 
2.32.0.402.g57bb445576-goog



Re: [PATCH 0/4] Add dynamic iommu backed bounce buffers

2021-07-09 Thread David Stevens
On Fri, Jul 9, 2021 at 2:14 AM Robin Murphy  wrote:
>
> On 2021-07-08 10:29, Joerg Roedel wrote:
> > Adding Robin too.
> >
> > On Wed, Jul 07, 2021 at 04:55:01PM +0900, David Stevens wrote:
> >> Add support for per-domain dynamic pools of iommu bounce buffers to the
> >> dma-iommu API. This allows iommu mappings to be reused while still
> >> maintaining strict iommu protection. Allocating buffers dynamically
> >> instead of using swiotlb carveouts makes per-domain pools more amenable
> >> on systems with large numbers of devices or where devices are unknown.
>
> But isn't that just as true for the currently-supported case? All you
> need is a large enough Thunderbolt enclosure and you could suddenly plug
> in a dozen untrusted GPUs all wanting to map hundreds of megabytes of
> memory. If there's a real concern worth addressing, surely it's worth
> addressing properly for everyone.

Bounce buffers consume memory, so there is always going to be some
limitation on how many devices are supported. This patch series limits
the memory consumption at a given point in time to approximately the
amount of active DMA transactions. There's really no way to improve
significantly on that. The 'approximately' qualification could be
removed by adding a shrinker, but that doesn't change things
materially.

This is compared to reusing swiotlb, where the amount of memory
consumed would be the largest amount of active DMA transactions you
want bounce buffers to handle. I see two concrete shortcomings here.
First, most of the time you're not doing heavy IO, especially for
consumer workloads. Second, it raises the problem of per-device
tuning, since you don't want to waste performance by having too few
bounce buffers but you also don't want to waste memory by
preallocating too many bounce buffers. This tuning becomes more
problematic once you start dealing with external devices.

Also, although this doesn't directly address the raised concern, the
bounce buffers are only used for relatively small DMA transactions. So
large allocations like framebuffers won't actually consume extra
memory via bounce buffers.

> >> When enabled, all non-direct streaming mappings below a configurable
> >> size will go through bounce buffers. Note that this means drivers which
> >> don't properly use the DMA API (e.g. i915) cannot use an iommu when this
> >> feature is enabled. However, all drivers which work with swiotlb=force
> >> should work.
> >>
> >> Bounce buffers serve as an optimization in situations where interactions
> >> with the iommu are very costly. For example, virtio-iommu operations in
> >> a guest on a linux host require a vmexit, involvement the VMM, and a
> >> VFIO syscall. For relatively small DMA operations, memcpy can be
> >> significantly faster.
>
> Yup, back when the bounce-buffering stuff first came up I know
> networking folks were interested in terms of latency for small packets -
> virtualised IOMMUs are indeed another interesting case I hadn't thought
> of. It's definitely been on the radar as another use-case we'd like to
> accommodate with the bounce-buffering scheme. However, that's the thing:
> bouncing is bouncing and however you look at it it still overlaps so
> much with the untrusted case - there's no reason that couldn't use
> pre-mapped bounce buffers too, for instance - that the only necessary
> difference is really the policy decision of when to bounce. iommu-dma
> has already grown complicated enough, and having *three* different ways
> of doing things internally just seems bonkers and untenable. Pre-map the
> bounce buffers? Absolutely. Dynamically grow them on demand? Yes please!
> Do it all as a special thing in its own NIH module and leave the
> existing mess to rot? Sorry, but no.

I do agree that iommu-dma is getting fairly complicated. Since a
virtualized IOMMU uses bounce buffers much more heavily than
sub-granule untrusted DMA, and for the reasons stated earlier in this
email, I don't think pre-allocated bounce buffers are viable for the
virtualized IOMMU case. I can look at migrating the sub-granule
untrusted DMA case to dynamic bounce buffers, if that's an acceptable
approach.

-David

> Thanks,
> Robin.
>
> >> As a performance comparison, on a device with an i5-10210U, I ran fio
> >> with a VFIO passthrough NVMe drive with '--direct=1 --rw=read
> >> --ioengine=libaio --iodepth=64' and block sizes 4k, 16k, 64k, and
> >> 128k. Test throughput increased by 2.8x, 4.7x, 3.6x, and 3.6x. Time
> >> spent in iommu_dma_unmap_(page|sg) per GB processed decreased by 97%,
> >> 94%, 90%, and 87%. Time spent in iommu_dma_map_(page|sg) decreased
> >> by >99%, as bounce buffers don't require sy

Re: [PATCH 0/4] Add dynamic iommu backed bounce buffers

2021-07-09 Thread David Stevens
On Thu, Jul 8, 2021 at 10:38 PM Lu Baolu  wrote:
>
> Hi David,
>
> I like this idea. Thanks for proposing this.
>
> On 2021/7/7 15:55, David Stevens wrote:
> > Add support for per-domain dynamic pools of iommu bounce buffers to the
> > dma-iommu API. This allows iommu mappings to be reused while still
> > maintaining strict iommu protection. Allocating buffers dynamically
> > instead of using swiotlb carveouts makes per-domain pools more amenable
> > on systems with large numbers of devices or where devices are unknown.
>
> Have you ever considered leveraging the per-device swiotlb memory pool
> added by below series?
>
> https://lore.kernel.org/linux-iommu/20210625123004.GA3170@willie-the-truck/

I'm not sure if that's a good fit. The swiotlb pools are allocated
during device initialization, so they require setting aside the
worst-case amount of memory. That's okay if you only use it with a
small number of devices where you know in advance approximately how
much memory they use. However, it doesn't work as well if you want to
use it with a large number of devices, or with unknown (i.e.
hotplugged) devices.

> >
> > When enabled, all non-direct streaming mappings below a configurable
> > size will go through bounce buffers. Note that this means drivers which
> > don't properly use the DMA API (e.g. i915) cannot use an iommu when this
> > feature is enabled. However, all drivers which work with swiotlb=force
> > should work.
>
> If so, why not making it more scalable by adding a callback into vendor
> iommu drivers? The vendor iommu drivers have enough information to tell
> whether the bounce buffer is feasible for a specific domain.

I'm not very familiar with the specifics of VT-d or restrictions with
the graphics hardware, but at least on the surface it looks like a
limitation of the i915 driver's implementation. The driver uses the
DMA_ATTR_SKIP_CPU_SYNC flag, but never calls the dma_sync functions,
since things are coherent on x86 hardware. However, bounce buffers
violate the driver's assumption that there's no need to sync between the
CPU and device domains. I doubt there's an inherent limitation of the
hardware here; it's just how the driver is implemented. Given that, I
don't know if it's something the iommu driver needs to handle.

One potential way this could be addressed would be to add explicit
support to the DMA API for long-lived streaming mappings. Drivers can
get that behavior today via DMA_ATTR_SKIP_CPU_SYNC and dma_sync.
However, the DMA API doesn't really have enough information to treat
ephemeral and long-lived mappings differently. With a new DMA_ATTR
flag for long-lived streaming mappings, the DMA API could skip bounce
buffers. That flag could also be used as a performance optimization in
the various dma-buf implementations, since they seem to mostly fall
into the long-lived streaming category (the handful I checked do call
dma_sync, so there isn't a correctness issue).
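
To make that concrete, here is a minimal sketch of the pattern a
well-behaved driver uses today for a long-lived streaming mapping (the
example_* helpers are hypothetical; the DMA API calls and attrs are the
existing ones, and a new long-lived-mapping flag would simply be passed
alongside DMA_ATTR_SKIP_CPU_SYNC at map time):

/*
 * Sketch only: map once when the buffer is created and skip the implicit
 * CPU sync, then sync explicitly around each device access for the
 * lifetime of the mapping.
 */
static dma_addr_t example_map_longlived(struct device *dev,
					struct page *page, size_t size)
{
	return dma_map_page_attrs(dev, page, 0, size, DMA_BIDIRECTIONAL,
				  DMA_ATTR_SKIP_CPU_SYNC);
}

static void example_begin_device_access(struct device *dev,
					dma_addr_t addr, size_t size)
{
	dma_sync_single_for_device(dev, addr, size, DMA_BIDIRECTIONAL);
}

static void example_end_device_access(struct device *dev,
				      dma_addr_t addr, size_t size)
{
	dma_sync_single_for_cpu(dev, addr, size, DMA_BIDIRECTIONAL);
}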

-David

> >
> > Bounce buffers serve as an optimization in situations where interactions
> > with the iommu are very costly. For example, virtio-iommu operations in
>
> The simulated IOMMU does the same thing.
>
> It's also an optimization for bare metal in cases where the strict mode
> of cache invalidation is used. CPU moving data is faster than IOMMU
> cache invalidation if the buffer is small.
>
> Best regards,
> baolu


[PATCH v2 4/4] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-07-08 Thread David Stevens
From: David Stevens 

Introduce a new dev_use_swiotlb function to guard swiotlb code, instead
of overloading dev_is_untrusted. This allows CONFIG_SWIOTLB to be
checked more broadly, so the swiotlb-related code can be removed more
aggressively.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 0a9a9a343e64..d8a0764c69aa 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -315,6 +315,11 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+static bool dev_use_swiotlb(struct device *dev)
+{
+   return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -552,8 +557,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
+   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | org_size)) {
aligned_size = iova_align(iovad, org_size);
phys = swiotlb_tbl_map_single(dev, phys, org_size,
  aligned_size, dir, attrs);
@@ -778,7 +782,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -794,7 +798,7 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -812,10 +816,10 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -832,10 +836,10 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
 sg_dma_address(sg),
@@ -996,7 +1000,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1071,7 +1075,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.32.0.93.g670b81a890-goog



[PATCH v2 3/4] dma-iommu: pass SKIP_CPU_SYNC to swiotlb unmap

2021-07-08 Thread David Stevens
From: David Stevens 

If SKIP_CPU_SYNC isn't already set, then iommu_dma_unmap_(page|sg) has
already called iommu_dma_sync_(single|sg)_for_cpu, so there is no need
to copy from the bounce buffer again.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e79e274d2dc5..0a9a9a343e64 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
__iommu_dma_unmap(dev, dma_addr, size);
 
if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+   swiotlb_tbl_unmap_single(dev, phys, size, dir,
+attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
-- 
2.32.0.93.g670b81a890-goog



[PATCH v2 2/4] dma-iommu: fix arch_sync_dma for map with swiotlb

2021-07-08 Thread David Stevens
From: David Stevens 

When calling arch_sync_dma, we need to pass it the memory that's
actually being used for dma. When using swiotlb bounce buffers, this is
the bounce buffer. Move arch_sync_dma into the __iommu_dma_map_swiotlb
helper, so it can use the bounce buffer address if necessary. This also
means it is no longer necessary to call iommu_dma_sync_sg_for_device in
iommu_dma_map_sg for untrusted devices.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index eac65302439e..e79e274d2dc5 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -574,6 +574,9 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
memset(padding_start, 0, padding_size);
}
 
+   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   arch_sync_dma_for_device(phys, org_size, dir);
+
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
@@ -847,14 +850,9 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, 
struct page *page,
 {
phys_addr_t phys = page_to_phys(page) + offset;
bool coherent = dev_is_dma_coherent(dev);
-   dma_addr_t dma_handle;
 
-   dma_handle = __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
+   return __iommu_dma_map_swiotlb(dev, phys, size, dma_get_mask(dev),
coherent, dir, attrs);
-   if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-   dma_handle != DMA_MAPPING_ERROR)
-   arch_sync_dma_for_device(phys, size, dir);
-   return dma_handle;
 }
 
 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -997,12 +995,12 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
-
if (dev_is_untrusted(dev))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
 
+   if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+   iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
+
/*
 * Work out how much IOVA space we need, and align the segments to
 * IOVA granules for the IOMMU driver to handle. With some clever
-- 
2.32.0.93.g670b81a890-goog



[PATCH v2 1/4] dma-iommu: fix sync_sg with swiotlb

2021-07-08 Thread David Stevens
From: David Stevens 

The is_swiotlb_buffer function takes the physical address of the swiotlb
buffer, not the physical address of the original buffer. The sglist
contains the physical addresses of the original buffer, so for the
sync_sg functions to work properly when a bounce buffer might have been
used, we need to use iommu_iova_to_phys to look up the physical address.
This is what sync_single does, so call that function on each sglist
segment.

The previous code mostly worked because swiotlb does the transfer on map
and unmap. However, any callers which use DMA_ATTR_SKIP_CPU_SYNC with
sglists or which call sync_sg would not have had anything copied to the
bounce buffer.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7bcdd1205535..eac65302439e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -811,14 +811,14 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
-   for_each_sg(sgl, sg, nelems, i) {
-   if (!dev_is_dma_coherent(dev))
-   arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
-
-   if (is_swiotlb_buffer(sg_phys(sg)))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg->length, dir);
+   else
+   for_each_sg(sgl, sg, nelems, i)
swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
sg->length, dir);
-   }
 }
 
 static void iommu_dma_sync_sg_for_device(struct device *dev,
@@ -831,14 +831,14 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
-   for_each_sg(sgl, sg, nelems, i) {
-   if (is_swiotlb_buffer(sg_phys(sg)))
-   swiotlb_sync_single_for_device(dev, sg_phys(sg),
-  sg->length, dir);
-
-   if (!dev_is_dma_coherent(dev))
+   if (dev_is_untrusted(dev))
+   for_each_sg(sgl, sg, nelems, i)
+   iommu_dma_sync_single_for_device(dev,
+sg_dma_address(sg),
+sg->length, dir);
+   else
+   for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
-   }
 }
 
 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
-- 
2.32.0.93.g670b81a890-goog



[PATCH v2 0/4] Fixes for dma-iommu swiotlb bounce buffers

2021-07-08 Thread David Stevens
From: David Stevens 

This patch set includes two fixes for bugs caused by mixing up the
original buffer's physical address and the bounce buffer's physical address.
It also includes a performance fix that avoids an extra copy, as well as
a general cleanup fix.
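
As a concrete illustration, the lookup that the fixes converge on looks
roughly like the sketch below (simplified, using helpers that appear in the
patches; not the exact resulting code):

/*
 * The sglist holds the original buffer's physical address, but the IOMMU
 * may actually map a bounce buffer. Translating the DMA address back
 * through the IOMMU yields the physical address the device really uses,
 * which is what the swiotlb check and any arch sync need to see.
 */
static void example_sync_for_cpu(struct device *dev, dma_addr_t dma_handle,
				 size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
					      dma_handle);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	if (is_swiotlb_buffer(phys))
		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}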

The issues were found via code inspection, so I don't have any specific
use cases where things were not working, or any performance numbers.

v1 -> v2:
 - Split fixes into dedicated patches
 - Less invasive changes to fix arch_sync when mapping
 - Leave dev_is_untrusted check for strict iommu

David Stevens (4):
  dma-iommu: fix sync_sg with swiotlb
  dma-iommu: fix arch_sync_dma for map with swiotlb
  dma-iommu: pass SKIP_CPU_SYNC to swiotlb unmap
  dma-iommu: Check CONFIG_SWIOTLB more broadly

 drivers/iommu/dma-iommu.c | 63 ---
 1 file changed, 33 insertions(+), 30 deletions(-)

-- 
2.32.0.93.g670b81a890-goog



[PATCH 4/4] dma-iommu: Add iommu bounce buffers to dma-iommu api

2021-07-07 Thread David Stevens
From: David Stevens 

Add support for per-domain dynamic pools of iommu bounce buffers to the
dma-iommu api. When enabled, all non-direct streaming mappings below a
configurable size will go through bounce buffers.

Each domain has its own buffer pool. Each buffer pool is split into
multiple power-of-2 size classes. Each class has a number of
preallocated slots that can hold bounce buffers. Bounce buffers are
allocated on demand, and unmapped bounce buffers are stored in a cache
with periodic eviction of unused cache entries. As the buffer pool is an
optimization, any failures simply result in falling back to the normal
dma-iommu handling.
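
For reference, the pool's shape is roughly the following (an illustrative
sketch only; these are not the actual definitions in io-buffer-pool.c):

struct io_buffer_slot {
	struct list_head cache_link;	/* entry in the class's reuse cache */
	struct page **bounce_pages;	/* backing pages, allocated on demand */
	void *orig_buffer;		/* original buffer to copy to/from */
};

struct io_buffer_class {
	size_t buffer_size;		/* power-of-2 size served by this class */
	struct io_buffer_slot *slots;	/* fixed number of preallocated slots */
	struct list_head cached_slots;	/* unused buffers kept for reuse,
					 * periodically evicted */
	spinlock_t lock;
};

/* One pool per domain, hanging off the domain's iommu_dma_cookie. */
struct io_buffer_pool {
	struct io_buffer_class *classes;
	unsigned int nr_classes;
};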

Signed-off-by: David Stevens 
---
 drivers/iommu/Kconfig  |  10 +
 drivers/iommu/Makefile |   1 +
 drivers/iommu/dma-iommu.c  |  75 +++-
 drivers/iommu/io-buffer-pool.c | 656 +
 drivers/iommu/io-buffer-pool.h |  91 +
 5 files changed, 826 insertions(+), 7 deletions(-)
 create mode 100644 drivers/iommu/io-buffer-pool.c
 create mode 100644 drivers/iommu/io-buffer-pool.h

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1f111b399bca..6eee57b03ff9 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -420,4 +420,14 @@ config SPRD_IOMMU
 
  Say Y here if you want to use the multimedia devices listed above.
 
+config IOMMU_IO_BUFFER
+   bool "Use IOMMU bounce buffers"
+   depends on IOMMU_DMA
+   help
+ Use bounce buffers for small, streaming DMA operations. This may
+ have performance benefits on systems where establishing IOMMU mappings
+ is particularly expensive, such as when running as a guest.
+
+ If unsure, say N here.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c0fb0ba88143..2287b2e3d92d 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
 obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
+obj-$(CONFIG_IOMMU_IO_BUFFER) += io-buffer-pool.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 48267d9f5152..1d2cfbbe03c1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -26,6 +26,8 @@
 #include 
 #include 
 
+#include "io-buffer-pool.h"
+
 struct iommu_dma_msi_page {
struct list_headlist;
dma_addr_t  iova;
@@ -46,6 +48,7 @@ struct iommu_dma_cookie {
dma_addr_t  msi_iova;
};
struct list_headmsi_page_list;
+   struct io_buffer_pool   *bounce_buffers;
 
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
@@ -83,6 +86,14 @@ static inline size_t cookie_msi_granule(struct 
iommu_dma_cookie *cookie)
return PAGE_SIZE;
 }
 
+static inline struct io_buffer_pool *dev_to_io_buffer_pool(struct device *dev)
+{
+   struct iommu_domain *domain = iommu_get_dma_domain(dev);
+   struct iommu_dma_cookie *cookie = domain->iova_cookie;
+
+   return cookie->bounce_buffers;
+}
+
 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 {
struct iommu_dma_cookie *cookie;
@@ -162,6 +173,9 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
if (!cookie)
return;
 
+   if (IS_ENABLED(CONFIG_IOMMU_IO_BUFFER))
+   io_buffer_pool_destroy(cookie->bounce_buffers);
+
if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
put_iova_domain(&cookie->iovad);
 
@@ -334,6 +348,7 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
unsigned long order, base_pfn;
struct iova_domain *iovad;
+   int ret;
 
if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
return -EINVAL;
@@ -381,7 +396,13 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
if (!dev)
return 0;
 
-   return iova_reserve_iommu_regions(dev, domain);
+   ret = iova_reserve_iommu_regions(dev, domain);
+
+   if (ret == 0 && IS_ENABLED(CONFIG_IOMMU_IO_BUFFER))
+   ret = io_buffer_pool_init(dev, domain, iovad,
+ &cookie->bounce_buffers);
+
+   return ret;
 }
 
 /**
@@ -537,11 +558,10 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
 }
 
 static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
-   size_t org_size, dma_addr_t dma_mask, bool cohe

[PATCH 3/4] dma-iommu: expose a few helper functions to module

2021-07-07 Thread David Stevens
From: David Stevens 

Expose a few helper functions from dma-iommu to the rest of the module.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 27 ++-
 include/linux/dma-iommu.h | 12 
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 98a5c566a303..48267d9f5152 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -413,7 +413,7 @@ static int dma_info_to_prot(enum dma_data_direction dir, 
bool coherent,
}
 }
 
-static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
+dma_addr_t __iommu_dma_alloc_iova(struct iommu_domain *domain,
size_t size, u64 dma_limit, struct device *dev)
 {
struct iommu_dma_cookie *cookie = domain->iova_cookie;
@@ -453,7 +453,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain 
*domain,
return (dma_addr_t)iova << shift;
 }
 
-static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
+void __iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
dma_addr_t iova, size_t size, struct page *freelist)
 {
struct iova_domain *iovad = &cookie->iovad;
@@ -489,7 +489,7 @@ static void __iommu_dma_unmap(struct device *dev, 
dma_addr_t dma_addr,
 
if (!cookie->fq_domain)
iommu_iotlb_sync(domain, &iotlb_gather);
-   iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
+   __iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
 }
 
 static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
@@ -525,12 +525,12 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
 
size = iova_align(iovad, size + iova_off);
 
-   iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
+   iova = __iommu_dma_alloc_iova(domain, size, dma_mask, dev);
if (!iova)
return DMA_MAPPING_ERROR;
 
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
-   iommu_dma_free_iova(cookie, iova, size, NULL);
+   __iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -585,14 +585,14 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
return iova;
 }
 
-static void __iommu_dma_free_pages(struct page **pages, int count)
+void __iommu_dma_free_pages(struct page **pages, int count)
 {
while (count--)
__free_page(pages[count]);
kvfree(pages);
 }
 
-static struct page **__iommu_dma_alloc_pages(
+struct page **__iommu_dma_alloc_pages(
unsigned int count, unsigned long order_mask,
unsigned int nid, gfp_t page_gfp, gfp_t kalloc_gfp)
 {
@@ -686,7 +686,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct 
device *dev,
return NULL;
 
size = iova_align(iovad, size);
-   iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
+   iova = __iommu_dma_alloc_iova(domain, size,
+ dev->coherent_dma_mask, dev);
if (!iova)
goto out_free_pages;
 
@@ -712,7 +713,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct 
device *dev,
 out_free_sg:
sg_free_table(sgt);
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size, NULL);
+   __iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -1063,7 +1064,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
prev = s;
}
 
-   iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
+   iova = __iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
if (!iova)
goto out_restore_sg;
 
@@ -1077,7 +1078,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, iova_len, NULL);
+   __iommu_dma_free_iova(cookie, iova, iova_len, NULL);
 out_restore_sg:
__invalidate_sg(sg, nents);
return 0;
@@ -1370,7 +1371,7 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
if (!msi_page)
return NULL;
 
-   iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
+   iova = __iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
if (!iova)
goto out_free_page;
 
@@ -1384,7 +1385,7 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
return msi_page;
 
 out_free_iova:
-   iommu_dma_free_iova(cookie, iova, size, NULL);
+   __iommu_dma_free_iova(cookie, iova, size, NULL);
 out_free_page:
kfree(msi_page);

[PATCH 2/4] dma-iommu: replace device arguments

2021-07-07 Thread David Stevens
From: David Stevens 

Replace the struct device argument with the device's nid in
__iommu_dma_alloc_pages, since it doesn't need the whole struct. This
allows it to be called from places which don't have access to the
device.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 00993b56c977..98a5c566a303 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -592,12 +592,12 @@ static void __iommu_dma_free_pages(struct page **pages, 
int count)
kvfree(pages);
 }
 
-static struct page **__iommu_dma_alloc_pages(struct device *dev,
+static struct page **__iommu_dma_alloc_pages(
unsigned int count, unsigned long order_mask,
-   gfp_t page_gfp, gfp_t kalloc_gfp)
+   unsigned int nid, gfp_t page_gfp, gfp_t kalloc_gfp)
 {
struct page **pages;
-   unsigned int i = 0, nid = dev_to_node(dev);
+   unsigned int i = 0;
 
order_mask &= (2U << MAX_ORDER) - 1;
if (!order_mask)
@@ -680,8 +680,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct 
device *dev,
alloc_sizes = min_size;
 
count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-   pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
-   gfp, GFP_KERNEL);
+   pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT,
+   dev_to_node(dev), gfp, GFP_KERNEL);
if (!pages)
return NULL;
 
-- 
2.32.0.93.g670b81a890-goog



[PATCH 1/4] dma-iommu: add kalloc gfp flag to alloc helper

2021-07-07 Thread David Stevens
From: David Stevens 

Add a gfp flag for kalloc calls within __iommu_dma_alloc_pages, so the
function can be called from atomic contexts.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 614f0dd86b08..00993b56c977 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -593,7 +593,8 @@ static void __iommu_dma_free_pages(struct page **pages, int 
count)
 }
 
 static struct page **__iommu_dma_alloc_pages(struct device *dev,
-   unsigned int count, unsigned long order_mask, gfp_t gfp)
+   unsigned int count, unsigned long order_mask,
+   gfp_t page_gfp, gfp_t kalloc_gfp)
 {
struct page **pages;
unsigned int i = 0, nid = dev_to_node(dev);
@@ -602,15 +603,15 @@ static struct page **__iommu_dma_alloc_pages(struct 
device *dev,
if (!order_mask)
return NULL;
 
-   pages = kvzalloc(count * sizeof(*pages), GFP_KERNEL);
+   pages = kvzalloc(count * sizeof(*pages), kalloc_gfp);
if (!pages)
return NULL;
 
/* IOMMU can map any pages, so himem can also be used here */
-   gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+   page_gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 
/* It makes no sense to muck about with huge pages */
-   gfp &= ~__GFP_COMP;
+   page_gfp &= ~__GFP_COMP;
 
while (count) {
struct page *page = NULL;
@@ -624,7 +625,7 @@ static struct page **__iommu_dma_alloc_pages(struct device 
*dev,
for (order_mask &= (2U << __fls(count)) - 1;
 order_mask; order_mask &= ~order_size) {
unsigned int order = __fls(order_mask);
-   gfp_t alloc_flags = gfp;
+   gfp_t alloc_flags = page_gfp;
 
order_size = 1U << order;
if (order_mask > order_size)
@@ -680,7 +681,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct 
device *dev,
 
count = PAGE_ALIGN(size) >> PAGE_SHIFT;
pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
-   gfp);
+   gfp, GFP_KERNEL);
if (!pages)
return NULL;
 
-- 
2.32.0.93.g670b81a890-goog



[PATCH 0/4] Add dynamic iommu backed bounce buffers

2021-07-07 Thread David Stevens
Add support for per-domain dynamic pools of iommu bounce buffers to the 
dma-iommu API. This allows iommu mappings to be reused while still
maintaining strict iommu protection. Allocating buffers dynamically
instead of using swiotlb carveouts makes per-domain pools better suited
to systems with large numbers of devices or where devices are unknown.

When enabled, all non-direct streaming mappings below a configurable
size will go through bounce buffers. Note that this means drivers which
don't properly use the DMA API (e.g. i915) cannot use an iommu when this
feature is enabled. However, all drivers which work with swiotlb=force
should work.

Bounce buffers serve as an optimization in situations where interactions
with the iommu are very costly. For example, virtio-iommu operations in
a guest on a linux host require a vmexit, involvement of the VMM, and a
VFIO syscall. For relatively small DMA operations, memcpy can be
significantly faster.

As a performance comparison, on a device with an i5-10210U, I ran fio
with a VFIO passthrough NVMe drive with '--direct=1 --rw=read
--ioengine=libaio --iodepth=64' and block sizes 4k, 16k, 64k, and
128k. Test throughput increased by 2.8x, 4.7x, 3.6x, and 3.6x. Time
spent in iommu_dma_unmap_(page|sg) per GB processed decreased by 97%,
94%, 90%, and 87%. Time spent in iommu_dma_map_(page|sg) decreased
by >99%, as bounce buffers don't require syncing here in the read case.
Running with multiple jobs doesn't serve as a useful performance
comparison because virtio-iommu and vfio_iommu_type1 both have big
locks that significantly limit multithreaded DMA performance.

This patch set is based on v5.13-rc7 plus the patches at [1].

David Stevens (4):
  dma-iommu: add kalloc gfp flag to alloc helper
  dma-iommu: replace device arguments
  dma-iommu: expose a few helper functions to module
  dma-iommu: Add iommu bounce buffers to dma-iommu api

 drivers/iommu/Kconfig  |  10 +
 drivers/iommu/Makefile |   1 +
 drivers/iommu/dma-iommu.c  | 119 --
 drivers/iommu/io-buffer-pool.c | 656 +
 drivers/iommu/io-buffer-pool.h |  91 +
 include/linux/dma-iommu.h  |  12 +
 6 files changed, 861 insertions(+), 28 deletions(-)
 create mode 100644 drivers/iommu/io-buffer-pool.c
 create mode 100644 drivers/iommu/io-buffer-pool.h

-- 
2.32.0.93.g670b81a890-goog



[PATCH 2/2] dma-iommu: Check CONFIG_SWIOTLB more broadly

2021-07-02 Thread David Stevens
From: David Stevens 

Add a check for CONFIG_SWIOTLB to dev_is_untrusted, so that swiotlb-related
code can be removed more aggressively.

Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 24d1042cd052..614f0dd86b08 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -310,9 +310,10 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain 
*iovad)
domain->ops->flush_iotlb_all(domain);
 }
 
-static bool dev_is_untrusted(struct device *dev)
+static bool dev_use_swiotlb(struct device *dev)
 {
-   return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+   return IS_ENABLED(CONFIG_SWIOTLB) &&
+  dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
 /**
@@ -368,7 +369,7 @@ static int iommu_dma_init_domain(struct iommu_domain 
*domain, dma_addr_t base,
 
init_iova_domain(iovad, 1UL << order, base_pfn);
 
-   if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
+   if (!cookie->fq_domain && (!dev || !dev_use_swiotlb(dev)) &&
domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
  iommu_dma_entry_dtor))
@@ -553,8 +554,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
 * If both the physical buffer start address and size are
 * page aligned, we don't need to use a bounce page.
 */
-   if (IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev) &&
-   iova_offset(iovad, phys | org_size)) {
+   if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | org_size)) {
aligned_size = iova_align(iovad, org_size);
phys = swiotlb_tbl_map_single(dev, phys, org_size,
  aligned_size, dir,
@@ -779,7 +779,7 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
 {
phys_addr_t phys;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -794,7 +794,7 @@ static void __iommu_dma_sync_single_for_device(struct 
device *dev,
dma_addr_t dma_handle, size_t size,
enum dma_data_direction dir, phys_addr_t phys)
 {
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
if (phys == 0)
@@ -821,10 +821,10 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
  sg->length, dir);
@@ -840,10 +840,10 @@ static void iommu_dma_sync_sg_for_device(struct device 
*dev,
struct scatterlist *sg;
int i;
 
-   if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
+   if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
return;
 
-   if (dev_is_untrusted(dev))
+   if (dev_use_swiotlb(dev))
for_each_sg(sgl, sg, nelems, i)
__iommu_dma_sync_single_for_device(dev,
   sg_dma_address(sg),
@@ -1010,7 +1010,7 @@ static int iommu_dma_map_sg(struct device *dev, struct 
scatterlist *sg,
iommu_deferred_attach(dev, domain))
return 0;
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
early_mapped = iommu_dma_map_sg_swiotlb(dev, sg, nents,
dir, attrs);
if (!early_mapped)
@@ -1092,7 +1092,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct 
scatterlist *sg,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
 
-   if (dev_is_untrusted(dev)) {
+   if (dev_use_swiotlb(dev)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
-- 
2.32.0.93.g670b81a890-goog



[PATCH 1/2] dma-iommu: fix swiotlb SKIP_CPU_SYNC and arch sync

2021-07-02 Thread David Stevens
From: David Stevens 

Make map_swiotlb and unmap_swiotlb responsible only for mapping and
unmapping, and consistently use the sync_single_for_* and sync_sg_for_*
functions for swiotlb sync and arch
sync. This ensures that the same code path is responsible for syncing
regardless of whether or not SKIP_CPU_SYNC is set. In the process, fix
various places where the original physical address and swiotlb tlb_addr
are mixed up:
  - Make sync_sg functions call sync_single functions for untrusted
devices, so they use tlb_addr when checking is_swiotlb_buffer and
when doing arch sync if necessary.
  - Use tlb_addr for arch sync in map_page if necessary.
  - In map_sg, map before syncing so that arch sync can target the
bounce buffer if necessary.
  - Pass SKIP_CPU_SYNC to swiotlb map and unmap to avoid double syncing
the swiotlb. This had previously only happened in the unmap_page
case, but is now necessary for all swiotlb cases.

Fixes: 82612d66d51d ("iommu: Allow the dma-iommu api to use bounce buffers")
Signed-off-by: David Stevens 
---
 drivers/iommu/dma-iommu.c | 82 ---
 1 file changed, 51 insertions(+), 31 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7bcdd1205535..24d1042cd052 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -505,7 +505,8 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, 
dma_addr_t dma_addr,
__iommu_dma_unmap(dev, dma_addr, size);
 
if (unlikely(is_swiotlb_buffer(phys)))
-   swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+   swiotlb_tbl_unmap_single(dev, phys, size, dir,
+attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -536,7 +537,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
 
 static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
size_t org_size, dma_addr_t dma_mask, bool coherent,
-   enum dma_data_direction dir, unsigned long attrs)
+   enum dma_data_direction dir, unsigned long attrs,
+   phys_addr_t *adj_phys)
 {
int prot = dma_info_to_prot(dir, coherent, attrs);
struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -555,7 +557,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
iova_offset(iovad, phys | org_size)) {
aligned_size = iova_align(iovad, org_size);
phys = swiotlb_tbl_map_single(dev, phys, org_size,
- aligned_size, dir, attrs);
+ aligned_size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC);
 
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
@@ -573,6 +576,8 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device 
*dev, phys_addr_t phys,
 
memset(padding_start, 0, padding_size);
}
+   if (adj_phys)
+   *adj_phys = phys;
 
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
@@ -785,15 +790,17 @@ static void iommu_dma_sync_single_for_cpu(struct device 
*dev,
swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
-static void iommu_dma_sync_single_for_device(struct device *dev,
-   dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
+static void __iommu_dma_sync_single_for_device(struct device *dev,
+   dma_addr_t dma_handle, size_t size,
+   enum dma_data_direction dir, phys_addr_t phys)
 {
-   phys_addr_t phys;
-
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
-   phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+   if (phys == 0)
+   phys = iommu_iova_to_phys(iommu_get_dma_domain(dev),
+ dma_handle);
+
if (is_swiotlb_buffer(phys))
swiotlb_sync_single_for_device(dev, phys, size, dir);
 
@@ -801,6 +808,12 @@ static void iommu_dma_sync_single_for_device(struct device 
*dev,
arch_sync_dma_for_device(phys, size, dir);
 }
 
+static void iommu_dma_sync_single_for_device(struct device *dev,
+   dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
+{
+   __iommu_dma_sync_single_for_device(dev, dma_handle, size, dir, 0);
+}
+
 static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sgl, int nelems,
enum dma_data_direction dir)
@@ -811,14 +824,13 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev))
return;
 
-