From: Barry Song <[email protected]>

Apply batched DMA synchronization to __dma_iova_link() and
iommu_dma_iova_unlink_range_slow(). When multiple
arch_sync_dma_for_device() or arch_sync_dma_for_cpu() calls are
issued, we only need to wait once, via arch_sync_dma_flush(), for
all of the sync operations to complete, rather than waiting after
each one individually.
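Roughly, the intended pattern for a non-coherent device looks like the
sketch below (phys0/phys1 are stand-ins for whatever ranges a caller
links; arch_sync_dma_flush() is the deferred flush used by this series):

	/* __dma_iova_link(): queue cache maintenance per range, no wait */
	arch_sync_dma_for_device(phys0, size0, dir);
	arch_sync_dma_for_device(phys1, size1, dir);

	/* dma_iova_sync(): one flush completes all queued maintenance */
	arch_sync_dma_flush();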

I do not have the hardware to test this, so it is marked as
RFC. I would greatly appreciate it if someone could test it.

Suggested-by: Marek Szyprowski <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Ada Couprie Diaz <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Tangquan Zheng <[email protected]>
Signed-off-by: Barry Song <[email protected]>
---
 drivers/iommu/dma-iommu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6827763a3877..ffa940bdbbaf 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1849,10 +1849,8 @@ static int __dma_iova_link(struct device *dev, dma_addr_t addr,
        bool coherent = dev_is_dma_coherent(dev);
        int prot = dma_info_to_prot(dir, coherent, attrs);
 
-       if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
+       if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
                arch_sync_dma_for_device(phys, size, dir);
-               arch_sync_dma_flush();
-       }
 
        return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
                        prot, GFP_ATOMIC);
@@ -1995,6 +1993,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
        dma_addr_t addr = state->addr + offset;
        size_t iova_start_pad = iova_offset(iovad, addr);
 
+       if (!dev_is_dma_coherent(dev))
+               arch_sync_dma_flush();
        return iommu_sync_map(domain, addr - iova_start_pad,
                      iova_align(iovad, size + iova_start_pad));
 }
@@ -2008,6 +2008,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
        struct iommu_dma_cookie *cookie = domain->iova_cookie;
        struct iova_domain *iovad = &cookie->iovad;
        size_t iova_start_pad = iova_offset(iovad, addr);
+       bool need_sync_dma = !dev_is_dma_coherent(dev) &&
+                       !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
        dma_addr_t end = addr + size;
 
        do {
@@ -2023,16 +2025,17 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
                        end - addr, iovad->granule - iova_start_pad);
 
                if (!dev_is_dma_coherent(dev) &&
-                   !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
+                   !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
                        arch_sync_dma_for_cpu(phys, len, dir);
-                       arch_sync_dma_flush();
-               }
 
                swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs);
 
                addr += len;
                iova_start_pad = 0;
        } while (addr < end);
+
+       if (need_sync_dma)
+               arch_sync_dma_flush();
 }
 
 static void __iommu_dma_iova_unlink(struct device *dev,
-- 
2.43.0

