From: Barry Song <[email protected]>

Apply batched DMA synchronization to __dma_iova_link() and
iommu_dma_iova_unlink_range_slow(). For multiple
arch_sync_dma_for_device() and arch_sync_dma_for_cpu() calls, we only
need to wait once for the completion of all sync operations, rather
than waiting for each one individually.
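To illustrate the pattern the patch applies, here is a minimal user-space
C sketch (not part of the change itself): the per-range maintenance is
queued inside the loop and the completion barrier is paid once after the
loop. The sync_range()/flush_pending_syncs() helpers are hypothetical
stand-ins for arch_sync_dma_for_cpu()/arch_sync_dma_flush() and only
print what they would do:

	#include <stdio.h>
	#include <stddef.h>

	/* Stand-in for arch_sync_dma_for_cpu(): queue maintenance for one range. */
	static void sync_range(size_t offset, size_t len)
	{
		printf("queue sync for [%zu, %zu)\n", offset, offset + len);
	}

	/* Stand-in for arch_sync_dma_flush(): wait once for everything queued. */
	static void flush_pending_syncs(void)
	{
		printf("wait for completion of all queued syncs\n");
	}

	int main(void)
	{
		const size_t total = 4096, granule = 1024;
		size_t offset;

		/*
		 * Batched form: issue every per-range sync first, then wait a
		 * single time, instead of flushing inside each iteration.
		 */
		for (offset = 0; offset < total; offset += granule)
			sync_range(offset, granule);

		flush_pending_syncs();
		return 0;
	}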
I do not have the hardware to test this, so it is marked as RFC.
I would greatly appreciate it if someone could test it.

Suggested-by: Marek Szyprowski <[email protected]>
Cc: Leon Romanovsky <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Ada Couprie Diaz <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Tangquan Zheng <[email protected]>
Signed-off-by: Barry Song <[email protected]>
---
 drivers/iommu/dma-iommu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6827763a3877..ffa940bdbbaf 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1849,10 +1849,8 @@ static int __dma_iova_link(struct device *dev, dma_addr_t addr,
 	bool coherent = dev_is_dma_coherent(dev);
 	int prot = dma_info_to_prot(dir, coherent, attrs);
 
-	if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
+	if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
 		arch_sync_dma_for_device(phys, size, dir);
-		arch_sync_dma_flush();
-	}
 
 	return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
 			prot, GFP_ATOMIC);
@@ -1995,6 +1993,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
 	dma_addr_t addr = state->addr + offset;
 	size_t iova_start_pad = iova_offset(iovad, addr);
 
+	if (!dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
 	return iommu_sync_map(domain, addr - iova_start_pad,
 			iova_align(iovad, size + iova_start_pad));
 }
@@ -2008,6 +2008,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_start_pad = iova_offset(iovad, addr);
+	bool need_sync_dma = !dev_is_dma_coherent(dev) &&
+		!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
 	dma_addr_t end = addr + size;
 
 	do {
@@ -2023,16 +2025,17 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
 				end - addr, iovad->granule - iova_start_pad);
 
 		if (!dev_is_dma_coherent(dev) &&
-		    !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
+		    !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
 			arch_sync_dma_for_cpu(phys, len, dir);
-			arch_sync_dma_flush();
-		}
 
 		swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs);
 
 		addr += len;
 		iova_start_pad = 0;
 	} while (addr < end);
+
+	if (need_sync_dma)
+		arch_sync_dma_flush();
 }
 
 static void __iommu_dma_iova_unlink(struct device *dev,
-- 
2.43.0
