Re: [PATCH v3 3/3] arm64: Force swiotlb bounce buffering for non-coherent DMA with large CWG

2018-05-14 Thread Catalin Marinas
On Sat, May 12, 2018 at 02:38:29PM +0200, Christoph Hellwig wrote:
> On Fri, May 11, 2018 at 02:55:47PM +0100, Catalin Marinas wrote:
> > On systems with a Cache Writeback Granule (CTR_EL0.CWG) greater than
> > ARCH_DMA_MINALIGN, DMA cache maintenance on sub-CWG ranges is not safe,
> leading to data corruption. If such a configuration is detected, the
> > kernel will force swiotlb bounce buffering for all non-coherent devices.
> 
> Per the previous discussion I understand that so far this is a
> purely theoretical condition. 

That's what we think, at least for publicly available hardware.

> Given that, I'd rather avoid committing this patch and just refuse to
> boot in this case.

I'll keep it to a WARN_TAINT() for now. Given that the warning triggers
only when cache_line_size() > ARCH_DMA_MINALIGN and we keep this
constant unchanged (128), it shouldn't be much different from our
current assumptions, and no one has complained of DMA corruption so far.
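
For reference, a condensed sketch of where cache_line_size() comes from
on arm64 (paraphrased from arch/arm64/include/asm/cache.h of this era;
illustrative rather than verbatim):

#define CTR_CWG_SHIFT	24
#define CTR_CWG_MASK	15

/* CTR_EL0.CWG encodes the writeback granule as (4 << CWG) bytes */
static inline u32 cache_type_cwg(void)
{
	return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
}

static inline int cache_line_size(void)
{
	u32 cwg = cache_type_cwg();

	/* a CWG of 0 means "not reported"; assume the worst-case alignment */
	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}

So the warning fires only on hardware whose CTR_EL0 reports a writeback
granule larger than the 128 bytes that ARCH_DMA_MINALIGN guarantees.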

> In a merge window or two I plan to have a noncoherent flag in struct
> device, at which point we can handle this entirely in common code.

Sounds OK, looking forward to it.

Thanks.

-- 
Catalin


Re: [PATCH v3 3/3] arm64: Force swiotlb bounce buffering for non-coherent DMA with large CWG

2018-05-12 Thread Christoph Hellwig
On Fri, May 11, 2018 at 02:55:47PM +0100, Catalin Marinas wrote:
> On systems with a Cache Writeback Granule (CTR_EL0.CWG) greater than
> ARCH_DMA_MINALIGN, DMA cache maintenance on sub-CWG ranges is not safe,
> leading to data corruption. If such a configuration is detected, the
> kernel will force swiotlb bounce buffering for all non-coherent devices.

Per the previous discussion I understand that so far this is a
purely theoretical condition.  Given that, I'd rather avoid committing
this patch and just refuse to boot in this case.

In a merge window or two I plan to have a noncoherent flag in struct
device, at which point we can handle this entirely in common code.
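
Sketch of what that could look like (hypothetical: the dma_noncoherent
field and the arch_dma_granule_unsafe() hook are invented names here,
since neither exists yet):

/*
 * Generic code could force bouncing for non-coherent devices whenever
 * the architecture reports that its cache maintenance granule exceeds
 * the alignment kmalloc() guarantees.
 */
static inline bool dma_needs_bounce(struct device *dev, dma_addr_t addr,
				    size_t size)
{
	if (dev->dma_noncoherent && arch_dma_granule_unsafe())
		return true;				/* bounce via swiotlb */

	return !dma_capable(dev, addr, size);		/* usual mask check */
}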


[PATCH v3 3/3] arm64: Force swiotlb bounce buffering for non-coherent DMA with large CWG

2018-05-11 Thread Catalin Marinas
On systems with a Cache Writeback Granule (CTR_EL0.CWG) greater than
ARCH_DMA_MINALIGN, DMA cache maintenance on sub-CWG ranges is not safe,
leading to data corruption. If such a configuration is detected, the
kernel will force swiotlb bounce buffering for all non-coherent devices.
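
The failure mode, sketched as a standalone userspace illustration (the
addresses and sizes are invented; only the alignment arithmetic, which
mirrors what CWG-sized cache maintenance does, matters):

#include <stdio.h>

#define CWG			256	/* granule reported by CTR_EL0.CWG */
#define ARCH_DMA_MINALIGN	128	/* alignment the kernel guarantees */

int main(void)
{
	/* two 128-byte objects can share one 256-byte writeback granule */
	unsigned long buf_dma = 0x80001000UL;	/* device DMA buffer */
	unsigned long buf_cpu = buf_dma + ARCH_DMA_MINALIGN;

	/* cache maintenance operates on whole granules ... */
	unsigned long start = buf_dma & ~(unsigned long)(CWG - 1);
	unsigned long end = (buf_dma + ARCH_DMA_MINALIGN + CWG - 1) &
			    ~(unsigned long)(CWG - 1);

	/* ... so invalidating buf_dma also discards dirty data in buf_cpu */
	printf("maintenance on [%#lx, %#lx) also hits %#lx\n",
	       start, end, buf_cpu);
	return 0;
}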

Cc: Will Deacon 
Cc: Robin Murphy 
Cc: Christoph Hellwig 
Signed-off-by: Catalin Marinas 
---
 arch/arm64/Kconfig  |  1 +
 arch/arm64/include/asm/dma-direct.h | 43 +
 arch/arm64/mm/dma-mapping.c | 17 +++
 arch/arm64/mm/init.c|  3 ++-
 4 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/dma-direct.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf4938f6d..ef56b2478205 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -17,6 +17,7 @@ config ARM64
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+   select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/arm64/include/asm/dma-direct.h b/arch/arm64/include/asm/dma-direct.h
new file mode 100644
index ..0c18a4d56702
--- /dev/null
+++ b/arch/arm64/include/asm/dma-direct.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_DMA_DIRECT_H
+#define __ASM_DMA_DIRECT_H
+
+#include <linux/jump_label.h>
+#include <linux/swiotlb.h>
+
+#include <asm/dma-mapping.h>
+
+DECLARE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce);
+
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+   dma_addr_t dev_addr = (dma_addr_t)paddr;
+
+   return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+}
+
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+{
+   phys_addr_t paddr = (phys_addr_t)dev_addr;
+
+   return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+   if (!dev->dma_mask)
+   return false;
+
+   /*
+* Force swiotlb buffer bouncing when ARCH_DMA_MINALIGN < CWG. The
+* swiotlb bounce buffers are aligned to (1 << IO_TLB_SHIFT).
+*/
+   if (static_branch_unlikely(&swiotlb_noncoherent_bounce) &&
+   !is_device_dma_coherent(dev) &&
+   !is_swiotlb_buffer(__dma_to_phys(dev, addr)))
+   return false;
+
+   return addr + size - 1 <= *dev->dma_mask;
+}
+
+#endif /* __ASM_DMA_DIRECT_H */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec0181818..1e9dac8684ca 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,6 +33,7 @@
 #include 
 
 static int swiotlb __ro_after_init;
+DEFINE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce);
 
 static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
 bool coherent)
@@ -504,6 +505,14 @@ static int __init arm64_dma_init(void)
max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
swiotlb = 1;
 
+   if (WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
+  TAINT_CPU_OUT_OF_SPEC,
+  "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+  ARCH_DMA_MINALIGN, cache_line_size())) {
+   swiotlb = 1;
+   static_branch_enable(&swiotlb_noncoherent_bounce);
+   }
+
return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
@@ -882,6 +891,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
 {
+   /*
+* Enable swiotlb for buffer bouncing if ARCH_DMA_MINALIGN < CWG.
+* dma_capable() forces the actual bounce if the device is
+* non-coherent.
+*/
+   if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && !coherent)
+   iommu = NULL;
+
if (!dev->dma_ops)
dev->dma_ops = &arm64_swiotlb_dma_ops;
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9f3c47acf8ff..664acf177799 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -586,7 +586,8 @@ static void __init free_unused_memmap(void)
 void __init mem_init(void)
 {
if (swiotlb_force == SWIOTLB_FORCE ||
-   max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+   max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT) ||
+   ARCH_DMA_MINALIGN < cache_line_size())
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;
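
For context on how the dma_capable() override takes effect: the generic
swiotlb map path only uses a buffer in place when dma_capable() approves
it, and bounces otherwise. Roughly (a trimmed sketch of the lib/swiotlb.c
logic of this era; bounce_through_swiotlb() is a stand-in name):

/* sketch of the decision made in the swiotlb map path */
dma_addr_t map_page_sketch(struct device *dev, phys_addr_t phys, size_t size)
{
	dma_addr_t dev_addr = __phys_to_dma(dev, phys);

	/* a non-coherent device with CWG > ARCH_DMA_MINALIGN fails this ... */
	if (dma_capable(dev, dev_addr, size))
		return dev_addr;		/* DMA directly to 'phys' */

	/*
	 * ... so data is copied through a swiotlb slot instead.  Slots are
	 * aligned to 1 << IO_TLB_SHIFT (2KB), so a bounced buffer never
	 * shares a writeback granule with unrelated data.
	 */
	return bounce_through_swiotlb(dev, phys, size);	/* stand-in */
}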