Re: [PATCH 18/20] powerpc/dma-noncoherent: use generic dma_noncoherent_ops

2018-08-08 Thread Benjamin Herrenschmidt
On Mon, 2018-07-30 at 18:38 +0200, Christoph Hellwig wrote:
> The generic dma-noncoherent code provides all that is needed by powerpc.
> 
> Note that the cache maintenance in the existing code is a bit odd
> as it implements both the sync_to_device and sync_to_cpu callouts,
> but never flushes caches when unmapping.  This patch keeps both
> directions around, which will lead to more flushing than the previous
> implementation.  Someone more familiar with the required CPUs should
> eventually take a look and optimize the cache flush handling if needed.

The original code looks bogus indeed.

I think we got away with it because those older CPUs wouldn't speculate
or prefetch aggressively enough (or at all) so the flush on map was
sufficient, the stuff wouldn't come back into the cache.

But better safe than sorry, so ... tentative Ack, I do need to try
to dig up one of these things to test, which might take a while.

Acked-by: Benjamin Herrenschmidt 


> Signed-off-by: Christoph Hellwig 
> ---
>  arch/powerpc/Kconfig   |  2 +-
>  arch/powerpc/include/asm/dma-mapping.h | 29 -
>  arch/powerpc/kernel/dma.c  | 59 +++---
>  arch/powerpc/kernel/pci-common.c   |  5 ++-
>  arch/powerpc/kernel/setup-common.c |  4 ++
>  arch/powerpc/mm/dma-noncoherent.c  | 52 +--
>  arch/powerpc/platforms/44x/warp.c  |  2 +-
>  arch/powerpc/platforms/Kconfig.cputype |  6 ++-
>  8 files changed, 60 insertions(+), 99 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index bbfa6a8df4da..33c6017ffce6 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -129,7 +129,7 @@ config PPC
>   # Please keep this list sorted alphabetically.
>   #
>   select ARCH_HAS_DEVMEM_IS_ALLOWED
> - select ARCH_HAS_DMA_SET_COHERENT_MASK
> + select ARCH_HAS_DMA_SET_COHERENT_MASK if !NOT_COHERENT_CACHE
>   select ARCH_HAS_ELF_RANDOMIZE
>   select ARCH_HAS_FORTIFY_SOURCE
>   select ARCH_HAS_GCOV_PROFILE_ALL
> diff --git a/arch/powerpc/include/asm/dma-mapping.h 
> b/arch/powerpc/include/asm/dma-mapping.h
> index f0bf7ac2686c..879c4efba785 100644
> --- a/arch/powerpc/include/asm/dma-mapping.h
> +++ b/arch/powerpc/include/asm/dma-mapping.h
> @@ -19,40 +19,11 @@
>  #include 
>  
>  /* Some dma direct funcs must be visible for use in other dma_ops */
> -extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
> -  dma_addr_t *dma_handle, gfp_t flag,
> -  unsigned long attrs);
> -extern void __dma_nommu_free_coherent(struct device *dev, size_t size,
> -void *vaddr, dma_addr_t dma_handle,
> -unsigned long attrs);
>  extern int dma_nommu_mmap_coherent(struct device *dev,
>   struct vm_area_struct *vma,
>   void *cpu_addr, dma_addr_t handle,
>   size_t size, unsigned long attrs);
>  
> -#ifdef CONFIG_NOT_COHERENT_CACHE
> -/*
> - * DMA-consistent mapping functions for PowerPCs that don't support
> - * cache snooping.  These allocate/free a region of uncached mapped
> - * memory space for use with DMA devices.  Alternatively, you could
> - * allocate the space "normally" and use the cache management functions
> - * to ensure it is consistent.
> - */
> -struct device;
> -extern void __dma_sync(void *vaddr, size_t size, int direction);
> -extern void __dma_sync_page(struct page *page, unsigned long offset,
> -  size_t size, int direction);
> -extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
> -
> -#else /* ! CONFIG_NOT_COHERENT_CACHE */
> -/*
> - * Cache coherent cores.
> - */
> -
> -#define __dma_sync(addr, size, rw)   ((void)0)
> -#define __dma_sync_page(pg, off, sz, rw) ((void)0)
> -
> -#endif /* ! CONFIG_NOT_COHERENT_CACHE */
>  
>  static inline unsigned long device_to_mask(struct device *dev)
>  {
> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
> index 2b90a403cdac..b2e88075b2ea 100644
> --- a/arch/powerpc/kernel/dma.c
> +++ b/arch/powerpc/kernel/dma.c
> @@ -36,12 +36,7 @@ static void *dma_nommu_alloc_coherent(struct device *dev, 
> size_t size,
>* we can really use the direct ops
>*/
>   if (dma_direct_supported(dev, dev->coherent_dma_mask))
> -#ifdef CONFIG_NOT_COHERENT_CACHE
> - return __dma_nommu_alloc_coherent(dev, size, dma_handle,
> -flag, attrs);
> -#else
>   return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
> -#endif
>  
>   /* Ok we can't ... do we have an iommu ? If not, fail */
>   iommu = get_iommu_table_base(dev);
> @@ -62,12 +57,7 @@ static void dma_nommu_free_coherent(struct device *dev, 
> size_t size,
>  
>   /* See comments in 

[PATCH 18/20] powerpc/dma-noncoherent: use generic dma_noncoherent_ops

2018-07-30 Thread Christoph Hellwig
The generic dma-noncoherent code provides all that is needed by powerpc.

Note that the cache maintenance in the existing code is a bit odd
as it implements both the sync_to_device and sync_to_cpu callouts,
but never flushes caches when unmapping.  This patch keeps both
directions around, which will lead to more flushing than the previous
implementation.  Someone more familiar with the required CPUs should
eventually take a look and optimize the cache flush handling if needed.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/Kconfig   |  2 +-
 arch/powerpc/include/asm/dma-mapping.h | 29 -
 arch/powerpc/kernel/dma.c  | 59 +++---
 arch/powerpc/kernel/pci-common.c   |  5 ++-
 arch/powerpc/kernel/setup-common.c |  4 ++
 arch/powerpc/mm/dma-noncoherent.c  | 52 +--
 arch/powerpc/platforms/44x/warp.c  |  2 +-
 arch/powerpc/platforms/Kconfig.cputype |  6 ++-
 8 files changed, 60 insertions(+), 99 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bbfa6a8df4da..33c6017ffce6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -129,7 +129,7 @@ config PPC
# Please keep this list sorted alphabetically.
#
select ARCH_HAS_DEVMEM_IS_ALLOWED
-   select ARCH_HAS_DMA_SET_COHERENT_MASK
+   select ARCH_HAS_DMA_SET_COHERENT_MASK if !NOT_COHERENT_CACHE
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index f0bf7ac2686c..879c4efba785 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -19,40 +19,11 @@
 #include 
 
 /* Some dma direct funcs must be visible for use in other dma_ops */
-extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
-dma_addr_t *dma_handle, gfp_t flag,
-unsigned long attrs);
-extern void __dma_nommu_free_coherent(struct device *dev, size_t size,
-  void *vaddr, dma_addr_t dma_handle,
-  unsigned long attrs);
 extern int dma_nommu_mmap_coherent(struct device *dev,
struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t handle,
size_t size, unsigned long attrs);
 
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * DMA-consistent mapping functions for PowerPCs that don't support
- * cache snooping.  These allocate/free a region of uncached mapped
- * memory space for use with DMA devices.  Alternatively, you could
- * allocate the space "normally" and use the cache management functions
- * to ensure it is consistent.
- */
-struct device;
-extern void __dma_sync(void *vaddr, size_t size, int direction);
-extern void __dma_sync_page(struct page *page, unsigned long offset,
-size_t size, int direction);
-extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
-
-#else /* ! CONFIG_NOT_COHERENT_CACHE */
-/*
- * Cache coherent cores.
- */
-
-#define __dma_sync(addr, size, rw) ((void)0)
-#define __dma_sync_page(pg, off, sz, rw)   ((void)0)
-
-#endif /* ! CONFIG_NOT_COHERENT_CACHE */
 
 static inline unsigned long device_to_mask(struct device *dev)
 {
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 2b90a403cdac..b2e88075b2ea 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -36,12 +36,7 @@ static void *dma_nommu_alloc_coherent(struct device *dev, 
size_t size,
 * we can really use the direct ops
 */
if (dma_direct_supported(dev, dev->coherent_dma_mask))
-#ifdef CONFIG_NOT_COHERENT_CACHE
-   return __dma_nommu_alloc_coherent(dev, size, dma_handle,
-  flag, attrs);
-#else
return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
-#endif
 
/* Ok we can't ... do we have an iommu ? If not, fail */
iommu = get_iommu_table_base(dev);
@@ -62,12 +57,7 @@ static void dma_nommu_free_coherent(struct device *dev, 
size_t size,
 
/* See comments in dma_nommu_alloc_coherent() */
if (dma_direct_supported(dev, dev->coherent_dma_mask))
-#ifdef CONFIG_NOT_COHERENT_CACHE
-   return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
- attrs);
-#else
return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
-#endif
 
/* Maybe we used an iommu ... */
iommu = get_iommu_table_base(dev);
@@ -84,14 +74,8 @@ int dma_nommu_mmap_coherent(struct device *dev, struct 
vm_area_struct *vma,
 void *cpu_addr, dma_addr_t handle, size_t size,