Re: [PATCH 16/20] powerpc/dma: use dma_direct_{alloc,free}

From: Scott Wood
Date: 2018-08-27
On Thu, 2018-08-09 at 10:52 +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2018-07-30 at 18:38 +0200, Christoph Hellwig wrote:
> > These provide the same functionality as the existing helpers, but are
> > simpler and also allow the (optional) use of CMA.
> > 
> > Note that the swiotlb code now calls into the dma_direct code directly:
> > it doesn't work with noncoherent caches at all, and it isn't used when
> > we have an iommu either, so the iommu special case in
> > dma_nommu_alloc_coherent isn't required for swiotlb.
> 
> I am not convinced that this will produce the same results due to
> the way the zone picking works.
> 
> As for the interaction with swiotlb, we'll need the FSL guys to have
> a look. Scott, do you remember what this is about?

dma_direct_alloc() has similar (though not identical[1]) zone picking, so I
think it will work.  Needs testing though, and I no longer have a book3e
machine with a PCIe card in it.

The odd thing about this platform (fsl book3e) is the 31-bit[2] limitation on
PCI.  We currently use ZONE_DMA32 for this rather than ZONE_DMA, at Ben's
request[3].  dma_direct_alloc() regards ZONE_DMA32 as being fixed at 32 bits,
but that doesn't really matter as long as limit_zone_pfn() still works and the
allocation is made below 2 GiB.  If we were to switch to ZONE_DMA and have
both 31-bit and 32-bit zones, dma_direct_alloc() would have no way of knowing
when to use the 31-bit zone, since that decision is based on a non-power-of-2
limit that isn't reflected in the dma mask.
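
For context, the generic zone picking at this point in the series amounts to
roughly the following (a paraphrased sketch of kernel/dma/direct.c circa
v4.18; the dma_direct_pick_gfp() helper name is hypothetical, not a real
kernel symbol):

static gfp_t dma_direct_pick_gfp(struct device *dev, gfp_t gfp)
{
	/* GFP_DMA32 and GFP_DMA are no-ops without the corresponding zones */
	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		gfp |= GFP_DMA;
	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
		gfp |= GFP_DMA32;
	return gfp;
}

A 32-bit coherent mask therefore lands in ZONE_DMA32, which happens to work
on fsl book3e only because limit_zone_pfn() has already capped that zone
below 2 GiB.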

-Scott

[1] The logic in dma_direct_alloc() seems wrong -- the zone should need to fit
in the mask, not the other way around.  If ARCH_ZONE_DMA_BITS is 24, then a
mask of 0x007fffff should be a failure rather than GFP_DMA, 0x7fffffff should
be GFP_DMA rather than GFP_DMA32, and 0x3ffffffff should be GFP_DMA32 rather
than an unrestricted allocation (in each case assuming that the end of RAM is
beyond the mask).
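
In other words, footnote [1] argues the comparison should run the other way,
along these lines (hypothetical code, not what dma_direct_alloc() currently
does):

/* Pick the smallest zone that fits inside the mask; fail if none does. */
static int dma_pick_zone_gfp(u64 mask, gfp_t *gfp)
{
	if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return -EIO;		/* even ZONE_DMA may exceed the mask */
	if (mask < DMA_BIT_MASK(32))
		*gfp |= GFP_DMA;	/* ZONE_DMA fits; ZONE_DMA32 may not */
	else if (mask < DMA_BIT_MASK(64))
		*gfp |= GFP_DMA32;	/* assumes RAM extends past the mask */
	return 0;
}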

[2] The actual limit is closer to 4 GiB, but not quite, due to special
windows.  swiotlb still uses the real limit when deciding whether to bounce,
so the dma mask remains 32 bits.
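
For reference, the bounce decision lives in the powerpc dma_capable() of this
era, roughly as follows (paraphrased from
arch/powerpc/include/asm/dma-direct.h, from memory rather than verbatim):

static inline bool dma_capable(struct device *dev, dma_addr_t addr,
			       size_t size)
{
#ifdef CONFIG_SWIOTLB
	struct dev_archdata *sd = &dev->archdata;

	/* fsl book3e sets max_direct_dma_addr to the real PCI limit */
	if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
		return false;	/* past the direct window: bounce */
#endif
	if (!dev->dma_mask)
		return false;

	return addr + size - 1 <= *dev->dma_mask;
}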

[3] https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-July/099593.html



Re: [PATCH 16/20] powerpc/dma: use dma_direct_{alloc,free}

From: Benjamin Herrenschmidt
Date: 2018-08-08
On Mon, 2018-07-30 at 18:38 +0200, Christoph Hellwig wrote:
> These provide the same functionality as the existing helpers, but are
> simpler and also allow the (optional) use of CMA.
> 
> Note that the swiotlb code now calls into the dma_direct code directly:
> it doesn't work with noncoherent caches at all, and it isn't used when
> we have an iommu either, so the iommu special case in
> dma_nommu_alloc_coherent isn't required for swiotlb.

I am not convinced that this will produce the same results due to
the way the zone picking works.

As for the interaction with swiotlb, we'll need the FSL guys to have
a look. Scott, do you remember what this is about?


[PATCH 16/20] powerpc/dma: use dma_direct_{alloc,free}

From: Christoph Hellwig
Date: 2018-07-30
These provide the same functionality as the existing helpers, but are
simpler and also allow the (optional) use of CMA.

Note that the swiotlb code now calls into the dma_direct code directly:
it doesn't work with noncoherent caches at all, and it isn't used when
we have an iommu either, so the iommu special case in
dma_nommu_alloc_coherent isn't required for swiotlb.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/include/asm/pgtable.h |  1 -
 arch/powerpc/kernel/dma-swiotlb.c  |  4 ++--
 arch/powerpc/kernel/dma.c          | 78 ++-----------------------------------
 arch/powerpc/mm/mem.c              | 19 -------------------
 4 files changed, 11 insertions(+), 91 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 14c79a7dc855..123de4958d2e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -38,7 +38,6 @@ extern unsigned long empty_zero_page[];
 extern pgd_t swapper_pg_dir[];
 
 void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
-int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
 /*
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index f6e0701c5303..25986fcd1e5e 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -46,8 +46,8 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
  * for everything else.
  */
 const struct dma_map_ops powerpc_swiotlb_dma_ops = {
-	.alloc = __dma_nommu_alloc_coherent,
-	.free = __dma_nommu_free_coherent,
+	.alloc = dma_direct_alloc,
+	.free = dma_direct_free,
 	.mmap = dma_nommu_mmap_coherent,
 	.map_sg = swiotlb_map_sg_attrs,
 	.unmap_sg = swiotlb_unmap_sg_attrs,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 2cfc45acbb52..2b90a403cdac 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -26,75 +26,6 @@
  * can set archdata.dma_data to an unsigned long holding the offset. By
  * default the offset is PCI_DRAM_OFFSET.
  */
-
-static u64 __maybe_unused get_pfn_limit(struct device *dev)
-{
-	u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
-	struct dev_archdata __maybe_unused *sd = &dev->archdata;
-
-#ifdef CONFIG_SWIOTLB
-	if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
-		pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
-#endif
-
-	return pfn;
-}
-
-#ifndef CONFIG_NOT_COHERENT_CACHE
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
-   void *ret;
-   struct page *page;
-   int node = dev_to_node(dev);
-#ifdef CONFIG_FSL_SOC
-   u64 pfn = get_pfn_limit(dev);
-   int zone;
-
-   /*
-* This code should be OK on other platforms, but we have drivers that
-* don't set coherent_dma_mask. As a workaround we just ifdef it. This
-* whole routine needs some serious cleanup.
-*/
-
-   zone = dma_pfn_limit_to_zone(pfn);
-   if (zone < 0) {
-   dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
-   __func__, pfn);
-   return NULL;
-   }
-
-   switch (zone) {
-   case ZONE_DMA:
-   flag |= GFP_DMA;
-   break;
-#ifdef CONFIG_ZONE_DMA32
-   case ZONE_DMA32:
-   flag |= GFP_DMA32;
-   break;
-#endif
-   };
-#endif /* CONFIG_FSL_SOC */
-
-   page = alloc_pages_node(node, flag, get_order(size));
-   if (page == NULL)
-   return NULL;
-   ret = page_address(page);
-   memset(ret, 0, size);
-   *dma_handle = phys_to_dma(dev,__pa(ret));
-
-   return ret;
-}
-
-void __dma_nommu_free_coherent(struct device *dev, size_t size,
-   void *vaddr, dma_addr_t dma_handle,
-   unsigned long attrs)
-{
-   free_pages((unsigned long)vaddr, get_order(size));
-}
-#endif /* !CONFIG_NOT_COHERENT_CACHE */
-
 static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
   dma_addr_t *dma_handle, gfp_t flag,
   unsigned long attrs)
@@ -105,8 +36,12 @@ static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
 * we can really use the direct ops
 */
if (dma_direct_supported(dev, dev->coherent_dma_mask))
+#ifdef CONFIG_NOT_COHERENT_CACHE
return __dma_nommu_alloc_coherent(dev, size, dma_handle,
   flag, attrs);
+#else
+   return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
+#endif
 
/* Ok we can't ... do we have an iommu ? If not, fail */
iommu = get_iommu_table_base(dev);
@@ -127,8 +62,13 @@ static void