Re: [PATCH 9/9] csky: use the generic remapping dma alloc implementation

2018-11-05 Thread Guo Ren
On Mon, Nov 05, 2018 at 01:19:31PM +0100, Christoph Hellwig wrote:
> The csky code was largely copied from arm/arm64, so switch to the
> generic arm64-based implementation instead.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/csky/Kconfig  |   2 +-
>  arch/csky/mm/dma-mapping.c | 142 +
>  2 files changed, 3 insertions(+), 141 deletions(-)
> 
> diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
> index c0cf8e948821..ea74f3a9eeaf 100644
> --- a/arch/csky/Kconfig
> +++ b/arch/csky/Kconfig
> @@ -8,7 +8,7 @@ config CSKY
>   select CLKSRC_MMIO
>   select CLKSRC_OF
>   select DMA_DIRECT_OPS
> - select DMA_REMAP
> + select DMA_DIRECT_REMAP
>   select IRQ_DOMAIN
>   select HANDLE_DOMAIN_IRQ
>   select DW_APB_TIMER_OF
> diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
> index ad4046939713..80783bb71c5c 100644
> --- a/arch/csky/mm/dma-mapping.c
> +++ b/arch/csky/mm/dma-mapping.c
> @@ -14,73 +14,13 @@
>  #include 
>  #include 
>  
> -static struct gen_pool *atomic_pool;
> -static size_t atomic_pool_size __initdata = SZ_256K;
> -
> -static int __init early_coherent_pool(char *p)
> -{
> - atomic_pool_size = memparse(p, &p);
> - return 0;
> -}
> -early_param("coherent_pool", early_coherent_pool);
> -
>  static int __init atomic_pool_init(void)
>  {
> - struct page *page;
> - size_t size = atomic_pool_size;
> - void *ptr;
> - int ret;
> -
> - atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
> - if (!atomic_pool)
> - BUG();
> -
> - page = alloc_pages(GFP_KERNEL, get_order(size));
> - if (!page)
> - BUG();
> -
> - ptr = dma_common_contiguous_remap(page, size, VM_ALLOC,
> -   pgprot_noncached(PAGE_KERNEL),
> -   __builtin_return_address(0));
> - if (!ptr)
> - BUG();
> -
> - ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
> - page_to_phys(page), atomic_pool_size, -1);
> - if (ret)
> - BUG();
> -
> - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
> -
> - pr_info("DMA: preallocated %zu KiB pool for atomic coherent pool\n",
> - atomic_pool_size / 1024);
> -
> - pr_info("DMA: vaddr: 0x%x phy: 0x%lx,\n", (unsigned int)ptr,
> - page_to_phys(page));
> -
> - return 0;
> + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
>  }
>  postcore_initcall(atomic_pool_init);
It seems atomic_pool_init could also be removed from csky entirely; why
not put it in common code?

>  
> -static void *csky_dma_alloc_atomic(struct device *dev, size_t size,
> -dma_addr_t *dma_handle)
> -{
> - unsigned long addr;
> -
> - addr = gen_pool_alloc(atomic_pool, size);
> - if (addr)
> - *dma_handle = gen_pool_virt_to_phys(atomic_pool, addr);
> -
> - return (void *)addr;
> -}
> -
> -static void csky_dma_free_atomic(struct device *dev, size_t size, void *vaddr,
> -  dma_addr_t dma_handle, unsigned long attrs)
> -{
> - gen_pool_free(atomic_pool, (unsigned long)vaddr, size);
> -}
> -
> -static void __dma_clear_buffer(struct page *page, size_t size)
> +void arch_dma_prep_coherent(struct page *page, size_t size)
>  {
>   if (PageHighMem(page)) {
>   unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> @@ -107,84 +47,6 @@ static void __dma_clear_buffer(struct page *page, size_t size)
>   }
>  }
>  
> -static void *csky_dma_alloc_nonatomic(struct device *dev, size_t size,
> -   dma_addr_t *dma_handle, gfp_t gfp,
> -   unsigned long attrs)
> -{
> - void  *vaddr;
> - struct page *page;
> - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> -
> - if (DMA_ATTR_NON_CONSISTENT & attrs) {
> - pr_err("csky %s can't support DMA_ATTR_NON_CONSISTENT.\n", __func__);
> - return NULL;
> - }
> -
> - if (IS_ENABLED(CONFIG_DMA_CMA))
> - page = dma_alloc_from_contiguous(dev, count, get_order(size),
> -  gfp);
> - else
> - page = alloc_pages(gfp, get_order(size));
> -
> - if (!page) {
> - pr_err("csky %s no more free pages.\n", __func__);
> - return NULL;
> - }
> -
> - *dma_handle = page_to_phys(page);
> -
> - __dma_clear_buffer(page, size);
> -
> - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
> - return page;
> -
> - vaddr = dma_common_contiguous_remap(page, PAGE_ALIGN(size), VM_USERMAP,
> - pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0));
> - if (!vaddr)
> - BUG();
> -
> - return vaddr;
> -}
> -
> -static void csky_dma_free_nonatomic(
> - struct device *dev,
> - size_t size,
> - void *vaddr,
> -
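To illustrate the question raised above about moving atomic_pool_init into
common code: after this patch the csky wrapper is a one-liner, so a fully
common variant would only need the page protection from the architecture.
A minimal sketch, assuming a hypothetical arch_dma_atomic_pgprot() hook
that is not part of this series:

    /*
     * Sketch only: if the remaining per-arch initcall wrappers moved into
     * kernel/dma/remap.c, the architecture would merely supply the pgprot
     * for the uncached mapping via a (hypothetical) hook.
     */
    static int __init dma_atomic_pool_init_call(void)
    {
            return dma_atomic_pool_init(GFP_KERNEL,
                                        arch_dma_atomic_pgprot(PAGE_KERNEL));
    }
    postcore_initcall(dma_atomic_pool_init_call);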

Re: [PATCH v4 6/8] vfio/mdev: Add iommu place holders in mdev_device

2018-11-05 Thread Lu Baolu

Hi,

On 11/5/18 10:51 PM, Christoph Hellwig wrote:
> Please use EXPORT_SYMBOL_GPL like most of the vfio code.

Sure. Will use this in the next version.

Best regards,
Lu Baolu
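For context, the two export macros differ only in which modules may link
against the symbol; a minimal illustration with a hypothetical function,
not taken from the patch:

    /*
     * EXPORT_SYMBOL_GPL restricts the export to GPL-compatible modules,
     * matching the licensing stance of the rest of the vfio code.
     */
    int mdev_example_op(void)               /* hypothetical function */
    {
            return 0;
    }
    EXPORT_SYMBOL_GPL(mdev_example_op);     /* rather than EXPORT_SYMBOL() */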


Re: [PATCH RFC] dma-direct: do not allocate a single page from CMA area

2018-11-05 Thread Nicolin Chen
On Fri, Nov 02, 2018 at 07:35:42AM +0100, Christoph Hellwig wrote:
> On Thu, Nov 01, 2018 at 02:07:55PM +, Robin Murphy wrote:
> > On 31/10/2018 20:03, Nicolin Chen wrote:
> >> The addresses within a single page are always contiguous, so it's
> >> not really necessary to allocate a single page from the CMA area.
> >> Since the CMA area has a limited, predefined size, it might run out
> >> of space in heavy use cases where quite a lot of CMA pages end up
> >> being allocated for single pages.
> >>
> >> This patch tries to skip CMA allocations of single pages and lets
> >> them go through the normal page allocator instead. This saves space
> >> in the CMA area for further CMA allocations.
> >
> > In general, this seems to make sense to me. It does represent a theoretical 
> > change in behaviour for devices which have their own CMA area somewhere 
> > other than kernel memory, and only ever make non-atomic allocations, but 
> > I'm not sure whether that's a realistic or common enough case to really 
> > worry about.
> 
> Yes, I think we should make the decision in dma_alloc_from_contiguous
> based on having a per-dev CMA area or not.  There is a lot of cruft in

It seems that cma_alloc() already has a CMA area check. Wouldn't it be
duplication to add a similar one in dma_alloc_from_contiguous?

> this area that should be cleaned up while we're at it, like always
> falling back to the normal page allocator if there is no CMA area or
> nothing suitable found in dma_alloc_from_contiguous instead of
> having to duplicate all that in the caller.

Am I supposed to clean up the things mentioned above by moving the
fallback allocator into dma_alloc_from_contiguous, or just to move my
change (the count check) into dma_alloc_from_contiguous?

I understand it would be great to have a cleanup, but I feel that could
be done separately, as this patch isn't really a cleanup change.

Thanks
Nicolin
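For reference, the check under discussion boils down to something like the
following in the allocation path (a sketch of the idea only, assuming the
caller may fall back to the normal page allocator; not the final form of
the patch):

    struct page *page = NULL;
    unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

    /*
     * Skip CMA for order-0 allocations: addresses within one page are
     * contiguous anyway, so save the limited CMA space for larger buffers.
     */
    if (gfpflags_allow_blocking(gfp) && count > 1)
            page = dma_alloc_from_contiguous(dev, count, get_order(size), gfp);
    if (!page)
            page = alloc_pages(gfp, get_order(size));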


Re: [PATCH] MIPS: Fix `dma_alloc_coherent' returning a non-coherent allocation

2018-11-05 Thread Paul Burton
Hi Maciej,

On Thu, Nov 01, 2018 at 07:54:24AM +, Maciej W. Rozycki wrote:
> Fix a MIPS `dma_alloc_coherent' regression from commit bc3ec75de545 
> ("dma-mapping: merge direct and noncoherent ops") that causes a cached 
> allocation to be returned on noncoherent cache systems.
> 
> This is due to an inverted check now used in the MIPS implementation of 
> `arch_dma_alloc' on the result from `dma_direct_alloc_pages' before 
> doing the cached-to-uncached mapping of the allocation address obtained.  
> The mapping has to be done for a non-NULL rather than NULL result, 
> because a NULL result means the allocation has failed.
> 
> Invert the check for correct operation then.
> 
> Signed-off-by: Maciej W. Rozycki 
> Fixes: bc3ec75de545 ("dma-mapping: merge direct and noncoherent ops")
> Cc: sta...@vger.kernel.org # 4.19+
> ---
>  arch/mips/mm/dma-noncoherent.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Thanks, nice catch! Applied to mips-fixes.

Paul
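For reference, the inverted check described in the commit message amounts
to the following in arch/mips/mm/dma-noncoherent.c (a reconstructed sketch
of the fixed logic, not quoted from the patch itself):

    void *arch_dma_alloc(struct device *dev, size_t size,
                    dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
    {
            void *ret;

            ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
            /*
             * The regression remapped on !ret (i.e. on allocation failure);
             * the cached-to-uncached mapping must be done on success instead.
             */
            if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
                    dma_cache_wback_inv((unsigned long)ret, size);
                    ret = (void *)UNCAC_ADDR(ret);
            }
            return ret;
    }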


Re: [PATCH 8/9] csky: don't use GFP_DMA in atomic_pool_init

2018-11-05 Thread Guo Ren
On Mon, Nov 05, 2018 at 01:19:30PM +0100, Christoph Hellwig wrote:
> csky does not implement ZONE_DMA, which means passing GFP_DMA is a no-op.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/csky/mm/dma-mapping.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
> index 85437b21e045..ad4046939713 100644
> --- a/arch/csky/mm/dma-mapping.c
> +++ b/arch/csky/mm/dma-mapping.c
> @@ -35,7 +35,7 @@ static int __init atomic_pool_init(void)
>   if (!atomic_pool)
>   BUG();
>  
> - page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(size));
> + page = alloc_pages(GFP_KERNEL, get_order(size));
>   if (!page)
>   BUG();
>  
> -- 
> 2.19.1

Acked-by: Guo Ren 


Re: [PATCH 7/9] csky: don't select DMA_NONCOHERENT_OPS

2018-11-05 Thread Guo Ren
On Mon, Nov 05, 2018 at 01:19:29PM +0100, Christoph Hellwig wrote:
> This option no longer exists past Linux 4.19.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/csky/Kconfig | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
> index 8a30e006a845..c0cf8e948821 100644
> --- a/arch/csky/Kconfig
> +++ b/arch/csky/Kconfig
> @@ -8,7 +8,6 @@ config CSKY
>   select CLKSRC_MMIO
>   select CLKSRC_OF
>   select DMA_DIRECT_OPS
> - select DMA_NONCOHERENT_OPS
>   select DMA_REMAP
>   select IRQ_DOMAIN
>   select HANDLE_DOMAIN_IRQ
> -- 
> 2.19.1

Acked-by: Guo Ren 


Re: [PATCH] iommu/dma: Zero pages manually in a length of scatterlist

2018-11-05 Thread Christoph Hellwig
On Fri, Nov 02, 2018 at 04:36:13PM -0700, Nicolin Chen wrote:
> > What if the pages came from highmem? I know that doesn't happen on arm64
> > today, but the point of this code *is* to be generic, and other users will
> > arrive eventually.
> 
> Hmm, so it probably should use sg_miter_start/stop() too? Looking
> at the flush routine working in PAGE_SIZE steps per iteration, would
> it be possible to map and memset contiguous pages together? The
> flush routine might also be optimized if we can map contiguous
> pages.

FYI, I have patches I plan to submit soon that get rid of the
struct scatterlist use in this code to simplify it:

http://git.infradead.org/users/hch/misc.git/commitdiff/84e837fc3248b513f73adde49e04e7c58f605113
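For reference, an sg_miter-based zeroing loop along the lines suggested
above might look like this (a sketch, assuming the allocation's pages are
described by a scatterlist sgl with nents entries):

    struct sg_mapping_iter miter;

    /*
     * SG_MITER_TO_SG because we write to the buffer; the iterator kmaps
     * each segment, so highmem pages are handled transparently.
     */
    sg_miter_start(&miter, sgl, nents, SG_MITER_TO_SG);
    while (sg_miter_next(&miter))
            memset(miter.addr, 0, miter.length);
    sg_miter_stop(&miter);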


Re: [PATCH v4 6/8] vfio/mdev: Add iommu place holders in mdev_device

2018-11-05 Thread Christoph Hellwig
Please use EXPORT_SYMBOL_GPL like most of the vfio code.


[PATCH 8/9] csky: don't use GFP_DMA in atomic_pool_init

2018-11-05 Thread Christoph Hellwig
csky does not implement ZONE_DMA, which means passing GFP_DMA is a no-op.

Signed-off-by: Christoph Hellwig 
---
 arch/csky/mm/dma-mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 85437b21e045..ad4046939713 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -35,7 +35,7 @@ static int __init atomic_pool_init(void)
if (!atomic_pool)
BUG();
 
-   page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(size));
+   page = alloc_pages(GFP_KERNEL, get_order(size));
if (!page)
BUG();
 
-- 
2.19.1



[PATCH 6/9] dma-remap: support DMA_ATTR_NO_KERNEL_MAPPING

2018-11-05 Thread Christoph Hellwig
Do not waste vmalloc space on allocations that do not require a mapping
into the kernel address space.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/remap.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 8f1fca34b894..10a545126b0b 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -200,7 +200,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
size = PAGE_ALIGN(size);
 
-   if (!gfpflags_allow_blocking(flags)) {
+   if (!gfpflags_allow_blocking(flags) &&
+   !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
ret = dma_alloc_from_pool(size, &page, flags);
if (!ret)
return NULL;
@@ -215,6 +216,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
 
+   if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
+   return page; /* opaque cookie */
+
/* create a coherent mapping */
ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
@@ -227,7 +231,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
 {
-   if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+   if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+   /* vaddr is a struct page cookie, not a kernel address */
+   __dma_direct_free_pages(dev, size, vaddr);
+   } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
phys_addr_t phys = dma_to_phys(dev, dma_handle);
struct page *page = pfn_to_page(__phys_to_pfn(phys));
 
-- 
2.19.1
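For context, a caller passing this attribute must treat the return value
as an opaque cookie rather than a kernel virtual address; a usage sketch:

    /*
     * The driver never touches the buffer through the CPU, so no kernel
     * mapping is created; "cookie" is only valid to hand back to free.
     */
    void *cookie;
    dma_addr_t dma;

    cookie = dma_alloc_attrs(dev, size, &dma, GFP_KERNEL,
                             DMA_ATTR_NO_KERNEL_MAPPING);
    /* ... device DMAs to/from "dma" ... */
    dma_free_attrs(dev, size, cookie, dma, DMA_ATTR_NO_KERNEL_MAPPING);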



[PATCH 9/9] csky: use the generic remapping dma alloc implementation

2018-11-05 Thread Christoph Hellwig
The csky code was largely copied from arm/arm64, so switch to the
generic arm64-based implementation instead.

Signed-off-by: Christoph Hellwig 
---
 arch/csky/Kconfig  |   2 +-
 arch/csky/mm/dma-mapping.c | 142 +
 2 files changed, 3 insertions(+), 141 deletions(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index c0cf8e948821..ea74f3a9eeaf 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -8,7 +8,7 @@ config CSKY
select CLKSRC_MMIO
select CLKSRC_OF
select DMA_DIRECT_OPS
-   select DMA_REMAP
+   select DMA_DIRECT_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
select DW_APB_TIMER_OF
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index ad4046939713..80783bb71c5c 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,73 +14,13 @@
 #include 
 #include 
 
-static struct gen_pool *atomic_pool;
-static size_t atomic_pool_size __initdata = SZ_256K;
-
-static int __init early_coherent_pool(char *p)
-{
-   atomic_pool_size = memparse(p, &p);
-   return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
 static int __init atomic_pool_init(void)
 {
-   struct page *page;
-   size_t size = atomic_pool_size;
-   void *ptr;
-   int ret;
-
-   atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
-   if (!atomic_pool)
-   BUG();
-
-   page = alloc_pages(GFP_KERNEL, get_order(size));
-   if (!page)
-   BUG();
-
-   ptr = dma_common_contiguous_remap(page, size, VM_ALLOC,
- pgprot_noncached(PAGE_KERNEL),
- __builtin_return_address(0));
-   if (!ptr)
-   BUG();
-
-   ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
-   page_to_phys(page), atomic_pool_size, -1);
-   if (ret)
-   BUG();
-
-   gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL);
-
-   pr_info("DMA: preallocated %zu KiB pool for atomic coherent pool\n",
-   atomic_pool_size / 1024);
-
-   pr_info("DMA: vaddr: 0x%x phy: 0x%lx,\n", (unsigned int)ptr,
-   page_to_phys(page));
-
-   return 0;
+   return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
 }
 postcore_initcall(atomic_pool_init);
 
-static void *csky_dma_alloc_atomic(struct device *dev, size_t size,
-  dma_addr_t *dma_handle)
-{
-   unsigned long addr;
-
-   addr = gen_pool_alloc(atomic_pool, size);
-   if (addr)
-   *dma_handle = gen_pool_virt_to_phys(atomic_pool, addr);
-
-   return (void *)addr;
-}
-
-static void csky_dma_free_atomic(struct device *dev, size_t size, void *vaddr,
-dma_addr_t dma_handle, unsigned long attrs)
-{
-   gen_pool_free(atomic_pool, (unsigned long)vaddr, size);
-}
-
-static void __dma_clear_buffer(struct page *page, size_t size)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
if (PageHighMem(page)) {
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -107,84 +47,6 @@ static void __dma_clear_buffer(struct page *page, size_t size)
}
 }
 
-static void *csky_dma_alloc_nonatomic(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- unsigned long attrs)
-{
-   void  *vaddr;
-   struct page *page;
-   unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-   if (DMA_ATTR_NON_CONSISTENT & attrs) {
-   pr_err("csky %s can't support DMA_ATTR_NON_CONSISTENT.\n", __func__);
-   return NULL;
-   }
-
-   if (IS_ENABLED(CONFIG_DMA_CMA))
-   page = dma_alloc_from_contiguous(dev, count, get_order(size),
-gfp);
-   else
-   page = alloc_pages(gfp, get_order(size));
-
-   if (!page) {
-   pr_err("csky %s no more free pages.\n", __func__);
-   return NULL;
-   }
-
-   *dma_handle = page_to_phys(page);
-
-   __dma_clear_buffer(page, size);
-
-   if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
-   return page;
-
-   vaddr = dma_common_contiguous_remap(page, PAGE_ALIGN(size), VM_USERMAP,
-   pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0));
-   if (!vaddr)
-   BUG();
-
-   return vaddr;
-}
-
-static void csky_dma_free_nonatomic(
-   struct device *dev,
-   size_t size,
-   void *vaddr,
-   dma_addr_t dma_handle,
-   unsigned long attrs
-   )
-{
-   struct page *page = phys_to_page(dma_handle);
-   unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-   if ((unsigned int)vaddr >= VMALLOC_START)
-   dma_common_free_remap(vaddr, size, VM_USERMAP);

[PATCH 5/9] dma-mapping: support highmem in the generic remap allocator

2018-11-05 Thread Christoph Hellwig
By using __dma_direct_alloc_pages we can deal entirely with struct page
instead of having to derive a kernel virtual address.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/remap.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index bc42766f52df..8f1fca34b894 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -196,7 +196,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flags, unsigned long attrs)
 {
struct page *page = NULL;
-   void *ret, *kaddr;
+   void *ret;
 
size = PAGE_ALIGN(size);
 
@@ -208,10 +208,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
return ret;
}
 
-   kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
-   if (!kaddr)
+   page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
+   if (!page)
return NULL;
-   page = virt_to_page(kaddr);
 
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
@@ -221,7 +220,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
if (!ret)
-   dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs);
+   __dma_direct_free_pages(dev, size, page);
return ret;
 }
 
@@ -229,10 +228,11 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
 {
if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
-   void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
+   phys_addr_t phys = dma_to_phys(dev, dma_handle);
+   struct page *page = pfn_to_page(__phys_to_pfn(phys));
 
vunmap(vaddr);
-   dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
+   __dma_direct_free_pages(dev, size, page);
}
 }
 
-- 
2.19.1



[PATCH 7/9] csky: don't select DMA_NONCOHERENT_OPS

2018-11-05 Thread Christoph Hellwig
This option no longer exists past Linux 4.19.

Signed-off-by: Christoph Hellwig 
---
 arch/csky/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 8a30e006a845..c0cf8e948821 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -8,7 +8,6 @@ config CSKY
select CLKSRC_MMIO
select CLKSRC_OF
select DMA_DIRECT_OPS
-   select DMA_NONCOHERENT_OPS
select DMA_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
-- 
2.19.1



[PATCH 4/9] dma-mapping: move the arm64 noncoherent alloc/free support to common code

2018-11-05 Thread Christoph Hellwig
The arm64 code implementing coherent DMA allocation for architectures
with non-coherent DMA is a good start for a generic implementation, given
that it uses the generic remap helpers, provides the atomic pool for
allocations that can't sleep, and is still relatively simple and well
tested.  Move it to kernel/dma and allow architectures to opt into it
using a config symbol.  Architectures just need to provide a new
arch_dma_prep_coherent helper to write back and invalidate the caches
for any memory that gets remapped for uncached access.

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/Kconfig  |   2 +-
 arch/arm64/mm/dma-mapping.c | 184 ++--
 include/linux/dma-mapping.h |   5 +
 include/linux/dma-noncoherent.h |   2 +
 kernel/dma/Kconfig  |   6 ++
 kernel/dma/remap.c  | 158 ++-
 6 files changed, 181 insertions(+), 176 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d065acb6d10..2e645ea693ea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -82,7 +82,7 @@ config ARM64
select CRC32
select DCACHE_WORD_ACCESS
select DMA_DIRECT_OPS
-   select DMA_REMAP
+   select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
select GENERIC_ALLOCATOR
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a3ac26284845..e2e7e5d0f94e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,113 +33,6 @@
 
 #include 
 
-static struct gen_pool *atomic_pool __ro_after_init;
-
-#define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
-static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
-
-static int __init early_coherent_pool(char *p)
-{
-   atomic_pool_size = memparse(p, &p);
-   return 0;
-}
-early_param("coherent_pool", early_coherent_pool);
-
-static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
-{
-   unsigned long val;
-   void *ptr = NULL;
-
-   if (!atomic_pool) {
-   WARN(1, "coherent pool not initialised!\n");
-   return NULL;
-   }
-
-   val = gen_pool_alloc(atomic_pool, size);
-   if (val) {
-   phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
-
-   *ret_page = phys_to_page(phys);
-   ptr = (void *)val;
-   memset(ptr, 0, size);
-   }
-
-   return ptr;
-}
-
-static bool __in_atomic_pool(void *start, size_t size)
-{
-   return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
-}
-
-static int __free_from_pool(void *start, size_t size)
-{
-   if (!__in_atomic_pool(start, size))
-   return 0;
-
-   gen_pool_free(atomic_pool, (unsigned long)start, size);
-
-   return 1;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-   gfp_t flags, unsigned long attrs)
-{
-   struct page *page;
-   void *ptr, *coherent_ptr;
-   pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
-
-   size = PAGE_ALIGN(size);
-
-   if (!gfpflags_allow_blocking(flags)) {
-   struct page *page = NULL;
-   void *addr = __alloc_from_pool(size, &page, flags);
-
-   if (addr)
-   *dma_handle = phys_to_dma(dev, page_to_phys(page));
-
-   return addr;
-   }
-
-   ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
-   if (!ptr)
-   goto no_mem;
-
-   /* remove any dirty cache lines on the kernel alias */
-   __dma_flush_area(ptr, size);
-
-   /* create a coherent mapping */
-   page = virt_to_page(ptr);
-   coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
-  prot, __builtin_return_address(0));
-   if (!coherent_ptr)
-   goto no_map;
-
-   return coherent_ptr;
-
-no_map:
-   dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
-no_mem:
-   return NULL;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-   dma_addr_t dma_handle, unsigned long attrs)
-{
-   if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
-   void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
-
-   vunmap(vaddr);
-   dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
-   }
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
-   dma_addr_t dma_addr)
-{
-   return __phys_to_pfn(dma_to_phys(dev, dma_addr));
-}
-
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
 {
@@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
__dma_unmap_area(phys_to_virt(paddr), size, dir);
 }
 
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+   __dma_flush_
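For context, the opt-in contract for an architecture is deliberately
small: select the new config symbol and provide the cache maintenance
hook. A minimal sketch; the flush primitive is arch-specific, and
my_arch_wbinv_range() below is a placeholder, not a real function:

    # arch/myarch/Kconfig (sketch)
    select DMA_DIRECT_OPS
    select DMA_DIRECT_REMAP

    /*
     * Write back and invalidate the kernel alias of the buffer before
     * it is handed out through the new uncached mapping.
     */
    void arch_dma_prep_coherent(struct page *page, size_t size)
    {
            my_arch_wbinv_range(page_address(page), size);  /* placeholder */
    }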

[PATCH 3/9] dma-mapping: move the remap helpers to a separate file

2018-11-05 Thread Christoph Hellwig
The dma remap code only really makes sense for non-cache-coherent
architectures, and is currently only used by arm, arm64 and xtensa.
Split it out into a separate file with a separate Kconfig symbol,
which gets the right copyright notice given that this code was
written by Laura Abbott working for Code Aurora at that point.

Signed-off-by: Christoph Hellwig 
Acked-by: Laura Abbott 
---
 arch/arm/Kconfig |  1 +
 arch/arm64/Kconfig   |  1 +
 arch/csky/Kconfig|  1 +
 arch/xtensa/Kconfig  |  1 +
 kernel/dma/Kconfig   |  4 ++
 kernel/dma/Makefile  |  2 +-
 kernel/dma/mapping.c | 84 --
 kernel/dma/remap.c   | 88 
 8 files changed, 97 insertions(+), 85 deletions(-)
 create mode 100644 kernel/dma/remap.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 91be74d8df65..3b2852df6eb3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -30,6 +30,7 @@ config ARM
select CPU_PM if (SUSPEND || CPU_IDLE)
select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
select DMA_DIRECT_OPS if !MMU
+   select DMA_REMAP if MMU
select EDAC_SUPPORT
select EDAC_ATOMIC_SCRUB
select GENERIC_ALLOCATOR
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 787d7850e064..5d065acb6d10 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -82,6 +82,7 @@ config ARM64
select CRC32
select DCACHE_WORD_ACCESS
select DMA_DIRECT_OPS
+   select DMA_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
select GENERIC_ALLOCATOR
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index cb64f8dacd08..8a30e006a845 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -9,6 +9,7 @@ config CSKY
select CLKSRC_OF
select DMA_DIRECT_OPS
select DMA_NONCOHERENT_OPS
+   select DMA_REMAP
select IRQ_DOMAIN
select HANDLE_DOMAIN_IRQ
select DW_APB_TIMER_OF
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index d29b7365da8d..239bfb16c58b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -11,6 +11,7 @@ config XTENSA
select CLONE_BACKWARDS
select COMMON_CLK
select DMA_DIRECT_OPS
+   select DMA_REMAP if MMU
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_SHOW
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 645c7a2ecde8..c92e08173ed8 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -51,3 +51,7 @@ config SWIOTLB
bool
select DMA_DIRECT_OPS
select NEED_DMA_MAP_STATE
+
+config DMA_REMAP
+   depends on MMU
+   bool
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 7d581e4eea4a..f4feeceb8020 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_DMA_DIRECT_OPS)+= direct.o
 obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
 obj-$(CONFIG_DMA_API_DEBUG)+= debug.o
 obj-$(CONFIG_SWIOTLB)  += swiotlb.o
-
+obj-$(CONFIG_DMA_REMAP)+= remap.o
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58dec7a92b7b..dfbc3deb95cd 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -262,87 +262,3 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
 }
 EXPORT_SYMBOL(dma_common_mmap);
-
-#ifdef CONFIG_MMU
-static struct vm_struct *__dma_common_pages_remap(struct page **pages,
-   size_t size, unsigned long vm_flags, pgprot_t prot,
-   const void *caller)
-{
-   struct vm_struct *area;
-
-   area = get_vm_area_caller(size, vm_flags, caller);
-   if (!area)
-   return NULL;
-
-   if (map_vm_area(area, prot, pages)) {
-   vunmap(area->addr);
-   return NULL;
-   }
-
-   return area;
-}
-
-/*
- * remaps an array of PAGE_SIZE pages into another vm_area
- * Cannot be used in non-sleeping contexts
- */
-void *dma_common_pages_remap(struct page **pages, size_t size,
-   unsigned long vm_flags, pgprot_t prot,
-   const void *caller)
-{
-   struct vm_struct *area;
-
-   area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
-   if (!area)
-   return NULL;
-
-   area->pages = pages;
-
-   return area->addr;
-}
-
-/*
- * remaps an allocated contiguous region into another vm_area.
- * Cannot be used in non-sleeping contexts
- */
-
-void *dma_common_contiguous_remap(struct page *page, size_t size,
-   unsigned long vm_flags,
-   pgprot_t prot, const void *caller)
-{
-   int i;
-   struct page **pages;
-   struct vm_struct *area;
-
-   pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
-   if (!pages)
-   return NULL;
-
-   for (i = 0; i < (

[PATCH 2/9] dma-direct: reject highmem pages from dma_alloc_from_contiguous

2018-11-05 Thread Christoph Hellwig
dma_alloc_from_contiguous can return highmem pages depending on the
setup, which a plain non-remapping DMA allocator can't handle.  Detect
this case and try the normal page allocator instead.

Signed-off-by: Christoph Hellwig 
---
 kernel/dma/direct.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 680287779b0a..c49849bcced6 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -162,6 +162,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
if (!page)
return NULL;
 
+   if (PageHighMem(page)) {
+   /*
+* Depending on the cma= arguments and per-arch setup
+* dma_alloc_from_contiguous could return highmem pages.
+* Without remapping there is no way to return them here,
+* so log an error and fail.
+*/
+   dev_info(dev, "Rejecting highmem page from CMA.\n");
+   __dma_direct_free_pages(dev, size, page);
+   return NULL;
+   }
+
ret = page_address(page);
if (force_dma_unencrypted()) {
set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
-- 
2.19.1



[PATCH 1/9] dma-direct: provide page based alloc/free helpers

2018-11-05 Thread Christoph Hellwig
Some architectures support remapping highmem into DMA coherent
allocations.  To use the common code for them we need variants of
dma_direct_{alloc,free}_pages that do not use kernel virtual addresses.

Signed-off-by: Christoph Hellwig 
---
 include/linux/dma-direct.h |  3 +++
 kernel/dma/direct.c| 32 ++--
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bd73e7a91410..5a7a3bbb912f 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -67,6 +67,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
 void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
+struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+   dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
+void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page);
 dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 22a12ab5a5e9..680287779b0a 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -103,14 +103,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
 }
 
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
+struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
int page_order = get_order(size);
struct page *page = NULL;
u64 phys_mask;
-   void *ret;
 
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -150,11 +149,22 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
}
}
 
+   return page;
+}
+
+void *dma_direct_alloc_pages(struct device *dev, size_t size,
+   dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+{
+   struct page *page;
+   void *ret;
+
+   page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
if (!page)
return NULL;
+
ret = page_address(page);
if (force_dma_unencrypted()) {
-   set_memory_decrypted((unsigned long)ret, 1 << page_order);
+   set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
*dma_handle = __phys_to_dma(dev, page_to_phys(page));
} else {
*dma_handle = phys_to_dma(dev, page_to_phys(page));
@@ -163,20 +173,22 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
return ret;
 }
 
-/*
- * NOTE: this function must never look at the dma_addr argument, because we want
- * to be able to use it as a helper for iommu implementations as well.
- */
+void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
+{
+   unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+   if (!dma_release_from_contiguous(dev, page, count))
+   __free_pages(page, get_order(size));
+}
+
 void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
 {
-   unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned int page_order = get_order(size);
 
if (force_dma_unencrypted())
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
-   if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-   free_pages((unsigned long)cpu_addr, page_order);
+   __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
 }
 
 void *dma_direct_alloc(struct device *dev, size_t size,
-- 
2.19.1
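For context, the intended consumer of the new page-based helpers is a
remapping allocator; the pairing looks roughly like this, mirroring what
patch 5/9 does in kernel/dma/remap.c:

    struct page *page;
    void *vaddr;

    page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
    if (!page)
            return NULL;

    /* no page_address()/virt_to_page() involved, so highmem pages work */
    vaddr = dma_common_contiguous_remap(page, size, VM_USERMAP,
                    prot, __builtin_return_address(0));
    if (!vaddr)
            __dma_direct_free_pages(dev, size, page);
    return vaddr;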



move the arm64 arch_dma_alloc implementation to common code

2018-11-05 Thread Christoph Hellwig
Hi all,

this series moves the existing arm64 implementation of arch_dma_alloc and
arch_dma_free to common code, given that it is not arm64-specific, and
then also uses it for csky.  Given how many architectures remap memory
for the DMA coherent implementation, it should be usable for many more,
and the new cache flushing hook and the generic atomic pool are also
enablers for implementing the IOMMU API dma ops in common code in a
follow-on series.