[PATCH 4/4] arm: remove wrappers for the generic dma remap helpers

2019-08-29 Thread Christoph Hellwig
Remove a few tiny wrappers around the generic dma remap code.

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/dma-mapping.c | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d07e5c865557..8cb57f1664b2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -336,18 +336,6 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 pgprot_t prot, struct page **ret_page,
 const void *caller, bool want_vaddr);
 
-static void *
-__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
-   const void *caller)
-{
-   return dma_common_contiguous_remap(page, size, prot, caller);
-}
-
-static void __dma_free_remap(void *cpu_addr, size_t size)
-{
-   dma_common_free_remap(cpu_addr, size);
-}
-
 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
 static struct gen_pool *atomic_pool __ro_after_init;
 
@@ -503,7 +491,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
if (!want_vaddr)
goto out;
 
-   ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
+   ptr = dma_common_contiguous_remap(page, size, prot, caller);
if (!ptr) {
__dma_free_buffer(page, size);
return NULL;
@@ -570,7 +558,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
goto out;
 
if (PageHighMem(page)) {
-   ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
+   ptr = dma_common_contiguous_remap(page, size, prot, caller);
if (!ptr) {
dma_release_from_contiguous(dev, page, count);
return NULL;
@@ -590,7 +578,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page,
 {
if (want_vaddr) {
if (PageHighMem(page))
-   __dma_free_remap(cpu_addr, size);
+   dma_common_free_remap(cpu_addr, size);
else
__dma_remap(page, size, PAGE_KERNEL);
}
@@ -682,7 +670,7 @@ static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
 static void remap_allocator_free(struct arm_dma_free_args *args)
 {
if (args->want_vaddr)
-   __dma_free_remap(args->cpu_addr, args->size);
+   dma_common_free_remap(args->cpu_addr, args->size);
 
__dma_free_buffer(args->page, args->size);
 }
@@ -1365,16 +1353,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
return 0;
 }
 
-/*
- * Create a CPU mapping for a specified pages
- */
-static void *
-__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
-   const void *caller)
-{
-   return dma_common_pages_remap(pages, size, prot, caller);
-}
-
 /*
  * Create a mapping in device IO address space for specified pages
  */
@@ -1526,7 +1504,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
return pages;
 
-   addr = __iommu_alloc_remap(pages, size, gfp, prot,
+   addr = dma_common_pages_remap(pages, size, prot,
   __builtin_return_address(0));
if (!addr)
goto err_mapping;
-- 
2.20.1



[PATCH 2/4] dma-mapping: always use VM_DMA_COHERENT for generic DMA remap

2019-08-29 Thread Christoph Hellwig
Currently the generic dma remap allocator gets a vm_flags passed by
the caller that is a little confusing.  We just introduced a generic
vmalloc-level flag to identify the dma coherent allocations, so use
that everywhere and remove the now pointless argument.
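For reference, the remap helper signatures before and after this change,
as a sketch reconstructed from the hunks below (not the complete header):

	/* before: every caller passed a vm_flags value */
	void *dma_common_contiguous_remap(struct page *page, size_t size,
			unsigned long vm_flags, pgprot_t prot,
			const void *caller);
	void *dma_common_pages_remap(struct page **pages, size_t size,
			unsigned long vm_flags, pgprot_t prot,
			const void *caller);
	void dma_common_free_remap(void *cpu_addr, size_t size,
			unsigned long vm_flags);

	/* after: the helpers tag the area VM_DMA_COHERENT themselves */
	void *dma_common_contiguous_remap(struct page *page, size_t size,
			pgprot_t prot, const void *caller);
	void *dma_common_pages_remap(struct page **pages, size_t size,
			pgprot_t prot, const void *caller);
	void dma_common_free_remap(void *cpu_addr, size_t size);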

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/dma-mapping.c| 10 --
 arch/xtensa/kernel/pci-dma.c |  4 ++--
 drivers/iommu/dma-iommu.c|  6 +++---
 include/linux/dma-mapping.h  |  6 ++
 kernel/dma/remap.c   | 25 +++--
 5 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5c0af4a2faa7..054a66f725b3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -340,13 +340,12 @@ static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
 {
-   return dma_common_contiguous_remap(page, size, VM_DMA_COHERENT,
-   prot, caller);
+   return dma_common_contiguous_remap(page, size, prot, caller);
 }
 
 static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-   dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
+   dma_common_free_remap(cpu_addr, size);
 }
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
@@ -1373,8 +1372,7 @@ static void *
 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
 {
-   return dma_common_pages_remap(pages, size, VM_DMA_COHERENT, prot,
-   caller);
+   return dma_common_pages_remap(pages, size, prot, caller);
 }
 
 /*
@@ -1617,7 +1615,7 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
}
 
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
-   dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
+   dma_common_free_remap(cpu_addr, size);
 
__iommu_remove_mapping(dev, handle, size);
__iommu_free_buffer(dev, pages, size, attrs);
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 65f05776d827..154979d62b73 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -167,7 +167,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (PageHighMem(page)) {
void *p;
 
-   p = dma_common_contiguous_remap(page, size, VM_MAP,
+   p = dma_common_contiguous_remap(page, size,
pgprot_noncached(PAGE_KERNEL),
__builtin_return_address(0));
if (!p) {
@@ -192,7 +192,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
page = virt_to_page(platform_vaddr_to_cached(vaddr));
} else {
 #ifdef CONFIG_MMU
-   dma_common_free_remap(vaddr, size, VM_MAP);
+   dma_common_free_remap(vaddr, size);
 #endif
page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f68a62c3c32b..013416f5ad38 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -617,7 +617,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
< size)
goto out_free_sg;
 
-   vaddr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
+   vaddr = dma_common_pages_remap(pages, size, prot,
__builtin_return_address(0));
if (!vaddr)
goto out_unmap;
@@ -941,7 +941,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
pages = __iommu_dma_get_pages(cpu_addr);
if (!pages)
page = vmalloc_to_page(cpu_addr);
-   dma_common_free_remap(cpu_addr, alloc_size, VM_USERMAP);
+   dma_common_free_remap(cpu_addr, alloc_size);
} else {
/* Lowmem means a coherent atomic or CMA allocation */
page = virt_to_page(cpu_addr);
@@ -979,7 +979,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
 
cpu_addr = dma_common_contiguous_remap(page, alloc_size,
-   VM_USERMAP, prot, __builtin_return_address(0));
+   prot, __builtin_return_address(0));
if (!cpu_addr)
goto out_free_pages;
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea32c78..c9725390fbbc 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -616,13 +616,11 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
unsigned long attrs);
 
 void *dma_common_contiguous_remap(struct page *page, size_t size,
-			unsigned long vm_flags,
 			pgprot_t prot, const void *caller);

cleanup vmap usage in the dma-mapping layer

2019-08-29 Thread Christoph Hellwig
Hi all,

the common DMA remapping code uses the vmalloc/vmap code to create
page table entries for DMA mappings.  This series lifts the currently
arm-specific VM_* flag for that into common code, and also exposes
it to userspace in procfs so the mappings are easier to understand.
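As an example of what the common flag buys us: with VM_DMA_COHERENT in
place, any code can recognize a DMA remap from nothing but the kernel
virtual address (a sketch; patch 3 wraps exactly this check in a
dma_common_find_pages() helper):

	struct vm_struct *area = find_vm_area(cpu_addr);

	if (area && (area->flags & VM_DMA_COHERENT))
		/* cpu_addr was created by the generic DMA remap code */;

The same flag is also printed for each area in /proc/vmallocinfo.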


[PATCH 1/4] vmalloc: lift the arm flag for coherent mappings to common code

2019-08-29 Thread Christoph Hellwig
The arm architecture had a VM_ARM_DMA_CONSISTENT flag to mark DMA
coherent remapping for a while.  Lift this flag to common code so
that we can use it generically.  We also check it in the only place
VM_USERMAP is directly checked so that we can entirely replace that
flag as well (although I'm not even sure why we'd want to allow
remapping DMA mappings, but I'd rather not change behavior).
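On arm the change boils down to swapping the flag pair at each remap
call site, e.g. (compare the first two hunks below):

	/* before */
	dma_common_contiguous_remap(page, size,
			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);

	/* after */
	dma_common_contiguous_remap(page, size, VM_DMA_COHERENT,
			prot, caller);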

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/dma-mapping.c | 22 +++---
 arch/arm/mm/mm.h  |  3 ---
 include/linux/vmalloc.h   |  2 ++
 mm/vmalloc.c  |  5 -
 4 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d42557ee69c2..5c0af4a2faa7 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -340,19 +340,13 @@ static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
 {
-   /*
-* DMA allocation can be mapped to user space, so lets
-* set VM_USERMAP flags too.
-*/
-   return dma_common_contiguous_remap(page, size,
-   VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+   return dma_common_contiguous_remap(page, size, VM_DMA_COHERENT,
prot, caller);
 }
 
 static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-   dma_common_free_remap(cpu_addr, size,
-   VM_ARM_DMA_CONSISTENT | VM_USERMAP);
+   dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
 }
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
@@ -1379,8 +1373,8 @@ static void *
 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
const void *caller)
 {
-   return dma_common_pages_remap(pages, size,
-   VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
+   return dma_common_pages_remap(pages, size, VM_DMA_COHERENT, prot,
+   caller);
 }
 
 /*
@@ -1464,7 +1458,7 @@ static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
return cpu_addr;
 
area = find_vm_area(cpu_addr);
-   if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+   if (area && (area->flags & VM_DMA_COHERENT))
return area->pages;
return NULL;
 }
@@ -1622,10 +1616,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
}
 
-   if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
-   dma_common_free_remap(cpu_addr, size,
-   VM_ARM_DMA_CONSISTENT | VM_USERMAP);
-   }
+   if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
+   dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
 
__iommu_remove_mapping(dev, handle, size);
__iommu_free_buffer(dev, pages, size, attrs);
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 941356d95a67..88c121ac14b3 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -70,9 +70,6 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
 #define VM_ARM_MTYPE(mt)   ((mt) << 20)
 #define VM_ARM_MTYPE_MASK  (0x1f << 20)
 
-/* consistent regions used by dma_alloc_attrs() */
-#define VM_ARM_DMA_CONSISTENT	0x20000000
-
 
 struct static_vm {
struct vm_struct vm;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 9b21d0047710..dfa718ffdd4f 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -18,6 +18,7 @@ struct notifier_block;		/* in notifier.h */
 #define VM_ALLOC		0x0002	/* vmalloc() */
 #define VM_MAP			0x0004	/* vmap()ed pages */
 #define VM_USERMAP		0x0008	/* suitable for remap_vmalloc_range */
+#define VM_DMA_COHERENT		0x0010	/* dma_alloc_coherent */
 #define VM_UNINITIALIZED	0x0020	/* vm_struct is not fully initialized */
 #define VM_NO_GUARD		0x0040	/* don't add guard page */
 #define VM_KASAN		0x0080	/* has allocated kasan shadow memory */
@@ -26,6 +27,7 @@ struct notifier_block;		/* in notifier.h */
  * vfree_atomic().
  */
 #define VM_FLUSH_RESET_PERMS	0x0100	/* Reset direct map and flush TLB on unmap */
+
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7ba11e12a11f..c1246d77cf75 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2993,7 +2993,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
if (!area)
return -EINVAL;
 
-   if (!(area->flags & VM_USERMAP))
+   if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
return -EINVAL;
 
if (kaddr + size > area->addr + get_vm_area_size(area))
@@ -3496,6 +3496,9 @@ static int s_show(struct seq_file *m, void *p)
 	if (v->flags & VM_USERMAP)
 		seq_puts(m, " user");
 
+	if (v->flags & VM_DMA_COHERENT)
+		seq_puts(m, " dma-coherent");
+
 	if (is_vmalloc_addr(v->pages))
 		seq_puts(m, " vpages");
 

[PATCH 3/4] dma-mapping: introduce a dma_common_find_pages helper

2019-08-29 Thread Christoph Hellwig
A helper to find the backing page array based on a virtual address.
This also ensures we do the same vm_flags check everywhere instead
of slightly different or missing ones in a few places.
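Callers then collapse to a simple NULL check; a sketch modeled on the
iommu_dma_mmap() hunk below:

	struct page **pages = dma_common_find_pages(cpu_addr);

	if (pages)
		/* a remapped, possibly non-contiguous allocation:
		 * operate on the backing page array */
		return __iommu_dma_mmap(pages, size, vma);
	/* otherwise this is not a VM_DMA_COHERENT area */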

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/dma-mapping.c   |  7 +--
 drivers/iommu/dma-iommu.c   | 15 +++
 include/linux/dma-mapping.h |  1 +
 kernel/dma/remap.c  | 13 +++--
 4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 054a66f725b3..d07e5c865557 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1447,18 +1447,13 @@ static struct page **__atomic_get_pages(void *addr)
 
 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
 {
-   struct vm_struct *area;
-
if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
return __atomic_get_pages(cpu_addr);
 
if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
return cpu_addr;
 
-   area = find_vm_area(cpu_addr);
-   if (area && (area->flags & VM_DMA_COHERENT))
-   return area->pages;
-   return NULL;
+   return dma_common_find_pages(cpu_addr);
 }
 
 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 013416f5ad38..eafc378da448 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -541,15 +541,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
return pages;
 }
 
-static struct page **__iommu_dma_get_pages(void *cpu_addr)
-{
-   struct vm_struct *area = find_vm_area(cpu_addr);
-
-   if (!area || !area->pages)
-   return NULL;
-   return area->pages;
-}
-
 /**
  * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
  * @dev: Device to allocate memory for. Must be a real device
@@ -938,7 +929,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
 * If it the address is remapped, then it's either non-coherent
 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
 */
-   pages = __iommu_dma_get_pages(cpu_addr);
+   pages = dma_common_find_pages(cpu_addr);
if (!pages)
page = vmalloc_to_page(cpu_addr);
dma_common_free_remap(cpu_addr, alloc_size);
@@ -1045,7 +1036,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
return -ENXIO;
 
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
-   struct page **pages = __iommu_dma_get_pages(cpu_addr);
+   struct page **pages = dma_common_find_pages(cpu_addr);
 
if (pages)
return __iommu_dma_mmap(pages, size, vma);
@@ -1067,7 +1058,7 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
int ret;
 
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
-   struct page **pages = __iommu_dma_get_pages(cpu_addr);
+   struct page **pages = dma_common_find_pages(cpu_addr);
 
if (pages) {
return sg_alloc_table_from_pages(sgt, pages,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index c9725390fbbc..e4840f40ae69 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -615,6 +615,7 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
 
+struct page **dma_common_find_pages(void *cpu_addr);
 void *dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller);
 
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 01d4ef5685a4..3482fc585c59 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -11,6 +11,15 @@
 #include 
 #include 
 
+struct page **dma_common_find_pages(void *cpu_addr)
+{
+   struct vm_struct *area = find_vm_area(cpu_addr);
+
+   if (!area || area->flags != VM_DMA_COHERENT)
+   return NULL;
+   return area->pages;
+}
+
 static struct vm_struct *__dma_common_pages_remap(struct page **pages,
size_t size, pgprot_t prot, const void *caller)
 {
@@ -78,9 +87,9 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
  */
 void dma_common_free_remap(void *cpu_addr, size_t size)
 {
-   struct vm_struct *area = find_vm_area(cpu_addr);
+   struct page **pages = dma_common_find_pages(cpu_addr);
 
-   if (!area || area->flags != VM_DMA_COHERENT) {
+   if (!pages) {
WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
return;
}
-- 
2.20.1



[PATCH] iommu/qcom_iommu: Use struct_size() helper

2019-08-29 Thread Gustavo A. R. Silva
One of the more common cases of allocation size calculations is finding
the size of a structure that has a zero-sized array at the end, along
with memory for some number of elements for that array. For example:

struct qcom_iommu_dev {
...
struct qcom_iommu_ctx   *ctxs[0];   /* indexed by asid-1 */
};

Make use of the struct_size() helper instead of an open-coded version
in order to avoid any potential type mistakes.

So, replace the following form:

sizeof(*qcom_iommu) + (max_asid * sizeof(qcom_iommu->ctxs[0]))

with:

struct_size(qcom_iommu, ctxs, max_asid)

Also, notice that, in this case, variable sz is not necessary,
hence it is removed.
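For the curious: struct_size() (from include/linux/overflow.h) computes,
conceptually, the same value as the open-coded expression, but it type
checks against the actual array member and saturates to SIZE_MAX instead
of wrapping on overflow, so the subsequent allocation fails cleanly:

	/* struct_size(p, member, n) is roughly:
	 *	sizeof(*(p)) + (n) * sizeof(*(p)->member)
	 * with overflow detection on the multiply and add.
	 */
	qcom_iommu = devm_kzalloc(dev,
				  struct_size(qcom_iommu, ctxs, max_asid),
				  GFP_KERNEL);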

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva 
---
 drivers/iommu/qcom_iommu.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 3608f58f1ea8..c18168fd7fe7 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -801,7 +801,7 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
struct qcom_iommu_dev *qcom_iommu;
struct device *dev = &pdev->dev;
struct resource *res;
-   int ret, sz, max_asid = 0;
+   int ret, max_asid = 0;
 
/* find the max asid (which is 1:1 to ctx bank idx), so we know how
 * many child ctx devices we have:
@@ -809,9 +809,8 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
for_each_child_of_node(dev->of_node, child)
max_asid = max(max_asid, get_asid(child));
 
-   sz = sizeof(*qcom_iommu) + (max_asid * sizeof(qcom_iommu->ctxs[0]));
-
-   qcom_iommu = devm_kzalloc(dev, sz, GFP_KERNEL);
+   qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid),
+ GFP_KERNEL);
if (!qcom_iommu)
return -ENOMEM;
qcom_iommu->num_ctxs = max_asid;
-- 
2.23.0



Re: swiotlb-xen cleanups v2

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> Hi Xen maintainers and friends,
> 
> please take a look at this series that cleans up the parts of swiotlb-xen
> that deal with non-coherent caches.
> 
> Changes since v1:
>  - rewrite dma_cache_maint to be much simpler
>  - improve various comments and commit logs
>  - remove page-coherent.h entirely

Thanks for your work on this, it really makes the code better. I tested
it on ARM64 with a non-coherent network device and verified it works as
intended (Cadence GEM on ZynqMP).


Re: [PATCH 03/11] xen/arm: simplify dma_cache_maint

2019-08-29 Thread Stefano Stabellini
On Tue, 27 Aug 2019, Christoph Hellwig wrote:
> And this was still buggy I think, it really needs some real Xen/Arm
> testing which I can't do.  Hopefully better version below:
> 
> --
> From 5ad4b6e291dbb49f65480c9b769414931cbd485a Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig 
> Date: Wed, 24 Jul 2019 15:26:08 +0200
> Subject: xen/arm: simplify dma_cache_maint
> 
> Calculate the required operation in the caller, and pass it directly
> instead of recalculating it for each page, and use simple arithmetics
> to get from the physical address to Xen page size aligned chunks.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm/xen/mm.c | 61 ---
>  1 file changed, 21 insertions(+), 40 deletions(-)
> 
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 90574d89d0d4..2fde161733b0 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -35,64 +35,45 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order)
>   return __get_free_pages(flags, order);
>  }
>  
> -enum dma_cache_op {
> -   DMA_UNMAP,
> -   DMA_MAP,
> -};
>  static bool hypercall_cflush = false;
>  
> -/* functions called by SWIOTLB */
> -
> -static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
> - size_t size, enum dma_data_direction dir, enum dma_cache_op op)
> +/* buffers in highmem or foreign pages cannot cross page boundaries */
> +static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
>  {
>   struct gnttab_cache_flush cflush;
> - unsigned long xen_pfn;
> - size_t left = size;
>  
> - xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
> - offset %= XEN_PAGE_SIZE;
> + cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;
> + cflush.offset = xen_offset_in_page(handle);
> + cflush.op = op;
>  
>   do {
> - size_t len = left;
> - 
> - /* buffers in highmem or foreign pages cannot cross page
> -  * boundaries */
> - if (len + offset > XEN_PAGE_SIZE)
> - len = XEN_PAGE_SIZE - offset;
> -
> - cflush.op = 0;
> - cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
> - cflush.offset = offset;
> - cflush.length = len;
> -
> - if (op == DMA_UNMAP && dir != DMA_TO_DEVICE)
> - cflush.op = GNTTAB_CACHE_INVAL;
> - if (op == DMA_MAP) {
> - if (dir == DMA_FROM_DEVICE)
> - cflush.op = GNTTAB_CACHE_INVAL;
> - else
> - cflush.op = GNTTAB_CACHE_CLEAN;
> - }
> - if (cflush.op)
> - HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
> + if (size + cflush.offset > XEN_PAGE_SIZE)
> + cflush.length = XEN_PAGE_SIZE - cflush.offset;
> + else
> + cflush.length = size;
> +
> + HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
>  
> - offset = 0;
> - xen_pfn++;
> - left -= len;
> - } while (left);
> + cflush.offset = 0;
> + cflush.a.dev_bus_addr += cflush.length;
> + size -= cflush.length;

Yes that's better

Reviewed-by: Stefano Stabellini 


> + } while (size);
>  }
>  
>  static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
>   size_t size, enum dma_data_direction dir)
>  {
> - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
> + if (dir != DMA_TO_DEVICE)
> + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
>  }
>  
>  static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
>   size_t size, enum dma_data_direction dir)
>  {
> - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
> + if (dir == DMA_FROM_DEVICE)
> + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
> + else
> + dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
>  }
>  
>  void __xen_dma_map_page(struct device *hwdev, struct page *page,
> -- 
> 2.20.1
> 


Re: [PATCH 11/11] arm64: use asm-generic/dma-mapping.h

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> Now that the Xen special cases are gone nothing worth mentioning is
> left in the arm64  file, so switch to use the
> asm-generic version instead.
> 
> Signed-off-by: Christoph Hellwig 
> Acked-by: Will Deacon 

Reviewed-by: Stefano Stabellini 


> ---
>  arch/arm64/include/asm/Kbuild|  1 +
>  arch/arm64/include/asm/dma-mapping.h | 22 --
>  arch/arm64/mm/dma-mapping.c  |  1 +
>  3 files changed, 2 insertions(+), 22 deletions(-)
>  delete mode 100644 arch/arm64/include/asm/dma-mapping.h
> 
> diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
> index c52e151afab0..98a5405c8558 100644
> --- a/arch/arm64/include/asm/Kbuild
> +++ b/arch/arm64/include/asm/Kbuild
> @@ -4,6 +4,7 @@ generic-y += delay.h
>  generic-y += div64.h
>  generic-y += dma.h
>  generic-y += dma-contiguous.h
> +generic-y += dma-mapping.h
>  generic-y += early_ioremap.h
>  generic-y += emergency-restart.h
>  generic-y += hw_irq.h
> diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
> deleted file mode 100644
> index 67243255a858..
> --- a/arch/arm64/include/asm/dma-mapping.h
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0-only */
> -/*
> - * Copyright (C) 2012 ARM Ltd.
> - */
> -#ifndef __ASM_DMA_MAPPING_H
> -#define __ASM_DMA_MAPPING_H
> -
> -#ifdef __KERNEL__
> -
> -#include 
> -#include 
> -
> -#include 
> -#include 
> -
> -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
> -{
> - return NULL;
> -}
> -
> -#endif   /* __KERNEL__ */
> -#endif   /* __ASM_DMA_MAPPING_H */
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 4b244a037349..6578abcfbbc7 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> -- 
> 2.20.1
> 


Re: [PATCH 10/11] swiotlb-xen: merge xen_unmap_single into xen_swiotlb_unmap_page

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> No need for a no-op wrapper.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Stefano Stabellini 

> ---
>  drivers/xen/swiotlb-xen.c | 15 ---
>  1 file changed, 4 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 95911ff9c11c..384304a77020 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -414,9 +414,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
>   * After this call, reads by the cpu to the buffer are guaranteed to see
>   * whatever the device wrote there.
>   */
> -static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
> -  size_t size, enum dma_data_direction dir,
> -  unsigned long attrs)
> +static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
> + size_t size, enum dma_data_direction dir, unsigned long attrs)
>  {
>   phys_addr_t paddr = xen_bus_to_phys(dev_addr);
>  
> @@ -430,13 +429,6 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
>   swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
>  }
>  
> -static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
> - size_t size, enum dma_data_direction dir,
> - unsigned long attrs)
> -{
> - xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
> -}
> -
>  static void
>  xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
>   size_t size, enum dma_data_direction dir)
> @@ -477,7 +469,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct 
> scatterlist *sgl, int nelems,
>   BUG_ON(dir == DMA_NONE);
>  
>   for_each_sg(sgl, sg, nelems, i)
> - xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs);
> + xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
> + dir, attrs);
>  
>  }
>  
> -- 
> 2.20.1
> 


[PATCH 4/7] iommu/arm-smmu: Add global/context fault implementation hooks

2019-08-29 Thread Krishna Reddy
Add global/context fault hooks to allow the Nvidia SMMU implementation
to handle faults across multiple SMMU instances.

Signed-off-by: Krishna Reddy 
---
 drivers/iommu/arm-smmu-nvidia.c | 127 
 drivers/iommu/arm-smmu.c|   6 ++
 drivers/iommu/arm-smmu.h|   4 ++
 3 files changed, 137 insertions(+)

diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c
index a429b2c..b2a3c49 100644
--- a/drivers/iommu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm-smmu-nvidia.c
@@ -14,6 +14,10 @@
 
 #define NUM_SMMU_INSTANCES 3
 
+static irqreturn_t nsmmu_context_fault_inst(int irq,
+   struct arm_smmu_device *smmu,
+   int idx, int inst);
+
 struct nvidia_smmu {
struct arm_smmu_device  smmu;
int num_inst;
@@ -87,12 +91,135 @@ static void nsmmu_tlb_sync(struct arm_smmu_device *smmu, int page,
nsmmu_tlb_sync_wait(smmu, page, sync, status, i);
 }
 
+static irqreturn_t nsmmu_global_fault_inst(int irq,
+  struct arm_smmu_device *smmu,
+  int inst)
+{
+   u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
+
+   gfsr = readl_relaxed(nsmmu_page(smmu, inst, 0) + ARM_SMMU_GR0_sGFSR);
+   gfsynr0 = readl_relaxed(nsmmu_page(smmu, inst, 0) +
+   ARM_SMMU_GR0_sGFSYNR0);
+   gfsynr1 = readl_relaxed(nsmmu_page(smmu, inst, 0) +
+   ARM_SMMU_GR0_sGFSYNR1);
+   gfsynr2 = readl_relaxed(nsmmu_page(smmu, inst, 0) +
+   ARM_SMMU_GR0_sGFSYNR2);
+
+   if (!gfsr)
+   return IRQ_NONE;
+
+   dev_err_ratelimited(smmu->dev,
+   "Unexpected global fault, this could be serious\n");
+   dev_err_ratelimited(smmu->dev,
+   "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 
0x%08x\n",
+   gfsr, gfsynr0, gfsynr1, gfsynr2);
+
+   writel_relaxed(gfsr, nsmmu_page(smmu, inst, 0) + ARM_SMMU_GR0_sGFSR);
+   return IRQ_HANDLED;
+}
+
+static irqreturn_t nsmmu_global_fault(int irq, struct arm_smmu_device *smmu)
+{
+   int i;
+   irqreturn_t irq_ret = IRQ_NONE;
+
+   /* Interrupt line is shared between global and context faults.
+* Check for both type of interrupts on either fault handlers.
+*/
+   for (i = 0; i < to_nsmmu(smmu)->num_inst; i++) {
+   irq_ret = nsmmu_context_fault_inst(irq, smmu, 0, i);
+   if (irq_ret == IRQ_HANDLED)
+   return irq_ret;
+   }
+
+   for (i = 0; i < to_nsmmu(smmu)->num_inst; i++) {
+   irq_ret = nsmmu_global_fault_inst(irq, smmu, i);
+   if (irq_ret == IRQ_HANDLED)
+   return irq_ret;
+   }
+
+   return irq_ret;
+}
+
+static irqreturn_t nsmmu_context_fault_bank(int irq,
+   struct arm_smmu_device *smmu,
+   int idx, int inst)
+{
+   u32 fsr, fsynr, cbfrsynra;
+   unsigned long iova;
+
+   fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
+   if (!(fsr & FSR_FAULT))
+   return IRQ_NONE;
+
+   fsynr = readl_relaxed(nsmmu_page(smmu, inst, smmu->numpage + idx) +
+ ARM_SMMU_CB_FSYNR0);
+   iova = readq_relaxed(nsmmu_page(smmu, inst, smmu->numpage + idx) +
+ARM_SMMU_CB_FAR);
+   cbfrsynra = readl_relaxed(nsmmu_page(smmu, inst, 1) +
+ ARM_SMMU_GR1_CBFRSYNRA(idx));
+
+   dev_err_ratelimited(smmu->dev,
+   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cbfrsynra=0x%x, cb=%d\n",
+   fsr, iova, fsynr, cbfrsynra, idx);
+
+   writel_relaxed(fsr, nsmmu_page(smmu, inst, smmu->numpage + idx) +
+   ARM_SMMU_CB_FSR);
+   return IRQ_HANDLED;
+}
+
+static irqreturn_t nsmmu_context_fault_inst(int irq,
+   struct arm_smmu_device *smmu,
+   int idx, int inst)
+{
+   irqreturn_t irq_ret = IRQ_NONE;
+
+   /* Interrupt line shared between global and all context faults.
+* Check for faults across all contexts.
+*/
+   for (idx = 0; idx < smmu->num_context_banks; idx++) {
+   irq_ret = nsmmu_context_fault_bank(irq, smmu, idx, inst);
+
+   if (irq_ret == IRQ_HANDLED)
+   break;
+   }
+
+   return irq_ret;
+}
+
+static irqreturn_t nsmmu_context_fault(int irq,
+  struct arm_smmu_device *smmu,
+  int cbndx)
+{
+   int i;
+   irqreturn_t irq_ret = IRQ_NONE;
+
+   /* Interrupt line is shared between global and context faults.
+* Check for both type of interrupts on either fault handlers.
+*/

Re: [PATCH 08/11] swiotlb-xen: simplify cache maintainance

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> Now that we know we always have the dma-noncoherent.h helpers available
> if we are on an architecture with support for non-coherent devices,
> we can just call them directly, and remove the calls to the dma-direct
> routines, including the fact that we call the dma_direct_map_page
> routines but ignore the value returned from it.  Instead we now have
> Xen wrappers for the arch_sync_dma_for_{device,cpu} helpers that call
> the special Xen versions of those routines for foreign pages.
> 
> Note that the new helpers get the physical address passed in addition
> to the dma address to avoid another translation for the local cache
> maintenance.  The pfn_valid checks remain on the dma address as in
> the old code, even if that looks a little funny.
> 
> Signed-off-by: Christoph Hellwig 
>
> ---
>  arch/arm/xen/mm.c| 64 ++
>  arch/x86/include/asm/xen/page-coherent.h | 11 
>  drivers/xen/swiotlb-xen.c| 20 +++
>  include/xen/arm/page-coherent.h  | 69 ++--
>  4 files changed, 31 insertions(+), 133 deletions(-)

WOW nice! Now I really can see why this series was worth doing :-)

Reviewed-by: Stefano Stabellini 




> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index b7d53415532b..7096652f5a1e 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -61,63 +61,33 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
>   } while (size);
>  }
>  
> -static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
> - size_t size, enum dma_data_direction dir)
> +/*
> + * Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen
> + * pages, it is not possible for it to contain a mix of local and foreign Xen
> + * pages.  Calling pfn_valid on a foreign mfn will always return false, so if
> + * pfn_valid returns true the pages is local and we can use the native
> + * dma-direct functions, otherwise we call the Xen specific version.
> + */
> +void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
> + phys_addr_t paddr, size_t size, enum dma_data_direction dir)
>  {
> - if (dir != DMA_TO_DEVICE)
> + if (pfn_valid(PFN_DOWN(handle)))
> + arch_sync_dma_for_cpu(dev, paddr, size, dir);
> + else if (dir != DMA_TO_DEVICE)
>   dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
>  }
>  
> -static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
> - size_t size, enum dma_data_direction dir)
> +void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
> + phys_addr_t paddr, size_t size, enum dma_data_direction dir)
>  {
> - if (dir == DMA_FROM_DEVICE)
> + if (pfn_valid(PFN_DOWN(handle)))
> + arch_sync_dma_for_device(dev, paddr, size, dir);
> + else if (dir == DMA_FROM_DEVICE)
>   dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
>   else
>   dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
>  }
>  
> -void __xen_dma_map_page(struct device *hwdev, struct page *page,
> -  dma_addr_t dev_addr, unsigned long offset, size_t size,
> -  enum dma_data_direction dir, unsigned long attrs)
> -{
> - if (dev_is_dma_coherent(hwdev))
> - return;
> - if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
> - return;
> -
> - __xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
> -}
> -
> -void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
> - size_t size, enum dma_data_direction dir,
> - unsigned long attrs)
> -
> -{
> - if (dev_is_dma_coherent(hwdev))
> - return;
> - if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
> - return;
> -
> - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
> -}
> -
> -void __xen_dma_sync_single_for_cpu(struct device *hwdev,
> - dma_addr_t handle, size_t size, enum dma_data_direction dir)
> -{
> - if (dev_is_dma_coherent(hwdev))
> - return;
> - __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
> -}
> -
> -void __xen_dma_sync_single_for_device(struct device *hwdev,
> - dma_addr_t handle, size_t size, enum dma_data_direction dir)
> -{
> - if (dev_is_dma_coherent(hwdev))
> - return;
> - __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
> -}
> -
>  bool xen_arch_need_swiotlb(struct device *dev,
>  phys_addr_t phys,
>  dma_addr_t dev_addr)
> diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
> index 8ee33c5edded..c9c8398a31ff 100644
> --- a/arch/x86/include/asm/xen/page-coherent.h
> +++ b/arch/x86/include/asm/xen/page-coherent.h
> @@ -2,17 +2,6 @@
>  #ifndef _ASM_X86_XEN_PAGE_COHERENT_H
>  #define _ASM_X86_XEN_PAGE_COHERENT_H
>  
> -#include 
> -#include 
> -
> -static 

Re: [PATCH 09/11] swiotlb-xen: remove page-coherent.h

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> The only thing left of page-coherent.h is two functions implemented by
> the architecture for non-coherent DMA support that are never called for
> fully coherent architectures.  Just move the prototypes for those to
> swiotlb-xen.h instead.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Stefano Stabellini 


> ---
>  arch/arm/include/asm/xen/page-coherent.h   |  2 --
>  arch/arm64/include/asm/xen/page-coherent.h |  2 --
>  arch/x86/include/asm/xen/page-coherent.h   | 11 ---
>  drivers/xen/swiotlb-xen.c  |  3 ---
>  include/Kbuild |  1 -
>  include/xen/arm/page-coherent.h| 10 --
>  include/xen/swiotlb-xen.h  |  6 ++
>  7 files changed, 6 insertions(+), 29 deletions(-)
>  delete mode 100644 arch/arm/include/asm/xen/page-coherent.h
>  delete mode 100644 arch/arm64/include/asm/xen/page-coherent.h
>  delete mode 100644 arch/x86/include/asm/xen/page-coherent.h
>  delete mode 100644 include/xen/arm/page-coherent.h
> 
> diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
> deleted file mode 100644
> index 27e984977402..
> --- a/arch/arm/include/asm/xen/page-coherent.h
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#include 
> diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
> deleted file mode 100644
> index 27e984977402..
> --- a/arch/arm64/include/asm/xen/page-coherent.h
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#include 
> diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
> deleted file mode 100644
> index c9c8398a31ff..
> --- a/arch/x86/include/asm/xen/page-coherent.h
> +++ /dev/null
> @@ -1,11 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _ASM_X86_XEN_PAGE_COHERENT_H
> -#define _ASM_X86_XEN_PAGE_COHERENT_H
> -
> -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
> - dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
> -
> -static inline void xen_dma_sync_single_for_device(struct device *hwdev,
> - dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
> -
> -#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index a642e284f1e2..95911ff9c11c 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -35,9 +35,6 @@
>  #include 
>  #include 
>  
> -#include 
> -#include 
> -
>  #include 
>  /*
>   * Used to do a quick range check in swiotlb_tbl_unmap_single and
> diff --git a/include/Kbuild b/include/Kbuild
> index c38f0d46b267..cce5cf6abf89 100644
> --- a/include/Kbuild
> +++ b/include/Kbuild
> @@ -1189,7 +1189,6 @@ header-test-+= video/vga.h
>  header-test- += video/w100fb.h
>  header-test- += xen/acpi.h
>  header-test- += xen/arm/hypercall.h
> -header-test- += xen/arm/page-coherent.h
>  header-test- += xen/arm/page.h
>  header-test- += xen/balloon.h
>  header-test- += xen/events.h
> diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
> deleted file mode 100644
> index 635492d41ebe..
> --- a/include/xen/arm/page-coherent.h
> +++ /dev/null
> @@ -1,10 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _XEN_ARM_PAGE_COHERENT_H
> -#define _XEN_ARM_PAGE_COHERENT_H
> -
> -void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
> - phys_addr_t paddr, size_t size, enum dma_data_direction dir);
> -void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
> - phys_addr_t paddr, size_t size, enum dma_data_direction dir);
> -
> -#endif /* _XEN_ARM_PAGE_COHERENT_H */
> diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
> index 5e4b83f83dbc..a7c642872568 100644
> --- a/include/xen/swiotlb-xen.h
> +++ b/include/xen/swiotlb-xen.h
> @@ -2,8 +2,14 @@
>  #ifndef __LINUX_SWIOTLB_XEN_H
>  #define __LINUX_SWIOTLB_XEN_H
>  
> +#include 
>  #include 
>  
> +void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
> + phys_addr_t paddr, size_t size, enum dma_data_direction dir);
> +void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
> + phys_addr_t paddr, size_t size, enum dma_data_direction dir);
> +
>  extern int xen_swiotlb_init(int verbose, bool early);
>  extern const struct dma_map_ops xen_swiotlb_dma_ops;
>  
> -- 
> 2.20.1
> 


[PATCH 5/7] arm64: tegra: Add Memory controller DT node on T194

2019-08-29 Thread Krishna Reddy
Add Memory controller DT node on T194 and enable it.
This patch is a prerequisite for SMMU enable on T194.

Signed-off-by: Krishna Reddy 
---
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 4 
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   | 7 +++
 2 files changed, 11 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
index 62e07e11..4b3441b 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -47,6 +47,10 @@
};
};
 
+	memory-controller@2c00000 {
+		status = "okay";
+	};
+
	serial@3110000 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index adebbbf..d906958 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 / {
compatible = "nvidia,tegra194";
@@ -130,6 +131,12 @@
};
};
 
+	memory-controller@2c00000 {
+		compatible = "nvidia,tegra186-mc";
+		reg = <0x02c00000 0xb0000>;
+		status = "disabled";
+	};
+
	uarta: serial@3100000 {
		compatible = "nvidia,tegra194-uart", "nvidia,tegra20-uart";
		reg = <0x03100000 0x40>;
-- 
2.1.4



[PATCH 7/7] arm64: tegra: enable SMMU for SDHCI and EQOS

2019-08-29 Thread Krishna Reddy
Enable SMMU translations for SDHCI and EQOS transactions.

Signed-off-by: Krishna Reddy 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index ad509bb..0496a87 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -51,6 +51,7 @@
clock-names = "master_bus", "slave_bus", "rx", "tx", 
"ptp_ref";
resets = <&bpmp TEGRA194_RESET_EQOS>;
reset-names = "eqos";
+   iommus = <&smmu TEGRA186_SID_EQOS>;
status = "disabled";
 
snps,write-requests = <1>;
@@ -381,6 +382,7 @@
clock-names = "sdhci";
resets = <&bpmp TEGRA194_RESET_SDMMC1>;
reset-names = "sdhci";
+   iommus = <&smmu TEGRA186_SID_SDMMC1>;
nvidia,pad-autocal-pull-up-offset-3v3-timeout =
<0x07>;
nvidia,pad-autocal-pull-down-offset-3v3-timeout =
@@ -403,6 +405,7 @@
clock-names = "sdhci";
resets = <&bpmp TEGRA194_RESET_SDMMC3>;
reset-names = "sdhci";
+   iommus = <&smmu TEGRA186_SID_SDMMC3>;
nvidia,pad-autocal-pull-up-offset-1v8 = <0x00>;
nvidia,pad-autocal-pull-down-offset-1v8 = <0x7a>;
nvidia,pad-autocal-pull-up-offset-3v3-timeout = <0x07>;
@@ -430,6 +433,7 @@
  <&bpmp TEGRA194_CLK_PLLC4>;
resets = <&bpmp TEGRA194_RESET_SDMMC4>;
reset-names = "sdhci";
+   iommus = <&smmu TEGRA186_SID_SDMMC4>;
nvidia,pad-autocal-pull-up-offset-hs400 = <0x00>;
nvidia,pad-autocal-pull-down-offset-hs400 = <0x00>;
nvidia,pad-autocal-pull-up-offset-1v8-timeout = <0x0a>;
-- 
2.1.4



[PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

2019-08-29 Thread Krishna Reddy
Add Nvidia SMMUv2 implementation and model info.

Signed-off-by: Krishna Reddy 
---
 MAINTAINERS |  2 +
 drivers/iommu/Makefile  |  2 +-
 drivers/iommu/arm-smmu-impl.c   |  2 +
 drivers/iommu/arm-smmu-nvidia.c | 97 +
 drivers/iommu/arm-smmu.c|  2 +
 drivers/iommu/arm-smmu.h|  2 +
 6 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/arm-smmu-nvidia.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 289fb06..b9d59e51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15785,9 +15785,11 @@ F: drivers/i2c/busses/i2c-tegra.c
 
 TEGRA IOMMU DRIVERS
 M: Thierry Reding 
+R: Krishna Reddy 
 L: linux-te...@vger.kernel.org
 S: Supported
 F: drivers/iommu/tegra*
+F: drivers/iommu/arm-smmu-nvidia.c
 
 TEGRA KBC DRIVER
 M: Laxman Dewangan 
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index a2729aa..7f5489e 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
+obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 5c87a38..e5e595f 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -162,6 +162,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
break;
case CAVIUM_SMMUV2:
return cavium_smmu_impl_init(smmu);
+   case NVIDIA_SMMUV2:
+   return nvidia_smmu_impl_init(smmu);
default:
break;
}
diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c
new file mode 100644
index 000..d93ceda
--- /dev/null
+++ b/drivers/iommu/arm-smmu-nvidia.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Nvidia ARM SMMU v2 implementation quirks
+// Copyright (C) 2019 NVIDIA CORPORATION.  All rights reserved.
+
+#define pr_fmt(fmt) "nvidia-smmu: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "arm-smmu.h"
+
+#define NUM_SMMU_INSTANCES 3
+
+struct nvidia_smmu {
+   struct arm_smmu_device  smmu;
+   int num_inst;
+   void __iomem*bases[NUM_SMMU_INSTANCES];
+};
+
+#define to_nsmmu(s)	container_of(s, struct nvidia_smmu, smmu)
+
+#define nsmmu_page(smmu, inst, page) \
+   (((inst) ? to_nsmmu(smmu)->bases[(inst)] : smmu->base) + \
+   ((page) << smmu->pgshift))
+
+static u32 nsmmu_read_reg(struct arm_smmu_device *smmu,
+ int page, int offset)
+{
+   return readl_relaxed(nsmmu_page(smmu, 0, page) + offset);
+}
+
+static void nsmmu_write_reg(struct arm_smmu_device *smmu,
+   int page, int offset, u32 val)
+{
+   int i;
+
+   for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
+   writel_relaxed(val, nsmmu_page(smmu, i, page) + offset);
+}
+
+static u64 nsmmu_read_reg64(struct arm_smmu_device *smmu,
+   int page, int offset)
+{
+   return readq_relaxed(nsmmu_page(smmu, 0, page) + offset);
+}
+
+static void nsmmu_write_reg64(struct arm_smmu_device *smmu,
+ int page, int offset, u64 val)
+{
+   int i;
+
+   for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
+   writeq_relaxed(val, nsmmu_page(smmu, i, page) + offset);
+}
+
+static const struct arm_smmu_impl nsmmu_impl = {
+   .read_reg = nsmmu_read_reg,
+   .write_reg = nsmmu_write_reg,
+   .read_reg64 = nsmmu_read_reg64,
+   .write_reg64 = nsmmu_write_reg64,
+};
+
+struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+   int i;
+   struct nvidia_smmu *nsmmu;
+   struct resource *res;
+   struct device *dev = smmu->dev;
+   struct platform_device *pdev = to_platform_device(smmu->dev);
+
+   nsmmu = devm_kzalloc(smmu->dev, sizeof(*nsmmu), GFP_KERNEL);
+   if (!nsmmu)
+   return ERR_PTR(-ENOMEM);
+
+   nsmmu->smmu = *smmu;
+   /* Instance 0 is ioremapped by arm-smmu.c */
+   nsmmu->num_inst = 1;
+
+   for (i = 1; i < NUM_SMMU_INSTANCES; i++) {
+   res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+   if (!res)
+   break;
+   nsmmu->bases[i] = devm_ioremap_resource(dev, res);
+   if (IS_ERR(nsmmu->bases[i]))
+   return (struct arm_smmu_device *)nsmmu->bases[i];
+   nsmmu->num_inst++;
+   }
+
+   nsmmu->smmu.impl = &nsmmu_impl;
+
+	return &nsmmu->smmu;
+}

[PATCH 2/7] dt-bindings: arm-smmu: Add binding for nvidia,smmu-v2

2019-08-29 Thread Krishna Reddy
Add binding doc for Nvidia's smmu-v2 implementation.

Signed-off-by: Krishna Reddy 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 3133f3b..0de3759 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -17,6 +17,7 @@ conditions.
 "arm,mmu-401"
 "arm,mmu-500"
 "cavium,smmu-v2"
+"nidia,smmu-v2"
 "qcom,smmu-v2"
 
   depending on the particular implementation and/or the version of
   the architecture implemented.
-- 
2.1.4



Re: [PATCH 06/11] swiotlb-xen: always use dma-direct helpers to alloc coherent pages

2019-08-29 Thread Stefano Stabellini
+ Boris, Juergen

On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> x86 currently calls alloc_pages, but using dma-direct works as well
> there, with the added benefit of using the CMA pool if available.
> The biggest advantage is of course to remove a pointless bit of
> architecture specific code.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/x86/include/asm/xen/page-coherent.h | 16 
>  drivers/xen/swiotlb-xen.c|  7 +++
>  include/xen/arm/page-coherent.h  | 12 
>  3 files changed, 3 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
> index 116777e7f387..8ee33c5edded 100644
> --- a/arch/x86/include/asm/xen/page-coherent.h
> +++ b/arch/x86/include/asm/xen/page-coherent.h
> @@ -5,22 +5,6 @@
>  #include 
>  #include 
>  
> -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
> - dma_addr_t *dma_handle, gfp_t flags,
> - unsigned long attrs)
> -{
> - void *vstart = (void*)__get_free_pages(flags, get_order(size));
> - *dma_handle = virt_to_phys(vstart);

This is where we need Boris and Juergen's opinion. From an ARM POV it
looks OK.


> - return vstart;
> -}
> -
> -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
> - void *cpu_addr, dma_addr_t dma_handle,
> - unsigned long attrs)
> -{
> - free_pages((unsigned long) cpu_addr, get_order(size));
> -}
> -
>  static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
>dma_addr_t dev_addr, unsigned long offset, size_t size,
>enum dma_data_direction dir, unsigned long attrs) { }
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index b8808677ae1d..f9dd4cb6e4b3 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -299,8 +299,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>* address. In fact on ARM virt_to_phys only works for kernel direct
>* mapped RAM memory. Also see comment below.
>*/
> - ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);
> -
> + ret = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
>   if (!ret)
>   return ret;
>  
> @@ -319,7 +318,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>   else {
>   if (xen_create_contiguous_region(phys, order,
> 				 fls64(dma_mask), dma_handle) != 0) {
> - xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
> + dma_direct_free(hwdev, size, ret, (dma_addr_t)phys, attrs);
>   return NULL;
>   }
>   SetPageXenRemapped(virt_to_page(ret));
> @@ -351,7 +350,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
>   TestClearPageXenRemapped(virt_to_page(vaddr)))
>   xen_destroy_contiguous_region(phys, order);
>  
> - xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
> + dma_direct_free(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
>  }
>  
>  /*
> diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
> index a840d6949a87..0e244f4fec1a 100644
> --- a/include/xen/arm/page-coherent.h
> +++ b/include/xen/arm/page-coherent.h
> @@ -16,18 +16,6 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev,
>  void __xen_dma_sync_single_for_device(struct device *hwdev,
>   dma_addr_t handle, size_t size, enum dma_data_direction dir);
>  
> -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
> - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
> -{
> - return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
> -}
> -
> -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
> - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
> -{
> - dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
> -}
> -
>  static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
>   dma_addr_t handle, size_t size, enum dma_data_direction dir)
>  {
> -- 
> 2.20.1
> 


[PATCH 0/7] Nvidia Arm SMMUv2 Implementation

2019-08-29 Thread Krishna Reddy
Hi All,
Nvidia Arm SMMUv2 implementation has two ARM SMMU (MMU-500) instances
that are used together for SMMU translations. The IOVA accesses from
HW devices are interleaved across these two SMMU instances, and the
instances need to be programmed identically, except during TLB sync
and fault handling.

This patch set adds the Nvidia Arm SMMUv2 implementation on top of the
ARM SMMU driver to handle this Nvidia-specific behavior. It also adds
hooks for TLB sync and fault handling so that implementations can
provide their own versions of both; a conceptual sketch of the mirrored
programming follows below.
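A minimal sketch of the mirroring idea, using the accessor names from
patch 1 of this series:

	/* Register writes are mirrored to every instance so that all
	 * SMMUs stay programmed identically; reads are satisfied from
	 * instance 0 only. */
	static void nsmmu_write_reg(struct arm_smmu_device *smmu,
				    int page, int offset, u32 val)
	{
		int i;

		for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
			writel_relaxed(val, nsmmu_page(smmu, i, page) + offset);
	}

TLB sync and fault handling are the exceptions: a sync must be issued
to all instances and then polled to completion on each one, and a
shared interrupt line must be checked against every instance.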

Please review the patch set and provide feedback.

This patch set is based on the following branch as it is dependent on the
Arm SMMU Refactor changes from Robin Murphy that are present in this branch.

https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git 
for-joerg/arm-smmu/updates


Krishna Reddy (7):
  iommu/arm-smmu: add Nvidia SMMUv2 implementation
  dt-bindings: arm-smmu: Add binding for nvidia,smmu-v2
  iommu/arm-smmu: Add tlb_sync implementation hook
  iommu/arm-smmu: Add global/context fault implementation hooks
  arm64: tegra: Add Memory controller DT node on T194
  arm64: tegra: Add DT node for T194 SMMU
  arm64: tegra: enable SMMU for SDHCI and EQOS

 .../devicetree/bindings/iommu/arm,smmu.txt |   1 +
 MAINTAINERS|   2 +
 arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi |   4 +
 arch/arm64/boot/dts/nvidia/tegra194.dtsi   |  86 +++
 drivers/iommu/Makefile |   2 +-
 drivers/iommu/arm-smmu-impl.c  |   2 +
 drivers/iommu/arm-smmu-nvidia.c| 256 +
 drivers/iommu/arm-smmu.c   |  16 +-
 drivers/iommu/arm-smmu.h   |  10 +
 9 files changed, 375 insertions(+), 4 deletions(-)
 create mode 100644 drivers/iommu/arm-smmu-nvidia.c

-- 
2.1.4



Re: [PATCH 07/11] swiotlb-xen: use the same foreign page check everywhere

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> xen_dma_map_page uses a different and more complicated check for foreign
> pages than the other three cache maintenance helpers.  Switch it to the
> simpler pfn_valid method as well, and document the scheme with a single
> improved comment in xen_dma_map_page.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Stefano Stabellini 


> ---
>  include/xen/arm/page-coherent.h | 31 +--
>  1 file changed, 9 insertions(+), 22 deletions(-)
> 
> diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
> index 0e244f4fec1a..07c104dbc21f 100644
> --- a/include/xen/arm/page-coherent.h
> +++ b/include/xen/arm/page-coherent.h
> @@ -41,23 +41,17 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
>dma_addr_t dev_addr, unsigned long offset, size_t size,
>enum dma_data_direction dir, unsigned long attrs)
>  {
> - unsigned long page_pfn = page_to_xen_pfn(page);
> - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
> - unsigned long compound_pages =
> -		(1<<compound_order(compound_head(page))) * XEN_PFN_PER_PAGE;
> -	bool local = (page_pfn <= dev_pfn) &&
> - (dev_pfn - page_pfn < compound_pages);
> + unsigned long pfn = PFN_DOWN(dev_addr);
>  
>   /*
> -  * Dom0 is mapped 1:1, while the Linux page can span across
> -  * multiple Xen pages, it's not possible for it to contain a
> -  * mix of local and foreign Xen pages. So if the first xen_pfn
> -  * == mfn the page is local otherwise it's a foreign page
> -  * grant-mapped in dom0. If the page is local we can safely
> -  * call the native dma_ops function, otherwise we call the xen
> -  * specific function.
> +  * Dom0 is mapped 1:1, and while the Linux page can span across multiple
> +  * Xen pages, it is not possible for it to contain a mix of local and
> +  * foreign Xen pages.  Calling pfn_valid on a foreign mfn will always
> +  * return false, so if pfn_valid returns true the pages is local and we
> +  * can use the native dma-direct functions, otherwise we call the Xen
> +  * specific version.
>*/
> - if (local)
> + if (pfn_valid(pfn))
>   dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
>   else
> __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
> @@ -67,14 +61,7 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
>   size_t size, enum dma_data_direction dir, unsigned long attrs)
>  {
>   unsigned long pfn = PFN_DOWN(handle);
> - /*
> -  * Dom0 is mapped 1:1, while the Linux page can be spanned accross
> -  * multiple Xen page, it's not possible to have a mix of local and
> -  * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
> -  * foreign mfn will always return false. If the page is local we can
> -  * safely call the native dma_ops function, otherwise we call the xen
> -  * specific function.
> -  */
> +
>   if (pfn_valid(pfn))
>   dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
>   else
> -- 
> 2.20.1
> 


[PATCH 3/7] iommu/arm-smmu: Add tlb_sync implementation hook

2019-08-29 Thread Krishna Reddy
The tlb_sync hook allows the NVIDIA SMMU implementation to handle TLB sync
across multiple SMMUs as necessary.

Signed-off-by: Krishna Reddy 
---
 drivers/iommu/arm-smmu-nvidia.c | 32 
 drivers/iommu/arm-smmu.c|  8 +---
 drivers/iommu/arm-smmu.h|  4 
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c
index d93ceda..a429b2c 100644
--- a/drivers/iommu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm-smmu-nvidia.c
@@ -56,11 +56,43 @@ static void nsmmu_write_reg64(struct arm_smmu_device *smmu,
writeq_relaxed(val, nsmmu_page(smmu, i, page) + offset);
 }
 
+static void nsmmu_tlb_sync_wait(struct arm_smmu_device *smmu, int page,
+   int sync, int status, int inst)
+{
+   u32 reg;
+   unsigned int spin_cnt, delay;
+
+   for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+   for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+   reg = readl_relaxed(
+ nsmmu_page(smmu, inst, page) + status);
+   if (!(reg & sTLBGSTATUS_GSACTIVE))
+   return;
+   cpu_relax();
+   }
+   udelay(delay);
+   }
+   dev_err_ratelimited(smmu->dev,
+   "TLB sync timed out -- SMMU may be deadlocked\n");
+}
+
+static void nsmmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+  int sync, int status)
+{
+   int i;
+
+   arm_smmu_writel(smmu, page, sync, 0);
+
+   for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
+   nsmmu_tlb_sync_wait(smmu, page, sync, status, i);
+}
+
 static const struct arm_smmu_impl nsmmu_impl = {
.read_reg = nsmmu_read_reg,
.write_reg = nsmmu_write_reg,
.read_reg64 = nsmmu_read_reg64,
.write_reg64 = nsmmu_write_reg64,
+   .tlb_sync = nsmmu_tlb_sync,
 };
 
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 46e1641..f5454e71 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -52,9 +52,6 @@
  */
 #define QCOM_DUMMY_VAL -1
 
-#define TLB_LOOP_TIMEOUT   1000000 /* 1s! */
-#define TLB_SPIN_COUNT 10
-
 #define MSI_IOVA_BASE  0x8000000
 #define MSI_IOVA_LENGTH    0x100000
 
@@ -244,6 +241,11 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device 
*smmu, int page,
unsigned int spin_cnt, delay;
u32 reg;
 
+   if (smmu->impl->tlb_sync) {
+   smmu->impl->tlb_sync(smmu, page, sync, status);
+   return;
+   }
+
arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index 9645bf1..d3217f1 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -207,6 +207,8 @@ enum arm_smmu_cbar_type {
 /* Maximum number of context banks per SMMU */
 #define ARM_SMMU_MAX_CBS   128
 
+#define TLB_LOOP_TIMEOUT   1000000 /* 1s! */
+#define TLB_SPIN_COUNT 10
 
 /* Shared driver definitions */
 enum arm_smmu_arch_version {
@@ -336,6 +338,8 @@ struct arm_smmu_impl {
int (*cfg_probe)(struct arm_smmu_device *smmu);
int (*reset)(struct arm_smmu_device *smmu);
int (*init_context)(struct arm_smmu_domain *smmu_domain);
+   void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync,
+int status);
 };
 
 static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
-- 
2.1.4



[PATCH 6/7] arm64: tegra: Add DT node for T194 SMMU

2019-08-29 Thread Krishna Reddy
Add DT node for T194 SMMU to enable SMMU support.

Signed-off-by: Krishna Reddy 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 75 
 1 file changed, 75 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index d906958..ad509bb 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1401,6 +1401,81 @@
  0x82000000 0x0 0x40000000 0x1f 0x40000000 0x0 0xc0000000>; /* non-prefetchable memory (3GB) */
};
 
+   smmu: iommu@12000000 {
+   compatible = "nvidia,smmu-v2";
+   reg = <0 0x12000000 0 0x800000>,
+ <0 0x11000000 0 0x800000>,
+ <0 0x10000000 0 0x800000>;
+   interrupts = /* 64 interrupt specifiers (one global,
+* 63 context faults); the values were
+* stripped from this archive copy */;
+   stream-match-mask = <0x7f80>;
+   #global-interrupts = <1>;
+   #iommu-cells = <1>;
+   };
+
sysram@40000000 {
compatible = "nvidia,tegra194-sysram", "mmio-sram";
reg = <0x0 0x40000000 0x0 0x50000>;
-- 
2.1.4



Re: [PATCH 03/11] xen/arm: simplify dma_cache_maint

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> Calculate the required operation in the caller, and pass it directly
> instead of recalculating it for each page, and use simple arithmetic
> to get from the physical address to Xen page size aligned chunks.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm/xen/mm.c | 62 +--
>  1 file changed, 22 insertions(+), 40 deletions(-)
> 
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 90574d89d0d4..14210ebdea1a 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -35,64 +35,46 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int 
> order)
>   return __get_free_pages(flags, order);
>  }
>  
> -enum dma_cache_op {
> -   DMA_UNMAP,
> -   DMA_MAP,
> -};
>  static bool hypercall_cflush = false;
>  
> -/* functions called by SWIOTLB */
> -
> -static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
> - size_t size, enum dma_data_direction dir, enum dma_cache_op op)
> +/* buffers in highmem or foreign pages cannot cross page boundaries */
> +static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
>  {
>   struct gnttab_cache_flush cflush;
> - unsigned long xen_pfn;
> - size_t left = size;
>  
> - xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
> - offset %= XEN_PAGE_SIZE;
> + cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;
> + cflush.offset = xen_offset_in_page(handle);
> + cflush.op = op;
>  
>   do {
> - size_t len = left;
> - 
> - /* buffers in highmem or foreign pages cannot cross page
> -  * boundaries */
> - if (len + offset > XEN_PAGE_SIZE)
> - len = XEN_PAGE_SIZE - offset;
> -
> - cflush.op = 0;
> - cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
> - cflush.offset = offset;
> - cflush.length = len;
> -
> - if (op == DMA_UNMAP && dir != DMA_TO_DEVICE)
> - cflush.op = GNTTAB_CACHE_INVAL;
> - if (op == DMA_MAP) {
> - if (dir == DMA_FROM_DEVICE)
> - cflush.op = GNTTAB_CACHE_INVAL;
> - else
> - cflush.op = GNTTAB_CACHE_CLEAN;
> - }
> - if (cflush.op)
> - HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, 
> &cflush, 1);
> + if (size + cflush.offset > XEN_PAGE_SIZE)
> + cflush.length = XEN_PAGE_SIZE - cflush.offset;
> + else
> + cflush.length = size;

isn't it missing a:

  cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;

here?
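
i.e. recomputing the bus address from handle on each iteration. An
untested sketch of the loop with that fix applied (reusing the names
from the patch above):

	do {
		cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK;
		cflush.offset = xen_offset_in_page(handle);

		if (size + cflush.offset > XEN_PAGE_SIZE)
			cflush.length = XEN_PAGE_SIZE - cflush.offset;
		else
			cflush.length = size;

		/* flush one Xen-page-sized chunk at a time */
		HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);

		handle += cflush.length;
		size -= cflush.length;
	} while (size);

With the offset recomputed from handle each time round the loop, the
separate cflush.offset = 0 reset also becomes unnecessary.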


> + HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
> +
> + handle += cflush.length;
> + size -= cflush.length;
>  
> - offset = 0;
> - xen_pfn++;
> - left -= len;
> - } while (left);
> + cflush.offset = 0;
> + } while (size);
>  }
>  
>  static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t 
> handle,
>   size_t size, enum dma_data_direction dir)
>  {
> - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, 
> DMA_UNMAP);
> + if (dir != DMA_TO_DEVICE)
> + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
>  }
>  
>  static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t 
> handle,
>   size_t size, enum dma_data_direction dir)
>  {
> - dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, 
> DMA_MAP);
> + if (dir == DMA_FROM_DEVICE)
> + dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
> + else
> + dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
>  }
>  
>  void __xen_dma_map_page(struct device *hwdev, struct page *page,
> -- 
> 2.20.1
> 


Re: [PATCH 02/11] xen/arm: use dev_is_dma_coherent

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> Use the dma-noncoherent dev_is_dma_coherent helper instead of the
> home-grown variant.  Note that both are always initialized to the same
> value in arch_setup_dma_ops.
> 
> Signed-off-by: Christoph Hellwig 
> Reviewed-by: Julien Grall 

Reviewed-by: Stefano Stabellini 


> ---
>  arch/arm/include/asm/dma-mapping.h   |  6 --
>  arch/arm/xen/mm.c| 12 ++--
>  arch/arm64/include/asm/dma-mapping.h |  9 -
>  3 files changed, 6 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/arm/include/asm/dma-mapping.h 
> b/arch/arm/include/asm/dma-mapping.h
> index dba9355e2484..bdd80ddbca34 100644
> --- a/arch/arm/include/asm/dma-mapping.h
> +++ b/arch/arm/include/asm/dma-mapping.h
> @@ -91,12 +91,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, 
> void *addr)
>  }
>  #endif
>  
> -/* do not use this function in a driver */
> -static inline bool is_device_dma_coherent(struct device *dev)
> -{
> - return dev->archdata.dma_coherent;
> -}
> -
>  /**
>   * arm_dma_alloc - allocate consistent memory for DMA
>   * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index d33b77e9add3..90574d89d0d4 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -1,6 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  #include 
> -#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -99,7 +99,7 @@ void __xen_dma_map_page(struct device *hwdev, struct page 
> *page,
>dma_addr_t dev_addr, unsigned long offset, size_t size,
>enum dma_data_direction dir, unsigned long attrs)
>  {
> - if (is_device_dma_coherent(hwdev))
> + if (dev_is_dma_coherent(hwdev))
>   return;
>   if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
>   return;
> @@ -112,7 +112,7 @@ void __xen_dma_unmap_page(struct device *hwdev, 
> dma_addr_t handle,
>   unsigned long attrs)
>  
>  {
> - if (is_device_dma_coherent(hwdev))
> + if (dev_is_dma_coherent(hwdev))
>   return;
>   if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
>   return;
> @@ -123,7 +123,7 @@ void __xen_dma_unmap_page(struct device *hwdev, 
> dma_addr_t handle,
>  void __xen_dma_sync_single_for_cpu(struct device *hwdev,
>   dma_addr_t handle, size_t size, enum dma_data_direction dir)
>  {
> - if (is_device_dma_coherent(hwdev))
> + if (dev_is_dma_coherent(hwdev))
>   return;
>   __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
>  }
> @@ -131,7 +131,7 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev,
>  void __xen_dma_sync_single_for_device(struct device *hwdev,
>   dma_addr_t handle, size_t size, enum dma_data_direction dir)
>  {
> - if (is_device_dma_coherent(hwdev))
> + if (dev_is_dma_coherent(hwdev))
>   return;
>   __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
>  }
> @@ -159,7 +159,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
>* memory and we are not able to flush the cache.
>*/
>   return (!hypercall_cflush && (xen_pfn != bfn) &&
> - !is_device_dma_coherent(dev));
> + !dev_is_dma_coherent(dev));
>  }
>  
>  int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
> diff --git a/arch/arm64/include/asm/dma-mapping.h 
> b/arch/arm64/include/asm/dma-mapping.h
> index bdcb0922a40c..67243255a858 100644
> --- a/arch/arm64/include/asm/dma-mapping.h
> +++ b/arch/arm64/include/asm/dma-mapping.h
> @@ -18,14 +18,5 @@ static inline const struct dma_map_ops 
> *get_arch_dma_ops(struct bus_type *bus)
>   return NULL;
>  }
>  
> -/*
> - * Do not use this function in a driver, it is only provided for
> - * arch/arm/mm/xen.c, which is used by arm64 as well.
> - */
> -static inline bool is_device_dma_coherent(struct device *dev)
> -{
> - return dev->dma_coherent;
> -}
> -
>  #endif   /* __KERNEL__ */
>  #endif   /* __ASM_DMA_MAPPING_H */
> -- 
> 2.20.1
> 


Re: [PATCH 01/11] xen/arm: use dma-noncoherent.h calls for xen-swiotlb cache maintainance

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> Reuse the arm64 code that uses the dma-direct/swiotlb helpers for DMA
> non-coherent devices.

This patch does a bunch of things not listed in the commit message, such
as moving the static inline functions to include/xen/arm/page-coherent.h
and removing xen_swiotlb_dma_mmap and xen_swiotlb_get_sgtable because
they are unnecessary.

I would prefer if they were separate patches (for bisectability). It's
OK if you want to keep it all in one patch, but please list all changes
in the commit message.

In any case, I looked at the patch in details and it does all the right
things -- it's correct.


> Signed-off-by: Christoph Hellwig 
> ---
>  arch/arm/include/asm/device.h  |  3 -
>  arch/arm/include/asm/xen/page-coherent.h   | 93 --
>  arch/arm/mm/dma-mapping.c  |  8 +-
>  arch/arm64/include/asm/xen/page-coherent.h | 75 -
>  drivers/xen/swiotlb-xen.c  | 49 +---
>  include/xen/arm/page-coherent.h| 80 +++
>  6 files changed, 83 insertions(+), 225 deletions(-)
> 
> diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
> index f6955b55c544..c675bc0d5aa8 100644
> --- a/arch/arm/include/asm/device.h
> +++ b/arch/arm/include/asm/device.h
> @@ -14,9 +14,6 @@ struct dev_archdata {
>  #endif
>  #ifdef CONFIG_ARM_DMA_USE_IOMMU
>   struct dma_iommu_mapping*mapping;
> -#endif
> -#ifdef CONFIG_XEN
> - const struct dma_map_ops *dev_dma_ops;
>  #endif
>   unsigned int dma_coherent:1;
>   unsigned int dma_ops_setup:1;
> diff --git a/arch/arm/include/asm/xen/page-coherent.h 
> b/arch/arm/include/asm/xen/page-coherent.h
> index 2c403e7c782d..27e984977402 100644
> --- a/arch/arm/include/asm/xen/page-coherent.h
> +++ b/arch/arm/include/asm/xen/page-coherent.h
> @@ -1,95 +1,2 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
> -#define _ASM_ARM_XEN_PAGE_COHERENT_H
> -
> -#include 
> -#include 
>  #include 
> -
> -static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
> -{
> - if (dev && dev->archdata.dev_dma_ops)
> - return dev->archdata.dev_dma_ops;
> - return get_arch_dma_ops(NULL);
> -}
> -
> -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t 
> size,
> - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
> -{
> - return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, 
> attrs);
> -}
> -
> -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
> - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
> -{
> - xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
> -}
> -
> -static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
> -  dma_addr_t dev_addr, unsigned long offset, size_t size,
> -  enum dma_data_direction dir, unsigned long attrs)
> -{
> - unsigned long page_pfn = page_to_xen_pfn(page);
> - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
> - unsigned long compound_pages =
> - (1 << compound_order(page)) * XEN_PFN_PER_PAGE;
> - bool local = (page_pfn <= dev_pfn) &&
> - (dev_pfn - page_pfn < compound_pages);
> -
> - /*
> -  * Dom0 is mapped 1:1, while the Linux page can span across
> -  * multiple Xen pages, it's not possible for it to contain a
> -  * mix of local and foreign Xen pages. So if the first xen_pfn
> -  * == mfn the page is local otherwise it's a foreign page
> -  * grant-mapped in dom0. If the page is local we can safely
> -  * call the native dma_ops function, otherwise we call the xen
> -  * specific function.
> -  */
> - if (local)
> - xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, 
> dir, attrs);
> - else
> - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, 
> attrs);
> -}
> -
> -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t 
> handle,
> - size_t size, enum dma_data_direction dir, unsigned long attrs)
> -{
> - unsigned long pfn = PFN_DOWN(handle);
> - /*
> -  * Dom0 is mapped 1:1, while the Linux page can be spanned accross
> -  * multiple Xen page, it's not possible to have a mix of local and
> -  * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
> -  * foreign mfn will always return false. If the page is local we can
> -  * safely call the native dma_ops function, otherwise we call the xen
> -  * specific function.
> -  */
> - if (pfn_valid(pfn)) {
> - if (xen_get_dma_ops(hwdev)->unmap_page)
> - xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, 
> dir, attrs);
> - } else
> - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
> -}
> -
> -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
> - dma_addr_t handle, size

Re: [PATCH 05/11] xen: remove the exports for xen_{create,destroy}_contiguous_region

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> These routines are only used by swiotlb-xen, which cannot be modular.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Stefano Stabellini 


> ---
>  arch/arm/xen/mm.c | 2 --
>  arch/x86/xen/mmu_pv.c | 2 --
>  2 files changed, 4 deletions(-)
> 
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 9b3a6c0ca681..b7d53415532b 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -155,13 +155,11 @@ int xen_create_contiguous_region(phys_addr_t pstart, 
> unsigned int order,
>   *dma_handle = pstart;
>   return 0;
>  }
> -EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
>  
>  void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
>  {
>   return;
>  }
> -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
>  
>  int __init xen_mm_init(void)
>  {
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
> index 26e8b326966d..c8dbee62ec2a 100644
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -2625,7 +2625,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, 
> unsigned int order,
>   *dma_handle = virt_to_machine(vstart).maddr;
>   return success ? 0 : -ENOMEM;
>  }
> -EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
>  
>  void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
>  {
> @@ -2660,7 +2659,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, 
> unsigned int order)
>  
>   spin_unlock_irqrestore(&xen_reservation_lock, flags);
>  }
> -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
>  
>  static noinline void xen_flush_tlb_all(void)
>  {
> -- 
> 2.20.1
> 


Re: [PATCH 04/11] xen/arm: remove xen_dma_ops

2019-08-29 Thread Stefano Stabellini
On Mon, 26 Aug 2019, Christoph Hellwig wrote:
> arm and arm64 can just use xen_swiotlb_dma_ops directly like x86, no
> need for a pointer indirection.
> 
> Signed-off-by: Christoph Hellwig 
> Reviewed-by: Julien Grall 

Reviewed-by: Stefano Stabellini 


> ---
>  arch/arm/mm/dma-mapping.c| 3 ++-
>  arch/arm/xen/mm.c| 4 
>  arch/arm64/mm/dma-mapping.c  | 3 ++-
>  include/xen/arm/hypervisor.h | 2 --
>  4 files changed, 4 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 738097396445..2661cad36359 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "dma.h"
>  #include "mm.h"
> @@ -2360,7 +2361,7 @@ void arch_setup_dma_ops(struct device *dev, u64 
> dma_base, u64 size,
>  
>  #ifdef CONFIG_XEN
>   if (xen_initial_domain())
> - dev->dma_ops = xen_dma_ops;
> + dev->dma_ops = &xen_swiotlb_dma_ops;
>  #endif
>   dev->archdata.dma_ops_setup = true;
>  }
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index 14210ebdea1a..9b3a6c0ca681 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -163,16 +163,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, 
> unsigned int order)
>  }
>  EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
>  
> -const struct dma_map_ops *xen_dma_ops;
> -EXPORT_SYMBOL(xen_dma_ops);
> -
>  int __init xen_mm_init(void)
>  {
>   struct gnttab_cache_flush cflush;
>   if (!xen_initial_domain())
>   return 0;
>   xen_swiotlb_init(1, false);
> - xen_dma_ops = &xen_swiotlb_dma_ops;
>  
>   cflush.op = 0;
>   cflush.a.dev_bus_addr = 0;
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index bd2b039f43a6..4b244a037349 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -64,6 +65,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, 
> u64 size,
>  
>  #ifdef CONFIG_XEN
>   if (xen_initial_domain())
> - dev->dma_ops = xen_dma_ops;
> + dev->dma_ops = &xen_swiotlb_dma_ops;
>  #endif
>  }
> diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h
> index 2982571f7cc1..43ef24dd030e 100644
> --- a/include/xen/arm/hypervisor.h
> +++ b/include/xen/arm/hypervisor.h
> @@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode 
> paravirt_get_lazy_mode(void)
>   return PARAVIRT_LAZY_NONE;
>  }
>  
> -extern const struct dma_map_ops *xen_dma_ops;
> -
>  #ifdef CONFIG_XEN
>  void __init xen_early_init(void);
>  #else
> -- 
> 2.20.1
> 


Re: [PATCH] media: staging: tegra-vde: Disable building with COMPILE_TEST

2019-08-29 Thread Thierry Reding
On Thu, Aug 29, 2019 at 04:58:22PM +0300, Dmitry Osipenko wrote:
> 29.08.2019 15:40, Thierry Reding wrote:
> > On Thu, Aug 29, 2019 at 01:39:32PM +0200, Hans Verkuil wrote:
> >> On 8/26/19 3:31 PM, YueHaibing wrote:
> >>> If COMPILE_TEST is y and IOMMU_SUPPORT is n, selecting TEGRA_VDE
> >>> to m will set IOMMU_IOVA to m, this fails the building of
> >>> TEGRA_HOST1X and DRM_TEGRA which is y like this:
> >>>
> >>> drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init':
> >>> cdma.c:(.text+0x66c): undefined reference to `alloc_iova'
> >>> cdma.c:(.text+0x698): undefined reference to `__free_iova'
> >>>
> >>> drivers/gpu/drm/tegra/drm.o: In function `tegra_drm_unload':
> >>> drm.c:(.text+0xeb0): undefined reference to `put_iova_domain'
> >>> drm.c:(.text+0xeb4): undefined reference to `iova_cache_put'
> >>>
> >>> Reported-by: Hulk Robot 
> >>> Fixes: 6b2265975239 ("media: staging: tegra-vde: Fix build error")
> >>> Fixes: b301f8de1925 ("media: staging: media: tegra-vde: Add IOMMU 
> >>> support")
> >>> Signed-off-by: YueHaibing 
> >>> ---
> >>>  drivers/staging/media/tegra-vde/Kconfig | 4 ++--
> >>>  1 file changed, 2 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/staging/media/tegra-vde/Kconfig 
> >>> b/drivers/staging/media/tegra-vde/Kconfig
> >>> index ba49ea5..a41d30c 100644
> >>> --- a/drivers/staging/media/tegra-vde/Kconfig
> >>> +++ b/drivers/staging/media/tegra-vde/Kconfig
> >>> @@ -1,9 +1,9 @@
> >>>  # SPDX-License-Identifier: GPL-2.0
> >>>  config TEGRA_VDE
> >>>   tristate "NVIDIA Tegra Video Decoder Engine driver"
> >>> - depends on ARCH_TEGRA || COMPILE_TEST
> >>> + depends on ARCH_TEGRA
> >>
> >> What happens if you drop this change,
> >>
> >>>   select DMA_SHARED_BUFFER
> >>> - select IOMMU_IOVA if (IOMMU_SUPPORT || COMPILE_TEST)
> >>> + select IOMMU_IOVA if IOMMU_SUPPORT
> >>
> >> but keep this change?
> >>
> >> iova.h has stubs that are used if IOMMU_IOVA is not set, so it should
> >> work when compile testing this tegra-vde driver.
> >>
> >> Haven't tried it, but making sure that compile testing keeps working is
> >> really important.
> 
> The driver's code compiles okay; it's the link stage that
> fails during compile-testing.
> 
> > Yeah, that variant seems to work for me. I think it's also more correct
> > because the IOMMU_IOVA if IOMMU_SUPPORT dependency really says that the
> > IOVA usage is bound to IOMMU support. If IOMMU support is not enabled,
> > then IOVA is not needed either, so the dummies will do just fine.
> 
> Am I understanding correctly that you're suggesting to revert [1][2] and
> get back to the other problem?
> 
> [1]
> https://lore.kernel.org/linux-media/dd547b44-7abb-371f-aeee-a82b96f82...@gmail.com/T/
> [2] https://patchwork.ozlabs.org/patch/1136619/
> 
> If we want to keep compile testing, I guess the only reasonable variant
> right now is to select IOMMU_IOVA unconditionally in all of the drivers
> (vde, host1x, drm and etc) and then just ignore that IOVA will be
> compiled-and-unused if IOMMU_SUPPORT=n (note that IOMMU_SUPPORT=y in all
> default kernel configurations).

Agreed. I think we should just select IOMMU_IOVA unconditionally. We
really do want IOMMU_SUPPORT always as well, but it might be nice to be
able to switch it off for testing or so. In the cases that really matter
we will be enabling both IOMMU_SUPPORT and IOMMU_IOVA anyway, so might
as well select IOMMU_IOVA always. It's not terribly big and I can't
imagine anyone wanting to run a kernel without IOMMU_SUPPORT for
anything other than testing.
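
For illustration, a minimal sketch of the resulting Kconfig entry
(assuming we keep COMPILE_TEST support and simply make the select
unconditional):

# SPDX-License-Identifier: GPL-2.0
config TEGRA_VDE
	tristate "NVIDIA Tegra Video Decoder Engine driver"
	depends on ARCH_TEGRA || COMPILE_TEST
	select DMA_SHARED_BUFFER
	select IOMMU_IOVA
	select SRAM
	help
	  Say Y here to enable support for the NVIDIA Tegra video decoder
	  driver.

That way the IOVA helpers are always linked in when any of the selecting
drivers is enabled, and the COMPILE_TEST link failure cannot happen.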

Thierry




Re: remove default fallbacks in dma_map_ops v3

2019-08-29 Thread Christoph Hellwig
I've applied this to the dma-mapping for-next tree now.

If there are any issues with the parisc patch I'll happily take
incremental patches.


Re: cleanup the dma_pgprot handling v2

2019-08-29 Thread Christoph Hellwig
I've pulled this into the dma-mapping for-next tree now.


Re: [PATCH] dma-mapping: remove dma_{alloc,free,mmap}_writecombine

2019-08-29 Thread Christoph Hellwig
On Tue, Jul 30, 2019 at 04:50:45PM +0300, Laurent Pinchart wrote:
> I would have indented this line to match the rest. Apart from that,

I've fixed that up now that I've applied the patch.


Re: [PATCH 4/5] PCI/vmd: Stop overriding dma_map_ops

2019-08-29 Thread Christoph Hellwig
On Wed, Aug 28, 2019 at 09:01:06AM -0600, Keith Busch wrote:
> On Wed, Aug 28, 2019 at 07:14:42AM -0700, Christoph Hellwig wrote:
> > With a little tweak to the intel-iommu code we should be able to work
> > around the VMD mess for the requester IDs without having to create giant
> > amounts of boilerplate DMA ops wrapping code.  The other advantage of
> > this scheme is that we can respect the real DMA masks for the actual
> > devices, and I bet it will only be a matter of time until we see the
> > first DMA challenged NVMe devices.
> 
> This tests out fine on VMD hardware, but it's quite different than the
> previous patch. In v1, the original dev was used in iommu_need_mapping(),
> but this time it's the vmd device. Is this still using the actual device's
> DMA mask then?

True.  But then again I think the old one was broken as well, as it
will pass the wrong dev to identity_mapping() or
iommu_request_dma_domain_for_dev.  So I guess I'll need to respin it
a bit to do the work in iommu_need_mapping again, and then refactor
that function to make it obvious which device we are dealing with.


Re: [PATCH 2/5] x86/pci: Add a to_pci_sysdata helper

2019-08-29 Thread h...@lst.de
On Wed, Aug 28, 2019 at 04:41:45PM +, Derrick, Jonathan wrote:
> > diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
> > index 6fa846920f5f..75fe28492290 100644
> > --- a/arch/x86/include/asm/pci.h
> > +++ b/arch/x86/include/asm/pci.h
> > @@ -35,12 +35,15 @@ extern int noioapicreroute;
> >  
> >  #ifdef CONFIG_PCI
> >  
> > +static inline struct pci_sysdata *to_pci_sysdata(struct pci_bus *bus)
> Can you make the argument const to avoid all the warnings from callers
> passing const struct pci_bus

Yes, I already fixed this up after getting a build bot warning for a
NUMA config (which seems to be the only one passing a const).


Re: [PATCH] media: staging: tegra-vde: Disable building with COMPILE_TEST

2019-08-29 Thread Dmitry Osipenko
29.08.2019 15:40, Thierry Reding wrote:
> On Thu, Aug 29, 2019 at 01:39:32PM +0200, Hans Verkuil wrote:
>> On 8/26/19 3:31 PM, YueHaibing wrote:
>>> If COMPILE_TEST is y and IOMMU_SUPPORT is n, selecting TEGRA_VDE
>>> to m will set IOMMU_IOVA to m. This fails the building of
>>> TEGRA_HOST1X and DRM_TEGRA, which are y, like this:
>>>
>>> drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init':
>>> cdma.c:(.text+0x66c): undefined reference to `alloc_iova'
>>> cdma.c:(.text+0x698): undefined reference to `__free_iova'
>>>
>>> drivers/gpu/drm/tegra/drm.o: In function `tegra_drm_unload':
>>> drm.c:(.text+0xeb0): undefined reference to `put_iova_domain'
>>> drm.c:(.text+0xeb4): undefined reference to `iova_cache_put'
>>>
>>> Reported-by: Hulk Robot 
>>> Fixes: 6b2265975239 ("media: staging: tegra-vde: Fix build error")
>>> Fixes: b301f8de1925 ("media: staging: media: tegra-vde: Add IOMMU support")
>>> Signed-off-by: YueHaibing 
>>> ---
>>>  drivers/staging/media/tegra-vde/Kconfig | 4 ++--
>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/staging/media/tegra-vde/Kconfig 
>>> b/drivers/staging/media/tegra-vde/Kconfig
>>> index ba49ea5..a41d30c 100644
>>> --- a/drivers/staging/media/tegra-vde/Kconfig
>>> +++ b/drivers/staging/media/tegra-vde/Kconfig
>>> @@ -1,9 +1,9 @@
>>>  # SPDX-License-Identifier: GPL-2.0
>>>  config TEGRA_VDE
>>> tristate "NVIDIA Tegra Video Decoder Engine driver"
>>> -   depends on ARCH_TEGRA || COMPILE_TEST
>>> +   depends on ARCH_TEGRA
>>
>> What happens if you drop this change,
>>
>>> select DMA_SHARED_BUFFER
>>> -   select IOMMU_IOVA if (IOMMU_SUPPORT || COMPILE_TEST)
>>> +   select IOMMU_IOVA if IOMMU_SUPPORT
>>
>> but keep this change?
>>
>> iova.h has stubs that are used if IOMMU_IOVA is not set, so it should
>> work when compile testing this tegra-vde driver.
>>
>> Haven't tried it, but making sure that compile testing keeps working is
>> really important.

The driver's code compiles okay; it's the link stage that
fails during compile-testing.

> Yeah, that variant seems to work for me. I think it's also more correct
> because the IOMMU_IOVA if IOMMU_SUPPORT dependency really says that the
> IOVA usage is bound to IOMMU support. If IOMMU support is not enabled,
> then IOVA is not needed either, so the dummies will do just fine.

Am I understanding correctly that you're suggesting to revert [1][2] and
get back to the other problem?

[1]
https://lore.kernel.org/linux-media/dd547b44-7abb-371f-aeee-a82b96f82...@gmail.com/T/
[2] https://patchwork.ozlabs.org/patch/1136619/

If we want to keep compile testing, I guess the only reasonable variant
right now is to select IOMMU_IOVA unconditionally in all of the drivers
(vde, host1x, drm and etc) and then just ignore that IOVA will be
compiled-and-unused if IOMMU_SUPPORT=n (note that IOMMU_SUPPORT=y in all
default kernel configurations).


Re: [PATCH] media: staging: tegra-vde: Disable building with COMPILE_TEST

2019-08-29 Thread Thierry Reding
On Thu, Aug 29, 2019 at 01:39:32PM +0200, Hans Verkuil wrote:
> On 8/26/19 3:31 PM, YueHaibing wrote:
> > If COMPILE_TEST is y and IOMMU_SUPPORT is n, selecting TEGRA_VDE
> > to m will set IOMMU_IOVA to m. This fails the building of
> > TEGRA_HOST1X and DRM_TEGRA, which are y, like this:
> > 
> > drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init':
> > cdma.c:(.text+0x66c): undefined reference to `alloc_iova'
> > cdma.c:(.text+0x698): undefined reference to `__free_iova'
> > 
> > drivers/gpu/drm/tegra/drm.o: In function `tegra_drm_unload':
> > drm.c:(.text+0xeb0): undefined reference to `put_iova_domain'
> > drm.c:(.text+0xeb4): undefined reference to `iova_cache_put'
> > 
> > Reported-by: Hulk Robot 
> > Fixes: 6b2265975239 ("media: staging: tegra-vde: Fix build error")
> > Fixes: b301f8de1925 ("media: staging: media: tegra-vde: Add IOMMU support")
> > Signed-off-by: YueHaibing 
> > ---
> >  drivers/staging/media/tegra-vde/Kconfig | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/staging/media/tegra-vde/Kconfig 
> > b/drivers/staging/media/tegra-vde/Kconfig
> > index ba49ea5..a41d30c 100644
> > --- a/drivers/staging/media/tegra-vde/Kconfig
> > +++ b/drivers/staging/media/tegra-vde/Kconfig
> > @@ -1,9 +1,9 @@
> >  # SPDX-License-Identifier: GPL-2.0
> >  config TEGRA_VDE
> > tristate "NVIDIA Tegra Video Decoder Engine driver"
> > -   depends on ARCH_TEGRA || COMPILE_TEST
> > +   depends on ARCH_TEGRA
> 
> What happens if you drop this change,
> 
> > select DMA_SHARED_BUFFER
> > -   select IOMMU_IOVA if (IOMMU_SUPPORT || COMPILE_TEST)
> > +   select IOMMU_IOVA if IOMMU_SUPPORT
> 
> but keep this change?
> 
> iova.h has stubs that are used if IOMMU_IOVA is not set, so it should
> work when compile testing this tegra-vde driver.
> 
> Haven't tried it, but making sure that compile testing keeps working is
> really important.

Yeah, that variant seems to work for me. I think it's also more correct
because the IOMMU_IOVA if IOMMU_SUPPORT dependency really says that the
IOVA usage is bound to IOMMU support. If IOMMU support is not enabled,
then IOVA is not needed either, so the dummies will do just fine.

Thierry




Re: [PATCH v2 2/2] dma-contiguous: Use fallback alloc_pages for single pages

2019-08-29 Thread Masahiro Yamada
On Wed, Aug 28, 2019 at 9:23 PM Masahiro Yamada
 wrote:
>
> On Wed, Aug 28, 2019 at 7:53 PM Masahiro Yamada
>  wrote:
> >
> > Hi Christoph,
> >
> > On Tue, Aug 27, 2019 at 8:55 PM Christoph Hellwig  wrote:
> > >
> > > On Tue, Aug 27, 2019 at 06:03:14PM +0900, Masahiro Yamada wrote:
> > > > Yes, this makes my driver working again
> > > > when CONFIG_DMA_CMA=y.
> > > >
> > > >
> > > > If I apply the following, my driver gets back working
> > > > irrespective of CONFIG_DMA_CMA.
> > >
> > > That sounds a lot like the device simply isn't 64-bit DMA capable, and
> > > previously always got CMA allocations under the limit it actually
> > > supported.  I suggest that you submit this quirk to the mmc maintainers.
> >
> >
> > I tested v5.2 and my MMC host controller works with
> > dma_address values that exceed the 32-bit physical address range.
> >
> > So, I believe my MMC device is 64-bit DMA capable.
> >
> > I am still looking into the code
> > to find out what was changed.
>
>
> I retract this comment.
>
> Prior to bd2e75633c8012fc8a7431c82fda66237133bf7e,
> the descriptor table for ADMA was placed within the
> 32-bit phys address range and did not exceed the 32-bit limit.
>
> Probably, my device is not 64-bit capable.
>
> I will talk to the hardware engineer,
> and check the hardware spec just in case.
>


After looking more into my hardware,
I found out how to fix my driver:
https://lore.kernel.org/patchwork/patch/1121600/



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] media: staging: tegra-vde: Disable building with COMPILE_TEST

2019-08-29 Thread Hans Verkuil
On 8/26/19 3:31 PM, YueHaibing wrote:
> If COMPILE_TEST is y and IOMMU_SUPPORT is n, selecting TEGRA_VDE
> to m will set IOMMU_IOVA to m. This fails the building of
> TEGRA_HOST1X and DRM_TEGRA, which are y, like this:
> 
> drivers/gpu/host1x/cdma.o: In function `host1x_cdma_init':
> cdma.c:(.text+0x66c): undefined reference to `alloc_iova'
> cdma.c:(.text+0x698): undefined reference to `__free_iova'
> 
> drivers/gpu/drm/tegra/drm.o: In function `tegra_drm_unload':
> drm.c:(.text+0xeb0): undefined reference to `put_iova_domain'
> drm.c:(.text+0xeb4): undefined reference to `iova_cache_put'
> 
> Reported-by: Hulk Robot 
> Fixes: 6b2265975239 ("media: staging: tegra-vde: Fix build error")
> Fixes: b301f8de1925 ("media: staging: media: tegra-vde: Add IOMMU support")
> Signed-off-by: YueHaibing 
> ---
>  drivers/staging/media/tegra-vde/Kconfig | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/Kconfig 
> b/drivers/staging/media/tegra-vde/Kconfig
> index ba49ea5..a41d30c 100644
> --- a/drivers/staging/media/tegra-vde/Kconfig
> +++ b/drivers/staging/media/tegra-vde/Kconfig
> @@ -1,9 +1,9 @@
>  # SPDX-License-Identifier: GPL-2.0
>  config TEGRA_VDE
>   tristate "NVIDIA Tegra Video Decoder Engine driver"
> - depends on ARCH_TEGRA || COMPILE_TEST
> + depends on ARCH_TEGRA

What happens if you drop this change,

>   select DMA_SHARED_BUFFER
> - select IOMMU_IOVA if (IOMMU_SUPPORT || COMPILE_TEST)
> + select IOMMU_IOVA if IOMMU_SUPPORT

but keep this change?

iova.h has stubs that are used if IOMMU_IOVA is not set, so it should
work when compile testing this tegra-vde driver.

Haven't tried it, but making sure that compile testing keeps working is
really important.

Regards,

Hans

>   select SRAM
>   help
>   Say Y here to enable support for the NVIDIA Tegra video decoder
> 



[PATCH 5/5] iommu: virt: Use iommu_put_resv_regions_simple()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

Use the new standard function instead of open-coding it.

Cc: Jean-Philippe Brucker 
Cc: virtualizat...@lists.linux-foundation.org
Signed-off-by: Thierry Reding 
---
 drivers/iommu/virtio-iommu.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 3ea9d7682999..bc3c7ab7f996 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -838,14 +838,6 @@ static void viommu_get_resv_regions(struct device *dev, 
struct list_head *head)
iommu_dma_get_resv_regions(dev, head);
 }
 
-static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
-{
-   struct iommu_resv_region *entry, *next;
-
-   list_for_each_entry_safe(entry, next, head, list)
-   kfree(entry);
-}
-
 static struct iommu_ops viommu_ops;
 static struct virtio_driver virtio_iommu_drv;
 
@@ -915,7 +907,7 @@ static int viommu_add_device(struct device *dev)
 err_unlink_dev:
iommu_device_unlink(&viommu->iommu, dev);
 err_free_dev:
-   viommu_put_resv_regions(dev, &vdev->resv_regions);
+   iommu_put_resv_regions_simple(dev, &vdev->resv_regions);
kfree(vdev);
 
return ret;
@@ -933,7 +925,7 @@ static void viommu_remove_device(struct device *dev)
 
iommu_group_remove_device(dev);
iommu_device_unlink(&vdev->viommu->iommu, dev);
-   viommu_put_resv_regions(dev, &vdev->resv_regions);
+   iommu_put_resv_regions_simple(dev, &vdev->resv_regions);
kfree(vdev);
 }
 
@@ -962,7 +954,7 @@ static struct iommu_ops viommu_ops = {
.remove_device  = viommu_remove_device,
.device_group   = viommu_device_group,
.get_resv_regions   = viommu_get_resv_regions,
-   .put_resv_regions   = viommu_put_resv_regions,
+   .put_resv_regions   = iommu_put_resv_regions_simple,
.of_xlate   = viommu_of_xlate,
 };
 
-- 
2.22.0



[PATCH 2/5] iommu: arm: Use iommu_put_resv_regions_simple()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

Use the new standard function instead of open-coding it.

Cc: Will Deacon 
Cc: Robin Murphy 
Signed-off-by: Thierry Reding 
---
 drivers/iommu/arm-smmu-v3.c | 11 +--
 drivers/iommu/arm-smmu.c| 11 +--
 2 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 0ad6d34d1e96..b3b7ca2c057a 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2263,15 +2263,6 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
 }
 
-static void arm_smmu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
-   struct iommu_resv_region *entry, *next;
-
-   list_for_each_entry_safe(entry, next, head, list)
-   kfree(entry);
-}
-
 static struct iommu_ops arm_smmu_ops = {
.capable= arm_smmu_capable,
.domain_alloc   = arm_smmu_domain_alloc,
@@ -2289,7 +2280,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_set_attr= arm_smmu_domain_set_attr,
.of_xlate   = arm_smmu_of_xlate,
.get_resv_regions   = arm_smmu_get_resv_regions,
-   .put_resv_regions   = arm_smmu_put_resv_regions,
+   .put_resv_regions   = iommu_put_resv_regions_simple,
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
 };
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index d6fe997e9466..e547b4322bcc 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1724,15 +1724,6 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
 }
 
-static void arm_smmu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
-   struct iommu_resv_region *entry, *next;
-
-   list_for_each_entry_safe(entry, next, head, list)
-   kfree(entry);
-}
-
 static struct iommu_ops arm_smmu_ops = {
.capable= arm_smmu_capable,
.domain_alloc   = arm_smmu_domain_alloc,
@@ -1750,7 +1741,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_set_attr= arm_smmu_domain_set_attr,
.of_xlate   = arm_smmu_of_xlate,
.get_resv_regions   = arm_smmu_get_resv_regions,
-   .put_resv_regions   = arm_smmu_put_resv_regions,
+   .put_resv_regions   = iommu_put_resv_regions_simple,
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
 };
 
-- 
2.22.0



[PATCH 3/5] iommu: amd: Use iommu_put_resv_regions_simple()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

Use the new standard function instead of open-coding it.

Signed-off-by: Thierry Reding 
---
 drivers/iommu/amd_iommu.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 04a9f8443344..7d8896d5fab9 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3160,15 +3160,6 @@ static void amd_iommu_get_resv_regions(struct device 
*dev,
list_add_tail(&region->list, head);
 }
 
-static void amd_iommu_put_resv_regions(struct device *dev,
-struct list_head *head)
-{
-   struct iommu_resv_region *entry, *next;
-
-   list_for_each_entry_safe(entry, next, head, list)
-   kfree(entry);
-}
-
 static void amd_iommu_apply_resv_region(struct device *dev,
  struct iommu_domain *domain,
  struct iommu_resv_region *region)
@@ -3216,7 +3207,7 @@ const struct iommu_ops amd_iommu_ops = {
.remove_device = amd_iommu_remove_device,
.device_group = amd_iommu_device_group,
.get_resv_regions = amd_iommu_get_resv_regions,
-   .put_resv_regions = amd_iommu_put_resv_regions,
+   .put_resv_regions = iommu_put_resv_regions_simple,
.apply_resv_region = amd_iommu_apply_resv_region,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
-- 
2.22.0



[PATCH 1/5] iommu: Implement iommu_put_resv_regions_simple()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

Implement a generic function for removing reserved regions. This can be
used by drivers that don't do anything fancy with these regions other
than allocating memory for them.

Signed-off-by: Thierry Reding 
---
 drivers/iommu/iommu.c | 19 +++
 include/linux/iommu.h |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0f585b614657..73a2a6b13507 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2170,6 +2170,25 @@ void iommu_put_resv_regions(struct device *dev, struct 
list_head *list)
ops->put_resv_regions(dev, list);
 }
 
+/**
+ * iommu_put_resv_regions_simple - Reserved region driver helper
+ * @dev: device for which to free reserved regions
+ * @list: reserved region list for device
+ *
+ * IOMMU drivers can use this to implement their .put_resv_regions() callback
+ * for simple reservations. Memory allocated for each reserved region will be
+ * freed. If an IOMMU driver allocates additional resources per region, it is
+ * going to have to implement a custom callback.
+ */
+void iommu_put_resv_regions_simple(struct device *dev, struct list_head *list)
+{
+   struct iommu_resv_region *entry, *next;
+
+   list_for_each_entry_safe(entry, next, list, list)
+   kfree(entry);
+}
+EXPORT_SYMBOL(iommu_put_resv_regions_simple);
+
 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
  size_t length, int prot,
  enum iommu_resv_type type)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 29bac5345563..d9c91e37ac2e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -434,6 +434,8 @@ extern void iommu_set_fault_handler(struct iommu_domain 
*domain,
 
 extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
+extern void iommu_put_resv_regions_simple(struct device *dev,
+ struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 extern int iommu_request_dma_domain_for_dev(struct device *dev);
 extern void iommu_set_default_passthrough(bool cmd_line);
-- 
2.22.0



[PATCH 4/5] iommu: intel: Use iommu_put_resv_regions_simple()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

Use the new standard function instead of open-coding it.

Cc: David Woodhouse 
Signed-off-by: Thierry Reding 
---
 drivers/iommu/intel-iommu.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 4658cda6f3d2..2fe5da41c786 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5386,15 +5386,6 @@ static void intel_iommu_get_resv_regions(struct device 
*device,
list_add_tail(&reg->list, head);
 }
 
-static void intel_iommu_put_resv_regions(struct device *dev,
-struct list_head *head)
-{
-   struct iommu_resv_region *entry, *next;
-
-   list_for_each_entry_safe(entry, next, head, list)
-   kfree(entry);
-}
-
 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
 {
struct device_domain_info *info;
@@ -5629,7 +5620,7 @@ const struct iommu_ops intel_iommu_ops = {
.add_device = intel_iommu_add_device,
.remove_device  = intel_iommu_remove_device,
.get_resv_regions   = intel_iommu_get_resv_regions,
-   .put_resv_regions   = intel_iommu_put_resv_regions,
+   .put_resv_regions   = iommu_put_resv_regions_simple,
.apply_resv_region  = intel_iommu_apply_resv_region,
.device_group   = pci_device_group,
.dev_has_feat   = intel_iommu_dev_has_feat,
-- 
2.22.0



[PATCH 0/5] iommu: Implement iommu_put_resv_regions_simple()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

Most IOMMU drivers only need to free the memory allocated for each
reserved region. Instead of open-coding the loop to do this in each
driver, extract the code into a common function that can be used by
all these drivers.

Thierry

Thierry Reding (5):
  iommu: Implement iommu_put_resv_regions_simple()
  iommu: arm: Use iommu_put_resv_regions_simple()
  iommu: amd: Use iommu_put_resv_regions_simple()
  iommu: intel: Use iommu_put_resv_regions_simple()
  iommu: virt: Use iommu_put_resv_regions_simple()

 drivers/iommu/amd_iommu.c| 11 +--
 drivers/iommu/arm-smmu-v3.c  | 11 +--
 drivers/iommu/arm-smmu.c | 11 +--
 drivers/iommu/intel-iommu.c  | 11 +--
 drivers/iommu/iommu.c| 19 +++
 drivers/iommu/virtio-iommu.c | 14 +++---
 include/linux/iommu.h|  2 ++
 7 files changed, 28 insertions(+), 51 deletions(-)

-- 
2.22.0



[PATCH 2/2] iommu: dma: Use of_iommu_get_resv_regions()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

For device tree nodes, use the standard of_iommu_get_resv_regions()
implementation to obtain the reserved memory regions associated with a
device.

Cc: Rob Herring 
Cc: Frank Rowand 
Cc: devicet...@vger.kernel.org
Signed-off-by: Thierry Reding 
---
 drivers/iommu/dma-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index de68b4a02aea..31d48e55ab55 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -164,6 +165,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct 
list_head *list)
if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
iort_iommu_msi_get_resv_regions(dev, list);
 
+   if (dev->of_node)
+   of_iommu_get_resv_regions(dev, list);
 }
 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
 
-- 
2.22.0



[PATCH 0/2] iommu: Support reserved-memory regions

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

These two patches implement support for retrieving a list of reserved
regions for a device from its device tree node. These regions are
described by the reserved-memory bindings:

Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt

These reserved memory regions will be used to establish 1:1 mappings.
One case where this is useful is when the Linux kernel wants to take
over the display controller configuration from a bootloader. In order
to ensure that the display controller can keep scanning out from the
framebuffer allocated by the bootloader without faulting after the
IOMMU has been enabled, a 1:1 mapping needs to be established.
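
For illustration, a hypothetical device tree fragment using that binding
(all node names and addresses below are made up):

	reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		/* framebuffer set up by the bootloader */
		fb: framebuffer@92000000 {
			reg = <0x0 0x92000000 0x0 0x00800000>;
		};
	};

	display@15200000 {
		/* ... */
		memory-region = <&fb>;
	};

The display controller's memory-region phandle is what the new helper
walks to build the 1:1 mappings.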

Thierry

Thierry Reding (2):
  iommu: Implement of_iommu_get_resv_regions()
  iommu: dma: Use of_iommu_get_resv_regions()

 drivers/iommu/dma-iommu.c |  3 +++
 drivers/iommu/of_iommu.c  | 39 +++
 include/linux/of_iommu.h  |  8 
 3 files changed, 50 insertions(+)

-- 
2.22.0



[PATCH 1/2] iommu: Implement of_iommu_get_resv_regions()

2019-08-29 Thread Thierry Reding
From: Thierry Reding 

This is an implementation that IOMMU drivers can use to obtain reserved
memory regions from a device tree node. It uses the reserved-memory DT
bindings to find the regions associated with a given device. These
regions will be used to create 1:1 mappings in the IOMMU domain that
the devices will be attached to.

Cc: Rob Herring 
Cc: Frank Rowand 
Cc: devicet...@vger.kernel.org
Signed-off-by: Thierry Reding 
---
 drivers/iommu/of_iommu.c | 39 +++
 include/linux/of_iommu.h |  8 
 2 files changed, 47 insertions(+)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 614a93aa5305..0d47f626b854 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -225,3 +226,41 @@ const struct iommu_ops *of_iommu_configure(struct device 
*dev,
 
return ops;
 }
+
+/**
+ * of_iommu_get_resv_regions - reserved region driver helper for device tree
+ * @dev: device for which to get reserved regions
+ * @list: reserved region list
+ *
+ * IOMMU drivers can use this to implement their .get_resv_regions() callback
+ * for memory regions attached to a device tree node. See the reserved-memory
+ * device tree bindings on how to use these:
+ *
+ *   Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+ */
+void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
+{
+   struct of_phandle_iterator it;
+   int err;
+
+   of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) {
+   struct iommu_resv_region *region;
+   struct resource res;
+
+   err = of_address_to_resource(it.node, 0, &res);
+   if (err < 0) {
+   dev_err(dev, "failed to parse memory region %pOF: %d\n",
+   it.node, err);
+   continue;
+   }
+
+   region = iommu_alloc_resv_region(res.start, resource_size(&res),
+IOMMU_READ | IOMMU_WRITE,
+IOMMU_RESV_DIRECT_RELAXABLE);
+   if (!region)
+   continue;
+
+   list_add_tail(&region->list, list);
+   }
+}
+EXPORT_SYMBOL(of_iommu_get_resv_regions);
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index f3d40dd7bb66..fa16b26f55bc 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -15,6 +15,9 @@ extern int of_get_dma_window(struct device_node *dn, const 
char *prefix,
 extern const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np);
 
+extern void of_iommu_get_resv_regions(struct device *dev,
+ struct list_head *list);
+
 #else
 
 static inline int of_get_dma_window(struct device_node *dn, const char *prefix,
@@ -30,6 +33,11 @@ static inline const struct iommu_ops 
*of_iommu_configure(struct device *dev,
return NULL;
 }
 
+static inline void of_iommu_get_resv_regions(struct device *dev,
+struct list_head *list)
+{
+}
+
 #endif /* CONFIG_OF_IOMMU */
 
 #endif /* __OF_IOMMU_H */
-- 
2.22.0



Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-29 Thread Lu Baolu

Hi,

On 8/29/19 3:58 PM, Janusz Krzysztofik wrote:
> Hi Baolu,
> 
> On Thursday, August 29, 2019 3:43:31 AM CEST Lu Baolu wrote:
> > Hi Janusz,
> > 
> > On 8/28/19 10:17 PM, Janusz Krzysztofik wrote:
> > > > We should avoid kernel panic when an intel_unmap() is called against
> > > > a non-existent domain.
> > > Does that mean you suggest to replace
> > > BUG_ON(!domain);
> > > with something like
> > > if (WARN_ON(!domain))
> > > return;
> > > and to not care of orphaned mappings left allocated?  Is there a way to inform
> > > users that their active DMA mappings are no longer valid and they shouldn't
> > > call dma_unmap_*()?
> > > 
> > > > But we shouldn't expect the IOMMU driver not
> > > > cleaning up the domain info when a device remove notification comes and
> > > > wait until all file descriptors being closed, right?
> > > Shouldn't then the IOMMU driver take care of cleaning up resources still
> > > allocated on device remove before it invalidates and forgets their pointers?
> > 
> > You are right. We need to wait until all allocated resources (iova and
> > mappings) are released.
> > 
> > How about registering a callback for BUS_NOTIFY_UNBOUND_DRIVER, and
> > removing the domain info when the driver detachment completes?
> 
> Device core calls BUS_NOTIFY_UNBOUND_DRIVER on each driver unbind, regardless
> of a device being removed or not.  As long as the device is not unplugged and
> the BUS_NOTIFY_REMOVED_DEVICE notification is not generated, an unbound driver
> is not a problem here.
> Moreover, BUS_NOTIFY_UNBOUND_DRIVER is called even before
> BUS_NOTIFY_REMOVED_DEVICE, so that wouldn't help anyway.
> Last but not least, bus events are independent of the IOMMU driver use via
> the DMA-API it exposes.

Fair enough.

> If keeping data for unplugged devices and reusing it on device re-plug is not
> acceptable then maybe the IOMMU driver should perform reference counting of
> its internal resources occupied by DMA-API users and perform cleanups on last
> release?

I am not saying that keeping data is not acceptable. I just want to
check whether there are any other solutions.

Best regards,
Baolu


Re: [RFC PATCH] iommu/vt-d: Fix IOMMU field not populated on device hot re-plug

2019-08-29 Thread Janusz Krzysztofik
Hi Baolu,

On Thursday, August 29, 2019 3:43:31 AM CEST Lu Baolu wrote:
> Hi Janusz,
> 
> On 8/28/19 10:17 PM, Janusz Krzysztofik wrote:
> >> We should avoid kernel panic when an intel_unmap() is called against
> >> a non-existent domain.
> > Does that mean you suggest to replace
> > BUG_ON(!domain);
> > with something like
> > if (WARN_ON(!domain))
> > return;
> > and to not care of orphaned mappings left allocated?  Is there a way to inform
> > users that their active DMA mappings are no longer valid and they shouldn't
> > call dma_unmap_*()?
> > 
> >> But we shouldn't expect the IOMMU driver not
> >> cleaning up the domain info when a device remove notification comes and
> >> wait until all file descriptors being closed, right?
> > Shouldn't then the IOMMU driver take care of cleaning up resources still
> > allocated on device remove before it invalidates and forgets their pointers?
> > 
> 
> You are right. We need to wait until all allocated resources (iova and
> mappings) are released.
> 
> How about registering a callback for BUS_NOTIFY_UNBOUND_DRIVER, and
> removing the domain info when the driver detachment completes?

Device core calls BUS_NOTIFY_UNBOUND_DRIVER on each driver unbind, regardless
of a device being removed or not.  As long as the device is not unplugged and
the BUS_NOTIFY_REMOVED_DEVICE notification is not generated, an unbound driver
is not a problem here.
Moreover, BUS_NOTIFY_UNBOUND_DRIVER is called even before
BUS_NOTIFY_REMOVED_DEVICE, so that wouldn't help anyway.
Last but not least, bus events are independent of the IOMMU driver use via
the DMA-API it exposes.

If keeping data for unplugged devices and reusing it on device re-plug is not 
acceptable then maybe the IOMMU driver should perform reference counting of 
its internal resources occupied by DMA-API users and perform cleanups on last 
release?
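
For illustration only, a minimal sketch of that reference-counting idea
(hypothetical code, not the current driver; all names are made up):

#include <linux/kref.h>
#include <linux/slab.h>

/* hypothetical wrapper around the driver's per-device domain info */
struct domain_info {
	struct kref refs;	/* one reference per live DMA-API allocation */
	/* ... iova domain, mappings, etc. ... */
};

static struct domain_info *domain_info_alloc(void)
{
	struct domain_info *info = kzalloc(sizeof(*info), GFP_KERNEL);

	if (info)
		kref_init(&info->refs);	/* initial reference held by the device */
	return info;
}

static void domain_info_release(struct kref *kref)
{
	struct domain_info *info = container_of(kref, struct domain_info, refs);

	/* tear down the iova domain and any remaining mappings here */
	kfree(info);
}

/* each dma_map_*() would take a reference ... */
static void domain_info_get(struct domain_info *info)
{
	kref_get(&info->refs);
}

/*
 * ... and each dma_unmap_*() or the device-removal path would drop one;
 * the info is only freed once the last user is gone, even after
 * hot-unplug.
 */
static void domain_info_put(struct domain_info *info)
{
	kref_put(&info->refs, domain_info_release);
}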

Thanks,
Janusz


> > Thanks,
> > Janusz
> 
> Best regards,
> Baolu
> 



