Re: [PATCH 2/7] iommu/dma-iommu: Split iommu_dma_map_msi_msg in two parts

2019-04-23 Thread Christoph Hellwig
On Thu, Apr 18, 2019 at 06:26:06PM +0100, Julien Grall wrote:
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
>  {
> + struct device *dev = msi_desc_to_dev(desc);
>   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>   struct iommu_dma_cookie *cookie;
>   unsigned long flags;
>  
> + if (!domain || !domain->iova_cookie) {
> + desc->iommu_cookie = NULL;
> + return 0;
> + }
>  
>   cookie = domain->iova_cookie;
>  
> @@ -908,10 +908,33 @@ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
>* of an MSI from within an IPI handler.
>*/
>   spin_lock_irqsave(&cookie->msi_lock, flags);
> + desc->iommu_cookie = iommu_dma_get_msi_page(dev, msi_addr, domain);
>   spin_unlock_irqrestore(&cookie->msi_lock, flags);
>  
> + return (desc->iommu_cookie) ? 0 : -ENOMEM;

No need for the braces.  Also I personally find a:

if (!desc->iommu_cookie)
return -ENOMEM;
return 0;

much more readable, but that might just be personal preference.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 16/29] drm: Simplify stacktrace handling

2019-04-23 Thread Daniel Vetter
On Thu, Apr 18, 2019 at 10:41:35AM +0200, Thomas Gleixner wrote:
> Replace the indirection through struct stack_trace by using the storage
> array based interfaces.
> 
> The original code in all printing functions is really wrong. It allocates a
> storage array on stack which is unused because depot_fetch_stack() does not
> store anything in it. It overwrites the entries pointer in the stack_trace
> struct so it points to the depot storage.

Thanks for cleaning this up for us!

> Signed-off-by: Thomas Gleixner 
> Cc: intel-...@lists.freedesktop.org
> Cc: Joonas Lahtinen 
> Cc: Maarten Lankhorst 
> Cc: dri-de...@lists.freedesktop.org
> Cc: David Airlie 
> Cc: Jani Nikula 
> Cc: Daniel Vetter 
> Cc: Rodrigo Vivi 

Acked-by: Daniel Vetter 

for merging through whatever tree is convenient for you (or tell me I
should pick it up into drm-next when the prep work landed).

Cheers, Daniel

> ---
>  drivers/gpu/drm/drm_mm.c|   22 +++---
>  drivers/gpu/drm/i915/i915_vma.c |   11 ---
>  drivers/gpu/drm/i915/intel_runtime_pm.c |   21 +++--
>  3 files changed, 18 insertions(+), 36 deletions(-)
> 
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -106,22 +106,19 @@
>  static noinline void save_stack(struct drm_mm_node *node)
>  {
>   unsigned long entries[STACKDEPTH];
> - struct stack_trace trace = {
> - .entries = entries,
> - .max_entries = STACKDEPTH,
> - .skip = 1
> - };
> + unsigned int n;
>  
> - save_stack_trace(&trace);
> + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
>  
>   /* May be called under spinlock, so avoid sleeping */
> - node->stack = depot_save_stack(&trace, GFP_NOWAIT);
> + node->stack = stack_depot_save(entries, n, GFP_NOWAIT);
>  }
>  
>  static void show_leaks(struct drm_mm *mm)
>  {
>   struct drm_mm_node *node;
> - unsigned long entries[STACKDEPTH];
> + unsigned long *entries;
> + unsigned int nr_entries;
>   char *buf;
>  
>   buf = kmalloc(BUFSZ, GFP_KERNEL);
> @@ -129,19 +126,14 @@ static void show_leaks(struct drm_mm *mm
>   return;
>  
>   list_for_each_entry(node, drm_mm_nodes(mm), node_list) {
> - struct stack_trace trace = {
> - .entries = entries,
> - .max_entries = STACKDEPTH
> - };
> -
>   if (!node->stack) {
>   DRM_ERROR("node [%08llx + %08llx]: unknown owner\n",
> node->start, node->size);
>   continue;
>   }
>  
> - depot_fetch_stack(node->stack, &trace);
> - snprint_stack_trace(buf, BUFSZ, &trace, 0);
> + nr_entries = stack_depot_fetch(node->stack, &entries);
> + stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0);
>   DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s",
> node->start, node->size, buf);
>   }
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -36,11 +36,8 @@
>  
>  static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>  {
> - unsigned long entries[12];
> - struct stack_trace trace = {
> - .entries = entries,
> - .max_entries = ARRAY_SIZE(entries),
> - };
> + unsigned long *entries;
> + unsigned int nr_entries;
>   char buf[512];
>  
>   if (!vma->node.stack) {
> @@ -49,8 +46,8 @@ static void vma_print_allocator(struct i
>   return;
>   }
>  
> - depot_fetch_stack(vma->node.stack, &trace);
> - snprint_stack_trace(buf, sizeof(buf), &trace, 0);
> + nr_entries = stack_depot_fetch(vma->node.stack, &entries);
> + stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
>   DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n",
>vma->node.start, vma->node.size, reason, buf);
>  }
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -60,27 +60,20 @@
>  static noinline depot_stack_handle_t __save_depot_stack(void)
>  {
>   unsigned long entries[STACKDEPTH];
> - struct stack_trace trace = {
> - .entries = entries,
> - .max_entries = ARRAY_SIZE(entries),
> - .skip = 1,
> - };
> + unsigned int n;
>  
> - save_stack_trace(&trace);
> - return depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN);
> + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> + return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
>  }
>  
>  static void __print_depot_stack(depot_stack_handle_t stack,
>   char *buf, int sz, int indent)
>  {
> - unsigned long entries[STACKDEPTH];
> - struct stack_trace trace = {
> - .entries = entries,
> - .max_entries = ARRAY_SIZE(entries),
> -

Re: [PATCH v3 02/10] swiotlb: Factor out slot allocation and free

2019-04-23 Thread Lu Baolu

Hi Christoph,

On 4/23/19 2:12 PM, Christoph Hellwig wrote:

On Tue, Apr 23, 2019 at 09:58:19AM +0800, Lu Baolu wrote:

554 for (i = 0; i < nslots; i++)
555 io_tlb_orig_addr[index+i] = orig_addr + (i <<
IO_TLB_SHIFT);

Could the tlb orig address be set to PAGE_ALIGN_DOWN(orig_addr)? We
couldn't assume the bounce buffer just starts from the beginning of the
slot. Or anything I missed?


I don't see why we need to align the orig_addr.  We only use
io_tlb_orig_addr to find the address(es) for the swiotlb_bounce calls,
and I don't see a good reason why we'd need to align those.



Let me show you an example. Normally, if IOMMU is on, the device DMAs
with an iova. IOMMU takes the responsibility to translate the iova to
the physical address in paging mode.

   Physical
   IOVA Buffer
.-.  .-.
| IOMMU   |  | IOMMU   |
| PAGE|  | PAGE|
.-. ---> .-.
| Buffer  |  | Buffer  |
'-'  '-'
| |  | |
| |  | |
'-'  '-'
.---.
| IOMMU |
'---'

When we add the bounce buffer between IOVA and physical buffer, the
bounce buffer must start from the same offset in a page, otherwise,
IOMMU can't work here.


  Bouce Physical
   IOVA   Buffer Buffer
.-. .-.   .-.
| | .-> | Buffer  | <---. | |
| | |   '-' | | |
.-. |   | | | .-.
| Buffer  |NO   | |  YES  | Buffer  |
'-' | |   '-'
| | | |   | |
| | | |   | |
'-' '-'   '-'
   .---. .-.
   | IOMMU | | swiotlb |
   '---' '-'

A workable buffer location looks like below.


  Bouce Physical
   IOVA   Buffer Buffer
.-. .-.   .-.
| | | |   | |
| | | |   | |
.-. --->.-.<- .-.
| Buffer  |YES  | Buffer  |  YES  | Buffer  |
'-' '-'   '-'
| | | |   | |
| | | |   | |
'-' '-'   '-'
   .---. .-.
   | IOMMU | | swiotlb |
   '---' '-'

Best regards,
Lu Baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH next 13/25] iommu/omap: Use dev_get_drvdata()

2019-04-23 Thread Kefeng Wang
Using dev_get_drvdata directly.

Cc: Joerg Roedel 
Cc: iommu@lists.linux-foundation.org
Signed-off-by: Kefeng Wang 
---
 drivers/iommu/omap-iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index d2fb347aa4ff..b16c711fc5fc 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -38,8 +38,7 @@
 
 static const struct iommu_ops omap_iommu_ops;
 
-#define to_iommu(dev)  \
-   ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev)))
+#define to_iommu(dev)  ((struct omap_iommu *)dev_get_drvdata(dev))
 
 /* bitmap of the page sizes currently supported */
 #define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 08/10] iommu/vt-d: Check whether device requires bounce buffer

2019-04-23 Thread Lu Baolu

Hi,

On 4/23/19 2:08 PM, Christoph Hellwig wrote:

Again, this and the option should not be in a specific iommu driver.



The option of whether bounce is ignored should be in the specific iommu
driver.


Why?  As a user I could not care less which IOMMU driver my particular
system uses.



Looks reasonable to me. Let's listen to more opinions.

Best regards,
Lu Baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 25/29] livepatch: Simplify stack trace retrieval

2019-04-23 Thread Miroslav Benes
On Thu, 18 Apr 2019, Thomas Gleixner wrote:

> Replace the indirection through struct stack_trace by using the storage
> array based interfaces.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Miroslav Benes 

Feel free to take it through tip or let us know to pick it up.

Miroslav
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Peter Zijlstra
On Mon, Apr 22, 2019 at 10:27:45AM -0300, Mauro Carvalho Chehab wrote:

>  .../{atomic_bitops.txt => atomic_bitops.rst}  |  2 +

What happened to atomic_t.txt? Also NAK, I still occasionally touch
these files.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Julien Grall

Hi Thomas,

On 4/18/19 8:28 PM, Thomas Gleixner wrote:

On Thu, 18 Apr 2019, Julien Grall wrote:


When an MSI doorbell is located downstream of an IOMMU, it is required
to swizzle the physical address with an appropriately-mapped IOVA for any
device attached to one of our DMA ops domain.

At the moment, the allocation of the mapping may be done when composing
the message. However, the composing may be done in non-preemptible
context while the allocation requires to be called from preemptible
context.

A follow-up patch will split the current logic in two functions
requiring to keep an IOMMU cookie per MSI.

This patch introduces a new field in msi_desc to store an IOMMU cookie
when CONFIG_IOMMU_DMA is selected.


# git grep 'This patch' Documentation/process/

Applied to the whole series.


Sorry for that. I will rework all the commit messages and resend the series.

Cheers,

--
Julien Grall
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 12/21] dma-iommu: factor atomic pool allocations into helpers

2019-04-23 Thread Robin Murphy

On 2019-04-19 10:07 am, Christoph Hellwig wrote:

On Thu, Apr 18, 2019 at 05:41:00PM +0100, Robin Murphy wrote:

  From a very high level POV this looks ok, but sometimes a bit too
convoluted to me.  The major issue why I went with the version I posted
is that I can cleanly ifdef out the remap code in just a few sections.
In this version it is spread out a lot more, and the use of IS_ENABLED
means that we'd need a lot more stubs for functionality that won't
ever be called but needs to be compilable.


What functionality do you have planned in that regard? I did do a quick
build test of my arm64 config with DMA_DIRECT_REMAP hacked out, and
dma-iommu.o appeared to link OK (although other bits of arm64 and
dma-direct didn't, as expected). I will try x86 with IOMMU_DMA to make
sure, though.


Yeah, this seems to actually work, there just is a huge chunk of
remapping that is hopefully discarded by the compiler even without the
ifdefs.


Right, the major point of this approach is that you don't need stubs; 
indeed, stubs themselves fall into the category of "#ifdefed code I'd 
prefer to avoid". The preprocessing essentially resolves to:


if (0)
function_that_doesnt_exist();

so compilation treats it as an external reference, but since constant 
folding ends up eliding the call, that symbol isn't referenced in the 
final object, so linking never has to resolve it. All it needs is a 
declaration to avoid a compiler warning, but that's the same declaration 
that's needed anyway for when the function does exist. Similarly, static 
functions like iommu_dma_alloc_remap() still get compiled - so we don't 
lose coverage - but then get discarded at the link stage (via 
gc-sections) since they end up unreferenced. It's really pretty neat.


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 12/21] dma-iommu: factor atomic pool allocations into helpers

2019-04-23 Thread Robin Murphy

On 19/04/2019 09:23, Christoph Hellwig wrote:

On Thu, Apr 18, 2019 at 07:15:00PM +0100, Robin Murphy wrote:

Still, I've worked in the vm_map_pages() stuff pending in MM and given them
the same treatment to finish the picture. Both x86_64_defconfig and
i386_defconfig do indeed compile and link fine as I expected, so I really
would like to understand the concern around #ifdefs better.


This looks generally fine to me.  One thing I'd like to do is to
generally make use of the fact that __iommu_dma_get_pages returns NULL
for the force contigous case as that cleans up a few things.  Also
for the !DMA_REMAP case we need to try the page allocator when
dma_alloc_from_contiguous does not return a page.  What do you think
of the following incremental diff?  If that is fine with you I can
fold that in and add back in the remaining patches from my series
not obsoleted by your patches and resend.


Wouldn't this suffice? Since we also use alloc_pages() in the coherent 
atomic case, the free path should already be able to deal with it.


Let me take a proper look at v3 and see how it all looks in context.

Robin.

->8-
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1bc8d1de1a1d..0a02ddc27862 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -944,6 +944,8 @@ static void *iommu_dma_alloc(struct device *dev, 
size_t size,

   (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
page = dma_alloc_from_contiguous(dev, count, page_order,
 gfp & __GFP_NOWARN);
+   if (!page)
+   page = alloc_pages(gfp, page_order);
} else {
return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs);
}
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Marc Zyngier
Hi Julien,

On 18/04/2019 18:26, Julien Grall wrote:
> When an MSI doorbell is located downstream of an IOMMU, it is required
> to swizzle the physical address with an appropriately-mapped IOVA for any
> device attached to one of our DMA ops domain.
> 
> At the moment, the allocation of the mapping may be done when composing
> the message. However, the composing may be done in non-preemptible
> context while the allocation requires to be called from preemptible
> context.
> 
> A follow-up patch will split the current logic in two functions
> requiring to keep an IOMMU cookie per MSI.
> 
> This patch introduces a new field in msi_desc to store an IOMMU cookie
> when CONFIG_IOMMU_DMA is selected.
> 
> Signed-off-by: Julien Grall 
> ---
>  include/linux/msi.h | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 7e9b81c3b50d..d7907feef1bb 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -77,6 +77,9 @@ struct msi_desc {
>   struct device   *dev;
>   struct msi_msg  msg;
>   struct irq_affinity_desc*affinity;
> +#ifdef CONFIG_IOMMU_DMA
> + const void  *iommu_cookie;
> +#endif
>  
>   union {
>   /* PCI MSI/X specific data */
> 

Given that this is the only member in this structure that is dependent
on a config option, you could also add a couple of accessors that would
do nothing when IOMMU_DMA is not selected (and use that in the DMA code).

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Julien Grall

On 4/23/19 11:23 AM, Marc Zyngier wrote:

Hi Julien,


Hi Marc,


On 18/04/2019 18:26, Julien Grall wrote:

When an MSI doorbell is located downstream of an IOMMU, it is required
to swizzle the physical address with an appropriately-mapped IOVA for any
device attached to one of our DMA ops domain.

At the moment, the allocation of the mapping may be done when composing
the message. However, the composing may be done in non-preemptible
context while the allocation requires to be called from preemptible
context.

A follow-up patch will split the current logic in two functions
requiring to keep an IOMMU cookie per MSI.

This patch introduces a new field in msi_desc to store an IOMMU cookie
when CONFIG_IOMMU_DMA is selected.

Signed-off-by: Julien Grall 
---
  include/linux/msi.h | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 7e9b81c3b50d..d7907feef1bb 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -77,6 +77,9 @@ struct msi_desc {
struct device   *dev;
struct msi_msg  msg;
struct irq_affinity_desc*affinity;
+#ifdef CONFIG_IOMMU_DMA
+   const void  *iommu_cookie;
+#endif
  
  	union {

/* PCI MSI/X specific data */



Given that this is the only member in this structure that is dependent
on a config option, you could also add a couple of accessors that would
do nothing when IOMMU_DMA is not selected (and use that in the DMA code).


I haven't seen any use of the helpers so far because the DMA code is 
also protected by IOMMU_DMA.


I can add the helpers in the next version if you see any use outside of 
the DMA code.


Cheers,

--
Julien Grall
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 10/18] iommu/vt-d: Add custom allocator for IOASID

2019-04-23 Thread Jean-Philippe Brucker
On 19/04/2019 05:29, Jacob Pan wrote:
> If it is OK with you, I will squash my changes into your ioasid patch
> and address the review comments in the v2 of this set, OK?
> i.e. 
> [PATCH 02/18] ioasid: Add custom IOASID allocator
> [PATCH 03/18] ioasid: Convert ioasid_idr to XArray

That's fine by me, although the "base" and "custom" patches are already
relatively large, so it might make sense to keep them separate

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/3] xen/swiotlb: fix an issue and improve swiotlb-xen

2019-04-23 Thread Juergen Gross
While hunting an issue in swiotlb-xen I stumbled over a wrong test
and found some areas for improvement.

Juergen Gross (3):
  xen/swiotlb: fix condition for calling xen_destroy_contiguous_region()
  xen/swiotlb: simplify range_straddles_page_boundary()
  xen/swiotlb: remember having called xen_create_contiguous_region()

 drivers/xen/swiotlb-xen.c | 37 -
 1 file changed, 12 insertions(+), 25 deletions(-)

-- 
2.16.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/3] xen/swiotlb: simplify range_straddles_page_boundary()

2019-04-23 Thread Juergen Gross
range_straddles_page_boundary() is open coding several macros from
include/xen/page.h. Use those instead. Additionally there is no need
to have check_pages_physically_contiguous() as a separate function as
it is used only once, so merge it into range_straddles_page_boundary().

Signed-off-by: Juergen Gross 
---
 drivers/xen/swiotlb-xen.c | 28 ++--
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 42a3924e6d91..43b6e65ae256 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -92,34 +92,18 @@ static inline dma_addr_t xen_virt_to_bus(void *address)
return xen_phys_to_bus(virt_to_phys(address));
 }
 
-static int check_pages_physically_contiguous(unsigned long xen_pfn,
-unsigned int offset,
-size_t length)
+static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
 {
-   unsigned long next_bfn;
-   int i;
-   int nr_pages;
+   unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
+   unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);
 
next_bfn = pfn_to_bfn(xen_pfn);
-   nr_pages = (offset + length + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT;
 
-   for (i = 1; i < nr_pages; i++) {
+   for (i = 1; i < nr_pages; i++)
if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
-   return 0;
-   }
-   return 1;
-}
+   return 1;
 
-static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
-{
-   unsigned long xen_pfn = XEN_PFN_DOWN(p);
-   unsigned int offset = p & ~XEN_PAGE_MASK;
-
-   if (offset + size <= XEN_PAGE_SIZE)
-   return 0;
-   if (check_pages_physically_contiguous(xen_pfn, offset, size))
-   return 0;
-   return 1;
+   return 0;
 }
 
 static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
-- 
2.16.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/3] xen/swiotlb: fix condition for calling xen_destroy_contiguous_region()

2019-04-23 Thread Juergen Gross
The condition in xen_swiotlb_free_coherent() for deciding whether to
call xen_destroy_contiguous_region() is wrong: in case the region to
be freed is not contiguous calling xen_destroy_contiguous_region() is
the wrong thing to do: it would result in inconsistent mappings of
multiple PFNs to the same MFN. This will lead to various strange
crashes or data corruption.

Instead of calling xen_destroy_contiguous_region() in that case a
warning should be issued as that situation should never occur.

Cc: sta...@vger.kernel.org
Signed-off-by: Juergen Gross 
---
 drivers/xen/swiotlb-xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 877baf2a94f4..42a3924e6d91 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -360,8 +360,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t 
size, void *vaddr,
/* Convert the size to actually allocated. */
size = 1UL << (order + XEN_PAGE_SHIFT);
 
-   if (((dev_addr + size - 1 <= dma_mask)) ||
-   range_straddles_page_boundary(phys, size))
+   if ((dev_addr + size - 1 <= dma_mask) &&
+   !WARN_ON(range_straddles_page_boundary(phys, size)))
xen_destroy_contiguous_region(phys, order);
 
xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
-- 
2.16.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/7] iommu/dma-iommu: Split iommu_dma_map_msi_msg in two parts

2019-04-23 Thread Marc Zyngier
On 18/04/2019 18:26, Julien Grall wrote:
> On RT, the function iommu_dma_map_msi_msg may be called from
> non-preemptible context. This will lead to a splat with
> CONFIG_DEBUG_ATOMIC_SLEEP as the function is using spin_lock
> (they can sleep on RT).
> 
> The function iommu_dma_map_msi_msg is used to map the MSI page in the
> IOMMU PT and update the MSI message with the IOVA.
> 
> Only the part to lookup for the MSI page requires to be called in
> preemptible context. As the MSI page cannot change over the lifecycle
> of the MSI interrupt, the lookup can be cached and re-used later on.
> 
> This patch splits the function iommu_dma_map_msi_msg into two new
> functions:
> - iommu_dma_prepare_msi: This function will prepare the mapping in
> the IOMMU and store the cookie in the structure msi_desc. This
> function should be called in preemptible context.
> - iommu_dma_compose_msi_msg: This function will update the MSI
> message with the IOVA when the device is behind an IOMMU.
> 
> Signed-off-by: Julien Grall 
> ---
>  drivers/iommu/dma-iommu.c | 43 ---
>  include/linux/dma-iommu.h | 21 +
>  2 files changed, 53 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 77aabe637a60..f5c1f1685095 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -888,17 +888,17 @@ static struct iommu_dma_msi_page 
> *iommu_dma_get_msi_page(struct device *dev,
>   return NULL;
>  }
>  
> -void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)

I quite like the idea of moving from having an irq to having an msi_desc
passed to the IOMMU layer...

>  {
> - struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
> + struct device *dev = msi_desc_to_dev(desc);
>   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>   struct iommu_dma_cookie *cookie;
> - struct iommu_dma_msi_page *msi_page;
> - phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
>   unsigned long flags;
>  
> - if (!domain || !domain->iova_cookie)
> - return;
> + if (!domain || !domain->iova_cookie) {
> + desc->iommu_cookie = NULL;
> + return 0;
> + }
>  
>   cookie = domain->iova_cookie;
>  
> @@ -908,10 +908,33 @@ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
>* of an MSI from within an IPI handler.
>*/
>   spin_lock_irqsave(&cookie->msi_lock, flags);
> - msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
> + desc->iommu_cookie = iommu_dma_get_msi_page(dev, msi_addr, domain);
>   spin_unlock_irqrestore(&cookie->msi_lock, flags);
>  
> - if (WARN_ON(!msi_page)) {
> + return (desc->iommu_cookie) ? 0 : -ENOMEM;
> +}
> +
> +void iommu_dma_compose_msi_msg(int irq, struct msi_msg *msg)

... but I'd like it even better if it was uniform. Can you please move
the irq_get_msi_desc() to the callers of iommu_dma_compose_msi_msg(),
and make both functions take a msi_desc?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/3] xen/swiotlb: remember having called xen_create_contiguous_region()

2019-04-23 Thread Juergen Gross
Instead of always calling xen_destroy_contiguous_region() in case the
memory is DMA-able for the used device, do so only in case it has been
made DMA-able via xen_create_contiguous_region() before.

This will avoid a lot of xen_destroy_contiguous_region() calls for
64-bit capable devices.

As the memory in question is owned by swiotlb-xen the PG_owner_priv_1
flag of the first allocated page can be used for remembering.

Signed-off-by: Juergen Gross 
---
 drivers/xen/swiotlb-xen.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 43b6e65ae256..a72f181d8e20 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -321,6 +321,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t 
size,
xen_free_coherent_pages(hwdev, size, ret, 
(dma_addr_t)phys, attrs);
return NULL;
}
+   SetPageOwnerPriv1(virt_to_page(ret));
}
memset(ret, 0, size);
return ret;
@@ -344,9 +345,11 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t 
size, void *vaddr,
/* Convert the size to actually allocated. */
size = 1UL << (order + XEN_PAGE_SHIFT);
 
-   if ((dev_addr + size - 1 <= dma_mask) &&
-   !WARN_ON(range_straddles_page_boundary(phys, size)))
-   xen_destroy_contiguous_region(phys, order);
+   if (PageOwnerPriv1(virt_to_page(vaddr))) {
+   if (!WARN_ON(range_straddles_page_boundary(phys, size)))
+   xen_destroy_contiguous_region(phys, order);
+   ClearPageOwnerPriv1(virt_to_page(vaddr));
+   }
 
xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
 }
-- 
2.16.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/7] iommu/dma-iommu: Split iommu_dma_map_msi_msg in two parts

2019-04-23 Thread Julien Grall

Hi,

On 4/23/19 8:08 AM, Christoph Hellwig wrote:

On Thu, Apr 18, 2019 at 06:26:06PM +0100, Julien Grall wrote:

+int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
  {
+   struct device *dev = msi_desc_to_dev(desc);
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_dma_cookie *cookie;
unsigned long flags;
  
+	if (!domain || !domain->iova_cookie) {

+   desc->iommu_cookie = NULL;
+   return 0;
+   }
  
  	cookie = domain->iova_cookie;
  
@@ -908,10 +908,33 @@ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)

 * of an MSI from within an IPI handler.
 */
spin_lock_irqsave(&cookie->msi_lock, flags);
+   desc->iommu_cookie = iommu_dma_get_msi_page(dev, msi_addr, domain);
spin_unlock_irqrestore(&cookie->msi_lock, flags);
  
+	return (desc->iommu_cookie) ? 0 : -ENOMEM;


No need for the braces.  Also I personally find a:

if (!desc->iommu_cookie)
return -ENOMEM;
return 0;

much more readable, but that might just be personal preference.


I am happy either way. I will use your suggestion in the next version.

Cheers,

--
Julien Grall
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/1] iommu/arm-smmu: Log CBFRSYNRA register on context fault

2019-04-23 Thread Robin Murphy

On 22/04/2019 08:10, Vivek Gautam wrote:

Bits[15:0] in CBFRSYNRA register contain information about
StreamID of the incoming transaction that generated the
fault. Dump CBFRSYNRA register to get this info.
This is especially useful in a distributed SMMU architecture
where multiple masters are connected to the SMMU.
SID information helps to quickly identify the faulting
master device.

Signed-off-by: Vivek Gautam 
Reviewed-by: Bjorn Andersson 
---

Changes since v1:
  - Addressed review comments, given by Bjorn, for nits.

  drivers/iommu/arm-smmu-regs.h | 2 ++
  drivers/iommu/arm-smmu.c  | 7 +--
  2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
index a1226e4ab5f8..e9132a926761 100644
--- a/drivers/iommu/arm-smmu-regs.h
+++ b/drivers/iommu/arm-smmu-regs.h
@@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg {
  #define CBAR_IRPTNDX_SHIFT24
  #define CBAR_IRPTNDX_MASK 0xff
  
+#define ARM_SMMU_GR1_CBFRSYNRA(n)	(0x400 + ((n) << 2))

+
  #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
  #define CBA2R_RW64_32BIT  (0 << 0)
  #define CBA2R_RW64_64BIT  (1 << 0)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 045d93884164..e000473f8205 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -575,7 +575,9 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
+   void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
void __iomem *cb_base;
+   u32 cbfrsynra;


Nit: I would simply add to the existing "u32 fsr, fsynr;" declaration, 
but that's the sort of thing that could hopefully be fixed up when 
applying (or otherwise I might bulldoze it anyway in my eventual rework 
of register accesses throughout the driver). Regardless,


Reviewed-by: Robin Murphy 


cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
@@ -585,10 +587,11 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
*dev)
  
  	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);

iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+   cbfrsynra = readl_relaxed(gr1_base + 
ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
  
  	dev_err_ratelimited(smmu->dev,

-   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-   fsr, iova, fsynr, cfg->cbndx);
+   "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
cbfrsynra=0x%x, cb=%d\n",
+   fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
  
  	writel(fsr, cb_base + ARM_SMMU_CB_FSR);

return IRQ_HANDLED;


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/1] iommu/arm-smmu: Log CBFRSYNRA register on context fault

2019-04-23 Thread Ard Biesheuvel
On Tue, 23 Apr 2019 at 13:13, Robin Murphy  wrote:
>
> On 22/04/2019 08:10, Vivek Gautam wrote:
> > Bits[15:0] in CBFRSYNRA register contain information about
> > StreamID of the incoming transaction that generated the
> > fault. Dump CBFRSYNRA register to get this info.
> > This is specially useful in a distributed SMMU architecture
> > where multiple masters are connected to the SMMU.
> > SID information helps to quickly identify the faulting
> > master device.
> >
> > Signed-off-by: Vivek Gautam 
> > Reviewed-by: Bjorn Andersson 
> > ---
> >
> > Changes since v1:
> >   - Addressed review comments, given by Bjorn, for nits.
> >
> >   drivers/iommu/arm-smmu-regs.h | 2 ++
> >   drivers/iommu/arm-smmu.c  | 7 +--
> >   2 files changed, 7 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
> > index a1226e4ab5f8..e9132a926761 100644
> > --- a/drivers/iommu/arm-smmu-regs.h
> > +++ b/drivers/iommu/arm-smmu-regs.h
> > @@ -147,6 +147,8 @@ enum arm_smmu_s2cr_privcfg {
> >   #define CBAR_IRPTNDX_SHIFT  24
> >   #define CBAR_IRPTNDX_MASK   0xff
> >
> > +#define ARM_SMMU_GR1_CBFRSYNRA(n)(0x400 + ((n) << 2))
> > +
> >   #define ARM_SMMU_GR1_CBA2R(n)   (0x800 + ((n) << 2))
> >   #define CBA2R_RW64_32BIT(0 << 0)
> >   #define CBA2R_RW64_64BIT(1 << 0)
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index 045d93884164..e000473f8205 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -575,7 +575,9 @@ static irqreturn_t arm_smmu_context_fault(int irq, void 
> > *dev)
> >   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >   struct arm_smmu_device *smmu = smmu_domain->smmu;
> > + void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
> >   void __iomem *cb_base;
> > + u32 cbfrsynra;
>
> Nit: I would simply add to the existing "u32 fsr, fsynr;" declaration,
> but that's the sort of thing that could hopefully be fixed up when
> applying (or otherwise I might bulldoze it anyway in my eventual rework
> of register accesses throughout the driver). Regardless,
>
> Reviewed-by: Robin Murphy 
>
> >   cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
> >   fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
> > @@ -585,10 +587,11 @@ static irqreturn_t arm_smmu_context_fault(int irq, 
> > void *dev)
> >
> >   fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
> >   iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
> > + cbfrsynra = readl_relaxed(gr1_base + 
> > ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
> >
> >   dev_err_ratelimited(smmu->dev,
> > - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
> > cb=%d\n",
> > - fsr, iova, fsynr, cfg->cbndx);
> > + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, 
> > cbfrsynra=0x%x, cb=%d\n",
> > + fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
> >
> >   writel(fsr, cb_base + ARM_SMMU_CB_FSR);
> >   return IRQ_HANDLED;
> >
>

This is something I've had to hack up locally in the past, so

Acked-by: Ard Biesheuvel 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/1] iommu/arm-smmu: Log CBFRSYNRA register on context fault

2019-04-23 Thread Will Deacon
On Mon, Apr 22, 2019 at 12:40:36PM +0530, Vivek Gautam wrote:
> Bits[15:0] in CBFRSYNRA register contain information about
> StreamID of the incoming transaction that generated the
> fault. Dump CBFRSYNRA register to get this info.
> This is specially useful in a distributed SMMU architecture
> where multiple masters are connected to the SMMU.
> SID information helps to quickly identify the faulting
> master device.
> 
> Signed-off-by: Vivek Gautam 
> Reviewed-by: Bjorn Andersson 
> ---
> 
> Changes since v1:
>  - Addressed review comments, given by Bjorn, for nits.

Thanks, applied with minor fixup from Robin and Acks.

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 0/9] Add PCI ATS support to Arm SMMUv3

2019-04-23 Thread Will Deacon
On Wed, Apr 17, 2019 at 07:24:39PM +0100, Jean-Philippe Brucker wrote:
> This series enables PCI ATS in SMMUv3. Changes since v2 [1]:
> 
> * Fix build failure when building arm-smmu-v3 without CONFIG_PCI
>   Patches 1 and 2 are new.
> 
> * Only enable ATS if the root complex supports it. For the moment, only
>   IORT provides this information. I have patches for devicetree but
>   they are less mature and I'd rather make it a separate series.
> 
> * Tried to address most comments. I'll see if I can improve the firmware
>   code when adding devicetree support (see [2]).
> 
> Note that there is a small conflict with the SVA API. This series
> applies on top of Joerg's api-features branch for v5.2.

I'll pick this up via the SMMU queue, since the conflict with api-features
is trivial to resolve.

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC] arm64: swiotlb: cma_alloc error spew

2019-04-23 Thread Robin Murphy

On 17/04/2019 21:48, dann frazier wrote:

hey,
   I'm seeing an issue on a couple of arm64 systems[*] where they spew
~10K "cma: cma_alloc: alloc failed" messages at boot. The errors are
non-fatal, and bumping up cma to a large enough size (~128M) gets rid
of them - but that seems suboptimal. Bisection shows that this started
after commit fafadcd16595 ("swiotlb: don't dip into swiotlb pool for
coherent allocations"). It looks like __dma_direct_alloc_pages()
is opportunistically using CMA memory but falls back to non-CMA if CMA
disabled or unavailable. I've demonstrated that this fallback is
indeed returning a valid pointer. So perhaps the issue is really just
the warning emission.


The CMA area being full isn't necessarily an ignorable non-problem, 
since it means you won't be able to allocate the kind of large buffers 
for which CMA was intended. The question is, is it actually filling up 
with allocations that deserve to be there, or is this the same as I've 
seen on a log from a ThunderX2 system where it's getting exhausted by 
thousands upon thousands of trivial single page allocations? If it's the 
latter (CONFIG_CMA_DEBUG should help shed some light if necessary), then 
that does lean towards spending a bit more effort on this idea:


https://lore.kernel.org/lkml/20190327080821.gb20...@lst.de/

Robin.


The following naive patch solves the problem for me - just silence the
cma errors, since it looks like a soft error. But is there a better
approach?

[*] APM X-Gene & HiSilicon Hi1620 w/ SMMU disabled

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 6310ad01f915b..0324aa606c173 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -112,7 +112,7 @@ struct page *__dma_direct_alloc_pages(struct device *dev, 
size_t size,
 /* CMA can be used only in the context which permits sleeping */
 if (gfpflags_allow_blocking(gfp)) {
 page = dma_alloc_from_contiguous(dev, count, page_order,
-gfp & __GFP_NOWARN);
+true);
 if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
 dma_release_from_contiguous(dev, page, count);
 page = NULL;





___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Marc Zyngier
On 23/04/2019 11:51, Julien Grall wrote:
> On 4/23/19 11:23 AM, Marc Zyngier wrote:
>> Hi Julien,
> 
> Hi Marc,
> 
>> On 18/04/2019 18:26, Julien Grall wrote:
>>> When an MSI doorbell is located downstream of an IOMMU, it is required
>>> to swizzle the physical address with an appropriately-mapped IOVA for any
>>> device attached to one of our DMA ops domain.
>>>
>>> At the moment, the allocation of the mapping may be done when composing
>>> the message. However, the composing may be done in non-preemptible
>>> context while the allocation requires to be called from preemptible
>>> context.
>>>
>>> A follow-up patch will split the current logic in two functions
>>> requiring to keep an IOMMU cookie per MSI.
>>>
>>> This patch introduces a new field in msi_desc to store an IOMMU cookie
>>> when CONFIG_IOMMU_DMA is selected.
>>>
>>> Signed-off-by: Julien Grall 
>>> ---
>>>   include/linux/msi.h | 3 +++
>>>   1 file changed, 3 insertions(+)
>>>
>>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>>> index 7e9b81c3b50d..d7907feef1bb 100644
>>> --- a/include/linux/msi.h
>>> +++ b/include/linux/msi.h
>>> @@ -77,6 +77,9 @@ struct msi_desc {
>>> struct device   *dev;
>>> struct msi_msg  msg;
>>> struct irq_affinity_desc*affinity;
>>> +#ifdef CONFIG_IOMMU_DMA
>>> +   const void  *iommu_cookie;
>>> +#endif
>>>   
>>> union {
>>> /* PCI MSI/X specific data */
>>>
>>
>> Given that this is the only member in this structure that is dependent
>> on a config option, you could also add a couple of accessors that would
>> do nothing when IOMMU_DMA is not selected (and use that in the DMA code).
> 
> I haven't seen any use of the helpers so far because the DMA code is 
> also protected by IOMMU_DMA.
> 
> I can add the helpers in the next version if you see any use outside of 
> the DMA code.

There may not be any user yet, but I'd surely like to see the
accessors. This isn't very different from the stub functions you add in
patch #2.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: revert dma direct internals abuse

2019-04-23 Thread Thomas Hellstrom via iommu
Hi, Christoph,

On Mon, 2019-04-22 at 19:56 +0200, h...@lst.de wrote:
> On Wed, Apr 10, 2019 at 03:01:14PM +, Thomas Hellstrom wrote:
> > > So can you please respin a version acceptable to you and submit
> > > it
> > > for 5.1 ASAP?  Otherwise I'll need to move ahead with the simple
> > > revert.
> > 
> > I will. 
> > I need to do some testing to investigate how to best choose between
> > the
> > options, but will have something ready for 5.1.
> 
> I still don't see anything in -rc6..

Been on easter vacation. I have a patch ready for review, though, will
send it out in a moment.

It turns out that to do something well-behaved in case someone sets
swiotlb=force, the variable swiotlb_force would need to be exported.
(I can't rely on swiotlb_nr_tbl()).

That's not currently done in the patch, but instead the driver just
malfunctions like most other graphics drivers.

/Thomas

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Mike Snitzer
On Tue, Apr 23 2019 at  4:31am -0400,
Peter Zijlstra  wrote:

> On Mon, Apr 22, 2019 at 10:27:45AM -0300, Mauro Carvalho Chehab wrote:
> 
> >  .../{atomic_bitops.txt => atomic_bitops.rst}  |  2 +
> 
> What's happened to atomic_t.txt, also NAK, I still occasionally touch
> these files.

Seems Mauro's point is in the future we need to touch these .rst files
in terms of ReST compatible changes.

I'm dreading DM documentation changes in the future.. despite Mauro and
Jon Corbet informing me that ReST is simple, etc.

Mike
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Peter Zijlstra
On Tue, Apr 23, 2019 at 08:55:19AM -0400, Mike Snitzer wrote:
> On Tue, Apr 23 2019 at  4:31am -0400,
> Peter Zijlstra  wrote:
> 
> > On Mon, Apr 22, 2019 at 10:27:45AM -0300, Mauro Carvalho Chehab wrote:
> > 
> > >  .../{atomic_bitops.txt => atomic_bitops.rst}  |  2 +
> > 
> > What's happened to atomic_t.txt, also NAK, I still occasionally touch
> > these files.
> 
> Seems Mauro's point is in the future we need to touch these .rst files
> in terms of ReST compatible changes.
> 
> I'm dreading DM documentation changes in the future.. despite Mauro and
> Jon Corbet informing me that ReST is simple, etc.

Well, it _can_ be simple, I've seen examples of rst that were not far
from generated HTML contents. And I must give Jon credit for not
accepting that atrocious crap.

But yes, I have 0 motivation to learn or abide by rst. It simply doesn't
give me anything in return. There is no upside, only worse text files :/
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Robin Murphy

On 23/04/2019 12:46, Marc Zyngier wrote:

On 23/04/2019 11:51, Julien Grall wrote:

On 4/23/19 11:23 AM, Marc Zyngier wrote:

Hi Julien,


Hi Marc,


On 18/04/2019 18:26, Julien Grall wrote:

When an MSI doorbell is located downstream of an IOMMU, it is required
to swizzle the physical address with an appropriately-mapped IOVA for any
device attached to one of our DMA ops domain.

At the moment, the allocation of the mapping may be done when composing
the message. However, the composing may be done in non-preemptible
context while the allocation requires to be called from preemptible
context.

A follow-up patch will split the current logic in two functions
requiring to keep an IOMMU cookie per MSI.

This patch introduces a new field in msi_desc to store an IOMMU cookie
when CONFIG_IOMMU_DMA is selected.

Signed-off-by: Julien Grall 
---
   include/linux/msi.h | 3 +++
   1 file changed, 3 insertions(+)

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 7e9b81c3b50d..d7907feef1bb 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -77,6 +77,9 @@ struct msi_desc {
struct device   *dev;
struct msi_msg  msg;
struct irq_affinity_desc*affinity;
+#ifdef CONFIG_IOMMU_DMA
+   const void  *iommu_cookie;
+#endif
   
   	union {

/* PCI MSI/X specific data */



Given that this is the only member in this structure that is dependent
on a config option, you could also add a couple of accessors that would
do nothing when IOMMU_DMA is not selected (and use that in the DMA code).


I haven't seen any use of the helpers so far because the DMA code is
also protected by IOMMU_DMA.

I can add the helpers in the next version if you see any use outside of
the DMA code.


There may not be any user yet, but I'd surely like to see the
accessors. This isn't very different from the stub functions you add in
patch #2.


If you foresee this being useful in general, do you reckon it would be 
worth decoupling it under its own irqchip-layer Kconfig which can then 
be selected by IOMMU_DMA?


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Mike Snitzer
On Tue, Apr 23 2019 at  9:01am -0400,
Peter Zijlstra  wrote:

> On Tue, Apr 23, 2019 at 08:55:19AM -0400, Mike Snitzer wrote:
> > On Tue, Apr 23 2019 at  4:31am -0400,
> > Peter Zijlstra  wrote:
> > 
> > > On Mon, Apr 22, 2019 at 10:27:45AM -0300, Mauro Carvalho Chehab wrote:
> > > 
> > > >  .../{atomic_bitops.txt => atomic_bitops.rst}  |  2 +
> > > 
> > > What's happened to atomic_t.txt, also NAK, I still occasionally touch
> > > these files.
> > 
> > Seems Mauro's point is in the future we need to touch these .rst files
> > in terms of ReST compatible changes.
> > 
> > I'm dreading DM documentation changes in the future.. despite Mauro and
> > Jon Corbet informing me that ReST is simple, etc.
> 
> Well, it _can_ be simple, I've seen examples of rst that were not far
> from generated HTML contents. And I must give Jon credit for not
> accepting that atrocious crap.
> 
> But yes, I have 0 motivation to learn or abide by rst. It simply doesn't
> give me anything in return. There is no upside, only worse text files :/

Right, but these changes aren't meant for our benefit.  They are for
users who get cleaner web accessible Linux kernel docs.  Seems the
decision has been made that the users' benefit, and broader
modernization of Linux docs, outweighs the inconvenience for engineers
who maintain the content of said documentation.

This kind of thing happens a lot these days: pile on engineers, they can
take it :/
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Marc Zyngier
On 23/04/2019 14:19, Robin Murphy wrote:
> On 23/04/2019 12:46, Marc Zyngier wrote:
>> On 23/04/2019 11:51, Julien Grall wrote:
>>> On 4/23/19 11:23 AM, Marc Zyngier wrote:
 Hi Julien,
>>>
>>> Hi Marc,
>>>
 On 18/04/2019 18:26, Julien Grall wrote:
> When an MSI doorbell is located downstream of an IOMMU, it is required
> to swizzle the physical address with an appropriately-mapped IOVA for any
> device attached to one of our DMA ops domain.
>
> At the moment, the allocation of the mapping may be done when composing
> the message. However, the composing may be done in non-preemptible
> context while the allocation requires to be called from preemptible
> context.
>
> A follow-up patch will split the current logic in two functions
> requiring to keep an IOMMU cookie per MSI.
>
> This patch introduces a new field in msi_desc to store an IOMMU cookie
> when CONFIG_IOMMU_DMA is selected.
>
> Signed-off-by: Julien Grall 
> ---
>include/linux/msi.h | 3 +++
>1 file changed, 3 insertions(+)
>
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 7e9b81c3b50d..d7907feef1bb 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -77,6 +77,9 @@ struct msi_desc {
>   struct device   *dev;
>   struct msi_msg  msg;
>   struct irq_affinity_desc*affinity;
> +#ifdef CONFIG_IOMMU_DMA
> + const void  *iommu_cookie;
> +#endif
>
>   union {
>   /* PCI MSI/X specific data */
>

 Given that this is the only member in this structure that is dependent
 on a config option, you could also add a couple of accessors that would
 do nothing when IOMMU_DMA is not selected (and use that in the DMA code).
>>>
>>> I haven't seen any use of the helpers so far because the DMA code is
>>> also protected by IOMMU_DMA.
>>>
>>> I can add the helpers in the next version if you see any use outside of
>>> the DMA code.
>>
>> There may not be any user yet, but I'd surely like to see the
>> accessors. This isn't very different from the stub functions you add in
>> patch #2.
> 
> If you foresee this being useful in general, do you reckon it would be 
> worth decoupling it under its own irqchip-layer Kconfig which can then 
> be selected by IOMMU_DMA?

I think that'd be a useful thing to do, as most architectures do not
require this dynamic mapping of MSIs.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/3] xen/swiotlb: fix condition for calling xen_destroy_contiguous_region()

2019-04-23 Thread Boris Ostrovsky
On 4/23/19 6:54 AM, Juergen Gross wrote:
> The condition in xen_swiotlb_free_coherent() for deciding whether to
> call xen_destroy_contiguous_region() is wrong: in case the region to
> be freed is not contiguous calling xen_destroy_contiguous_region() is
> the wrong thing to do: it would result in inconsistent mappings of
> multiple PFNs to the same MFN. This will lead to various strange
> crashes or data corruption.
>
> Instead of calling xen_destroy_contiguous_region() in that case a
> warning should be issued as that situation should never occur.
>
> Cc: sta...@vger.kernel.org
> Signed-off-by: Juergen Gross 

Reviewed-by: Boris Ostrovsky 


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/3] xen/swiotlb: simplify range_straddles_page_boundary()

2019-04-23 Thread Boris Ostrovsky
On 4/23/19 6:54 AM, Juergen Gross wrote:
> range_straddles_page_boundary() is open coding several macros from
> include/xen/page.h. Use those instead. Additionally there is no need
> to have check_pages_physically_contiguous() as a separate function as
> it is used only once, so merge it into range_straddles_page_boundary().
>
> Signed-off-by: Juergen Gross 

Reviewed-by: Boris Ostrovsky 


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] xen/swiotlb: remember having called xen_create_contiguous_region()

2019-04-23 Thread Boris Ostrovsky
On 4/23/19 6:54 AM, Juergen Gross wrote:
> Instead of always calling xen_destroy_contiguous_region() in case the
> memory is DMA-able for the used device, do so only in case it has been
> made DMA-able via xen_create_contiguous_region() before.
>
> This will avoid a lot of xen_destroy_contiguous_region() calls for
> 64-bit capable devices.
>
> As the memory in question is owned by swiotlb-xen the PG_owner_priv_1
> flag of the first allocated page can be used for remembering.

I think a new enum in pageflags would be useful, and be consistent with
other flag uses.

-boris


>
> Signed-off-by: Juergen Gross 
> ---
>  drivers/xen/swiotlb-xen.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 43b6e65ae256..a72f181d8e20 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -321,6 +321,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t 
> size,
>   xen_free_coherent_pages(hwdev, size, ret, 
> (dma_addr_t)phys, attrs);
>   return NULL;
>   }
> + SetPageOwnerPriv1(virt_to_page(ret));
>   }
>   memset(ret, 0, size);
>   return ret;
> @@ -344,9 +345,11 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t 
> size, void *vaddr,
>   /* Convert the size to actually allocated. */
>   size = 1UL << (order + XEN_PAGE_SHIFT);
>  
> - if ((dev_addr + size - 1 <= dma_mask) &&
> - !WARN_ON(range_straddles_page_boundary(phys, size)))
> - xen_destroy_contiguous_region(phys, order);
> + if (PageOwnerPriv1(virt_to_page(vaddr))) {
> + if (!WARN_ON(range_straddles_page_boundary(phys, size)))
> + xen_destroy_contiguous_region(phys, order);
> + ClearPageOwnerPriv1(virt_to_page(vaddr));
> + }
>  
>   xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
>  }

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 12/21] dma-iommu: factor atomic pool allocations into helpers

2019-04-23 Thread Christoph Hellwig
On Tue, Apr 23, 2019 at 11:01:44AM +0100, Robin Murphy wrote:
> On 19/04/2019 09:23, Christoph Hellwig wrote:
>> On Thu, Apr 18, 2019 at 07:15:00PM +0100, Robin Murphy wrote:
>>> Still, I've worked in the vm_map_pages() stuff pending in MM and given them
>>> the same treatment to finish the picture. Both x86_64_defconfig and
>>> i386_defconfig do indeed compile and link fine as I expected, so I really
>>> would like to understand the concern around #ifdefs better.
>>
>> This looks generally fine to me.  One thing I'd like to do is to
>> generally make use of the fact that __iommu_dma_get_pages returns NULL
>> for the force contigous case as that cleans up a few things.  Also
>> for the !DMA_REMAP case we need to try the page allocator when
>> dma_alloc_from_contiguous does not return a page.  What do you thing
>> of the following incremental diff?  If that is fine with you I can
>> fold that in and add back in the remaining patches from my series
>> not obsoleted by your patches and resend.
>
> Wouldn't this suffice? Since we also use alloc_pages() in the coherent 
> atomic case, the free path should already be able to deal with it.

Yepp, that is about what I've done in v3, except that I've also folded
that coherent atomic case in a way very similar to dma-direct.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v4 0/3] PCIe Host request to reserve IOVA

2019-04-23 Thread Joerg Roedel
On Thu, Apr 18, 2019 at 06:48:28PM -0500, Bjorn Helgaas wrote:
> To make progress on this, I think we need an ack from Joerg for the
> dma-iommu.c part, an ack from Ray or Scott for the pcie-iproc.c part,
> and an ack from Robin for the thing as a whole.

I wait for Robin to review the dma-iommu change. If he is okay with it,
I am too.


Joerg
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/6] iommu/ipmmu-vmsa: Link IOMMUs and devices in sysfs

2019-04-23 Thread Geert Uytterhoeven
Hi Simon,

On Thu, Apr 11, 2019 at 10:12 AM Simon Horman  wrote:
> On Thu, Apr 11, 2019 at 10:10:28AM +0200, Simon Horman wrote:
> > On Wed, Apr 03, 2019 at 08:21:43PM +0200, Geert Uytterhoeven wrote:
> > > As of commit 7af9a5fdb9e0ca33 ("iommu/ipmmu-vmsa: Use
> > > iommu_device_sysfs_add()/remove()"), IOMMU devices show up under
> > > /sys/class/iommus/, but their "devices" subdirectories are empty.
> >
> > Should the path be /sys/class/iommu/ (no trailing 's') ?

Yes it does, thanks. Will fix.

> > > Likewise, devices tied to an IOMMU do not have an "iommu" backlink.
> > >
> > > Make sure all links are created, on both arm32 and arm64.
> > >
> > > Signed-off-by: Geert Uytterhoeven 
> > > Reviewed-by: Laurent Pinchart 
> > > ---
> > > v2:
> > >   - Add Reviewed-by.
> > > ---
> > >  drivers/iommu/ipmmu-vmsa.c | 24 +---
> > >  1 file changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
> > > index 9a380c10655e182d..9f2b781e20a0eba6 100644
> > > --- a/drivers/iommu/ipmmu-vmsa.c
> > > +++ b/drivers/iommu/ipmmu-vmsa.c
> > > @@ -885,27 +885,37 @@ static int ipmmu_init_arm_mapping(struct device 
> > > *dev)
> > >
> > >  static int ipmmu_add_device(struct device *dev)
> > >  {
> > > +   struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
> > > struct iommu_group *group;
> > > +   int ret;
> > >
> > > /*
> > >  * Only let through devices that have been verified in xlate()
> > >  */
> > > -   if (!to_ipmmu(dev))
> > > +   if (!mmu)
> > > return -ENODEV;
> > >
> > > -   if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
> > > -   return ipmmu_init_arm_mapping(dev);
> > > +   if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) {
> > > +   ret = ipmmu_init_arm_mapping(dev);
> > > +   if (ret)
> > > +   return ret;
> > > +   } else {
> > > +   group = iommu_group_get_for_dev(dev);
> > > +   if (IS_ERR(group))
> > > +   return PTR_ERR(group);
> > >
> > > -   group = iommu_group_get_for_dev(dev);
> > > -   if (IS_ERR(group))
> > > -   return PTR_ERR(group);
> > > +   iommu_group_put(group);
> > > +   }
> > >
> > > -   iommu_group_put(group);
> > > +   iommu_device_link(&mmu->iommu, dev);
>
> Also, is there any value in propagating the return value
> of iommu_device_link() ?

I don't think so.  Sysfs information is not super-critical, and most
drivers just
ignore such return values.  Also, the sysfs cleanup code handles cleanup
after failed setup fine.

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Mauro Carvalho Chehab
Em Tue, 23 Apr 2019 09:21:00 -0400
Mike Snitzer  escreveu:

> On Tue, Apr 23 2019 at  9:01am -0400,
> Peter Zijlstra  wrote:
> 
> > On Tue, Apr 23, 2019 at 08:55:19AM -0400, Mike Snitzer wrote:  
> > > On Tue, Apr 23 2019 at  4:31am -0400,
> > > Peter Zijlstra  wrote:
> > >   
> > > > On Mon, Apr 22, 2019 at 10:27:45AM -0300, Mauro Carvalho Chehab wrote:
> > > >   
> > > > >  .../{atomic_bitops.txt => atomic_bitops.rst}  |  2 +  
> > > > 
> > > > What's happened to atomic_t.txt, also NAK, I still occasionally touch
> > > > these files.  
> > > 
> > > Seems Mauro's point is in the future we need to touch these .rst files
> > > in terms of ReST compatible changes.
> > > 
> > > I'm dreading DM documentation changes in the future.. despite Mauro and
> > > Jon Corbet informing me that ReST is simple, etc.  

ReST is simple[1], and neither Jon or me wants to burden developers to
use complex documents all over the Kernel tree. ReST is just a way to 
make the documents with similar visual. The main advantage of ReST is
that documents can be better organized, as they will be inside some
index.rst file.

[1] Ok, as with any document, you can write something that is easy or
hard to read. The way we're using it in most places is just a coding
style with benefits. I wrote a quick 101 guide to ReST at the end, with
all you probably need to know about it.

So, for example, in the specific case of atomic_bitops, all it takes for
it to be parsed by Sphinx is to rename it to .rst. With that, it can be
added into an index.rst file, like at Documentation/driver-api/index.rst.

The document, as is, will be displayed like this:


https://www.infradead.org/~mchehab/rst_conversion/driver-api/atomic_bitops.html?highlight=atomic_t

And the original text file can also be seen from the output data:


https://www.infradead.org/~mchehab/rst_conversion/_sources/driver-api/atomic_bitops.rst.txt

> > 
> > Well, it _can_ be simple, I've seen examples of rst that were not far
> > from generated HTML contents. And I must give Jon credit for not
> > accepting that atrocious crap.
> > 
> > But yes, I have 0 motivation to learn or abide by rst. It simply doesn't
> > give me anything in return. There is no upside, only worse text files :/  
> 
> Right, but these changes aren't meant for our benefit.  They are for
> users who get cleaner web accessible Linux kernel docs.  Seems the
> decision has been made that the users' benefit, and broader
> modernization of Linux docs, outweighs the inconvenience for engineers
> who maintain the content of said documentation.

> This kind of thing happens a lot these days: pile on engineers, they can
> take it :/

Yes, that's the main goal: ensure that more people will see the
documents and write less crappy code. So, overall, it reduces the
time we spend on reviews of bad code.




=================================
My 101 ReST quick reference guide
=================================

Basically, a "quick" ReST guide for those that don't want to learn it
and like to have an easy to read text document would be

1) to format documents like:

=========
Doc Title
=========

foo chapter
===========

bar section
-----------

foobar sub-section
^^^^^^^^^^^^^^^^^^

foobarzeta sub-sub-section
..........................

(the actual character used to mark the titles can be different,
provided that you use the same character for the same title
level - the above is just the way *I* use, as it makes easier for
me to remember the title level).

2) remember that ReST considers new lines with same indentation as
   belonging to the same paragraph. So,

foo
bar

is identical to:

foo bar

while
foo
   bar

will make "foo" bold, and write bar on the next line. So, if you
want to have them on two separate lines on its output, it should
be either write it as:

foo

bar

or you could use a list:

- foo
- bar

Btw, *a lot* of Kernel documents already have the above format.

3) literal values should be either inside ``foo``, `foo` or on an
   indented line after a ::, like:

example::

# some_command_to_be_typed

If you follow those three simple rules, your document will be properly
parsed. The above covers 90% of what we normally use.

Tables are also easy to write there, as it recognizes two ways to write
ascii tables, with are already popular ways to write them.

So, those are valid tables:

Without a title:

===   ===============
foo   foo description
bar   bar description
===   ===============


+---+-+
| foo   | foo description |
+---+-+
| bar   | bar description |
+---+-+

(both will produce exactly the same output)

With a title:

===== ===========
field description
===== ===========

Re: [PATCH v2 6/6] iommu/ipmmu-vmsa: Add suspend/resume support

2019-04-23 Thread Geert Uytterhoeven
Hi Simon,

On Thu, Apr 11, 2019 at 10:39 AM Simon Horman  wrote:
> On Wed, Apr 03, 2019 at 08:21:48PM +0200, Geert Uytterhoeven wrote:
> > During PSCI system suspend, R-Car Gen3 SoCs are powered down, and all
> > IPMMU state is lost.  Hence after s2ram, devices wired behind an IPMMU,
> > and configured to use it, will see their DMA operations hang.
> >
> > To fix this, restore all IPMMU contexts, and re-enable all active
> > micro-TLBs during system resume.
> >
> > Signed-off-by: Geert Uytterhoeven 
> > ---
> > This patch takes a different approach than the BSP, which implements a
> > bulk save/restore of all registers during system suspend/resume.
> >
> > v2:
> >   - Drop PSCI checks.
> > ---
> >  drivers/iommu/ipmmu-vmsa.c | 47 +-
> >  1 file changed, 46 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
> > index 56e84bcc9532e1ce..408ad0b2591925e0 100644
> > --- a/drivers/iommu/ipmmu-vmsa.c
> > +++ b/drivers/iommu/ipmmu-vmsa.c
> > @@ -36,7 +36,10 @@
> >  #define arm_iommu_detach_device(...) do {} while (0)
> >  #endif
> >
> > -#define IPMMU_CTX_MAX 8U
> > +#define IPMMU_CTX_MAX        8U
> > +#define IPMMU_CTX_INVALID    -1
> > +
> > +#define IPMMU_UTLB_MAX   48U
> >
> >  struct ipmmu_features {
> >   bool use_ns_alias_offset;
> > @@ -58,6 +61,7 @@ struct ipmmu_vmsa_device {
> >   spinlock_t lock;/* Protects ctx and domains[] 
> > */
> >   DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
> >   struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
>
> It might be possible to save a bit of memory on Gen 2 systems by
> making IPMMU_UTLB_MAX 32 when the driver is compiled for such systems.

This comment applies to the line below, right?

> > + s8 utlb_ctx[IPMMU_UTLB_MAX];

So it's just 16 bytes, not 16 pointers.

Doing so would reduce struct ipmmu_vmsa_device from 148 to 132 bytes.
Given that structure is allocated by devm_kzalloc(), and devres alignment
was increased in commit a66d972465d15b1d ("devres: Align data[] to
ARCH_KMALLOC_MINALIGN"), I guess it won't make a difference.

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Jonathan Corbet
On Tue, 23 Apr 2019 15:01:32 +0200
Peter Zijlstra  wrote:

> But yes, I have 0 motivation to learn or abide by rst. It simply doesn't
> give me anything in return. There is no upside, only worse text files :/

So I believe it gives even you one thing in return: documentation that is
more accessible for both readers and authors.  More readable docs should
lead to more educated developers who understand the code better.  More
writable docs will bring more people in to help to improve them.  The
former effect has been reported in the GPU community, where they say that
the quality of submissions has improved along with the docs.  The latter
can be observed in the increased number of people working on the docs
overall, something that Linus noted in the 5.1-rc1 announcement.

Hopefully that's worth something :)

Thanks,

jon
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] xen/swiotlb: remember having called xen_create_contiguous_region()

2019-04-23 Thread Stefano Stabellini
On Tue, 23 Apr 2019, Juergen Gross wrote:
> Instead of always calling xen_destroy_contiguous_region() in case the
> memory is DMA-able for the used device, do so only in case it has been
> made DMA-able via xen_create_contiguous_region() before.
> 
> This will avoid a lot of xen_destroy_contiguous_region() calls for
> 64-bit capable devices.
> 
> As the memory in question is owned by swiotlb-xen the PG_owner_priv_1
> flag of the first allocated page can be used for remembering.

Although the patch looks OK, this sentence puzzles me. Why do you say
that the memory in question is owned by swiotlb-xen? Because it was
returned by xen_alloc_coherent_pages? Both the x86 and the Arm
implementation return fresh new memory, hence, it should be safe to set
the PageOwnerPriv1 flag?

My concern with this approach is with the semantics of PG_owner_priv_1.
Is a page marked with PG_owner_priv_1 only supposed to be used by the
owner?


> Signed-off-by: Juergen Gross 
> ---
>  drivers/xen/swiotlb-xen.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 43b6e65ae256..a72f181d8e20 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -321,6 +321,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t 
> size,
>   xen_free_coherent_pages(hwdev, size, ret, 
> (dma_addr_t)phys, attrs);
>   return NULL;
>   }
> + SetPageOwnerPriv1(virt_to_page(ret));
>   }
>   memset(ret, 0, size);
>   return ret;
> @@ -344,9 +345,11 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t 
> size, void *vaddr,
>   /* Convert the size to actually allocated. */
>   size = 1UL << (order + XEN_PAGE_SHIFT);
>  
> - if ((dev_addr + size - 1 <= dma_mask) &&
> - !WARN_ON(range_straddles_page_boundary(phys, size)))
> - xen_destroy_contiguous_region(phys, order);
> + if (PageOwnerPriv1(virt_to_page(vaddr))) {
> + if (!WARN_ON(range_straddles_page_boundary(phys, size)))
> + xen_destroy_contiguous_region(phys, order);
> + ClearPageOwnerPriv1(virt_to_page(vaddr));
> + }
>  
>   xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
>  }
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Peter Zijlstra
On Tue, Apr 23, 2019 at 10:30:53AM -0600, Jonathan Corbet wrote:
> On Tue, 23 Apr 2019 15:01:32 +0200
> Peter Zijlstra  wrote:
> 
> > But yes, I have 0 motivation to learn or abide by rst. It simply doesn't
> > give me anything in return. There is no upside, only worse text files :/
> 
> So I believe it gives even you one thing in return: documentation that is
> more accessible for both readers and authors.

I know I'm an odd duck; but no. They're _less_ accessible for me, as
both a reader and author. They look 'funny' when read as a text file
(the only way it makes sense to read them; I spend 99% of my time on a
computer looking at monospace text interfaces; mutt, vim and console, in
that approximate order).

When writing, I now have to be bothered about this format crap over just
trying to write a coherent document.

Look at crap like this:

"The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
:c:func:`kmem_cache_alloc` and"

That should've been written like:

"The memory allocations via kmalloc(), vmalloc(), kmem_cache_alloc()
and"

Heck, that paragraph isn't even properly flowed.

Then there's the endless stuck ':' key, and the mysterious "''" because
\" isn't a character, oh wait.

Bah..
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Borislav Petkov
On Tue, Apr 23, 2019 at 07:11:58PM +0200, Peter Zijlstra wrote:
> I know I'm an odd duck; but no. They're _less_ accessible for me, as
> both a reader and author. They look 'funny' when read as a text file
> (the only way it makes sense to read them; I spend 99% of my time on a
> computer looking at monospace text interfaces; mutt, vim and console, in
> that approximate order).

+1

It is probably fine to stare at them here
https://www.kernel.org/doc/html/latest/ and the end result is good
for showing them in browsers but after this conversion, it is
getting more and more painful to work with those files. For example,
Documentation/x86/x86_64/mm.txt we use a lot. I'd hate it if I had to go
sort out rest muck first just so that I can read it.

I think we can simply leave some text files be text files and be done
with it.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Wes Turner
- Accessible, usable docs are worth something in ROI
  - https://www.writethedocs.org/
  - https://read-the-docs.readthedocs.io/en/latest/
  -
https://github.com/rtfd/readthedocs-docker-images/issues/47#issuecomment-485712800
- Dockerfile that extends from readthedocs/build:latest (which has the
GBs of latex necessary to run `make latexpdf` for all you PDF lovers out
there)

- https://github.com/yoloseem/awesome-sphinxdoc
  - There are various Sphinx extensions for optionally including generated
API docs for various languages
  - If you add the extensions you want installed to your requirements.txt
or environment.yml, ReadTheDocs will install those for every build. You can
also create (and maintain) a custom Docker image with all of the docs
building dependencies installed (e.g. requirements_dev.txt and/or
docs/requirements.txt)

- https://kernel.readthedocs.io/en/latest/kernel-documentation.html
  - This says "Copyright 2016"? That's set in conf.py

I keep a tools doc in ReST:
- https://westurner.github.io/tools/#sphinx
- https://westurner.github.io/tools/#docutils

I'll just CC those sections here
wrapped in a Markdown fenced code block

```rst

.. index:: Docutils
.. _docutils:

Docutils
~~~~~~~~
| Homepage: http://docutils.sourceforge.net
| PyPI: https://pypi.python.org/pypi/docutils
| Docs: http://docutils.sourceforge.net/docs/
| Docs: http://docutils.sourceforge.net/rst.html
| Docs: http://docutils.sourceforge.net/docs/ref/doctree.html
| Docs: https://docutils.readthedocs.io/en/sphinx-docs/
| Docs:
https://docutils.readthedocs.io/en/sphinx-docs/ref/rst/restructuredtext.html
| Src: svn http://svn.code.sf.net/p/docutils/code/trunk

Docutils is a :ref:`Python` library which parses :ref:`ReStructuredText`
lightweight markup language into a doctree (~DOM)
which can be serialized into
HTML, ePub, MOBI, LaTeX, man pages,
Open Document files,
XML, JSON, and a number of other formats.


.. index:: Sphinx
.. _sphinx:

Sphinx
~~~~~~
| Wikipedia: `<
https://en.wikipedia.org/wiki/Sphinx_(documentation_generator)>`_
| Homepage: https://pypi.python.org/pypi/Sphinx
| Src: git https://github.com/sphinx-doc/sphinx
| Pypi: https://pypi.python.org/pypi/Sphinx
| Docs: http://sphinx-doc.org/contents.html
| Docs: http://sphinx-doc.org/markup/code.html
| Docs: http://www.sphinx-doc.org/en/stable/markup/inline.html#ref-role
| Docs: http://pygments.org/docs/lexers/
| Docs: http://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html
| Docs: https://github.com/yoloseem/awesome-sphinxdoc

Sphinx is a tool for working with
:ref:`ReStructuredText` documentation trees
and rendering them into HTML, PDF, LaTeX, ePub,
and a number of other formats.

[...]

```

FWIW, ReadTheDocs can host multiple versions of the docs according to the
repo
tags you specify in the web admin.
There may be a way to use the RTD JS UI for selecting versions
with the docs hosted on your own server?
Such as https://www.kernel.org/doc/html/latest/

- https://github.com/torvalds/linux/blob/master/Documentation/conf.py
- https://github.com/torvalds/linux/blob/master/Documentation/Makefile

-
https://github.com/torvalds/linux/blob/master/Documentation/doc-guide/index.rst
-
https://github.com/torvalds/linux/blob/master/Documentation/doc-guide/sphinx.rst
-
https://github.com/torvalds/linux/blob/master/Documentation/doc-guide/kernel-doc.rst

- https://www.kernel.org/doc/html/latest/
- https://www.kernel.org/doc/html/latest/doc-guide/
-
https://www.kernel.org/doc/html/latest/doc-guide/sphinx.html#sphinx-install
-
https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html#writing-kernel-doc-comments


On Tue, Apr 23, 2019 at 12:31 PM Jonathan Corbet  wrote:

> On Tue, 23 Apr 2019 15:01:32 +0200
> Peter Zijlstra  wrote:
>
> > But yes, I have 0 motivation to learn or abide by rst. It simply doesn't
> > give me anything in return. There is no upside, only worse text files :/
>
> So I believe it gives even you one thing in return: documentation that is
> more accessible for both readers and authors.  More readable docs should
> lead to more educated developers who understand the code better.  More
> writable docs will bring more people in to help to improve them.  The
> former effect has been reported in the GPU community, where they say that
> the quality of submissions has improved along with the docs.  The latter
> can be observed in the increased number of people working on the docs
> overall, something that Linus noted in the 5.1-rc1 announcement.
>
> Hopefully that's worth something :)
>
> Thanks,
>
> jon
>
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Wes Turner
- Accessible, usable docs are worth something in ROI
  - https://www.writethedocs.org/
  - https://read-the-docs.readthedocs.io/en/latest/
  -
https://github.com/rtfd/readthedocs-docker-images/issues/47#issuecomment-485712800
- Dockerfile that extends from readthedocs/build:latest (which has the
GBs of latex necessary to run `make latexpdf` for all you PDF lovers out
there)

- https://github.com/yoloseem/awesome-sphinxdoc
  - There are various Sphinx extensions for optionally including generated
API docs for various languages
  - If you add the extensions you want installed to your requirements.txt
or environment.yml, ReadTheDocs will install those for every build. You can
also create (and maintain) a custom Docker image with all of the docs
building dependencies installed (e.g. requirements_dev.txt and/or
docs/requirements.txt)

- https://kernel.readthedocs.io/en/latest/kernel-documentation.html
  - This says "Copyright 2016"? That's set in conf.py

I keep a tools doc in ReST:
- https://westurner.github.io/tools/#sphinx
- https://westurner.github.io/tools/#docutils

I'll just CC those sections here:

```rst

.. index:: Docutils
.. _docutils:

Docutils
~~~~~~~~
| Homepage: http://docutils.sourceforge.net
| PyPI: https://pypi.python.org/pypi/docutils
| Docs: http://docutils.sourceforge.net/docs/
| Docs: http://docutils.sourceforge.net/rst.html
| Docs: http://docutils.sourceforge.net/docs/ref/doctree.html
| Docs: https://docutils.readthedocs.io/en/sphinx-docs/
| Docs:
https://docutils.readthedocs.io/en/sphinx-docs/ref/rst/restructuredtext.html
| Src: svn http://svn.code.sf.net/p/docutils/code/trunk

Docutils is a :ref:`Python` library which parses :ref:`ReStructuredText`
lightweight markup language into a doctree (~DOM)
which can be serialized into
HTML, ePub, MOBI, LaTeX, man pages,
Open Document files,
XML, JSON, and a number of other formats.

.. index:: Sphinx
.. _sphinx:

Sphinx
~~~~~~
| Wikipedia: `<
https://en.wikipedia.org/wiki/Sphinx_(documentation_generator)>`_
| Homepage: https://pypi.python.org/pypi/Sphinx
| Src: git https://github.com/sphinx-doc/sphinx
| Pypi: https://pypi.python.org/pypi/Sphinx
| Docs: http://sphinx-doc.org/contents.html
| Docs: http://sphinx-doc.org/markup/code.html
| Docs: http://www.sphinx-doc.org/en/stable/markup/inline.html#ref-role
| Docs: http://pygments.org/docs/lexers/
| Docs: http://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html
| Docs: https://github.com/yoloseem/awesome-sphinxdoc


Sphinx is a tool for working with
:ref:`ReStructuredText` documentation trees
and rendering them into HTML, PDF, LaTeX, ePub,
and a number of other formats.

Sphinx extends :ref:`Docutils` with a number of useful markup behaviors
which are not supported by other ReStructuredText parsers.

Most other ReStructuredText parsers do not support Sphinx directives;
so, for example,

* GitHub and BitBucket do not support Sphinx but do support ReStructuredText
  so ``README.rst`` containing Sphinx tags renders in plaintext or raises
errors.

  For example, the index page of this
  :ref:`Sphinx` documentation set is generated from
  a file named ``index.rst`` that referenced by ``docs/conf.py``,
  which is utilized by ``sphinx-build`` in the ``Makefile``.

  * Input:

.. code:: bash

  _indexrst="$WORKON_HOME/src/westurner/tools/index.rst"
  e $_indexrst

  # with westurner/dotfiles.venv
  mkvirtualenv westurner
  we westurner tools; mkdir -p $_SRC
  git clone ssh://g...@github.com/westurner/tools
  cdw; e index.rst# ew index.rst

https://github.com/westurner/tools/blob/master/index.rst

https://raw.githubusercontent.com/westurner/tools/master/index.rst



  * Output:

.. code:: bash

  cd $_WRD# cdwrd; cdw
  git status; make   # gitw status; makew 
  make html singlehtml# make docs
  web ./_build/html/index.html# make open

  make gh-pages   # ghp-import -n -p ./_build/html/ -b gh-pages
  make push   # gitw push  

https://github.com/westurner/tools/blob/gh-pages/index.html

https://westurner.github.io/tools/


* RawGit:

  dev/test: https://rawgit.com/westurner/tools/gh-pages/index.html

  CDN: https://cdn.rawgit.com/westurner/tools/gh-pages/index.html

  * Output: *ReadTheDocs*:

https://<project>.readthedocs.io/en/<version>/

https://read-the-docs.readthedocs.io/en/latest/


.. glossary::

   Sphinx Builder
  A Sphinx Builder transforms :ref:`ReStructuredText` into various
  output forms:

 * HTML
 * LaTeX
 * PDF
 * ePub
 * MOBI
 * JSON
 * OpenDocument (OpenOffice)
 * Office Open XML (MS Word)

  See: `Sphinx Builders <http://www.sphinx-doc.org/en/stable/builders.html>`_

   Sphinx ReStructuredText
  Sphinx extends :ref:`ReStructuredText` with roles and directives
  which only work with Sphinx.

   Sphinx Directive
  Sphinx extensions of :ref:`Docu

Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Peter Zijlstra


A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Jonathan Corbet
On Tue, 23 Apr 2019 19:11:58 +0200
Peter Zijlstra  wrote:

> When writing, I now have to be bothered about this format crap over just
> trying to write a coherent document.

Just write text, it'll all work out in the end :)

> Look at crap like this:
> 
> "The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
> :c:func:`kmem_cache_alloc` and"
> 
> That should've been written like:
> 
> "The memory allocations via kmalloc(), vmalloc(), kmem_cache_alloc()
> and"

Yeah, I get it.  That markup generates cross-references, which can be
seriously useful for readers - we want that.  But I do wonder if we
couldn't do it automatically with just a little bit of scripting work.
It's not to hard to recognize this_is_a_function(), after all.  I'll look
into that, it would definitely help to remove some gunk from the source
docs.

jon
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC] arm64: swiotlb: cma_alloc error spew

2019-04-23 Thread dann frazier
On Tue, Apr 23, 2019 at 5:32 AM Robin Murphy  wrote:
>
> On 17/04/2019 21:48, dann frazier wrote:
> > hey,
> >I'm seeing an issue on a couple of arm64 systems[*] where they spew
> > ~10K "cma: cma_alloc: alloc failed" messages at boot. The errors are
> > non-fatal, and bumping up cma to a large enough size (~128M) gets rid
> > of them - but that seems suboptimal. Bisection shows that this started
> > after commit fafadcd16595 ("swiotlb: don't dip into swiotlb pool for
> > coherent allocations"). It looks like __dma_direct_alloc_pages()
> > is opportunistically using CMA memory but falls back to non-CMA if CMA
> > disabled or unavailable. I've demonstrated that this fallback is
> > indeed returning a valid pointer. So perhaps the issue is really just
> > the warning emission.
>
> The CMA area being full isn't necessarily an ignorable non-problem,
> since it means you won't be able to allocate the kind of large buffers
> for which CMA was intended. The question is, is it actually filling up
> with allocations that deserve to be there, or is this the same as I've
> seen on a log from a ThunderX2 system where it's getting exhausted by
> thousands upon thousands of trivial single page allocations? If it's the
> latter (CONFIG_CMA_DEBUG should help shed some light if necessary),

Appears so. Here's a histogram of count/size w/ a cma= large enough to
avoid failures:

$ dmesg | grep "cma: cma_alloc(cma" | sed -r 's/.*count
([0-9]+)\,.*/\1/' | sort -n | uniq -c
   2062 1
 32 2
266 8
  2 24
  4 32
256 33
  7 64
  2 128
  2 1024

  -dann

> then
> that does lean towards spending a bit more effort on this idea:
>
> https://lore.kernel.org/lkml/20190327080821.gb20...@lst.de/
>
> Robin.
>
> > The following naive patch solves the problem for me - just silence the
> > cma errors, since it looks like a soft error. But is there a better
> > approach?
> >
> > [*] APM X-Gene & HiSilicon Hi1620 w/ SMMU disabled
> >
> > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> > index 6310ad01f915b..0324aa606c173 100644
> > --- a/kernel/dma/direct.c
> > +++ b/kernel/dma/direct.c
> > @@ -112,7 +112,7 @@ struct page *__dma_direct_alloc_pages(struct device 
> > *dev, size_t size,
> >  /* CMA can be used only in the context which permits sleeping */
> >  if (gfpflags_allow_blocking(gfp)) {
> >  page = dma_alloc_from_contiguous(dev, count, page_order,
> > -gfp & __GFP_NOWARN);
> > +true);
> >  if (page && !dma_coherent_ok(dev, page_to_phys(page), 
> > size)) {
> >  dma_release_from_contiguous(dev, page, count);
> >  page = NULL;
> >
> >
> >
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Peter Zijlstra
On Tue, Apr 23, 2019 at 11:53:49AM -0600, Jonathan Corbet wrote:
> > Look at crap like this:
> > 
> > "The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
> > :c:func:`kmem_cache_alloc` and"
> > 
> > That should've been written like:
> > 
> > "The memory allocations via kmalloc(), vmalloc(), kmem_cache_alloc()
> > and"
> 
> Yeah, I get it.  That markup generates cross-references, which can be
> seriously useful for readers - we want that.

The funny thing is; that sentence continues (on a new line) like:

"friends are traced and the pointers, together with additional"

So while it then has cross-references to a few functions, all 'friends'
are left dangling. So what's the point of the cross-references?

Also, 'make ctags' and follow tag (ctrl-] for fellow vim users) will get
you to the function, no magic markup required.

> But I do wonder if we
> couldn't do it automatically with just a little bit of scripting work.
> It's not to hard to recognize this_is_a_function(), after all.  I'll look
> into that, it would definitely help to remove some gunk from the source
> docs.

That would be good; less markup is more.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] xen/swiotlb: remember having called xen_create_contiguous_region()

2019-04-23 Thread Juergen Gross
On 23/04/2019 19:05, Stefano Stabellini wrote:
> On Tue, 23 Apr 2019, Juergen Gross wrote:
>> Instead of always calling xen_destroy_contiguous_region() in case the
>> memory is DMA-able for the used device, do so only in case it has been
>> made DMA-able via xen_create_contiguous_region() before.
>>
>> This will avoid a lot of xen_destroy_contiguous_region() calls for
>> 64-bit capable devices.
>>
>> As the memory in question is owned by swiotlb-xen the PG_owner_priv_1
>> flag of the first allocated page can be used for remembering.
> 
> Although the patch looks OK, this sentence puzzles me. Why do you say
> that the memory in question is owned by swiotlb-xen? Because it was
> returned by xen_alloc_coherent_pages? Both the x86 and the Arm
> implementation return fresh new memory, hence, it should be safe to set
> the PageOwnerPriv1 flag?
> 
> My concern with this approach is with the semantics of PG_owner_priv_1.
> Is a page marked with PG_owner_priv_1 only supposed to be used by the
> owner?

The owner of the page is free to use the flag.

Like Grant pages are marked by the grant driver using this flag. And
Xen page tables are using it in PV-guests for indicating a "Pinned"
page table.


Juergen
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] Remove old no iommu direct mapping code

2019-04-23 Thread Tom Murphy via iommu
These checks were intended to handle devices not mapped by the IOMMU.
Since the AMD IOMMU driver uses per-device dma_ops these functions can
no longer be called by direct mapped devices. So these checks aren't
needed anymore.

Signed-off-by: Tom Murphy 
---
 drivers/iommu/amd_iommu.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b319e51c379b..67cdc9e5304b 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2503,9 +2503,7 @@ static dma_addr_t map_page(struct device *dev, struct 
page *page,
u64 dma_mask;
 
domain = get_domain(dev);
-   if (PTR_ERR(domain) == -EINVAL)
-   return (dma_addr_t)paddr;
-   else if (IS_ERR(domain))
+   if (IS_ERR(domain))
return DMA_MAPPING_ERROR;
 
dma_mask = *dev->dma_mask;
@@ -2676,11 +2674,7 @@ static void *alloc_coherent(struct device *dev, size_t 
size,
struct page *page;
 
domain = get_domain(dev);
-   if (PTR_ERR(domain) == -EINVAL) {
-   page = alloc_pages(flag, get_order(size));
-   *dma_addr = page_to_phys(page);
-   return page_address(page);
-   } else if (IS_ERR(domain))
+   if (IS_ERR(domain))
return NULL;
 
dma_dom   = to_dma_ops_domain(domain);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Mauro Carvalho Chehab
Em Tue, 23 Apr 2019 19:20:06 +0200
Borislav Petkov  escreveu:

> On Tue, Apr 23, 2019 at 07:11:58PM +0200, Peter Zijlstra wrote:
> > I know I'm an odd duck; but no. They're _less_ accessible for me, as
> > both a reader and author. They look 'funny' when read as a text file
> > (the only way it makes sense to read them; I spend 99% of my time on a
> > computer looking at monospace text interfaces; mutt, vim and console, in
> > that approximate order).  
> 
> +1
> 
> It is probably fine to stare at them here
> https://www.kernel.org/doc/html/latest/ and the end result is good
> for showing them in browsers but after this conversion, it is
> getting more and more painful to work with those files. For example,
> Documentation/x86/x86_64/mm.txt we use a lot. I'd hate it if I had to go
> sort out rest muck first just so that I can read it.

That's my view about how that specific file would be after
converted to ReST:


https://git.linuxtv.org/mchehab/experimental.git/tree/Documentation/x86/x86_64/mm.rst?h=convert_rst_renames

I don't have any troubles reading/understanding it as a plain text
file, and its html output is also nice (although Sphinx 1.7.8 seems to
have some issues when parsing some cells - probably due to some bug):

https://www.infradead.org/~mchehab/rst_conversion/x86/x86_64/mm.html

> 
> I think we can simply leave some text files be text files and be done
> with it.

Changbin's approach was somewhat close to what you want. He simply
prepended the tables with ::, in order to show them as plain old
ascii:


https://lore.kernel.org/lkml/20190423162932.21428-60-changbin...@gmail.com/

Both equally works, from ReST conversion PoV. I'm fine ether way.

I prefer my approach, as, IMHO, it is visually nicer on both text and
html versions, but his approach is likely easier to maintain, as doing
ascii artwork by hand is sometimes painful.

Thanks,
Mauro
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Mauro Carvalho Chehab
Em Tue, 23 Apr 2019 11:53:49 -0600
Jonathan Corbet  escreveu:

> On Tue, 23 Apr 2019 19:11:58 +0200
> Peter Zijlstra  wrote:
> 

> > Look at crap like this:
> > 
> > "The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
> > :c:func:`kmem_cache_alloc` and"
> > 
> > That should've been written like:
> > 
> > "The memory allocations via kmalloc(), vmalloc(), kmem_cache_alloc()
> > and"  
> 
> Yeah, I get it.  That markup generates cross-references, which can be
> seriously useful for readers - we want that.  But I do wonder if we
> couldn't do it automatically with just a little bit of scripting work.
> It's not to hard to recognize this_is_a_function(), after all.  I'll look
> into that, it would definitely help to remove some gunk from the source
> docs.

While on it, one thing that I noticed on several documents is that
they reference other documents by their names. On this conversion,
I avoided replacing that by a :ref:`` tag or a :doc:`` tag. I only
added cross references on two cases:

- a latex file that got converted to ReST and had such
  cross-references already;

- one of the document sets that seemed to be using some other
  markup language very close to ReST, but with a different
  cross-reference markup. So, I just converted it to use
  the syntax that Sphinx would recognize.

Anyway, one of the things that occurred to me is that maybe
some scripting work or a ReST extension could do something to parse
"Documentation/foo" as :doc:`Documentation/foo` without needing to 
explicitly use any ReST specific tags.

Thanks,
Mauro
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Jonathan Corbet
On Tue, 23 Apr 2019 17:19:44 -0300
Mauro Carvalho Chehab  wrote:

> Anyway, one of the things that occurred to me is that maybe
> some scripting work or a ReST extension could do something to parse
> "Documentation/foo" as :doc:`Documentation/foo` without needing to 
> explicitly use any ReST specific tags.

That probably makes sense too.  People do want to link to specific
subsections within documents, though; maybe we could allow
"Documentation/foo#bar" for that.  Such "markup" could even be useful for
people reading the plain-text files.

Thanks,

jon
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Borislav Petkov
On Tue, Apr 23, 2019 at 05:05:02PM -0300, Mauro Carvalho Chehab wrote:
> That's my view about how that specific file would be after
> converted to ReST:
> 
>   
> https://git.linuxtv.org/mchehab/experimental.git/tree/Documentation/x86/x86_64/mm.rst?h=convert_rst_renames
> 
> I don't have any troubles reading/understanding it as a plain text
> file,

If that is all the changes it would need, then I guess that's ok. Btw,
those rst-conversion patches don't really show what got changed. Dunno
if git can even show that properly. I diffed the two files by hand to
see what got changed, see end of mail.

So I guess if table in rst means, one needs to draw rows and columns, I
guess that's ok. It's not like I have to do it every day.

But exactly this - *having* to do rst formatting would mean a lot of
getting used to and people writing something which is not necessarily
correct rst and someone else fixing up after them.

Another pain point is changing the file paths. Without cscope I would've
been cursing each time I'm looking for kernel-parameters.txt, for
example. First of all, it is in Documentation/admin-guide/ now and then
there's Documentation/admin-guide/kernel-parameters.rst too.

I guess the .rst sucks in the .txt file and shows it monospaced. Oh
well.

So* I'd suggest having as less markup in those files as possible and if
it is needed, automate adding the needed markup, as Jon suggested.

The perfect example was the one which Peter gave and I had to paste in a
thread today:

"The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
:c:func:`kmem_cache_alloc` and"

That is very unreadable.

Anyway, stuff like that. Just giving my feedback here in case you're
interested. :-)

> and its html output is also nice (although Sphinx 1.7.8 seems to
> have some issues when parsing some cells - probably due to some bug):
> 
>   https://www.infradead.org/~mchehab/rst_conversion/x86/x86_64/mm.html

I don't know how that looks in your browser but in mine those addresses
are not in monospaced font and there's no properly reading them.

And yap, the cells parsing fun I see too.

> Changbin's approach was somewhat close to what you want. He simply
> prepended the tables with ::, in order to show them as plain old
> ascii:
> 
>   
> https://lore.kernel.org/lkml/20190423162932.21428-60-changbin...@gmail.com/

Yap, that's better.

I mean, the file is just as readable in plain old ASCII, if not even
more so. At least to me but I prefer simple things so...

> 
> Both equally works, from ReST conversion PoV. I'm fine ether way.
> 
> I prefer my approach, as, IMHO, it is visually nicer on both text and
> html versions, but his approach is likely easier to maintain, as doing
> ascii artwork by hand is sometimes painful.

Yap.

Thx.

---
--- mm.old  2019-04-23 23:18:55.954335784 +0200
+++ mm.new  2019-04-23 23:18:48.122335821 +0200
@@ -18,51 +18,68 @@ Notes:
notation than "16 EB", which few will recognize at first sight as 16 
exabytes.
It also shows it nicely how incredibly large 64-bit address space is.
 
-
-Start addr|   Offset   | End addr |  Size   | VM area 
description
-
-  ||  | |
-  |0   | 7fff |  128 TB | user-space 
virtual memory, different per mm
-__||__|_|___
-  ||  | |
- 8000 | +128TB | 7fff | ~16M TB | ... huge, almost 
64 bits wide hole of non-canonical
-  ||  | | virtual 
memory addresses up to the -128 TB
-  ||  | | starting 
offset of kernel mappings.
-__||__|_|___
-|
-| Kernel-space 
virtual memory, shared between all processes:
-|___
-  ||  | |
- 8000 | -128TB | 87ff |8 TB | ... guard hole, 
also reserved for hypervisor
- 8800 | -120TB | 887f |  0.5 TB | LDT remap for PTI
- 8880 | -119.5  TB | c87f |   64 TB | direct mapping 
of all physical memory (page_offset_base)
- c880 |  -55.5  TB | c8ff |  0.5 TB | ... unused hole
- c900 |  -55 

Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Jonathan Corbet
On Tue, 23 Apr 2019 23:38:16 +0200
Borislav Petkov  wrote:

> But exactly this - *having* to do rst formatting would mean a lot of
> getting used to and people writing something which is not necessarily
> correct rst and someone else fixing up after them.

Remember that most of our docs are 99% RST even though they were written
by people who had never even heard of RST.  I really don't think it's a
big deal - a far smaller cognitive load than trying to keep up with any
given subsystem's variable-declaration-ordering rules, for example :)

> Another pain point is changing the file paths. Without cscope I would've
> been cursing each time I'm looking for kernel-parameters.txt, for
> example. First of all, it is in Documentation/admin-guide/ now and then
> there's Documentation/admin-guide/kernel-parameters.rst too.

Moving of files has nothing to do with RST, of course.  That you can
blame entirely on me trying to bring some order to Documentation/.  As a
predecessor of mine once put it (https://lkml.org/lkml/2007/7/3/422):

Documentation/* is a gigantic mess, currently organized based on
where random passers-by put things down last.

When other parts of the kernel tree turn out to be organized in
less-than-useful ways, we move things around.  I'm trying to do the same
in Documentation/, with an attempt to be sympathetic toward our readers,
sort things by intended audience, and create (someday) a coherent whole.
I agree that moving docs is a short-term annoyance, but I'm hoping that
it brings a long-term benefit.

> So* I'd suggest having as less markup in those files as possible and if
> it is needed, automate adding the needed markup, as Jon suggested.

Minimal markup is the policy (it's even documented :).  Automating stuff
that can be automated is an area that has definitely not received
enough attention; hopefully some things can be done there in the very
near future.

Thanks,

jon
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [RFC PATCH] dma-mapping: create iommu mapping for newly allocated dma coherent mem

2019-04-23 Thread Laurentiu Tudor
Hello,

> -Original Message-
> From: Christoph Hellwig 
> Sent: Monday, April 22, 2019 9:11 PM
> 
> On Mon, Apr 22, 2019 at 07:51:25PM +0300, laurentiu.tu...@nxp.com wrote:
> > From: Laurentiu Tudor 
> >
> > If possible / available call into the DMA API to get a proper iommu
> > mapping and a dma address for the newly allocated coherent dma memory.
> 
> I don't think this is so simple.  The original use case of
> dma_declare_coherent_memory was memory that is local to a device, where
> we copy in data through a MMIO mapping and the device can then access
> it.  This use case still seems to be alive in the ohci-sm501 and
> ohci-tmio drivers.  Going through the iommu in those cases would be
> counter productive.

I had a feeling that I didn't get the whole story and something isn't quite 
right with this patch. 😊 But I'm happy that we have a discussion started on the 
topic and, I must say, I'm very interested in getting to the bottom of it (I 
have some patches enabling SMMU on a couple of NXP chips and depend on 
resolving this).
I'll try to understand what you're planning but in the meantime please let me 
know if you think I can be of any help.

---
Thanks & Best Regards, Laurentiu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH v2 00/19] Shared virtual address IOMMU and VT-d support

2019-04-23 Thread Jacob Pan
Shared virtual address (SVA), a.k.a, Shared virtual memory (SVM) on Intel
platforms allow address space sharing between device DMA and applications.
SVA can reduce programming complexity and enhance security.
This series is intended to enable SVA virtualization, i.e. shared guest
application address space and physical device DMA address. Only IOMMU portion
of the changes are included in this series. Additional support is needed in
VFIO and QEMU (will be submitted separately) to complete this functionality.

To make incremental changes and reduce the size of each patchset, this series
does not include support for page request services.

In VT-d implementation, PASID table is per device and maintained in the host.
Guest PASID table is shadowed in VMM where virtual IOMMU is emulated.

.-.  .---.
|   vIOMMU|  | Guest process CR3, FL only|
| |  '---'
./
| PASID Entry |--- PASID cache flush -
'-'   |
| |   V
| |CR3 in GPA
'-'
Guest
--| Shadow |--|
  vv  v
Host
.-.  .--.
|   pIOMMU|  | Bind FL for GVA-GPA  |
| |  '--'
./  |
| PASID Entry | V (Nested xlate)
'\.--.
| |   |SL for GPA-HPA, default domain|
| |   '--'
'-'
Where:
 - FL = First level/stage one page tables
 - SL = Second level/stage two page tables


This work is based on collaboration with other developers on the IOMMU
mailing list. Notably,

[1] [PATCH v6 00/22] SMMUv3 Nested Stage Setup by Eric Auger
https://lkml.org/lkml/2019/3/17/124

[2] [RFC PATCH 2/6] drivers core: Add I/O ASID allocator by Jean-Philippe
Brucker
https://www.spinics.net/lists/iommu/msg30639.html

[3] [RFC PATCH 0/5] iommu: APIs for paravirtual PASID allocation by Lu Baolu
https://lkml.org/lkml/2018/11/12/1921

[4] [PATCH v5 00/23] IOMMU and VT-d driver support for Shared Virtual
Address (SVA)
https://lwn.net/Articles/754331/

There are roughly three parts:
1. Generic PASID allocator [1] with extension to support custom allocator
2. IOMMU cache invalidation passdown from guest to host
3. Guest PASID bind for nested translation

All generic IOMMU APIs are reused from [1], which has a v7 just published with
no real impact to the patches used here. It is worth noting that unlike sMMU
nested stage setup, where PASID table is owned by the guest, VT-d PASID table is
owned by the host, individual PASIDs are bound instead of the PASID table.

This series is based on the new VT-d 3.0 Specification 
(https://software.intel.com/sites/default/files/managed/c5/15/vt-directed-io-spec.pdf).
This is different than the older series in [4] which was based on the older
specification that does not have scalable mode.


ChangeLog:
- V2
  - Rebased on Joerg's IOMMU x86/vt-d branch v5.1-rc4
  - Integrated with Eric Auger's new v7 series for common APIs
  (https://github.com/eauger/linux/tree/v5.1-rc3-2stage-v7)
  - Addressed review comments from Andy Shevchenko and Alex Williamson 
on
IOASID custom allocator.
  - Support multiple custom IOASID allocators (vIOMMUs) and dynamic
registration.


Jacob Pan (16):
  driver core: add per device iommu param
  iommu: introduce device fault data
  iommu: introduce device fault report API
  iommu: Introduce attach/detach_pasid_table API
  ioasid: Convert ioasid_idr to XArray
  ioasid: Add custom IOASID allocator
  iommu/vt-d: Add custom allocator for IOASID
  iommu/vt-d: Replace Intel specific PASID allocator with IOASID
  iommu/vt-d: Move domain helper to header
  iommu/vt-d: Add nested translation support
  iommu: Add guest PASID bind function
  iommu/vt-d: Add bind guest PASID support
  iommu/vtd: Clean up for SVM device list
  iommu: Add max num of cache and granu types
  iommu/vt-d: Support flushing more translation cache types
  iommu/vt-d: Add svm/sva invalidate function

Jean-Philippe Brucker (1):
  drivers core: Add I/O ASID allocator

Liu, Yi L (1):
  iommu: Introduce cache_invalidate API

Lu Baolu (1):
  iommu/vt-d: Enlightened PASID allocation

 drivers/base/Kconfig|   6 +
 drivers/base/Makefile   |   1 +
 drivers/base/ioasid.c   | 265 
 drivers/iommu/Kconfig   |   1 +
 drivers/iommu/dmar.c|  48 
 drivers/iommu/intel-iommu.c | 236 ++--
 drivers/iommu/intel-pasid.c | 189 -
 drivers/iommu/intel-pasid.h |  24 +++-
 drivers/iommu/intel-svm.c   | 289 +++-
 drivers/iommu/iommu.c  

[PATCH v2 01/19] driver core: add per device iommu param

2019-04-23 Thread Jacob Pan
DMA faults can be detected by the IOMMU at device level. Adding a pointer
to struct device allows the IOMMU subsystem to report relevant faults
back to the device driver for further handling.
For a directly assigned device (or user space drivers), the guest OS holds
responsibility to handle and respond to per-device IOMMU faults.
Therefore we need a fault reporting mechanism to propagate faults beyond
the IOMMU subsystem.

There are two other IOMMU data pointers under struct device today, here
we introduce iommu_param as a parent pointer such that all device IOMMU
data can be consolidated here. The idea was suggested here by Greg KH
and Joerg. The name iommu_param is chosen here since iommu_data has been used.

Suggested-by: Greg Kroah-Hartman 
Reviewed-by: Greg Kroah-Hartman 
Signed-off-by: Jacob Pan 
Link: https://lkml.org/lkml/2017/10/6/81
---
 include/linux/device.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/device.h b/include/linux/device.h
index 4e6987e..2cd48a6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -42,6 +42,7 @@ struct iommu_ops;
 struct iommu_group;
 struct iommu_fwspec;
 struct dev_pin_info;
+struct iommu_param;
 
 struct bus_attribute {
struct attributeattr;
@@ -959,6 +960,7 @@ struct dev_links_info {
  * device (i.e. the bus driver that discovered the device).
  * @iommu_group: IOMMU group the device belongs to.
  * @iommu_fwspec: IOMMU-specific properties supplied by firmware.
+ * @iommu_param: Per device generic IOMMU runtime data
  *
  * @offline_disabled: If set, the device is permanently online.
  * @offline:   Set after successful invocation of bus type's .offline().
@@ -1052,6 +1054,7 @@ struct device {
void(*release)(struct device *dev);
struct iommu_group  *iommu_group;
struct iommu_fwspec *iommu_fwspec;
+   struct iommu_param  *iommu_param;
 
booloffline_disabled:1;
booloffline:1;
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 03/19] iommu: introduce device fault report API

2019-04-23 Thread Jacob Pan
Traditionally, device specific faults are detected and handled within
their own device drivers. When IOMMU is enabled, faults such as DMA
related transactions are detected by IOMMU. There is no generic
reporting mechanism to report faults back to the in-kernel device
driver or the guest OS in case of assigned devices.

This patch introduces a registration API for device specific fault
handlers. This differs from the existing iommu_set_fault_handler/
report_iommu_fault infrastructures in several ways:
- it allows to report more sophisticated fault events (both
  unrecoverable faults and page request faults) due to the nature
  of the iommu_fault struct
- it is device specific and not domain specific.

The current iommu_report_device_fault() implementation only handles
the "shoot and forget" unrecoverable fault case. Handling of page
request faults or stalled faults will come later.

Signed-off-by: Jacob Pan 
Signed-off-by: Ashok Raj 
Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Eric Auger 

---
v6 -> v7:
- use struct iommu_param *param = dev->iommu_param;

v4 -> v5:
- remove stuff related to recoverable faults
---
 drivers/iommu/iommu.c | 135 +-
 include/linux/iommu.h |  36 +-
 2 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f8fe112..75c352c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -648,6 +648,13 @@ int iommu_group_add_device(struct iommu_group *group, 
struct device *dev)
goto err_free_name;
}
 
+   dev->iommu_param = kzalloc(sizeof(*dev->iommu_param), GFP_KERNEL);
+   if (!dev->iommu_param) {
+   ret = -ENOMEM;
+   goto err_free_name;
+   }
+   mutex_init(&dev->iommu_param->lock);
+
kobject_get(group->devices_kobj);
 
dev->iommu_group = group;
@@ -678,6 +685,7 @@ int iommu_group_add_device(struct iommu_group *group, 
struct device *dev)
mutex_unlock(&group->mutex);
dev->iommu_group = NULL;
kobject_put(group->devices_kobj);
+   kfree(dev->iommu_param);
 err_free_name:
kfree(device->name);
 err_remove_link:
@@ -724,7 +732,7 @@ void iommu_group_remove_device(struct device *dev)
sysfs_remove_link(&dev->kobj, "iommu_group");
 
trace_remove_device_from_group(group->id, dev);
-
+   kfree(dev->iommu_param);
kfree(device->name);
kfree(device);
dev->iommu_group = NULL;
@@ -859,6 +867,131 @@ int iommu_group_unregister_notifier(struct iommu_group 
*group,
 EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
 
 /**
+ * iommu_register_device_fault_handler() - Register a device fault handler
+ * @dev: the device
+ * @handler: the fault handler
+ * @data: private data passed as argument to the handler
+ *
+ * When an IOMMU fault event is received, this handler gets called with the
+ * fault event and data as argument.
+ *
+ * Return 0 if the fault handler was installed successfully, or an error.
+ */
+int iommu_register_device_fault_handler(struct device *dev,
+   iommu_dev_fault_handler_t handler,
+   void *data)
+{
+   struct iommu_param *param = dev->iommu_param;
+   int ret = 0;
+
+   /*
+* Device iommu_param should have been allocated when device is
+* added to its iommu_group.
+*/
+   if (!param)
+   return -EINVAL;
+
+   mutex_lock(¶m->lock);
+   /* Only allow one fault handler registered for each device */
+   if (param->fault_param) {
+   ret = -EBUSY;
+   goto done_unlock;
+   }
+
+   get_device(dev);
+   param->fault_param =
+   kzalloc(sizeof(struct iommu_fault_param), GFP_KERNEL);
+   if (!param->fault_param) {
+   put_device(dev);
+   ret = -ENOMEM;
+   goto done_unlock;
+   }
+   mutex_init(¶m->fault_param->lock);
+   param->fault_param->handler = handler;
+   param->fault_param->data = data;
+   INIT_LIST_HEAD(¶m->fault_param->faults);
+
+done_unlock:
+   mutex_unlock(¶m->lock);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_register_device_fault_handler);
+
+/**
+ * iommu_unregister_device_fault_handler() - Unregister the device fault 
handler
+ * @dev: the device
+ *
+ * Remove the device fault handler installed with
+ * iommu_register_device_fault_handler().
+ *
+ * Return 0 on success, or an error.
+ */
+int iommu_unregister_device_fault_handler(struct device *dev)
+{
+   struct iommu_param *param = dev->iommu_param;
+   int ret = 0;
+
+   if (!param)
+   return -EINVAL;
+
+   mutex_lock(¶m->lock);
+
+   if (!param->fault_param)
+   goto unlock;
+
+   /* we cannot unregister handler if there are pending faults */
+   if (!list_empty(¶m->fault_param->faults)) {
+   ret = -EBUSY

[PATCH v2 02/19] iommu: introduce device fault data

2019-04-23 Thread Jacob Pan
Device faults detected by IOMMU can be reported outside the IOMMU
subsystem for further processing. This patch introduces
a generic device fault data structure.

The fault can be either an unrecoverable fault or a page request,
also referred to as a recoverable fault.

We only care about non internal faults that are likely to be reported
to an external subsystem.

Signed-off-by: Jacob Pan 
Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Liu, Yi L 
Signed-off-by: Ashok Raj 
Signed-off-by: Eric Auger 

---
v4 -> v5:
- simplified struct iommu_fault_event comment
- Moved IOMMU_FAULT_PERM outside of the struct
- Removed IOMMU_FAULT_PERM_INST
- s/IOMMU_FAULT_PAGE_REQUEST_PASID_PRESENT/
  IOMMU_FAULT_PAGE_REQUEST_PASID_VALID

v3 -> v4:
- use a union containing aither an unrecoverable fault or a page
  request message. Move the device private data in the page request
  structure. Reshuffle the fields and use flags.
- move fault perm attributes to the uapi
- remove a bunch of iommu_fault_reason enum values that were related
  to internal errors
---
 include/linux/iommu.h  |  44 +
 include/uapi/linux/iommu.h | 115 +
 2 files changed, 159 insertions(+)
 create mode 100644 include/uapi/linux/iommu.h

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 480921d..810bde2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define IOMMU_READ (1 << 0)
 #define IOMMU_WRITE(1 << 1)
@@ -49,6 +50,7 @@ struct device;
 struct iommu_domain;
 struct notifier_block;
 struct iommu_sva;
+struct iommu_fault_event;
 
 /* iommu fault flags */
 #define IOMMU_FAULT_READ   0x0
@@ -58,6 +60,7 @@ typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
 typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
   void *);
+typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
 
 struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped*/
@@ -301,6 +304,46 @@ struct iommu_device {
struct device *dev;
 };
 
+/**
+ * struct iommu_fault_event - Generic fault event
+ *
+ * Can represent recoverable faults such as a page requests or
+ * unrecoverable faults such as DMA or IRQ remapping faults.
+ *
+ * @fault: fault descriptor
+ * @iommu_private: used by the IOMMU driver for storing fault-specific
+ * data. Users should not modify this field before
+ * sending the fault response.
+ */
+struct iommu_fault_event {
+   struct iommu_fault fault;
+   u64 iommu_private;
+};
+
+/**
+ * struct iommu_fault_param - per-device IOMMU fault data
+ * @dev_fault_handler: Callback function to handle IOMMU faults at device level
+ * @data: handler private data
+ *
+ */
+struct iommu_fault_param {
+   iommu_dev_fault_handler_t handler;
+   void *data;
+};
+
+/**
+ * struct iommu_param - collection of per-device IOMMU data
+ *
+ * @fault_param: IOMMU detected device fault reporting data
+ *
+ * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
+ * struct iommu_group  *iommu_group;
+ * struct iommu_fwspec *iommu_fwspec;
+ */
+struct iommu_param {
+   struct iommu_fault_param *fault_param;
+};
+
 int  iommu_device_register(struct iommu_device *iommu);
 void iommu_device_unregister(struct iommu_device *iommu);
 int  iommu_device_sysfs_add(struct iommu_device *iommu,
@@ -500,6 +543,7 @@ struct iommu_ops {};
 struct iommu_group {};
 struct iommu_fwspec {};
 struct iommu_device {};
+struct iommu_fault_param {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
new file mode 100644
index 000..edcc0dd
--- /dev/null
+++ b/include/uapi/linux/iommu.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOMMU user API definitions
+ */
+
+#ifndef _UAPI_IOMMU_H
+#define _UAPI_IOMMU_H
+
+#include 
+
+#define IOMMU_FAULT_PERM_WRITE (1 << 0) /* write */
+#define IOMMU_FAULT_PERM_EXEC  (1 << 1) /* exec */
+#define IOMMU_FAULT_PERM_PRIV  (1 << 2) /* privileged */
+
+/*  Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+   IOMMU_FAULT_DMA_UNRECOV = 1,/* unrecoverable fault */
+   IOMMU_FAULT_PAGE_REQ,   /* page request fault */
+};
+
+enum iommu_fault_reason {
+   IOMMU_FAULT_REASON_UNKNOWN = 0,
+
+   /* Could not access the PASID table (fetch caused external abort) */
+   IOMMU_FAULT_REASON_PASID_FETCH,
+
+   /* pasid entry is invalid or has configuration errors */
+   IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+
+   /*
+* PASID is out of range (e.g. exceeds the maximum PASID
+* supported by the IOMMU) or disabled.
+  

[PATCH v2 04/19] iommu: Introduce attach/detach_pasid_table API

2019-04-23 Thread Jacob Pan
In virtualization use case, when a guest is assigned
a PCI host device, protected by a virtual IOMMU on the guest,
the physical IOMMU must be programmed to be consistent with
the guest mappings. If the physical IOMMU supports two
translation stages it makes sense to program guest mappings
onto the first stage/level (ARM/Intel terminology) while the host
owns the stage/level 2.

In that case, it is mandated to trap on guest configuration
settings and pass those to the physical iommu driver.

This patch adds a new API to the iommu subsystem that allows
to set/unset the pasid table information.

A generic iommu_pasid_table_config struct is introduced in
a new iommu.h uapi header. This is going to be used by the VFIO
user API.

Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Liu, Yi L 
Signed-off-by: Ashok Raj 
Signed-off-by: Jacob Pan 
Signed-off-by: Eric Auger 
Reviewed-by: Jean-Philippe Brucker 

---

This patch generalizes the API introduced by Jacob & co-authors in
https://lwn.net/Articles/754331/

v4 -> v5:
- no returned valued for dummy definition of iommu_detach_pasid_table
- fix order in comment
- added Jean's R-b

v3 -> v4:
- s/set_pasid_table/attach_pasid_table
- restore detach_pasid_table. Detach can be used on unwind path.
- add padding
- remove @abort
- signature used for config and format
- add comments for fields in the SMMU struct

v2 -> v3:
- replace unbind/bind by set_pasid_table
- move table pointer and pasid bits in the generic part of the struct

v1 -> v2:
- restore the original pasid table name
- remove the struct device * parameter in the API
- reworked iommu_pasid_smmuv3
---
 drivers/iommu/iommu.c  | 19 +++
 include/linux/iommu.h  | 18 ++
 include/uapi/linux/iommu.h | 47 ++
 3 files changed, 84 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 75c352c..2a68786 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1528,6 +1528,25 @@ int iommu_attach_device(struct iommu_domain *domain, 
struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_attach_device);
 
+int iommu_attach_pasid_table(struct iommu_domain *domain,
+struct iommu_pasid_table_config *cfg)
+{
+   if (unlikely(!domain->ops->attach_pasid_table))
+   return -ENODEV;
+
+   return domain->ops->attach_pasid_table(domain, cfg);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_pasid_table);
+
+void iommu_detach_pasid_table(struct iommu_domain *domain)
+{
+   if (unlikely(!domain->ops->detach_pasid_table))
+   return;
+
+   domain->ops->detach_pasid_table(domain);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_pasid_table);
+
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a42019a..131cf80 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -227,6 +227,8 @@ struct iommu_sva_ops {
  * @sva_bind: Bind process address space to device
  * @sva_unbind: Unbind process address space from device
  * @sva_get_pasid: Get PASID associated to a SVA handle
+ * @attach_pasid_table: attach a pasid table
+ * @detach_pasid_table: detach the pasid table
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
 struct iommu_ops {
@@ -286,6 +288,9 @@ struct iommu_ops {
  void *drvdata);
void (*sva_unbind)(struct iommu_sva *handle);
int (*sva_get_pasid)(struct iommu_sva *handle);
+   int (*attach_pasid_table)(struct iommu_domain *domain,
+ struct iommu_pasid_table_config *cfg);
+   void (*detach_pasid_table)(struct iommu_domain *domain);
 
unsigned long pgsize_bitmap;
 };
@@ -394,6 +399,9 @@ extern int iommu_attach_device(struct iommu_domain *domain,
   struct device *dev);
 extern void iommu_detach_device(struct iommu_domain *domain,
struct device *dev);
+extern int iommu_attach_pasid_table(struct iommu_domain *domain,
+   struct iommu_pasid_table_config *cfg);
+extern void iommu_detach_pasid_table(struct iommu_domain *domain);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
@@ -897,6 +905,13 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct 
device *dev)
return -ENODEV;
 }
 
+static inline
+int iommu_attach_pasid_table(struct iommu_domain *domain,
+struct iommu_pasid_table_config *cfg)
+{
+   return -ENODEV;
+}
+
 static inline struct iommu_sva *
 iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
@@ -918,6 +933,9 @@ static inline int iommu_sva_get_pasid(struct iommu_sva 
*handle)
retu

[PATCH v2 10/19] iommu/vt-d: Add custom allocator for IOASID

2019-04-23 Thread Jacob Pan
When the VT-d driver runs in the guest, PASID allocation must be
performed via the virtual command interface. This patch registers a
custom IOASID allocator which takes precedence over the default
IDR-based allocator. The resulting IOASID allocation will always
come from the host. This ensures that the PASID namespace is
system-wide.

Signed-off-by: Lu Baolu 
Signed-off-by: Liu, Yi L 
Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel-iommu.c | 58 +
 include/linux/intel-iommu.h |  2 ++
 2 files changed, 60 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index d93c4bd..ec6f22d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1711,6 +1711,8 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
+   ioasid_unregister_allocator(&iommu->pasid_allocator);
+
 #endif
 }
 
@@ -4811,6 +4813,46 @@ static int __init platform_optin_force_iommu(void)
return 1;
 }
 
+static ioasid_t intel_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
+{
+   struct intel_iommu *iommu = data;
+   ioasid_t ioasid;
+
+   /*
+* VT-d virtual command interface always uses the full 20 bit
+* PASID range. Host can partition guest PASID range based on
+* policies but it is out of guest's control.
+*/
+   if (min < PASID_MIN || max > PASID_MAX)
+   return -EINVAL;
+
+   if (vcmd_alloc_pasid(iommu, &ioasid))
+   return INVALID_IOASID;
+
+   return ioasid;
+}
+
+static int intel_ioasid_free(ioasid_t ioasid, void *data)
+{
+   struct iommu_pasid_alloc_info *svm;
+   struct intel_iommu *iommu = data;
+
+   if (!iommu || !cap_caching_mode(iommu->cap))
+   return -EINVAL;
+   /*
+* Sanity check the ioasid owner is done at upper layer, e.g. VFIO
+* We can only free the PASID when all the devices are unbond.
+*/
+   svm = ioasid_find(NULL, ioasid, NULL);
+   if (!svm) {
+   pr_warn("Freeing unbond IOASID %d\n", ioasid);
+   return -EBUSY;
+   }
+   vcmd_free_pasid(iommu, ioasid);
+
+   return 0;
+}
+
 int __init intel_iommu_init(void)
 {
int ret = -ENODEV;
@@ -4912,6 +4954,22 @@ int __init intel_iommu_init(void)
   "%s", iommu->name);
iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
iommu_device_register(&iommu->iommu);
+   if (cap_caching_mode(iommu->cap) && sm_supported(iommu)) {
+   /*
+* Register a custom ASID allocator if we are running
+* in a guest, the purpose is to have a system wide 
PASID
+* namespace among all PASID users.
+* There can be multiple vIOMMUs in each guest but only
+* one allocator is active. All vIOMMU allocators will
+* eventually be calling the same host allocator.
+*/
+   iommu->pasid_allocator.alloc = intel_ioasid_alloc;
+   iommu->pasid_allocator.free = intel_ioasid_free;
+   iommu->pasid_allocator.pdata = (void *)iommu;
+   ret = 
ioasid_register_allocator(&iommu->pasid_allocator);
+   if (ret)
+   pr_warn("Custom PASID allocator registeration 
failed\n");
+   }
}
 
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index bff907b..c24c8aa 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -549,6 +550,7 @@ struct intel_iommu {
 #ifdef CONFIG_INTEL_IOMMU_SVM
struct page_req_dsc *prq;
unsigned char prq_name[16];/* Name for PRQ interrupt */
+   struct ioasid_allocator pasid_allocator; /* Custom allocator for PASIDs 
*/
 #endif
struct q_inval  *qi;/* Queued invalidation info */
u32 *iommu_state; /* Store iommu states between suspend and resume.*/
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 07/19] ioasid: Convert ioasid_idr to XArray

2019-04-23 Thread Jacob Pan
IDR is to be replaced by XArray; keep up with the changes.
XArray has internal locking for the normal APIs used here; also remove
the radix-tree-related preload.

Suggested-by: Ira Weiny 
Signed-off-by: Jacob Pan 
---
 drivers/base/ioasid.c | 29 -
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/base/ioasid.c b/drivers/base/ioasid.c
index cf122b2..c4012aa 100644
--- a/drivers/base/ioasid.c
+++ b/drivers/base/ioasid.c
@@ -4,7 +4,7 @@
  * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
  * free IOASIDs with ioasid_alloc and ioasid_free.
  */
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -16,13 +16,12 @@ struct ioasid_data {
struct rcu_head rcu;
 };
 
-static DEFINE_IDR(ioasid_idr);
-
+static DEFINE_XARRAY_ALLOC(ioasid_xa);
 /**
  * ioasid_alloc - Allocate an IOASID
  * @set: the IOASID set
  * @min: the minimum ID (inclusive)
- * @max: the maximum ID (exclusive)
+ * @max: the maximum ID (inclusive)
  * @private: data private to the caller
  *
  * Allocate an ID between @min and @max (or %0 and %INT_MAX). Return the
@@ -41,13 +40,13 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, 
ioasid_t max,
 
data->set = set;
data->private = private;
+   if (xa_alloc(&ioasid_xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {
+   pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
+   goto exit_free;
+   }
 
-   idr_preload(GFP_KERNEL);
-   idr_lock(&ioasid_idr);
-   data->id = id = idr_alloc(&ioasid_idr, data, min, max, GFP_ATOMIC);
-   idr_unlock(&ioasid_idr);
-   idr_preload_end();
-
+   data->id = id;
+exit_free:
if (id < 0) {
kfree(data);
return INVALID_IOASID;
@@ -64,12 +63,8 @@ void ioasid_free(ioasid_t ioasid)
 {
struct ioasid_data *ioasid_data;
 
-   idr_lock(&ioasid_idr);
-   ioasid_data = idr_remove(&ioasid_idr, ioasid);
-   idr_unlock(&ioasid_idr);
-
-   if (ioasid_data)
-   kfree_rcu(ioasid_data, rcu);
+   ioasid_data = xa_erase(&ioasid_xa, ioasid);
+   kfree_rcu(ioasid_data, rcu);
 }
 EXPORT_SYMBOL_GPL(ioasid_free);
 
@@ -93,7 +88,7 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
struct ioasid_data *ioasid_data;
 
rcu_read_lock();
-   ioasid_data = idr_find(&ioasid_idr, ioasid);
+   ioasid_data = xa_load(&ioasid_xa, ioasid);
if (ioasid_data && ioasid_data->set == set) {
priv = ioasid_data->private;
if (getter && !getter(priv))
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 06/19] drivers core: Add I/O ASID allocator

2019-04-23 Thread Jacob Pan
From: Jean-Philippe Brucker 

Some devices might support multiple DMA address spaces, in particular
those that have the PCI PASID feature. PASID (Process Address Space ID)
allows to share process address spaces with devices (SVA), partition a
device into VM-assignable entities (VFIO mdev) or simply provide
multiple DMA address space to kernel drivers. Add a global PASID
allocator usable by different drivers at the same time. Name it I/O ASID
to avoid confusion with ASIDs allocated by arch code, which are usually
a separate ID space.

The IOASID space is global. Each device can have its own PASID space,
but by convention the IOMMU ended up having a global PASID space, so
that with SVA, each mm_struct is associated to a single PASID.

The allocator doesn't really belong in drivers/iommu because some
drivers would like to allocate PASIDs for devices that aren't managed by
an IOMMU, using the same ID space as IOMMU. It doesn't really belong in
drivers/pci either since platform device also support PASID. Add the
allocator in drivers/base.

Signed-off-by: Jean-Philippe Brucker 
---
 drivers/base/Kconfig   |   6 +++
 drivers/base/Makefile  |   1 +
 drivers/base/ioasid.c  | 106 +
 include/linux/ioasid.h |  40 +++
 4 files changed, 153 insertions(+)
 create mode 100644 drivers/base/ioasid.c
 create mode 100644 include/linux/ioasid.h

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 059700e..47c1348 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -182,6 +182,12 @@ config DMA_SHARED_BUFFER
  APIs extension; the file's descriptor can then be passed on to other
  driver.
 
+config IOASID
+   bool
+   help
+ Enable the I/O Address Space ID allocator. A single ID space shared
+ between different users.
+
 config DMA_FENCE_TRACE
bool "Enable verbose DMA_FENCE_TRACE messages"
depends on DMA_SHARED_BUFFER
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 1574520..aafa2ac 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_PINCTRL) += pinctrl.o
 obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
 obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o
 obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o
+obj-$(CONFIG_IOASID) += ioasid.o
 
 obj-y  += test/
 
diff --git a/drivers/base/ioasid.c b/drivers/base/ioasid.c
new file mode 100644
index 000..cf122b2
--- /dev/null
+++ b/drivers/base/ioasid.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I/O Address Space ID allocator. There is one global IOASID space, split into
+ * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
+ * free IOASIDs with ioasid_alloc and ioasid_free.
+ */
+#include 
+#include 
+#include 
+#include 
+
+struct ioasid_data {
+   ioasid_t id;
+   struct ioasid_set *set;
+   void *private;
+   struct rcu_head rcu;
+};
+
+static DEFINE_IDR(ioasid_idr);
+
+/**
+ * ioasid_alloc - Allocate an IOASID
+ * @set: the IOASID set
+ * @min: the minimum ID (inclusive)
+ * @max: the maximum ID (exclusive)
+ * @private: data private to the caller
+ *
+ * Allocate an ID between @min and @max (or %0 and %INT_MAX). Return the
+ * allocated ID on success, or INVALID_IOASID on failure. The @private pointer
+ * is stored internally and can be retrieved with ioasid_find().
+ */
+ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
+ void *private)
+{
+   int id = -1;
+   struct ioasid_data *data;
+
+   data = kzalloc(sizeof(*data), GFP_KERNEL);
+   if (!data)
+   return INVALID_IOASID;
+
+   data->set = set;
+   data->private = private;
+
+   idr_preload(GFP_KERNEL);
+   idr_lock(&ioasid_idr);
+   data->id = id = idr_alloc(&ioasid_idr, data, min, max, GFP_ATOMIC);
+   idr_unlock(&ioasid_idr);
+   idr_preload_end();
+
+   if (id < 0) {
+   kfree(data);
+   return INVALID_IOASID;
+   }
+   return id;
+}
+EXPORT_SYMBOL_GPL(ioasid_alloc);
+
+/**
+ * ioasid_free - Free an IOASID
+ * @ioasid: the ID to remove
+ */
+void ioasid_free(ioasid_t ioasid)
+{
+   struct ioasid_data *ioasid_data;
+
+   idr_lock(&ioasid_idr);
+   ioasid_data = idr_remove(&ioasid_idr, ioasid);
+   idr_unlock(&ioasid_idr);
+
+   if (ioasid_data)
+   kfree_rcu(ioasid_data, rcu);
+}
+EXPORT_SYMBOL_GPL(ioasid_free);
+
+/**
+ * ioasid_find - Find IOASID data
+ * @set: the IOASID set
+ * @ioasid: the IOASID to find
+ * @getter: function to call on the found object
+ *
+ * The optional getter function allows to take a reference to the found object
+ * under the rcu lock. The function can also check if the object is still 
valid:
+ * if @getter returns false, then the object is invalid and NULL is returned.
+ *
+ * If the IOASID has been allocated for this set, return the private pointe

[PATCH v2 05/19] iommu: Introduce cache_invalidate API

2019-04-23 Thread Jacob Pan
From: "Liu, Yi L" 

In any virtualization use case, when the first translation stage
is "owned" by the guest OS, the host IOMMU driver has no knowledge
of caching structure updates unless the guest invalidation activities
are trapped by the virtualizer and passed down to the host.

Since the invalidation data are obtained from user space and will be
written into the physical IOMMU, we must allow security checks at various
layers. Therefore, a generic invalidation data format is proposed here;
model-specific IOMMU drivers need to convert it into their own format.

Signed-off-by: Liu, Yi L 
Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Jacob Pan 
Signed-off-by: Ashok Raj 
Signed-off-by: Eric Auger 

---
v6 -> v7:
- detail which fields are used for each invalidation type
- add a comment about multiple cache invalidation

v5 -> v6:
- fix merge issue

v3 -> v4:
- full reshape of the API following Alex' comments

v1 -> v2:
- add arch_id field
- renamed tlb_invalidate into cache_invalidate as this API allows
  to invalidate context caches on top of IOTLBs

v1:
renamed sva_invalidate into tlb_invalidate and add iommu_ prefix in
header. Commit message reworded.
---
 drivers/iommu/iommu.c  | 14 +
 include/linux/iommu.h  | 15 +
 include/uapi/linux/iommu.h | 78 ++
 3 files changed, 107 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2a68786..498c28a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1547,6 +1547,20 @@ void iommu_detach_pasid_table(struct iommu_domain 
*domain)
 }
 EXPORT_SYMBOL_GPL(iommu_detach_pasid_table);
 
+int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
+  struct iommu_cache_invalidate_info *inv_info)
+{
+   int ret = 0;
+
+   if (unlikely(!domain->ops->cache_invalidate))
+   return -ENODEV;
+
+   ret = domain->ops->cache_invalidate(domain, dev, inv_info);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
+
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 131cf80..4b92e4b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -229,6 +229,7 @@ struct iommu_sva_ops {
  * @sva_get_pasid: Get PASID associated to a SVA handle
  * @attach_pasid_table: attach a pasid table
  * @detach_pasid_table: detach the pasid table
+ * @cache_invalidate: invalidate translation caches
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
 struct iommu_ops {
@@ -292,6 +293,9 @@ struct iommu_ops {
  struct iommu_pasid_table_config *cfg);
void (*detach_pasid_table)(struct iommu_domain *domain);
 
+   int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
+   struct iommu_cache_invalidate_info *inv_info);
+
unsigned long pgsize_bitmap;
 };
 
@@ -402,6 +406,9 @@ extern void iommu_detach_device(struct iommu_domain *domain,
 extern int iommu_attach_pasid_table(struct iommu_domain *domain,
struct iommu_pasid_table_config *cfg);
 extern void iommu_detach_pasid_table(struct iommu_domain *domain);
+extern int iommu_cache_invalidate(struct iommu_domain *domain,
+ struct device *dev,
+ struct iommu_cache_invalidate_info *inv_info);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
@@ -936,6 +943,14 @@ static inline int iommu_sva_get_pasid(struct iommu_sva 
*handle)
 static inline
 void iommu_detach_pasid_table(struct iommu_domain *domain) {}
 
+static inline int
+iommu_cache_invalidate(struct iommu_domain *domain,
+  struct device *dev,
+  struct iommu_cache_invalidate_info *inv_info)
+{
+   return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 532a640..61a3fb7 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -159,4 +159,82 @@ struct iommu_pasid_table_config {
};
 };
 
+/* defines the granularity of the invalidation */
+enum iommu_inv_granularity {
+   IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */
+   IOMMU_INV_GRANU_PASID,  /* pasid-selective invalidation */
+   IOMMU_INV_GRANU_ADDR,   /* page-selective invalidation */
+};
+
+/**
+ * Address Selective Invalidation Structure
+ *
+ * @flags indicates the granularity of the address-selective invalidation
+ * - if PASID bit is set, @pasid field is populated and the invalidation
+ *   relates to cache entries tagged with this PASID and matching the
+

[PATCH v2 12/19] iommu/vt-d: Move domain helper to header

2019-04-23 Thread Jacob Pan
Move the domain helper to the header so that it can be used by SVA code.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel-iommu.c | 6 --
 include/linux/intel-iommu.h | 6 ++
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 785330a..77bbe1b 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -427,12 +427,6 @@ static void init_translation_status(struct intel_iommu 
*iommu)
iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
 }
 
-/* Convert generic 'struct iommu_domain to private struct dmar_domain */
-static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
-{
-   return container_of(dom, struct dmar_domain, domain);
-}
-
 static int __init intel_iommu_setup(char *str)
 {
if (!str)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index c24c8aa..48fa164 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -597,6 +597,12 @@ static inline void __iommu_flush_cache(
clflush_cache_range(addr, size);
 }
 
+/* Convert generic 'struct iommu_domain to private struct dmar_domain */
+static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
+{
+   return container_of(dom, struct dmar_domain, domain);
+}
+
 /*
  * 0: readable
  * 1: writable
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 11/19] iommu/vt-d: Replace Intel specific PASID allocator with IOASID

2019-04-23 Thread Jacob Pan
Make use of the generic IOASID code to manage PASID allocation,
freeing, and lookup.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/Kconfig   |  1 +
 drivers/iommu/intel-iommu.c |  9 -
 drivers/iommu/intel-pasid.c | 36 
 drivers/iommu/intel-svm.c   | 41 -
 4 files changed, 29 insertions(+), 58 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6f07f3b..7f92009 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -204,6 +204,7 @@ config INTEL_IOMMU_SVM
bool "Support for Shared Virtual Memory with Intel IOMMU"
depends on INTEL_IOMMU && X86
select PCI_PASID
+   select IOASID
select MMU_NOTIFIER
help
  Shared Virtual Memory (SVM) provides a facility for devices
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ec6f22d..785330a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5153,7 +5153,7 @@ static void auxiliary_unlink_device(struct dmar_domain 
*domain,
domain->auxd_refcnt--;
 
if (!domain->auxd_refcnt && domain->default_pasid > 0)
-   intel_pasid_free_id(domain->default_pasid);
+   ioasid_free(domain->default_pasid);
 }
 
 static int aux_domain_add_dev(struct dmar_domain *domain,
@@ -5171,9 +5171,8 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
if (domain->default_pasid <= 0) {
int pasid;
 
-   pasid = intel_pasid_alloc_id(domain, PASID_MIN,
-pci_max_pasids(to_pci_dev(dev)),
-GFP_KERNEL);
+   pasid = ioasid_alloc(NULL, PASID_MIN, 
pci_max_pasids(to_pci_dev(dev)) - 1,
+   domain);
if (pasid <= 0) {
pr_err("Can't allocate default pasid\n");
return -ENODEV;
@@ -5210,7 +5209,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
spin_unlock(&iommu->lock);
spin_unlock_irqrestore(&device_domain_lock, flags);
if (!domain->auxd_refcnt && domain->default_pasid > 0)
-   intel_pasid_free_id(domain->default_pasid);
+   ioasid_free(domain->default_pasid);
 
return ret;
 }
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 5b1d3be..d339e8f 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -26,42 +26,6 @@
  */
 static DEFINE_SPINLOCK(pasid_lock);
 u32 intel_pasid_max_id = PASID_MAX;
-static DEFINE_IDR(pasid_idr);
-
-int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp)
-{
-   int ret, min, max;
-
-   min = max_t(int, start, PASID_MIN);
-   max = min_t(int, end, intel_pasid_max_id);
-
-   WARN_ON(in_interrupt());
-   idr_preload(gfp);
-   spin_lock(&pasid_lock);
-   ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC);
-   spin_unlock(&pasid_lock);
-   idr_preload_end();
-
-   return ret;
-}
-
-void intel_pasid_free_id(int pasid)
-{
-   spin_lock(&pasid_lock);
-   idr_remove(&pasid_idr, pasid);
-   spin_unlock(&pasid_lock);
-}
-
-void *intel_pasid_lookup_id(int pasid)
-{
-   void *p;
-
-   spin_lock(&pasid_lock);
-   p = idr_find(&pasid_idr, pasid);
-   spin_unlock(&pasid_lock);
-
-   return p;
-}
 
 int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
 {
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 8f87304..8fff212 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "intel-pasid.h"
@@ -211,7 +212,9 @@ static void intel_mm_release(struct mmu_notifier *mn, 
struct mm_struct *mm)
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
-   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+   /* for emulated iommu, PASID cache invalidation implies 
IOTLB/DTLB */
+   if (!cap_caching_mode(svm->iommu->cap))
+   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 
!svm->mm);
}
rcu_read_unlock();
 
@@ -332,16 +335,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;
 
-   /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
-   ret = intel_pasid_alloc_id(svm,
-  !!cap_caching_mode(iommu->cap),
-  pasid_max - 1, GFP_KERNEL);
-   if (ret < 0) {
+   /* Do not use PASID 0, reserved for RID to PASID */
+   svm->pasid = io

[PATCH v2 08/19] ioasid: Add custom IOASID allocator

2019-04-23 Thread Jacob Pan
Sometimes, IOASID allocation must be handled by platform specific
code. The use cases are guest vIOMMU and pvIOMMU where IOASIDs need
to be allocated by the host via enlightened or paravirt interfaces.

This patch adds an extension to the IOASID allocator APIs such that
platform drivers can register a custom allocator, possibly at boot
time, to take over the allocation. Xarray is still used for tracking
and searching purposes internal to the IOASID code. Private data of
an IOASID can also be set after the allocation.

There can be multiple custom allocators registered but only one is
used at a time. In case of hot removal of the device that provides the
allocator, all IOASIDs must be freed prior to unregistering the
allocator. The default XArray-based allocator cannot be mixed with
custom allocators, i.e. custom allocators will not be used if there
are outstanding IOASIDs allocated by the default XA allocator.

Signed-off-by: Jacob Pan 
---
 drivers/base/ioasid.c  | 182 ++---
 include/linux/ioasid.h |  15 +++-
 2 files changed, 187 insertions(+), 10 deletions(-)

diff --git a/drivers/base/ioasid.c b/drivers/base/ioasid.c
index c4012aa..5cb36a4 100644
--- a/drivers/base/ioasid.c
+++ b/drivers/base/ioasid.c
@@ -17,6 +17,120 @@ struct ioasid_data {
 };
 
 static DEFINE_XARRAY_ALLOC(ioasid_xa);
+static DEFINE_MUTEX(ioasid_allocator_lock);
+static struct ioasid_allocator *ioasid_allocator;
+
+static LIST_HEAD(custom_allocators);
+/*
+ * A flag to track if ioasid default allocator already been used, this will
+ * prevent custom allocator from being used. The reason is that custom 
allocator
+ * must have unadulterated space to track private data with xarray, there 
cannot
+ * be a mix been default and custom allocated IOASIDs.
+ */
+static int default_allocator_used;
+
+/**
+ * ioasid_register_allocator - register a custom allocator
+ * @allocator: the custom allocator to be registered
+ *
+ * Custom allocator take precedence over the default xarray based allocator.
+ * Private data associated with the ASID are managed by ASID common code
+ * similar to data stored in xa.
+ *
+ * There can be multiple allocators registered but only one is active. In case
+ * of runtime removal of an custom allocator, the next one is activated based
+ * on the registration ordering.
+ */
+int ioasid_register_allocator(struct ioasid_allocator *allocator)
+{
+   struct ioasid_allocator *pallocator;
+   int ret = 0;
+
+   if (!allocator)
+   return -EINVAL;
+
+   mutex_lock(&ioasid_allocator_lock);
+   if (list_empty(&custom_allocators))
+   ioasid_allocator = allocator;
+   else {
+   /* Check if the allocator is already registered */
+   list_for_each_entry(pallocator, &custom_allocators, list) {
+   if (pallocator == allocator) {
+   pr_err("IOASID allocator already exist\n");
+   ret = -EEXIST;
+   goto out_unlock;
+   }
+   }
+   }
+   list_add_tail(&allocator->list, &custom_allocators);
+
+out_unlock:
+   mutex_unlock(&ioasid_allocator_lock);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(ioasid_register_allocator);
+
+/**
+ * ioasid_unregister_allocator - Remove a custom IOASID allocator
+ * @allocator: the custom allocator to be removed
+ *
+ * Remove an allocator from the list, activate the next allocator in
+ * the order it was  registration.
+ */
+void ioasid_unregister_allocator(struct ioasid_allocator *allocator)
+{
+   if (!allocator)
+   return;
+
+   if (list_empty(&custom_allocators)) {
+   pr_warn("No custom IOASID allocators active!\n");
+   return;
+   }
+
+   mutex_lock(&ioasid_allocator_lock);
+   list_del(&allocator->list);
+   if (list_empty(&custom_allocators)) {
+   pr_info("No custom IOASID allocators\n");
+   /*
+* All IOASIDs should have been freed before the last allocator
+* is unregistered.
+*/
+   BUG_ON(!xa_empty(&ioasid_xa));
+   ioasid_allocator = NULL;
+   } else if (allocator == ioasid_allocator) {
+   ioasid_allocator = list_entry(&custom_allocators, struct 
ioasid_allocator, list);
+   pr_info("IOASID allocator changed");
+   }
+   mutex_unlock(&ioasid_allocator_lock);
+}
+EXPORT_SYMBOL_GPL(ioasid_unregister_allocator);
+
+/**
+ * ioasid_set_data - Set private data for an allocated ioasid
+ * @ioasid: the ID to set data
+ * @data:   the private data
+ *
+ * For IOASID that is already allocated, private data can be set
+ * via this API. Future lookup can be done via ioasid_find.
+ */
+int ioasid_set_data(ioasid_t ioasid, void *data)
+{
+   struct ioasid_data *ioasid_data;
+   int ret = 0;
+
+   ioasid_data = xa_load(&ioasid_xa, ioasid);
+   if

[PATCH v2 09/19] iommu/vt-d: Enlightened PASID allocation

2019-04-23 Thread Jacob Pan
From: Lu Baolu 

If Intel IOMMU runs in caching mode, a.k.a. virtual IOMMU, the
IOMMU driver should rely on the emulation software to allocate
and free PASID IDs. The Intel vt-d spec revision 3.0 defines a
register set to support this. This includes a capability register,
a virtual command register and a virtual response register. Refer
to section 10.4.42, 10.4.43, 10.4.44 for more information.

This patch adds the enlightened PASID allocation/free interfaces
via the virtual command register.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Signed-off-by: Liu Yi L 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/intel-pasid.c | 70 +
 drivers/iommu/intel-pasid.h | 13 -
 include/linux/intel-iommu.h |  2 ++
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 03b12d2..5b1d3be 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -63,6 +63,76 @@ void *intel_pasid_lookup_id(int pasid)
return p;
 }
 
+int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
+{
+   u64 res;
+   u64 cap;
+   u8 err_code;
+   unsigned long flags;
+   int ret = 0;
+
+   if (!ecap_vcs(iommu->ecap)) {
+   pr_warn("IOMMU: %s: Hardware doesn't support virtual command\n",
+   iommu->name);
+   return -ENODEV;
+   }
+
+   cap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
+   if (!(cap & DMA_VCS_PAS)) {
+   pr_warn("IOMMU: %s: Emulation software doesn't support PASID 
allocation\n",
+   iommu->name);
+   return -ENODEV;
+   }
+
+   raw_spin_lock_irqsave(&iommu->register_lock, flags);
+   dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
+   IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+ !(res & VCMD_VRSP_IP), res);
+   raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+   err_code = VCMD_VRSP_EC(res);
+   switch (err_code) {
+   case VCMD_VRSP_EC_SUCCESS:
+   *pasid = VCMD_VRSP_RESULE(res);
+   break;
+   case VCMD_VRSP_EC_UNAVAIL:
+   pr_info("IOMMU: %s: No PASID available\n", iommu->name);
+   ret = -ENOMEM;
+   break;
+   default:
+   ret = -ENODEV;
+   pr_warn("IOMMU: %s: Unkonwn error code %d\n",
+   iommu->name, err_code);
+   }
+
+   return ret;
+}
+
+void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
+{
+   u64 res;
+   u8 err_code;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&iommu->register_lock, flags);
+   dmar_writeq(iommu->reg + DMAR_VCMD_REG, (pasid << 8) | VCMD_CMD_FREE);
+   IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+ !(res & VCMD_VRSP_IP), res);
+   raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+   err_code = VCMD_VRSP_EC(res);
+   switch (err_code) {
+   case VCMD_VRSP_EC_SUCCESS:
+   break;
+   case VCMD_VRSP_EC_INVAL:
+   pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
+   break;
+   default:
+   pr_warn("IOMMU: %s: Unkonwn error code %d\n",
+   iommu->name, err_code);
+   }
+}
+
 /*
  * Per device pasid table management:
  */
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 23537b3..0999dfe 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -19,6 +19,16 @@
 #define PASID_PDE_SHIFT6
 #define MAX_NR_PASID_BITS  20
 
+/* Virtual command interface for enlightened pasid management. */
+#define VCMD_CMD_ALLOC 0x1
+#define VCMD_CMD_FREE  0x2
+#define VCMD_VRSP_IP   0x1
+#define VCMD_VRSP_EC(e)(((e) >> 1) & 0x3)
+#define VCMD_VRSP_EC_SUCCESS   0
+#define VCMD_VRSP_EC_UNAVAIL   1
+#define VCMD_VRSP_EC_INVAL 1
+#define VCMD_VRSP_RESULE(e)(((e) >> 8) & 0xf)
+
 /*
  * Domain ID reserved for pasid entries programmed for first-level
  * only and pass-through transfer modes.
@@ -69,5 +79,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
   struct device *dev, int pasid);
 void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
 struct device *dev, int pasid);
-
+int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid);
+void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid);
 #endif /* __INTEL_PASID_H */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 6925a18..bff907b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -173,6 +173,7 @@
 #define ecap_smpwc(e)  (((e) >> 48) & 0x1)
 #define ecap_flts(e)   

[PATCH v2 14/19] iommu: Add guest PASID bind function

2019-04-23 Thread Jacob Pan
Guest shared virtual address (SVA) may require host to shadow guest
PASID tables. Guest PASID can also be allocated from the host via
enlightened interfaces. In this case, guest needs to bind the guest
mm, i.e. cr3 in guest physical address to the actual PASID table in
the host IOMMU. Nesting will be turned on such that guest virtual
address can go through a two level translation:
- 1st level translates GVA to GPA
- 2nd level translates GPA to HPA
This patch introduces APIs to bind guest PASID data to the assigned
device entry in the physical IOMMU. See the diagram below for usage
explanation.

.-.  .---.
|   vIOMMU|  | Guest process mm, FL only |
| |  '---'
./
| PASID Entry |--- PASID cache flush -
'-'   |
| |   V
| |
'-'
Guest
--| Shadow |--|
  vv  v
Host
.-.  .--.
|   pIOMMU|  | Bind FL for GVA-GPA  |
| |  '--'
./  |
| PASID Entry | V (Nested xlate)
'\.-.
| |   |Set SL to GPA-HPA|
| |   '-'
'-'

Where:
 - FL = First level/stage one page tables
 - SL = Second level/stage two page tables

Signed-off-by: Jacob Pan 
Signed-off-by: Liu Yi L 
---
 drivers/iommu/iommu.c  | 20 
 include/linux/iommu.h  | 10 ++
 include/uapi/linux/iommu.h | 15 ++-
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 498c28a..072f8f3 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1561,6 +1561,26 @@ int iommu_cache_invalidate(struct iommu_domain *domain, 
struct device *dev,
 }
 EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
 
+int iommu_sva_bind_gpasid(struct iommu_domain *domain,
+   struct device *dev, struct gpasid_bind_data *data)
+{
+   if (unlikely(!domain->ops->sva_bind_gpasid))
+   return -ENODEV;
+
+   return domain->ops->sva_bind_gpasid(domain, dev, data);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_bind_gpasid);
+
+int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
+   int pasid)
+{
+   if (unlikely(!domain->ops->sva_unbind_gpasid))
+   return -ENODEV;
+
+   return domain->ops->sva_unbind_gpasid(dev, pasid);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
+
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 4b92e4b..611388e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -231,6 +231,8 @@ struct iommu_sva_ops {
  * @detach_pasid_table: detach the pasid table
  * @cache_invalidate: invalidate translation caches
  * @pgsize_bitmap: bitmap of all possible supported page sizes
+ * @sva_bind_gpasid: bind guest pasid and mm
+ * @sva_unbind_gpasid: unbind guest pasid and mm
  */
 struct iommu_ops {
bool (*capable)(enum iommu_cap);
@@ -295,6 +297,10 @@ struct iommu_ops {
 
int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
struct iommu_cache_invalidate_info *inv_info);
+   int (*sva_bind_gpasid)(struct iommu_domain *domain,
+   struct device *dev, struct gpasid_bind_data *data);
+
+   int (*sva_unbind_gpasid)(struct device *dev, int pasid);
 
unsigned long pgsize_bitmap;
 };
@@ -409,6 +415,10 @@ extern void iommu_detach_pasid_table(struct iommu_domain 
*domain);
 extern int iommu_cache_invalidate(struct iommu_domain *domain,
  struct device *dev,
  struct iommu_cache_invalidate_info *inv_info);
+extern int iommu_sva_bind_gpasid(struct iommu_domain *domain,
+   struct device *dev, struct gpasid_bind_data *data);
+extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
+   struct device *dev, int pasid);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
 extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 61a3fb7..5c95905 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -235,6 +235,19 @@ struct iommu_cache_invalidate_info {
struct iommu_inv_addr_info addr_info;
};
 };
-
+/**
+ * struct gpasid_bind_data - Information about device and guest PASID binding
+ * @gcr3:  Guest CR3 value from guest mm
+ * @pasid:   

[PATCH v2 15/19] iommu/vt-d: Add bind guest PASID support

2019-04-23 Thread Jacob Pan
When supporting guest SVA with emulated IOMMU, the guest PASID
table is shadowed in VMM. Updates to guest vIOMMU PASID table
will result in PASID cache flush which will be passed down to
the host as bind guest PASID calls.

For the SL page tables, it will be harvested from device's
default domain (request w/o PASID), or aux domain in case of
mediated device.

.-.  .---.
|   vIOMMU|  | Guest process CR3, FL only|
| |  '---'
./
| PASID Entry |--- PASID cache flush -
'-'   |
| |   V
| |CR3 in GPA
'-'
Guest
--| Shadow |--|
  vv  v
Host
.-.  .--.
|   pIOMMU|  | Bind FL for GVA-GPA  |
| |  '--'
./  |
| PASID Entry | V (Nested xlate)
'\.--.
| |   |SL for GPA-HPA, default domain|
| |   '--'
'-'
Where:
 - FL = First level/stage one page tables
 - SL = Second level/stage two page tables

Signed-off-by: Jacob Pan 
Signed-off-by: Liu, Yi L 
---
 drivers/iommu/intel-iommu.c |   4 +
 drivers/iommu/intel-svm.c   | 174 
 include/linux/intel-iommu.h |  10 ++-
 include/linux/intel-svm.h   |   7 ++
 4 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 77bbe1b..89989b5 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5768,6 +5768,10 @@ const struct iommu_ops intel_iommu_ops = {
.dev_enable_feat= intel_iommu_dev_enable_feat,
.dev_disable_feat   = intel_iommu_dev_disable_feat,
.pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
+#ifdef CONFIG_INTEL_IOMMU_SVM
+   .sva_bind_gpasid= intel_svm_bind_gpasid,
+   .sva_unbind_gpasid  = intel_svm_unbind_gpasid,
+#endif
 };
 
 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 8fff212..0a973c2 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -227,6 +227,180 @@ static const struct mmu_notifier_ops intel_mmuops = {
 
 static DEFINE_MUTEX(pasid_mutex);
 static LIST_HEAD(global_svm_list);
+#define for_each_svm_dev() \
+   list_for_each_entry(sdev, &svm->devs, list) \
+   if (dev == sdev->dev)   \
+
+int intel_svm_bind_gpasid(struct iommu_domain *domain,
+   struct device *dev,
+   struct gpasid_bind_data *data)
+{
+   struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+   struct intel_svm_dev *sdev;
+   struct intel_svm *svm = NULL;
+   struct dmar_domain *ddomain;
+   int pasid_max;
+   int ret = 0;
+
+   if (WARN_ON(!iommu) || !data)
+   return -EINVAL;
+
+   if (dev_is_pci(dev)) {
+   pasid_max = pci_max_pasids(to_pci_dev(dev));
+   if (pasid_max < 0)
+   return -EINVAL;
+   } else
+   pasid_max = 1 << 20;
+
+   if (data->pasid <= 0 || data->pasid >= pasid_max)
+   return -EINVAL;
+
+   ddomain = to_dmar_domain(domain);
+   /* REVISIT:
+* Sanity check adddress width and paging mode support
+* width matching in two dimensions:
+* 1. paging mode CPU <= IOMMU
+* 2. address width Guest <= Host.
+*/
+   mutex_lock(&pasid_mutex);
+   svm = ioasid_find(NULL, data->pasid, NULL);
+   if (IS_ERR(svm)) {
+   ret = PTR_ERR(svm);
+   goto out;
+   }
+   if (svm) {
+   if (list_empty(&svm->devs)) {
+   dev_err(dev, "GPASID %d has no devices bond but SVA is 
allocated\n",
+   data->pasid);
+   ret = -ENODEV; /*
+   * If we found svm for the PASID, there 
must be at
+   * least one device bond, otherwise svm 
should be freed.
+   */
+   goto out;
+   }
+   for_each_svm_dev() {
+   /* In case of multiple sub-devices of the same pdev 
assigned, we should
+* allow multiple bind calls with the same PASID and 
pdev.
+*/
+   sdev->users++;
+   goto out;
+   }
+   } else {
+   /* We come here when PASID has never been bond to a device. */
+   svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+   if (!svm) {
+   

[PATCH v2 19/19] iommu/vt-d: Add svm/sva invalidate function

2019-04-23 Thread Jacob Pan
When Shared Virtual Address (SVA) is enabled for a guest OS via
vIOMMU, we need to provide invalidation support at IOMMU API and driver
level. This patch adds Intel VT-d specific function to implement
iommu passdown invalidate API for shared virtual address.

The use case is for supporting caching structure invalidation
of assigned SVM capable devices. Emulated IOMMU exposes queue
invalidation capability and passes down all descriptors from the guest
to the physical IOMMU.

The assumption is that guest to host device ID mapping should be
resolved prior to calling IOMMU driver. Based on the device handle,
host IOMMU driver can replace certain fields before submitting to the
invalidation queue.

Signed-off-by: Jacob Pan 
Signed-off-by: Ashok Raj 
Signed-off-by: Liu, Yi L 
---
 drivers/iommu/intel-iommu.c | 159 
 1 file changed, 159 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 89989b5..54a3d22 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5338,6 +5338,164 @@ static void intel_iommu_aux_detach_device(struct 
iommu_domain *domain,
aux_domain_remove_dev(to_dmar_domain(domain), dev);
 }
 
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned device where guest
+ * could own the first level page tables without being shadowed by QEMU. In
+ * this case there is no pass down unmap to the host IOMMU as a result of unmap
+ * in the guest. Only invalidations are trapped and passed down.
+ * In all cases, only first level TLB invalidation (request with PASID) can be
+ * passed down, therefore we do not include IOTLB granularity for request
+ * without PASID (second level).
+ *
+ * For an example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by iommu cache type
+ * Y: indexed by enum iommu_inv_granularity
+ * [IOMMU_INV_TYPE_TLB][IOMMU_INV_GRANU_PAGE_PASID]
+ *
+ * Granu_map array indicates validity of the table. 1: valid, 0: invalid
+ *
+ */
+const static int 
inv_type_granu_map[NR_IOMMU_CACHE_TYPE][NR_IOMMU_CACHE_INVAL_GRANU] = {
+   /* PASID based IOTLB, support PASID selective and page selective */
+   {0, 1, 1},
+   /* PASID based dev TLBs, only support all PASIDs or single PASID */
+   {1, 1, 0},
+   /* PASID cache */
+   {1, 1, 0}
+};
+
+const static u64 
inv_type_granu_table[NR_IOMMU_CACHE_TYPE][NR_IOMMU_CACHE_INVAL_GRANU] = {
+   /* PASID based IOTLB */
+   {0, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+   /* PASID based dev TLBs */
+   {QI_DEV_IOTLB_GRAN_ALL, QI_DEV_IOTLB_GRAN_PASID_SEL, 0},
+   /* PASID cache */
+   {QI_PC_ALL_PASIDS, QI_PC_PASID_SEL, 0},
+};
+
+static inline int to_vtd_granularity(int type, int granu, u64 *vtd_granu)
+{
+   if (type >= NR_IOMMU_CACHE_TYPE || granu >= NR_IOMMU_CACHE_INVAL_GRANU 
||
+   !inv_type_granu_map[type][granu])
+   return -EINVAL;
+
+   *vtd_granu = inv_type_granu_table[type][granu];
+
+   return 0;
+}
+
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+   u64 nr_pages;
+   /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
+* IOMMU cache invalidate API passes granu_size in bytes, and number of
+* granu size in contiguous memory.
+*/
+
+   nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
+   return order_base_2(nr_pages);
+}
+
+static int intel_iommu_sva_invalidate(struct iommu_domain *domain,
+   struct device *dev, struct iommu_cache_invalidate_info 
*inv_info)
+{
+   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+   struct device_domain_info *info;
+   struct intel_iommu *iommu;
+   unsigned long flags;
+   int cache_type;
+   u8 bus, devfn;
+   u16 did, sid;
+   int ret = 0;
+   u64 granu;
+   u64 size;
+
+   if (!inv_info || !dmar_domain ||
+   inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+   return -EINVAL;
+
+   if (!dev || !dev_is_pci(dev))
+   return -ENODEV;
+
+   iommu = device_to_iommu(dev, &bus, &devfn);
+   if (!iommu)
+   return -ENODEV;
+
+   spin_lock(&iommu->lock);
+   spin_lock_irqsave(&device_domain_lock, flags);
+   info = iommu_support_dev_iotlb(dmar_domain, iommu, bus, devfn);
+   if (!info) {
+   ret = -EINVAL;
+   goto out_unlock;
+   }
+   did = dmar_domain->iommu_did[iommu->seq_id];
+   sid = PCI_DEVID(bus, devfn);
+   size = to_vtd_size(inv_info->addr_info.granule_size, 
inv_info->addr_info.nb_granules);
+
+   for_each_set_bit(cache_type, (unsigned long *)&inv_info->cache, 
NR_IOMMU_CACHE_TYPE) {
+
+   ret

[PATCH v2 13/19] iommu/vt-d: Add nested translation support

2019-04-23 Thread Jacob Pan
Nested translation mode is supported in VT-d 3.0 Spec.CH 3.8.
With PASID granular translation type set to 0x11b, translation
result from the first level(FL) also subject to a second level(SL)
page table translation. This mode is used for SVA virtualization,
where FL performs guest virtual to guest physical translation and
SL performs guest physical to host physical translation.

Signed-off-by: Jacob Pan 
Signed-off-by: Liu, Yi L 
---
 drivers/iommu/intel-pasid.c | 101 
 drivers/iommu/intel-pasid.h |  11 +
 2 files changed, 112 insertions(+)

diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index d339e8f..04127cf 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -688,3 +688,104 @@ int intel_pasid_setup_pass_through(struct intel_iommu 
*iommu,
 
return 0;
 }
+
+/**
+ * intel_pasid_setup_nested() - Set up PASID entry for nested translation
+ * which is used for vSVA. The first level page tables are used for
+ * GVA-GPA translation in the guest, second level page tables are used
+ * for GPA to HPA translation.
+ *
+ * @iommu:  Iommu which the device belong to
+ * @dev:Device to be set up for translation
+ * @pgd:First level PGD, treated as GPA
+ * @pasid:  PASID to be programmed in the device PASID table
+ * @flags:  Additional info such as supervisor PASID
+ * @domain: Domain info for setting up second level page tables
+ * @addr_width: Address width of the first level (guest)
+ */
+int intel_pasid_setup_nested(struct intel_iommu *iommu,
+   struct device *dev, pgd_t *gpgd,
+   int pasid, int flags,
+   struct dmar_domain *domain,
+   int addr_width)
+{
+   struct pasid_entry *pte;
+   struct dma_pte *pgd;
+   u64 pgd_val;
+   int agaw;
+   u16 did;
+
+   if (!ecap_nest(iommu->ecap)) {
+   pr_err("No nested translation support on %s\n",
+  iommu->name);
+   return -EINVAL;
+   }
+
+   pte = intel_pasid_get_entry(dev, pasid);
+   if (WARN_ON(!pte))
+   return -EINVAL;
+
+   pasid_clear_entry(pte);
+
+   /* Sanity checking performed by caller to make sure address
+* width matching in two dimensions:
+* 1. CPU vs. IOMMU
+* 2. Guest vs. Host.
+*/
+   switch (addr_width) {
+   case 57:
+   pasid_set_flpm(pte, 1);
+   break;
+   case 48:
+   pasid_set_flpm(pte, 0);
+   break;
+   default:
+   dev_err(dev, "Invalid paging mode %d\n", addr_width);
+   return -EINVAL;
+   }
+
+   /* Setup the first level page table pointer in GPA */
+   pasid_set_flptr(pte, (u64)gpgd);
+   if (flags & PASID_FLAG_SUPERVISOR_MODE) {
+   if (!ecap_srs(iommu->ecap)) {
+   pr_err("No supervisor request support on %s\n",
+  iommu->name);
+   return -EINVAL;
+   }
+   pasid_set_sre(pte);
+   }
+
+   /* Setup the second level based on the given domain */
+   pgd = domain->pgd;
+
+   for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+   pgd = phys_to_virt(dma_pte_addr(pgd));
+   if (!dma_pte_present(pgd)) {
+   dev_err(dev, "Invalid domain page table\n");
+   return -EINVAL;
+   }
+   }
+   pgd_val = virt_to_phys(pgd);
+   pasid_set_slptr(pte, pgd_val);
+   pasid_set_fault_enable(pte);
+
+   did = domain->iommu_did[iommu->seq_id];
+   pasid_set_domain_id(pte, did);
+
+   pasid_set_address_width(pte, agaw);
+   pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+   pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+   pasid_set_present(pte);
+
+   if (!ecap_coherent(iommu->ecap))
+   clflush_cache_range(pte, sizeof(*pte));
+
+   if (cap_caching_mode(iommu->cap)) {
+   pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+   iotlb_invalidation_with_pasid(iommu, did, pasid);
+   } else
+   iommu_flush_write_buffer(iommu);
+
+   return 0;
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 0999dfe..c4fc1af 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -42,6 +42,7 @@
  * to vmalloc or even module mappings.
  */
 #define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+#define PASID_FLAG_NESTED  BIT(1)
 
 struct pasid_dir_entry {
u64 val;
@@ -51,6 +52,11 @@ struct pasid_entry {
u64 val[8];
 };
 
+#define PASID_ENTRY_PGTT_FL_ONLY   (1)
+#define PASID_ENTRY_PGTT_SL_ONLY   (2)
+#define PASID_ENTRY_PGTT_NESTED(3)
+#define PASID_ENTRY_PGTT_PT(4)
+
 /* The representative of a PASID

[PATCH v2 16/19] iommu/vtd: Clean up for SVM device list

2019-04-23 Thread Jacob Pan
Use combined macro for_each_svm_dev() to simplify SVM device iteration.

Suggested-by: Andy Shevchenko 
Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel-svm.c | 76 ++-
 1 file changed, 36 insertions(+), 40 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 0a973c2..39dfb2e 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -447,15 +447,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
goto out;
}
 
-   list_for_each_entry(sdev, &svm->devs, list) {
-   if (dev == sdev->dev) {
-   if (sdev->ops != ops) {
-   ret = -EBUSY;
-   goto out;
-   }
-   sdev->users++;
-   goto success;
+   for_each_svm_dev() {
+   if (sdev->ops != ops) {
+   ret = -EBUSY;
+   goto out;
}
+   sdev->users++;
+   goto success;
}
 
break;
@@ -585,40 +583,38 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
if (!svm)
goto out;
 
-   list_for_each_entry(sdev, &svm->devs, list) {
-   if (dev == sdev->dev) {
-   ret = 0;
-   sdev->users--;
-   if (!sdev->users) {
-   list_del_rcu(&sdev->list);
-   /* Flush the PASID cache and IOTLB for this 
device.
-* Note that we do depend on the hardware *not* 
using
-* the PASID any more. Just as we depend on 
other
-* devices never using PASIDs that they have no 
right
-* to use. We have a *shared* PASID table, 
because it's
-* large and has to be physically contiguous. 
So it's
-* hard to be as defensive as we might like. */
-   intel_pasid_tear_down_entry(iommu, dev, 
svm->pasid);
-   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 
!svm->mm);
-   kfree_rcu(sdev, rcu);
-
-   if (list_empty(&svm->devs)) {
-   ioasid_free(svm->pasid);
-   if (svm->mm)
-   
mmu_notifier_unregister(&svm->notifier, svm->mm);
-
-   list_del(&svm->list);
-
-   /* We mandate that no page faults may 
be outstanding
-* for the PASID when 
intel_svm_unbind_mm() is called.
-* If that is not obeyed, subtle errors 
will happen.
-* Let's make them less subtle... */
-   memset(svm, 0x6b, sizeof(*svm));
-   kfree(svm);
-   }
+   for_each_svm_dev() {
+   ret = 0;
+   sdev->users--;
+   if (!sdev->users) {
+   list_del_rcu(&sdev->list);
+   /* Flush the PASID cache and IOTLB for this device.
+* Note that we do depend on the hardware *not* using
+* the PASID any more. Just as we depend on other
+* devices never using PASIDs that they have no right
+* to use. We have a *shared* PASID table, because it's
+* large and has to be physically contiguous. So it's
+* hard to be as defensive as we might like. */
+   intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, 
!svm->mm);
+   kfree_rcu(sdev, rcu);
+
+   if (list_empty(&svm->devs)) {
+   ioasid_free(svm->pasid);
+   if (svm->mm)
+   mmu_notifier_unregister(&svm->notifier, 
svm->mm);
+
+   list_del(&svm->list);
+
+   /* We mandate that no page faults may be 
outstanding
+* for the PASID when intel_svm_unbind_mm() is 
called.
+* If that is not obeyed, subtl

[PATCH v2 18/19] iommu/vt-d: Support flushing more translation cache types

2019-04-23 Thread Jacob Pan
When Shared Virtual Memory is exposed to a guest via vIOMMU, extended
IOTLB invalidation may be passed down from outside IOMMU subsystems.
This patch adds invalidation functions that can be used for additional
translation cache types.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/dmar.c| 48 +
 include/linux/intel-iommu.h | 21 
 2 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9c49300..680894e 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1357,6 +1357,20 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, 
u64 addr,
qi_submit_sync(&desc, iommu);
 }
 
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u64 addr, u32 pasid,
+   unsigned int size_order, u64 granu)
+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
+   QI_EIOTLB_GRAN(granu) | QI_EIOTLB_TYPE;
+   desc.qw1 = QI_EIOTLB_ADDR(addr) | QI_EIOTLB_IH(0) |
+   QI_EIOTLB_AM(size_order);
+   desc.qw2 = 0;
+   desc.qw3 = 0;
+   qi_submit_sync(&desc, iommu);
+}
+
 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u16 qdep, u64 addr, unsigned mask)
 {
@@ -1380,6 +1394,40 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 
sid, u16 pfsid,
qi_submit_sync(&desc, iommu);
 }
 
+void qi_flush_dev_piotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+   u32 pasid,  u16 qdep, u64 addr, unsigned size, u64 granu)
+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+   QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
+   QI_DEV_IOTLB_PFSID(pfsid);
+   desc.qw1 |= QI_DEV_EIOTLB_GLOB(granu);
+
+   /* If S bit is 0, we only flush a single page. If S bit is set,
+* The least significant zero bit indicates the size. VT-d spec
+* 6.5.2.6
+*/
+   if (!size)
+   desc.qw0 = QI_DEV_EIOTLB_ADDR(addr) & ~QI_DEV_EIOTLB_SIZE;
+   else {
+   unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size);
+
+   desc.qw1 = QI_DEV_EIOTLB_ADDR(addr & ~mask) | 
QI_DEV_EIOTLB_SIZE;
+   }
+   qi_submit_sync(&desc, iommu);
+}
+
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, int 
pasid)
+{
+   struct qi_desc desc;
+
+   desc.qw0 = QI_PC_TYPE | QI_PC_DID(did) | QI_PC_GRAN(granu) | 
QI_PC_PASID(pasid);
+   desc.qw1 = 0;
+   desc.qw2 = 0;
+   desc.qw3 = 0;
+   qi_submit_sync(&desc, iommu);
+}
 /*
  * Disable Queued Invalidation interface.
  */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 5d67d0d4..38e5efb 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -339,7 +339,7 @@ enum {
 #define QI_IOTLB_GRAN(gran)(((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4))
 #define QI_IOTLB_ADDR(addr)(((u64)addr) & VTD_PAGE_MASK)
 #define QI_IOTLB_IH(ih)(((u64)ih) << 6)
-#define QI_IOTLB_AM(am)(((u8)am))
+#define QI_IOTLB_AM(am)(((u8)am) & 0x3f)
 
 #define QI_CC_FM(fm)   (((u64)fm) << 48)
 #define QI_CC_SID(sid) (((u64)sid) << 32)
@@ -357,17 +357,22 @@ enum {
 #define QI_PC_DID(did) (((u64)did) << 16)
 #define QI_PC_GRAN(gran)   (((u64)gran) << 4)
 
-#define QI_PC_ALL_PASIDS   (QI_PC_TYPE | QI_PC_GRAN(0))
-#define QI_PC_PASID_SEL(QI_PC_TYPE | QI_PC_GRAN(1))
+/* PASID cache invalidation granu */
+#define QI_PC_ALL_PASIDS   0
+#define QI_PC_PASID_SEL1
 
 #define QI_EIOTLB_ADDR(addr)   ((u64)(addr) & VTD_PAGE_MASK)
 #define QI_EIOTLB_GL(gl)   (((u64)gl) << 7)
 #define QI_EIOTLB_IH(ih)   (((u64)ih) << 6)
-#define QI_EIOTLB_AM(am)   (((u64)am))
+#define QI_EIOTLB_AM(am)   (((u64)am) & 0x3f)
 #define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32)
 #define QI_EIOTLB_DID(did) (((u64)did) << 16)
 #define QI_EIOTLB_GRAN(gran)   (((u64)gran) << 4)
 
+/* QI Dev-IOTLB inv granu */
+#define QI_DEV_IOTLB_GRAN_ALL  1
+#define QI_DEV_IOTLB_GRAN_PASID_SEL0
+
 #define QI_DEV_EIOTLB_ADDR(a)  ((u64)(a) & VTD_PAGE_MASK)
 #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
 #define QI_DEV_EIOTLB_GLOB(g)  ((u64)g)
@@ -658,8 +663,16 @@ extern void qi_flush_context(struct intel_iommu *iommu, 
u16 did, u16 sid,
 u8 fm, u64 type);
 extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
  unsigned int size_order, u64 type);
+extern void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+   u32 pasid, unsigned int size_order, u64 type);
 extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u16 qdep, u64 addr, unsigned mask);
+
+extern void qi_flus

[PATCH v2 17/19] iommu: Add max num of cache and granu types

2019-04-23 Thread Jacob Pan
To convert to/from cache types and granularities between generic and
VT-d specific counterparts, a 2D array is used. Introduce the limits
to help define the conversion array size.

Signed-off-by: Jacob Pan 
---
 include/uapi/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 5c95905..2d8fac8 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -197,6 +197,7 @@ struct iommu_inv_addr_info {
__u64   granule_size;
__u64   nb_granules;
 };
+#define NR_IOMMU_CACHE_INVAL_GRANU (3)
 
 /**
  * First level/stage invalidation information
@@ -235,6 +236,7 @@ struct iommu_cache_invalidate_info {
struct iommu_inv_addr_info addr_info;
};
 };
+#define NR_IOMMU_CACHE_TYPE(3)
 /**
  * struct gpasid_bind_data - Information about device and guest PASID binding
  * @gcr3:  Guest CR3 value from guest mm
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC] arm64: swiotlb: cma_alloc error spew

2019-04-23 Thread dann frazier
On Tue, Apr 23, 2019 at 12:03 PM dann frazier
 wrote:
>
> On Tue, Apr 23, 2019 at 5:32 AM Robin Murphy  wrote:
> >
> > On 17/04/2019 21:48, dann frazier wrote:
> > > hey,
> > >I'm seeing an issue on a couple of arm64 systems[*] where they spew
> > > ~10K "cma: cma_alloc: alloc failed" messages at boot. The errors are
> > > non-fatal, and bumping up cma to a large enough size (~128M) gets rid
> > > of them - but that seems suboptimal. Bisection shows that this started
> > > after commit fafadcd16595 ("swiotlb: don't dip into swiotlb pool for
> > > coherent allocations"). It looks like __dma_direct_alloc_pages()
> > > is opportunistically using CMA memory but falls back to non-CMA if CMA
> > > disabled or unavailable. I've demonstrated that this fallback is
> > > indeed returning a valid pointer. So perhaps the issue is really just
> > > the warning emission.
> >
> > The CMA area being full isn't necessarily an ignorable non-problem,
> > since it means you won't be able to allocate the kind of large buffers
> > for which CMA was intended. The question is, is it actually filling up
> > with allocations that deserve to be there, or is this the same as I've
> > seen on a log from a ThunderX2 system where it's getting exhausted by
> > thousands upon thousands of trivial single page allocations? If it's the
> > latter (CONFIG_CMA_DEBUG should help shed some light if necessary),
>
> Appears so. Here's a histogram of count/size w/ a cma= large enough to
> avoid failures:
>
> $ dmesg | grep "cma: cma_alloc(cma" | sed -r 's/.*count
> ([0-9]+)\,.*/\1/' | sort -n | uniq -c
>2062 1
>  32 2
> 266 8
>   2 24
>   4 32
> 256 33

And IIUC, this is also a big culprit. The debugfs bitmap seems to show
that the alignment of each of these leaves 31 pages unused, which adds
up to 31MB!

  -dann

>   7 64
>   2 128
>   2 1024
>
>   -dann
>
> > then
> > that does lean towards spending a bit more effort on this idea:
> >
> > https://lore.kernel.org/lkml/20190327080821.gb20...@lst.de/
> >
> > Robin.
> >
> > > The following naive patch solves the problem for me - just silence the
> > > cma errors, since it looks like a soft error. But is there a better
> > > approach?
> > >
> > > [*] APM X-Gene & HiSilicon Hi1620 w/ SMMU disabled
> > >
> > > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> > > index 6310ad01f915b..0324aa606c173 100644
> > > --- a/kernel/dma/direct.c
> > > +++ b/kernel/dma/direct.c
> > > @@ -112,7 +112,7 @@ struct page *__dma_direct_alloc_pages(struct device 
> > > *dev, size_t size,
> > >  /* CMA can be used only in the context which permits sleeping */
> > >  if (gfpflags_allow_blocking(gfp)) {
> > >  page = dma_alloc_from_contiguous(dev, count, page_order,
> > > -gfp & __GFP_NOWARN);
> > > +true);
> > >  if (page && !dma_coherent_ok(dev, page_to_phys(page), 
> > > size)) {
> > >  dma_release_from_contiguous(dev, page, count);
> > >  page = NULL;
> > >
> > >
> > >
> > >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] Remove old no iommu direct mapping code

2019-04-23 Thread Christoph Hellwig
https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/commit/?h=x86/amd&id=7a5dbf3ab2f04905cf8468c66fcdbfb643068bcb
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 06/19] drivers core: Add I/O ASID allocator

2019-04-23 Thread Christoph Hellwig
On Tue, Apr 23, 2019 at 04:31:06PM -0700, Jacob Pan wrote:
> The allocator doesn't really belong in drivers/iommu because some
> drivers would like to allocate PASIDs for devices that aren't managed by
> an IOMMU, using the same ID space as IOMMU. It doesn't really belong in
> drivers/pci either since platform device also support PASID. Add the
> allocator in drivers/base.

I'd still add it to drivers/iommu, just selectable separately from the
core iommu code..
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 56/79] docs: Documentation/*.txt: rename all ReST files to *.rst

2019-04-23 Thread Peter Zijlstra
On Tue, Apr 23, 2019 at 11:38:16PM +0200, Borislav Petkov wrote:
> If that is all the changes it would need, then I guess that's ok. Btw,
> those rst-conversion patches don't really show what got changed. Dunno
> if git can even show that properly. I diffed the two files by hand to
> see what got changed, see end of mail.

That is not a happy diff; that table has gotten waay worse to read due
to all that extra table crap.

> ---
> --- mm.old2019-04-23 23:18:55.954335784 +0200
> +++ mm.new2019-04-23 23:18:48.122335821 +0200
> @@ -18,51 +18,68 @@ Notes:
> notation than "16 EB", which few will recognize at first sight as 16 
> exabytes.
> It also shows it nicely how incredibly large 64-bit address space is.
>  
> -
> -Start addr|   Offset   | End addr |  Size   | VM area 
> description
> -
> -  ||  | |
> -  |0   | 7fff |  128 TB | user-space 
> virtual memory, different per mm
> -__||__|_|___
> -  ||  | |
> - 8000 | +128TB | 7fff | ~16M TB | ... huge, 
> almost 64 bits wide hole of non-canonical
> -  ||  | | virtual 
> memory addresses up to the -128 TB
> -  ||  | | starting 
> offset of kernel mappings.
> -__||__|_|___
> -|
> -| Kernel-space 
> virtual memory, shared between all processes:
> -|___
> -  ||  | |
> - 8000 | -128TB | 87ff |8 TB | ... guard 
> hole, also reserved for hypervisor
> - 8800 | -120TB | 887f |  0.5 TB | LDT remap for 
> PTI
> - 8880 | -119.5  TB | c87f |   64 TB | direct mapping 
> of all physical memory (page_offset_base)
> - c880 |  -55.5  TB | c8ff |  0.5 TB | ... unused hole
> - c900 |  -55TB | e8ff |   32 TB | 
> vmalloc/ioremap space (vmalloc_base)
> - e900 |  -23TB | e9ff |1 TB | ... unused hole
> - ea00 |  -22TB | eaff |1 TB | virtual memory 
> map (vmemmap_base)
> - eb00 |  -21TB | ebff |1 TB | ... unused hole
> - ec00 |  -20TB | fbff |   16 TB | KASAN shadow 
> memory
> -__||__|_|
> -|
> -| Identical 
> layout to the 56-bit one from here on:
> -|
> -  ||  | |
> - fc00 |   -4TB | fdff |2 TB | ... unused hole
> -  ||  | | vaddr_end for 
> KASLR
> - fe00 |   -2TB | fe7f |  0.5 TB | cpu_entry_area 
> mapping
> - fe80 |   -1.5  TB | feff |  0.5 TB | ... unused hole
> - ff00 |   -1TB | ff7f |  0.5 TB | %esp fixup 
> stacks
> - ff80 | -512GB | ffee |  444 GB | ... unused hole
> - ffef |  -68GB | fffe |   64 GB | EFI region 
> mapping space
> -  |   -4GB | 7fff |2 GB | ... unused hole
> - 8000 |   -2GB | 9fff |  512 MB | kernel text 
> mapping, mapped to physical address 0
> - 8000 |-2048MB |  | |
> - a000 |-1536MB | feff | 1520 MB | module mapping 
> space
> - ff00 |  -16MB |  | |
> -FIXADDR_START | ~-11MB | ff5f | ~0.5 MB | 
> kernel-internal fixmap range, variable size and offset
> - ff60 |  -10MB | ff600fff |4 kB | legacy 
> vsyscall ABI
> - ffe0 |   -2MB |  |2 MB | ... unused hole
> -__||__