[PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Borislav Petkov
Hi,

here's v2 with the build breakage on ppc and s390 fixed (obviously I can't
grep :-\) and the commit message extended to better explain the why.

Thx.

---
From: Borislav Petkov 
Date: Tue, 17 Mar 2020 12:03:05 +0100

Back then when the whole SME machinery started getting mainlined, it
was agreed that for simplicity, clarity and sanity's sake, the terms
denoting encrypted and not-encrypted memory should be "encrypted" and
"decrypted". And the majority of the code sticks to that convention
except those two. So rename them.

The intent of "encrypted" and "decrypted" is to represent the binary
concept of memory which is encrypted and of memory which is not.

The further differentiation between decrypted and unencrypted memory is
not needed in the code (for now), so keep things simple by using solely
the two terms.

No functional changes.

[ Convert forgotten s390 and ppc function variants. ]
Reported-by: kbuild test robot 
Signed-off-by: Borislav Petkov 
---
 arch/powerpc/include/asm/mem_encrypt.h |  2 +-
 arch/powerpc/platforms/pseries/Kconfig |  2 +-
 arch/s390/Kconfig  |  2 +-
 arch/s390/mm/init.c|  2 +-
 arch/x86/Kconfig   |  2 +-
 arch/x86/mm/mem_encrypt.c  |  4 ++--
 include/linux/dma-direct.h |  8 ++++----
 kernel/dma/Kconfig |  2 +-
 kernel/dma/direct.c| 14 +++++++-------
 kernel/dma/mapping.c   |  2 +-
 10 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
index ba9dab07c1be..f0705cd3b079 100644
--- a/arch/powerpc/include/asm/mem_encrypt.h
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -15,7 +15,7 @@ static inline bool mem_encrypt_active(void)
return is_secure_guest();
 }
 
-static inline bool force_dma_unencrypted(struct device *dev)
+static inline bool force_dma_decrypted(struct device *dev)
 {
return is_secure_guest();
 }
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 24c18362e5ea..a78e2c3e1d92 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -151,7 +151,7 @@ config PPC_SVM
depends on PPC_PSERIES
select SWIOTLB
select ARCH_HAS_MEM_ENCRYPT
-   select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+   select ARCH_HAS_FORCE_DMA_DECRYPTED
help
 There are certain POWER platforms which support secure guests using
 the Protected Execution Facility, with the help of an Ultravisor
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8abe77536d9d..ab1dbb7415b4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -192,7 +192,7 @@ config S390
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select HAVE_NMI
-   select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+   select ARCH_HAS_FORCE_DMA_DECRYPTED
select SWIOTLB
select GENERIC_ALLOCATOR
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ac44bd76db4b..5fed85f9fea6 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -157,7 +157,7 @@ int set_memory_decrypted(unsigned long addr, int numpages)
 }
 
 /* are we a protected virtualization guest? */
-bool force_dma_unencrypted(struct device *dev)
+bool force_dma_decrypted(struct device *dev)
 {
return is_prot_virt_guest();
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index beea77046f9b..2ae904f505e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1525,7 +1525,7 @@ config AMD_MEM_ENCRYPT
depends on X86_64 && CPU_SUP_AMD
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
-   select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+   select ARCH_HAS_FORCE_DMA_DECRYPTED
---help---
  Say yes to enable support for the encryption of system memory.
  This requires an AMD processor that supports Secure Memory
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index a03614bd3e1a..66d09f269e6d 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -350,8 +350,8 @@ bool sev_active(void)
return sme_me_mask && sev_enabled;
 }
 
-/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
-bool force_dma_unencrypted(struct device *dev)
+/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_DECRYPTED */
+bool force_dma_decrypted(struct device *dev)
 {
/*
 * For SEV, all DMA must be to unencrypted addresses.
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 24b8684aa21d..9f955844e9c7 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -26,14 +26,14 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr)
 }
 #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
 
-#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED
-bool force_dma_unencrypted(struct device *

Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Christoph Hellwig
On Thu, Mar 19, 2020 at 11:16:57AM +0100, Borislav Petkov wrote:
> Hi,
> 
> here's v2 with the build breakage on ppc and s390 fixed (obviously I can't
> grep :-\) and the commit message extended to better explain the why.

I thought we agreed that decrypted is absolutely the wrong term.

So NAK - if you want to change things it needs to go the other way.


Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Borislav Petkov
On Thu, Mar 19, 2020 at 11:20:11AM +0100, Christoph Hellwig wrote:
> I thought we agreed that decrypted is absolutely the wrong term.

I don't think we did. At least I don't know where we did that.

> So NAK - if you want to change things it needs to go the other way.

We are already using "decrypted" everywhere in arch/x86/. Changing that
would be a *lot* more churn.

And it is just a term, for chrissakes, to denote memory which is not
encrypted. And it would make our lives easier if we had only *two* terms
instead of three or more. Especially if the concept we denote with this
is a binary one: encrypted memory and *not* encrypted memory.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Robin Murphy

[since this is in my inbox...]

On 2020-03-19 10:28 am, Borislav Petkov wrote:

> On Thu, Mar 19, 2020 at 11:20:11AM +0100, Christoph Hellwig wrote:
>> I thought we agreed that decrypted is absolutely the wrong term.
>
> I don't think we did. At least I don't know where we did that.
>
>> So NAK - if you want to change things it needs to go the other way.
>
> We are already using "decrypted" everywhere in arch/x86/. Changing that
> would be a *lot* more churn.
>
> And it is just a term, for chrissakes, to denote memory which is not
> encrypted. And it would make our lives easier if we had only *two* terms
> instead of three or more. Especially if the concept we denote with this
> is a binary one: encrypted memory and *not* encrypted memory.


Let me add another vote from a native English speaker that "unencrypted" 
is the appropriate term to imply the *absence* of encryption, whereas 
"decrypted" implies the *reversal* of applied encryption.


Naming things is famously hard, for good reason - names are *important* 
for understanding. Just because a decision was already made one way 
doesn't mean that that decision was necessarily right. Churning one area 
to be consistently inaccurate just because it's less work than churning 
another area to be consistently accurate isn't really the best excuse.


Robin.


Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Borislav Petkov
On Thu, Mar 19, 2020 at 11:06:15AM +0000, Robin Murphy wrote:
> Let me add another vote from a native English speaker that "unencrypted" is
> the appropriate term to imply the *absence* of encryption, whereas
> "decrypted" implies the *reversal* of applied encryption.
> 
> Naming things is famously hard, for good reason - names are *important* for
> understanding. Just because a decision was already made one way doesn't mean
> that that decision was necessarily right. Churning one area to be
> consistently inaccurate just because it's less work than churning another
> area to be consistently accurate isn't really the best excuse.

Well, the reason we chose "decrypted" over something else is to be as
different from "encrypted" as possible. If we called it "unencrypted"
you'd have stuff like:

   if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

and I *betcha* people will misread this and maybe even introduce bugs.

So I don't think renaming it to "unencrypted" is better. And yes, I'm
deliberately putting the language semantics here on a second place
because of readability examples like the one above.
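
[ For context, a condensed sketch of the v5.6-era pairing in
  kernel/dma/direct.c that this example comes from -- not a verbatim
  copy; error handling and unrelated branches are trimmed: ]

void *dma_direct_alloc_pages(struct device *dev, size_t size,
			     dma_addr_t *dma_handle, gfp_t gfp,
			     unsigned long attrs)
{
	struct page *page = __dma_direct_alloc_pages(dev, size, gfp, attrs);
	void *ret;

	if (!page)
		return NULL;
	ret = page_address(page);

	if (force_dma_unencrypted(dev)) {
		/* Clear the encryption bit so the device can see the buffer. */
		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
	} else {
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
	}
	return ret;
}

void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
			   dma_addr_t dma_addr, unsigned long attrs)
{
	unsigned int page_order = get_order(size);

	if (force_dma_unencrypted(dev))
		/* Restore the encryption bit before the pages are reused. */
		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

	free_pages((unsigned long)cpu_addr, page_order);
}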

But ok, since people don't want this, we can leave it as is. It's not
like I don't have anything better to do.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: arm-smmu-v3 high cpu usage for NVMe

2020-03-19 Thread John Garry

Hi Will,



> On Thu, Jan 02, 2020 at 05:44:39PM +0000, John Garry wrote:
> > And for the overall system, we have:
> >
> >   PerfTop:   85864 irqs/sec  kernel:89.6%  exact:  0.0% lost: 0/34434 drop:
> > 0/40116 [4000Hz cycles],  (all, 96 CPUs)
> > --
> >
> >  27.43%  [kernel]  [k] arm_smmu_cmdq_issue_cmdlist
> >  11.71%  [kernel]  [k] _raw_spin_unlock_irqrestore
> >   6.35%  [kernel]  [k] _raw_spin_unlock_irq
> >   2.65%  [kernel]  [k] get_user_pages_fast
> >   2.03%  [kernel]  [k] __slab_free
> >   1.55%  [kernel]  [k] tick_nohz_idle_exit
> >   1.47%  [kernel]  [k] arm_lpae_map
> >   1.39%  [kernel]  [k] __fget
> >   1.14%  [kernel]  [k] __lock_text_start
> >   1.09%  [kernel]  [k] _raw_spin_lock
> >   1.08%  [kernel]  [k] bio_release_pages.part.42
> >   1.03%  [kernel]  [k] __sbitmap_get_word
> >   0.97%  [kernel]  [k] arm_smmu_atc_inv_domain.constprop.42
> >   0.91%  [kernel]  [k] fput_many
> >   0.88%  [kernel]  [k] __arm_lpae_map
> >
> > One thing to note is that we still spend an appreciable amount of time in
> > arm_smmu_atc_inv_domain(), which is disappointing when considering it should
> > effectively be a noop.
> >
> > As for arm_smmu_cmdq_issue_cmdlist(), I do note that during the testing our
> > batch size is 1, so we're not seeing the real benefit of the batching. I
> > can't help but think that we could improve this code to try to combine CMD
> > SYNCs for small batches.
> >
> > Anyway, let me know your thoughts or any questions. I'll have a look if I
> > get a chance for other possible bottlenecks.
>
> Did you ever get any more information on this? I don't have any SMMUv3
> hardware any more, so I can't really dig into this myself.


I'm only getting back to look at this now, as SMMU performance is a bit 
of a hot topic again for us.


So one thing we are doing which looks to help performance is this series 
from Marc:


https://lore.kernel.org/lkml/9171c554-50d2-142b-96ae-1357952fc...@huawei.com/T/#mee5562d1efd6aaeb8d2682bdb6807fe7b5d7f56d

So that is just spreading the per-CPU load for NVMe interrupt handling 
(where the DMA unmapping is happening), so I'd say just side-stepping 
any SMMU issue really.


Going back to the SMMU, I wanted to run eBPF and perf annotate to help 
profile this, but was having no luck getting them to work properly. I'll 
look at this again now.


Cheers,
John


Re: [PATCH 13/15] iommu/qcom: Use accessor functions for iommu private data

2020-03-19 Thread Joerg Roedel
Hi Jean-Philippe,

On Mon, Mar 16, 2020 at 04:52:23PM +0100, Jean-Philippe Brucker wrote:
> Should be:
> 
>   if (!dev_iommu_priv_set(dev))

Thanks a lot for your reviews! I made the changes to arm-smmu and the
qcom driver you requested and will post a new version later today.

Thanks,

Joerg


Re: [PATCH] dma: Fix max PFN arithmetic overflow on 32 bit systems

2020-03-19 Thread Robin Murphy

On 2020-03-02 6:16 pm, Alexander Dahl wrote:

> For ARCH=x86 (32 bit) when you set CONFIG_IOMMU_INTEL since c5a5dc4cbbf4
> ("iommu/vt-d: Don't switch off swiotlb if bounce page is used") there's
> a dependency on CONFIG_SWIOTLB, which was not necessarily active before.
>
> The init code for swiotlb in 'pci_swiotlb_detect_4gb()' compares
> something against MAX_DMA32_PFN to decide if it should be active.
> However that define suffers from an arithmetic overflow since
> 1b7e03ef7570 ("x86, NUMA: Enable emulation on 32bit too") when it was
> first made visible to x86_32.
>
> The effect is that 64 MiB (the default size) are now allocated for
> bounce buffers at boot time, which is a noticeable amount of memory on
> small systems. We noticed this effect on the fli4l Linux distribution
> when migrating from kernel v4.19 (LTS) to v5.4 (LTS) on boards like
> pcengines ALIX 2D3 with 256 MiB memory, for example:
>
>    Linux version 5.4.22 (buildroot@buildroot) (gcc version 7.3.0 (Buildroot 2018.02.8)) #1 SMP Mon Nov 26 23:40:00 CET 2018
>    …
>    Memory: 183484K/261756K available (4594K kernel code, 393K rwdata, 1660K rodata, 536K init, 456K bss , 78272K reserved, 0K cma-reserved, 0K highmem)
>    …
>    PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
>    software IO TLB: mapped [mem 0x0bb78000-0x0fb78000] (64MB)
>
> The initial analysis and the suggested fix were done by user 'sourcejedi'
> on Stack Exchange and explicitly marked as GPLv2 for inclusion in the
> Linux kernel:
>
>    https://unix.stackexchange.com/a/520525/50007
>
> Fixes: https://web.nettworks.org/bugs/browse/FFL-2560
> Fixes: https://unix.stackexchange.com/q/520065/50007
> Suggested-by: Alan Jenkins 
> Signed-off-by: Alexander Dahl 
> ---
> We tested this in qemu and on real hardware with fli4l on top of v5.4,
> v5.5, and v5.6-rc kernels, but only as far as the reserved memory goes.
> The patch itself is based on v5.6-rc3 (IIRC).
>
> A quick grep over the kernel code showed me this define MAX_DMA32_PFN is
> used in other places as well. I would appreciate feedback on this,
> because I cannot foresee all the side effects this might have.
>
> Thanks again to Alan who proposed the fix, and for his permission to
> send it upstream.
>
> Greets
> Alex
> ---
>   arch/x86/include/asm/dma.h | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
> index 00f7cf45e699..e25514eca8d6 100644
> --- a/arch/x86/include/asm/dma.h
> +++ b/arch/x86/include/asm/dma.h
> @@ -74,7 +74,7 @@
>   #define MAX_DMA_PFN   ((16UL * 1024 * 1024) >> PAGE_SHIFT)
>
>   /* 4GB broken PCI/AGP hardware bus master zone */
> -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
> +#define MAX_DMA32_PFN (4UL * ((1024 * 1024 * 1024) >> PAGE_SHIFT))


FWIW, wouldn't s/UL/ULL/ in the original expression suffice? Failing 
that, rather than awkward parenthesis trickery it might be clearer to 
just copy the one from arch/mips/include/asm/dma.h.


Robin.

  
>   #ifdef CONFIG_X86_32
>
>   /* The maximum address that we can perform a DMA transfer to on this platform */



Re: [PATCH v2 2/6] iommu: Configure default domain with def_domain_type

2020-03-19 Thread Joerg Roedel
Hi Baolu,

On Sat, Mar 14, 2020 at 09:07:01AM +0800, Lu Baolu wrote:
> +static int iommu_group_change_def_domain(struct iommu_group *group, int type)
> +{
> + struct group_device *grp_dev, *temp;
> + struct iommu_domain *new, *old;
> + const struct iommu_ops *ops;
> + int ret = 0;
> +
> + if ((type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_DMA) ||
> + !group->default_domain || type == group->default_domain->type ||
> + !group->default_domain->ops)
> + return -EINVAL;
> +
> + if (group->domain != group->default_domain)
> + return -EBUSY;
> +
> + iommu_group_ref_get(group);
> + old = group->default_domain;
> + ops = group->default_domain->ops;
> +
> + /* Allocate a new domain of requested type. */
> + new = ops->domain_alloc(type);
> + if (!new) {
> + ret = -ENOMEM;
> + goto domain_out;
> + }
> + new->type = type;
> + new->ops = ops;
> + new->pgsize_bitmap = group->default_domain->pgsize_bitmap;
> +
> + group->default_domain = new;
> + group->domain = new;
> + list_for_each_entry_safe(grp_dev, temp, &group->devices, list) {
> + struct device *dev;
> +
> + dev = grp_dev->dev;
> + if (device_is_bound(dev)) {
> + ret = -EINVAL;
> + goto device_out;
> + }
> +
> + iommu_group_create_direct_mappings(group, dev);
> + ret = __iommu_attach_device(group->domain, dev);
> + if (ret)
> + goto device_out;

In case of a failure here with a group containing multiple devices, the
other devices temporarily lose their mappings. Please only do the
device_is_bound() check in the loop and the actual re-attachment of the
group with a call to __iommu_attach_group().
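
[ A sketch of the shape being asked for here -- a hypothetical
  rearrangement of the quoted patch, not tested; err_restore is a
  made-up label that would roll back to the old default domain: ]

	/* Validate every device up front, before touching any attachment. */
	list_for_each_entry_safe(grp_dev, temp, &group->devices, list) {
		if (device_is_bound(grp_dev->dev)) {
			ret = -EBUSY;
			goto err_restore;
		}
	}

	list_for_each_entry_safe(grp_dev, temp, &group->devices, list)
		iommu_group_create_direct_mappings(group, grp_dev->dev);

	/* Re-attach the whole group in one operation instead of per device. */
	ret = __iommu_attach_group(group->domain, group);
	if (ret)
		goto err_restore;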

Regards,

Joerg


Re: [PATCH 1/1] iommu/vt-d: Fix page request descriptor size

2020-03-19 Thread Joerg Roedel
On Tue, Mar 17, 2020 at 09:10:18AM +0800, Lu Baolu wrote:
> From: Jacob Pan 
> 
> Intel VT-d might support PRS (Page Request Support) when it's
> running in the scalable mode. Each page request descriptor
> occupies 32 bytes and is 32-bytes aligned. The page request
> descriptor offset mask should be 32-bytes aligned.
> 
> Fixes: 5b438f4ba315d ("iommu/vt-d: Support page request in scalable mode")
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu Yi L 
> Signed-off-by: Lu Baolu 
> ---
>  drivers/iommu/intel-svm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied, thanks.



Re: [PATCH] iommu/vt-d: silence a RCU-list debugging warning

2020-03-19 Thread Joerg Roedel
On Wed, Mar 18, 2020 at 01:27:53PM +0800, Lu Baolu wrote:
> On 2020/3/17 23:03, Qian Cai wrote:
> > dmar_find_atsr() calls list_for_each_entry_rcu() outside of an RCU read
> > side critical section but with dmar_global_lock held. Silence this
> > false positive.
> > 
> >   drivers/iommu/intel-iommu.c:4504 RCU-list traversed in non-reader 
> > section!!
> >   1 lock held by swapper/0/1:
> >   #0: 9755bee8 (dmar_global_lock){+.+.}, at: 
> > intel_iommu_init+0x1a6/0xe19
> > 
> >   Call Trace:
> >dump_stack+0xa4/0xfe
> >lockdep_rcu_suspicious+0xeb/0xf5
> >dmar_find_atsr+0x1ab/0x1c0
> >dmar_parse_one_atsr+0x64/0x220
> >dmar_walk_remapping_entries+0x130/0x380
> >dmar_table_init+0x166/0x243
> >intel_iommu_init+0x1ab/0xe19
> >pci_iommu_init+0x1a/0x44
> >do_one_initcall+0xae/0x4d0
> >kernel_init_freeable+0x412/0x4c5
> >kernel_init+0x19/0x193
> > 
> > Signed-off-by: Qian Cai 
> 
> How about changing the commit subject to
> "iommu/vt-d: Silence RCU-list debugging warning in dmar_find_atsr()"?
> 
> Acked-by: Lu Baolu 

Fixed up the subject and applied it, thanks.


Re: [PATCH v1 2/6] arm/smmu: Add auxiliary domain support for arm-smmuv2

2020-03-19 Thread Jordan Crouse
On Wed, Mar 18, 2020 at 04:43:07PM -0700, Rob Clark wrote:
> On Wed, Mar 18, 2020 at 3:48 PM Will Deacon  wrote:
> >
> > On Tue, Jan 28, 2020 at 03:16:06PM -0700, Jordan Crouse wrote:
> > > Support auxiliary domains for arm-smmu-v2 to initialize and support
> > > multiple pagetables for a single SMMU context bank. Since the smmu-v2
> > > hardware doesn't have any built in support for switching the pagetable
> > > base it is left as an exercise to the caller to actually use the 
> > > pagetable.
> > >
> > > Aux domains are supported if split pagetable (TTBR1) support has been
> > > enabled on the master domain.  Each auxiliary domain will reuse the
> > > configuration of the master domain. By default a domain with TTBR1
> > > support will have the TTBR0 region disabled so the first attached aux
> > > domain will enable the TTBR0 region in the hardware and conversely the
> > > last domain to be detached will disable TTBR0 translations. All
> > > subsequent auxiliary domains create a pagetable but do not touch the
> > > hardware.
> > >
> > > The leaf driver will be able to query the physical address of the
> > > pagetable with the DOMAIN_ATTR_PTBASE attribute so that it can use the
> > > address with whatever means it has to switch the pagetable base.
> > >
> > > Following is a pseudo code example of how a domain can be created
> > >
> > >  /* Check to see if aux domains are supported */
> > >  if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
> > >iommu = iommu_domain_alloc(...);
> > >
> > >if (iommu_aux_attach_device(domain, dev))
> > >return FAIL;
> > >
> > >   /* Save the base address of the pagetable for use by the driver */
> > >   iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
> > >  }
> >
> > I'm not really understanding what the pagetable base gets used for here and,
> > to be honest with you, the whole thing feels like a huge layering violation
> > with the way things are structured today. Why doesn't the caller just
> > interface with io-pgtable directly?
> >
> > Finally, if we need to support context-switching TTBR0 for a live domain
> > then that code really needs to live inside the SMMU driver because the
> > ASID and TLB management necessary to do that safely doesn't belong anywhere
> > else.
> 
> Hi Will,
> 
> We do in fact need live domain switching, that is really the whole
> point.  The GPU CP (command processor/parser) is directly updating
> TTBR0 and triggering TLB flush, asynchronously from the CPU.

Right. This is entirely done in hardware with a GPU that has complete access to
the context bank registers. All the driver does is send the PTBASE to the
command stream, see [1] and especially [2] (look for CP_SMMU_TABLE_UPDATE).

As for interacting with the io-pgtable directly I would love to do that but it
would need some new infrastructure to either pull the io-pgtable from the aux
domain or to create an io-pgtable ourselves and pass it for use by the aux
domain. I'm not sure if that is better for the layering violation.

> And I think the answer about ASID is easy (on current hw).. it must be 
> zero[*].

Right now the GPU microcode still uses TLBIALL. I want to assign each new aux
domain its own ASID in the hopes that we could some day change that, but for now
having a unique ASID doesn't help.

Jordan

[1] https://patchwork.freedesktop.org/patch/351089/
[2] https://patchwork.freedesktop.org/patch/351090/

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v3] iommu/arm-smmu-v3: Add SMMUv3.2 range invalidation support

2020-03-19 Thread Rob Herring
On Wed, Mar 18, 2020 at 4:19 PM Will Deacon  wrote:
>
> Hi Rob,
>
> On Mon, Feb 24, 2020 at 04:31:29PM -0600, Rob Herring wrote:
> > Arm SMMUv3.2 adds support for TLB range invalidate operations.
> > Support for range invalidate is determined by the RIL bit in the IDR3
> > register.
> >
> > The range invalidate is in units of the leaf page size and operates on
> > 1-32 chunks of a power of 2 multiple pages. First, we determine from the
> > size what power of 2 multiple we can use. Then we calculate how many
> > chunks (1-31) of the power of 2 size for the range on the iteration. On
> > each iteration, we move up in size by at least 5 bits.
> >
> > Cc: Jean-Philippe Brucker 
> > Cc: Will Deacon 
> > Cc: Robin Murphy 
> > Cc: Joerg Roedel 
> > Reviewed-by: Eric Auger 
> > Signed-off-by: Rob Herring 
> > ---
> > v3:
> > - Use inv_range local instead of modifying granule
> > - Simplify the TG calculation
> > - Use shift instead of divide by power of 2.
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 69 +++--
> >  1 file changed, 67 insertions(+), 2 deletions(-)
>
> I've queued this one, but I had to resolve some conflicts with the command
> queue batching changes, so please can you take a quick look at my
> resolution?
>
> https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/commit/?h=for-joerg/arm-smmu/updates

Thanks, LGTM.

Rob
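
[ For readers following along, a standalone illustration of the
  chunking arithmetic the commit message describes -- not the driver
  code itself; issue_range_inv() is a made-up stand-in for building
  one range-invalidate command: ]

static void range_inv_sketch(unsigned long iova, size_t size,
			     unsigned int pgshift)
{
	unsigned long num_pages = size >> pgshift;

	while (num_pages) {
		/* Largest power-of-2 page multiple the next command can use. */
		unsigned long scale = __ffs(num_pages);
		/* How many such chunks fit the 5-bit NUM field, i.e. 1-31. */
		unsigned long num = (num_pages >> scale) & 0x1f;

		issue_range_inv(iova, num, scale);	/* hypothetical */

		iova += (num << scale) << pgshift;
		num_pages -= num << scale;
	}
}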


Re: [PATCH] dma: Fix max PFN arithmetic overflow on 32 bit systems

2020-03-19 Thread Alexander Dahl
Hello Robin,

On Thu, Mar 19, 2020 at 01:50:56PM +0000, Robin Murphy wrote:
> On 2020-03-02 6:16 pm, Alexander Dahl wrote:
> > ---
> >   arch/x86/include/asm/dma.h | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
> > index 00f7cf45e699..e25514eca8d6 100644
> > --- a/arch/x86/include/asm/dma.h
> > +++ b/arch/x86/include/asm/dma.h
> > @@ -74,7 +74,7 @@
> >   #define MAX_DMA_PFN   ((16UL * 1024 * 1024) >> PAGE_SHIFT)
> >   /* 4GB broken PCI/AGP hardware bus master zone */
> > -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
> > +#define MAX_DMA32_PFN (4UL * ((1024 * 1024 * 1024) >> PAGE_SHIFT))
> 
> FWIW, wouldn't s/UL/ULL/ in the original expression suffice? Failing that,
> rather than awkward parenthesis trickery it might be clearer to just copy
> the one from arch/mips/include/asm/dma.h.

Both of your suggestions yield the correct result, and at least for me
both look easier to understand than what Alan proposed in the first
place. 

I would opt for the variant which is already in arch/mips, because it
avoids 64-bit calculation, is the most obvious in intent to my eyes, and
then we have the same calculation in two places instead of two variants.
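
[ To make the overflow concrete, a small userspace demonstration,
  assuming PAGE_SHIFT == 12; built with -m32 the first line prints 0,
  the other two print 0x100000: ]

#include <stdio.h>

#define PAGE_SHIFT 12

/* Original: 4UL * 1024 * 1024 * 1024 == 1UL << 32, which wraps to 0
 * when unsigned long is 32 bits wide. */
#define MAX_DMA32_PFN_BROKEN	((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)

/* Proposed fix: shift before multiplying, so nothing exceeds 32 bits. */
#define MAX_DMA32_PFN_FIXED	(4UL * ((1024 * 1024 * 1024) >> PAGE_SHIFT))

/* The arch/mips variant: express "4 GiB worth of PFNs" as one shift. */
#define MAX_DMA32_PFN_MIPS	(1UL << (32 - PAGE_SHIFT))

int main(void)
{
	printf("broken: %#lx\n", MAX_DMA32_PFN_BROKEN);
	printf("fixed:  %#lx\n", MAX_DMA32_PFN_FIXED);
	printf("mips:   %#lx\n", MAX_DMA32_PFN_MIPS);
	return 0;
}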

Thanks for your review, I'll send a v2. :-)

Greets
Alex

-- 
/"\ ASCII RIBBON | »With the first link, the chain is forged. The first
\ / CAMPAIGN | speech censured, the first thought forbidden, the
 X  AGAINST  | first freedom denied, chains us all irrevocably.«
/ \ HTML MAIL| (Jean-Luc Picard, quoting Judge Aaron Satie)



Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Thomas Gleixner
Borislav Petkov  writes:

> On Thu, Mar 19, 2020 at 11:06:15AM +0000, Robin Murphy wrote:
>> Let me add another vote from a native English speaker that "unencrypted" is
>> the appropriate term to imply the *absence* of encryption, whereas
>> "decrypted" implies the *reversal* of applied encryption.
>> 
>> Naming things is famously hard, for good reason - names are *important* for
>> understanding. Just because a decision was already made one way doesn't mean
>> that that decision was necessarily right. Churning one area to be
>> consistently inaccurate just because it's less work than churning another
>> area to be consistently accurate isn't really the best excuse.
>
> Well, the reason we chose "decrypted" over something else is to be as
> different from "encrypted" as possible. If we called it "unencrypted"
> you'd have stuff like:
>
>if (force_dma_unencrypted(dev))
> set_memory_encrypted((unsigned long)cpu_addr, 1 << 
> page_order);

TBH, I don't see how

if (force_dma_decrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
   
makes more sense than the above. It's both non-sensical unless there is
a big fat comment explaining what this is about.

Thanks,

tglx



Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Borislav Petkov
On Thu, Mar 19, 2020 at 06:25:49PM +0100, Thomas Gleixner wrote:
> TBH, I don't see how
> 
>   if (force_dma_decrypted(dev))
>   set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
>
> makes more sense than the above. It's both non-sensical unless there is

9087c37584fb ("dma-direct: Force unencrypted DMA under SME for certain DMA 
masks")

> a big fat comment explaining what this is about.

ACK to that.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: arm-smmu-v3 high cpu usage for NVMe

2020-03-19 Thread Jean-Philippe Brucker
On Thu, Mar 19, 2020 at 12:54:59PM +0000, John Garry wrote:
> Hi Will,
> 
> > 
> > On Thu, Jan 02, 2020 at 05:44:39PM +0000, John Garry wrote:
> > > And for the overall system, we have:
> > > 
> > >PerfTop:   85864 irqs/sec  kernel:89.6%  exact:  0.0% lost: 0/34434 
> > > drop:
> > > 0/40116 [4000Hz cycles],  (all, 96 CPUs)
> > > --
> > > 
> > >  27.43%  [kernel]  [k] arm_smmu_cmdq_issue_cmdlist
> > >  11.71%  [kernel]  [k] _raw_spin_unlock_irqrestore
> > >   6.35%  [kernel]  [k] _raw_spin_unlock_irq
> > >   2.65%  [kernel]  [k] get_user_pages_fast
> > >   2.03%  [kernel]  [k] __slab_free
> > >   1.55%  [kernel]  [k] tick_nohz_idle_exit
> > >   1.47%  [kernel]  [k] arm_lpae_map
> > >   1.39%  [kernel]  [k] __fget
> > >   1.14%  [kernel]  [k] __lock_text_start
> > >   1.09%  [kernel]  [k] _raw_spin_lock
> > >   1.08%  [kernel]  [k] bio_release_pages.part.42
> > >   1.03%  [kernel]  [k] __sbitmap_get_word
> > >   0.97%  [kernel]  [k] arm_smmu_atc_inv_domain.constprop.42
> > >   0.91%  [kernel]  [k] fput_many
> > >   0.88%  [kernel]  [k] __arm_lpae_map
> > > 
> > > One thing to note is that we still spend an appreciable amount of time in
> > > arm_smmu_atc_inv_domain(), which is disappointing when considering it 
> > > should
> > > effectively be a noop.
> > > 
> > > As for arm_smmu_cmdq_issue_cmdlist(), I do note that during the testing 
> > > our
> > > batch size is 1, so we're not seeing the real benefit of the batching. I
> > > can't help but think that we could improve this code to try to combine CMD
> > > SYNCs for small batches.
> > > 
> > > Anyway, let me know your thoughts or any questions. I'll have a look if I
> > > get a chance for other possible bottlenecks.
> > 
> > Did you ever get any more information on this? I don't have any SMMUv3
> > hardware any more, so I can't really dig into this myself.
> 
> I'm only getting back to look at this now, as SMMU performance is a bit of a
> hot topic again for us.
> 
> So one thing we are doing which looks to help performance is this series
> from Marc:
> 
> https://lore.kernel.org/lkml/9171c554-50d2-142b-96ae-1357952fc...@huawei.com/T/#mee5562d1efd6aaeb8d2682bdb6807fe7b5d7f56d
> 
> So that is just spreading the per-CPU load for NVMe interrupt handling
> (where the DMA unmapping is happening), so I'd say just side-stepping any
> SMMU issue really.
> 
> Going back to the SMMU, I wanted to run eBPF and perf annotate to help
> profile this, but was having no luck getting them to work properly. I'll
> look at this again now.

Could you also try with the upcoming ATS change currently in Will's tree?
They won't improve your numbers but it'd be good to check that they don't
make things worse.

I've run a bunch of netperf instances on multiple cores and collected
SMMU usage (on TaiShan 2280). I'm getting the following ratio pretty
consistently.

- 6.07% arm_smmu_iotlb_sync
   - 5.74% arm_smmu_tlb_inv_range
        5.09% arm_smmu_cmdq_issue_cmdlist
        0.28% __pi_memset
        0.08% __pi_memcpy
        0.08% arm_smmu_atc_inv_domain.constprop.37
        0.07% arm_smmu_cmdq_build_cmd
        0.01% arm_smmu_cmdq_batch_add
     0.31% __pi_memset

So arm_smmu_atc_inv_domain() takes about 1.4% of arm_smmu_iotlb_sync(),
when ATS is not used. According to the annotations, the load from the
atomic_read(), that checks whether the domain uses ATS, is 77% of the
samples in arm_smmu_atc_inv_domain() (265 of 345 samples), so I'm not sure
there is much room for optimization there.

Thanks,
Jean


Re: [PATCH] iommu/virtio: Reject IOMMU page granule larger than PAGE_SIZE

2020-03-19 Thread Alex Williamson
On Wed, 18 Mar 2020 17:14:05 +0100
Auger Eric  wrote:

> Hi,
> 
> On 3/18/20 1:00 PM, Robin Murphy wrote:
> > On 2020-03-18 11:40 am, Jean-Philippe Brucker wrote:  
> >> We don't currently support IOMMUs with a page granule larger than the
> >> system page size. The IOVA allocator has a BUG_ON() in this case, and
> >> VFIO has a WARN_ON().  
> 
> Adding Alex in CC in case he has time to jump in. At the moment I don't
> get why this WARN_ON() is here.
> 
> This was introduced in commit c8dbca165bb0 ("vfio/iommu_type1: Avoid overflow").

I don't recall if I had something specific in mind when adding this
warning, but if PAGE_SIZE is smaller than the minimum IOMMU page size,
then we need multiple PAGE_SIZE pages per IOMMU TLB entry.  Therefore
those pages must be contiguous.  Aside from requiring only hugepage
backing, how could a user make sure that their virtual address buffer
is physically contiguous?  I don't think we have any sanity checking
code that does this, thus the warning.  Thanks,

Alex
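
[ To put numbers on the contiguity requirement -- hypothetical sizes,
  assuming 4 KiB system pages against a 64 KiB IOMMU granule: ]

#define SYS_PAGE_SHIFT		12	/* 4 KiB system pages (assumed) */
#define IOMMU_PAGE_SHIFT	16	/* 64 KiB IOMMU granule (assumed) */

/* One IOMMU TLB entry then spans 1 << (16 - 12) = 16 system pages,
 * all of which must be physically contiguous for the mapping to be
 * valid -- exactly what a userspace buffer cannot guarantee without
 * hugepage backing. */
#define SYS_PAGES_PER_IOMMU_PAGE (1UL << (IOMMU_PAGE_SHIFT - SYS_PAGE_SHIFT))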

> >> It might be possible to remove these obstacles if necessary. If the host
> >> uses 64kB pages and the guest uses 4kB, then a device driver calling
> >> alloc_page() followed by dma_map_page() will create a 64kB mapping for a
> >> 4kB physical page, allowing the endpoint to access the neighbouring 60kB
> >> of memory. This problem could be worked around with bounce buffers.  
> > 
> > FWIW the fundamental issue is that callers of iommu_map() may expect to
> > be able to map two or more page-aligned regions directly adjacent to
> > each other for scatter-gather purposes (or ring buffer tricks), and
> > that's just not possible if the IOMMU granule is too big. Bounce
> > buffering would be a viable workaround for the streaming DMA API and
> > certain similar use-cases, but not in general (e.g. coherent DMA, VFIO,
> > GPUs, etc.)
> > 
> > Robin.
> >   
> >> For the moment, rather than triggering the IOVA BUG_ON() on mismatched
> >> page sizes, abort the virtio-iommu probe with an error message.  
> 
> I understand this is introduced as a temporary solution, but it sounds
> like an important limitation to me. For instance, it will prevent
> running a Fedora guest exposed with a virtio-iommu on a RHEL host.
> 
> Thanks
> 
> Eric
> >>
> >> Reported-by: Bharat Bhushan 
> >> Signed-off-by: Jean-Philippe Brucker 
> >> ---
> >>   drivers/iommu/virtio-iommu.c | 9 +
> >>   1 file changed, 9 insertions(+)
> >>
> >> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> >> index 6d4e3c2a2ddb..80d5d8f621ab 100644
> >> --- a/drivers/iommu/virtio-iommu.c
> >> +++ b/drivers/iommu/virtio-iommu.c
> >> @@ -998,6 +998,7 @@ static int viommu_probe(struct virtio_device *vdev)
> >>   struct device *parent_dev = vdev->dev.parent;
> >>   struct viommu_dev *viommu = NULL;
> >>   struct device *dev = &vdev->dev;
> >> +    unsigned long viommu_page_size;
> >>   u64 input_start = 0;
> >>   u64 input_end = -1UL;
> >>   int ret;
> >> @@ -1028,6 +1029,14 @@ static int viommu_probe(struct virtio_device
> >> *vdev)
> >>   goto err_free_vqs;
> >>   }
> >>   +    viommu_page_size = 1UL << __ffs(viommu->pgsize_bitmap);
> >> +    if (viommu_page_size > PAGE_SIZE) {
> >> +    dev_err(dev, "granule 0x%lx larger than system page size
> >> 0x%lx\n",
> >> +    viommu_page_size, PAGE_SIZE);
> >> +    ret = -EINVAL;
> >> +    goto err_free_vqs;
> >> +    }
> >> +
> >>   viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ |
> >> VIRTIO_IOMMU_MAP_F_WRITE;
> >>   viommu->last_domain = ~0U;
> >>    
> >   


Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Michal Suchánek
On Thu, Mar 19, 2020 at 06:25:49PM +0100, Thomas Gleixner wrote:
> Borislav Petkov  writes:
> 
> > On Thu, Mar 19, 2020 at 11:06:15AM +0000, Robin Murphy wrote:
> >> Let me add another vote from a native English speaker that "unencrypted" is
> >> the appropriate term to imply the *absence* of encryption, whereas
> >> "decrypted" implies the *reversal* of applied encryption.
Even as a non-native speaker I can clearly see the distinction.
> >> 
> >> Naming things is famously hard, for good reason - names are *important* for
> >> understanding. Just because a decision was already made one way doesn't 
> >> mean
> >> that that decision was necessarily right. Churning one area to be
> >> consistently inaccurate just because it's less work than churning another
> >> area to be consistently accurate isn't really the best excuse.
> >
> > Well, the reason we chose "decrypted" over something else is to be as
> > different from "encrypted" as possible. If we called it "unencrypted"
> > you'd have stuff like:
> >
> >if (force_dma_unencrypted(dev))
> > set_memory_encrypted((unsigned long)cpu_addr, 1 << 
> > page_order);

If you want something with a high edit distance from 'encrypted' meaning
the opposite, there is already 'cleartext', which was designed for this
exact purpose.

Thanks

Michal


Re: [PATCH -v2] treewide: Rename "unencrypted" to "decrypted"

2020-03-19 Thread Thomas Gleixner
Borislav Petkov  writes:
> On Thu, Mar 19, 2020 at 06:25:49PM +0100, Thomas Gleixner wrote:
>> TBH, I don't see how
>> 
>>  if (force_dma_decrypted(dev))
>>  set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
>>
>> makes more sense than the above. It's both non-sensical unless there is
>
> 9087c37584fb ("dma-direct: Force unencrypted DMA under SME for certain DMA 
> masks")

Reading the changelog again...

I have to say that force_dma_unencrypted() makes way more sense in that
context than force_dma_decrypted(). It still wants a comment.

Linguistic semantics and correctness matter a lot. Consistency is
required as well, but not for the price of ambiguous wording.

Thanks,

tglx




[PATCH 2/3] iommu/vt-d: Fix mm reference leak

2020-03-19 Thread Jacob Pan
Move canonical address check before mmget_not_zero() to avoid mm
reference leak.

Fixes: 9d8c3af31607 ("iommu/vt-d: IOMMU Page Request needs to check if address is canonical.")

Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel-svm.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 1483f1845762..56253c59ca10 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -861,14 +861,15 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 * any faults on kernel addresses. */
if (!svm->mm)
goto bad_req;
-   /* If the mm is already defunct, don't handle faults. */
-   if (!mmget_not_zero(svm->mm))
-   goto bad_req;
 
/* If address is not canonical, return invalid response */
if (!is_canonical_address(address))
goto bad_req;
 
+   /* If the mm is already defunct, don't handle faults. */
+   if (!mmget_not_zero(svm->mm))
+   goto bad_req;
+
down_read(&svm->mm->mmap_sem);
vma = find_extend_vma(svm->mm, address);
if (!vma || address < vma->vm_start)
-- 
2.7.4



[PATCH 0/3] Misc bug fixes for VT-d SVM

2020-03-19 Thread Jacob Pan
Just a few bug fixes while testing Intel SVM code.

Jacob Pan (3):
  iommu/vt-d: Remove redundant IOTLB flush
  iommu/vt-d: Fix mm reference leak
  iommu/vt-d: Add build dependency on IOASID

 drivers/iommu/Kconfig |  1 +
 drivers/iommu/intel-svm.c | 13 ++---
 2 files changed, 7 insertions(+), 7 deletions(-)

-- 
2.7.4



[PATCH 3/3] iommu/vt-d: Add build dependency on IOASID

2020-03-19 Thread Jacob Pan
IOASID code is needed by VT-d scalable mode for PASID allocation.
Add an explicit dependency so that IOASID is built in whenever the
Intel IOMMU is enabled. Otherwise, the aux domain code will fail when
the IOMMU is built in and IOASID is compiled as a module.

Signed-off-by: Jacob Pan 
---
 drivers/iommu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d2fade984999..25149544d57c 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -188,6 +188,7 @@ config INTEL_IOMMU
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
+   select IOASID
help
  DMA remapping (DMAR) devices support enables independent address
  translations for Direct Memory Access (DMA) from devices.
-- 
2.7.4



[PATCH 1/3] iommu/vt-d: Remove redundant IOTLB flush

2020-03-19 Thread Jacob Pan
The IOTLB flush is already included in the PASID tear-down process. There
is no need to flush again.

Cc: Lu Baolu 
Signed-off-by: Jacob Pan 
---
 drivers/iommu/intel-svm.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 8f42d717d8d7..1483f1845762 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -268,10 +268,9 @@ static void intel_mm_release(struct mmu_notifier *mn, 
struct mm_struct *mm)
 * *has* to handle gracefully without affecting other processes.
 */
rcu_read_lock();
-   list_for_each_entry_rcu(sdev, &svm->devs, list) {
+   list_for_each_entry_rcu(sdev, &svm->devs, list)
intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
-   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
-   }
+
rcu_read_unlock();
 
 }
@@ -731,7 +730,6 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 * large and has to be physically contiguous. So it's
 * hard to be as defensive as we might like. */
intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
-   intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
kfree_rcu(sdev, rcu);
 
if (list_empty(&svm->devs)) {
-- 
2.7.4



Re: [PATCH 0/8] Convert the intel iommu driver to the dma-iommu api

2020-03-19 Thread Tom Murphy
Any news on this? Is there anyone who wants to try and fix this possible bug?

On Mon, 23 Dec 2019 at 03:41, Jani Nikula  wrote:
>
> On Mon, 23 Dec 2019, Robin Murphy  wrote:
> > On 2019-12-23 10:37 am, Jani Nikula wrote:
> >> On Sat, 21 Dec 2019, Tom Murphy  wrote:
> >>> This patchset converts the intel iommu driver to the dma-iommu api.
> >>>
> >>> While converting the driver I exposed a bug in the intel i915 driver
> >>> which causes a huge amount of artifacts on the screen of my
> >>> laptop. You can see a picture of it here:
> >>> https://github.com/pippy360/kernelPatches/blob/master/IMG_20191219_225922.jpg
> >>>
> >>> This issue is most likely in the i915 driver and is most likely caused
> >>> by the driver not respecting the return value of the
> >>> dma_map_ops::map_sg function. You can see the driver ignoring the
> >>> return value here:
> >>> https://github.com/torvalds/linux/blob/7e0165b2f1a912a06e381e91f0f4e495f4ac3736/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c#L51
> >>>
> >>> Previously this didn’t cause issues because the intel map_sg always
> >>> returned the same number of elements as the input scatter gather list
> >>> but with the change to this dma-iommu api this is no longer the
> >>> case. I wasn’t able to track the bug down to a specific line of code
> >>> unfortunately.
> >>>
> >>> Could someone from the intel team look at this?
> >>
> >> Let me get this straight. There is current API that on success always
> >> returns the same number of elements as the input scatter gather
> >> list. You propose to change the API so that this is no longer the case?
> >
> > No, the API for dma_map_sg() has always been that it may return fewer
> > DMA segments than nents - see Documentation/DMA-API.txt (and otherwise,
> > the return value would surely be a simple success/fail condition).
> > Relying on a particular implementation behaviour has never been strictly
> > correct, even if it does happen to be a very common behaviour.
> >
> >> A quick check of various dma_map_sg() calls in the kernel seems to
> >> indicate checking for 0 for errors and then ignoring the non-zero return
> >> is a common pattern. Are you sure it's okay to make the change you're
> >> proposing?
> >
> > Various code uses tricks like just iterating the mapped list until the
> > first segment with zero sg_dma_len(). Others may well simply have bugs.
>
> Thanks for the clarification.
>
> BR,
> Jani.
>
> >
> > Robin.
> >
> >> Anyway, due to the time of year and all, I'd like to ask you to file a
> >> bug against i915 at [1] so this is not forgotten, and please let's not
> >> merge the changes before this is resolved.
> >>
> >>
> >> Thanks,
> >> Jani.
> >>
> >>
> >> [1] https://gitlab.freedesktop.org/drm/intel/issues/new
> >>
> >>
>
> --
> Jani Nikula, Intel Open Source Graphics Center
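
[ For the record, a minimal consumer honouring the dma_map_sg()
  contract Robin describes above -- a sketch, with the actual device
  programming left as a placeholder: ]

static int map_and_program(struct device *dev, struct scatterlist *sgl,
			   int nents)
{
	struct scatterlist *sg;
	int i, count;

	count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
	if (!count)
		return -ENOMEM;		/* zero means the mapping failed */

	/* Walk only the *returned* count, via the sg_dma_* accessors. */
	for_each_sg(sgl, sg, count, i) {
		dma_addr_t addr = sg_dma_address(sg);
		unsigned int len = sg_dma_len(sg);

		/* ...program addr/len into the device's descriptor ring... */
	}

	/* Unmap with the original nents, not the mapped count. */
	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
	return 0;
}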

Re: [PATCH 3/8] iommu/vt-d: Remove IOVA handling code from non-dma_ops path

2020-03-19 Thread Tom Murphy
Could we merge patches 1-3 from this series? They just clean up weird
code, and merging them will cover some of the work needed to move the
Intel IOMMU driver to the dma-iommu API in the future.

On Sat, 21 Dec 2019 at 07:04, Tom Murphy  wrote:
>
> Remove all IOVA handling code from the non-dma_ops path in the intel
> iommu driver.
>
> There's no need for the non-dma_ops path to keep track of IOVAs. The
> whole point of the non-dma_ops path is that it allows the IOVAs to be
> handled separately. The IOVA handling code removed in this patch is
> pointless.
>
> Signed-off-by: Tom Murphy 
> ---
>  drivers/iommu/intel-iommu.c | 89 ++---
>  1 file changed, 33 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 64b1a9793daa..8d72ea0fb843 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1908,7 +1908,8 @@ static void domain_exit(struct dmar_domain *domain)
> domain_remove_dev_info(domain);
>
> /* destroy iovas */
> -   put_iova_domain(&domain->iovad);
> +   if (domain->domain.type == IOMMU_DOMAIN_DMA)
> +   put_iova_domain(&domain->iovad);
>
> if (domain->pgd) {
> struct page *freelist;
> @@ -2671,19 +2672,9 @@ static struct dmar_domain *set_domain_for_dev(struct 
> device *dev,
>  }
>
>  static int iommu_domain_identity_map(struct dmar_domain *domain,
> -unsigned long long start,
> -unsigned long long end)
> +unsigned long first_vpfn,
> +unsigned long last_vpfn)
>  {
> -   unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
> -   unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
> -
> -   if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
> - dma_to_mm_pfn(last_vpfn))) {
> -   pr_err("Reserving iova failed\n");
> -   return -ENOMEM;
> -   }
> -
> -   pr_debug("Mapping reserved region %llx-%llx\n", start, end);
> /*
>  * RMRR range might have overlap with physical memory range,
>  * clear it first
> @@ -2760,7 +2751,8 @@ static int __init si_domain_init(int hw)
>
> for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
> ret = iommu_domain_identity_map(si_domain,
> -   PFN_PHYS(start_pfn), 
> PFN_PHYS(end_pfn));
> +   mm_to_dma_pfn(start_pfn),
> +   mm_to_dma_pfn(end_pfn));
> if (ret)
> return ret;
> }
> @@ -4593,58 +4585,37 @@ static int intel_iommu_memory_notifier(struct 
> notifier_block *nb,
>unsigned long val, void *v)
>  {
> struct memory_notify *mhp = v;
> -   unsigned long long start, end;
> -   unsigned long start_vpfn, last_vpfn;
> +   unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
> +   unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
> +   mhp->nr_pages - 1);
>
> switch (val) {
> case MEM_GOING_ONLINE:
> -   start = mhp->start_pfn << PAGE_SHIFT;
> -   end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
> -   if (iommu_domain_identity_map(si_domain, start, end)) {
> -   pr_warn("Failed to build identity map for 
> [%llx-%llx]\n",
> -   start, end);
> +   if (iommu_domain_identity_map(si_domain, start_vpfn,
> +   last_vpfn)) {
> +   pr_warn("Failed to build identity map for 
> [%lx-%lx]\n",
> +   start_vpfn, last_vpfn);
> return NOTIFY_BAD;
> }
> break;
>
> case MEM_OFFLINE:
> case MEM_CANCEL_ONLINE:
> -   start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
> -   last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
> -   while (start_vpfn <= last_vpfn) {
> -   struct iova *iova;
> +   {
> struct dmar_drhd_unit *drhd;
> struct intel_iommu *iommu;
> struct page *freelist;
>
> -   iova = find_iova(&si_domain->iovad, start_vpfn);
> -   if (iova == NULL) {
> -   pr_debug("Failed get IOVA for PFN %lx\n",
> -start_vpfn);
> -   break;
> -   }
> -
> -   iova = split_and_remove_iova(&si_domain->iovad, iova,
> -start_vpfn, last