[PATCH] iommu/amd: Add support for X2APIC IOMMU interrupts

2019-07-15 Thread Suthikulpanit, Suravee
AMD IOMMU requires IntCapXT registers to be setup in order to generate
its own interrupts (for Event Log, PPR Log, and GA Log) with 32-bit
APIC destination ID. Without this support, AMD IOMMU MSI interrupts
will not be routed correctly when booting the system in X2APIC mode.

Cc: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
 drivers/iommu/amd_iommu_init.c  | 90 +
 drivers/iommu/amd_iommu_types.h |  9 +
 2 files changed, 99 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index ff40ba7..a4c5b1e 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -35,6 +35,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -1935,6 +1937,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
return 0;
 }
 
+#define XT_INT_DEST_MODE(x)(((x) & 0x1ULL) << 2)
+#define XT_INT_DEST_LO(x)  (((x) & 0xFFULL) << 8)
+#define XT_INT_VEC(x)  (((x) & 0xFFULL) << 32)
+#define XT_INT_DEST_HI(x)  x) >> 24) & 0xFFULL) << 56)
+
+/**
+ * Setup the IntCapXT registers with interrupt routing information
+ * based on the PCI MSI capability block registers, accessed via
+ * MMIO MSI address low/hi and MSI data registers.
+ */
+static void iommu_update_intcapxt(struct amd_iommu *iommu)
+{
+   u64 val;
+   u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
+   u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
+   u32 data= readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
+   bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+   u32 dest= ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
+
+   if (x2apic_enabled())
+   dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
+
+   val = XT_INT_VEC(data & 0xFF) |
+ XT_INT_DEST_MODE(dm) |
+ XT_INT_DEST_LO(dest) |
+ XT_INT_DEST_HI(dest);
+
+   /**
+* Current IOMMU implemtation uses the same IRQ for all
+* 3 IOMMU interrupts.
+*/
+   writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+   writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+   writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+}
+
+static void _irq_notifier_notify(struct irq_affinity_notify *notify,
+const cpumask_t *mask)
+{
+   struct amd_iommu *iommu;
+
+   for_each_iommu(iommu) {
+   if (iommu->dev->irq == notify->irq) {
+   iommu_update_intcapxt(iommu);
+   break;
+   }
+   }
+}
+
+static void _irq_notifier_release(struct kref *ref)
+{
+}
+
+static int iommu_init_intcapxt(struct amd_iommu *iommu)
+{
+   int ret;
+   struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
+
+   /**
+* IntCapXT requires XTSup=1, which can be inferred
+* amd_iommu_xt_mode.
+*/
+   if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
+   return 0;
+
+   /**
+* Also, we need to setup notifier to update the IntCapXT registers
+* whenever the irq affinity is changed from user-space.
+*/
+   notify->irq = iommu->dev->irq;
+   notify->notify = _irq_notifier_notify,
+   notify->release = _irq_notifier_release,
+   ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+   if (ret) {
+   pr_err("Failed to register irq affinity notifier (devid=%#x, 
irq %d)\n",
+  iommu->devid, iommu->dev->irq);
+   return ret;
+   }
+
+   iommu_update_intcapxt(iommu);
+   iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
+   return ret;
+}
+
 static int iommu_init_msi(struct amd_iommu *iommu)
 {
int ret;
@@ -1951,6 +2037,10 @@ static int iommu_init_msi(struct amd_iommu *iommu)
return ret;
 
 enable_faults:
+   ret = iommu_init_intcapxt(iommu);
+   if (ret)
+   return ret;
+
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
 
if (iommu->ppr_log != NULL)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 87965e4..39be804f 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -72,6 +72,12 @@
 #define MMIO_PPR_LOG_OFFSET0x0038
 #define MMIO_GA_LOG_BASE_OFFSET0x00e0
 #define MMIO_GA_LOG_TAIL_OFFSET0x00e8
+#define MMIO_MSI_ADDR_LO_OFFSET0x015C
+#define MMIO_MSI_ADDR_HI_OFFSET0x0160
+#define MMIO_MSI_DATA_OFFSET   0x0164
+#define MMIO_INTCAPXT_EVT_OFFSET   0x0170
+#define MMIO_INTCAPXT_PPR_OFFSET   0x0178
+#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180
 #define MMIO_CMD_HEAD_OFFSET   0x2000
 #define MMIO_CMD_TAIL_OFFSET   0x2008
 #define MMIO_EVT_HEAD_OFFSET   0x2010
@@ -162,6 +168,7 @@
 #define CONTROL_GALOG_EN0x1CULL
 #define CONTROL_GAINT_EN0x1DULL
 #define CONTROL

Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Benjamin Herrenschmidt
On Mon, 2019-07-15 at 19:03 -0300, Thiago Jung Bauermann wrote:
> > > Indeed. The idea is that QEMU can offer the flag, old guests can
> > > reject
> > > it (or even new guests can reject it, if they decide not to
> > > convert into
> > > secure VMs) and the feature negotiation will succeed with the
> > > flag
> > > unset.
> > 
> > OK. And then what does QEMU do? Assume guest is not encrypted I
> > guess?
> 
> There's nothing different that QEMU needs to do, with or without the
> flag. the perspective of the host, a secure guest and a regular guest
> work the same way with respect to virtio.

This is *precisely* why I was against adding a flag and touch the
protocol negociation with qemu in the first place, back when I cared
about that stuff...

Guys, this has gone in circles over and over again.

This has nothing to do with qemu. Qemu doesn't need to know about this.
It's entirely guest local. This is why the one-liner in virtio was a
far better and simpler solution.

This is something the guest does to itself (with the participation of a
ultravisor but that's not something qemu cares about at this stage, at
least not as far as virtio is concerned).

Basically, the guest "hides" its memory from the host using a HW secure
memory facility. As a result, it needs to ensure that all of its DMA
pages are bounced through insecure pages that aren't hidden. That's it,
it's all guest side. Qemu shouldn't have to care about it at all.

Cheers,
Ben.




Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Thiago Jung Bauermann


Michael S. Tsirkin  writes:

> On Mon, Jul 15, 2019 at 07:03:03PM -0300, Thiago Jung Bauermann wrote:
>> 
>> Michael S. Tsirkin  writes:
>> 
>> > On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote:
>> >>
>> >> Michael S. Tsirkin  writes:
>> >>
>> >> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote:
>> >> >>
>> >> >>
>> >> >> Michael S. Tsirkin  writes:
>> >> >>
>> >> >> > So this is what I would call this option:
>> >> >> >
>> >> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS
>> >> >> >
>> >> >> > and the explanation should state that all device
>> >> >> > addresses are translated by the platform to identical
>> >> >> > addresses.
>> >> >> >
>> >> >> > In fact this option then becomes more, not less restrictive
>> >> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise
>> >> >> > by guest to only create identity mappings,
>> >> >> > and only before driver_ok is set.
>> >> >> > This option then would always be negotiated together with
>> >> >> > VIRTIO_F_ACCESS_PLATFORM.
>> >> >> >
>> >> >> > Host then must verify that
>> >> >> > 1. full 1:1 mappings are created before driver_ok
>> >> >> > or can we make sure this happens before features_ok?
>> >> >> > that would be ideal as we could require that features_ok fails
>> >> >> > 2. mappings are not modified between driver_ok and reset
>> >> >> > i guess attempts to change them will fail -
>> >> >> > possibly by causing a guest crash
>> >> >> > or some other kind of platform-specific error
>> >> >>
>> >> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but 
>> >> >> requiring
>> >> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is
>> >> >> SLOF as I mentioned above, another is that we would be requiring all
>> >> >> guests running on the machine (secure guests or not, since we would use
>> >> >> the same configuration for all guests) to support it. But
>> >> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For
>> >> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know 
>> >> >> about
>> >> >> it and wouldn't be able to use the device.
>> >> >
>> >> > OK and your target is to enable use with kernel drivers within
>> >> > guests, right?
>> >>
>> >> Right.
>> >>
>> >> > My question is, we are defining a new flag here, I guess old guests
>> >> > then do not set it. How does it help old guests? Or maybe it's
>> >> > not designed to ...
>> >>
>> >> Indeed. The idea is that QEMU can offer the flag, old guests can reject
>> >> it (or even new guests can reject it, if they decide not to convert into
>> >> secure VMs) and the feature negotiation will succeed with the flag
>> >> unset.
>> >
>> > OK. And then what does QEMU do? Assume guest is not encrypted I guess?
>> 
>> There's nothing different that QEMU needs to do, with or without the
>> flag. the perspective of the host, a secure guest and a regular guest
>> work the same way with respect to virtio.
>
> OK. So now let's get back to implementation. What will
> Linux guest driver do? It can't activate DMA API blindly since that
> will assume translation also works, right?

It can on pseries, because we always have a 1:1 window mapping the whole
guest memory.

> Or do we somehow limit it to just a specific platform?

Yes, we want to accept the new flag only on secure pseries guests.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Michael S. Tsirkin
On Mon, Jul 15, 2019 at 07:03:03PM -0300, Thiago Jung Bauermann wrote:
> 
> Michael S. Tsirkin  writes:
> 
> > On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote:
> >>
> >> Michael S. Tsirkin  writes:
> >>
> >> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote:
> >> >>
> >> >>
> >> >> Michael S. Tsirkin  writes:
> >> >>
> >> >> > So this is what I would call this option:
> >> >> >
> >> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS
> >> >> >
> >> >> > and the explanation should state that all device
> >> >> > addresses are translated by the platform to identical
> >> >> > addresses.
> >> >> >
> >> >> > In fact this option then becomes more, not less restrictive
> >> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise
> >> >> > by guest to only create identity mappings,
> >> >> > and only before driver_ok is set.
> >> >> > This option then would always be negotiated together with
> >> >> > VIRTIO_F_ACCESS_PLATFORM.
> >> >> >
> >> >> > Host then must verify that
> >> >> > 1. full 1:1 mappings are created before driver_ok
> >> >> > or can we make sure this happens before features_ok?
> >> >> > that would be ideal as we could require that features_ok fails
> >> >> > 2. mappings are not modified between driver_ok and reset
> >> >> > i guess attempts to change them will fail -
> >> >> > possibly by causing a guest crash
> >> >> > or some other kind of platform-specific error
> >> >>
> >> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring
> >> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is
> >> >> SLOF as I mentioned above, another is that we would be requiring all
> >> >> guests running on the machine (secure guests or not, since we would use
> >> >> the same configuration for all guests) to support it. But
> >> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For
> >> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about
> >> >> it and wouldn't be able to use the device.
> >> >
> >> > OK and your target is to enable use with kernel drivers within
> >> > guests, right?
> >>
> >> Right.
> >>
> >> > My question is, we are defining a new flag here, I guess old guests
> >> > then do not set it. How does it help old guests? Or maybe it's
> >> > not designed to ...
> >>
> >> Indeed. The idea is that QEMU can offer the flag, old guests can reject
> >> it (or even new guests can reject it, if they decide not to convert into
> >> secure VMs) and the feature negotiation will succeed with the flag
> >> unset.
> >
> > OK. And then what does QEMU do? Assume guest is not encrypted I guess?
> 
> There's nothing different that QEMU needs to do, with or without the
> flag. the perspective of the host, a secure guest and a regular guest
> work the same way with respect to virtio.

OK. So now let's get back to implementation. What will
Linux guest driver do? It can't activate DMA API blindly since that
will assume translation also works, right?
Or do we somehow limit it to just a specific platform?

> --
> Thiago Jung Bauermann
> IBM Linux Technology Center


Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Thiago Jung Bauermann


Michael S. Tsirkin  writes:

> On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote:
>>
>> Michael S. Tsirkin  writes:
>>
>> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote:
>> >>
>> >>
>> >> Michael S. Tsirkin  writes:
>> >>
>> >> > So this is what I would call this option:
>> >> >
>> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS
>> >> >
>> >> > and the explanation should state that all device
>> >> > addresses are translated by the platform to identical
>> >> > addresses.
>> >> >
>> >> > In fact this option then becomes more, not less restrictive
>> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise
>> >> > by guest to only create identity mappings,
>> >> > and only before driver_ok is set.
>> >> > This option then would always be negotiated together with
>> >> > VIRTIO_F_ACCESS_PLATFORM.
>> >> >
>> >> > Host then must verify that
>> >> > 1. full 1:1 mappings are created before driver_ok
>> >> > or can we make sure this happens before features_ok?
>> >> > that would be ideal as we could require that features_ok fails
>> >> > 2. mappings are not modified between driver_ok and reset
>> >> > i guess attempts to change them will fail -
>> >> > possibly by causing a guest crash
>> >> > or some other kind of platform-specific error
>> >>
>> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring
>> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is
>> >> SLOF as I mentioned above, another is that we would be requiring all
>> >> guests running on the machine (secure guests or not, since we would use
>> >> the same configuration for all guests) to support it. But
>> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For
>> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about
>> >> it and wouldn't be able to use the device.
>> >
>> > OK and your target is to enable use with kernel drivers within
>> > guests, right?
>>
>> Right.
>>
>> > My question is, we are defining a new flag here, I guess old guests
>> > then do not set it. How does it help old guests? Or maybe it's
>> > not designed to ...
>>
>> Indeed. The idea is that QEMU can offer the flag, old guests can reject
>> it (or even new guests can reject it, if they decide not to convert into
>> secure VMs) and the feature negotiation will succeed with the flag
>> unset.
>
> OK. And then what does QEMU do? Assume guest is not encrypted I guess?

There's nothing different that QEMU needs to do, with or without the
flag. the perspective of the host, a secure guest and a regular guest
work the same way with respect to virtio.

--
Thiago Jung Bauermann
IBM Linux Technology Center
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Michael S. Tsirkin
On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote:
> 
> Michael S. Tsirkin  writes:
> 
> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote:
> >>
> >>
> >> Michael S. Tsirkin  writes:
> >>
> >> > So this is what I would call this option:
> >> >
> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS
> >> >
> >> > and the explanation should state that all device
> >> > addresses are translated by the platform to identical
> >> > addresses.
> >> >
> >> > In fact this option then becomes more, not less restrictive
> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise
> >> > by guest to only create identity mappings,
> >> > and only before driver_ok is set.
> >> > This option then would always be negotiated together with
> >> > VIRTIO_F_ACCESS_PLATFORM.
> >> >
> >> > Host then must verify that
> >> > 1. full 1:1 mappings are created before driver_ok
> >> > or can we make sure this happens before features_ok?
> >> > that would be ideal as we could require that features_ok fails
> >> > 2. mappings are not modified between driver_ok and reset
> >> > i guess attempts to change them will fail -
> >> > possibly by causing a guest crash
> >> > or some other kind of platform-specific error
> >>
> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring
> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is
> >> SLOF as I mentioned above, another is that we would be requiring all
> >> guests running on the machine (secure guests or not, since we would use
> >> the same configuration for all guests) to support it. But
> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For
> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about
> >> it and wouldn't be able to use the device.
> >
> > OK and your target is to enable use with kernel drivers within
> > guests, right?
> 
> Right.
> 
> > My question is, we are defining a new flag here, I guess old guests
> > then do not set it. How does it help old guests? Or maybe it's
> > not designed to ...
> 
> Indeed. The idea is that QEMU can offer the flag, old guests can reject
> it (or even new guests can reject it, if they decide not to convert into
> secure VMs) and the feature negotiation will succeed with the flag
> unset.

OK. And then what does QEMU do? Assume guest is not encrypted I guess?

> --
> Thiago Jung Bauermann
> IBM Linux Technology Center
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Thiago Jung Bauermann


Michael S. Tsirkin  writes:

> On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote:
>>
>>
>> Michael S. Tsirkin  writes:
>>
>> > So this is what I would call this option:
>> >
>> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS
>> >
>> > and the explanation should state that all device
>> > addresses are translated by the platform to identical
>> > addresses.
>> >
>> > In fact this option then becomes more, not less restrictive
>> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise
>> > by guest to only create identity mappings,
>> > and only before driver_ok is set.
>> > This option then would always be negotiated together with
>> > VIRTIO_F_ACCESS_PLATFORM.
>> >
>> > Host then must verify that
>> > 1. full 1:1 mappings are created before driver_ok
>> > or can we make sure this happens before features_ok?
>> > that would be ideal as we could require that features_ok fails
>> > 2. mappings are not modified between driver_ok and reset
>> > i guess attempts to change them will fail -
>> > possibly by causing a guest crash
>> > or some other kind of platform-specific error
>>
>> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring
>> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is
>> SLOF as I mentioned above, another is that we would be requiring all
>> guests running on the machine (secure guests or not, since we would use
>> the same configuration for all guests) to support it. But
>> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For
>> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about
>> it and wouldn't be able to use the device.
>
> OK and your target is to enable use with kernel drivers within
> guests, right?

Right.

> My question is, we are defining a new flag here, I guess old guests
> then do not set it. How does it help old guests? Or maybe it's
> not designed to ...

Indeed. The idea is that QEMU can offer the flag, old guests can reject
it (or even new guests can reject it, if they decide not to convert into
secure VMs) and the feature negotiation will succeed with the flag
unset.

--
Thiago Jung Bauermann
IBM Linux Technology Center

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code

2019-07-15 Thread Thiago Jung Bauermann


Christoph Hellwig  writes:

> On Mon, Jul 15, 2019 at 04:03:17PM +0200, Halil Pasic wrote:
>> > I thought about that but couldn't put my finger on a general concept.
>> > Is it "guest with memory inaccessible to the host"?
>> >
>>
>> Well, force_dma_unencrypted() is a much better name thatn sev_active():
>> s390 has no AMD SEV, that is sure, but for virtio to work we do need to
>> make our dma accessible to the hypervisor. Yes, your "guest with memory
>> inaccessible to the host" shows into the right direction IMHO.
>> Unfortunately I don't have too many cycles to spend on this right now.
>
> In x86 it means that we need to remove dma encryption using
> set_memory_decrypted before using it for DMA purposes.  In the SEV
> case that seems to be so that the hypervisor can access it, in the SME
> case that Tom just fixes it is because there is an encrypted bit set
> in the physical address, and if the device doesn't support a large
> enough DMA address the direct mapping code has to encrypt the pages
> used for the contigous allocation.
>
>> Being on cc for your patch made me realize that things got broken on
>> s390. Thanks! I've sent out a patch that fixes protvirt, but we are going
>> to benefit from your cleanups. I think with your cleanups and that patch
>> of mine both sev_active() and sme_active() can be removed. Feel free to
>> do so. If not, I can attend to it as well.
>
> Yes, I think with the dma-mapping fix and this series sme_active and
> sev_active should be gone from common code.  We should also be able
> to remove the exports x86 has for them.
>
> I'll wait a few days and will then feed the dma-mapping fix to Linus,
> it might make sense to either rebase Thiagos series on top of the
> dma-mapping for-next branch, or wait a few days before reposting.

I'll rebase on top of dma-mapping/for-next and do the break up of patch
2 that you mentioned as well.

--
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH 1/3] x86, s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig

2019-07-15 Thread Thiago Jung Bauermann


Hello Janani,

Thanks for reviewing the patch.

janani  writes:

> On 2019-07-12 23:45, Thiago Jung Bauermann wrote:
>> powerpc is also going to use this feature, so put it in a generic location.
>>
>> Signed-off-by: Thiago Jung Bauermann 
>> Reviewed-by: Thomas Gleixner 
>> ---
>>  arch/Kconfig  | 3 +++
>>  arch/s390/Kconfig | 3 ---
>>  arch/x86/Kconfig  | 4 +---
>>  3 files changed, 4 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index c47b328eada0..4ef3499d4480 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -927,6 +927,9 @@ config LOCK_EVENT_COUNTS
>>the chance of application behavior change because of timing
>>differences. The counts are reported via debugfs.
>>
>> +config ARCH_HAS_MEM_ENCRYPT
>> +bool
>> +
>>  source "kernel/gcov/Kconfig"
>>
>>  source "scripts/gcc-plugins/Kconfig"
>> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
>> index 5d8570ed6cab..f820e631bf89 100644
>> --- a/arch/s390/Kconfig
>> +++ b/arch/s390/Kconfig
>> @@ -1,7 +1,4 @@
>>  # SPDX-License-Identifier: GPL-2.0
>> -config ARCH_HAS_MEM_ENCRYPT
>> -def_bool y
>> -
>
>  Since you are removing the "def_bool y" when ARCH_HAS_MEM_ENCRYPT is moved to
> arch/Kconfig, does the s390/Kconfig need "select ARCH_HAS_MEM_ENCRYPT" added
> like you do for x86/Kconfig?

Indeed, I missed that. Thanks for spotting it!

>
>  - Janani
>
>>  config MMU
>>  def_bool y
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index c9f331bb538b..5d3295f2df94 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -68,6 +68,7 @@ config X86
>>  select ARCH_HAS_FORTIFY_SOURCE
>>  select ARCH_HAS_GCOV_PROFILE_ALL
>>  select ARCH_HAS_KCOVif X86_64
>> +select ARCH_HAS_MEM_ENCRYPT
>>  select ARCH_HAS_MEMBARRIER_SYNC_CORE
>>  select ARCH_HAS_PMEM_APIif X86_64
>>  select ARCH_HAS_PTE_SPECIAL
>> @@ -1520,9 +1521,6 @@ config X86_CPA_STATISTICS
>>helps to determine the effectiveness of preserving large and huge
>>page mappings when mapping protections are changed.
>>
>> -config ARCH_HAS_MEM_ENCRYPT
>> -def_bool y
>> -
>>  config AMD_MEM_ENCRYPT
>>  bool "AMD Secure Memory Encryption (SME) support"
>>  depends on X86_64 && CPU_SUP_AMD


-- 
Thiago Jung Bauermann
IBM Linux Technology Center



Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code

2019-07-15 Thread Lendacky, Thomas
On 7/15/19 9:30 AM, Christoph Hellwig wrote:
> On Mon, Jul 15, 2019 at 04:03:17PM +0200, Halil Pasic wrote:
>>> I thought about that but couldn't put my finger on a general concept.
>>> Is it "guest with memory inaccessible to the host"?
>>>
>>
>> Well, force_dma_unencrypted() is a much better name thatn sev_active():
>> s390 has no AMD SEV, that is sure, but for virtio to work we do need to
>> make our dma accessible to the hypervisor. Yes, your "guest with memory
>> inaccessible to the host" shows into the right direction IMHO.
>> Unfortunately I don't have too many cycles to spend on this right now.
> 
> In x86 it means that we need to remove dma encryption using
> set_memory_decrypted before using it for DMA purposes.  In the SEV
> case that seems to be so that the hypervisor can access it, in the SME
> case that Tom just fixes it is because there is an encrypted bit set
> in the physical address, and if the device doesn't support a large
> enough DMA address the direct mapping code has to encrypt the pages
> used for the contigous allocation.

Just a correction/clarification...

For SME, when a device doesn't support a large enough DMA address to
accommodate the encryption bit as part of the DMA address, the direct
mapping code has to provide un-encrypted pages. For un-encrypted pages,
the DMA address now does not include the encryption bit, making it
acceptable to the device. Since the device is now using a DMA address
without the encryption bit, the physical address in the CPU page table
must match (the call to set_memory_decrypted) so that both the device and
the CPU interact in the same way with the memory.

Thanks,
Tom

> 
>> Being on cc for your patch made me realize that things got broken on
>> s390. Thanks! I've sent out a patch that fixes protvirt, but we are going
>> to benefit from your cleanups. I think with your cleanups and that patch
>> of mine both sev_active() and sme_active() can be removed. Feel free to
>> do so. If not, I can attend to it as well.
> 
> Yes, I think with the dma-mapping fix and this series sme_active and
> sev_active should be gone from common code.  We should also be able
> to remove the exports x86 has for them.
> 
> I'll wait a few days and will then feed the dma-mapping fix to Linus,
> it might make sense to either rebase Thiagos series on top of the
> dma-mapping for-next branch, or wait a few days before reposting.
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/3] x86, s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig

2019-07-15 Thread janani

On 2019-07-12 23:45, Thiago Jung Bauermann wrote:
powerpc is also going to use this feature, so put it in a generic 
location.


Signed-off-by: Thiago Jung Bauermann 
Reviewed-by: Thomas Gleixner 
---
 arch/Kconfig  | 3 +++
 arch/s390/Kconfig | 3 ---
 arch/x86/Kconfig  | 4 +---
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..4ef3499d4480 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -927,6 +927,9 @@ config LOCK_EVENT_COUNTS
  the chance of application behavior change because of timing
  differences. The counts are reported via debugfs.

+config ARCH_HAS_MEM_ENCRYPT
+   bool
+
 source "kernel/gcov/Kconfig"

 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5d8570ed6cab..f820e631bf89 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -1,7 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-config ARCH_HAS_MEM_ENCRYPT
-def_bool y
-


 Since you are removing the "def_bool y" when ARCH_HAS_MEM_ENCRYPT is 
moved to arch/Kconfig, does the s390/Kconfig need "select 
ARCH_HAS_MEM_ENCRYPT" added like you do for x86/Kconfig?


 - Janani


 config MMU
def_bool y

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c9f331bb538b..5d3295f2df94 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -68,6 +68,7 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOVif X86_64
+   select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PMEM_APIif X86_64
select ARCH_HAS_PTE_SPECIAL
@@ -1520,9 +1521,6 @@ config X86_CPA_STATISTICS
  helps to determine the effectiveness of preserving large and huge
  page mappings when mapping protections are changed.

-config ARCH_HAS_MEM_ENCRYPT
-   def_bool y
-
 config AMD_MEM_ENCRYPT
bool "AMD Secure Memory Encryption (SME) support"
depends on X86_64 && CPU_SUP_AMD

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted

2019-07-15 Thread Michael S. Tsirkin
On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote:
> 
> 
> Michael S. Tsirkin  writes:
> 
> > On Thu, Jun 27, 2019 at 10:58:40PM -0300, Thiago Jung Bauermann wrote:
> >>
> >> Michael S. Tsirkin  writes:
> >>
> >> > On Mon, Jun 03, 2019 at 10:13:59PM -0300, Thiago Jung Bauermann wrote:
> >> >>
> >> >>
> >> >> Michael S. Tsirkin  writes:
> >> >>
> >> >> > On Wed, Apr 17, 2019 at 06:42:00PM -0300, Thiago Jung Bauermann wrote:
> >> >> >> I rephrased it in terms of address translation. What do you think of
> >> >> >> this version? The flag name is slightly different too:
> >> >> >>
> >> >> >>
> >> >> >> VIRTIO_F_ACCESS_PLATFORM_NO_TRANSLATION This feature has the same
> >> >> >> meaning as VIRTIO_F_ACCESS_PLATFORM both when set and when not 
> >> >> >> set,
> >> >> >> with the exception that address translation is guaranteed to be
> >> >> >> unnecessary when accessing memory addresses supplied to the 
> >> >> >> device
> >> >> >> by the driver. Which is to say, the device will always use 
> >> >> >> physical
> >> >> >> addresses matching addresses used by the driver (typically 
> >> >> >> meaning
> >> >> >> physical addresses used by the CPU) and not translated further. 
> >> >> >> This
> >> >> >> flag should be set by the guest if offered, but to allow for
> >> >> >> backward-compatibility device implementations allow for it to be
> >> >> >> left unset by the guest. It is an error to set both this flag and
> >> >> >> VIRTIO_F_ACCESS_PLATFORM.
> >> >> >
> >> >> >
> >> >> >
> >> >> >
> >> >> > OK so VIRTIO_F_ACCESS_PLATFORM is designed to allow unpriveledged
> >> >> > drivers. This is why devices fail when it's not negotiated.
> >> >>
> >> >> Just to clarify, what do you mean by unprivileged drivers? Is it drivers
> >> >> implemented in guest userspace such as with VFIO? Or unprivileged in
> >> >> some other sense such as needing to use bounce buffers for some reason?
> >> >
> >> > I had drivers in guest userspace in mind.
> >>
> >> Great. Thanks for clarifying.
> >>
> >> I don't think this flag would work for guest userspace drivers. Should I
> >> add a note about that in the flag definition?
> >
> > I think you need to clarify access protection rules. Is it only
> > translation that is bypassed or is any platform-specific
> > protection mechanism bypassed too?
> 
> It is only translation. In a secure guest, if the device tries to access
> a memory address that wasn't provided by the driver then the
> architecture will deny that access. If the device accesses addresses
> provided to it by the driver, then there's no protection mechanism or
> translation to get in the way.
> 
> >> >> > This confuses me.
> >> >> > If driver is unpriveledged then what happens with this flag?
> >> >> > It can supply any address it wants. Will that corrupt kernel
> >> >> > memory?
> >> >>
> >> >> Not needing address translation doesn't necessarily mean that there's no
> >> >> IOMMU. On powerpc we don't use VIRTIO_F_ACCESS_PLATFORM but there's
> >> >> always an IOMMU present. And we also support VFIO drivers. The VFIO API
> >> >> for pseries (sPAPR section in Documentation/vfio.txt) has extra ioctls
> >> >> to program the IOMMU.
> >> >>
> >> >> For our use case, we don't need address translation because we set up an
> >> >> identity mapping in the IOMMU so that the device can use guest physical
> >> >> addresses.
> >
> > OK so I think I am beginning to see it in a different light.  Right now the 
> > specific
> > platform creates an identity mapping. That in turn means DMA API can be
> > fast - it does not need to do anything.  What you are looking for is a
> > way to tell host it's an identity mapping - just as an optimization.
> >
> > Is that right?
> 
> Almost. Theoretically it is just an optimization. But in practice the
> pseries boot firmware (SLOF) doesn't support IOMMU_PLATFORM so it's not
> possible to boot a guest from a device with that flag set.
> 
> > So this is what I would call this option:
> >
> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS
> >
> > and the explanation should state that all device
> > addresses are translated by the platform to identical
> > addresses.
> >
> > In fact this option then becomes more, not less restrictive
> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise
> > by guest to only create identity mappings,
> > and only before driver_ok is set.
> > This option then would always be negotiated together with
> > VIRTIO_F_ACCESS_PLATFORM.
> >
> > Host then must verify that
> > 1. full 1:1 mappings are created before driver_ok
> > or can we make sure this happens before features_ok?
> > that would be ideal as we could require that features_ok fails
> > 2. mappings are not modified between driver_ok and reset
> > i guess attempts to change them will fail -
> > possibly by causing a guest crash
> > or some other kind of platform-specific error
> 
> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring
> it

[PATCH AUTOSEL 4.14 043/105] iommu: Fix a leak in iommu_insert_resv_region

2019-07-15 Thread Sasha Levin
From: Eric Auger 

[ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ]

In case we expand an existing region, we unlink
this latter and insert the larger one. In
that case we should free the original region after
the insertion. Also we can immediately return.

Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions")

Signed-off-by: Eric Auger 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3de5c0bcb5cc..1620a6f49989 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -205,18 +205,21 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
pos = pos->next;
} else if ((start >= a) && (end <= b)) {
if (new->type == type)
-   goto done;
+   return 0;
else
pos = pos->next;
} else {
if (new->type == type) {
phys_addr_t new_start = min(a, start);
phys_addr_t new_end = max(b, end);
+   int ret;
 
list_del(&entry->list);
entry->start = new_start;
entry->length = new_end - new_start + 1;
-   iommu_insert_resv_region(entry, regions);
+   ret = iommu_insert_resv_region(entry, regions);
+   kfree(entry);
+   return ret;
} else {
pos = pos->next;
}
@@ -229,7 +232,6 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
return -ENOMEM;
 
list_add_tail(®ion->list, pos);
-done:
return 0;
 }
 
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code

2019-07-15 Thread Christoph Hellwig
On Mon, Jul 15, 2019 at 04:03:17PM +0200, Halil Pasic wrote:
> > I thought about that but couldn't put my finger on a general concept.
> > Is it "guest with memory inaccessible to the host"?
> > 
> 
> Well, force_dma_unencrypted() is a much better name thatn sev_active():
> s390 has no AMD SEV, that is sure, but for virtio to work we do need to
> make our dma accessible to the hypervisor. Yes, your "guest with memory
> inaccessible to the host" shows into the right direction IMHO.
> Unfortunately I don't have too many cycles to spend on this right now.

In x86 it means that we need to remove dma encryption using
set_memory_decrypted before using it for DMA purposes.  In the SEV
case that seems to be so that the hypervisor can access it, in the SME
case that Tom just fixes it is because there is an encrypted bit set
in the physical address, and if the device doesn't support a large
enough DMA address the direct mapping code has to encrypt the pages
used for the contigous allocation.

> Being on cc for your patch made me realize that things got broken on
> s390. Thanks! I've sent out a patch that fixes protvirt, but we are going
> to benefit from your cleanups. I think with your cleanups and that patch
> of mine both sev_active() and sme_active() can be removed. Feel free to
> do so. If not, I can attend to it as well.

Yes, I think with the dma-mapping fix and this series sme_active and
sev_active should be gone from common code.  We should also be able
to remove the exports x86 has for them.

I'll wait a few days and will then feed the dma-mapping fix to Linus,
it might make sense to either rebase Thiagos series on top of the
dma-mapping for-next branch, or wait a few days before reposting.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 4.19 058/158] iommu: Fix a leak in iommu_insert_resv_region

2019-07-15 Thread Sasha Levin
From: Eric Auger 

[ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ]

In case we expand an existing region, we unlink
this latter and insert the larger one. In
that case we should free the original region after
the insertion. Also we can immediately return.

Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions")

Signed-off-by: Eric Auger 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c15c5980299..bc14825edc9c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -211,18 +211,21 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
pos = pos->next;
} else if ((start >= a) && (end <= b)) {
if (new->type == type)
-   goto done;
+   return 0;
else
pos = pos->next;
} else {
if (new->type == type) {
phys_addr_t new_start = min(a, start);
phys_addr_t new_end = max(b, end);
+   int ret;
 
list_del(&entry->list);
entry->start = new_start;
entry->length = new_end - new_start + 1;
-   iommu_insert_resv_region(entry, regions);
+   ret = iommu_insert_resv_region(entry, regions);
+   kfree(entry);
+   return ret;
} else {
pos = pos->next;
}
@@ -235,7 +238,6 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
return -ENOMEM;
 
list_add_tail(®ion->list, pos);
-done:
return 0;
 }
 
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 5.1 077/219] iommu: Fix a leak in iommu_insert_resv_region

2019-07-15 Thread Sasha Levin
From: Eric Auger 

[ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ]

In case we expand an existing region, we unlink
this latter and insert the larger one. In
that case we should free the original region after
the insertion. Also we can immediately return.

Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions")

Signed-off-by: Eric Auger 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 109de67d5d72..2d06c507fbed 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -241,18 +241,21 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
pos = pos->next;
} else if ((start >= a) && (end <= b)) {
if (new->type == type)
-   goto done;
+   return 0;
else
pos = pos->next;
} else {
if (new->type == type) {
phys_addr_t new_start = min(a, start);
phys_addr_t new_end = max(b, end);
+   int ret;
 
list_del(&entry->list);
entry->start = new_start;
entry->length = new_end - new_start + 1;
-   iommu_insert_resv_region(entry, regions);
+   ret = iommu_insert_resv_region(entry, regions);
+   kfree(entry);
+   return ret;
} else {
pos = pos->next;
}
@@ -265,7 +268,6 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
return -ENOMEM;
 
list_add_tail(®ion->list, pos);
-done:
return 0;
 }
 
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code

2019-07-15 Thread Halil Pasic
On Fri, 12 Jul 2019 18:55:47 -0300
Thiago Jung Bauermann  wrote:

> 
> [ Cc'ing Tom Lendacky which I forgot to do earlier. Sorry about that. ]
> 
> Hello Halil,
> 
> Thanks for the quick review.
> 
> Halil Pasic  writes:
> 
> > On Fri, 12 Jul 2019 02:36:31 -0300
> > Thiago Jung Bauermann  wrote:
> >
> >> Secure Encrypted Virtualization is an x86-specific feature, so it shouldn't
> >> appear in generic kernel code because it forces non-x86 architectures to
> >> define the sev_active() function, which doesn't make a lot of sense.
> >
> > sev_active() might be just bad (too specific) name for a general
> > concept. s390 code defines it drives the right behavior in
> > kernel/dma/direct.c (which uses it).
> 
> I thought about that but couldn't put my finger on a general concept.
> Is it "guest with memory inaccessible to the host"?
> 

Well, force_dma_unencrypted() is a much better name thatn sev_active():
s390 has no AMD SEV, that is sure, but for virtio to work we do need to
make our dma accessible to the hypervisor. Yes, your "guest with memory
inaccessible to the host" shows into the right direction IMHO.
Unfortunately I don't have too many cycles to spend on this right now.

> Since your proposed definiton for force_dma_unencrypted() is simply to
> make it equivalent to sev_active(), I thought it was more
> straightforward to make each arch define force_dma_unencrypted()
> directly.

I did not mean to propose equivalence. I intended to say the name
sev_active() is not suitable for a common concept. On the other hand
we do have a common concept -- as common code needs to do or not do
things depending on whether "memory is protected/encrypted" or not. I'm
fine with the name force_dma_unencrypted(), especially because I don't
have a better name.

> 
> Also, does sev_active() drive the right behavior for s390 in
> elfcorehdr_read() as well?
> 

AFAIU, since s390 does not override it boils down to the same, whether
sev_active() returns true or false. I'm no expert in that area, but I
strongly hope that is the right behavior. @Janosch: can you help me
out with this one?

> >> To solve this problem, add an x86 elfcorehdr_read() function to override
> >> the generic weak implementation. To do that, it's necessary to make
> >> read_from_oldmem() public so that it can be used outside of vmcore.c.
> >>
> >> Signed-off-by: Thiago Jung Bauermann 
> >> ---
> >>  arch/x86/kernel/crash_dump_64.c |  5 +
> >>  fs/proc/vmcore.c|  8 
> >>  include/linux/crash_dump.h  | 14 ++
> >>  include/linux/mem_encrypt.h |  1 -
> >>  4 files changed, 23 insertions(+), 5 deletions(-)
> >
> > Does not seem to apply to today's or yesterdays master.
> 
> It assumes the presence of the two patches I mentioned in the cover
> letter. Only one of them is in master.
> 
> I hadn't realized the s390 virtio patches were on their way to upstream.
> I was keeping an eye on the email thread but didn't see they were picked
> up in the s390 pull request. I'll add a new patch to this series making
> the corresponding changes to s390's  as well.
> 

Being on cc for your patch made me realize that things got broken on
s390. Thanks! I've sent out a patch that fixes protvirt, but we are going
to benefit from your cleanups. I think with your cleanups and that patch
of mine both sev_active() and sme_active() can be removed. Feel free to
do so. If not, I can attend to it as well.

Regards,
Halil

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 5.2 229/249] iommu/arm-smmu-v3: Invalidate ATC when detaching a device

2019-07-15 Thread Sasha Levin
From: Jean-Philippe Brucker 

[ Upstream commit 8dd8f005bdd45823fc153ef490239558caf6ff20 ]

We make the invalid assumption in arm_smmu_detach_dev() that the ATC is
clear after calling pci_disable_ats(). For one thing, only enabling the
PCIe ATS capability constitutes an implicit invalidation event, so the
comment was wrong. More importantly, the ATS capability isn't necessarily
disabled by pci_disable_ats() in a PF, if the associated VFs have ATS
enabled. Explicitly invalidate all ATC entries in arm_smmu_detach_dev().
The endpoint cannot form new ATC entries because STE.EATS is clear.

Fixes: 9ce27afc0830 ("iommu/arm-smmu-v3: Add support for PCI ATS")
Reported-by: Manoj Kumar 
Reported-by: Robin Murphy 
Signed-off-by: Jean-Philippe Brucker 
Acked-by: Will Deacon 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/arm-smmu-v3.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 4d5a694f02c2..0fee8f7957ec 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1884,9 +1884,13 @@ static int arm_smmu_enable_ats(struct arm_smmu_master 
*master)
 
 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
 {
+   struct arm_smmu_cmdq_ent cmd;
+
if (!master->ats_enabled || !dev_is_pci(master->dev))
return;
 
+   arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
+   arm_smmu_atc_inv_master(master, &cmd);
pci_disable_ats(to_pci_dev(master->dev));
master->ats_enabled = false;
 }
@@ -1906,7 +1910,6 @@ static void arm_smmu_detach_dev(struct arm_smmu_master 
*master)
master->domain = NULL;
arm_smmu_install_ste_for_dev(master);
 
-   /* Disabling ATS invalidates all ATC entries */
arm_smmu_disable_ats(master);
 }
 
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH AUTOSEL 5.2 089/249] iommu: Fix a leak in iommu_insert_resv_region

2019-07-15 Thread Sasha Levin
From: Eric Auger 

[ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ]

In case we expand an existing region, we unlink
this latter and insert the larger one. In
that case we should free the original region after
the insertion. Also we can immediately return.

Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions")

Signed-off-by: Eric Auger 
Signed-off-by: Joerg Roedel 
Signed-off-by: Sasha Levin 
---
 drivers/iommu/iommu.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9f0a2844371c..30db41e9f15c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -225,18 +225,21 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
pos = pos->next;
} else if ((start >= a) && (end <= b)) {
if (new->type == type)
-   goto done;
+   return 0;
else
pos = pos->next;
} else {
if (new->type == type) {
phys_addr_t new_start = min(a, start);
phys_addr_t new_end = max(b, end);
+   int ret;
 
list_del(&entry->list);
entry->start = new_start;
entry->length = new_end - new_start + 1;
-   iommu_insert_resv_region(entry, regions);
+   ret = iommu_insert_resv_region(entry, regions);
+   kfree(entry);
+   return ret;
} else {
pos = pos->next;
}
@@ -249,7 +252,6 @@ static int iommu_insert_resv_region(struct 
iommu_resv_region *new,
return -ENOMEM;
 
list_add_tail(®ion->list, pos);
-done:
return 0;
 }
 
-- 
2.20.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 07/21] iommu/io-pgtable-arm-v7s: Extend MediaTek 4GB Mode

2019-07-15 Thread Will Deacon
On Sun, Jul 14, 2019 at 12:41:20PM +0800, Yong Wu wrote:
> On Thu, 2019-07-11 at 13:31 +0100, Will Deacon wrote:
> > This looks like the right sort of idea. Basically, I was thinking that you
> > can use the oas in conjunction with the quirk to specify whether or not
> > your two magic bits should be set. You could also then cap the oas using
> > the size of phys_addr_t to deal with my other comment.
> > 
> > Finally, I was hoping you could drop the |= BIT_ULL(32) and the &=
> > ~BIT_ULL(32) bits of the mtk driver if the pgtable code now accepts higher
> > addresses. Did that not work out?
> 
> After the current patch, the pgtable has accepted the higher address.
> the " |= BIT_ULL(32)" and "& = ~ BIT_ULL(32)" is for a special case(we
> call it 4GB mode).
> 
> Now MediaTek IOMMU support 2 kind memory:
> 1) normal case: PA is 0x4000_ - 0x3__. the PA won't be
> remapped. mt8183 and the non-4GB mode of mt8173/mt2712 use this mode.
> 
> 2) 4GB Mode: PA is 0x4000_ - 0x1_3fff_. But the PA will remapped
> to 0x1__ to 0x1__. This is for the 4GB mode of
> mt8173/mt2712. This case is so special that we should change the PA
> manually(add bit32).
> (mt2712 and mt8173 have both mode: 4GB and non-4GB.)
> 
> If we try to use oas and our quirk to cover this two case. Then I can
> use "oas == 33" only for this 4GB mode. and "oas == 34" for the normal
> case even though the PA mayn't reach 34bit. Also I should add some
> "workaround" for the 4GB mode(oas==33).
> 
> I copy the new patch in the mail below(have dropped the "|= BIT_ULL(32)"
> and the "&= ~BIT_ULL(32)) in mtk iommu". please help have a look if it
> is ok.
> (another thing: Current the PA can support over 4GB. So the quirk name
> "MTK_4GB" looks not suitable, I used a new patch rename to "MTK_EXT").

Makes sense, thanks. One comment below.

> @@ -205,7 +216,20 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte
> pte, int lvl,
>   else
>   mask = ARM_V7S_LVL_MASK(lvl);
>  
> - return pte & mask;
> + paddr = pte & mask;
> + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
> + (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)) {
> + /*
> +  * Workaround for MTK 4GB Mode:
> +  * Add BIT32 only when PA < 0x4000_.
> +  */
> + if ((cfg->oas == 33 && paddr < 0x4000UL) ||
> + (cfg->oas > 33 && (pte & ARM_V7S_ATTR_MTK_PA_BIT32)))
> + paddr |= BIT_ULL(32);
> + if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
> + paddr |= BIT_ULL(33);
> + }
> + return paddr;
>  }
>  
>  static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
> @@ -326,9 +350,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot,
> int lvl,
>   if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
>   pte |= ARM_V7S_ATTR_NS_SECTION;
>  
> - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
> - pte |= ARM_V7S_ATTR_MTK_4GB;
> -
>   return pte;
>  }
>  
> @@ -742,7 +763,9 @@ static struct io_pgtable
> *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
>  {
>   struct arm_v7s_io_pgtable *data;
>  
> - if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
> + if (cfg->ias > ARM_V7S_ADDR_BITS ||
> + (cfg->oas > ARM_V7S_ADDR_BITS &&
> +  !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
>   return NULL;

I think you can rework this to do something like:

if (cfg->ias > ARM_V7S_ADDR_BITS)
return NULL;

if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
cfg->oas = min(cfg->oas, ARM_V7S_ADDR_BITS);
else if (cfg->oas > 34)
return NULL;
} else if (cfg->oas > ARM_V7S_ADDR_BITS) {
return NULL;
}

so that we clamp the oas when phys_addr_t is 32-bit for you. That should
allow you to remove lots of the checking from iopte_to_paddr() too if you
check against oas in the map() function.

Does that make sense?

Will