[PATCH] iommu/amd: Add support for X2APIC IOMMU interrupts
AMD IOMMU requires IntCapXT registers to be setup in order to generate its own interrupts (for Event Log, PPR Log, and GA Log) with 32-bit APIC destination ID. Without this support, AMD IOMMU MSI interrupts will not be routed correctly when booting the system in X2APIC mode. Cc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit --- drivers/iommu/amd_iommu_init.c | 90 + drivers/iommu/amd_iommu_types.h | 9 + 2 files changed, 99 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index ff40ba7..a4c5b1e 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include #include #include @@ -1935,6 +1937,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return 0; } +#define XT_INT_DEST_MODE(x)(((x) & 0x1ULL) << 2) +#define XT_INT_DEST_LO(x) (((x) & 0xFFULL) << 8) +#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32) +#define XT_INT_DEST_HI(x) x) >> 24) & 0xFFULL) << 56) + +/** + * Setup the IntCapXT registers with interrupt routing information + * based on the PCI MSI capability block registers, accessed via + * MMIO MSI address low/hi and MSI data registers. + */ +static void iommu_update_intcapxt(struct amd_iommu *iommu) +{ + u64 val; + u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET); + u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET); + u32 data= readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET); + bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + u32 dest= ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF); + + if (x2apic_enabled()) + dest |= MSI_ADDR_EXT_DEST_ID(addr_hi); + + val = XT_INT_VEC(data & 0xFF) | + XT_INT_DEST_MODE(dm) | + XT_INT_DEST_LO(dest) | + XT_INT_DEST_HI(dest); + + /** +* Current IOMMU implemtation uses the same IRQ for all +* 3 IOMMU interrupts. +*/ + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); +} + +static void _irq_notifier_notify(struct irq_affinity_notify *notify, +const cpumask_t *mask) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + if (iommu->dev->irq == notify->irq) { + iommu_update_intcapxt(iommu); + break; + } + } +} + +static void _irq_notifier_release(struct kref *ref) +{ +} + +static int iommu_init_intcapxt(struct amd_iommu *iommu) +{ + int ret; + struct irq_affinity_notify *notify = &iommu->intcapxt_notify; + + /** +* IntCapXT requires XTSup=1, which can be inferred +* amd_iommu_xt_mode. +*/ + if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) + return 0; + + /** +* Also, we need to setup notifier to update the IntCapXT registers +* whenever the irq affinity is changed from user-space. +*/ + notify->irq = iommu->dev->irq; + notify->notify = _irq_notifier_notify, + notify->release = _irq_notifier_release, + ret = irq_set_affinity_notifier(iommu->dev->irq, notify); + if (ret) { + pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n", + iommu->devid, iommu->dev->irq); + return ret; + } + + iommu_update_intcapxt(iommu); + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); + return ret; +} + static int iommu_init_msi(struct amd_iommu *iommu) { int ret; @@ -1951,6 +2037,10 @@ static int iommu_init_msi(struct amd_iommu *iommu) return ret; enable_faults: + ret = iommu_init_intcapxt(iommu); + if (ret) + return ret; + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 87965e4..39be804f 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -72,6 +72,12 @@ #define MMIO_PPR_LOG_OFFSET0x0038 #define MMIO_GA_LOG_BASE_OFFSET0x00e0 #define MMIO_GA_LOG_TAIL_OFFSET0x00e8 +#define MMIO_MSI_ADDR_LO_OFFSET0x015C +#define MMIO_MSI_ADDR_HI_OFFSET0x0160 +#define MMIO_MSI_DATA_OFFSET 0x0164 +#define MMIO_INTCAPXT_EVT_OFFSET 0x0170 +#define MMIO_INTCAPXT_PPR_OFFSET 0x0178 +#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 @@ -162,6 +168,7 @@ #define CONTROL_GALOG_EN0x1CULL #define CONTROL_GAINT_EN0x1DULL #define CONTROL
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
On Mon, 2019-07-15 at 19:03 -0300, Thiago Jung Bauermann wrote: > > > Indeed. The idea is that QEMU can offer the flag, old guests can > > > reject > > > it (or even new guests can reject it, if they decide not to > > > convert into > > > secure VMs) and the feature negotiation will succeed with the > > > flag > > > unset. > > > > OK. And then what does QEMU do? Assume guest is not encrypted I > > guess? > > There's nothing different that QEMU needs to do, with or without the > flag. the perspective of the host, a secure guest and a regular guest > work the same way with respect to virtio. This is *precisely* why I was against adding a flag and touch the protocol negociation with qemu in the first place, back when I cared about that stuff... Guys, this has gone in circles over and over again. This has nothing to do with qemu. Qemu doesn't need to know about this. It's entirely guest local. This is why the one-liner in virtio was a far better and simpler solution. This is something the guest does to itself (with the participation of a ultravisor but that's not something qemu cares about at this stage, at least not as far as virtio is concerned). Basically, the guest "hides" its memory from the host using a HW secure memory facility. As a result, it needs to ensure that all of its DMA pages are bounced through insecure pages that aren't hidden. That's it, it's all guest side. Qemu shouldn't have to care about it at all. Cheers, Ben.
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
Michael S. Tsirkin writes: > On Mon, Jul 15, 2019 at 07:03:03PM -0300, Thiago Jung Bauermann wrote: >> >> Michael S. Tsirkin writes: >> >> > On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote: >> >> >> >> Michael S. Tsirkin writes: >> >> >> >> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote: >> >> >> >> >> >> >> >> >> Michael S. Tsirkin writes: >> >> >> >> >> >> > So this is what I would call this option: >> >> >> > >> >> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS >> >> >> > >> >> >> > and the explanation should state that all device >> >> >> > addresses are translated by the platform to identical >> >> >> > addresses. >> >> >> > >> >> >> > In fact this option then becomes more, not less restrictive >> >> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise >> >> >> > by guest to only create identity mappings, >> >> >> > and only before driver_ok is set. >> >> >> > This option then would always be negotiated together with >> >> >> > VIRTIO_F_ACCESS_PLATFORM. >> >> >> > >> >> >> > Host then must verify that >> >> >> > 1. full 1:1 mappings are created before driver_ok >> >> >> > or can we make sure this happens before features_ok? >> >> >> > that would be ideal as we could require that features_ok fails >> >> >> > 2. mappings are not modified between driver_ok and reset >> >> >> > i guess attempts to change them will fail - >> >> >> > possibly by causing a guest crash >> >> >> > or some other kind of platform-specific error >> >> >> >> >> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but >> >> >> requiring >> >> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is >> >> >> SLOF as I mentioned above, another is that we would be requiring all >> >> >> guests running on the machine (secure guests or not, since we would use >> >> >> the same configuration for all guests) to support it. But >> >> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For >> >> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know >> >> >> about >> >> >> it and wouldn't be able to use the device. >> >> > >> >> > OK and your target is to enable use with kernel drivers within >> >> > guests, right? >> >> >> >> Right. >> >> >> >> > My question is, we are defining a new flag here, I guess old guests >> >> > then do not set it. How does it help old guests? Or maybe it's >> >> > not designed to ... >> >> >> >> Indeed. The idea is that QEMU can offer the flag, old guests can reject >> >> it (or even new guests can reject it, if they decide not to convert into >> >> secure VMs) and the feature negotiation will succeed with the flag >> >> unset. >> > >> > OK. And then what does QEMU do? Assume guest is not encrypted I guess? >> >> There's nothing different that QEMU needs to do, with or without the >> flag. the perspective of the host, a secure guest and a regular guest >> work the same way with respect to virtio. > > OK. So now let's get back to implementation. What will > Linux guest driver do? It can't activate DMA API blindly since that > will assume translation also works, right? It can on pseries, because we always have a 1:1 window mapping the whole guest memory. > Or do we somehow limit it to just a specific platform? Yes, we want to accept the new flag only on secure pseries guests. -- Thiago Jung Bauermann IBM Linux Technology Center ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
On Mon, Jul 15, 2019 at 07:03:03PM -0300, Thiago Jung Bauermann wrote: > > Michael S. Tsirkin writes: > > > On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote: > >> > >> Michael S. Tsirkin writes: > >> > >> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote: > >> >> > >> >> > >> >> Michael S. Tsirkin writes: > >> >> > >> >> > So this is what I would call this option: > >> >> > > >> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS > >> >> > > >> >> > and the explanation should state that all device > >> >> > addresses are translated by the platform to identical > >> >> > addresses. > >> >> > > >> >> > In fact this option then becomes more, not less restrictive > >> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise > >> >> > by guest to only create identity mappings, > >> >> > and only before driver_ok is set. > >> >> > This option then would always be negotiated together with > >> >> > VIRTIO_F_ACCESS_PLATFORM. > >> >> > > >> >> > Host then must verify that > >> >> > 1. full 1:1 mappings are created before driver_ok > >> >> > or can we make sure this happens before features_ok? > >> >> > that would be ideal as we could require that features_ok fails > >> >> > 2. mappings are not modified between driver_ok and reset > >> >> > i guess attempts to change them will fail - > >> >> > possibly by causing a guest crash > >> >> > or some other kind of platform-specific error > >> >> > >> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring > >> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is > >> >> SLOF as I mentioned above, another is that we would be requiring all > >> >> guests running on the machine (secure guests or not, since we would use > >> >> the same configuration for all guests) to support it. But > >> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For > >> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about > >> >> it and wouldn't be able to use the device. > >> > > >> > OK and your target is to enable use with kernel drivers within > >> > guests, right? > >> > >> Right. > >> > >> > My question is, we are defining a new flag here, I guess old guests > >> > then do not set it. How does it help old guests? Or maybe it's > >> > not designed to ... > >> > >> Indeed. The idea is that QEMU can offer the flag, old guests can reject > >> it (or even new guests can reject it, if they decide not to convert into > >> secure VMs) and the feature negotiation will succeed with the flag > >> unset. > > > > OK. And then what does QEMU do? Assume guest is not encrypted I guess? > > There's nothing different that QEMU needs to do, with or without the > flag. the perspective of the host, a secure guest and a regular guest > work the same way with respect to virtio. OK. So now let's get back to implementation. What will Linux guest driver do? It can't activate DMA API blindly since that will assume translation also works, right? Or do we somehow limit it to just a specific platform? > -- > Thiago Jung Bauermann > IBM Linux Technology Center
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
Michael S. Tsirkin writes: > On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote: >> >> Michael S. Tsirkin writes: >> >> > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote: >> >> >> >> >> >> Michael S. Tsirkin writes: >> >> >> >> > So this is what I would call this option: >> >> > >> >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS >> >> > >> >> > and the explanation should state that all device >> >> > addresses are translated by the platform to identical >> >> > addresses. >> >> > >> >> > In fact this option then becomes more, not less restrictive >> >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise >> >> > by guest to only create identity mappings, >> >> > and only before driver_ok is set. >> >> > This option then would always be negotiated together with >> >> > VIRTIO_F_ACCESS_PLATFORM. >> >> > >> >> > Host then must verify that >> >> > 1. full 1:1 mappings are created before driver_ok >> >> > or can we make sure this happens before features_ok? >> >> > that would be ideal as we could require that features_ok fails >> >> > 2. mappings are not modified between driver_ok and reset >> >> > i guess attempts to change them will fail - >> >> > possibly by causing a guest crash >> >> > or some other kind of platform-specific error >> >> >> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring >> >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is >> >> SLOF as I mentioned above, another is that we would be requiring all >> >> guests running on the machine (secure guests or not, since we would use >> >> the same configuration for all guests) to support it. But >> >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For >> >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about >> >> it and wouldn't be able to use the device. >> > >> > OK and your target is to enable use with kernel drivers within >> > guests, right? >> >> Right. >> >> > My question is, we are defining a new flag here, I guess old guests >> > then do not set it. How does it help old guests? Or maybe it's >> > not designed to ... >> >> Indeed. The idea is that QEMU can offer the flag, old guests can reject >> it (or even new guests can reject it, if they decide not to convert into >> secure VMs) and the feature negotiation will succeed with the flag >> unset. > > OK. And then what does QEMU do? Assume guest is not encrypted I guess? There's nothing different that QEMU needs to do, with or without the flag. the perspective of the host, a secure guest and a regular guest work the same way with respect to virtio. -- Thiago Jung Bauermann IBM Linux Technology Center ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
On Mon, Jul 15, 2019 at 05:29:06PM -0300, Thiago Jung Bauermann wrote: > > Michael S. Tsirkin writes: > > > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote: > >> > >> > >> Michael S. Tsirkin writes: > >> > >> > So this is what I would call this option: > >> > > >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS > >> > > >> > and the explanation should state that all device > >> > addresses are translated by the platform to identical > >> > addresses. > >> > > >> > In fact this option then becomes more, not less restrictive > >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise > >> > by guest to only create identity mappings, > >> > and only before driver_ok is set. > >> > This option then would always be negotiated together with > >> > VIRTIO_F_ACCESS_PLATFORM. > >> > > >> > Host then must verify that > >> > 1. full 1:1 mappings are created before driver_ok > >> > or can we make sure this happens before features_ok? > >> > that would be ideal as we could require that features_ok fails > >> > 2. mappings are not modified between driver_ok and reset > >> > i guess attempts to change them will fail - > >> > possibly by causing a guest crash > >> > or some other kind of platform-specific error > >> > >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring > >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is > >> SLOF as I mentioned above, another is that we would be requiring all > >> guests running on the machine (secure guests or not, since we would use > >> the same configuration for all guests) to support it. But > >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For > >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about > >> it and wouldn't be able to use the device. > > > > OK and your target is to enable use with kernel drivers within > > guests, right? > > Right. > > > My question is, we are defining a new flag here, I guess old guests > > then do not set it. How does it help old guests? Or maybe it's > > not designed to ... > > Indeed. The idea is that QEMU can offer the flag, old guests can reject > it (or even new guests can reject it, if they decide not to convert into > secure VMs) and the feature negotiation will succeed with the flag > unset. OK. And then what does QEMU do? Assume guest is not encrypted I guess? > -- > Thiago Jung Bauermann > IBM Linux Technology Center ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
Michael S. Tsirkin writes: > On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote: >> >> >> Michael S. Tsirkin writes: >> >> > So this is what I would call this option: >> > >> > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS >> > >> > and the explanation should state that all device >> > addresses are translated by the platform to identical >> > addresses. >> > >> > In fact this option then becomes more, not less restrictive >> > than VIRTIO_F_ACCESS_PLATFORM - it's a promise >> > by guest to only create identity mappings, >> > and only before driver_ok is set. >> > This option then would always be negotiated together with >> > VIRTIO_F_ACCESS_PLATFORM. >> > >> > Host then must verify that >> > 1. full 1:1 mappings are created before driver_ok >> > or can we make sure this happens before features_ok? >> > that would be ideal as we could require that features_ok fails >> > 2. mappings are not modified between driver_ok and reset >> > i guess attempts to change them will fail - >> > possibly by causing a guest crash >> > or some other kind of platform-specific error >> >> I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring >> it to be accompanied by ACCESS_PLATFORM can be a problem. One reason is >> SLOF as I mentioned above, another is that we would be requiring all >> guests running on the machine (secure guests or not, since we would use >> the same configuration for all guests) to support it. But >> ACCESS_PLATFORM is relatively recent so it's a bit early for that. For >> instance, Ubuntu 16.04 LTS (which is still supported) doesn't know about >> it and wouldn't be able to use the device. > > OK and your target is to enable use with kernel drivers within > guests, right? Right. > My question is, we are defining a new flag here, I guess old guests > then do not set it. How does it help old guests? Or maybe it's > not designed to ... Indeed. The idea is that QEMU can offer the flag, old guests can reject it (or even new guests can reject it, if they decide not to convert into secure VMs) and the feature negotiation will succeed with the flag unset. -- Thiago Jung Bauermann IBM Linux Technology Center ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code
Christoph Hellwig writes: > On Mon, Jul 15, 2019 at 04:03:17PM +0200, Halil Pasic wrote: >> > I thought about that but couldn't put my finger on a general concept. >> > Is it "guest with memory inaccessible to the host"? >> > >> >> Well, force_dma_unencrypted() is a much better name thatn sev_active(): >> s390 has no AMD SEV, that is sure, but for virtio to work we do need to >> make our dma accessible to the hypervisor. Yes, your "guest with memory >> inaccessible to the host" shows into the right direction IMHO. >> Unfortunately I don't have too many cycles to spend on this right now. > > In x86 it means that we need to remove dma encryption using > set_memory_decrypted before using it for DMA purposes. In the SEV > case that seems to be so that the hypervisor can access it, in the SME > case that Tom just fixes it is because there is an encrypted bit set > in the physical address, and if the device doesn't support a large > enough DMA address the direct mapping code has to encrypt the pages > used for the contigous allocation. > >> Being on cc for your patch made me realize that things got broken on >> s390. Thanks! I've sent out a patch that fixes protvirt, but we are going >> to benefit from your cleanups. I think with your cleanups and that patch >> of mine both sev_active() and sme_active() can be removed. Feel free to >> do so. If not, I can attend to it as well. > > Yes, I think with the dma-mapping fix and this series sme_active and > sev_active should be gone from common code. We should also be able > to remove the exports x86 has for them. > > I'll wait a few days and will then feed the dma-mapping fix to Linus, > it might make sense to either rebase Thiagos series on top of the > dma-mapping for-next branch, or wait a few days before reposting. I'll rebase on top of dma-mapping/for-next and do the break up of patch 2 that you mentioned as well. -- Thiago Jung Bauermann IBM Linux Technology Center
Re: [PATCH 1/3] x86, s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig
Hello Janani, Thanks for reviewing the patch. janani writes: > On 2019-07-12 23:45, Thiago Jung Bauermann wrote: >> powerpc is also going to use this feature, so put it in a generic location. >> >> Signed-off-by: Thiago Jung Bauermann >> Reviewed-by: Thomas Gleixner >> --- >> arch/Kconfig | 3 +++ >> arch/s390/Kconfig | 3 --- >> arch/x86/Kconfig | 4 +--- >> 3 files changed, 4 insertions(+), 6 deletions(-) >> >> diff --git a/arch/Kconfig b/arch/Kconfig >> index c47b328eada0..4ef3499d4480 100644 >> --- a/arch/Kconfig >> +++ b/arch/Kconfig >> @@ -927,6 +927,9 @@ config LOCK_EVENT_COUNTS >>the chance of application behavior change because of timing >>differences. The counts are reported via debugfs. >> >> +config ARCH_HAS_MEM_ENCRYPT >> +bool >> + >> source "kernel/gcov/Kconfig" >> >> source "scripts/gcc-plugins/Kconfig" >> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig >> index 5d8570ed6cab..f820e631bf89 100644 >> --- a/arch/s390/Kconfig >> +++ b/arch/s390/Kconfig >> @@ -1,7 +1,4 @@ >> # SPDX-License-Identifier: GPL-2.0 >> -config ARCH_HAS_MEM_ENCRYPT >> -def_bool y >> - > > Since you are removing the "def_bool y" when ARCH_HAS_MEM_ENCRYPT is moved to > arch/Kconfig, does the s390/Kconfig need "select ARCH_HAS_MEM_ENCRYPT" added > like you do for x86/Kconfig? Indeed, I missed that. Thanks for spotting it! > > - Janani > >> config MMU >> def_bool y >> >> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig >> index c9f331bb538b..5d3295f2df94 100644 >> --- a/arch/x86/Kconfig >> +++ b/arch/x86/Kconfig >> @@ -68,6 +68,7 @@ config X86 >> select ARCH_HAS_FORTIFY_SOURCE >> select ARCH_HAS_GCOV_PROFILE_ALL >> select ARCH_HAS_KCOVif X86_64 >> +select ARCH_HAS_MEM_ENCRYPT >> select ARCH_HAS_MEMBARRIER_SYNC_CORE >> select ARCH_HAS_PMEM_APIif X86_64 >> select ARCH_HAS_PTE_SPECIAL >> @@ -1520,9 +1521,6 @@ config X86_CPA_STATISTICS >>helps to determine the effectiveness of preserving large and huge >>page mappings when mapping protections are changed. >> >> -config ARCH_HAS_MEM_ENCRYPT >> -def_bool y >> - >> config AMD_MEM_ENCRYPT >> bool "AMD Secure Memory Encryption (SME) support" >> depends on X86_64 && CPU_SUP_AMD -- Thiago Jung Bauermann IBM Linux Technology Center
Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code
On 7/15/19 9:30 AM, Christoph Hellwig wrote: > On Mon, Jul 15, 2019 at 04:03:17PM +0200, Halil Pasic wrote: >>> I thought about that but couldn't put my finger on a general concept. >>> Is it "guest with memory inaccessible to the host"? >>> >> >> Well, force_dma_unencrypted() is a much better name thatn sev_active(): >> s390 has no AMD SEV, that is sure, but for virtio to work we do need to >> make our dma accessible to the hypervisor. Yes, your "guest with memory >> inaccessible to the host" shows into the right direction IMHO. >> Unfortunately I don't have too many cycles to spend on this right now. > > In x86 it means that we need to remove dma encryption using > set_memory_decrypted before using it for DMA purposes. In the SEV > case that seems to be so that the hypervisor can access it, in the SME > case that Tom just fixes it is because there is an encrypted bit set > in the physical address, and if the device doesn't support a large > enough DMA address the direct mapping code has to encrypt the pages > used for the contigous allocation. Just a correction/clarification... For SME, when a device doesn't support a large enough DMA address to accommodate the encryption bit as part of the DMA address, the direct mapping code has to provide un-encrypted pages. For un-encrypted pages, the DMA address now does not include the encryption bit, making it acceptable to the device. Since the device is now using a DMA address without the encryption bit, the physical address in the CPU page table must match (the call to set_memory_decrypted) so that both the device and the CPU interact in the same way with the memory. Thanks, Tom > >> Being on cc for your patch made me realize that things got broken on >> s390. Thanks! I've sent out a patch that fixes protvirt, but we are going >> to benefit from your cleanups. I think with your cleanups and that patch >> of mine both sev_active() and sme_active() can be removed. Feel free to >> do so. If not, I can attend to it as well. > > Yes, I think with the dma-mapping fix and this series sme_active and > sev_active should be gone from common code. We should also be able > to remove the exports x86 has for them. > > I'll wait a few days and will then feed the dma-mapping fix to Linus, > it might make sense to either rebase Thiagos series on top of the > dma-mapping for-next branch, or wait a few days before reposting. > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/3] x86, s390: Move ARCH_HAS_MEM_ENCRYPT definition to arch/Kconfig
On 2019-07-12 23:45, Thiago Jung Bauermann wrote: powerpc is also going to use this feature, so put it in a generic location. Signed-off-by: Thiago Jung Bauermann Reviewed-by: Thomas Gleixner --- arch/Kconfig | 3 +++ arch/s390/Kconfig | 3 --- arch/x86/Kconfig | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..4ef3499d4480 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -927,6 +927,9 @@ config LOCK_EVENT_COUNTS the chance of application behavior change because of timing differences. The counts are reported via debugfs. +config ARCH_HAS_MEM_ENCRYPT + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5d8570ed6cab..f820e631bf89 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -config ARCH_HAS_MEM_ENCRYPT -def_bool y - Since you are removing the "def_bool y" when ARCH_HAS_MEM_ENCRYPT is moved to arch/Kconfig, does the s390/Kconfig need "select ARCH_HAS_MEM_ENCRYPT" added like you do for x86/Kconfig? - Janani config MMU def_bool y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9f331bb538b..5d3295f2df94 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -68,6 +68,7 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOVif X86_64 + select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_APIif X86_64 select ARCH_HAS_PTE_SPECIAL @@ -1520,9 +1521,6 @@ config X86_CPA_STATISTICS helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. -config ARCH_HAS_MEM_ENCRYPT - def_bool y - config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] virtio_ring: Use DMA API if guest memory is encrypted
On Sun, Jul 14, 2019 at 02:51:18AM -0300, Thiago Jung Bauermann wrote: > > > Michael S. Tsirkin writes: > > > On Thu, Jun 27, 2019 at 10:58:40PM -0300, Thiago Jung Bauermann wrote: > >> > >> Michael S. Tsirkin writes: > >> > >> > On Mon, Jun 03, 2019 at 10:13:59PM -0300, Thiago Jung Bauermann wrote: > >> >> > >> >> > >> >> Michael S. Tsirkin writes: > >> >> > >> >> > On Wed, Apr 17, 2019 at 06:42:00PM -0300, Thiago Jung Bauermann wrote: > >> >> >> I rephrased it in terms of address translation. What do you think of > >> >> >> this version? The flag name is slightly different too: > >> >> >> > >> >> >> > >> >> >> VIRTIO_F_ACCESS_PLATFORM_NO_TRANSLATION This feature has the same > >> >> >> meaning as VIRTIO_F_ACCESS_PLATFORM both when set and when not > >> >> >> set, > >> >> >> with the exception that address translation is guaranteed to be > >> >> >> unnecessary when accessing memory addresses supplied to the > >> >> >> device > >> >> >> by the driver. Which is to say, the device will always use > >> >> >> physical > >> >> >> addresses matching addresses used by the driver (typically > >> >> >> meaning > >> >> >> physical addresses used by the CPU) and not translated further. > >> >> >> This > >> >> >> flag should be set by the guest if offered, but to allow for > >> >> >> backward-compatibility device implementations allow for it to be > >> >> >> left unset by the guest. It is an error to set both this flag and > >> >> >> VIRTIO_F_ACCESS_PLATFORM. > >> >> > > >> >> > > >> >> > > >> >> > > >> >> > OK so VIRTIO_F_ACCESS_PLATFORM is designed to allow unpriveledged > >> >> > drivers. This is why devices fail when it's not negotiated. > >> >> > >> >> Just to clarify, what do you mean by unprivileged drivers? Is it drivers > >> >> implemented in guest userspace such as with VFIO? Or unprivileged in > >> >> some other sense such as needing to use bounce buffers for some reason? > >> > > >> > I had drivers in guest userspace in mind. > >> > >> Great. Thanks for clarifying. > >> > >> I don't think this flag would work for guest userspace drivers. Should I > >> add a note about that in the flag definition? > > > > I think you need to clarify access protection rules. Is it only > > translation that is bypassed or is any platform-specific > > protection mechanism bypassed too? > > It is only translation. In a secure guest, if the device tries to access > a memory address that wasn't provided by the driver then the > architecture will deny that access. If the device accesses addresses > provided to it by the driver, then there's no protection mechanism or > translation to get in the way. > > >> >> > This confuses me. > >> >> > If driver is unpriveledged then what happens with this flag? > >> >> > It can supply any address it wants. Will that corrupt kernel > >> >> > memory? > >> >> > >> >> Not needing address translation doesn't necessarily mean that there's no > >> >> IOMMU. On powerpc we don't use VIRTIO_F_ACCESS_PLATFORM but there's > >> >> always an IOMMU present. And we also support VFIO drivers. The VFIO API > >> >> for pseries (sPAPR section in Documentation/vfio.txt) has extra ioctls > >> >> to program the IOMMU. > >> >> > >> >> For our use case, we don't need address translation because we set up an > >> >> identity mapping in the IOMMU so that the device can use guest physical > >> >> addresses. > > > > OK so I think I am beginning to see it in a different light. Right now the > > specific > > platform creates an identity mapping. That in turn means DMA API can be > > fast - it does not need to do anything. What you are looking for is a > > way to tell host it's an identity mapping - just as an optimization. > > > > Is that right? > > Almost. Theoretically it is just an optimization. But in practice the > pseries boot firmware (SLOF) doesn't support IOMMU_PLATFORM so it's not > possible to boot a guest from a device with that flag set. > > > So this is what I would call this option: > > > > VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS > > > > and the explanation should state that all device > > addresses are translated by the platform to identical > > addresses. > > > > In fact this option then becomes more, not less restrictive > > than VIRTIO_F_ACCESS_PLATFORM - it's a promise > > by guest to only create identity mappings, > > and only before driver_ok is set. > > This option then would always be negotiated together with > > VIRTIO_F_ACCESS_PLATFORM. > > > > Host then must verify that > > 1. full 1:1 mappings are created before driver_ok > > or can we make sure this happens before features_ok? > > that would be ideal as we could require that features_ok fails > > 2. mappings are not modified between driver_ok and reset > > i guess attempts to change them will fail - > > possibly by causing a guest crash > > or some other kind of platform-specific error > > I think VIRTIO_F_ACCESS_PLATFORM_IDENTITY_ADDRESS is good, but requiring > it
[PATCH AUTOSEL 4.14 043/105] iommu: Fix a leak in iommu_insert_resv_region
From: Eric Auger [ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ] In case we expand an existing region, we unlink this latter and insert the larger one. In that case we should free the original region after the insertion. Also we can immediately return. Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions") Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3de5c0bcb5cc..1620a6f49989 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -205,18 +205,21 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, pos = pos->next; } else if ((start >= a) && (end <= b)) { if (new->type == type) - goto done; + return 0; else pos = pos->next; } else { if (new->type == type) { phys_addr_t new_start = min(a, start); phys_addr_t new_end = max(b, end); + int ret; list_del(&entry->list); entry->start = new_start; entry->length = new_end - new_start + 1; - iommu_insert_resv_region(entry, regions); + ret = iommu_insert_resv_region(entry, regions); + kfree(entry); + return ret; } else { pos = pos->next; } @@ -229,7 +232,6 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, return -ENOMEM; list_add_tail(®ion->list, pos); -done: return 0; } -- 2.20.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code
On Mon, Jul 15, 2019 at 04:03:17PM +0200, Halil Pasic wrote: > > I thought about that but couldn't put my finger on a general concept. > > Is it "guest with memory inaccessible to the host"? > > > > Well, force_dma_unencrypted() is a much better name thatn sev_active(): > s390 has no AMD SEV, that is sure, but for virtio to work we do need to > make our dma accessible to the hypervisor. Yes, your "guest with memory > inaccessible to the host" shows into the right direction IMHO. > Unfortunately I don't have too many cycles to spend on this right now. In x86 it means that we need to remove dma encryption using set_memory_decrypted before using it for DMA purposes. In the SEV case that seems to be so that the hypervisor can access it, in the SME case that Tom just fixes it is because there is an encrypted bit set in the physical address, and if the device doesn't support a large enough DMA address the direct mapping code has to encrypt the pages used for the contigous allocation. > Being on cc for your patch made me realize that things got broken on > s390. Thanks! I've sent out a patch that fixes protvirt, but we are going > to benefit from your cleanups. I think with your cleanups and that patch > of mine both sev_active() and sme_active() can be removed. Feel free to > do so. If not, I can attend to it as well. Yes, I think with the dma-mapping fix and this series sme_active and sev_active should be gone from common code. We should also be able to remove the exports x86 has for them. I'll wait a few days and will then feed the dma-mapping fix to Linus, it might make sense to either rebase Thiagos series on top of the dma-mapping for-next branch, or wait a few days before reposting. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH AUTOSEL 4.19 058/158] iommu: Fix a leak in iommu_insert_resv_region
From: Eric Auger [ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ] In case we expand an existing region, we unlink this latter and insert the larger one. In that case we should free the original region after the insertion. Also we can immediately return. Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions") Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8c15c5980299..bc14825edc9c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -211,18 +211,21 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, pos = pos->next; } else if ((start >= a) && (end <= b)) { if (new->type == type) - goto done; + return 0; else pos = pos->next; } else { if (new->type == type) { phys_addr_t new_start = min(a, start); phys_addr_t new_end = max(b, end); + int ret; list_del(&entry->list); entry->start = new_start; entry->length = new_end - new_start + 1; - iommu_insert_resv_region(entry, regions); + ret = iommu_insert_resv_region(entry, regions); + kfree(entry); + return ret; } else { pos = pos->next; } @@ -235,7 +238,6 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, return -ENOMEM; list_add_tail(®ion->list, pos); -done: return 0; } -- 2.20.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH AUTOSEL 5.1 077/219] iommu: Fix a leak in iommu_insert_resv_region
From: Eric Auger [ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ] In case we expand an existing region, we unlink this latter and insert the larger one. In that case we should free the original region after the insertion. Also we can immediately return. Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions") Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 109de67d5d72..2d06c507fbed 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -241,18 +241,21 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, pos = pos->next; } else if ((start >= a) && (end <= b)) { if (new->type == type) - goto done; + return 0; else pos = pos->next; } else { if (new->type == type) { phys_addr_t new_start = min(a, start); phys_addr_t new_end = max(b, end); + int ret; list_del(&entry->list); entry->start = new_start; entry->length = new_end - new_start + 1; - iommu_insert_resv_region(entry, regions); + ret = iommu_insert_resv_region(entry, regions); + kfree(entry); + return ret; } else { pos = pos->next; } @@ -265,7 +268,6 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, return -ENOMEM; list_add_tail(®ion->list, pos); -done: return 0; } -- 2.20.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 3/3] fs/core/vmcore: Move sev_active() reference to x86 arch code
On Fri, 12 Jul 2019 18:55:47 -0300 Thiago Jung Bauermann wrote: > > [ Cc'ing Tom Lendacky which I forgot to do earlier. Sorry about that. ] > > Hello Halil, > > Thanks for the quick review. > > Halil Pasic writes: > > > On Fri, 12 Jul 2019 02:36:31 -0300 > > Thiago Jung Bauermann wrote: > > > >> Secure Encrypted Virtualization is an x86-specific feature, so it shouldn't > >> appear in generic kernel code because it forces non-x86 architectures to > >> define the sev_active() function, which doesn't make a lot of sense. > > > > sev_active() might be just bad (too specific) name for a general > > concept. s390 code defines it drives the right behavior in > > kernel/dma/direct.c (which uses it). > > I thought about that but couldn't put my finger on a general concept. > Is it "guest with memory inaccessible to the host"? > Well, force_dma_unencrypted() is a much better name thatn sev_active(): s390 has no AMD SEV, that is sure, but for virtio to work we do need to make our dma accessible to the hypervisor. Yes, your "guest with memory inaccessible to the host" shows into the right direction IMHO. Unfortunately I don't have too many cycles to spend on this right now. > Since your proposed definiton for force_dma_unencrypted() is simply to > make it equivalent to sev_active(), I thought it was more > straightforward to make each arch define force_dma_unencrypted() > directly. I did not mean to propose equivalence. I intended to say the name sev_active() is not suitable for a common concept. On the other hand we do have a common concept -- as common code needs to do or not do things depending on whether "memory is protected/encrypted" or not. I'm fine with the name force_dma_unencrypted(), especially because I don't have a better name. > > Also, does sev_active() drive the right behavior for s390 in > elfcorehdr_read() as well? > AFAIU, since s390 does not override it boils down to the same, whether sev_active() returns true or false. I'm no expert in that area, but I strongly hope that is the right behavior. @Janosch: can you help me out with this one? > >> To solve this problem, add an x86 elfcorehdr_read() function to override > >> the generic weak implementation. To do that, it's necessary to make > >> read_from_oldmem() public so that it can be used outside of vmcore.c. > >> > >> Signed-off-by: Thiago Jung Bauermann > >> --- > >> arch/x86/kernel/crash_dump_64.c | 5 + > >> fs/proc/vmcore.c| 8 > >> include/linux/crash_dump.h | 14 ++ > >> include/linux/mem_encrypt.h | 1 - > >> 4 files changed, 23 insertions(+), 5 deletions(-) > > > > Does not seem to apply to today's or yesterdays master. > > It assumes the presence of the two patches I mentioned in the cover > letter. Only one of them is in master. > > I hadn't realized the s390 virtio patches were on their way to upstream. > I was keeping an eye on the email thread but didn't see they were picked > up in the s390 pull request. I'll add a new patch to this series making > the corresponding changes to s390's as well. > Being on cc for your patch made me realize that things got broken on s390. Thanks! I've sent out a patch that fixes protvirt, but we are going to benefit from your cleanups. I think with your cleanups and that patch of mine both sev_active() and sme_active() can be removed. Feel free to do so. If not, I can attend to it as well. Regards, Halil ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH AUTOSEL 5.2 229/249] iommu/arm-smmu-v3: Invalidate ATC when detaching a device
From: Jean-Philippe Brucker [ Upstream commit 8dd8f005bdd45823fc153ef490239558caf6ff20 ] We make the invalid assumption in arm_smmu_detach_dev() that the ATC is clear after calling pci_disable_ats(). For one thing, only enabling the PCIe ATS capability constitutes an implicit invalidation event, so the comment was wrong. More importantly, the ATS capability isn't necessarily disabled by pci_disable_ats() in a PF, if the associated VFs have ATS enabled. Explicitly invalidate all ATC entries in arm_smmu_detach_dev(). The endpoint cannot form new ATC entries because STE.EATS is clear. Fixes: 9ce27afc0830 ("iommu/arm-smmu-v3: Add support for PCI ATS") Reported-by: Manoj Kumar Reported-by: Robin Murphy Signed-off-by: Jean-Philippe Brucker Acked-by: Will Deacon Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/arm-smmu-v3.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4d5a694f02c2..0fee8f7957ec 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1884,9 +1884,13 @@ static int arm_smmu_enable_ats(struct arm_smmu_master *master) static void arm_smmu_disable_ats(struct arm_smmu_master *master) { + struct arm_smmu_cmdq_ent cmd; + if (!master->ats_enabled || !dev_is_pci(master->dev)) return; + arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); + arm_smmu_atc_inv_master(master, &cmd); pci_disable_ats(to_pci_dev(master->dev)); master->ats_enabled = false; } @@ -1906,7 +1910,6 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; arm_smmu_install_ste_for_dev(master); - /* Disabling ATS invalidates all ATC entries */ arm_smmu_disable_ats(master); } -- 2.20.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH AUTOSEL 5.2 089/249] iommu: Fix a leak in iommu_insert_resv_region
From: Eric Auger [ Upstream commit ad0834dedaa15c3a176f783c0373f836e44b4700 ] In case we expand an existing region, we unlink this latter and insert the larger one. In that case we should free the original region after the insertion. Also we can immediately return. Fixes: 6c65fb318e8b ("iommu: iommu_get_group_resv_regions") Signed-off-by: Eric Auger Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9f0a2844371c..30db41e9f15c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -225,18 +225,21 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, pos = pos->next; } else if ((start >= a) && (end <= b)) { if (new->type == type) - goto done; + return 0; else pos = pos->next; } else { if (new->type == type) { phys_addr_t new_start = min(a, start); phys_addr_t new_end = max(b, end); + int ret; list_del(&entry->list); entry->start = new_start; entry->length = new_end - new_start + 1; - iommu_insert_resv_region(entry, regions); + ret = iommu_insert_resv_region(entry, regions); + kfree(entry); + return ret; } else { pos = pos->next; } @@ -249,7 +252,6 @@ static int iommu_insert_resv_region(struct iommu_resv_region *new, return -ENOMEM; list_add_tail(®ion->list, pos); -done: return 0; } -- 2.20.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v8 07/21] iommu/io-pgtable-arm-v7s: Extend MediaTek 4GB Mode
On Sun, Jul 14, 2019 at 12:41:20PM +0800, Yong Wu wrote: > On Thu, 2019-07-11 at 13:31 +0100, Will Deacon wrote: > > This looks like the right sort of idea. Basically, I was thinking that you > > can use the oas in conjunction with the quirk to specify whether or not > > your two magic bits should be set. You could also then cap the oas using > > the size of phys_addr_t to deal with my other comment. > > > > Finally, I was hoping you could drop the |= BIT_ULL(32) and the &= > > ~BIT_ULL(32) bits of the mtk driver if the pgtable code now accepts higher > > addresses. Did that not work out? > > After the current patch, the pgtable has accepted the higher address. > the " |= BIT_ULL(32)" and "& = ~ BIT_ULL(32)" is for a special case(we > call it 4GB mode). > > Now MediaTek IOMMU support 2 kind memory: > 1) normal case: PA is 0x4000_ - 0x3__. the PA won't be > remapped. mt8183 and the non-4GB mode of mt8173/mt2712 use this mode. > > 2) 4GB Mode: PA is 0x4000_ - 0x1_3fff_. But the PA will remapped > to 0x1__ to 0x1__. This is for the 4GB mode of > mt8173/mt2712. This case is so special that we should change the PA > manually(add bit32). > (mt2712 and mt8173 have both mode: 4GB and non-4GB.) > > If we try to use oas and our quirk to cover this two case. Then I can > use "oas == 33" only for this 4GB mode. and "oas == 34" for the normal > case even though the PA mayn't reach 34bit. Also I should add some > "workaround" for the 4GB mode(oas==33). > > I copy the new patch in the mail below(have dropped the "|= BIT_ULL(32)" > and the "&= ~BIT_ULL(32)) in mtk iommu". please help have a look if it > is ok. > (another thing: Current the PA can support over 4GB. So the quirk name > "MTK_4GB" looks not suitable, I used a new patch rename to "MTK_EXT"). Makes sense, thanks. One comment below. > @@ -205,7 +216,20 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte > pte, int lvl, > else > mask = ARM_V7S_LVL_MASK(lvl); > > - return pte & mask; > + paddr = pte & mask; > + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) && > + (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)) { > + /* > + * Workaround for MTK 4GB Mode: > + * Add BIT32 only when PA < 0x4000_. > + */ > + if ((cfg->oas == 33 && paddr < 0x4000UL) || > + (cfg->oas > 33 && (pte & ARM_V7S_ATTR_MTK_PA_BIT32))) > + paddr |= BIT_ULL(32); > + if (pte & ARM_V7S_ATTR_MTK_PA_BIT33) > + paddr |= BIT_ULL(33); > + } > + return paddr; > } > > static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl, > @@ -326,9 +350,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, > int lvl, > if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) > pte |= ARM_V7S_ATTR_NS_SECTION; > > - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) > - pte |= ARM_V7S_ATTR_MTK_4GB; > - > return pte; > } > > @@ -742,7 +763,9 @@ static struct io_pgtable > *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, > { > struct arm_v7s_io_pgtable *data; > > - if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS) > + if (cfg->ias > ARM_V7S_ADDR_BITS || > + (cfg->oas > ARM_V7S_ADDR_BITS && > + !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT))) > return NULL; I think you can rework this to do something like: if (cfg->ias > ARM_V7S_ADDR_BITS) return NULL; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) { if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) cfg->oas = min(cfg->oas, ARM_V7S_ADDR_BITS); else if (cfg->oas > 34) return NULL; } else if (cfg->oas > ARM_V7S_ADDR_BITS) { return NULL; } so that we clamp the oas when phys_addr_t is 32-bit for you. That should allow you to remove lots of the checking from iopte_to_paddr() too if you check against oas in the map() function. Does that make sense? Will