IOMMU_CACHE means "normal DMA to this iommu_domain's IOVA should be cache
coherent" and is used by the DMA API. The definition allows for special
non-coherent DMA to exist - i.e. processing of the no-snoop flag in PCIe
TLPs - so long as the device driver opts into this behavior.

The flag is mainly used by the DMA API to synchronize the IOMMU setting
with the expected cache behavior of the DMA master, e.g. based on
dev_is_dma_coherent() in some cases.
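
For illustration, the DMA API's flag selection reduces to roughly the
following (a simplified sketch of dma_info_to_prot() in
drivers/iommu/dma-iommu.c; the real function also takes an attrs
argument, dropped here):

  static int dma_info_to_prot(enum dma_data_direction dir, bool coherent)
  {
          /* Only a coherent DMA master gets IOMMU_CACHE mappings */
          int prot = coherent ? IOMMU_CACHE : 0;

          switch (dir) {
          case DMA_BIDIRECTIONAL:
                  return prot | IOMMU_READ | IOMMU_WRITE;
          case DMA_TO_DEVICE:
                  return prot | IOMMU_READ;
          case DMA_FROM_DEVICE:
                  return prot | IOMMU_WRITE;
          default:
                  return 0;
          }
  }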

For the Intel IOMMU, IOMMU_CACHE was redefined to mean 'force all DMA to
be cache coherent', which has the practical effect of causing the IOMMU
to ignore the no-snoop bit in a PCIe TLP.

x86 platforms always set IOMMU_CACHE, so Intel should ignore this flag.

Instead use the new domain op enforce_cache_coherency() which causes every
IOPTE created in the domain to have the no-snoop blocking behavior.
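
For reference, the op (added earlier in this series) has roughly this
shape in struct iommu_domain_ops; callers test for its presence before
invoking it:

  struct iommu_domain_ops {
          ...
          /*
           * Returns true if the domain now forces coherency, i.e. all
           * IOPTEs will be created with the no-snoop blocking behavior.
           */
          bool (*enforce_cache_coherency)(struct iommu_domain *domain);
  };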

Reconfigure VFIO to always use IOMMU_CACHE and to call
enforce_cache_coherency() to trigger the special Intel behavior.

Remove the IOMMU_CACHE test from Intel IOMMU.

Ultimately VFIO plumbs the result of enforce_cache_coherency() back into
the x86 platform code through kvm_arch_register_noncoherent_dma(), which
controls whether the WBINVD instruction is available in the guest. No
other arch implements kvm_arch_register_noncoherent_dma().
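
On the KVM x86 side the registration is just reference counting
(abridged sketch of arch/x86/kvm/x86.c):

  void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
  {
          atomic_inc(&kvm->arch.noncoherent_dma_count);
  }

  bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
  {
          return atomic_read(&kvm->arch.noncoherent_dma_count);
  }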

Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
---
 drivers/iommu/intel/iommu.c     |  7 ++-----
 drivers/vfio/vfio_iommu_type1.c | 30 +++++++++++++++++++-----------
 include/linux/intel-iommu.h     |  1 -
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index f08611a6cc4799..8f3674e997df06 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -641,7 +641,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
        domain_update_iommu_coherency(domain);
-       domain->iommu_snooping = domain_update_iommu_snooping(NULL);
        domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
 
        /*
@@ -4283,7 +4282,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
        domain->agaw = width_to_agaw(adjust_width);
 
        domain->iommu_coherency = false;
-       domain->iommu_snooping = false;
        domain->iommu_superpage = 0;
        domain->max_addr = 0;
 
@@ -4422,8 +4420,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
                prot |= DMA_PTE_READ;
        if (iommu_prot & IOMMU_WRITE)
                prot |= DMA_PTE_WRITE;
-       if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) ||
-           dmar_domain->enforce_no_snoop)
+       if (dmar_domain->enforce_no_snoop)
                prot |= DMA_PTE_SNP;
 
        max_addr = iova + size;
@@ -4550,7 +4547,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 {
        struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 
-       if (!dmar_domain->iommu_snooping)
+       if (!domain_update_iommu_snooping(NULL))
                return false;
        dmar_domain->enforce_no_snoop = true;
        return true;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9394aa9444c10c..c13b9290e35759 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -84,8 +84,8 @@ struct vfio_domain {
        struct iommu_domain     *domain;
        struct list_head        next;
        struct list_head        group_list;
-       int                     prot;           /* IOMMU_CACHE */
-       bool                    fgsp;           /* Fine-grained super pages */
+       bool                    fgsp : 1;       /* Fine-grained super pages */
+       bool                    enforce_cache_coherency : 1;
 };
 
 struct vfio_dma {
@@ -1461,7 +1461,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
 
        list_for_each_entry(d, &iommu->domain_list, next) {
                ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
-                               npage << PAGE_SHIFT, prot | d->prot);
+                               npage << PAGE_SHIFT, prot | IOMMU_CACHE);
                if (ret)
                        goto unwind;
 
@@ -1771,7 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                        }
 
                        ret = iommu_map(domain->domain, iova, phys,
-                                       size, dma->prot | domain->prot);
+                                       size, dma->prot | IOMMU_CACHE);
                        if (ret) {
                                if (!dma->iommu_mapped) {
                                        vfio_unpin_pages_remote(dma, iova,
@@ -1859,7 +1859,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain)
                return;
 
        ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
-                       IOMMU_READ | IOMMU_WRITE | domain->prot);
+                       IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
        if (!ret) {
                size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
 
@@ -2267,8 +2267,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                goto out_detach;
        }
 
-       if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
-               domain->prot |= IOMMU_CACHE;
+       /*
+        * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+        * no-snoop set) then VFIO always turns this feature on because on Intel
+        * platforms it optimizes KVM to disable wbinvd emulation.
+        */
+       if (domain->domain->ops->enforce_cache_coherency)
+               domain->enforce_cache_coherency =
+                       domain->domain->ops->enforce_cache_coherency(
+                               domain->domain);
 
        /*
         * Try to match an existing compatible domain.  We don't want to
@@ -2279,7 +2286,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
         */
        list_for_each_entry(d, &iommu->domain_list, next) {
                if (d->domain->ops == domain->domain->ops &&
-                   d->prot == domain->prot) {
+                   d->enforce_cache_coherency ==
+                           domain->enforce_cache_coherency) {
                        iommu_detach_group(domain->domain, group->iommu_group);
                        if (!iommu_attach_group(d->domain,
                                                group->iommu_group)) {
@@ -2611,14 +2619,14 @@ static void vfio_iommu_type1_release(void *iommu_data)
        kfree(iommu);
 }
 
-static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
+static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu)
 {
        struct vfio_domain *domain;
        int ret = 1;
 
        mutex_lock(&iommu->lock);
        list_for_each_entry(domain, &iommu->domain_list, next) {
-               if (!(domain->prot & IOMMU_CACHE)) {
+               if (!(domain->enforce_cache_coherency)) {
                        ret = 0;
                        break;
                }
@@ -2641,7 +2649,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
        case VFIO_DMA_CC_IOMMU:
                if (!iommu)
                        return 0;
-               return vfio_domains_have_iommu_cache(iommu);
+               return vfio_domains_have_enforce_cache_coherency(iommu);
        default:
                return 0;
        }
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1f930c0c225d94..bc39f633efdf03 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -539,7 +539,6 @@ struct dmar_domain {
 
        u8 has_iotlb_device: 1;
        u8 iommu_coherency: 1;          /* indicate coherency of iommu access */
-       u8 iommu_snooping: 1;           /* indicate snooping control feature */
        u8 enforce_no_snoop : 1;        /* Create IOPTEs with snoop control */
 
        struct list_head devices;       /* all devices' list */
-- 
2.35.1
