> From: Jason Gunthorpe <j...@nvidia.com>
> Sent: Thursday, March 24, 2022 6:55 AM
> 
> On Wed, Mar 23, 2022 at 05:34:18PM -0300, Jason Gunthorpe wrote:
> 
> > Stated another way, any platform that wires dev_is_dma_coherent() to
> > true, like all x86 does, must support IOMMU_CACHE and report
> > IOMMU_CAP_CACHE_COHERENCY for every iommu_domain the platform
> > supports. The platform obviously declares it support this in order to
> > support the in-kernel DMA API.
> 
> That gives me a nice simple idea:
> 
> diff --git a/drivers/iommu/iommufd/device.c
> b/drivers/iommu/iommufd/device.c
> index 3c6b95ad026829..8366884df4a030 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -8,6 +8,7 @@
>  #include <linux/pci.h>
>  #include <linux/irqdomain.h>
>  #include <linux/dma-iommu.h>
> +#include <linux/dma-map-ops.h>
> 
>  #include "iommufd_private.h"
> 
> @@ -61,6 +62,10 @@ struct iommufd_device
> *iommufd_bind_pci_device(int fd, struct pci_dev *pdev,
>       struct iommu_group *group;
>       int rc;
> 
> +     /* iommufd always uses IOMMU_CACHE */
> +     if (!dev_is_dma_coherent(&pdev->dev))
> +             return ERR_PTR(-EINVAL);
> +
>       ictx = iommufd_fget(fd);
>       if (!ictx)
>               return ERR_PTR(-EINVAL);
> diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
> index 48149988c84bbc..3d6df1ffbf93e6 100644
> --- a/drivers/iommu/iommufd/ioas.c
> +++ b/drivers/iommu/iommufd/ioas.c
> @@ -129,7 +129,8 @@ static int conv_iommu_prot(u32 map_flags)
>        * We provide no manual cache coherency ioctls to userspace and
> most
>        * architectures make the CPU ops for cache flushing privileged.
>        * Therefore we require the underlying IOMMU to support CPU
> coherent
> -      * operation.
> +      * operation. Support for IOMMU_CACHE is enforced by the
> +      * dev_is_dma_coherent() test during bind.
>        */
>       iommu_prot = IOMMU_CACHE;
>       if (map_flags & IOMMU_IOAS_MAP_WRITEABLE)
> 
> Looking at it I would say all the places that test
> IOMMU_CAP_CACHE_COHERENCY can be replaced with
> dev_is_dma_coherent()
> except for the one call in VFIO that is supporting the Intel no-snoop
> behavior.
> 
> Then we can rename IOMMU_CAP_CACHE_COHERENCY to something like
> IOMMU_CAP_ENFORCE_CACHE_COHERENCY and just create a
> IOMMU_ENFORCE_CACHE prot flag for Intel IOMMU to use instead of
> abusing IOMMU_CACHE.
> 

Based on that here is a quick tweak of the force-snoop part (not compiled).

Several notes:

- IOMMU_CAP_CACHE_COHERENCY is kept as it's checked in vfio's
  group attach interface. Removing it may require a group_is_dma_coherent();

- vdpa is not changed as force-snoop is only for integrated GPU today which
  is not managed by vdpa. But adding the snoop support is easy if necessary;

- vfio type1 reports the force-snoop fact to KVM via VFIO_DMA_CC_IOMMU. For
  iommufd the compat layer may leverage that interface, but more thought is
  required for the non-compat usage on how that interface can be reused, or
  whether a new one is required between iommufd and KVM. Per earlier
  discussions Paolo prefers to reuse.

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cf..06cca04 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5110,7 +5110,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
                prot |= DMA_PTE_READ;
        if (iommu_prot & IOMMU_WRITE)
                prot |= DMA_PTE_WRITE;
-       if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+       /* nothing to do for IOMMU_CACHE */
+       if ((iommu_prot & IOMMU_SNOOP) && dmar_domain->iommu_snooping)
                prot |= DMA_PTE_SNP;
 
        max_addr = iova + size;
@@ -5236,6 +5237,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct 
iommu_domain *domain,
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
        if (cap == IOMMU_CAP_CACHE_COHERENCY)
+               return true;
+       if (cap == IOMMU_CAP_FORCE_SNOOP)
                return domain_update_iommu_snooping(NULL);
        if (cap == IOMMU_CAP_INTR_REMAP)
                return irq_remapping_enabled == 1;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9394aa9..abc4cfe 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2270,6 +2270,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
        if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
                domain->prot |= IOMMU_CACHE;
 
+       if (iommu_capable(bus, IOMMU_CAP_FORCE_SNOOP))
+               domain->prot |= IOMMU_SNOOP;
+
        /*
         * Try to match an existing compatible domain.  We don't want to
         * preclude an IOMMU driver supporting multiple bus_types and being
@@ -2611,14 +2614,14 @@ static void vfio_iommu_type1_release(void *iommu_data)
        kfree(iommu);
 }
 
-static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
+static int vfio_domains_have_iommu_snoop(struct vfio_iommu *iommu)
 {
        struct vfio_domain *domain;
        int ret = 1;
 
        mutex_lock(&iommu->lock);
        list_for_each_entry(domain, &iommu->domain_list, next) {
-               if (!(domain->prot & IOMMU_CACHE)) {
+               if (!(domain->prot & IOMMU_SNOOP)) {
                        ret = 0;
                        break;
                }
@@ -2641,7 +2644,7 @@ static int vfio_iommu_type1_check_extension(struct 
vfio_iommu *iommu,
        case VFIO_DMA_CC_IOMMU:
                if (!iommu)
                        return 0;
-               return vfio_domains_have_iommu_cache(iommu);
+               return vfio_domains_have_iommu_snoop(iommu);
        default:
                return 0;
        }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index de0c57a..45184d7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -21,6 +21,8 @@
 #define IOMMU_CACHE    (1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC   (1 << 3)
 #define IOMMU_MMIO     (1 << 4) /* e.g. things like MSI doorbells */
+#define IOMMU_SNOOP    (1 << 5) /* force DMA to snoop */
+
 /*
  * Where the bus hardware includes a privilege level as part of its access type
  * markings, and certain devices are capable of issuing transactions marked as
@@ -106,6 +108,8 @@ enum iommu_cap {
                                           transactions */
        IOMMU_CAP_INTR_REMAP,           /* IOMMU supports interrupt isolation */
        IOMMU_CAP_NOEXEC,               /* IOMMU_NOEXEC flag */
+       IOMMU_CAP_FORCE_SNOOP,          /* IOMMU forces all transactions to
+                                          snoop cache */
 };
 
 /* These are the possible reserved region types */

Thanks
Kevin

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to