IOMMU advertises Access/Dirty bits if the extended capability
DMAR register reports it (ECAP, mnemonic ECAP.SSADS). The first
stage table, though, has no bit for advertising it, unless referenced via
a scalable-mode PASID Entry. Relevant Intel IOMMU SDM refs: for the first
stage table, "3.6.2 Accessed, Extended Accessed, and Dirty Flags"; for the
second stage table, "3.7.2 Accessed and Dirty Flags".

To enable it, scalable mode for the second-stage table is required,
so limit the use of the dirty bit to scalable mode and discard
first-stage-configured DMAR domains. To use SSADS, we set bit 9
(SSADE) in the scalable-mode PASID table entry and flush all IOMMU
caches when doing so. Relevant SDM refs (a condensed sketch of the
PASID-entry update follows the references):

"3.7.2 Accessed and Dirty Flags"
"6.5.3.3 Guidance to Software for Invalidations,
 Table 23. Guidance to Software for Invalidations"
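
A condensed sketch of the PASID-entry update (illustrative only; the
real helpers pasid_set_ssade()/pasid_clear_ssade() are added below):

	/*
	 * Illustrative only: SSADE is bit 9 of the first qword of the
	 * scalable-mode PASID table entry; set or clear it atomically
	 * via pasid_set_bits(), then flush the IOMMU caches.
	 */
	if (enable)
		pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9);
	else
		pasid_set_bits(&pe->val[0], 1 << 9, 0);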

The dirty bit in the second-stage PTE sits at the same position (bit 9).
The IOTLB caches some attributes when SSADE is enabled, including
dirtiness information, so we also need to flush the IOTLB to make sure
the IOMMU attempts to set the dirty bit again. The relevant manual
coverage of the hardware translation is chapter 6, with special mention
of (a sketch of the read-and-clear step follows the references):

"6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations"
"6.2.4 IOTLB"

Signed-off-by: Joao Martins <joao.m.mart...@oracle.com>
---
This probably shouldn't be as aggressive as to flush everything; it
needs checking with hardware (and the invalidation guidance) to
understand what exactly needs flushing.
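
Per Table 23, the minimal sequence after changing SSADE is likely a
PASID-cache invalidation followed by an IOTLB invalidation rather than
a full flush. A hypothetical sketch (the helper names are assumed from
pasid.c and unverified here):

	/*
	 * Hypothetical: follow the SDM invalidation guidance with a
	 * targeted PASID-cache flush then an IOTLB flush, instead of
	 * intel_flush_iotlb_all().
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);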
---
 drivers/iommu/intel/iommu.c | 109 ++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/pasid.c |  76 +++++++++++++++++++++++++
 drivers/iommu/intel/pasid.h |   7 +++
 include/linux/intel-iommu.h |  14 +++++
 4 files changed, 206 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ce33f85c72ab..92af43f27241 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5089,6 +5089,113 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
        }
 }
 
+static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+                                         bool enable)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       struct device_domain_info *info;
+       unsigned long flags;
+       int ret = -EINVAL;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       if (list_empty(&dmar_domain->devices)) {
+               spin_unlock_irqrestore(&device_domain_lock, flags);
+               return ret;
+       }
+
+       list_for_each_entry(info, &dmar_domain->devices, link) {
+               if (!info->dev || (info->domain != dmar_domain))
+                       continue;
+
+               /* Dirty tracking is second-stage, scalable-mode only */
+               if ((info->domain && domain_use_first_level(info->domain)) ||
+                   !ecap_slads(info->iommu->ecap) ||
+                   !sm_supported(info->iommu) || !intel_iommu_sm) {
+                       ret = -EOPNOTSUPP;
+                       continue;
+               }
+
+       ret = intel_pasid_setup_dirty_tracking(info->iommu, info->domain,
+                                                    info->dev, PASID_RID2PASID,
+                                                    enable);
+               if (ret)
+                       break;
+       }
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       /*
+        * We need to flush the context TLB and IOTLB, including any cached
+        * translations, to force incoming DMA requests to have their IOTLB
+        * entries tagged with A/D bits.
+        */
+       intel_flush_iotlb_all(domain);
+       return ret;
+}
+
+static int intel_iommu_get_dirty_tracking(struct iommu_domain *domain)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       struct device_domain_info *info;
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       list_for_each_entry(info, &dmar_domain->devices, link) {
+               if (!info->dev || (info->domain != dmar_domain))
+                       continue;
+
+               /* Dirty tracking is second-stage, scalable-mode only */
+               if ((info->domain && domain_use_first_level(info->domain)) ||
+                   !ecap_slads(info->iommu->ecap) ||
+                   !sm_supported(info->iommu) || !intel_iommu_sm) {
+                       ret = -EOPNOTSUPP;
+                       continue;
+               }
+
+               if (!intel_pasid_dirty_tracking_enabled(info->iommu, info->domain,
+                                                info->dev, PASID_RID2PASID)) {
+                       ret = -EINVAL;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return ret;
+}
+
+static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+                                           unsigned long iova, size_t size,
+                                           struct iommu_dirty_bitmap *dirty)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       unsigned long end = iova + size - 1;
+       unsigned long pgsize;
+       int ret;
+
+       ret = intel_iommu_get_dirty_tracking(domain);
+       if (ret)
+               return ret;
+
+       do {
+               struct dma_pte *pte;
+               int lvl = 0;
+
+               pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl);
+               pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
+               if (!pte || !dma_pte_present(pte)) {
+                       iova += pgsize;
+                       continue;
+               }
+
+               /* It is writable, set the bitmap */
+               if (dma_sl_pte_test_and_clear_dirty(pte))
+                       iommu_dirty_bitmap_record(dirty, iova, pgsize);
+               iova += pgsize;
+       } while (iova < end);
+
+       return 0;
+}
+
 const struct iommu_ops intel_iommu_ops = {
        .capable                = intel_iommu_capable,
        .domain_alloc           = intel_iommu_domain_alloc,
@@ -5119,6 +5226,8 @@ const struct iommu_ops intel_iommu_ops = {
                .iotlb_sync             = intel_iommu_tlb_sync,
                .iova_to_phys           = intel_iommu_iova_to_phys,
                .free                   = intel_iommu_domain_free,
+               .set_dirty_tracking     = intel_iommu_set_dirty_tracking,
+               .read_and_clear_dirty   = intel_iommu_read_and_clear_dirty,
        }
 };
 
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 10fb82ea467d..90c7e018bc5c 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -331,6 +331,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
        WRITE_ONCE(*ptr, (old & ~mask) | bits);
 }
 
+static inline u64 pasid_get_bits(u64 *ptr)
+{
+       return READ_ONCE(*ptr);
+}
+
 /*
  * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
  * PASID entry.
@@ -389,6 +394,36 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe)
        pasid_set_bits(&pe->val[0], 1 << 1, 0);
 }
 
+/*
+ * Enable second-stage A/D bits by setting the SSADE (Second Stage
+ * Access/Dirty Enable) field (Bit 9) of a scalable-mode PASID
+ * entry.
+ */
+static inline void pasid_set_ssade(struct pasid_entry *pe)
+{
+       pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9);
+}
+
+/*
+ * Disable second-stage A/D bits by clearing the SSADE (Second Stage
+ * Access/Dirty Enable) field (Bit 9) of a scalable-mode PASID
+ * entry.
+ */
+static inline void pasid_clear_ssade(struct pasid_entry *pe)
+{
+       pasid_set_bits(&pe->val[0], 1 << 9, 0);
+}
+
+/*
+ * Check whether second-stage A/D bits are enabled by reading the
+ * SSADE (Second Stage Access/Dirty Enable) field (Bit 9) of a
+ * scalable-mode PASID entry.
+ */
+static inline bool pasid_get_ssade(struct pasid_entry *pe)
+{
+       return pasid_get_bits(&pe->val[0]) & (1 << 9);
+}
+
 /*
  * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
  * scalable mode PASID entry.
@@ -725,6 +760,47 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
        return 0;
 }
 
+/*
+ * Set up dirty tracking on a second-stage only translation type.
+ */
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+                                    struct dmar_domain *domain,
+                                    struct device *dev, u32 pasid,
+                                    bool enabled)
+{
+       struct pasid_entry *pte;
+
+       pte = intel_pasid_get_entry(dev, pasid);
+       if (!pte) {
+               dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+               return -ENODEV;
+       }
+
+       if (enabled)
+               pasid_set_ssade(pte);
+       else
+               pasid_clear_ssade(pte);
+       return 0;
+}
+
+/*
+ * Check if dirty tracking is enabled on a second-stage only translation.
+ */
+bool intel_pasid_dirty_tracking_enabled(struct intel_iommu *iommu,
+                                       struct dmar_domain *domain,
+                                       struct device *dev, u32 pasid)
+{
+       struct pasid_entry *pte;
+
+       pte = intel_pasid_get_entry(dev, pasid);
+       if (!pte) {
+               dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+               return false;
+       }
+
+       return pasid_get_ssade(pte);
+}
+
 /*
  * Set up the scalable mode pasid entry for passthrough translation type.
  */
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index ab4408c824a5..3dab86017228 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -115,6 +115,13 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 int intel_pasid_setup_second_level(struct intel_iommu *iommu,
                                   struct dmar_domain *domain,
                                   struct device *dev, u32 pasid);
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+                                    struct dmar_domain *domain,
+                                    struct device *dev, u32 pasid,
+                                    bool enabled);
+bool intel_pasid_dirty_tracking_enabled(struct intel_iommu *iommu,
+                                       struct dmar_domain *domain,
+                                       struct device *dev, u32 pasid);
 int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
                                   struct dmar_domain *domain,
                                   struct device *dev, u32 pasid);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 5cfda90b2cca..1328d1805197 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -47,6 +47,9 @@
 #define DMA_FL_PTE_DIRTY       BIT_ULL(6)
 #define DMA_FL_PTE_XD          BIT_ULL(63)
 
+#define DMA_SL_PTE_DIRTY_BIT   9
+#define DMA_SL_PTE_DIRTY       BIT_ULL(DMA_SL_PTE_DIRTY_BIT)
+
 #define ADDR_WIDTH_5LEVEL      (57)
 #define ADDR_WIDTH_4LEVEL      (48)
 
@@ -677,6 +680,17 @@ static inline bool dma_pte_present(struct dma_pte *pte)
        return (pte->val & 3) != 0;
 }
 
+static inline bool dma_sl_pte_dirty(struct dma_pte *pte)
+{
+       return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+}
+
+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte)
+{
+       return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+                                 (unsigned long *)&pte->val);
+}
+
 static inline bool dma_pte_superpage(struct dma_pte *pte)
 {
        return (pte->val & DMA_PTE_LARGE_PAGE);
-- 
2.17.2
