Hi,

On 09/06/2018 11:11 AM, Tian, Kevin wrote:
From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, August 30, 2018 9:35 AM

This adds the interfaces to setup or tear down the structures
for second level page table translations. This includes types
of second level only translation and pass through.

Cc: Ashok Raj <ashok....@intel.com>
Cc: Jacob Pan <jacob.jun....@linux.intel.com>
Cc: Kevin Tian <kevin.t...@intel.com>
Cc: Liu Yi L <yi.l....@intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.ku...@intel.com>
Signed-off-by: Lu Baolu <baolu...@linux.intel.com>
Reviewed-by: Ashok Raj <ashok....@intel.com>
---
  drivers/iommu/intel-iommu.c |   2 +-
  drivers/iommu/intel-pasid.c | 246 ++++++++++++++++++++++++++++++++++++
  drivers/iommu/intel-pasid.h |   7 +
  include/linux/intel-iommu.h |   3 +
  4 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 562da10bf93e..de6b909bb47a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1232,7 +1232,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
  }

-static void iommu_flush_write_buffer(struct intel_iommu *iommu)
+void iommu_flush_write_buffer(struct intel_iommu *iommu)
  {
        u32 val;
        unsigned long flag;
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index d6e90cd5b062..edcea1d8b9fc 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -9,6 +9,7 @@

  #define pr_fmt(fmt)   "DMAR: " fmt

+#include <linux/bitops.h>
  #include <linux/dmar.h>
  #include <linux/intel-iommu.h>
  #include <linux/iommu.h>
@@ -291,3 +292,248 @@ void intel_pasid_clear_entry(struct device *dev, int pasid)

        pasid_clear_entry(pe);
  }
+
+static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
+{
+       u64 old;
+
+       old = READ_ONCE(*ptr);
+       WRITE_ONCE(*ptr, (old & ~mask) | bits);
+}
+
+/*
+ * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_domain_id(struct pasid_entry *pe, u64 value)
+{
+       pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
+}
+
+/*
+ * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_address_root(struct pasid_entry *pe, u64 value)

Is address_root too general, especially when the entry could
contain both 1st-level and 2nd-level pointers?


Yes. Should be changed to a specific name like pasid_set_slpt_ptr().

+{
+       pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
+}
+
+/*
+ * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
+ * entry.
+ */
+static inline void
+pasid_set_address_width(struct pasid_entry *pe, u64 value)
+{
+       pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
+}
+
+/*
+ * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_translation_type(struct pasid_entry *pe, u64 value)
+{
+       pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
+}
+
+/*
+ * Enable fault processing by clearing the FPD(Fault Processing
+ * Disable) field (Bit 1) of a scalable mode PASID entry.
+ */
+static inline void pasid_set_fault_enable(struct pasid_entry *pe)
+{
+       pasid_set_bits(&pe->val[0], 1 << 1, 0);
+}
+
+/*
+ * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_sre(struct pasid_entry *pe)
+{
+       pasid_set_bits(&pe->val[2], 1 << 0, 1);
+}
+
+/*
+ * Setup the P(Present) field (Bit 0) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_present(struct pasid_entry *pe)
+{
+       pasid_set_bits(&pe->val[0], 1 << 0, 1);
+}

It's a long list and there could be more in the future. What about
defining some macro to simplify LOC, e.g.

#define PASID_SET(name, i, m, b)                                \
static inline void pasid_set_##name(struct pasid_entry *pe)     \
{                                                               \
        pasid_set_bits(&pe->val[i], m, b);                      \
}

PASID_SET(present, 0, 1<<0, 1);
PASID_SET(sre, 2, 1<<0, 1);
...


Fair enough. This looks more concise.

+
+/*
+ * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
+{
+       pasid_set_bits(&pe->val[1], 1 << 23, value);
+}
+
+static void
+pasid_based_pasid_cache_invalidation(struct intel_iommu *iommu,
+                                    int did, int pasid)

pasid_cache_invalidation_with_pasid

Okay.


+{
+       struct qi_desc desc;
+
+       desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+       desc.qw1 = 0;
+       desc.qw2 = 0;
+       desc.qw3 = 0;
+
+       qi_submit_sync(&desc, iommu);
+}
+
+static void
+pasid_based_iotlb_cache_invalidation(struct intel_iommu *iommu,
+                                    u16 did, u32 pasid)

iotlb_invalidation_with_pasid

Okay.


+{
+       struct qi_desc desc;
+
+       desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
+                       QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
+       desc.qw1 = 0;
+       desc.qw2 = 0;
+       desc.qw3 = 0;
+
+       qi_submit_sync(&desc, iommu);
+}
+
+static void
+pasid_based_dev_iotlb_cache_invalidation(struct intel_iommu *iommu,
+                                        struct device *dev, int pasid)

devtlb_invalidation_with_pasid

Okay.


+{
+       struct device_domain_info *info;
+       u16 sid, qdep, pfsid;
+
+       info = dev->archdata.iommu;
+       if (!info || !info->ats_enabled)
+               return;
+
+       sid = info->bus << 8 | info->devfn;
+       qdep = info->ats_qdep;
+       pfsid = info->pfsid;
+
+       qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+}
+
+static void tear_down_one_pasid_entry(struct intel_iommu *iommu,
+                                     struct device *dev, u16 did,
+                                     int pasid)
+{
+       struct pasid_entry *pte;

ptep


Okay.

+
+       intel_pasid_clear_entry(dev, pasid);
+
+       if (!ecap_coherent(iommu->ecap)) {
+               pte = intel_pasid_get_entry(dev, pasid);
+               clflush_cache_range(pte, sizeof(*pte));
+       }
+
+       pasid_based_pasid_cache_invalidation(iommu, did, pasid);
+       pasid_based_iotlb_cache_invalidation(iommu, did, pasid);
+
+       /* Device IOTLB doesn't need to be flushed in caching mode. */
+       if (!cap_caching_mode(iommu->cap))
+               pasid_based_dev_iotlb_cache_invalidation(iommu, dev, pasid);

can you elaborate, or point to any spec reference?


In the driver, device iotlb doesn't get flushed in caching mode. I just
follow what has been done there.

It also makes sense to me since only the bare metal host needs to
consider whether and how to flush the device iotlb.

+}
+
+/*
+ * Set up the scalable mode pasid table entry for second only or
+ * passthrough translation type.
+ */
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,

second_level doesn't imply passthrough. what about intel_pasid_
setup_common, which is then invoked by SL or PT individually (
or even FL)?

Fair enough. Will refine this part of code.


+                                  struct dmar_domain *domain,
+                                  struct device *dev, int pasid,
+                                  bool pass_through)
+{
+       struct pasid_entry *pte;
+       struct dma_pte *pgd;
+       u64 pgd_val;
+       int agaw;
+       u16 did;
+
+       /*
+        * If hardware advertises no support for second level translation,
+        * we only allow pass through translation setup.
+        */
+       if (!(ecap_slts(iommu->ecap) || pass_through)) {
+               pr_err("No first level translation support on %s, only pass-

first->second

Sure.


through mode allowed\n",
+                      iommu->name);
+               return -EINVAL;
+       }
+
+       /*
+        * Skip top levels of page tables for iommu which has less agaw

skip doesn't mean error

Yes. But it's an error if we can't skip ... :-)


+        * than default. Unnecessary for PT mode.
+        */
+       pgd = domain->pgd;
+       if (!pass_through) {
+               for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+                       pgd = phys_to_virt(dma_pte_addr(pgd));
+                       if (!dma_pte_present(pgd)) {
+                               dev_err(dev, "Invalid domain page table\n");
+                               return -EINVAL;
+                       }
+               }
+       }
+       pgd_val = pass_through ? 0 : virt_to_phys(pgd);
+       did = pass_through ? FLPT_DEFAULT_DID :
+                       domain->iommu_did[iommu->seq_id];
+
+       pte = intel_pasid_get_entry(dev, pasid);
+       if (!pte) {
+               dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+               return -ENODEV;
+       }
+
+       pasid_clear_entry(pte);
+       pasid_set_domain_id(pte, did);
+
+       if (!pass_through)
+               pasid_set_address_root(pte, pgd_val);
+
+       pasid_set_address_width(pte, iommu->agaw);
+       pasid_set_translation_type(pte, pass_through ? 4 : 2);
+       pasid_set_fault_enable(pte);
+       pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+       /*
+        * Since it is a second level only translation setup, we should
+        * set SRE bit as well (addresses are expected to be GPAs).
+        */
+       pasid_set_sre(pte);
+       pasid_set_present(pte);
+
+       if (!ecap_coherent(iommu->ecap))
+               clflush_cache_range(pte, sizeof(*pte));
+
+       if (cap_caching_mode(iommu->cap)) {
+               pasid_based_pasid_cache_invalidation(iommu, did, pasid);
+               pasid_based_iotlb_cache_invalidation(iommu, did, pasid);
+       } else {
+               iommu_flush_write_buffer(iommu);
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down the scalable mode pasid table entry for second only or
+ * passthrough translation type.
+ */
+void intel_pasid_tear_down_second_level(struct intel_iommu *iommu,
+                                       struct dmar_domain *domain,
+                                       struct device *dev, int pasid)
+{
+       u16 did = domain->iommu_did[iommu->seq_id];
+
+       tear_down_one_pasid_entry(iommu, dev, did, pasid);
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 03c1612d173c..85b158a1826a 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -49,5 +49,12 @@ struct pasid_table *intel_pasid_get_table(struct device *dev);
  int intel_pasid_get_dev_max_id(struct device *dev);
  struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
  void intel_pasid_clear_entry(struct device *dev, int pasid);
+int intel_pasid_setup_second_level(struct intel_iommu *iommu,
+                                  struct dmar_domain *domain,
+                                  struct device *dev, int pasid,
+                                  bool pass_through);
+void intel_pasid_tear_down_second_level(struct intel_iommu *iommu,
+                                       struct dmar_domain *domain,
+                                       struct device *dev, int pasid);

  #endif /* __INTEL_PASID_H */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 72aff482b293..d77d23dfd221 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -115,6 +115,8 @@
   * Extended Capability Register
   */

+#define ecap_smpwc(e)          (((e) >> 48) & 0x1)
+#define ecap_slts(e)           (((e) >> 46) & 0x1)
  #define ecap_smts(e)          (((e) >> 43) & 0x1)
  #define ecap_dit(e)           ((e >> 41) & 0x1)
  #define ecap_pasid(e)         ((e >> 40) & 0x1)
@@ -571,6 +573,7 @@ void free_pgtable_page(void *vaddr);
  struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
  int for_each_device_domain(int (*fn)(struct device_domain_info *info,
                                     void *data), void *data);
+void iommu_flush_write_buffer(struct intel_iommu *iommu);

  #ifdef CONFIG_INTEL_IOMMU_SVM
  int intel_svm_init(struct intel_iommu *iommu);
--
2.17.1



Best regards,
Lu Baolu
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to