[RFC PATCH 5/5] iommu/virtio-iommu: Support attaching VT-d IO pgtable

2023-11-05 Thread Tina Zhang
Add VT-d IO page table support to the ATTACH_TABLE request.

Signed-off-by: Tina Zhang 
---
 drivers/iommu/virtio-iommu.c  | 23 +++
 include/uapi/linux/virtio_iommu.h | 26 ++
 2 files changed, 49 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index b1ceaac974e2..b02eeb1d27a4 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -991,12 +991,25 @@ static int viommu_attach_pgtable(struct viommu_domain 
*vdomain,
};
 
/* TODO: bypass flag? */
+   if (vdomain->bypass == true)
+   return 0;
 
switch (fmt) {
case VIRT_IO_PGTABLE:
req.format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VIRT);
req.pgd = cpu_to_le64((u64)cfg->virt.pgd);
break;
+   case INTEL_IOMMU: {
+   struct virtio_iommu_req_attach_pgt_vtd *vtd_req =
+   (struct virtio_iommu_req_attach_pgt_vtd *)
+
+   vtd_req->format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VTD);
+   vtd_req->pgd = cpu_to_le64((u64)cfg->virt.pgd);
+   vtd_req->addr_width = cpu_to_le32(cfg->oas);
+   vtd_req->pasid = IOMMU_NO_PASID;
+   break;
+   }
+
default:
return -EINVAL;
};
@@ -1034,6 +1047,16 @@ static int viommu_setup_pgtable(struct viommu_domain 
*vdomain,
case VIRTIO_IOMMU_FORMAT_PGTF_VIRT:
fmt = VIRT_IO_PGTABLE;
break;
+   case VIRTIO_IOMMU_FORMAT_PGTF_VTD:
+   {
+   struct virtio_iommu_probe_pgt_vtd *vtd_desc =
+   (struct virtio_iommu_probe_pgt_vtd *)desc;
+
+   cfg.vtd_cfg.cap_reg = le64_to_cpu(vtd_desc->cap_reg);
+   cfg.vtd_cfg.ecap_reg = le64_to_cpu(vtd_desc->ecap_reg);
+   fmt = INTEL_IOMMU;
+   break;
+   }
default:
dev_warn(vdev->dev, "unsupported page table format 0x%x\n",
 le16_to_cpu(desc->format));
diff --git a/include/uapi/linux/virtio_iommu.h 
b/include/uapi/linux/virtio_iommu.h
index 656be1f3d926..17e0d5fcdd54 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -139,6 +139,22 @@ struct virtio_iommu_req_attach_pgt_virt {
struct virtio_iommu_req_tailtail;
 };
 
+/* Vt-d I/O Page Table Descriptor */
+struct virtio_iommu_req_attach_pgt_vtd {
+   struct virtio_iommu_req_headhead;
+   __le32  domain;
+   __le32  endpoint;
+   __le32  flags;
+   __le16  format;
+   __u8reserved[2];
+   __le32  pasid;
+   __le64  pgd;
+   __le64  fl_flags;
+   __le32  addr_width;
+   __u8reserved2[36];
+   struct virtio_iommu_req_tailtail;
+};
+
 #define VIRTIO_IOMMU_MAP_F_READ(1 << 0)
 #define VIRTIO_IOMMU_MAP_F_WRITE   (1 << 1)
 #define VIRTIO_IOMMU_MAP_F_MMIO(1 << 2)
@@ -224,6 +240,8 @@ struct virtio_iommu_probe_pasid_size {
 #define VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3   2
 /* Virt I/O page table format */
 #define VIRTIO_IOMMU_FORMAT_PGTF_VIRT  3
+/* VT-d I/O page table format */
+#define VIRTIO_IOMMU_FORMAT_PGTF_VTD   4
 
 struct virtio_iommu_probe_table_format {
struct virtio_iommu_probe_property  head;
@@ -231,6 +249,14 @@ struct virtio_iommu_probe_table_format {
__u8reserved[2];
 };
 
+struct virtio_iommu_probe_pgt_vtd {
+   struct virtio_iommu_probe_property  head;
+   __le16  format;
+   __u8reserved[2];
+   __le64  cap_reg;
+   __le64  ecap_reg;
+};
+
 struct virtio_iommu_req_probe {
struct virtio_iommu_req_headhead;
__le32  endpoint;
-- 
2.39.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 4/5] iommu/vt-d: Adapt alloc_pgtable interface to be used by others

2023-11-05 Thread Tina Zhang
The generic IO page table framework provides a set of interfaces for
invoking IO page table operations. Other entities (e.g., the virtio-iommu
driver) can use these interfaces to ask the VT-d driver to generate a
VT-d format IO page table. This patch adds that support.

Signed-off-by: Tina Zhang 
---
 drivers/iommu/intel/iommu.c | 69 +++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 80bd1993861c..d714e780a031 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5248,17 +5248,80 @@ static phys_addr_t pgtable_iova_to_phys(struct 
io_pgtable_ops *ops,
return intel_iommu_iova_to_phys(_domain->domain, iova);
 }
 
+static void __iommu_calculate_cfg(struct io_pgtable_cfg *cfg)
+{
+   unsigned long fl_sagaw, sl_sagaw, sagaw;
+   int agaw, addr_width;
+
+   fl_sagaw = BIT(2) | (cap_fl5lp_support(cfg->vtd_cfg.cap_reg) ? BIT(3) : 
0);
+   sl_sagaw = cap_sagaw(cfg->vtd_cfg.cap_reg);
+   sagaw = fl_sagaw & sl_sagaw;
+
+   for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); agaw >= 0; 
agaw--) {
+   if (test_bit(agaw, ))
+   break;
+   }
+
+   addr_width = agaw_to_width(agaw);
+   if (cfg->ias > addr_width)
+   cfg->ias = addr_width;
+   if (cfg->oas != addr_width)
+   cfg->oas = addr_width;
+}
+
 static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void 
*cookie)
 {
-   struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+   struct dmar_io_pgtable *pgtable;
+   struct dmar_domain *domain;
+   int adjust_width;
+
+   /* Platform must have nested translation support */
+   if (!ecap_nest(cfg->vtd_cfg.ecap_reg))
+   return NULL;
+
+   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain)
+   return NULL;
+
+   domain->nid = NUMA_NO_NODE;
+   domain->use_first_level = true;
+   domain->has_iotlb_device = false;
+   INIT_LIST_HEAD(>devices);
+   spin_lock_init(>lock);
+   xa_init(>iommu_array);
+
+   /* calculate AGAW */
+   __iommu_calculate_cfg(cfg);
+   domain->gaw = cfg->ias;
+   adjust_width = guestwidth_to_adjustwidth(domain->gaw);
+   domain->agaw = width_to_agaw(adjust_width);
+
+   domain->iommu_coherency = ecap_smpwc(cfg->vtd_cfg.ecap_reg);
+   domain->force_snooping = true;
+   domain->iommu_superpage = cap_fl1gp_support(cfg->vtd_cfg.ecap_reg) ? 2 
: 1;
+   domain->max_addr = 0;
+
+   cfg->coherent_walk = domain->iommu_coherency;
+
+   pgtable = >dmar_iop;
 
+   /* always allocate the top pgd */
+   domain->pgd = alloc_pgtable_page(domain->nid, GFP_KERNEL);
+   if (!domain->pgd)
+   goto out_free_domain;
+   domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+
+   cfg->virt.pgd = virt_to_phys(domain->pgd);
+   cfg->tlb = _ops;
pgtable->iop.ops.map_pages = pgtable_map_pages;
pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
 
-   cfg->tlb = _ops;
-
return >iop;
+
+out_free_domain:
+   kfree(domain);
+   return NULL;
 }
 
 struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
-- 
2.39.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 3/5] iommu/io-pgtable: Introduce struct vtd_cfg

2023-11-05 Thread Tina Zhang
VT-d hardware cap/ecap information is needed for the driver to generate a
VT-d format IO page table. Add struct vtd_cfg to hold this information.

Signed-off-by: Tina Zhang 
---
 include/linux/io-pgtable.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b2857c18f963..ae6a2e44b027 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,11 @@ struct io_pgtable_cfg {
u32 n_ttbrs;
} apple_dart_cfg;
 
+   struct {
+   u64 cap_reg;
+   u64 ecap_reg;
+   } vtd_cfg;
+
struct {
dma_addr_t  pgd;
} virt;
-- 
2.39.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 1/5] iommu/virtio-iommu: Correct the values of granule and nr_pages

2023-11-05 Thread Tina Zhang
The value of granule is ilog2(pgsize). When pgsize isn't a power of two,
the size implied by granule (2^granule) is smaller than the actual pgsize.
E.g., if pgsize = 0x6000 and granule = ilog2(gather->pgsize), then
granule = 0xe, and 2^0xe = 0x4000 is smaller than the actual pgsize
(0x6000). Invalidating the IOTLB with a smaller range would lead to cache
incoherence. So, round pgsize up to the nearest power of two to make sure
the size implied by granule is not less than the actual size. The value of
"gather->end - gather->start + 1" needs a similar adjustment.

Signed-off-by: Tina Zhang 
---
 drivers/iommu/virtio-iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 08e310672e57..b1ceaac974e2 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1289,8 +1289,8 @@ static void viommu_iotlb_sync(struct iommu_domain *domain,
if (!gather->pgsize)
return;
 
-   granule = ilog2(gather->pgsize);
-   nr_pages = (gather->end - gather->start + 1) >> granule;
+   granule = ilog2(__roundup_pow_of_two(gather->pgsize));
+   nr_pages = __roundup_pow_of_two(gather->end - gather->start + 
1) >> granule;
req = (struct virtio_iommu_req_invalidate) {
.head.type  = VIRTIO_IOMMU_T_INVALIDATE,
.inv_gran   = cpu_to_le16(VIRTIO_IOMMU_INVAL_G_VA),
-- 
2.39.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 0/5] virtio-iommu: Add VT-d IO page table

2023-11-05 Thread Tina Zhang
The proposal for virtio-iommu page table support is being discussed on
the virtio-comment mailing list[1]. This patch-set, based on Jean's
virtio-iommu/pgtables branch[2], tries to follow the proposal and adds
basic VT-d IO page table support to virtio-iommu.

On Intel platforms with VT-d nested translation enabled, there are two
main benefits of enabling VT-d IO page table support in the virtual IOMMU:
1) Allowing vSVM (aka vSVA) usage. Virtual Shared Virtual Addressing
   (vSVA) allows the virtual processor and virtual device to use the
   same virtual addresses.
2) Accelerating DMA buffer map operations for vIOVA usage by removing
   the context switch on DMA buffer map operations.
(Note: this patch-set doesn't include the whole patch-set for enabling
vSVM on virtio-iommu; it only includes the part for the vIOVA case.
However, the vSVM enabling patch-set needs to be based on this patch-set.)

There are three changes in this patch-set:
1) The first patch is a bug-fix patch that tries to resolve an issue
   with IOTLB invalidation requests carrying an incorrect page size.
2) The next 3 patches add generic IO page table support to the VT-d
   driver.
3) The last one introduces the VT-d page table format to the
   virtio-iommu driver.

The patch-set is also available at github:
https://github.com/TinaZhangZW/linux/tree/vt-d-pgtable

The QEMU part is available here:
https://github.com/TinaZhangZW/qemu/tree/virtio-iommu/vt-d-pgtable


[1]:https://lists.oasis-open.org/archives/virtio-comment/202310/msg00018.html
[2]:https://jpbrucker.net/git/linux/log/?h=virtio-iommu/pgtables

Tina Zhang (5):
  iommu/virtio-iommu: Correct the values of granule and nr_pages
  iommu/vt-d: Add generic IO page table support
  iommu/io-pgtable: Introduce struct vtd_cfg
  iommu/vt-d: Adapt alloc_pgtable interface to be used by others
  iommu/virtio-iommu: Support attaching VT-d IO pgtable

 drivers/iommu/intel/Kconfig   |   1 +
 drivers/iommu/intel/iommu.c   | 157 ++
 drivers/iommu/intel/iommu.h   |   7 ++
 drivers/iommu/io-pgtable.c|   3 +
 drivers/iommu/virtio-iommu.c  |  27 -
 include/linux/io-pgtable.h|   7 ++
 include/uapi/linux/virtio_iommu.h |  26 +
 7 files changed, 226 insertions(+), 2 deletions(-)

-- 
2.39.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support

2023-11-05 Thread Tina Zhang
Add basic hook-up code to implement the generic IO page table framework.

Signed-off-by: Tina Zhang 
---
 drivers/iommu/intel/Kconfig |  1 +
 drivers/iommu/intel/iommu.c | 94 +
 drivers/iommu/intel/iommu.h |  7 +++
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 +
 5 files changed, 107 insertions(+)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..8334e7e50e69 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@ config INTEL_IOMMU
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
+   select IOMMU_IO_PGTABLE
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dbcdf7b95b9f..80bd1993861c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "iommu.h"
 #include "../dma-iommu.h"
@@ -67,6 +68,20 @@
 #define LEVEL_STRIDE   (9)
 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
 
+#define io_pgtable_cfg_to_dmar_pgtable(x) \
+   container_of((x), struct dmar_io_pgtable, pgtbl_cfg)
+
+#define io_pgtable_to_dmar_pgtable(x) \
+   container_of((x), struct dmar_io_pgtable, iop)
+
+#define io_pgtable_to_dmar_domain(x) \
+   container_of(io_pgtable_to_dmar_pgtable(x), \
+   struct dmar_domain, dmar_iop)
+
+#define io_pgtable_ops_to_dmar_domain(x) \
+   container_of(io_pgtable_to_dmar_pgtable(io_pgtable_ops_to_pgtable(x)), \
+   struct dmar_domain, dmar_iop)
+
 static inline int agaw_to_level(int agaw)
 {
return agaw + 2;
@@ -5171,3 +5186,82 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, 
u64 oa, u64 ob)
 
return ret;
 }
+
+static void flush_all(void *cookie)
+{
+}
+
+static void flush_walk(unsigned long iova, size_t size,
+  size_t granule, void *cookie)
+{
+}
+
+static void add_page(struct iommu_iotlb_gather *gather,
+unsigned long iova, size_t granule,
+void *cookie)
+{
+}
+
+static const struct iommu_flush_ops flush_ops = {
+   .tlb_flush_all  = flush_all,
+   .tlb_flush_walk = flush_walk,
+   .tlb_add_page   = add_page,
+};
+
+static void free_pgtable(struct io_pgtable *iop)
+{
+   struct dmar_domain *dmar_domain = io_pgtable_to_dmar_domain(iop);
+
+   if (dmar_domain->pgd) {
+   LIST_HEAD(freelist);
+
+   domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), 
);
+   put_pages_list();
+   }
+}
+
+static int pgtable_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+phys_addr_t paddr, size_t pgsize, size_t pgcount,
+int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+   struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+   return intel_iommu_map_pages(_domain->domain, iova, paddr, pgsize,
+pgcount, iommu_prot, gfp, mapped);
+}
+
+static size_t pgtable_unmap_pages(struct io_pgtable_ops *ops, unsigned long 
iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
+{
+   struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+   return intel_iommu_unmap_pages(_domain->domain, iova, pgsize,
+  pgcount, gather);
+}
+
+static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
+   unsigned long iova)
+{
+   struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+   return intel_iommu_iova_to_phys(_domain->domain, iova);
+}
+
+static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void 
*cookie)
+{
+   struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+
+   pgtable->iop.ops.map_pages = pgtable_map_pages;
+   pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
+   pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
+
+   cfg->tlb = _ops;
+
+   return >iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
+   .alloc = alloc_pgtable,
+   .free  = free_pgtable,
+};
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8d0aac71c135..5207fea6477a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -579,6 +580,11 @@ struct iommu_domain_info {
 * to VT-d spec, section 9.3 */
 };
 
+struct dmar_io_pgtable {
+   struct io_pgtable_cfg   pgtbl_cfg;
+   struct io_pgtable