[RFC PATCH 5/5] iommu/virtio-iommu: Support attaching VT-d IO pgtable
Add VT-d IO page table support to ATTACH_TABLE request. Signed-off-by: Tina Zhang --- drivers/iommu/virtio-iommu.c | 23 +++ include/uapi/linux/virtio_iommu.h | 26 ++ 2 files changed, 49 insertions(+) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index b1ceaac974e2..b02eeb1d27a4 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -991,12 +991,25 @@ static int viommu_attach_pgtable(struct viommu_domain *vdomain, }; /* TODO: bypass flag? */ + if (vdomain->bypass == true) + return 0; switch (fmt) { case VIRT_IO_PGTABLE: req.format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VIRT); req.pgd = cpu_to_le64((u64)cfg->virt.pgd); break; + case INTEL_IOMMU: { + struct virtio_iommu_req_attach_pgt_vtd *vtd_req = + (struct virtio_iommu_req_attach_pgt_vtd *) + + vtd_req->format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VTD); + vtd_req->pgd = cpu_to_le64((u64)cfg->virt.pgd); + vtd_req->addr_width = cpu_to_le32(cfg->oas); + vtd_req->pasid = IOMMU_NO_PASID; + break; + } + default: return -EINVAL; }; @@ -1034,6 +1047,16 @@ static int viommu_setup_pgtable(struct viommu_domain *vdomain, case VIRTIO_IOMMU_FORMAT_PGTF_VIRT: fmt = VIRT_IO_PGTABLE; break; + case VIRTIO_IOMMU_FORMAT_PGTF_VTD: + { + struct virtio_iommu_probe_pgt_vtd *vtd_desc = + (struct virtio_iommu_probe_pgt_vtd *)desc; + + cfg.vtd_cfg.cap_reg = le64_to_cpu(vtd_desc->cap_reg); + cfg.vtd_cfg.ecap_reg = le64_to_cpu(vtd_desc->ecap_reg); + fmt = INTEL_IOMMU; + break; + } default: dev_warn(vdev->dev, "unsupported page table format 0x%x\n", le16_to_cpu(desc->format)); diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 656be1f3d926..17e0d5fcdd54 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -139,6 +139,22 @@ struct virtio_iommu_req_attach_pgt_virt { struct virtio_iommu_req_tailtail; }; +/* Vt-d I/O Page Table Descriptor */ +struct virtio_iommu_req_attach_pgt_vtd { + struct virtio_iommu_req_headhead; + 
__le32 domain; + __le32 endpoint; + __le32 flags; + __le16 format; + __u8reserved[2]; + __le32 pasid; + __le64 pgd; + __le64 fl_flags; + __le32 addr_width; + __u8reserved2[36]; + struct virtio_iommu_req_tailtail; +}; + #define VIRTIO_IOMMU_MAP_F_READ(1 << 0) #define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1) #define VIRTIO_IOMMU_MAP_F_MMIO(1 << 2) @@ -224,6 +240,8 @@ struct virtio_iommu_probe_pasid_size { #define VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3 2 /* Virt I/O page table format */ #define VIRTIO_IOMMU_FORMAT_PGTF_VIRT 3 +/* VT-d I/O page table format */ +#define VIRTIO_IOMMU_FORMAT_PGTF_VTD 4 struct virtio_iommu_probe_table_format { struct virtio_iommu_probe_property head; @@ -231,6 +249,14 @@ struct virtio_iommu_probe_table_format { __u8reserved[2]; }; +struct virtio_iommu_probe_pgt_vtd { + struct virtio_iommu_probe_property head; + __le16 format; + __u8reserved[2]; + __le64 cap_reg; + __le64 ecap_reg; +}; + struct virtio_iommu_req_probe { struct virtio_iommu_req_headhead; __le32 endpoint; -- 2.39.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[RFC PATCH 4/5] iommu/vt-d: Adapt alloc_pgtable interface to be used by others
The generic IO page table framework provides a set of interfaces for invoking IO page table operations. Other entity (e.g., virtio-iommu driver) can use the interface to ask VT-d driver to generate a VT-d format IO page table. This patch adds the support. Signed-off-by: Tina Zhang --- drivers/iommu/intel/iommu.c | 69 +++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 80bd1993861c..d714e780a031 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5248,17 +5248,80 @@ static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops, return intel_iommu_iova_to_phys(_domain->domain, iova); } +static void __iommu_calculate_cfg(struct io_pgtable_cfg *cfg) +{ + unsigned long fl_sagaw, sl_sagaw, sagaw; + int agaw, addr_width; + + fl_sagaw = BIT(2) | (cap_fl5lp_support(cfg->vtd_cfg.cap_reg) ? BIT(3) : 0); + sl_sagaw = cap_sagaw(cfg->vtd_cfg.cap_reg); + sagaw = fl_sagaw & sl_sagaw; + + for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); agaw >= 0; agaw--) { + if (test_bit(agaw, )) + break; + } + + addr_width = agaw_to_width(agaw); + if (cfg->ias > addr_width) + cfg->ias = addr_width; + if (cfg->oas != addr_width) + cfg->oas = addr_width; +} + static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { - struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg); + struct dmar_io_pgtable *pgtable; + struct dmar_domain *domain; + int adjust_width; + + /* Platform must have nested translation support */ + if (!ecap_nest(cfg->vtd_cfg.ecap_reg)) + return NULL; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + domain->nid = NUMA_NO_NODE; + domain->use_first_level = true; + domain->has_iotlb_device = false; + INIT_LIST_HEAD(>devices); + spin_lock_init(>lock); + xa_init(>iommu_array); + + /* calculate AGAW */ + __iommu_calculate_cfg(cfg); + domain->gaw = cfg->ias; + adjust_width = 
guestwidth_to_adjustwidth(domain->gaw); + domain->agaw = width_to_agaw(adjust_width); + + domain->iommu_coherency = ecap_smpwc(cfg->vtd_cfg.ecap_reg); + domain->force_snooping = true; + domain->iommu_superpage = cap_fl1gp_support(cfg->vtd_cfg.ecap_reg) ? 2 : 1; + domain->max_addr = 0; + + cfg->coherent_walk = domain->iommu_coherency; + + pgtable = >dmar_iop; + /* always allocate the top pgd */ + domain->pgd = alloc_pgtable_page(domain->nid, GFP_KERNEL); + if (!domain->pgd) + goto out_free_domain; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + + cfg->virt.pgd = virt_to_phys(domain->pgd); + cfg->tlb = _ops; pgtable->iop.ops.map_pages = pgtable_map_pages; pgtable->iop.ops.unmap_pages = pgtable_unmap_pages; pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys; - cfg->tlb = _ops; - return >iop; + +out_free_domain: + kfree(domain); + return NULL; } struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = { -- 2.39.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[RFC PATCH 3/5] iommu/io-pgtable: Introduce struct vtd_cfg
The VT-d hardware cap/ecap information is needed for a driver to generate a VT-d format IO page table. Add struct vtd_cfg to hold this information. Signed-off-by: Tina Zhang --- include/linux/io-pgtable.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b2857c18f963..ae6a2e44b027 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -147,6 +147,11 @@ struct io_pgtable_cfg { u32 n_ttbrs; } apple_dart_cfg; + struct { + u64 cap_reg; + u64 ecap_reg; + } vtd_cfg; + struct { dma_addr_t pgd; } virt; -- 2.39.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[RFC PATCH 1/5] iommu/virtio-iommu: Correct the values of granule and nr_pages
The value of granule is ilog2(pgsize). When pgsize isn't a power of two, ilog2() rounds down, so the page size implied by granule is smaller than the actual pgsize. E.g., if pgsize = 0x6000 and granule = ilog2(gather->pgsize), then granule = 0xe, and 2^0xe = 0x4000 is smaller than the actual pgsize (0x6000). Invalidating the IOTLB with a smaller range would lead to cache incoherence. So, round pgsize up to the nearest power of two to make sure the page size implied by granule is not less than the actual size. The value of "gather->end - gather->start + 1" needs a similar adjustment. Signed-off-by: Tina Zhang --- drivers/iommu/virtio-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 08e310672e57..b1ceaac974e2 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1289,8 +1289,8 @@ static void viommu_iotlb_sync(struct iommu_domain *domain, if (!gather->pgsize) return; - granule = ilog2(gather->pgsize); - nr_pages = (gather->end - gather->start + 1) >> granule; + granule = ilog2(__roundup_pow_of_two(gather->pgsize)); + nr_pages = __roundup_pow_of_two(gather->end - gather->start + 1) >> granule; req = (struct virtio_iommu_req_invalidate) { .head.type = VIRTIO_IOMMU_T_INVALIDATE, .inv_gran = cpu_to_le16(VIRTIO_IOMMU_INVAL_G_VA), -- 2.39.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[RFC PATCH 0/5] virtio-iommu: Add VT-d IO page table
The proposal for virtio-iommu to support page tables is being discussed on the virtio-comment mailing list[1]. This patch-set, based on Jean's virtio-iommu/pgtables branch[2], tries to follow the proposal and adds basic VT-d IO page table support to virtio-iommu. On Intel platforms with VT-d nested translation enabled, there are two main benefits to letting the virtual IOMMU support the VT-d IO page table: 1) Allowing vSVM (aka vSVA) usage. Virtual Shared Virtual Addressing (vSVA) allows the virtual processor and virtual device to use the same virtual addresses. 2) Accelerating the DMA buffer map operation for vIOVA usage by removing the context switch on each DMA buffer map operation. (Note: this patch-set doesn't include the whole patch-set for enabling vSVM on virtio-iommu; it only includes the part for the vIOVA case. However, the vSVM enabling patch-set needs to be based on this patch-set.) There are three changes in this patch-set: 1) The first patch is a bug fix that resolves an issue where an IOTLB invalidation request carries an incorrect page size. 2) The next 3 patches add generic IO page table support to the VT-d driver. 3) The last one introduces the VT-d page table format to the virtio-iommu driver. 
The patch-set is also available at github: https://github.com/TinaZhangZW/linux/tree/vt-d-pgtable The QEMU part is available here: https://github.com/TinaZhangZW/qemu/tree/virtio-iommu/vt-d-pgtable [1]:https://lists.oasis-open.org/archives/virtio-comment/202310/msg00018.html [2]:https://jpbrucker.net/git/linux/log/?h=virtio-iommu/pgtables Tina Zhang (5): iommu/virtio-iommu: Correct the values of granule and nr_pages iommu/vt-d: Add generic IO page table support iommu/io-pgtable: Introduce struct vtd_cfg iommu/vt-d: Adapt alloc_pgtable interface to be used by others iommu/virtio-iommu: Support attaching VT-d IO pgtable drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 157 ++ drivers/iommu/intel/iommu.h | 7 ++ drivers/iommu/io-pgtable.c| 3 + drivers/iommu/virtio-iommu.c | 27 - include/linux/io-pgtable.h| 7 ++ include/uapi/linux/virtio_iommu.h | 26 + 7 files changed, 226 insertions(+), 2 deletions(-) -- 2.39.3 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
Add basic hook up code to implement generic IO page table framework. Signed-off-by: Tina Zhang --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 94 + drivers/iommu/intel/iommu.h | 7 +++ drivers/iommu/io-pgtable.c | 3 ++ include/linux/io-pgtable.h | 2 + 5 files changed, 107 insertions(+) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 2e56bd79f589..8334e7e50e69 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -15,6 +15,7 @@ config INTEL_IOMMU select DMA_OPS select IOMMU_API select IOMMU_IOVA + select IOMMU_IO_PGTABLE select NEED_DMA_MAP_STATE select DMAR_TABLE select SWIOTLB diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dbcdf7b95b9f..80bd1993861c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "iommu.h" #include "../dma-iommu.h" @@ -67,6 +68,20 @@ #define LEVEL_STRIDE (9) #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) +#define io_pgtable_cfg_to_dmar_pgtable(x) \ + container_of((x), struct dmar_io_pgtable, pgtbl_cfg) + +#define io_pgtable_to_dmar_pgtable(x) \ + container_of((x), struct dmar_io_pgtable, iop) + +#define io_pgtable_to_dmar_domain(x) \ + container_of(io_pgtable_to_dmar_pgtable(x), \ + struct dmar_domain, dmar_iop) + +#define io_pgtable_ops_to_dmar_domain(x) \ + container_of(io_pgtable_to_dmar_pgtable(io_pgtable_ops_to_pgtable(x)), \ + struct dmar_domain, dmar_iop) + static inline int agaw_to_level(int agaw) { return agaw + 2; @@ -5171,3 +5186,82 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob) return ret; } + +static void flush_all(void *cookie) +{ +} + +static void flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void add_page(struct iommu_iotlb_gather *gather, +unsigned long iova, size_t granule, +void *cookie) +{ +} + +static const struct iommu_flush_ops flush_ops = { + .tlb_flush_all 
= flush_all, + .tlb_flush_walk = flush_walk, + .tlb_add_page = add_page, +}; + +static void free_pgtable(struct io_pgtable *iop) +{ + struct dmar_domain *dmar_domain = io_pgtable_to_dmar_domain(iop); + + if (dmar_domain->pgd) { + LIST_HEAD(freelist); + + domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), ); + put_pages_list(); + } +} + +static int pgtable_map_pages(struct io_pgtable_ops *ops, unsigned long iova, +phys_addr_t paddr, size_t pgsize, size_t pgcount, +int iommu_prot, gfp_t gfp, size_t *mapped) +{ + struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops); + + return intel_iommu_map_pages(_domain->domain, iova, paddr, pgsize, +pgcount, iommu_prot, gfp, mapped); +} + +static size_t pgtable_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops); + + return intel_iommu_unmap_pages(_domain->domain, iova, pgsize, + pgcount, gather); +} + +static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops, + unsigned long iova) +{ + struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops); + + return intel_iommu_iova_to_phys(_domain->domain, iova); +} + +static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg); + + pgtable->iop.ops.map_pages = pgtable_map_pages; + pgtable->iop.ops.unmap_pages = pgtable_unmap_pages; + pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys; + + cfg->tlb = _ops; + + return >iop; +} + +struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = { + .alloc = alloc_pgtable, + .free = free_pgtable, +}; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 8d0aac71c135..5207fea6477a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include 
#include @@ -579,6 +580,11 @@ struct iommu_domain_info { * to VT-d spec, section 9.3 */ }; +struct dmar_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable