> From: Jan Beulich
> Sent: Tuesday, January 11, 2022 12:32 AM
>
> ... depending on feature availability (and absence of quirks).
>
> Also make the page table dumping function aware of superpages.
>
> Signed-off-by: Jan Beulich <jbeul...@suse.com>
Reviewed-by: Kevin Tian <kevin.t...@intel.com>

> ---
> v3: Rename queue_free_pt()'s last parameter. Replace "level > 1" checks
>     where possible. Tighten assertion.
>
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -826,18 +826,37 @@ static int __must_check iommu_flush_iotl
>      return iommu_flush_iotlb(d, INVALID_DFN, 0, 0);
>  }
>
> +static void queue_free_pt(struct domain *d, mfn_t mfn, unsigned int level)
> +{
> +    if ( level > 1 )
> +    {
> +        struct dma_pte *pt = map_domain_page(mfn);
> +        unsigned int i;
> +
> +        for ( i = 0; i < PTE_NUM; ++i )
> +            if ( dma_pte_present(pt[i]) && !dma_pte_superpage(pt[i]) )
> +                queue_free_pt(d, maddr_to_mfn(dma_pte_addr(pt[i])),
> +                              level - 1);
> +
> +        unmap_domain_page(pt);
> +    }
> +
> +    iommu_queue_free_pgtable(d, mfn_to_page(mfn));
> +}
> +
>  /* clear one page's page table */
>  static int dma_pte_clear_one(struct domain *domain, daddr_t addr,
>                               unsigned int order,
>                               unsigned int *flush_flags)
>  {
>      struct domain_iommu *hd = dom_iommu(domain);
> -    struct dma_pte *page = NULL, *pte = NULL;
> +    struct dma_pte *page = NULL, *pte = NULL, old;
>      u64 pg_maddr;
> +    unsigned int level = (order / LEVEL_STRIDE) + 1;
>
>      spin_lock(&hd->arch.mapping_lock);
> -    /* get last level pte */
> -    pg_maddr = addr_to_dma_page_maddr(domain, addr, 1, flush_flags,
> false);
> +    /* get target level pte */
> +    pg_maddr = addr_to_dma_page_maddr(domain, addr, level, flush_flags,
> false);
>      if ( pg_maddr < PAGE_SIZE )
>      {
>          spin_unlock(&hd->arch.mapping_lock);
> @@ -845,7 +864,7 @@ static int dma_pte_clear_one(struct doma
>      }
>
>      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
> -    pte = page + address_level_offset(addr, 1);
> +    pte = &page[address_level_offset(addr, level)];
>
>      if ( !dma_pte_present(*pte) )
>      {
> @@ -854,14 +873,20 @@ static int dma_pte_clear_one(struct doma
>          return 0;
>      }
>
> +    old = *pte;
>      dma_clear_pte(*pte);
> -    *flush_flags |= IOMMU_FLUSHF_modified;
>
>      spin_unlock(&hd->arch.mapping_lock);
>      iommu_sync_cache(pte, sizeof(struct dma_pte));
>
>      unmap_vtd_domain_page(page);
>
> +    *flush_flags |= IOMMU_FLUSHF_modified;
> +
> +    if ( order && !dma_pte_superpage(old) )
> +        queue_free_pt(domain, maddr_to_mfn(dma_pte_addr(old)),
> +                      order / LEVEL_STRIDE);
> +
>      return 0;
>  }
>
> @@ -1952,6 +1977,7 @@ static int __must_check intel_iommu_map_
>      struct domain_iommu *hd = dom_iommu(d);
>      struct dma_pte *page, *pte, old, new = {};
>      u64 pg_maddr;
> +    unsigned int level = (IOMMUF_order(flags) / LEVEL_STRIDE) + 1;
>      int rc = 0;
>
>      /* Do nothing if VT-d shares EPT page table */
> @@ -1976,7 +2002,7 @@ static int __must_check intel_iommu_map_
>          return 0;
>      }
>
> -    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1,
> flush_flags,
> +    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), level,
> flush_flags,
>                                        true);
>      if ( pg_maddr < PAGE_SIZE )
>      {
> @@ -1985,13 +2011,15 @@ static int __must_check intel_iommu_map_
>      }
>
>      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
> -    pte = &page[dfn_x(dfn) & LEVEL_MASK];
> +    pte = &page[address_level_offset(dfn_to_daddr(dfn), level)];
>      old = *pte;
>
>      dma_set_pte_addr(new, mfn_to_maddr(mfn));
>      dma_set_pte_prot(new,
>                       ((flags & IOMMUF_readable) ? DMA_PTE_READ : 0) |
>                       ((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0));
> +    if ( IOMMUF_order(flags) )
> +        dma_set_pte_superpage(new);
>
>      /* Set the SNP on leaf page table if Snoop Control available */
>      if ( iommu_snoop )
> @@ -2012,8 +2040,14 @@ static int __must_check intel_iommu_map_
>
>      *flush_flags |= IOMMU_FLUSHF_added;
>      if ( dma_pte_present(old) )
> +    {
>          *flush_flags |= IOMMU_FLUSHF_modified;
>
> +        if ( IOMMUF_order(flags) && !dma_pte_superpage(old) )
> +            queue_free_pt(d, maddr_to_mfn(dma_pte_addr(old)),
> +                          IOMMUF_order(flags) / LEVEL_STRIDE);
> +    }
> +
>      return rc;
>  }
>
> @@ -2370,6 +2404,7 @@ static int __init vtd_setup(void)
>  {
>      struct acpi_drhd_unit *drhd;
>      struct vtd_iommu *iommu;
> +    unsigned int large_sizes = PAGE_SIZE_2M | PAGE_SIZE_1G;
>      int ret;
>      bool reg_inval_supported = true;
>
> @@ -2412,6 +2447,11 @@ static int __init vtd_setup(void)
>                 cap_sps_2mb(iommu->cap) ? ", 2MB" : "",
>                 cap_sps_1gb(iommu->cap) ? ", 1GB" : "");
>
> +        if ( !cap_sps_2mb(iommu->cap) )
> +            large_sizes &= ~PAGE_SIZE_2M;
> +        if ( !cap_sps_1gb(iommu->cap) )
> +            large_sizes &= ~PAGE_SIZE_1G;
> +
>  #ifndef iommu_snoop
>          if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
>              iommu_snoop = false;
> @@ -2483,6 +2523,9 @@ static int __init vtd_setup(void)
>      if ( ret )
>          goto error;
>
> +    ASSERT(iommu_ops.page_sizes == PAGE_SIZE_4K);
> +    iommu_ops.page_sizes |= large_sizes;
> +
>      register_keyhandler('V', vtd_dump_iommu_info, "dump iommu info", 1);
>
>      return 0;
> @@ -2797,7 +2840,7 @@ static void vtd_dump_page_table_level(pa
>              continue;
>
>          address = gpa + offset_level_address(i, level);
> -        if ( next_level >= 1 )
> +        if ( next_level && !dma_pte_superpage(*pte) )
>              vtd_dump_page_table_level(dma_pte_addr(*pte), next_level,
>                                        address, indent + 1);
>          else
>