> From: Jan Beulich
> Sent: Tuesday, January 11, 2022 12:32 AM
> 
> ... depending on feature availability (and absence of quirks).
> 
> Also make the page table dumping function aware of superpages.
> 
> Signed-off-by: Jan Beulich <jbeul...@suse.com>

Reviewed-by: Kevin Tian <kevin.t...@intel.com>

> ---
> v3: Rename queue_free_pt()'s last parameter. Replace "level > 1" checks
>     where possible. Tighten assertion.
> 
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -826,18 +826,37 @@ static int __must_check iommu_flush_iotl
>      return iommu_flush_iotlb(d, INVALID_DFN, 0, 0);
>  }
> 
> +static void queue_free_pt(struct domain *d, mfn_t mfn, unsigned int level)
> +{
> +    if ( level > 1 )
> +    {
> +        struct dma_pte *pt = map_domain_page(mfn);
> +        unsigned int i;
> +
> +        for ( i = 0; i < PTE_NUM; ++i )
> +            if ( dma_pte_present(pt[i]) && !dma_pte_superpage(pt[i]) )
> +                queue_free_pt(d, maddr_to_mfn(dma_pte_addr(pt[i])),
> +                              level - 1);
> +
> +        unmap_domain_page(pt);
> +    }
> +
> +    iommu_queue_free_pgtable(d, mfn_to_page(mfn));
> +}
> +
>  /* clear one page's page table */
>  static int dma_pte_clear_one(struct domain *domain, daddr_t addr,
>                               unsigned int order,
>                               unsigned int *flush_flags)
>  {
>      struct domain_iommu *hd = dom_iommu(domain);
> -    struct dma_pte *page = NULL, *pte = NULL;
> +    struct dma_pte *page = NULL, *pte = NULL, old;
>      u64 pg_maddr;
> +    unsigned int level = (order / LEVEL_STRIDE) + 1;
> 
>      spin_lock(&hd->arch.mapping_lock);
> -    /* get last level pte */
> -    pg_maddr = addr_to_dma_page_maddr(domain, addr, 1, flush_flags, false);
> +    /* get target level pte */
> +    pg_maddr = addr_to_dma_page_maddr(domain, addr, level, flush_flags, false);
>      if ( pg_maddr < PAGE_SIZE )
>      {
>          spin_unlock(&hd->arch.mapping_lock);
> @@ -845,7 +864,7 @@ static int dma_pte_clear_one(struct doma
>      }
> 
>      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
> -    pte = page + address_level_offset(addr, 1);
> +    pte = &page[address_level_offset(addr, level)];
> 
>      if ( !dma_pte_present(*pte) )
>      {
> @@ -854,14 +873,20 @@ static int dma_pte_clear_one(struct doma
>          return 0;
>      }
> 
> +    old = *pte;
>      dma_clear_pte(*pte);
> -    *flush_flags |= IOMMU_FLUSHF_modified;
> 
>      spin_unlock(&hd->arch.mapping_lock);
>      iommu_sync_cache(pte, sizeof(struct dma_pte));
> 
>      unmap_vtd_domain_page(page);
> 
> +    *flush_flags |= IOMMU_FLUSHF_modified;
> +
> +    if ( order && !dma_pte_superpage(old) )
> +        queue_free_pt(domain, maddr_to_mfn(dma_pte_addr(old)),
> +                      order / LEVEL_STRIDE);
> +
>      return 0;
>  }
> 
> @@ -1952,6 +1977,7 @@ static int __must_check intel_iommu_map_
>      struct domain_iommu *hd = dom_iommu(d);
>      struct dma_pte *page, *pte, old, new = {};
>      u64 pg_maddr;
> +    unsigned int level = (IOMMUF_order(flags) / LEVEL_STRIDE) + 1;
>      int rc = 0;
> 
>      /* Do nothing if VT-d shares EPT page table */
> @@ -1976,7 +2002,7 @@ static int __must_check intel_iommu_map_
>          return 0;
>      }
> 
> -    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
> +    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), level, flush_flags,
>                                        true);
>      if ( pg_maddr < PAGE_SIZE )
>      {
> @@ -1985,13 +2011,15 @@ static int __must_check intel_iommu_map_
>      }
> 
>      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
> -    pte = &page[dfn_x(dfn) & LEVEL_MASK];
> +    pte = &page[address_level_offset(dfn_to_daddr(dfn), level)];
>      old = *pte;
> 
>      dma_set_pte_addr(new, mfn_to_maddr(mfn));
>      dma_set_pte_prot(new,
>                       ((flags & IOMMUF_readable) ? DMA_PTE_READ  : 0) |
>                       ((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0));
> +    if ( IOMMUF_order(flags) )
> +        dma_set_pte_superpage(new);
> 
>      /* Set the SNP on leaf page table if Snoop Control available */
>      if ( iommu_snoop )
> @@ -2012,8 +2040,14 @@ static int __must_check intel_iommu_map_
> 
>      *flush_flags |= IOMMU_FLUSHF_added;
>      if ( dma_pte_present(old) )
> +    {
>          *flush_flags |= IOMMU_FLUSHF_modified;
> 
> +        if ( IOMMUF_order(flags) && !dma_pte_superpage(old) )
> +            queue_free_pt(d, maddr_to_mfn(dma_pte_addr(old)),
> +                          IOMMUF_order(flags) / LEVEL_STRIDE);
> +    }
> +
>      return rc;
>  }
> 
> @@ -2370,6 +2404,7 @@ static int __init vtd_setup(void)
>  {
>      struct acpi_drhd_unit *drhd;
>      struct vtd_iommu *iommu;
> +    unsigned int large_sizes = PAGE_SIZE_2M | PAGE_SIZE_1G;
>      int ret;
>      bool reg_inval_supported = true;
> 
> @@ -2412,6 +2447,11 @@ static int __init vtd_setup(void)
>                 cap_sps_2mb(iommu->cap) ? ", 2MB" : "",
>                 cap_sps_1gb(iommu->cap) ? ", 1GB" : "");
> 
> +        if ( !cap_sps_2mb(iommu->cap) )
> +            large_sizes &= ~PAGE_SIZE_2M;
> +        if ( !cap_sps_1gb(iommu->cap) )
> +            large_sizes &= ~PAGE_SIZE_1G;
> +
>  #ifndef iommu_snoop
>          if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
>              iommu_snoop = false;
> @@ -2483,6 +2523,9 @@ static int __init vtd_setup(void)
>      if ( ret )
>          goto error;
> 
> +    ASSERT(iommu_ops.page_sizes == PAGE_SIZE_4K);
> +    iommu_ops.page_sizes |= large_sizes;
> +
>      register_keyhandler('V', vtd_dump_iommu_info, "dump iommu info", 1);
> 
>      return 0;
> @@ -2797,7 +2840,7 @@ static void vtd_dump_page_table_level(pa
>              continue;
> 
>          address = gpa + offset_level_address(i, level);
> -        if ( next_level >= 1 )
> +        if ( next_level && !dma_pte_superpage(*pte) )
>              vtd_dump_page_table_level(dma_pte_addr(*pte), next_level,
>                                        address, indent + 1);
>          else
> 

Reply via email to