On Wed, 2026-05-27 at 01:46 -0400, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
> 
> 
> Structure VTDAddressSpace includes some elements suitable for emulated  
> device and passthrough device without PASID, e.g., address space,  
> different memory regions, etc, it is also protected by vtd iommu lock,  
> all these are useless and become a burden for passthrough device with  
> PASID.
> 
> When there are lots of PASIDs used in one device, the AS and MRs are  
> all registered to memory core and impact the whole system performance.
> 
> So instead of using VTDAddressSpace to cache pasid entry for each pasid  
> of a passthrough device, we define a light weight structure  
> VTDAccelPASIDCacheEntry with only necessary elements for each pasid. We  
> will use this struct as a parameter to conduct binding/unbinding to  
> nested hwpt and to record the current bound nested hwpt. It's also  
> designed to support IOMMU_NO_PASID.
> 
> VTDAccelPASIDCacheEntry is designed to only be used in intel_iommu_accel.c,  

For consistency with next line:  
s@intel_iommu_accel.c@hw/i386/intel_iommu_accel.c

> similarly VTDPASIDCacheEntry should only be used in hw/i386/intel_iommu.c
> 
> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc  
> (pasid cache invalidation) request, walk through each pasid in each  
> passthrough device for valid pasid entries, create a new  
> VTDAccelPASIDCacheEntry if not existing yet.
> 
> IOMMU_NO_PASID of passthrough device still need to register MRs in case  
> guest does not operate in scalable mode. So for IOMMU_NO_PASID, we have  
> both VTDPASIDCacheEntry and VTDAccelPASIDCacheEntry.
> 
> Co-developed-by: Yi Liu <[[email protected]](mailto:[email protected])>  
> Signed-off-by: Yi Liu <[[email protected]](mailto:[email protected])>  
> Signed-off-by: Zhenzhong Duan 
> <[[email protected]](mailto:[email protected])>  
> Tested-by: Xudong Hao <[[email protected]](mailto:[email protected])>  
> ---  
>  hw/i386/intel_iommu_accel.h    |  13 +++  
>  hw/i386/intel_iommu_internal.h |   8 ++  
>  hw/i386/intel_iommu.c          |   3 +  
>  hw/i386/intel_iommu_accel.c    | 156 +++++++++++++++++++++++++++++++++  
>  4 files changed, 180 insertions(+)
> 
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h  
> index e5f0b077b4..c9b1823745 100644  
> --- a/hw/i386/intel_iommu_accel.h  
> +++ b/hw/i386/intel_iommu_accel.h  
> @@ -12,6 +12,13 @@  
>  #define HW_I386_INTEL_IOMMU_ACCEL_H  
>  #include CONFIG_DEVICES
> 
> +typedef struct VTDAccelPASIDCacheEntry {  
> +    VTDHostIOMMUDevice *vtd_hiod;  
> +    VTDPASIDEntry pasid_entry;  
> +    uint32_t pasid;  
> +    QLIST_ENTRY(VTDAccelPASIDCacheEntry) next;  
> +} VTDAccelPASIDCacheEntry;  
> +  
>  #ifdef CONFIG_VTD_ACCEL  
>  bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,  
>                            Error **errp);  
> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, 
> Error **errp);  
>  void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t 
> domain_id,  
>                                        uint32_t pasid, hwaddr addr,  
>                                        uint64_t npages, bool ih);  
> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo 
> *pc_info);  
>  void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);  
>  #else  
>  static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,  
> @@ -49,6 +57,11 @@ static inline void 
> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,  
>  {  
>  }
> 
> +static inline void vtd_accel_pasid_cache_sync(IntelIOMMUState *s,  
> +                                              VTDPASIDCacheInfo *pc_info)  
> +{  
> +}  
> +  
>  static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)  
>  {  
>  }  
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h  
> index ee59df09f8..9307825017 100644  
> --- a/hw/i386/intel_iommu_internal.h  
> +++ b/hw/i386/intel_iommu_internal.h  
> @@ -615,6 +615,7 @@ typedef struct VTDRootEntry VTDRootEntry;  
>  #define VTD_CTX_ENTRY_LEGACY_SIZE     16  
>  #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
> 
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)  
>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))  
>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL  
>  #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL  
> @@ -645,6 +646,7 @@ typedef struct VTDPIOTLBInvInfo {  
>  #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)  
>  #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) & 
> VTD_PASID_DIR_BITS_MASK)  
>  #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing 
> Disable */  
> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)  
>  #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)  
>  #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)  
>  #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing 
> Disable */  
> @@ -710,6 +712,7 @@ typedef struct VTDHostIOMMUDevice {  
>      PCIBus *bus;  
>      uint8_t devfn;  
>      HostIOMMUDevice *hiod;  
> +    QLIST_HEAD(, VTDAccelPASIDCacheEntry) pasid_cache_list;  
>  } VTDHostIOMMUDevice;
> 
>  /*  
> @@ -767,6 +770,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry 
> *p1, VTDPASIDEntry *p2)  
>      return memcmp(p1, p2, sizeof(*p1));  
>  }
> 
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)  
> +{  
> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);  
> +}  
> +  
>  int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid, 
>  
>                                    VTDPASIDDirEntry *pdire);  
>  int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,  
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c  
> index 2deea8fbe2..94a6a740f1 100644  
> --- a/hw/i386/intel_iommu.c  
> +++ b/hw/i386/intel_iommu.c  
> @@ -3181,6 +3181,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, 
> VTDPASIDCacheInfo *pc_info)  
>      g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked, 
>  
>                           pc_info);  
>      vtd_iommu_unlock(s);  
> +  
> +    vtd_accel_pasid_cache_sync(s, pc_info);  
>  }
> 
>  static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)  
> @@ -4751,6 +4753,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void 
> *opaque, int devfn,  
>      vtd_hiod->devfn = (uint8_t)devfn;  
>      vtd_hiod->iommu_state = s;  
>      vtd_hiod->hiod = hiod;  
> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
> 
>      if (!vtd_check_hiod(s, vtd_hiod, errp)) {  
>          g_free(vtd_hiod);  
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c  
> index 10bdbba632..a66d63b4c8 100644  
> --- a/hw/i386/intel_iommu_accel.c  
> +++ b/hw/i386/intel_iommu_accel.c  
> @@ -259,6 +259,162 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState 
> *s, uint16_t domain_id,  
>                           vtd_flush_host_piotlb_locked, &piotlb_info);  
>  }
> 
> +static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,  
> +                              VTDPASIDEntry *pe)  
> +{  
> +    VTDAccelPASIDCacheEntry *vtd_pce;  
> +  
> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {  
> +        if (vtd_pce->pasid == pasid) {  
> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pasid_entry)) {  
> +                vtd_pce->pasid_entry = *pe;  
> +            }  
> +            return;  
> +        }  
> +    }  
> +  
> +    vtd_pce = g_malloc0(sizeof(VTDAccelPASIDCacheEntry));  
> +    vtd_pce->vtd_hiod = vtd_hiod;  
> +    vtd_pce->pasid = pasid;  
> +    vtd_pce->pasid_entry = *pe;  
> +    QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);  
> +}  
> +  
> +/*  
> + * This function walks over PASID range within [start, end) in a single  
> + * PASID table for entries matching @info type/did, then create  
> + * VTDAccelPASIDCacheEntry if not exist yet.  
> + */  
> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,  
> +                                        dma_addr_t pt_base, int start, int 
> end,  
> +                                        VTDPASIDCacheInfo *info)  
> +{  
> +    IntelIOMMUState *s = vtd_hiod->iommu_state;  
> +    VTDPASIDEntry pe;  
> +    int pasid;  
> +  
> +    for (pasid = start; pasid < end; pasid++) {  
> +        if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||  
> +            !vtd_pe_present(&pe)) {  
> +            continue;  
> +        }  
> +  
> +        if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||  
> +             info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&  
> +            (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {  
> +            /*  
> +             * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI  
> +             * requires domain id check. If domain id check fail,  
> +             * go to next pasid.  
> +             */  
> +            continue;  
> +        }  
> +  
> +        vtd_accel_fill_pc(vtd_hiod, pasid, &pe);  
> +    }  
> +}  
> +  
> +/*  
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.  
> + * This function aims at looping over a range of PASIDs in the given  
> + * two level table to identify the pasid config in guest.  
> + */  
> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,  
> +                                    dma_addr_t pdt_base, int start, int end, 
>  
> +                                    VTDPASIDCacheInfo *info)  
> +{  
> +    VTDPASIDDirEntry pdire;  
> +    int pasid = start;  
> +    int pasid_next;  
> +    dma_addr_t pt_base;  
> +  
> +    while (pasid < end) {  
> +        pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &  
> +                     ~(VTD_PASID_TABLE_ENTRY_NUM - 1);  
> +        pasid_next = pasid_next < end ? pasid_next : end;  
> +  
> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)  
> +            && vtd_pdire_present(&pdire)) {  
> +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;  
> +            vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, 
> pasid_next,  
> +                                        info);  
> +        }  
> +        pasid = pasid_next;  
> +    }  
> +}  
> +  
> +static void vtd_accel_replay_pasid_bind_for_dev(VTDHostIOMMUDevice 
> *vtd_hiod,  
> +                                                int start, int end,  
> +                                                VTDPASIDCacheInfo *pc_info)  
> +{  
> +    IntelIOMMUState *s = vtd_hiod->iommu_state;  
> +    VTDContextEntry ce;  
> +    int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;  
> +  
> +    if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),  
> +                                  vtd_hiod->devfn, &ce)) {  
> +        VTDPASIDCacheInfo walk_info = *pc_info;  
> +        uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *  
> +                                VTD_PASID_TABLE_ENTRY_NUM;  
> +  
> +        end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));  
> +  
> +        vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),  
> +                                start, end, &walk_info);  
> +    }  
> +}  
> +  
> +/*  
> + * This function replays the guest pasid bindings by walking the two level  
> + * guest PASID table. For each valid pasid entry, it creates an entry  
> + * VTDAccelPASIDCacheEntry dynamically if not exist yet. This entry holds  
> + * info specific to a pasid  
> + */  
> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo 
> *pc_info)  
> +{  
> +    int start = IOMMU_NO_PASID, end = 1 << s->pasid;  
> +    VTDHostIOMMUDevice *vtd_hiod;  
> +    GHashTableIter hiod_it;  
> +  
> +    if (!s->fsts) {  
> +        return;  
> +    }  
> +  
> +    switch (pc_info->type) {  
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:  
> +        start = pc_info->pasid;  
> +        end = pc_info->pasid + 1;  
> +        /* fall through */  
> +    case VTD_INV_DESC_PASIDC_G_DSI:  
> +        /*  
> +         * loop all assigned devices, do domain id check in  
> +         * vtd_sm_pasid_table_walk_one() after get pasid entry.  
> +         */  
> +        break;  
> +    case VTD_INV_DESC_PASIDC_G_GLOBAL:  
> +        /* loop all assigned devices */  
> +        break;  
> +    default:  
> +        g_assert_not_reached();  
> +    }  
> +  
> +    /*  
> +     * Loop all the vtd_hiod instances to sync the "pasid cache" per the  
> +     * guest pasid configuration.  
> +     *  
> +     * VTD translation callback never accesses vtd_hiod and its 
> corresponding  
> +     * cached pasid entry, so no iommu lock needed here.  
> +     */  
> +    g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev);  
> +    while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) {  
> +        if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),  
> +                                 TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {  
> +            continue;  
> +        }  
> +        vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);  
> +    }  
> +}  
> +  
>  static uint64_t vtd_get_host_iommu_quirks(uint32_t type,  
>                                            void *caps, uint32_t size)  
>  {  
> --  
> 2.47.3
> 

Reply via email to