>-----Original Message----- >From: Liu, Yi L <[email protected]> >Subject: Re: [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition >for >pc_inv_dsc request > >On 5/9/26 12:08, Zhenzhong Duan wrote: >> Structure VTDAddressSpace includes some elements suitable for emulated >> device and passthrough device without PASID, e.g., address space, >> different memory regions, etc, it is also protected by vtd iommu lock, >> all these are useless and become a burden for passthrough device with >> PASID. >> >> When there are lots of PASIDs used in one device, the AS and MRs are >> all registered to memory core and impact the whole system performance. >> >> So instead of using VTDAddressSpace to cache pasid entry for each pasid >> of a passthrough device, we define a light weight structure >> VTDAccelPASIDCacheEntry with only necessary elements for each pasid. We >> will use this struct as a parameter to conduct binding/unbinding to >> nested hwpt and to record the current bound nested hwpt. It's also >> designed to support IOMMU_NO_PASID. >> >> VTDAccelPASIDCacheEntry is designed to only be used in intel_iommu_accel.c, >> similarly VTDPASIDCacheEntry should only be used in hw/i386/intel_iommu.c >> >> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc >> (pasid cache invalidation) request, walk through each pasid in each >> passthrough device for valid pasid entries, create a new >> VTDAccelPASIDCacheEntry if not existing yet. >> >> IOMMU_NO_PASID of passthrough device still need to register MRs in case >> guest does not operate in scalable mode. So for IOMMU_NO_PASID, we have >> both VTDAPASIDCacheEntry and VTDAccelPASIDCacheEntry. > >The implementation LGTM. But I got a question to ask here. >VTDAPASIDCacheEntry is cached in VTDAddressSpace, while >VTDAccelPASIDCacheEntry is cached in VTDHostIOMMUDevice. A natural >question is why usingVTDHostIOMMUDevice instead of VTDAddressSpace. I >think it might be valuable to mark the reason of this choice. This >would help maintaining it in future as we might forgot the reason. :)
Will do. Thanks Zhenzhong > >> Co-developed-by: Yi Liu <[email protected]> >> Signed-off-by: Yi Liu <[email protected]> >> Signed-off-by: Zhenzhong Duan <[email protected]> >> Tested-by: Xudong Hao <[email protected]> >> --- >> hw/i386/intel_iommu_accel.h | 13 +++ >> hw/i386/intel_iommu_internal.h | 8 ++ >> hw/i386/intel_iommu.c | 3 + >> hw/i386/intel_iommu_accel.c | 156 +++++++++++++++++++++++++++++++++ >> 4 files changed, 180 insertions(+) >> >> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h >> index e5f0b077b4..c9b1823745 100644 >> --- a/hw/i386/intel_iommu_accel.h >> +++ b/hw/i386/intel_iommu_accel.h >> @@ -12,6 +12,13 @@ >> #define HW_I386_INTEL_IOMMU_ACCEL_H >> #include CONFIG_DEVICES >> >> +typedef struct VTDAccelPASIDCacheEntry { >> + VTDHostIOMMUDevice *vtd_hiod; >> + VTDPASIDEntry pasid_entry; >> + uint32_t pasid; >> + QLIST_ENTRY(VTDAccelPASIDCacheEntry) next; >> +} VTDAccelPASIDCacheEntry; >> + >> #ifdef CONFIG_VTD_ACCEL >> bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice >*vtd_hiod, >> Error **errp); >> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace >*vtd_as, Error **errp); >> void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t >domain_id, >> uint32_t pasid, hwaddr addr, >> uint64_t npages, bool ih); >> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo >*pc_info); >> void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops); >> #else >> static inline bool vtd_check_hiod_accel(IntelIOMMUState *s, >> @@ -49,6 +57,11 @@ static inline void >vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, >> { >> } >> >> +static inline void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, >> + VTDPASIDCacheInfo *pc_info) >> +{ >> +} >> + >> static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops) >> { >> } >> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h >> index 0141316f83..623dc24760 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -615,6 +615,7 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_CTX_ENTRY_LEGACY_SIZE 16 >> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32 >> >> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3) >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | >~VTD_HAW_MASK(aw)) >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL >> #define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL >> @@ -645,6 +646,7 @@ typedef struct VTDPIOTLBInvInfo { >> #define VTD_PASID_DIR_BITS_MASK (0x3fffULL) >> #define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & >VTD_PASID_DIR_BITS_MASK) >> #define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing >> Disable */ >> +#define VTD_PASID_TABLE_ENTRY_NUM (1ULL << 6) >> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL) >> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & >VTD_PASID_TABLE_BITS_MASK) >> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing >> Disable >*/ >> @@ -710,6 +712,7 @@ typedef struct VTDHostIOMMUDevice { >> PCIBus *bus; >> uint8_t devfn; >> HostIOMMUDevice *hiod; >> + QLIST_HEAD(, VTDAccelPASIDCacheEntry) pasid_cache_list; >> } VTDHostIOMMUDevice; >> >> /* >> @@ -767,6 +770,11 @@ static inline int >vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2) >> return memcmp(p1, p2, sizeof(*p1)); >> } >> >> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce) >> +{ >> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7); >> +} >> + >> int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t >> pasid, >> VTDPASIDDirEntry *pdire); >> int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid, >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index b50c556c40..e1e32959d3 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -3181,6 +3181,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState >*s, VTDPASIDCacheInfo *pc_info) >> g_hash_table_foreach(s->vtd_address_spaces, >vtd_pasid_cache_sync_locked, >> pc_info); >> vtd_iommu_unlock(s); >> + >> + vtd_accel_pasid_cache_sync(s, pc_info); >> } >> >> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s) >> @@ -4751,6 +4753,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, >void *opaque, int devfn, >> vtd_hiod->devfn = (uint8_t)devfn; >> vtd_hiod->iommu_state = s; >> vtd_hiod->hiod = hiod; >> + QLIST_INIT(&vtd_hiod->pasid_cache_list); >> >> if (!vtd_check_hiod(s, vtd_hiod, errp)) { >> g_free(vtd_hiod); >> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c >> index 10bdbba632..a66d63b4c8 100644 >> --- a/hw/i386/intel_iommu_accel.c >> +++ b/hw/i386/intel_iommu_accel.c >> @@ -259,6 +259,162 @@ void >vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id, >> vtd_flush_host_piotlb_locked, &piotlb_info); >> } >> >> +static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid, >> + VTDPASIDEntry *pe) >> +{ >> + VTDAccelPASIDCacheEntry *vtd_pce; >> + >> + QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) { >> + if (vtd_pce->pasid == pasid) { >> + if (vtd_pasid_entry_compare(pe, &vtd_pce->pasid_entry)) { >> + vtd_pce->pasid_entry = *pe; >> + } >> + return; >> + } >> + } >> + >> + vtd_pce = g_malloc0(sizeof(VTDAccelPASIDCacheEntry)); >> + vtd_pce->vtd_hiod = vtd_hiod; >> + vtd_pce->pasid = pasid; >> + vtd_pce->pasid_entry = *pe; >> + QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next); >> +} >> + >> +/* >> + * This function walks over PASID range within [start, end) in a single >> + * PASID table for entries matching @info type/did, then create >> + * VTDAccelPASIDCacheEntry if not exist yet. >> + */ >> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod, >> + dma_addr_t pt_base, int start, int >> end, >> + VTDPASIDCacheInfo *info) >> +{ >> + IntelIOMMUState *s = vtd_hiod->iommu_state; >> + VTDPASIDEntry pe; >> + int pasid; >> + >> + for (pasid = start; pasid < end; pasid++) { >> + if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) || >> + !vtd_pe_present(&pe)) { >> + continue; >> + } >> + >> + if ((info->type == VTD_INV_DESC_PASIDC_G_DSI || >> + info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) && >> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) { >> + /* >> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI >> + * requires domain id check. If domain id check fail, >> + * go to next pasid. >> + */ >> + continue; >> + } >> + >> + vtd_accel_fill_pc(vtd_hiod, pasid, &pe); >> + } >> +} >> + >> +/* >> + * In VT-d scalable mode translation, PASID dir + PASID table is used. >> + * This function aims at looping over a range of PASIDs in the given >> + * two level table to identify the pasid config in guest. >> + */ >> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod, >> + dma_addr_t pdt_base, int start, int end, >> + VTDPASIDCacheInfo *info) >> +{ >> + VTDPASIDDirEntry pdire; >> + int pasid = start; >> + int pasid_next; >> + dma_addr_t pt_base; >> + >> + while (pasid < end) { >> + pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) & >> + ~(VTD_PASID_TABLE_ENTRY_NUM - 1); >> + pasid_next = pasid_next < end ? pasid_next : end; >> + >> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire) >> + && vtd_pdire_present(&pdire)) { >> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK; >> + vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, >> pasid_next, >> + info); >> + } >> + pasid = pasid_next; >> + } >> +} >> + >> +static void vtd_accel_replay_pasid_bind_for_dev(VTDHostIOMMUDevice >*vtd_hiod, >> + int start, int end, >> + VTDPASIDCacheInfo *pc_info) >> +{ >> + IntelIOMMUState *s = vtd_hiod->iommu_state; >> + VTDContextEntry ce; >> + int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2; >> + >> + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus), >> + vtd_hiod->devfn, &ce)) { >> + VTDPASIDCacheInfo walk_info = *pc_info; >> + uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) * >> + VTD_PASID_TABLE_ENTRY_NUM; >> + >> + end = MIN(end, MIN(dev_max_pasid, ce_max_pasid)); >> + >> + vtd_sm_pasid_table_walk(vtd_hiod, >VTD_CE_GET_PASID_DIR_TABLE(&ce), >> + start, end, &walk_info); >> + } >> +} >> + >> +/* >> + * This function replays the guest pasid bindings by walking the two level >> + * guest PASID table. For each valid pasid entry, it creates an entry >> + * VTDAccelPASIDCacheEntry dynamically if not exist yet. This entry holds >> + * info specific to a pasid >> + */ >> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo >*pc_info) >> +{ >> + int start = IOMMU_NO_PASID, end = 1 << s->pasid; >> + VTDHostIOMMUDevice *vtd_hiod; >> + GHashTableIter hiod_it; >> + >> + if (!s->fsts) { >> + return; >> + } >> + >> + switch (pc_info->type) { >> + case VTD_INV_DESC_PASIDC_G_PASID_SI: >> + start = pc_info->pasid; >> + end = pc_info->pasid + 1; >> + /* fall through */ >> + case VTD_INV_DESC_PASIDC_G_DSI: >> + /* >> + * loop all assigned devices, do domain id check in >> + * vtd_sm_pasid_table_walk_one() after get pasid entry. >> + */ >> + break; >> + case VTD_INV_DESC_PASIDC_G_GLOBAL: >> + /* loop all assigned devices */ >> + break; >> + default: >> + g_assert_not_reached(); >> + } >> + >> + /* >> + * Loop all the vtd_hiod instances to sync the "pasid cache" per the >> + * guest pasid configuration. >> + * >> + * VTD translation callback never accesses vtd_hiod and its >> corresponding >> + * cached pasid entry, so no iommu lock needed here. >> + */ >> + g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev); >> + while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) { >> + if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod), >> + TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) { >> + continue; >> + } >> + vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info); >> + } >> +} >> + >> static uint64_t vtd_get_host_iommu_quirks(uint32_t type, >> void *caps, uint32_t size) >> {
