Hi Eric, >-----Original Message----- >From: Eric Auger <eric.au...@redhat.com> >Subject: Re: [PATCH v5 12/21] intel_iommu: Handle PASID entry addition > >Hi Zhenzhong, > >On 8/22/25 8:40 AM, Zhenzhong Duan wrote: >> When guest creates new PASID entries, QEMU will capture the guest pasid >> selective pasid cache invalidation, walk through each passthrough device >> and each pasid, when a match is found, identify an existing vtd_as or >> create a new one and update its corresponding cached pasid entry. > >You need to emphasize that the support is currently limited to >Requests-without-PASID
OK. >> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> hw/i386/intel_iommu_internal.h | 2 + >> hw/i386/intel_iommu.c | 176 >++++++++++++++++++++++++++++++++- >> 2 files changed, 175 insertions(+), 3 deletions(-) >> >> diff --git a/hw/i386/intel_iommu_internal.h >b/hw/i386/intel_iommu_internal.h >> index b9b76dd996..fb2a919e87 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -559,6 +559,7 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_CTX_ENTRY_LEGACY_SIZE 16 >> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32 >> >> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], >9, 3) >> #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | >~VTD_HAW_MASK(aw)) >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 >0xffffffffffe00000ULL >> @@ -589,6 +590,7 @@ typedef struct VTDPASIDCacheInfo { >> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL) >> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & >VTD_PASID_TABLE_BITS_MASK) >> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault >Processing Disable */ >> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6) >> >> /* PASID Granular Translation Type Mask */ >> #define VTD_PASID_ENTRY_P 1ULL >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index a2ee6d684e..7d2c9feae7 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -826,6 +826,11 @@ static inline bool >vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) >> } >> } >> >> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry >*ce) >> +{ >> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7); >> +} >> + >> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) >> { >> return pdire->val & 1; >> @@ -1647,9 +1652,9 @@ static gboolean >vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer value, >> } >> >> /* Translate iommu pasid to vtd_as */ >> -static inline >> -VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState >*s, >> - uint16_t sid, >uint32_t pasid) >> +static VTDAddressSpace >*vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s, >> + >uint16_t sid, >> + >uint32_t pasid) >> { >> struct vtd_as_raw_key key = { >> .sid = sid, >> @@ -3220,10 +3225,172 @@ remove: >> return true; >> } >> >> +/* >> + * This function walks over PASID range within [start, end) in a single >> + * PASID table for entries matching @info type/did, then retrieve/create >> + * vtd_as and fill associated pasid entry cache. >> + */ >> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s, >> + dma_addr_t pt_base, >> + int start, >> + int end, >> + VTDPASIDCacheInfo >*info) >> +{ >> + VTDPASIDEntry pe; >> + int pasid = start; >> + >> + while (pasid < end) { >> + if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) >> + && vtd_pe_present(&pe)) { >> + int bus_n = pci_bus_num(info->bus), devfn = info->devfn; >> + uint16_t sid = PCI_BUILD_BDF(bus_n, devfn); >> + VTDPASIDCacheEntry *pc_entry; >> + VTDAddressSpace *vtd_as; >> + >> + vtd_iommu_lock(s); >> + /* >> + * When indexed by rid2pasid, vtd_as should have been >created, >> + * e.g., by PCI subsystem. For other iommu pasid, we need >to >> + * create vtd_as dynamically. Other iommu pasid is same >value >since you don't support somthing else than rid2pasid, I would drop that >and simplify the code. See below. >> + * as PCI's pasid, so it's used as input of vtd_find_add_as(). >> + */ >> + vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid); >> + vtd_iommu_unlock(s); >> + if (!vtd_as) { >> + vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid); >you could check the vtd_as already exists here per the rid2pasid support >limitation In this series, I do include some basic codes for non-rid2pasid because they share some common code with rid2pasid and we already have emulated rid2pasid support in vIOMMU for a long time, it's not bad to accumulate some supporting code for non-rid2pasid for passthrough device. But I can do the factor out if you insist to have only rid_pasid code. >> + } >> + >> + if ((info->type == VTD_PASID_CACHE_DOMSI || >> + info->type == VTD_PASID_CACHE_PASIDSI) && >> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) { >> + /* >> + * VTD_PASID_CACHE_DOMSI and >VTD_PASID_CACHE_PASIDSI >> + * requires domain id check. If domain id check fail, >fails >> + * go to next pasid. >> + */ >> + pasid++; >> + continue; >> + } >> + >> + pc_entry = &vtd_as->pasid_cache_entry; >> + /* >> + * pasid cache update and clear are handled in >> + * vtd_flush_pasid_locked(), only care new pasid entry >here. >> + */ >> + if (!pc_entry->valid) { >> + pc_entry->pasid_entry = pe; >> + pc_entry->valid = true; >> + } >> + } >> + pasid++; >> + } >> +} >> + >> +/* >> + * In VT-d scalable mode translation, PASID dir + PASID table is used. >> + * This function aims at looping over a range of PASIDs in the given >> + * two level table to identify the pasid config in guest. >> + */ >> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s, >> + dma_addr_t pdt_base, >> + int start, int end, >> + VTDPASIDCacheInfo *info) >> +{ >> + VTDPASIDDirEntry pdire; >> + int pasid = start; >> + int pasid_next; >> + dma_addr_t pt_base; >> + >> + while (pasid < end) { >> + pasid_next = >> + (pasid + VTD_PASID_TBL_ENTRY_NUM) & >~(VTD_PASID_TBL_ENTRY_NUM - 1); >> + pasid_next = pasid_next < end ? pasid_next : end; >> + >> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire) >> + && vtd_pdire_present(&pdire)) { >> + pt_base = pdire.val & >VTD_PASID_TABLE_BASE_ADDR_MASK; >> + vtd_sm_pasid_table_walk_one(s, pt_base, pasid, >pasid_next, info); >> + } >> + pasid = pasid_next; >> + } >> +} >> + >> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s, >> + int start, int end, >> + VTDPASIDCacheInfo >*info) >> +{ >> + VTDContextEntry ce; >> + >> + if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus), >info->devfn, >> + &ce)) { >> + uint32_t max_pasid; >> + >> + max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) * >VTD_PASID_TBL_ENTRY_NUM; >> + if (end > max_pasid) { >> + end = max_pasid; >> + } >> + vtd_sm_pasid_table_walk(s, >> + >VTD_CE_GET_PASID_DIR_TABLE(&ce), >> + start, >> + end, >> + info); >> + } >> +} >> + >> +/* >> + * This function replays the guest pasid bindings by walking the two level >> + * guest PASID table. For each valid pasid entry, it finds or creates a >> + * vtd_as and caches pasid entry in vtd_as. >> + */ >> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s, >> + VTDPASIDCacheInfo >*pc_info) >> +{ >> + /* >> + * Currently only Requests-without-PASID is supported, as vIOMMU >doesn't >> + * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1). >> + */ >> + int start = 0, end = 1; >> + VTDHostIOMMUDevice *vtd_hiod; >> + VTDPASIDCacheInfo walk_info; >> + GHashTableIter as_it; >> + >> + switch (pc_info->type) { >> + case VTD_PASID_CACHE_PASIDSI: >> + start = pc_info->pasid; >> + end = pc_info->pasid + 1; >if you never replay a range, you could simplify the code for now because >some code paths are not properly tested OK. Instead of assignment of start and end variable, maybe just an assert(!pc_info->pasid). >> + /* fall through */ >> + case VTD_PASID_CACHE_DOMSI: >Why can't we have other invalidation types along with request without >PASID? It is not obvious to me at least why it couldn't be used by the >guest. Would deserve a comment in the commit desc I think. Other invalidation types are indeed used, just in pasid scope [0, 1), because [start, end) are already initialized to [0, 1), nothing more here, so just break. Thanks Zhenzhong >> + case VTD_PASID_CACHE_GLOBAL_INV: >> + /* loop all assigned devices */ >> + break; >> + default: >> + error_setg(&error_fatal, "invalid pc_info->type for replay"); >> + } >> + >> + /* >> + * In this replay, one only needs to care about the devices which are >> + * backed by host IOMMU. Those devices have a corresponding >vtd_hiod >> + * in s->vtd_host_iommu_dev. For devices not backed by host >IOMMU, it >> + * is not necessary to replay the bindings since their cache could be >> + * re-created in the future DMA address translation. >> + * >> + * VTD translation callback never accesses vtd_hiod and its >corresponding >> + * cached pasid entry, so no iommu lock needed here. >> + */ >> + walk_info = *pc_info; >> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev); >> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) { >> + walk_info.bus = vtd_hiod->bus; >> + walk_info.devfn = vtd_hiod->devfn; >> + vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info); >> + } >> +} >> + >> /* >> * For a PASID cache invalidation, this function handles below scenarios: >> * a) a present cached pasid entry needs to be removed >> * b) a present cached pasid entry needs to be updated >> + * c) a present cached pasid entry needs to be created >> */ >> static void vtd_pasid_cache_sync(IntelIOMMUState *s, >VTDPASIDCacheInfo *pc_info) >> { >> @@ -3239,6 +3406,9 @@ static void >vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info) >> g_hash_table_foreach_remove(s->vtd_address_spaces, >vtd_flush_pasid_locked, >> pc_info); >> vtd_iommu_unlock(s); >> + >> + /* c): loop all passthrough device for new pasid entries */ >> + vtd_replay_guest_pasid_bindings(s, pc_info); >> } >> >> static bool vtd_process_pasid_desc(IntelIOMMUState *s, >Thanks > >Eric