>-----Original Message----- >From: Liu, Yi L <yi.l....@intel.com> >Subject: Re: [PATCH v1 06/15] intel_iommu: Handle PASID entry removing and >updating > >On 2025/6/6 18:04, Zhenzhong Duan wrote: >> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the >> pasid entry and track PASID usage and future PASID tagged DMA address >> translation support in vIOMMU. >> >> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and >> never freed. For other pasid, VTDAddressSpace instance is created/destroyed >> per the guest pasid entry set up/destroy for passthrough devices. While for >> emulated devices, VTDAddressSpace instance is created in the PASID tagged >DMA >> translation and be destroyed per guest PASID cache invalidation. This focuses >> on the PASID cache management for passthrough devices as there is no PASID >> capable emulated devices yet. >> >> When guest modifies a PASID entry, QEMU will capture the guest pasid >selective >> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per >the >> invalidation reasons: >> >> a) a present pasid entry moved to non-present >> b) a present pasid entry to be a present entry >> c) a non-present pasid entry moved to present >> >> This handles a) and b), following patch will handle c). >> >> vIOMMU emulator could figure out the reason by fetching latest guest pasid >entry >> and compare it with the PASID cache. > >To aovid confusion, maybe better to say cached pasid entry. :)
Sure. > >> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> hw/i386/intel_iommu_internal.h | 26 ++++ >> include/hw/i386/intel_iommu.h | 6 + >> hw/i386/intel_iommu.c | 252 +++++++++++++++++++++++++++++++-- >> hw/i386/trace-events | 3 + >> 4 files changed, 277 insertions(+), 10 deletions(-) >> >> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h >> index 18bc22fc72..82b84db80f 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason { >> * request while disabled */ >> VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ >> >> + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */ >> /* PASID directory entry access failure */ >> VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, >> /* The Present(P) field of pasid directory entry is 0 */ >> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc; >> #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL >> #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL >> >> +#define VTD_INV_DESC_PASIDC_G (3ULL << 4) >> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL) >> +#define VTD_INV_DESC_PASIDC_DID(val) (((val) >> 16) & >VTD_DOMAIN_ID_MASK) >> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 0xfff000000000f1c0ULL >> + >> +#define VTD_INV_DESC_PASIDC_DSI (0ULL << 4) >> +#define VTD_INV_DESC_PASIDC_PASID_SI (1ULL << 4) >> +#define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4) >> + >> /* Information about page-selective IOTLB invalidate */ >> struct VTDIOTLBPageInvInfo { >> uint16_t domain_id; >> @@ -552,6 +562,21 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | >~VTD_HAW_MASK(aw)) >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL >> >> +typedef enum VTDPCInvType { >> + /* pasid cache invalidation rely on guest PASID entry */ >> + VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */ >> + VTD_PASID_CACHE_DOMSI, /* pasid cache domain selective invalidation >*/ >> + VTD_PASID_CACHE_PASIDSI, /* pasid cache pasid selective invalidation >> */ >> +} VTDPCInvType; >> + >> +typedef struct VTDPASIDCacheInfo { >> + VTDPCInvType type; >> + uint16_t domain_id; >> + uint32_t pasid; >> + PCIBus *bus; >> + uint16_t devfn; >> +} VTDPASIDCacheInfo; >> + >> /* PASID Table Related Definitions */ >> #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL) >> #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL) >> @@ -563,6 +588,7 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL) >> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & >VTD_PASID_TABLE_BITS_MASK) >> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing >> Disable >*/ >> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6) >> >> /* PASID Granular Translation Type Mask */ >> #define VTD_PASID_ENTRY_P 1ULL >> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h >> index 50f9b27a45..fbc9da903a 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -95,6 +95,11 @@ struct VTDPASIDEntry { >> uint64_t val[8]; >> }; >> >> +typedef struct VTDPASIDCacheEntry { >> + struct VTDPASIDEntry pasid_entry; >> + bool cache_filled; >> +} VTDPASIDCacheEntry; >> + >> struct VTDAddressSpace { >> PCIBus *bus; >> uint8_t devfn; >> @@ -107,6 +112,7 @@ struct VTDAddressSpace { >> MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */ >> IntelIOMMUState *iommu_state; >> VTDContextCacheEntry context_cache_entry; >> + VTDPASIDCacheEntry pasid_cache_entry; >> QLIST_ENTRY(VTDAddressSpace) next; >> /* Superset of notifier flags that this address space has */ >> IOMMUNotifierFlag notifier_flags; >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 112e09e305..c7162647e6 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -825,6 +825,11 @@ static inline bool >vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) >> } >> } >> >> +static inline uint16_t vtd_pe_get_did(VTDPASIDEntry *pe) >> +{ >> + return VTD_SM_PASID_ENTRY_DID((pe)->val[1]); >> +} >> + >> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) >> { >> return pdire->val & 1; >> @@ -3104,6 +3109,236 @@ static bool >vtd_process_piotlb_desc(IntelIOMMUState *s, >> return true; >> } >> >> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as, >> + uint32_t pasid, VTDPASIDEntry >> *pe) >> +{ >> + IntelIOMMUState *s = vtd_as->iommu_state; >> + VTDContextEntry ce; >> + int ret; >> + >> + if (!s->root_scalable) { >> + return -VTD_FR_RTADDR_INV_TTM; >> + } >> + >> + ret = vtd_as_to_context_entry(vtd_as, &ce); >> + if (ret) { >> + return ret; >> + } >> + >> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid); >> +} >> + >> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2) >> +{ >> + return !memcmp(p1, p2, sizeof(*p1)); >> +} >> + >> +/* >> + * This function fills in the pasid entry in &vtd_as. Caller >> + * of this function should hold iommu_lock. >> + */ >> +static void vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace >*vtd_as, >> + VTDPASIDEntry *pe) >> +{ >> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >> + >> + if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) { >> + /* No need to go further as cached pasid entry is latest */ >> + return; >> + } >> + >> + pc_entry->pasid_entry = *pe; >> + pc_entry->cache_filled = true; >> + /* >> + * TODO: send pasid bind to host for passthru devices >> + */ >> +} >> + >> +/* >> + * This function is used to clear cached pasid entry in vtd_as >> + * instances. Caller of this function should hold iommu_lock. > >it also covers pasid entry update. Will do like: * This function is used to update or clear cached pasid entry in vtd_as > >> + */ >> +static gboolean vtd_flush_pasid(gpointer key, gpointer value, >> + gpointer user_data) >> +{ >> + VTDPASIDCacheInfo *pc_info = user_data; >> + VTDAddressSpace *vtd_as = value; >> + IntelIOMMUState *s = vtd_as->iommu_state; >> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >> + VTDPASIDEntry pe; >> + uint16_t did; >> + uint32_t pasid; >> + int ret; >> + >> + /* Replay only filled pasid entry cache for passthrough device */ > >the comment of this helper already implies only continue if the >pc_entry->cache_filled is true. Also, replay is a concept in the >upper level helpers, no need to mention it here. I noticed replay >in low level helpers in other patch as well, please drop them as well. :) Will do. > >> + if (!pc_entry->cache_filled) { >> + return false; >> + } >> + did = vtd_pe_get_did(&pc_entry->pasid_entry); >> + >> + if (vtd_as_to_iommu_pasid_locked(vtd_as, &pasid)) { >> + goto remove; >> + } >> + >> + switch (pc_info->type) { >> + case VTD_PASID_CACHE_PASIDSI: >> + if (pc_info->pasid != pasid) { >> + return false; >> + } >> + /* Fall through */ >> + case VTD_PASID_CACHE_DOMSI: >> + if (pc_info->domain_id != did) { >> + return false; >> + } >> + /* Fall through */ >> + case VTD_PASID_CACHE_GLOBAL_INV: >> + break; >> + default: >> + error_report("invalid pc_info->type"); >> + abort(); >> + } >> + >> + /* >> + * pasid cache invalidation may indicate a present pasid >> + * entry to present pasid entry modification. To cover such >> + * case, vIOMMU emulator needs to fetch latest guest pasid >> + * entry and check cached pasid entry, then update pasid >> + * cache and send pasid bind/unbind to host properly. >> + */ >> + ret = vtd_dev_get_pe_from_pasid(vtd_as, pasid, &pe); >> + if (ret) { >> + /* >> + * No valid pasid entry in guest memory. e.g. pasid entry >> + * was modified to be either all-zero or non-present. Either >> + * case means existing pasid cache should be removed. >> + */ >> + goto remove; >> + } >> + >> + vtd_fill_pe_in_cache(s, vtd_as, &pe); >> + return false; >> + >> +remove: >> + /* >> + * TODO: send pasid unbind to host for passthru devices >> + */ >> + pc_entry->cache_filled = false; >> + >> + /* >> + * Don't remove address space of PCI_NO_PASID which is created by PCI >> + * sub-system. >> + */ > >I get why it cannot be removed. But I think the ce and pe field of this >vtd_as might need to be updated given it is supposed to be removed if it >is not PCI_NO_PASID. With patch2 dropped, we always call vtd_dev_to_context_entry() to get ce, vtd_as->ce only take effect for emulated device. About pe, when we jump to remove label, that means pe.present == false, We record that through pc_entry->cache_filled = false, so updating unpresented pe field looks no need? > >> + if (vtd_as->pasid == PCI_NO_PASID) { >> + return false; >> + } >> + return true; >> +} >> + >> +/* >> + * This function syncs the pasid bindings between guest and host. >> + * It includes updating the pasid cache in vIOMMU and updating the >> + * pasid bindings per guest's latest pasid entry presence. >> + */ >> +static void vtd_pasid_cache_sync(IntelIOMMUState *s, >> + VTDPASIDCacheInfo *pc_info) >> +{ >> + if (!s->flts || !s->root_scalable || !s->dmar_enabled) { >> + return; >> + } >> + >> + /* >> + * Regards to a pasid cache invalidation, e.g. a PSI. >> + * it could be either cases of below: >> + * a) a present pasid entry moved to non-present >> + * b) a present pasid entry to be a present entry >> + * c) a non-present pasid entry moved to present >> + * >> + * Different invalidation granularity may affect different device >> + * scope and pasid scope. But for each invalidation granularity, >> + * it needs to do two steps to sync host and guest pasid binding. >> + * >> + * Here is the handling of a PSI: >> + * 1) loop all the existing vtd_as instances to update them >> + * according to the latest guest pasid entry in pasid table. >> + * this will make sure affected existing vtd_as instances >> + * cached the latest pasid entries. Also, during the loop, the >> + * host should be notified if needed. e.g. pasid unbind or pasid >> + * update. Should be able to cover case a) and case b). >> + * >> + * 2) loop all devices to cover case c) >> + * - For devices which are backed by HostIOMMUDeviceIOMMUFD >instances, >> + * we loop them and check if guest pasid entry exists. If yes, >> + * it is case c), we update the pasid cache and also notify >> + * host. >> + * - For devices which are not backed by HostIOMMUDeviceIOMMUFD, >> + * it is not necessary to create pasid cache at this phase since >> + * it could be created when vIOMMU does DMA address translation. >> + * This is not yet implemented since there is no emulated >> + * pasid-capable devices today. If we have such devices in >> + * future, the pasid cache shall be created there. >> + * Other granularity follow the same steps, just with different scope >> + * >> + */ >> + >> + vtd_iommu_lock(s); >> + /* >> + * Step 1: loop all the existing vtd_as instances for pasid unbind and >> + * update. >> + */ >> + g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid, >> + pc_info); >> + vtd_iommu_unlock(s); > >just realized s->vtd_address_spaces is not protected by iommu_lock. If so, >will it be ok to drop the lock here and add it in the lower level helpers >if lock is needed? Could you elaborate which lower level helper to add iommu_lock? We want to protect update to vtd_address_spaces list with iommu_lock here. Thanks Zhenzhong