>-----Original Message-----
>From: Eric Auger <[email protected]>
>Subject: Re: [PATCH rfcv2 13/20] intel_iommu: Add PASID cache management
>infrastructure
>
>
>
>
>On 2/19/25 9:22 AM, Zhenzhong Duan wrote:
>> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
>> pasid entry and track PASID usage and future PASID tagged DMA address
>> translation support in vIOMMU.
>>
>> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and
>> never freed. For other pasid, VTDAddressSpace instance is created/destroyed
>> per the guest pasid entry set up/destroy for passthrough devices. While for
>> emulated devices, VTDAddressSpace instance is created in the PASID tagged
>DMA
>> translation and be destroyed per guest PASID cache invalidation. This focuses
>> on the PASID cache management for passthrough devices as there is no PASID
>> capable emulated devices yet.
>>
>> When guest modifies a PASID entry, QEMU will capture the guest pasid
>selective
>> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per
>the
>> invalidation reasons:
>>
>> *) a present pasid entry moved to non-present
>> *) a present pasid entry to be a present entry
>> *) a non-present pasid entry moved to present
>>
>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>entry
>> and compare it with the PASID cache.
>>
>> Signed-off-by: Yi Liu <[email protected]>
>> Signed-off-by: Yi Sun <[email protected]>
>> Signed-off-by: Zhenzhong Duan <[email protected]>
>> ---
>> hw/i386/intel_iommu_internal.h | 29 ++
>> include/hw/i386/intel_iommu.h | 6 +
>> hw/i386/intel_iommu.c | 484 ++++++++++++++++++++++++++++++++-
>Don't you have ways to split this patch. It has a huge change set and
>this is really heavy to digest at once (at least for me).
Sure, will try.
>> hw/i386/trace-events | 4 +
>> 4 files changed, 513 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 18bc22fc72..632fda2853 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason {
>> * request while disabled */
>> VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
>>
>> + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */
>> /* PASID directory entry access failure */
>> VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>> /* The Present(P) field of pasid directory entry is 0 */
>> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc;
>> #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
>> #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
>>
>> +#define VTD_INV_DESC_PASIDC_G (3ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL)
>> +#define VTD_INV_DESC_PASIDC_DID(val) (((val) >> 16) &
>VTD_DOMAIN_ID_MASK)
>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 0xfff000000000f1c0ULL
>> +
>> +#define VTD_INV_DESC_PASIDC_DSI (0ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID_SI (1ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4)
>> +
>> /* Information about page-selective IOTLB invalidate */
>> struct VTDIOTLBPageInvInfo {
>> uint16_t domain_id;
>> @@ -548,10 +558,28 @@ typedef struct VTDRootEntry VTDRootEntry;
>> #define VTD_CTX_ENTRY_LEGACY_SIZE 16
>> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>>
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(val) (((val) >> 9) & 0x7)
>> #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
>>
>> +typedef enum VTDPCInvType {
>> + /* force reset all */
>> + VTD_PASID_CACHE_FORCE_RESET = 0,
>> + /* pasid cache invalidation rely on guest PASID entry */
>> + VTD_PASID_CACHE_GLOBAL_INV,
>> + VTD_PASID_CACHE_DOMSI,
>> + VTD_PASID_CACHE_PASIDSI,
>> +} VTDPCInvType;
>> +
>> +typedef struct VTDPASIDCacheInfo {
>> + VTDPCInvType type;
>> + uint16_t domain_id;
>> + uint32_t pasid;
>> + PCIBus *bus;
>> + uint16_t devfn;
>> +} VTDPASIDCacheInfo;
>> +
>> /* PASID Table Related Definitions */
>> #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL)
>> #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>> @@ -563,6 +591,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
>> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing
>> Disable
>*/
>> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6)
>>
>> /* PASID Granular Translation Type Mask */
>> #define VTD_PASID_ENTRY_P 1ULL
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index 50f9b27a45..fbc9da903a 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>> uint64_t val[8];
>> };
>>
>> +typedef struct VTDPASIDCacheEntry {
>> + struct VTDPASIDEntry pasid_entry;
>> + bool cache_filled;
>> +} VTDPASIDCacheEntry;
>> +
>> struct VTDAddressSpace {
>> PCIBus *bus;
>> uint8_t devfn;
>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>> MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
>> IntelIOMMUState *iommu_state;
>> VTDContextCacheEntry context_cache_entry;
>> + VTDPASIDCacheEntry pasid_cache_entry;
>> QLIST_ENTRY(VTDAddressSpace) next;
>> /* Superset of notifier flags that this address space has */
>> IOMMUNotifierFlag notifier_flags;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index fafa199f52..b8f3b85803 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -86,6 +86,8 @@ struct vtd_iotlb_key {
>> static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>> static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier
>*n);
>>
>> +static void vtd_pasid_cache_reset(IntelIOMMUState *s);
>use _locked suffix to be consistent with the others and emphases the
>lock is held?
Will do.
>> +
>> static void vtd_panic_require_caching_mode(void)
>> {
>> error_report("We need to set caching-mode=on for intel-iommu to enable "
>> @@ -390,6 +392,7 @@ static void vtd_reset_caches(IntelIOMMUState *s)
>> vtd_iommu_lock(s);
>> vtd_reset_iotlb_locked(s);
>> vtd_reset_context_cache_locked(s);
>> + vtd_pasid_cache_reset(s);
>> vtd_iommu_unlock(s);
>> }
>>
>> @@ -825,6 +828,16 @@ static inline bool
>vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>> }
>> }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>> +{
>> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce->val[0]) + 7);
>> +}
>> +
>> +static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
>
>nit: vtd_pe_get_did as the filed is named DID?
Will do.
>
>> +{
>> + return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
>> +}
>> +
>> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>> {
>> return pdire->val & 1;
>> @@ -1617,6 +1630,54 @@ static int
>vtd_as_to_context_entry(VTDAddressSpace *vtd_as, VTDContextEntry *ce)
>> }
>> }
>>
>> +/* Translate to iommu pasid if PCI_NO_PASID */
>I don't really get the comment above. Ay best, shouldn't it be put in
Will do, it's pci's pasid value vs. guest's pasid value which I call iommu
pasid.
>
>(vtd_as->pasid == PCI_NO_PASID) block?
>What you call "iommu pasid" is the value set in RID_PASID, right? Is "iommu
>pasid" a conventional terminology?
PCI subsystem uses PCI_NO_PASID(-1) to represent a Requests-without-PASID,
but guest doesn't recognize PCI_NO_PASID(-1), so vIOMMU has to translate
PCI_NO_PASID(-1) to an normal pasid value(pasid >= 0), I call it iommu pasid
to distinguish with PCI's pasid, it's not conventional terminology.
>
>> +static int vtd_as_to_iommu_pasid(VTDAddressSpace *vtd_as, uint32_t *pasid)
>> +{
>> + VTDContextEntry ce;
>> + int ret;
>> +
>> + ret = vtd_as_to_context_entry(vtd_as, &ce);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + if (vtd_as->pasid == PCI_NO_PASID) {
>> + *pasid = VTD_CE_GET_RID2PASID(&ce);
>This is called RID_PASID in the spec. I think it would be easier for the
>reader if could have a direct match with named fields so that we can
>easily seach the spec.
I agree with you about the match, but rid2pasid is wildly used in intel_iommu.c
for a long history. I'd like to here more voice before renaming.
>> + } else {
>> + *pasid = vtd_as->pasid;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static gboolean vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer
>value,
>> + gpointer user_data)
>why iommu_pasid and not directly pasid?
Because vtd_as->pasid is PCI's pasid, it needs to be transformed into iommu
pasid.
>> +{
>> + VTDAddressSpace *vtd_as = (VTDAddressSpace *)value;
>> + struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data;
>> + uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn);
>> + uint32_t pasid;
>> +
>> + if (vtd_as_to_iommu_pasid(vtd_as, &pasid)) {
>> + return false;
>> + }
>> +
>> + return (pasid == target->pasid) && (sid == target->sid);
>> +}
>> +
>> +/* Translate iommu pasid to vtd_as */
>> +static VTDAddressSpace *vtd_as_from_iommu_pasid(IntelIOMMUState *s,
>> + uint16_t sid, uint32_t
>> pasid)
>> +{
>> + struct vtd_as_raw_key key = {
>> + .sid = sid,
>> + .pasid = pasid
>> + };
>> +
>> + return g_hash_table_find(s->vtd_address_spaces,
>> + vtd_find_as_by_sid_and_iommu_pasid, &key);
>> +}
>> +
>> static int vtd_sync_shadow_page_hook(const IOMMUTLBEvent *event,
>> void *private)
>> {
>> @@ -3062,6 +3123,412 @@ static bool
>vtd_process_piotlb_desc(IntelIOMMUState *s,
>> return true;
>> }
>>
>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> + uint32_t pasid, VTDPASIDEntry
>> *pe)
>does "pe" means pasid entry? It is not obvious for a dummy reader like
>me. May be worth a comment at least once.
May VTDPASIDEntry have given you enough hint?
>> +{
>> + IntelIOMMUState *s = vtd_as->iommu_state;
>> + VTDContextEntry ce;
>> + int ret;
>> +
>> + if (!s->root_scalable) {
>> + return -VTD_FR_RTADDR_INV_TTM;
>> + }
>> +
>> + ret = vtd_as_to_context_entry(vtd_as, &ce);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
>> +}
>> +
>> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>> +{
>> + return !memcmp(p1, p2, sizeof(*p1));
>> +}
>> +
>> +/*
>> + * This function fills in the pasid entry in &vtd_as. Caller
>> + * of this function should hold iommu_lock.
>> + */
>> +static void vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace
>*vtd_as,
>> + VTDPASIDEntry *pe)
>seems some other functions used the _locked suffix
Yes, vtd_pasid_cache_reset_locked() is, so no need to add suffix here, right?
>> +{
>> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +
>> + if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) {
>> + /* No need to go further as cached pasid entry is latest */
>> + return;
>> + }
>> +
>> + pc_entry->pasid_entry = *pe;
>> + pc_entry->cache_filled = true;
>> + /*
>> + * TODO: send pasid bind to host for passthru devices
>> + */
>what does it mean?
Here means a new entry is found, we need to send pasid<->hwpt binding
to host.
>> +}
>> +
>> +/*
>> + * This function is used to clear cached pasid entry in vtd_as
>> + * instances. Caller of this function should hold iommu_lock.
>> + */
>> +static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>> + gpointer user_data)
>> +{
>> + VTDPASIDCacheInfo *pc_info = user_data;
>> + VTDAddressSpace *vtd_as = value;
>> + IntelIOMMUState *s = vtd_as->iommu_state;
>> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> + VTDPASIDEntry pe;
>> + uint16_t did;
>> + uint32_t pasid;
>> + int ret;
>> +
>> + /* Replay only fill pasid entry cache for passthrough device */
>filled
Will do.
Thanks
Zhenzhong