>-----Original Message-----
>From: Liu, Yi L <yi.l....@intel.com>
>Subject: Re: [PATCH v1 06/15] intel_iommu: Handle PASID entry removing and
>updating
>
>On 2025/6/6 18:04, Zhenzhong Duan wrote:
>> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
>> pasid entry and track PASID usage and future PASID tagged DMA address
>> translation support in vIOMMU.
>>
>> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and
>> never freed. For other pasid, VTDAddressSpace instance is created/destroyed
>> per the guest pasid entry set up/destroy for passthrough devices. While for
>> emulated devices, VTDAddressSpace instance is created in the PASID tagged
>DMA
>> translation and be destroyed per guest PASID cache invalidation. This focuses
>> on the PASID cache management for passthrough devices as there is no PASID
>> capable emulated devices yet.
>>
>> When guest modifies a PASID entry, QEMU will capture the guest pasid
>selective
>> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per
>the
>> invalidation reasons:
>>
>>      a) a present pasid entry moved to non-present
>>      b) a present pasid entry to be a present entry
>>      c) a non-present pasid entry moved to present
>>
>> This handles a) and b), following patch will handle c).
>>
>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>entry
>> and compare it with the PASID cache.
>
>To aovid confusion, maybe better to say cached pasid entry. :)

Sure.

>
>>
>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>>   hw/i386/intel_iommu_internal.h |  26 ++++
>>   include/hw/i386/intel_iommu.h  |   6 +
>>   hw/i386/intel_iommu.c          | 252 +++++++++++++++++++++++++++++++--
>>   hw/i386/trace-events           |   3 +
>>   4 files changed, 277 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 18bc22fc72..82b84db80f 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason {
>>                                     * request while disabled */
>>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>
>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>       /* PASID directory entry access failure */
>>       VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>       /* The Present(P) field of pasid directory entry is 0 */
>> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>
>> +#define VTD_INV_DESC_PASIDC_G          (3ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL)
>> +#define VTD_INV_DESC_PASIDC_DID(val)   (((val) >> 16) &
>VTD_DOMAIN_ID_MASK)
>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0  0xfff000000000f1c0ULL
>> +
>> +#define VTD_INV_DESC_PASIDC_DSI        (0ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID_SI   (1ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_GLOBAL     (3ULL << 4)
>> +
>>   /* Information about page-selective IOTLB invalidate */
>>   struct VTDIOTLBPageInvInfo {
>>       uint16_t domain_id;
>> @@ -552,6 +562,21 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>
>> +typedef enum VTDPCInvType {
>> +    /* pasid cache invalidation rely on guest PASID entry */
>> +    VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */
>> +    VTD_PASID_CACHE_DOMSI,      /* pasid cache domain selective invalidation
>*/
>> +    VTD_PASID_CACHE_PASIDSI,    /* pasid cache pasid selective invalidation 
>> */
>> +} VTDPCInvType;
>> +
>> +typedef struct VTDPASIDCacheInfo {
>> +    VTDPCInvType type;
>> +    uint16_t domain_id;
>> +    uint32_t pasid;
>> +    PCIBus *bus;
>> +    uint16_t devfn;
>> +} VTDPASIDCacheInfo;
>> +
>>   /* PASID Table Related Definitions */
>>   #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>   #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>> @@ -563,6 +588,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>   #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>>   #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing 
>> Disable
>*/
>> +#define VTD_PASID_TBL_ENTRY_NUM       (1ULL << 6)
>>
>>   /* PASID Granular Translation Type Mask */
>>   #define VTD_PASID_ENTRY_P              1ULL
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index 50f9b27a45..fbc9da903a 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>       uint64_t val[8];
>>   };
>>
>> +typedef struct VTDPASIDCacheEntry {
>> +    struct VTDPASIDEntry pasid_entry;
>> +    bool cache_filled;
>> +} VTDPASIDCacheEntry;
>> +
>>   struct VTDAddressSpace {
>>       PCIBus *bus;
>>       uint8_t devfn;
>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>       MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
>>       IntelIOMMUState *iommu_state;
>>       VTDContextCacheEntry context_cache_entry;
>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>       QLIST_ENTRY(VTDAddressSpace) next;
>>       /* Superset of notifier flags that this address space has */
>>       IOMMUNotifierFlag notifier_flags;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 112e09e305..c7162647e6 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -825,6 +825,11 @@ static inline bool
>vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>>       }
>>   }
>>
>> +static inline uint16_t vtd_pe_get_did(VTDPASIDEntry *pe)
>> +{
>> +    return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
>> +}
>> +
>>   static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>>   {
>>       return pdire->val & 1;
>> @@ -3104,6 +3109,236 @@ static bool
>vtd_process_piotlb_desc(IntelIOMMUState *s,
>>       return true;
>>   }
>>
>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> +                                            uint32_t pasid, VTDPASIDEntry 
>> *pe)
>> +{
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDContextEntry ce;
>> +    int ret;
>> +
>> +    if (!s->root_scalable) {
>> +        return -VTD_FR_RTADDR_INV_TTM;
>> +    }
>> +
>> +    ret = vtd_as_to_context_entry(vtd_as, &ce);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
>> +}
>> +
>> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>> +{
>> +    return !memcmp(p1, p2, sizeof(*p1));
>> +}
>> +
>> +/*
>> + * This function fills in the pasid entry in &vtd_as. Caller
>> + * of this function should hold iommu_lock.
>> + */
>> +static void vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace
>*vtd_as,
>> +                                 VTDPASIDEntry *pe)
>> +{
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +
>> +    if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) {
>> +        /* No need to go further as cached pasid entry is latest */
>> +        return;
>> +    }
>> +
>> +    pc_entry->pasid_entry = *pe;
>> +    pc_entry->cache_filled = true;
>> +    /*
>> +     * TODO: send pasid bind to host for passthru devices
>> +     */
>> +}
>> +
>> +/*
>> + * This function is used to clear cached pasid entry in vtd_as
>> + * instances. Caller of this function should hold iommu_lock.
>
>it also covers pasid entry update.

Will do like:

* This function is used to update or clear cached pasid entry in vtd_as

>
>> + */
>> +static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>> +                                gpointer user_data)
>> +{
>> +    VTDPASIDCacheInfo *pc_info = user_data;
>> +    VTDAddressSpace *vtd_as = value;
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDPASIDEntry pe;
>> +    uint16_t did;
>> +    uint32_t pasid;
>> +    int ret;
>> +
>> +    /* Replay only filled pasid entry cache for passthrough device */
>
>the comment of this helper already implies only continue if the
>pc_entry->cache_filled is true. Also, replay is a concept in the
>upper level helpers, no need to mention it here. I noticed replay
>in low level helpers in other patch as well, please drop them as well. :)

Will do.

>
>> +    if (!pc_entry->cache_filled) {
>> +        return false;
>> +    }
>> +    did = vtd_pe_get_did(&pc_entry->pasid_entry);
>> +
>> +    if (vtd_as_to_iommu_pasid_locked(vtd_as, &pasid)) {
>> +        goto remove;
>> +    }
>> +
>> +    switch (pc_info->type) {
>> +    case VTD_PASID_CACHE_PASIDSI:
>> +        if (pc_info->pasid != pasid) {
>> +            return false;
>> +        }
>> +        /* Fall through */
>> +    case VTD_PASID_CACHE_DOMSI:
>> +        if (pc_info->domain_id != did) {
>> +            return false;
>> +        }
>> +        /* Fall through */
>> +    case VTD_PASID_CACHE_GLOBAL_INV:
>> +        break;
>> +    default:
>> +        error_report("invalid pc_info->type");
>> +        abort();
>> +    }
>> +
>> +    /*
>> +     * pasid cache invalidation may indicate a present pasid
>> +     * entry to present pasid entry modification. To cover such
>> +     * case, vIOMMU emulator needs to fetch latest guest pasid
>> +     * entry and check cached pasid entry, then update pasid
>> +     * cache and send pasid bind/unbind to host properly.
>> +     */
>> +    ret = vtd_dev_get_pe_from_pasid(vtd_as, pasid, &pe);
>> +    if (ret) {
>> +        /*
>> +         * No valid pasid entry in guest memory. e.g. pasid entry
>> +         * was modified to be either all-zero or non-present. Either
>> +         * case means existing pasid cache should be removed.
>> +         */
>> +        goto remove;
>> +    }
>> +
>> +    vtd_fill_pe_in_cache(s, vtd_as, &pe);
>> +    return false;
>> +
>> +remove:
>> +    /*
>> +     * TODO: send pasid unbind to host for passthru devices
>> +     */
>> +    pc_entry->cache_filled = false;
>> +
>> +    /*
>> +     * Don't remove address space of PCI_NO_PASID which is created by PCI
>> +     * sub-system.
>> +     */
>
>I get why it cannot be removed. But I think the ce and pe field of this
>vtd_as might need to be updated given it is supposed to be removed if it
>is not PCI_NO_PASID.

With patch2 dropped, we always call vtd_dev_to_context_entry() to get ce,
vtd_as->ce only take effect for emulated device.

About pe, when we jump to remove label, that means pe.present == false,
We record that through pc_entry->cache_filled = false, so updating
unpresented pe field looks no need?

>
>> +    if (vtd_as->pasid == PCI_NO_PASID) {
>> +        return false;
>> +    }
>> +    return true;
>> +}
>> +
>> +/*
>> + * This function syncs the pasid bindings between guest and host.
>> + * It includes updating the pasid cache in vIOMMU and updating the
>> + * pasid bindings per guest's latest pasid entry presence.
>> + */
>> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>> +                                 VTDPASIDCacheInfo *pc_info)
>> +{
>> +    if (!s->flts || !s->root_scalable || !s->dmar_enabled) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * Regards to a pasid cache invalidation, e.g. a PSI.
>> +     * it could be either cases of below:
>> +     * a) a present pasid entry moved to non-present
>> +     * b) a present pasid entry to be a present entry
>> +     * c) a non-present pasid entry moved to present
>> +     *
>> +     * Different invalidation granularity may affect different device
>> +     * scope and pasid scope. But for each invalidation granularity,
>> +     * it needs to do two steps to sync host and guest pasid binding.
>> +     *
>> +     * Here is the handling of a PSI:
>> +     * 1) loop all the existing vtd_as instances to update them
>> +     *    according to the latest guest pasid entry in pasid table.
>> +     *    this will make sure affected existing vtd_as instances
>> +     *    cached the latest pasid entries. Also, during the loop, the
>> +     *    host should be notified if needed. e.g. pasid unbind or pasid
>> +     *    update. Should be able to cover case a) and case b).
>> +     *
>> +     * 2) loop all devices to cover case c)
>> +     *    - For devices which are backed by HostIOMMUDeviceIOMMUFD
>instances,
>> +     *      we loop them and check if guest pasid entry exists. If yes,
>> +     *      it is case c), we update the pasid cache and also notify
>> +     *      host.
>> +     *    - For devices which are not backed by HostIOMMUDeviceIOMMUFD,
>> +     *      it is not necessary to create pasid cache at this phase since
>> +     *      it could be created when vIOMMU does DMA address translation.
>> +     *      This is not yet implemented since there is no emulated
>> +     *      pasid-capable devices today. If we have such devices in
>> +     *      future, the pasid cache shall be created there.
>> +     * Other granularity follow the same steps, just with different scope
>> +     *
>> +     */
>> +
>> +    vtd_iommu_lock(s);
>> +    /*
>> +     * Step 1: loop all the existing vtd_as instances for pasid unbind and
>> +     * update.
>> +     */
>> +    g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid,
>> +                                pc_info);
>> +    vtd_iommu_unlock(s);
>
>just realized s->vtd_address_spaces is not protected by iommu_lock. If so,
>will it be ok to drop the lock here and add it in the lower level helpers
>if lock is needed?

Could you elaborate which lower level helper to add iommu_lock?
We want to protect update to vtd_address_spaces list with iommu_lock here.

Thanks
Zhenzhong

Reply via email to