>-----Original Message-----
>From: Liu, Yi L <[email protected]>
>Subject: Re: [PATCH v5 09/15] intel_iommu_accel: Handle PASID entry addition 
>for
>pc_inv_dsc request
>
>On 5/9/26 12:08, Zhenzhong Duan wrote:
>> Structure VTDAddressSpace includes some elements suitable for emulated
>> device and passthrough device without PASID, e.g., address space,
>> different memory regions, etc, it is also protected by vtd iommu lock,
>> all these are useless and become a burden for passthrough device with
>> PASID.
>>
>> When there are lots of PASIDs used in one device, the AS and MRs are
>> all registered to memory core and impact the whole system performance.
>>
>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>> of a passthrough device, we define a light weight structure
>> VTDAccelPASIDCacheEntry with only necessary elements for each pasid. We
>> will use this struct as a parameter to conduct binding/unbinding to
>> nested hwpt and to record the current bound nested hwpt. It's also
>> designed to support IOMMU_NO_PASID.
>>
>> VTDAccelPASIDCacheEntry is designed to only be used in intel_iommu_accel.c,
>> similarly VTDPASIDCacheEntry should only be used in hw/i386/intel_iommu.c
>>
>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>> (pasid cache invalidation) request, walk through each pasid in each
>> passthrough device for valid pasid entries, create a new
>> VTDAccelPASIDCacheEntry if not existing yet.
>>
>> IOMMU_NO_PASID of passthrough device still need to register MRs in case
>> guest does not operate in scalable mode. So for IOMMU_NO_PASID, we have
>> both VTDAPASIDCacheEntry and VTDAccelPASIDCacheEntry.
>
>The implementation LGTM. But I got a question to ask here.
>VTDAPASIDCacheEntry is cached in VTDAddressSpace, while
>VTDAccelPASIDCacheEntry is cached in VTDHostIOMMUDevice. A natural
>question is why usingVTDHostIOMMUDevice instead of VTDAddressSpace. I
>think it might be valuable to mark the reason of this choice. This
>would help maintaining it in future as we might forgot the reason. :)

Hmm, you mean why not putting VTDAccelPASIDCacheEntry list in VTDAddressSpace?
There is explanation above, paste here:

  VTDAccelPASIDCacheEntry is designed to only be used in intel_iommu_accel.c,
  similarly VTDPASIDCacheEntry should only be used in hw/i386/intel_iommu.c

Thanks
Zhenzhong

>
>> Co-developed-by: Yi Liu <[email protected]>
>> Signed-off-by: Yi Liu <[email protected]>
>> Signed-off-by: Zhenzhong Duan <[email protected]>
>> Tested-by: Xudong Hao <[email protected]>
>> ---
>>   hw/i386/intel_iommu_accel.h    |  13 +++
>>   hw/i386/intel_iommu_internal.h |   8 ++
>>   hw/i386/intel_iommu.c          |   3 +
>>   hw/i386/intel_iommu_accel.c    | 156 +++++++++++++++++++++++++++++++++
>>   4 files changed, 180 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index e5f0b077b4..c9b1823745 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -12,6 +12,13 @@
>>   #define HW_I386_INTEL_IOMMU_ACCEL_H
>>   #include CONFIG_DEVICES
>>
>> +typedef struct VTDAccelPASIDCacheEntry {
>> +    VTDHostIOMMUDevice *vtd_hiod;
>> +    VTDPASIDEntry pasid_entry;
>> +    uint32_t pasid;
>> +    QLIST_ENTRY(VTDAccelPASIDCacheEntry) next;
>> +} VTDAccelPASIDCacheEntry;
>> +
>>   #ifdef CONFIG_VTD_ACCEL
>>   bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>>                             Error **errp);
>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>*vtd_as, Error **errp);
>>   void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>domain_id,
>>                                         uint32_t pasid, hwaddr addr,
>>                                         uint64_t npages, bool ih);
>> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info);
>>   void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>>   #else
>>   static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>> @@ -49,6 +57,11 @@ static inline void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>>   {
>>   }
>>
>> +static inline void vtd_accel_pasid_cache_sync(IntelIOMMUState *s,
>> +                                              VTDPASIDCacheInfo *pc_info)
>> +{
>> +}
>> +
>>   static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>>   {
>>   }
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 0141316f83..623dc24760 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -615,6 +615,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_CTX_ENTRY_LEGACY_SIZE     16
>>   #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>>
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>   #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
>> @@ -645,6 +646,7 @@ typedef struct VTDPIOTLBInvInfo {
>>   #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
>>   #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) &
>VTD_PASID_DIR_BITS_MASK)
>>   #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing 
>> Disable */
>> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
>>   #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>   #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>>   #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing 
>> Disable
>*/
>> @@ -710,6 +712,7 @@ typedef struct VTDHostIOMMUDevice {
>>       PCIBus *bus;
>>       uint8_t devfn;
>>       HostIOMMUDevice *hiod;
>> +    QLIST_HEAD(, VTDAccelPASIDCacheEntry) pasid_cache_list;
>>   } VTDHostIOMMUDevice;
>>
>>   /*
>> @@ -767,6 +770,11 @@ static inline int
>vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>>       return memcmp(p1, p2, sizeof(*p1));
>>   }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>> +{
>> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>> +}
>> +
>>   int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t 
>> pasid,
>>                                     VTDPASIDDirEntry *pdire);
>>   int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index b50c556c40..e1e32959d3 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3181,6 +3181,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState
>*s, VTDPASIDCacheInfo *pc_info)
>>       g_hash_table_foreach(s->vtd_address_spaces,
>vtd_pasid_cache_sync_locked,
>>                            pc_info);
>>       vtd_iommu_unlock(s);
>> +
>> +    vtd_accel_pasid_cache_sync(s, pc_info);
>>   }
>>
>>   static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>> @@ -4751,6 +4753,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus,
>void *opaque, int devfn,
>>       vtd_hiod->devfn = (uint8_t)devfn;
>>       vtd_hiod->iommu_state = s;
>>       vtd_hiod->hiod = hiod;
>> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>
>>       if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>>           g_free(vtd_hiod);
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index 10bdbba632..a66d63b4c8 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -259,6 +259,162 @@ void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>                            vtd_flush_host_piotlb_locked, &piotlb_info);
>>   }
>>
>> +static void vtd_accel_fill_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>> +                              VTDPASIDEntry *pe)
>> +{
>> +    VTDAccelPASIDCacheEntry *vtd_pce;
>> +
>> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>> +        if (vtd_pce->pasid == pasid) {
>> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pasid_entry)) {
>> +                vtd_pce->pasid_entry = *pe;
>> +            }
>> +            return;
>> +        }
>> +    }
>> +
>> +    vtd_pce = g_malloc0(sizeof(VTDAccelPASIDCacheEntry));
>> +    vtd_pce->vtd_hiod = vtd_hiod;
>> +    vtd_pce->pasid = pasid;
>> +    vtd_pce->pasid_entry = *pe;
>> +    QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
>> +}
>> +
>> +/*
>> + * This function walks over PASID range within [start, end) in a single
>> + * PASID table for entries matching @info type/did, then create
>> + * VTDAccelPASIDCacheEntry if not exist yet.
>> + */
>> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
>> +                                        dma_addr_t pt_base, int start, int 
>> end,
>> +                                        VTDPASIDCacheInfo *info)
>> +{
>> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
>> +    VTDPASIDEntry pe;
>> +    int pasid;
>> +
>> +    for (pasid = start; pasid < end; pasid++) {
>> +        if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
>> +            !vtd_pe_present(&pe)) {
>> +            continue;
>> +        }
>> +
>> +        if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
>> +             info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
>> +            (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>> +            /*
>> +             * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
>> +             * requires domain id check. If domain id check fail,
>> +             * go to next pasid.
>> +             */
>> +            continue;
>> +        }
>> +
>> +        vtd_accel_fill_pc(vtd_hiod, pasid, &pe);
>> +    }
>> +}
>> +
>> +/*
>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>> + * This function aims at looping over a range of PASIDs in the given
>> + * two level table to identify the pasid config in guest.
>> + */
>> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
>> +                                    dma_addr_t pdt_base, int start, int end,
>> +                                    VTDPASIDCacheInfo *info)
>> +{
>> +    VTDPASIDDirEntry pdire;
>> +    int pasid = start;
>> +    int pasid_next;
>> +    dma_addr_t pt_base;
>> +
>> +    while (pasid < end) {
>> +        pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
>> +                     ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
>> +        pasid_next = pasid_next < end ? pasid_next : end;
>> +
>> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>> +            && vtd_pdire_present(&pdire)) {
>> +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
>> +            vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, 
>> pasid_next,
>> +                                        info);
>> +        }
>> +        pasid = pasid_next;
>> +    }
>> +}
>> +
>> +static void vtd_accel_replay_pasid_bind_for_dev(VTDHostIOMMUDevice
>*vtd_hiod,
>> +                                                int start, int end,
>> +                                                VTDPASIDCacheInfo *pc_info)
>> +{
>> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
>> +    VTDContextEntry ce;
>> +    int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
>> +
>> +    if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
>> +                                  vtd_hiod->devfn, &ce)) {
>> +        VTDPASIDCacheInfo walk_info = *pc_info;
>> +        uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>> +                                VTD_PASID_TABLE_ENTRY_NUM;
>> +
>> +        end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
>> +
>> +        vtd_sm_pasid_table_walk(vtd_hiod,
>VTD_CE_GET_PASID_DIR_TABLE(&ce),
>> +                                start, end, &walk_info);
>> +    }
>> +}
>> +
>> +/*
>> + * This function replays the guest pasid bindings by walking the two level
>> + * guest PASID table. For each valid pasid entry, it creates an entry
>> + * VTDAccelPASIDCacheEntry dynamically if not exist yet. This entry holds
>> + * info specific to a pasid
>> + */
>> +void vtd_accel_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info)
>> +{
>> +    int start = IOMMU_NO_PASID, end = 1 << s->pasid;
>> +    VTDHostIOMMUDevice *vtd_hiod;
>> +    GHashTableIter hiod_it;
>> +
>> +    if (!s->fsts) {
>> +        return;
>> +    }
>> +
>> +    switch (pc_info->type) {
>> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> +        start = pc_info->pasid;
>> +        end = pc_info->pasid + 1;
>> +        /* fall through */
>> +    case VTD_INV_DESC_PASIDC_G_DSI:
>> +        /*
>> +         * loop all assigned devices, do domain id check in
>> +         * vtd_sm_pasid_table_walk_one() after get pasid entry.
>> +         */
>> +        break;
>> +    case VTD_INV_DESC_PASIDC_G_GLOBAL:
>> +        /* loop all assigned devices */
>> +        break;
>> +    default:
>> +        g_assert_not_reached();
>> +    }
>> +
>> +    /*
>> +     * Loop all the vtd_hiod instances to sync the "pasid cache" per the
>> +     * guest pasid configuration.
>> +     *
>> +     * VTD translation callback never accesses vtd_hiod and its 
>> corresponding
>> +     * cached pasid entry, so no iommu lock needed here.
>> +     */
>> +    g_hash_table_iter_init(&hiod_it, s->vtd_host_iommu_dev);
>> +    while (g_hash_table_iter_next(&hiod_it, NULL, (void **)&vtd_hiod)) {
>> +        if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
>> +                                 TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>> +            continue;
>> +        }
>> +        vtd_accel_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
>> +    }
>> +}
>> +
>>   static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
>>                                             void *caps, uint32_t size)
>>   {

Reply via email to