Hi Eric,

>-----Original Message-----
>From: Eric Auger <eric.au...@redhat.com>
>Subject: Re: [PATCH v5 12/21] intel_iommu: Handle PASID entry addition
>
>Hi Zhenzhong,
>
>On 8/22/25 8:40 AM, Zhenzhong Duan wrote:
>> When guest creates new PASID entries, QEMU will capture the guest pasid
>> selective pasid cache invalidation, walk through each passthrough device
>> and each pasid, when a match is found, identify an existing vtd_as or
>> create a new one and update its corresponding cached pasid entry.
>
>You need to emphasize that the support is currently limited to
>Requests-without-PASID

OK.

>>
>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>>  hw/i386/intel_iommu_internal.h |   2 +
>>  hw/i386/intel_iommu.c          | 176
>++++++++++++++++++++++++++++++++-
>>  2 files changed, 175 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index b9b76dd996..fb2a919e87 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -559,6 +559,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_CTX_ENTRY_LEGACY_SIZE     16
>>  #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>>
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0],
>9, 3)
>>  #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
>>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>0xffffffffffe00000ULL
>> @@ -589,6 +590,7 @@ typedef struct VTDPASIDCacheInfo {
>>  #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>  #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>>  #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault
>Processing Disable */
>> +#define VTD_PASID_TBL_ENTRY_NUM       (1ULL << 6)
>>
>>  /* PASID Granular Translation Type Mask */
>>  #define VTD_PASID_ENTRY_P              1ULL
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index a2ee6d684e..7d2c9feae7 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -826,6 +826,11 @@ static inline bool
>vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>>      }
>>  }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry
>*ce)
>> +{
>> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>> +}
>> +
>>  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>>  {
>>      return pdire->val & 1;
>> @@ -1647,9 +1652,9 @@ static gboolean
>vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer value,
>>  }
>>
>>  /* Translate iommu pasid to vtd_as */
>> -static inline
>> -VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState
>*s,
>> -                                                uint16_t sid,
>uint32_t pasid)
>> +static VTDAddressSpace
>*vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
>> +
>uint16_t sid,
>> +
>uint32_t pasid)
>>  {
>>      struct vtd_as_raw_key key = {
>>          .sid = sid,
>> @@ -3220,10 +3225,172 @@ remove:
>>      return true;
>>  }
>>
>> +/*
>> + * This function walks over PASID range within [start, end) in a single
>> + * PASID table for entries matching @info type/did, then retrieve/create
>> + * vtd_as and fill associated pasid entry cache.
>> + */
>> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
>> +                                        dma_addr_t pt_base,
>> +                                        int start,
>> +                                        int end,
>> +                                        VTDPASIDCacheInfo
>*info)
>> +{
>> +    VTDPASIDEntry pe;
>> +    int pasid = start;
>> +
>> +    while (pasid < end) {
>> +        if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
>> +            && vtd_pe_present(&pe)) {
>> +            int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
>> +            uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
>> +            VTDPASIDCacheEntry *pc_entry;
>> +            VTDAddressSpace *vtd_as;
>> +
>> +            vtd_iommu_lock(s);
>> +            /*
>> +             * When indexed by rid2pasid, vtd_as should have been
>created,
>> +             * e.g., by PCI subsystem. For other iommu pasid, we need
>to
>> +             * create vtd_as dynamically. Other iommu pasid is same
>value
>since you don't support somthing else than rid2pasid, I would drop that
>and simplify the code. See below.
>> +             * as PCI's pasid, so it's used as input of vtd_find_add_as().
>> +             */
>> +            vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
>> +            vtd_iommu_unlock(s);
>> +            if (!vtd_as) {
>> +                vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
>you could check the vtd_as already exists here per the rid2pasid support
>limitation

In this series, I do include some basic codes for non-rid2pasid because they 
share some common code with rid2pasid and we already have emulated rid2pasid 
support in vIOMMU for a long time, it's not bad to accumulate some supporting 
code for non-rid2pasid for passthrough device. But I can do the factor out if 
you insist to have only rid_pasid code.

>> +            }
>> +
>> +            if ((info->type == VTD_PASID_CACHE_DOMSI ||
>> +                 info->type == VTD_PASID_CACHE_PASIDSI) &&
>> +                (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>> +                /*
>> +                 * VTD_PASID_CACHE_DOMSI and
>VTD_PASID_CACHE_PASIDSI
>> +                 * requires domain id check. If domain id check fail,
>fails
>> +                 * go to next pasid.
>> +                 */
>> +                pasid++;
>> +                continue;
>> +            }
>> +
>> +            pc_entry = &vtd_as->pasid_cache_entry;
>> +            /*
>> +             * pasid cache update and clear are handled in
>> +             * vtd_flush_pasid_locked(), only care new pasid entry
>here.
>> +             */
>> +            if (!pc_entry->valid) {
>> +                pc_entry->pasid_entry = pe;
>> +                pc_entry->valid = true;
>> +            }
>> +        }
>> +        pasid++;
>> +    }
>> +}
>> +
>> +/*
>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>> + * This function aims at looping over a range of PASIDs in the given
>> + * two level table to identify the pasid config in guest.
>> + */
>> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
>> +                                    dma_addr_t pdt_base,
>> +                                    int start, int end,
>> +                                    VTDPASIDCacheInfo *info)
>> +{
>> +    VTDPASIDDirEntry pdire;
>> +    int pasid = start;
>> +    int pasid_next;
>> +    dma_addr_t pt_base;
>> +
>> +    while (pasid < end) {
>> +        pasid_next =
>> +             (pasid + VTD_PASID_TBL_ENTRY_NUM) &
>~(VTD_PASID_TBL_ENTRY_NUM - 1);
>> +        pasid_next = pasid_next < end ? pasid_next : end;
>> +
>> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>> +            && vtd_pdire_present(&pdire)) {
>> +            pt_base = pdire.val &
>VTD_PASID_TABLE_BASE_ADDR_MASK;
>> +            vtd_sm_pasid_table_walk_one(s, pt_base, pasid,
>pasid_next, info);
>> +        }
>> +        pasid = pasid_next;
>> +    }
>> +}
>> +
>> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
>> +                                          int start, int end,
>> +                                          VTDPASIDCacheInfo
>*info)
>> +{
>> +    VTDContextEntry ce;
>> +
>> +    if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus),
>info->devfn,
>> +                                  &ce)) {
>> +        uint32_t max_pasid;
>> +
>> +        max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>VTD_PASID_TBL_ENTRY_NUM;
>> +        if (end > max_pasid) {
>> +            end = max_pasid;
>> +        }
>> +        vtd_sm_pasid_table_walk(s,
>> +
>VTD_CE_GET_PASID_DIR_TABLE(&ce),
>> +                                start,
>> +                                end,
>> +                                info);
>> +    }
>> +}
>> +
>> +/*
>> + * This function replays the guest pasid bindings by walking the two level
>> + * guest PASID table. For each valid pasid entry, it finds or creates a
>> + * vtd_as and caches pasid entry in vtd_as.
>> + */
>> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>> +                                            VTDPASIDCacheInfo
>*pc_info)
>> +{
>> +    /*
>> +     * Currently only Requests-without-PASID is supported, as vIOMMU
>doesn't
>> +     * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
>> +     */
>> +    int start = 0, end = 1;
>> +    VTDHostIOMMUDevice *vtd_hiod;
>> +    VTDPASIDCacheInfo walk_info;
>> +    GHashTableIter as_it;
>> +
>> +    switch (pc_info->type) {
>> +    case VTD_PASID_CACHE_PASIDSI:
>> +        start = pc_info->pasid;
>> +        end = pc_info->pasid + 1;
>if you never replay a range, you could simplify the code for now because
>some code paths are not properly tested

OK. Instead of assignment of start and end variable, maybe just an 
assert(!pc_info->pasid).

>> +       /* fall through */
>> +    case VTD_PASID_CACHE_DOMSI:
>Why can't we have other invalidation types along with request without
>PASID? It is not obvious to me at least why it couldn't be used by the
>guest. Would deserve a comment in the commit desc I think.

Other invalidation types are indeed used, just in pasid scope [0, 1), because 
[start, end) are already initialized to [0, 1), nothing more here, so just 
break.

Thanks
Zhenzhong

>> +    case VTD_PASID_CACHE_GLOBAL_INV:
>> +        /* loop all assigned devices */
>> +        break;
>> +    default:
>> +        error_setg(&error_fatal, "invalid pc_info->type for replay");
>> +    }
>> +
>> +    /*
>> +     * In this replay, one only needs to care about the devices which are
>> +     * backed by host IOMMU. Those devices have a corresponding
>vtd_hiod
>> +     * in s->vtd_host_iommu_dev. For devices not backed by host
>IOMMU, it
>> +     * is not necessary to replay the bindings since their cache could be
>> +     * re-created in the future DMA address translation.
>> +     *
>> +     * VTD translation callback never accesses vtd_hiod and its
>corresponding
>> +     * cached pasid entry, so no iommu lock needed here.
>> +     */
>> +    walk_info = *pc_info;
>> +    g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
>> +    while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
>> +        walk_info.bus = vtd_hiod->bus;
>> +        walk_info.devfn = vtd_hiod->devfn;
>> +        vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
>> +    }
>> +}
>> +
>>  /*
>>   * For a PASID cache invalidation, this function handles below scenarios:
>>   * a) a present cached pasid entry needs to be removed
>>   * b) a present cached pasid entry needs to be updated
>> + * c) a present cached pasid entry needs to be created
>>   */
>>  static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>VTDPASIDCacheInfo *pc_info)
>>  {
>> @@ -3239,6 +3406,9 @@ static void
>vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
>>      g_hash_table_foreach_remove(s->vtd_address_spaces,
>vtd_flush_pasid_locked,
>>                                  pc_info);
>>      vtd_iommu_unlock(s);
>> +
>> +    /* c): loop all passthrough device for new pasid entries */
>> +    vtd_replay_guest_pasid_bindings(s, pc_info);
>>  }
>>
>>  static bool vtd_process_pasid_desc(IntelIOMMUState *s,
>Thanks
>
>Eric

Reply via email to