>-----Original Message-----
>From: Liu, Yi L <[email protected]>
>Subject: Re: [PATCH v5 08/15] intel_iommu: Refactor PASID processing to use
>IOMMU_NO_PASID internally
>
>On 5/9/26 12:08, Zhenzhong Duan wrote:
>> The PCI subsystem uses PCI_NO_PASID for requests-without-PASID, but VT-d
>> uses IOMMU_NO_PASID internally. This leads to conversion and checking code
>
>s/VT-d uses IOMMU_NO_PASID internally/VT-d emulation uses
>IOMMU_NO_PASID
>internally (ecap.RPS==0)/

Will do.

>
>> between PCI_NO_PASID and IOMMU_NO_PASID throughout the
>implementation.
>>
>> Refactor to use IOMMU PASID consistently within Intel IOMMU by storing
>> IOMMU PASID value in vtd_as->pasid. After this change, PCI_NO_PASID is
>> only used at three boundary points:
>
>a typo or the third boundary is missed?

Section 2 contains two points:

1. Convert when notifying UNMAP events via memory_region_notify_iommu()
2. Convert when returning IOMMUTLBEntry in vtd_iommu_translate()

>
>>
>> 1. PCI_NO_PASID -> IOMMU_NO_PASID: Convert PCI PASID to IOMMU PASID in
>>     vtd_find_add_as() and cache in vtd_as->pasid.
>> 2. IOMMU_NO_PASID -> PCI_NO_PASID: Convert when notifying UNMAP events
>>     via memory_region_notify_iommu() and returning IOMMUTLBEntry in
>>     vtd_iommu_translate().
>>
>> This eliminates conversion/checks in PASID table lookups, simplifies
>> invalidation logic with consistent PASID values, and improves code
>> readability. The PCI subsystem interface remains unchanged to maintain
>> compatibility with other IOMMU implementations that may not use PASID 0
>> for requests-without-PASID.
>>
>> Suggested-by: Clement Mathieu--Drif <[email protected]>
>> Signed-off-by: Zhenzhong Duan <[email protected]>
>> ---
>>   include/system/memory.h     |   2 +-
>>   hw/i386/intel_iommu.c       | 164 +++++++++++++++++-------------------
>>   hw/i386/intel_iommu_accel.c |   2 +-
>>   3 files changed, 80 insertions(+), 88 deletions(-)
>>
>> diff --git a/include/system/memory.h b/include/system/memory.h
>> index 1417132f6d..1edb38b07d 100644
>> --- a/include/system/memory.h
>> +++ b/include/system/memory.h
>> @@ -150,7 +150,7 @@ struct IOMMUTLBEntry {
>>       hwaddr           translated_addr;
>>       hwaddr           addr_mask;  /* 0xfff = 4k translation */
>>       IOMMUAccessFlags perm;
>> -    uint32_t         pasid;
>> +    uint32_t         pasid; /* PCI pasid */
>>   };
>>
>>   /*
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 5e5dcdc274..b50c556c40 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -938,12 +938,8 @@ static int
>vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
>>   int vtd_ce_get_pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce,
>>                              VTDPASIDEntry *pe, uint32_t pasid)
>>   {
>> -    dma_addr_t pasid_dir_base;
>> +    dma_addr_t pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
>>
>> -    if (pasid == PCI_NO_PASID) {
>> -        pasid = IOMMU_NO_PASID;
>> -    }
>> -    pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
>>       return vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
>>   }
>>
>> @@ -953,15 +949,10 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
>>                                   uint32_t pasid)
>>   {
>>       int ret;
>> -    dma_addr_t pasid_dir_base;
>> +    dma_addr_t pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
>>       VTDPASIDDirEntry pdire;
>>       VTDPASIDEntry pe;
>>
>> -    if (pasid == PCI_NO_PASID) {
>> -        pasid = IOMMU_NO_PASID;
>> -    }
>> -    pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
>> -
>>       /*
>>        * No present bit check since fpd is meaningful even
>>        * if the present bit is clear.
>> @@ -1750,7 +1741,7 @@ static bool
>vtd_switch_address_space(VTDAddressSpace *as)
>>            *
>>            * Need to disable ir for as with PASID.
>>            */
>> -        if (as->pasid != PCI_NO_PASID) {
>> +        if (as->pasid != IOMMU_NO_PASID) {
>>               memory_region_set_enabled(&as->iommu_ir, false);
>>           } else {
>>               memory_region_set_enabled(&as->iommu_ir, true);
>> @@ -1780,7 +1771,7 @@ static bool
>vtd_switch_address_space(VTDAddressSpace *as)
>>        * We enable per as memory region (iommu_ir_fault) for catching
>>        * the translation for interrupt range through PASID + PT.
>>        */
>> -    if (pt && as->pasid != PCI_NO_PASID) {
>> +    if (pt && as->pasid != IOMMU_NO_PASID) {
>>           memory_region_set_enabled(&as->iommu_ir_fault, true);
>>       } else {
>>           memory_region_set_enabled(&as->iommu_ir_fault, false);
>> @@ -1892,7 +1883,7 @@ static VTDAddressSpace
>*vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s,
>>
>>   VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
>>   {
>> -    return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID);
>> +    return vtd_get_as_by_sid_and_pasid(s, sid, IOMMU_NO_PASID);
>>   }
>>
>>   static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
>> @@ -2121,10 +2112,6 @@ static bool
>vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>
>>       vtd_iommu_lock(s);
>>
>> -    if (pasid == PCI_NO_PASID && s->root_scalable) {
>> -        pasid = IOMMU_NO_PASID;
>> -    }
>> -
>>       /* Try to fetch pte from IOTLB */
>>       iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
>>       if (iotlb_entry) {
>> @@ -2235,7 +2222,7 @@ static bool
>vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>>       if (ret_fr) {
>>           if (!vtd_is_recoverable_fault(-ret_fr, iommu_idx)) {
>>               vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
>> -                            addr, is_write, pasid != PCI_NO_PASID, pasid);
>> +                            addr, is_write, s->root_scalable, pasid);
>
>a typo here? s->root_scalable should be "pasid != IOMMU_NO_PASID"?

This is intentional.
pasid = vtd_as->pasid = iommu_pasid, so it never equals PCI_NO_PASID now.

Thanks
Zhenzhong

>
>Other part LGTM.
>
>Regards,
>Yi Liu
>
>>           }
>>           goto error;
>>       }
>> @@ -2489,7 +2476,7 @@ static void
>vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
>>   /*
>>    * There is no pasid field in iotlb invalidation descriptor, so 
>> IOMMU_NO_PASID
>>    * is passed as parameter. Piotlb invalidation supports pasid, pasid in its
>> - * descriptor is passed which should not be PCI_NO_PASID.
>> + * descriptor is passed.
>>    */
>>   static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
>>                                                uint16_t domain_id, hwaddr 
>> addr,
>> @@ -2503,48 +2490,41 @@ static void
>vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
>>       QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
>>           ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>>                                          vtd_as->devfn, &ce);
>> -        if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>> +        if (ret || vtd_as->pasid != pasid ||
>> +            domain_id != vtd_get_domain_id(s, &ce, pasid)) {
>> +            continue;
>> +        }
>> +
>> +        if (vtd_as_has_map_notifier(vtd_as)) {
>>               /*
>> -             * In legacy mode, vtd_as->pasid == pasid is always true.
>> -             * In scalable mode, for vtd address space backing a PCI
>> -             * device without pasid, needs to compare pasid with
>> -             * IOMMU_NO_PASID of this device.
>> +             * When first stage translation is off, as long as we have MAP
>> +             * notifications registered in any of our IOMMU notifiers,
>> +             * we need to sync the shadow page table. Otherwise VFIO
>> +             * device attaches to nested page table instead of shadow
>> +             * page table, so no need to sync.
>>                */
>> -            if (!(vtd_as->pasid == pasid ||
>> -                  (vtd_as->pasid == PCI_NO_PASID && pasid == 
>> IOMMU_NO_PASID))) {
>> -                continue;
>> -            }
>> -
>> -            if (vtd_as_has_map_notifier(vtd_as)) {
>> -                /*
>> -                 * When first stage translation is off, as long as we have 
>> MAP
>> -                 * notifications registered in any of our IOMMU notifiers,
>> -                 * we need to sync the shadow page table. Otherwise VFIO
>> -                 * device attaches to nested page table instead of shadow
>> -                 * page table, so no need to sync.
>> -                 */
>> -                if (!s->fsts || !s->root_scalable) {
>> -                    vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, 
>> size);
>> -                }
>> -            } else {
>> -                /*
>> -                 * For UNMAP-only notifiers, we don't need to walk the
>> -                 * page tables.  We just deliver the PSI down to
>> -                 * invalidate caches.
>> -                 */
>> -                const IOMMUTLBEvent event = {
>> -                    .type = IOMMU_NOTIFIER_UNMAP,
>> -                    .entry = {
>> -                        .target_as = &address_space_memory,
>> -                        .iova = addr,
>> -                        .translated_addr = 0,
>> -                        .addr_mask = size - 1,
>> -                        .perm = IOMMU_NONE,
>> -                        .pasid = vtd_as->pasid,
>> -                    },
>> -                };
>> -                memory_region_notify_iommu(&vtd_as->iommu, 0, event);
>> +            if (!s->fsts || !s->root_scalable) {
>> +                vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
>>               }
>> +        } else {
>> +            /*
>> +             * For UNMAP-only notifiers, we don't need to walk the
>> +             * page tables.  We just deliver the PSI down to
>> +             * invalidate caches.
>> +             */
>> +            const IOMMUTLBEvent event = {
>> +                .type = IOMMU_NOTIFIER_UNMAP,
>> +                .entry = {
>> +                    .target_as = &address_space_memory,
>> +                    .iova = addr,
>> +                    .translated_addr = 0,
>> +                    .addr_mask = size - 1,
>> +                    .perm = IOMMU_NONE,
>> +                     /* Other sub-systems use PCI pasid */
>> +                    .pasid = pasid == IOMMU_NO_PASID ? PCI_NO_PASID : pasid,
>> +                },
>> +            };
>> +            memory_region_notify_iommu(&vtd_as->iommu, 0, event);
>>           }
>>       }
>>   }
>> @@ -3007,6 +2987,7 @@ static void
>vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>>       VTDIOTLBPageInvInfo info;
>>       VTDAddressSpace *vtd_as;
>>       VTDContextEntry ce;
>> +    int ret;
>>
>>       info.domain_id = domain_id;
>>       info.pasid = pasid;
>> @@ -3019,17 +3000,15 @@ static void
>vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
>>       vtd_iommu_unlock(s);
>>
>>       QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
>> -        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>> -                                      vtd_as->devfn, &ce) &&
>> -            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
>> -            if ((vtd_as->pasid != PCI_NO_PASID || pasid != IOMMU_NO_PASID) 
>> &&
>> -                vtd_as->pasid != pasid) {
>> -                continue;
>> -            }
>> +        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>> +                                       vtd_as->devfn, &ce);
>> +        if (ret || vtd_as->pasid != pasid ||
>> +            domain_id != vtd_get_domain_id(s, &ce, pasid)) {
>> +            continue;
>> +        }
>>
>> -            if (!s->fsts || !vtd_as_has_map_notifier(vtd_as)) {
>> -                vtd_address_space_sync(vtd_as);
>> -            }
>> +        if (!s->fsts || !vtd_as_has_map_notifier(vtd_as)) {
>> +            vtd_address_space_sync(vtd_as);
>>           }
>>       }
>>   }
>> @@ -3239,7 +3218,7 @@ static bool vtd_process_pasid_desc(IntelIOMMUState
>*s,
>>           /* PASID selective implies a DID selective */
>>           trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
>>           pc_info.did = did;
>> -        pc_info.pasid = pasid ?: PCI_NO_PASID;
>> +        pc_info.pasid = pasid;
>>           break;
>>
>>       case VTD_INV_DESC_PASIDC_G_GLOBAL:
>> @@ -3291,6 +3270,7 @@ static void
>do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
>>        * ...
>>        */
>>
>> +    uint32_t pasid = vtd_dev_as->pasid;
>>       IOMMUTLBEvent event;
>>       uint64_t sz;
>>
>> @@ -3307,7 +3287,8 @@ static void
>do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
>>       event.entry.iova = addr;
>>       event.entry.perm = IOMMU_NONE;
>>       event.entry.translated_addr = 0;
>> -    event.entry.pasid = vtd_dev_as->pasid;
>> +    /* Other sub-systems use PCI pasid */
>> +    event.entry.pasid = pasid == IOMMU_NO_PASID ? PCI_NO_PASID : pasid;
>>       memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
>>   }
>>
>> @@ -3335,7 +3316,7 @@ static bool
>vtd_process_device_piotlb_desc(IntelIOMMUState *s,
>>       sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo);
>>       if (global) {
>>           QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) {
>> -            if ((vtd_dev_as->pasid != PCI_NO_PASID) &&
>> +            if ((vtd_dev_as->pasid != IOMMU_NO_PASID) &&
>>                   (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus),
>>                                              vtd_dev_as->devfn) == sid)) {
>>                   do_invalidate_device_tlb(vtd_dev_as, size, addr);
>> @@ -3983,13 +3964,12 @@ static void vtd_mem_write(void *opaque, hwaddr
>addr,
>>   }
>>
>>   static void vtd_prepare_identity_entry(hwaddr addr, IOMMUAccessFlags perm,
>> -                                         uint32_t pasid, IOMMUTLBEntry 
>> *iotlb)
>> +                                       IOMMUTLBEntry *iotlb)
>>   {
>>       iotlb->iova = addr & VTD_PAGE_MASK_4K;
>>       iotlb->translated_addr = addr & VTD_PAGE_MASK_4K;
>>       iotlb->addr_mask = ~VTD_PAGE_MASK_4K;
>>       iotlb->perm = perm;
>> -    iotlb->pasid = pasid;
>>   }
>>
>>   static inline void vtd_prepare_error_entry(IOMMUTLBEntry *entry)
>> @@ -4001,6 +3981,10 @@ static inline void
>vtd_prepare_error_entry(IOMMUTLBEntry *entry)
>>       entry->pasid = PCI_NO_PASID;
>>   }
>>
>> +/*
>> + * This function returns translation result to other sub-system such as PCI,
>> + * so iommu pasid is converted to PCI pasid and returned in IOMMUTLBEntry.
>> + */
>>   static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu,
>hwaddr addr,
>>                                            IOMMUAccessFlags flag, int 
>> iommu_idx)
>>   {
>> @@ -4009,7 +3993,7 @@ static IOMMUTLBEntry
>vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
>>       IOMMUTLBEntry iotlb = {
>>           /* We'll fill in the rest later. */
>>           .target_as = &address_space_memory,
>> -        .pasid = vtd_as->pasid,
>> +        .pasid = vtd_as->pasid == IOMMU_NO_PASID ? PCI_NO_PASID : vtd_as-
>>pasid,
>>       };
>>       bool success;
>>       bool is_write = flag & IOMMU_WO;
>> @@ -4017,9 +4001,8 @@ static IOMMUTLBEntry
>vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
>>       if (likely(s->dmar_enabled)) {
>>           /* Only support translated requests in scalable mode */
>>           if (iommu_idx == VTD_IDX_TRANSLATED && s->root_scalable) {
>> -            if (vtd_as->pasid == PCI_NO_PASID) {
>> -                vtd_prepare_identity_entry(addr, IOMMU_RW, PCI_NO_PASID,
>> -                                           &iotlb);
>> +            if (vtd_as->pasid == IOMMU_NO_PASID) {
>> +                vtd_prepare_identity_entry(addr, IOMMU_RW, &iotlb);
>>                   success = true;
>>               } else {
>>                   vtd_prepare_error_entry(&iotlb);
>> @@ -4034,7 +4017,7 @@ static IOMMUTLBEntry
>vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
>>           }
>>       } else {
>>           /* DMAR disabled, passthrough, use 4k-page*/
>> -        vtd_prepare_identity_entry(addr, IOMMU_RW, vtd_as->pasid, &iotlb);
>> +        vtd_prepare_identity_entry(addr, IOMMU_RW, &iotlb);
>>           success = true;
>>       }
>>
>> @@ -4460,7 +4443,7 @@ static void
>vtd_report_sid_ir_illegal_access(IntelIOMMUState *s, uint16_t sid,
>>       }
>>
>>       vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR, is_fpd_set, sid, addr,
>> -                     is_write, pasid != PCI_NO_PASID, pasid);
>> +                     is_write, pasid != IOMMU_NO_PASID, pasid);
>>   }
>>
>>   static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
>> @@ -4488,7 +4471,6 @@ static MemTxResult vtd_mem_ir_write(void *opaque,
>hwaddr addr,
>>       int ret = 0;
>>       MSIMessage from = {}, to = {};
>>       uint16_t sid = X86_IOMMU_SID_INVALID;
>> -    uint32_t pasid;
>>
>>       from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
>>       from.data = (uint32_t) value;
>> @@ -4496,11 +4478,11 @@ static MemTxResult vtd_mem_ir_write(void
>*opaque, hwaddr addr,
>>       if (!attrs.unspecified) {
>>           /* We have explicit Source ID */
>>           sid = attrs.requester_id;
>> -        pasid = attrs.pid != 0 ? attrs.pid : PCI_NO_PASID;
>>
>>           if (attrs.address_type == PCI_AT_TRANSLATED &&
>>               sid != X86_IOMMU_SID_INVALID) {
>> -            vtd_report_sid_ir_illegal_access(s, sid, pasid, from.address, 
>> true);
>> +            vtd_report_sid_ir_illegal_access(s, sid, attrs.pid, 
>> from.address,
>> +                                             true);
>>               return MEMTX_ERROR;
>>           }
>>       }
>> @@ -4562,9 +4544,19 @@ static const MemoryRegionOps
>vtd_mem_ir_fault_ops = {
>>       },
>>   };
>>
>> +/*
>> + * This function is called by many PCIIOMMUOps callbacks to get
>> + * VTDAddressSpace or create one if non-exist. Those callbacks are
>> + * used by PCI sub-system and are passed in a PCI pasid value.
>> + *
>> + * VTD honors iommu pasid, so the first thing is to convert PCI
>> + * pasid to iommu pasid.
>> + */
>>   VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>                                    int devfn, unsigned int pasid)
>>   {
>> +    pasid = pasid == PCI_NO_PASID ? IOMMU_NO_PASID : pasid;
>> +
>>       /*
>>        * We can't simply use sid here since the bus number might not be
>>        * initialized by the guest.
>> @@ -4606,7 +4598,7 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>           new_key->devfn = devfn;
>>           new_key->pasid = pasid;
>>
>> -        if (pasid == PCI_NO_PASID) {
>> +        if (pasid == IOMMU_NO_PASID) {
>>               snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
>>                        PCI_FUNC(devfn));
>>           } else {
>> @@ -5290,7 +5282,7 @@ error_get_fpd_and_report:
>>       vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
>>   error_report:
>>       vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
>> -                     vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
>> +                     vtd_as->pasid != IOMMU_NO_PASID, vtd_as->pasid);
>>       return false;
>>   }
>>
>> @@ -5381,7 +5373,7 @@ static int vtd_pri_request_page(PCIBus *bus, void
>*opaque, int devfn,
>>        */
>>
>>       /* We do not support PRI without PASID */
>> -    if (vtd_as->pasid == PCI_NO_PASID) {
>> +    if (vtd_as->pasid == IOMMU_NO_PASID) {
>>           return -EPERM;
>>       }
>>       if (exec_req && !is_read) {
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index 8940d240a1..10bdbba632 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -207,7 +207,7 @@ static void vtd_flush_host_piotlb_locked(gpointer key,
>gpointer value,
>>           return;
>>       }
>>
>> -    assert(vtd_as->pasid == PCI_NO_PASID);
>> +    assert(vtd_as->pasid == IOMMU_NO_PASID);
>>
>>       /* Nothing to do if there is no first stage HWPT attached */
>>       if (!pc_entry->valid ||

Reply via email to