On 10/24/25 10:43 AM, Zhenzhong Duan wrote:
> From: Yi Liu <[email protected]>
>
> This replays guest pasid bindings after context cache invalidation.
> Actually, programmer should issue pasid cache invalidation with proper
> granularity after issuing context cache invalidation.
>
> We see old linux such as 6.7.0-rc2 not following the spec, it sends
> pasid cache invalidation before context cache invalidation, then QEMU
> depends on context cache invalidation to get pasid entry and setup
> binding.
>
> Signed-off-by: Yi Liu <[email protected]>
> Signed-off-by: Zhenzhong Duan <[email protected]>
> ---
>  hw/i386/intel_iommu.c | 47 +++++++++++++++++++++++++++++++++++++++++++
>  hw/i386/trace-events  |  1 +
>  2 files changed, 48 insertions(+)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 1f78274204..edd1416382 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -93,6 +93,8 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>  static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>  static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s);
> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> +                                        gpointer user_data);
>  
>  static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>  {
> @@ -2388,6 +2390,13 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>       * VT-d emulation codes.
>       */
>      vtd_iommu_replay_all(s);
> +    /*
> +     * Same for pasid cache invalidation, per VT-d spec 6.5.2.1, a global
> +     * context cache invalidation should be followed by global PASID cache
> +     * invalidation. In order to work with guest not following spec,
> +     * handle global PASID cache invalidation here.
> +     */
> +    vtd_replay_pasid_bindings_all(s);
>  }
>  
>  #ifdef CONFIG_IOMMUFD
> @@ -2589,6 +2598,35 @@ vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>  }
>  #endif
>  
> +static void vtd_pasid_cache_devsi(VTDAddressSpace *vtd_as)
> +{
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    PCIBus *bus = vtd_as->bus;
> +    uint8_t devfn = vtd_as->devfn;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +        .pasid = vtd_as->pasid,
> +    };
> +    VTDPASIDCacheInfo pc_info;
> +
> +    if (!s->fsts || !s->root_scalable || !s->dmar_enabled) {
> +        return;
> +    }
> +
> +    trace_vtd_pasid_cache_devsi(pci_bus_num(bus),
> +                                VTD_PCI_SLOT(devfn), VTD_PCI_FUNC(devfn));
> +
> +    /* We fake to be global invalidation just to bypass all checks */
Can you clarify which checks are bypassed, and why you want to bypass them?
> +    pc_info.type = VTD_INV_DESC_PASIDC_G_GLOBAL;
> +
> +    /*
> +     * We already get vtd_as of the device whose PASID cache is invalidated,
s/get/got/. Not sure the comment is worth keeping.
> +     * so just call vtd_pasid_cache_sync_locked() once.
> +     */
> +    vtd_pasid_cache_sync_locked(&key, vtd_as, &pc_info);
> +}
> +
>  /* Do a context-cache device-selective invalidation.
>   * @func_mask: FM field after shifting
>   */
> @@ -2647,6 +2685,15 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>               * happened.
>               */
>              vtd_address_space_sync(vtd_as);
> +            /*
> +             * Per spec 6.5.2.1, context flush should be followed by PASID
> +             * cache and iotlb flush. In order to work with a guest which does
> +             * not follow spec and missed PASID cache flush, e.g., linux
> +             * 6.7.0-rc2, we have vtd_pasid_cache_devsi() to invalidate PASID
> +             * cache of passthrough device. Host iommu driver would flush
> +             * piotlb when a pasid unbind is pass down to it.
s/is pass down/is passed down/
> +             */
> +            vtd_pasid_cache_devsi(vtd_as);
>          }
>      }
>  }
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 5a3ee1cf64..5fa5e93b68 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
>  vtd_inv_desc_pasid_cache_gsi(void) ""
>  vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
>  vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
> +vtd_pasid_cache_devsi(uint8_t bus, uint8_t dev, uint8_t fn) "Dev selective PC invalidation dev: %02"PRIx8":%02"PRIx8".%02"PRIx8
>  vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
>  vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
>  vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
Besides the comments above:

Reviewed-by: Eric Auger <[email protected]>

Eric


Reply via email to