Hi Shameer,
On 10/31/25 11:50 AM, Shameer Kolothum wrote:
> From: Yi Liu <[email protected]>
>
> If user wants to expose PASID capability in vIOMMU, then VFIO would also
need to report?
> report the PASID cap for this device if the underlying hardware supports
> it as well.
>
> As a start, this chooses to put the vPASID cap in the last 8 bytes of the
> vconfig space. This is a choice in the good hope of no conflict with any
> existing cap or hidden registers. For the devices that has hidden registers,
> user should figure out a proper offset for the vPASID cap. This may require
> an option for user to config it. Here we leave it as a future extension.
> There are more discussions on the mechanism of finding the proper offset.
>
> https://lore.kernel.org/kvm/bn9pr11mb5276318969a212ad0649c7be8c...@bn9pr11mb5276.namprd11.prod.outlook.com/
>
> Since we add a check to ensure the vIOMMU supports PASID, only devices
> under those vIOMMUs can synthesize the vPASID capability. This gives
> users control over which devices expose vPASID.
>
> Signed-off-by: Yi Liu <[email protected]>
> Tested-by: Zhangfei Gao <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> ---
>  hw/vfio/pci.c      | 37 +++++++++++++++++++++++++++++++++++++
>  include/hw/iommu.h |  1 +
>  2 files changed, 38 insertions(+)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 06b06afc2b..2054eac897 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -24,6 +24,7 @@
>  #include <sys/ioctl.h>
>  
>  #include "hw/hw.h"
> +#include "hw/iommu.h"
>  #include "hw/pci/msi.h"
>  #include "hw/pci/msix.h"
>  #include "hw/pci/pci_bridge.h"
> @@ -2500,7 +2501,12 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, 
> uint16_t pos)
>  
>  static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
>  {
> +    HostIOMMUDevice *hiod = vdev->vbasedev.hiod;
> +    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>      PCIDevice *pdev = PCI_DEVICE(vdev);
> +    uint64_t max_pasid_log2 = 0;
> +    bool pasid_cap_added = false;
> +    uint64_t hw_caps;
>      uint32_t header;
>      uint16_t cap_id, next, size;
>      uint8_t cap_ver;
> @@ -2578,12 +2584,43 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
>                  pcie_add_capability(pdev, cap_id, cap_ver, next, size);
>              }
>              break;
> +        case PCI_EXT_CAP_ID_PASID:
> +             pasid_cap_added = true;
> +             /* fallthrough */
>          default:
>              pcie_add_capability(pdev, cap_id, cap_ver, next, size);
>          }
>  
>      }
>  
> +#ifdef CONFIG_IOMMUFD
> +    /*
> +     * Although we check for PCI_EXT_CAP_ID_PASID above, the Linux VFIO
> +     * framework currently hides this capability. Try to retrieve it
> +     * through alternative kernel interfaces (e.g. IOMMUFD APIs).
I don't catch this sentence . When are you supposed to read above
PCI_EXT_CAP_ID_PASID cap id then?
> +     */
> +    if (!pasid_cap_added && hiodc->get_cap) {
> +        hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_GENERIC_HW, &hw_caps, 
> NULL);
> +        hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_MAX_PASID_LOG2,
> +                       &max_pasid_log2, NULL);
> +    }
> +
> +    /*
> +     * If supported, adds the PASID capability in the end of the PCIe config
> +     * space. TODO: Add option for enabling pasid at a safe offset.
> +     */
> +    if (max_pasid_log2 && (pci_device_get_viommu_flags(pdev) &
> +                           VIOMMU_FLAG_PASID_SUPPORTED)) {
> +        bool exec_perm = (hw_caps & IOMMU_HW_CAP_PCI_PASID_EXEC) ? true : 
> false;
can't you direct set exec_perm = (hw_caps & IOMMU_HW_CAP_PCI_PASID_EXEC);
> +        bool priv_mod = (hw_caps & IOMMU_HW_CAP_PCI_PASID_PRIV) ? true : 
> false;
> +
> +        pcie_pasid_init(pdev, PCIE_CONFIG_SPACE_SIZE - 
> PCI_EXT_CAP_PASID_SIZEOF,
> +                        max_pasid_log2, exec_perm, priv_mod);
> +        /* PASID capability is fully emulated by QEMU */
> +        memset(vdev->emulated_config_bits + pdev->exp.pasid_cap, 0xff, 8);
> +    }
> +#endif
> +
>      /* Cleanup chain head ID if necessary */
>      if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
>          pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
> diff --git a/include/hw/iommu.h b/include/hw/iommu.h
> index 9b8bb94fc2..9635770bee 100644
> --- a/include/hw/iommu.h
> +++ b/include/hw/iommu.h
> @@ -20,6 +20,7 @@
>  enum viommu_flags {
>      /* vIOMMU needs nesting parent HWPT to create nested HWPT */
>      VIOMMU_FLAG_WANT_NESTING_PARENT = BIT_ULL(0),
> +    VIOMMU_FLAG_PASID_SUPPORTED = BIT_ULL(1),
>  };
>  
>  #endif /* HW_IOMMU_H */
Thanks

Eric


Reply via email to