Add support for synthesizing a PCIe PASID extended capability for vfio-pci devices when PASID is enabled via a vIOMMU and supported by the host IOMMU backend.
PASID capability parameters are retrieved via IOMMUFD APIs and the capability is inserted into the PCIe extended capability list using the insertion helper. A new x-vpasid-cap-offset property allows explicit control over the placement; by default the capability is placed at the end of the PCIe extended configuration space. If the kernel does not expose PASID information or insertion fails, the device continues without PASID support. Signed-off-by: Shameer Kolothum <[email protected]> --- hw/vfio/pci.c | 84 +++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.h | 1 + include/hw/core/iommu.h | 1 + 3 files changed, 86 insertions(+) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c734472721..96990576ac 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -24,6 +24,7 @@ #include <sys/ioctl.h> #include "hw/core/hw-error.h" +#include "hw/core/iommu.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/pci/pci_bridge.h" @@ -2498,9 +2499,71 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) return 0; } +/* + * Try to retrieve PASID capability information via IOMMUFD APIs and, + * if supported, synthesize a PASID PCIe extended capability for the + * VFIO device. + * + * Use user-specified PASID capability offset if provided, otherwise + * place it at the end of the PCIe extended configuration space. + */ +static void vfio_pci_synthesize_pasid_cap(VFIOPCIDevice *vdev) +{ + HostIOMMUDevice *hiod = vdev->vbasedev.hiod; + HostIOMMUDeviceClass *hiodc; + PasidInfo pasid_info; + PCIDevice *pdev = PCI_DEVICE(vdev); + uint16_t pasid_offset; + + if (vdev->vbasedev.mdev) { + return; + } + + hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + if (!hiodc || !hiodc->get_pasid_info || + !hiodc->get_pasid_info(hiod, &pasid_info) || + !(pci_device_get_viommu_flags(pdev) & VIOMMU_FLAG_PASID_SUPPORTED)) { + return; + } + + /* + * Check if user has specified an offset to place PASID CAP, + * else select the last offset as default + */ + if (vdev->vpasid_cap_offset) { + if (!QEMU_IS_ALIGNED(vdev->vpasid_cap_offset, PCI_EXT_CAP_ALIGN) || + vdev->vpasid_cap_offset < PCI_CONFIG_SPACE_SIZE || + vdev->vpasid_cap_offset + PCI_EXT_CAP_PASID_SIZEOF > + PCIE_CONFIG_SPACE_SIZE) { + error_report("vfio: invalid x-vpasid-cap-offset 0x%x, skipping PASID", + vdev->vpasid_cap_offset); + return; + } + pasid_offset = vdev->vpasid_cap_offset; + } else { + pasid_offset = PCIE_CONFIG_SPACE_SIZE - PCI_EXT_CAP_PASID_SIZEOF; + warn_report("vfio: PASID capability offset(x-vpasid-cap-offset) not specified, " + "placing at the default offset 0x%x", + pasid_offset); + } + + if (!pcie_insert_capability(pdev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, + pasid_offset, PCI_EXT_CAP_PASID_SIZEOF)) { + error_report("vfio: Placing PASID capability at offset 0x%x failed", + pasid_offset); + } + pcie_pasid_common_init(pdev, pasid_offset, pasid_info.max_pasid_log2, + pasid_info.exec_perm, pasid_info.priv_mod); + + /* PASID capability is fully emulated by QEMU */ + memset(vdev->emulated_config_bits + pdev->exp.pasid_cap, 0xff, + PCI_EXT_CAP_PASID_SIZEOF); +} + static void vfio_add_ext_cap(VFIOPCIDevice *vdev) { PCIDevice *pdev = PCI_DEVICE(vdev); + bool pasid_cap_added = false; uint32_t header; uint16_t cap_id, next, size; uint8_t cap_ver; @@ -2578,12 +2641,24 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) pcie_add_capability(pdev, cap_id, cap_ver, next, size); } break; + /* + * VFIO kernel does not expose the PASID CAP today. We may synthesize + * one later through IOMMUFD APIs. If VFIO ever starts exposing it, + * record its presence here so we do not create a duplicate CAP. + */ + case PCI_EXT_CAP_ID_PASID: + pasid_cap_added = true; + /* fallthrough */ default: pcie_add_capability(pdev, cap_id, cap_ver, next, size); } } + if (!pasid_cap_added) { + vfio_pci_synthesize_pasid_cap(vdev); + } + /* Cleanup chain head ID if necessary */ if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) { pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0); @@ -3756,6 +3831,8 @@ static const Property vfio_pci_properties[] = { TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), #endif DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true), + DEFINE_PROP_UINT16("x-vpasid-cap-offset", VFIOPCIDevice, + vpasid_cap_offset, 0), }; #ifdef CONFIG_IOMMUFD @@ -3913,6 +3990,13 @@ static void vfio_pci_class_init(ObjectClass *klass, const void *data) "destination when doing live " "migration of device state via " "multifd channels"); + object_class_property_set_description(klass, /* 11.0 */ + "x-vpasid-cap-offset", + "PCIe extended configuration space offset at which to place a " + "synthetic PASID extended capability when PASID is enabled via " + "a vIOMMU. A value of 0 (default) places the capability at the " + "end of the extended configuration space. The offset must be " + "4-byte aligned and within the PCIe extended configuration space"); } static const TypeInfo vfio_pci_info = { diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 0f78cf9cdb..d6495d7f29 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -187,6 +187,7 @@ struct VFIOPCIDevice { bool defer_kvm_irq_routing; bool clear_parent_atomics_on_exit; bool skip_vsc_check; + uint16_t vpasid_cap_offset; VFIODisplay *dpy; Notifier irqchip_change_notifier; VFIOPCICPR cpr; diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h index 9b8bb94fc2..9635770bee 100644 --- a/include/hw/core/iommu.h +++ b/include/hw/core/iommu.h @@ -20,6 +20,7 @@ enum viommu_flags { /* vIOMMU needs nesting parent HWPT to create nested HWPT */ VIOMMU_FLAG_WANT_NESTING_PARENT = BIT_ULL(0), + VIOMMU_FLAG_PASID_SUPPORTED = BIT_ULL(1), }; #endif /* HW_IOMMU_H */ -- 2.43.0
