Add support for synthesizing a PCIe PASID extended capability for vfio-pci devices when PASID is enabled via a vIOMMU and supported by the host IOMMU backend.
PASID capability parameters are retrieved via IOMMUFD APIs and the capability is inserted into the PCIe extended capability list using the insertion helper. A new x-vpasid-cap-offset property allows explicit control over the placement; by default the capability is placed at the end of the PCIe extended configuration space. If the kernel does not expose PASID information or insertion fails, the device continues without PASID support. Reviewed-by: Jonathan Cameron <[email protected]> Tested-by: Eric Auger <[email protected]> Tested-by: Zhangfei Gao <[email protected]> Signed-off-by: Shameer Kolothum <[email protected]> --- hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.h | 1 + hw/vfio/trace-events | 1 + include/hw/core/iommu.h | 1 + 4 files changed, 78 insertions(+) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c734472721..36d8fbe872 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -24,6 +24,7 @@ #include <sys/ioctl.h> #include "hw/core/hw-error.h" +#include "hw/core/iommu.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/pci/pci_bridge.h" @@ -2498,9 +2499,62 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) return 0; } +/* + * Try to retrieve PASID capability information via IOMMUFD APIs and, + * if supported, synthesize a PASID PCIe extended capability for the + * VFIO device. + * + * Use user-specified PASID capability offset if provided, otherwise + * place it at the end of the PCIe extended configuration space. + */ +static bool vfio_pci_synthesize_pasid_cap(VFIOPCIDevice *vdev, Error **errp) +{ + HostIOMMUDevice *hiod = vdev->vbasedev.hiod; + HostIOMMUDeviceClass *hiodc; + PasidInfo pasid_info; + PCIDevice *pdev = PCI_DEVICE(vdev); + uint16_t pasid_offset; + + if (!hiod) { + return true; + } + + hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + if (!hiodc || !hiodc->get_pasid_info || + !hiodc->get_pasid_info(hiod, &pasid_info) || + !(pci_device_get_viommu_flags(pdev) & VIOMMU_FLAG_PASID_SUPPORTED)) { + return true; + } + + /* Use user-specified offset if set, otherwise place PASID at the end. */ + if (vdev->vpasid_cap_offset) { + pasid_offset = vdev->vpasid_cap_offset; + } else { + pasid_offset = PCIE_CONFIG_SPACE_SIZE - PCI_EXT_CAP_PASID_SIZEOF; + } + + if (!pcie_insert_capability(pdev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, + pasid_offset, PCI_EXT_CAP_PASID_SIZEOF)) { + error_setg(errp, "vfio: Placing PASID capability at offset 0x%x failed", + pasid_offset); + return false; + } + trace_vfio_pci_synthesize_pasid_cap(vdev->vbasedev.name, pasid_offset); + + pcie_pasid_common_init(pdev, pasid_offset, pasid_info.max_pasid_log2, + pasid_info.exec_perm, pasid_info.priv_mod); + + /* PASID capability is fully emulated by QEMU */ + memset(vdev->emulated_config_bits + pdev->exp.pasid_cap, 0xff, + PCI_EXT_CAP_PASID_SIZEOF); + return true; +} + static void vfio_add_ext_cap(VFIOPCIDevice *vdev) { PCIDevice *pdev = PCI_DEVICE(vdev); + bool pasid_cap_added = false; + Error *err = NULL; uint32_t header; uint16_t cap_id, next, size; uint8_t cap_ver; @@ -2578,12 +2632,24 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) pcie_add_capability(pdev, cap_id, cap_ver, next, size); } break; + /* + * VFIO kernel does not expose the PASID CAP today. We may synthesize + * one later through IOMMUFD APIs. If VFIO ever starts exposing it, + * record its presence here so we do not create a duplicate CAP. + */ + case PCI_EXT_CAP_ID_PASID: + pasid_cap_added = true; + /* fallthrough */ default: pcie_add_capability(pdev, cap_id, cap_ver, next, size); } } + if (!pasid_cap_added && !vfio_pci_synthesize_pasid_cap(vdev, &err)) { + error_report_err(err); + } + /* Cleanup chain head ID if necessary */ if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) { pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0); @@ -3756,6 +3822,8 @@ static const Property vfio_pci_properties[] = { TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), #endif DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true), + DEFINE_PROP_UINT16("x-vpasid-cap-offset", VFIOPCIDevice, + vpasid_cap_offset, 0), }; #ifdef CONFIG_IOMMUFD @@ -3913,6 +3981,13 @@ static void vfio_pci_class_init(ObjectClass *klass, const void *data) "destination when doing live " "migration of device state via " "multifd channels"); + object_class_property_set_description(klass, /* 11.0 */ + "x-vpasid-cap-offset", + "PCIe extended configuration space offset at which to place a " + "synthetic PASID extended capability when PASID is enabled via " + "a vIOMMU. A value of 0 (default) places the capability at the " + "end of the extended configuration space. The offset must be " + "4-byte aligned and within the PCIe extended configuration space"); } static const TypeInfo vfio_pci_info = { diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 0f78cf9cdb..d6495d7f29 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -187,6 +187,7 @@ struct VFIOPCIDevice { bool defer_kvm_irq_routing; bool clear_parent_atomics_on_exit; bool skip_vsc_check; + uint16_t vpasid_cap_offset; VFIODisplay *dpy; Notifier irqchip_change_notifier; VFIOPCICPR cpr; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 180e3d526b..b48c4abe7a 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -40,6 +40,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: % vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" +vfio_pci_synthesize_pasid_cap(const char *name, uint16_t offset) "%s offset: 0x%x" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" vfio_pci_reset(const char *name) " (%s)" vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h index d5401a397b..86af315c15 100644 --- a/include/hw/core/iommu.h +++ b/include/hw/core/iommu.h @@ -20,6 +20,7 @@ enum viommu_flags { /* vIOMMU needs nesting parent HWPT to create nested HWPT */ VIOMMU_FLAG_WANT_NESTING_PARENT = BIT_ULL(0), + VIOMMU_FLAG_PASID_SUPPORTED = BIT_ULL(1), }; /* Host IOMMU quirks. Extracted from host IOMMU capabilities */ -- 2.43.0
