On 6/12/26 03:43, Nathan Chen wrote:
From: Nathan Chen <[email protected]>
Add an "ats" OnOffAuto property to vfio-pci. When the device has an ATS
extended capability in config space but we should not expose it (ats=off,
or ats=auto and kernel reports IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED), mask
the capability so the guest does not see it.
If ATS is explicitly requested but not supported by the kernel, fail
device realize.
This aligns with the kernel's per-device effective ATS reporting and allows
vfio-pci to mask ATS when the host kernel reports ATS as unsupported.
Suggested-by: Shameer Kolothum <[email protected]>
Signed-off-by: Nathan Chen <[email protected]>
---
hw/vfio/pci.h | 1 +
hw/vfio/pci.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 72 insertions(+), 3 deletions(-)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index c3a1f53d35..b48138870e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -191,6 +191,7 @@ struct VFIOPCIDevice {
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
VFIOPCICPR cpr;
+ OnOffAuto ats;
I would move this new property field close to the other property fields
in VFIOPCIDevice.
};
/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9c06b25e63..6edb3bf5db 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2546,7 +2546,49 @@ static bool vfio_pci_synthesize_pasid_cap(VFIOPCIDevice
*vdev, Error **errp)
return true;
}
-static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
+/*
+ * Determine whether ATS capability should be advertised for @vdev, based on
+ * whether it was enabled on the command line and whether it is supported
+ * according to the kernel's IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED bit.
+ *
+ * Store whether ATS capability should be advertised in @ats_needed.
+ *
+ * Returns false only when ats=on is explicitly requested but the kernel
+ * reports IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED. Returns true in all other cases.
+ */
+static bool vfio_pci_ats_requested_and_supported(VFIOPCIDevice *vdev,
+ bool *ats_needed, Error
**errp)
+{
+ HostIOMMUDevice *hiod = vdev->vbasedev.hiod;
+ HostIOMMUDeviceClass *hiodc;
+ bool ats_supported;
+
+ if (vdev->ats == ON_OFF_AUTO_OFF) {
+ *ats_needed = false;
Why not set '*ats_needed' to false by default here too. I see it is done in
vfio_pci_add_capabilities() but it looks safer.
+ return true;
+ }
+
+ *ats_needed = true;
+ if (!hiod) {
+ return true;
+ }
+ hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+ if (!hiodc || !hiodc->support_ats) {
+ return true;
+ }
+
+ ats_supported = hiodc->support_ats(hiod);
+ if (vdev->ats == ON_OFF_AUTO_ON && !ats_supported) {
+ error_setg(errp, "vfio: ATS requested but not supported by kernel");
"vfio-pci: ..."
+ *ats_needed = false;
+ return false;
+ }
+
+ *ats_needed = ats_supported;
+ return true;
+}
+
+static void vfio_add_ext_cap(VFIOPCIDevice *vdev, bool ats_needed)
{
PCIDevice *pdev = PCI_DEVICE(vdev);
bool pasid_cap_added = false;
@@ -2635,7 +2677,18 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
*/
case PCI_EXT_CAP_ID_PASID:
pasid_cap_added = true;
- /* fallthrough */
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ break;
+ case PCI_EXT_CAP_ID_ATS:
+ /*
+ * If ATS is requested and supported according to the kernel, add
+ * the ATS capability. If not supported according to the kernel or
+ * disabled on the qemu command line, omit the ATS cap.
+ */
+ if (ats_needed) {
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ }
+ break;
default:
pcie_add_capability(pdev, cap_id, cap_ver, next, size);
}
@@ -2657,6 +2710,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = PCI_DEVICE(vdev);
+ bool ats_needed = false;
if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
!pdev->config[PCI_CAPABILITY_LIST]) {
@@ -2667,7 +2721,11 @@ bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev,
Error **errp)
return false;
}
- vfio_add_ext_cap(vdev);
+ if (!vfio_pci_ats_requested_and_supported(vdev, &ats_needed, errp)) {
+ return false;
+ }
+
+ vfio_add_ext_cap(vdev, ats_needed);
return true;
}
@@ -3815,6 +3873,7 @@ static const Property vfio_pci_properties[] = {
DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true),
DEFINE_PROP_UINT16("x-vpasid-cap-offset", VFIOPCIDevice,
vpasid_cap_offset, 0),
+ DEFINE_PROP_ON_OFF_AUTO("ats", VFIOPCIDevice, ats, ON_OFF_AUTO_AUTO),
};
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
@@ -3973,6 +4032,15 @@ static void vfio_pci_class_init(ObjectClass *klass,
const void *data)
"a vIOMMU. A value of 0 (default) places
the capability at the "
"end of the extended configuration
space. The offset must be "
"4-byte aligned and within the PCIe
extended configuration space");
+ object_class_property_set_description(klass, /* 11.1 */
+ "ats",
+ "Control guest visibility of the ATS PCIe
extended capability. "
+ "Valid values are on, off, and auto
(default). "
+ "'off' always masks ATS. "
+ "'on' requires ATS support for the device
and fails realize if the "
+ "host kernel reports
IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED. "
Again, IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED is a kernel UAPI bit and it is
a bit too low-level/internal a QEMU for property documentation.
What about :
"'on' requires ATS support for the device and fails realize if the "
"host kernel reports ATS as unavailable for this device. "
"'auto' masks ATS only when the host kernel reports ATS as unavailable."
?
Thanks,
C.
+ "'auto' masks ATS only when the host
kernel reports "
+
"IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED.");
}
static const TypeInfo vfio_pci_info = {