On 6/15/26 22:48, Nathan Chen wrote:
From: Nathan Chen <[email protected]>
Add an "ats" OnOffAuto property to vfio-pci. When the device has an ATS
extended capability in config space but we should not expose it (ats=off,
or ats=auto and kernel reports IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED), mask
the capability so the guest does not see it.
If ATS is explicitly requested but not supported by the kernel, fail
device realize.
This aligns with the kernel's per-device effective ATS reporting and allows
vfio-pci to mask ATS when the host kernel reports ATS as unsupported.
Introduce a new compat for the changed default for vfio-pci device ATS
property, from "on" to "auto".
Suggested-by: Shameer Kolothum <[email protected]>
Signed-off-by: Nathan Chen <[email protected]>
---
hw/vfio/pci.h | 1 +
hw/core/machine.c | 1 +
hw/vfio/pci.c | 74 +++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index c3a1f53d35..f2934f2d84 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -188,6 +188,7 @@ struct VFIOPCIDevice {
bool clear_parent_atomics_on_exit;
bool skip_vsc_check;
uint16_t vpasid_cap_offset;
+ OnOffAuto ats;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
VFIOPCICPR cpr;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 4d8b15d99e..c0cdf7c9da 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_11_0[] = {
{ "chardev-vc", "encoding", "cp437" },
{ "tpm-crb", "cap-chunk", "off" },
{ "tpm-crb", "x-allow-chunk-migration", "off" },
+ { "vfio-pci", "ats", "on" },
This compat "ats=on" for old machine types (11.0 and below) should preserve
the old guest-visible behavior : ATS is always advertised regardless of
kernel capability.
Now, if the kernel is upgraded and reports IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED,
the VM will now fail to realize the vfio-pci device, fail to boot and the
backward compatibility across machine types is broken ... Tough.
We could argue that the previous behavior was wrong but should be "bug"
compatible too.
So, we could :
1. ignore,
2. use a compat "ats=auto", which would hide ATS reported unsupported
in new kernels,
3. add a legacy compat "x-ats-strict=off" property to warn but not fail
realize, and keep ATS enabled.
In any case, we need a warning at start time.
Ideas ?
};
const size_t hw_compat_11_0_len = G_N_ELEMENTS(hw_compat_11_0);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9c06b25e63..0f592ada89 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2546,7 +2546,49 @@ static bool vfio_pci_synthesize_pasid_cap(VFIOPCIDevice
*vdev, Error **errp)
return true;
}
-static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
+/*
+ * Determine whether ATS capability should be advertised for @vdev, based on
+ * whether it was enabled on the command line and whether it is supported
+ * according to the kernel.
+ *
+ * Store whether ATS capability should be advertised in @ats_needed.
+ *
+ * Returns false only when ats=on is explicitly requested but the kernel
+ * reports it is not supported. Returns true in all other cases.
+ */
+static bool vfio_pci_ats_requested_and_supported(VFIOPCIDevice *vdev,
+ bool *ats_needed, Error
**errp)
+{
+ HostIOMMUDevice *hiod = vdev->vbasedev.hiod;
+ HostIOMMUDeviceClass *hiodc;
+ bool ats_supported;
+ *ats_needed = false;
+
+ if (vdev->ats == ON_OFF_AUTO_OFF) {
+ return true;
+ }
+
+ *ats_needed = true;
+ if (!hiod) {
+ return true;
+ }
+ hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+ if (!hiodc || !hiodc->support_ats) {
+ return true;
+ }
+
+ ats_supported = hiodc->support_ats(hiod);
+ if (vdev->ats == ON_OFF_AUTO_ON && !ats_supported) {
+ error_setg(errp, "vfio-pci: ATS requested but not supported by
kernel");
+ *ats_needed = false;
+ return false;
+ }
+
+ *ats_needed = ats_supported;
+ return true;
+}
+
+static void vfio_add_ext_cap(VFIOPCIDevice *vdev, bool ats_needed)
{
PCIDevice *pdev = PCI_DEVICE(vdev);
bool pasid_cap_added = false;
@@ -2635,7 +2677,18 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
*/
case PCI_EXT_CAP_ID_PASID:
pasid_cap_added = true;
- /* fallthrough */
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ break;
+ case PCI_EXT_CAP_ID_ATS:
+ /*
+ * If ATS is requested and supported according to the kernel, add
+ * the ATS capability. If not supported according to the kernel or
+ * disabled on the qemu command line, omit the ATS cap.
+ */
+ if (ats_needed) {
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ }
+ break;
default:
pcie_add_capability(pdev, cap_id, cap_ver, next, size);
}
@@ -2657,6 +2710,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = PCI_DEVICE(vdev);
+ bool ats_needed = false;
if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
!pdev->config[PCI_CAPABILITY_LIST]) {
@@ -2667,7 +2721,11 @@ bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev,
Error **errp)
return false;
}
- vfio_add_ext_cap(vdev);
+ if (!vfio_pci_ats_requested_and_supported(vdev, &ats_needed, errp)) {
+ return false;
+ }
+
+ vfio_add_ext_cap(vdev, ats_needed);
return true;
}
@@ -3815,6 +3873,7 @@ static const Property vfio_pci_properties[] = {
DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true),
DEFINE_PROP_UINT16("x-vpasid-cap-offset", VFIOPCIDevice,
vpasid_cap_offset, 0),
+ DEFINE_PROP_ON_OFF_AUTO("ats", VFIOPCIDevice, ats, ON_OFF_AUTO_AUTO),
};
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
@@ -3973,6 +4032,15 @@ static void vfio_pci_class_init(ObjectClass *klass,
const void *data)
"a vIOMMU. A value of 0 (default) places
the capability at the "
"end of the extended configuration
space. The offset must be "
"4-byte aligned and within the PCIe
extended configuration space");
+ object_class_property_set_description(klass, /* 11.1 */
+ "ats",
+ "Control guest visibility of the ATS PCIe
extended capability. "
+ "Valid values are on, off, and auto
(default). "
+ "'off' always masks ATS. "
+ "'on' requires ATS support for the device
and fails realize if the "
+ "host kernel reports ATS as unavailable
for this device. "
+ "'auto' masks ATS only when the host
kernel reports "
+ "ATS as unavailable");
if you resend, could you please fix the alignment of the two
object_class_property_set_description block() above.
Thanks,
C.
}
static const TypeInfo vfio_pci_info = {