On 2022/10/28 23:16, Alex Williamson wrote:
On Fri, 28 Oct 2022 21:26:13 +0900
Akihiko Odaki <akihiko.od...@daynix.com> wrote:

vfio_add_std_cap() is designed to ensure that capabilities do not
overlap, but it failed to do so for MSI and MSI-X capabilities.

Ensure MSI and MSI-X capabilities do not overlap with others by omitting
other overlapping capabilities.

Signed-off-by: Akihiko Odaki <akihiko.od...@daynix.com>
---
  hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++--------
  hw/vfio/pci.h |  3 +++
  2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 939dcc3d4a..36c8f3dc85 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev)
      }
  }
-static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
+static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp)
  {
      uint16_t ctrl;
-    bool msi_64bit, msi_maskbit;
-    int ret, entries;
-    Error *err = NULL;
+    uint8_t pos;
+
+    pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI);
+    if (!pos) {
+        return;
+    }
if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
                vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
          error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS");
-        return -errno;
+        return;
      }
-    ctrl = le16_to_cpu(ctrl);
+    vdev->msi_pos = pos;
+    vdev->msi_ctrl = le16_to_cpu(ctrl);
- msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
-    msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
-    entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
+    vdev->msi_cap_size = 0xa;
+    if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) {
+        vdev->msi_cap_size += 0xa;
+    }
+    if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) {
+        vdev->msi_cap_size += 0x4;
+    }
+}
+
+static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
+{
+    bool msi_64bit, msi_maskbit;
+    int ret, entries;
+    Error *err = NULL;
+
+    msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT);
+    msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT);
+    entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
trace_vfio_msi_setup(vdev->vbasedev.name, pos); @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
          error_propagate_prepend(errp, err, "msi_init failed: ");
          return ret;
      }
-    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
return 0;
  }
@@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, 
Error **errp)
      pba = le32_to_cpu(pba);
msix = g_malloc0(sizeof(*msix));
+    msix->pos = pos;
      msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
      msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
      msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
@@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t 
pos, Error **errp)
          }
      }
+ if (cap_id != PCI_CAP_ID_MSI &&
+        range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) {
+        warn_report(VFIO_MSG_PREFIX
+                    "A capability overlaps with MSI, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" 
PRIu8 ", %" PRIu8 "))",
+                    vdev->vbasedev.name, cap_id, pos,
+                    vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size);
+        return 0;
+    }
+
+    if (cap_id != PCI_CAP_ID_MSIX && vdev->msix &&
+        range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) {
+        warn_report(VFIO_MSG_PREFIX
+                    "A capability overlaps with MSI-X, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" 
PRIu8 ", %" PRIu8 "))",
+                    vdev->vbasedev.name, cap_id, pos,
+                    vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH);
+        return 0;
+    }

Capabilities are not a single byte, the fact that it doesn't start
within the MSI or MSI-X capability is not a sufficient test.  We're
also choosing to prioritize MSI and MSI-X capabilities by protecting
that range rather than the existing behavior where we'd drop those
capabilities if they overlap with another capability that has already
been placed.  There are merits to both approaches, but I don't see any
justification here to change the current behavior.

Isn't the most similar behavior to existing to pass the available size
to vfio_msi[x]_setup() and return an errno if the size would be
exceeded?  Something like below (untested, and requires exporting
msi_cap_sizeof()).  Thanks,

It only tests the beginning of the capability currently being added because its end is determined by vfio_std_cap_max_size() so that the overlap does not happen.

A comment in vfio_add_std_cap() says:
>     /*
>      * If it becomes important to configure capabilities to their actual
> * size, use this as the default when it's something we don't recognize.
>      * Since QEMU doesn't actually handle many of the config accesses,
>      * exact size doesn't seem worthwhile.
>      */

My understanding of the problem is that while clipping is performed when overlapping two capabilities other than MSI and MSI-X according to the comment, the clipping does not happen when one of the overlapping capability is MSI or MSI-X.

According to that, the correct way to fix is to perform clipping also in such a case. As QEMU actually handles the config acccesses for MSI and MSI-X, MSI and MSI-X are always priotized over the other capabilities.

Regards,
Akihiko Odaki


Alex

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 939dcc3d4a9e..485f9bc5102d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1278,11 +1278,13 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev)
      }
  }
-static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
+static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos,
+                          uint8_t size, Error **errp)
  {
      uint16_t ctrl;
      bool msi_64bit, msi_maskbit;
      int ret, entries;
+    uint8_t msi_cap_size;
      Error *err = NULL;
if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
@@ -1295,6 +1297,10 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, 
Error **errp)
      msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
      msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
      entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
+    msi_cap_size = msi_cap_sizeof(ctrl);
+
+    if (msi_cap_size > size)
+           return -ENOSPC;
trace_vfio_msi_setup(vdev->vbasedev.name, pos); @@ -1306,7 +1312,7 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
          error_propagate_prepend(errp, err, "msi_init failed: ");
          return ret;
      }
-    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
+    vdev->msi_cap_size = msi_cap_size;
return 0;
  }
@@ -1570,11 +1576,15 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, 
Error **errp)
      vfio_pci_relocate_msix(vdev, errp);
  }
-static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
+static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos,
+                           uint8_t size, Error **errp)
  {
      int ret;
      Error *err = NULL;
+ if (MSIX_CAP_LENGTH > size)
+           return -ENOSPC;
+
      vdev->msix->pending = g_new0(unsigned long,
                                   BITS_TO_LONGS(vdev->msix->entries));
      ret = msix_init(&vdev->pdev, vdev->msix->entries,
@@ -2033,14 +2043,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, 
uint8_t pos, Error **errp)
switch (cap_id) {
      case PCI_CAP_ID_MSI:
-        ret = vfio_msi_setup(vdev, pos, errp);
+        ret = vfio_msi_setup(vdev, pos, size, errp);
          break;
      case PCI_CAP_ID_EXP:
          vfio_check_pcie_flr(vdev, pos);
          ret = vfio_setup_pcie_cap(vdev, pos, size, errp);
          break;
      case PCI_CAP_ID_MSIX:
-        ret = vfio_msix_setup(vdev, pos, errp);
+        ret = vfio_msix_setup(vdev, pos, size, errp);
          break;
      case PCI_CAP_ID_PM:
          vfio_check_pm_reset(vdev, pos);


Reply via email to