On Fri, 28 Oct 2022 21:26:13 +0900 Akihiko Odaki <akihiko.od...@daynix.com> wrote:
> vfio_add_std_cap() is designed to ensure that capabilities do not > overlap, but it failed to do so for MSI and MSI-X capabilities. > > Ensure MSI and MSI-X capabilities do not overlap with others by omitting > other overlapping capabilities. > > Signed-off-by: Akihiko Odaki <akihiko.od...@daynix.com> > --- > hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++-------- > hw/vfio/pci.h | 3 +++ > 2 files changed, 56 insertions(+), 10 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 939dcc3d4a..36c8f3dc85 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice > *vdev) > } > } > > -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > +static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp) > { > uint16_t ctrl; > - bool msi_64bit, msi_maskbit; > - int ret, entries; > - Error *err = NULL; > + uint8_t pos; > + > + pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI); > + if (!pos) { > + return; > + } > > if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), > vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { > error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); > - return -errno; > + return; > } > - ctrl = le16_to_cpu(ctrl); > + vdev->msi_pos = pos; > + vdev->msi_ctrl = le16_to_cpu(ctrl); > > - msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); > - msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); > - entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > + vdev->msi_cap_size = 0xa; > + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) { > + vdev->msi_cap_size += 0xa; > + } > + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) { > + vdev->msi_cap_size += 0x4; > + } > +} > + > +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > +{ > + bool msi_64bit, msi_maskbit; > + int ret, entries; > + Error *err = NULL; > + > + msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT); > + msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT); > + entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > > trace_vfio_msi_setup(vdev->vbasedev.name, pos); > > @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, > Error **errp) > error_propagate_prepend(errp, err, "msi_init failed: "); > return ret; > } > - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : > 0); > > return 0; > } > @@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, > Error **errp) > pba = le32_to_cpu(pba); > > msix = g_malloc0(sizeof(*msix)); > + msix->pos = pos; > msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; > msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; > msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; > @@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, > uint8_t pos, Error **errp) > } > } > > + if (cap_id != PCI_CAP_ID_MSI && > + range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) { > + warn_report(VFIO_MSG_PREFIX > + "A capability overlaps with MSI, ignoring (%" PRIu8 " @ > %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", > + vdev->vbasedev.name, cap_id, pos, > + vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size); > + return 0; > + } > + > + if (cap_id != PCI_CAP_ID_MSIX && vdev->msix && > + range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) { > + warn_report(VFIO_MSG_PREFIX > + "A capability overlaps with MSI-X, ignoring (%" PRIu8 " > @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", > + vdev->vbasedev.name, cap_id, pos, > + vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH); > + return 0; > + } Capabilities are not a single byte, the fact that it doesn't start within the MSI or MSI-X capability is not a sufficient test. We're also choosing to prioritize MSI and MSI-X capabilities by protecting that range rather than the existing behavior where we'd drop those capabilities if they overlap with another capability that has already been placed. There are merits to both approaches, but I don't see any justification here to change the current behavior. Isn't the most similar behavior to existing to pass the available size to vfio_msi[x]_setup() and return an errno if the size would be exceeded? Something like below (untested, and requires exporting msi_cap_sizeof()). Thanks, Alex diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 939dcc3d4a9e..485f9bc5102d 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1278,11 +1278,13 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) } } -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, + uint8_t size, Error **errp) { uint16_t ctrl; bool msi_64bit, msi_maskbit; int ret, entries; + uint8_t msi_cap_size; Error *err = NULL; if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), @@ -1295,6 +1297,10 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); + msi_cap_size = msi_cap_sizeof(ctrl); + + if (msi_cap_size > size) + return -ENOSPC; trace_vfio_msi_setup(vdev->vbasedev.name, pos); @@ -1306,7 +1312,7 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) error_propagate_prepend(errp, err, "msi_init failed: "); return ret; } - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); + vdev->msi_cap_size = msi_cap_size; return 0; } @@ -1570,11 +1576,15 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) vfio_pci_relocate_msix(vdev, errp); } -static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) +static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, + uint8_t size, Error **errp) { int ret; Error *err = NULL; + if (MSIX_CAP_LENGTH > size) + return -ENOSPC; + vdev->msix->pending = g_new0(unsigned long, BITS_TO_LONGS(vdev->msix->entries)); ret = msix_init(&vdev->pdev, vdev->msix->entries, @@ -2033,14 +2043,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) switch (cap_id) { case PCI_CAP_ID_MSI: - ret = vfio_msi_setup(vdev, pos, errp); + ret = vfio_msi_setup(vdev, pos, size, errp); break; case PCI_CAP_ID_EXP: vfio_check_pcie_flr(vdev, pos); ret = vfio_setup_pcie_cap(vdev, pos, size, errp); break; case PCI_CAP_ID_MSIX: - ret = vfio_msix_setup(vdev, pos, errp); + ret = vfio_msix_setup(vdev, pos, size, errp); break; case PCI_CAP_ID_PM: vfio_check_pm_reset(vdev, pos);