On 15/09/2020 00.29, Matthew Rosato wrote: > When an s390 guest is using lazy unmapping, it can result in a very > large number of oustanding DMA requests, far beyond the default > limit configured for vfio. Let's track DMA usage similar to vfio > in the host, and trigger the guest to flush their DMA mappings > before vfio runs out. > > Signed-off-by: Matthew Rosato <mjros...@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 99 > +++++++++++++++++++++++++++++++++++++++++++++--- > hw/s390x/s390-pci-bus.h | 9 +++++ > hw/s390x/s390-pci-inst.c | 29 +++++++++++--- > hw/s390x/s390-pci-inst.h | 3 ++ > 4 files changed, 129 insertions(+), 11 deletions(-) > > diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c > index 92146a2..23474cd 100644 > --- a/hw/s390x/s390-pci-bus.c > +++ b/hw/s390x/s390-pci-bus.c > @@ -11,6 +11,8 @@ > * directory. > */ > > +#include <sys/ioctl.h> > + > #include "qemu/osdep.h" > #include "qapi/error.h" > #include "qapi/visitor.h" > @@ -24,6 +26,9 @@ > #include "qemu/error-report.h" > #include "qemu/module.h" > > +#include "hw/vfio/pci.h" > +#include "hw/vfio/vfio-common.h" > + > #ifndef DEBUG_S390PCI_BUS > #define DEBUG_S390PCI_BUS 0 > #endif > @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus > *bus, int32_t devfn) > object_unref(OBJECT(iommu)); > } > > +static bool s390_sync_dma_avail(int fd, unsigned int *avail) > +{ > + struct vfio_iommu_type1_info *info;
You could use g_autofree to get rid of the g_free() at the end. > + uint32_t argsz; > + bool rval = false; > + int ret; > + > + if (avail == NULL) { > + return false; > + } Since this is a "static" local function, and calling it with avail == NULL does not make too much sense, I think I'd rather turn this into an assert() instead. > + argsz = sizeof(struct vfio_iommu_type1_info); > + info = g_malloc0(argsz); > + info->argsz = argsz; > + /* > + * If the specified argsz is not large enough to contain all > + * capabilities it will be updated upon return. In this case > + * use the updated value to get the entire capability chain. > + */ > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + if (argsz != info->argsz) { > + argsz = info->argsz; > + info = g_realloc(info, argsz); > + info->argsz = argsz; > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + } > + > + if (ret) { > + goto out; > + } > + > + /* If the capability exists, update with the current value */ > + rval = vfio_get_info_dma_avail(info, avail); > + > +out: > + g_free(info); > + return rval; > +} > + > +static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice > *vdev) > +{ > + int id = vdev->group->container->fd; > + S390PCIDMACount *cnt; > + uint32_t avail; > + > + if (!s390_sync_dma_avail(id, &avail)) { > + return NULL; > + } > + > + QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { > + if (cnt->id == id) { > + cnt->users++; > + return cnt; > + } > + } > + > + cnt = g_new0(S390PCIDMACount, 1); > + cnt->id = id; > + cnt->users = 1; > + cnt->avail = avail; > + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); > + return cnt; > +} > + > +static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) > +{ > + if (cnt == NULL) { > + return; > + } Either use assert() or drop this completely (since you're checking it at the caller site already). > + cnt->users--; > + if (cnt->users == 0) { > + QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); > + } > +} > + > static void s390_pcihost_realize(DeviceState *dev, Error **errp) > { > PCIBus *b; > @@ -764,6 +845,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error > **errp) > s->bus_no = 0; > QTAILQ_INIT(&s->pending_sei); > QTAILQ_INIT(&s->zpci_devs); > + QTAILQ_INIT(&s->zpci_dma_limit); > > css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, > S390_ADAPTER_SUPPRESSIBLE, errp); > @@ -902,6 +984,7 @@ static void s390_pcihost_plug(HotplugHandler > *hotplug_dev, DeviceState *dev, > { > S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); > PCIDevice *pdev = NULL; > + VFIOPCIDevice *vpdev = NULL; > S390PCIBusDevice *pbdev = NULL; > > if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { > @@ -941,17 +1024,20 @@ static void s390_pcihost_plug(HotplugHandler > *hotplug_dev, DeviceState *dev, > } > } > > + pbdev->pdev = pdev; > + pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); > + pbdev->iommu->pbdev = pbdev; > + pbdev->state = ZPCI_FS_DISABLED; > + > if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > pbdev->fh |= FH_SHM_VFIO; > + vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); > + pbdev->iommu->dma_limit = s390_start_dma_count(s, > + &vpdev->vbasedev); > } else { > pbdev->fh |= FH_SHM_EMUL; > } > > - pbdev->pdev = pdev; > - pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); > - pbdev->iommu->pbdev = pbdev; > - pbdev->state = ZPCI_FS_DISABLED; > - > if (s390_pci_msix_init(pbdev)) { > error_setg(errp, "MSI-X support is mandatory " > "in the S390 architecture"); > @@ -1004,6 +1090,9 @@ static void s390_pcihost_unplug(HotplugHandler > *hotplug_dev, DeviceState *dev, > pbdev->fid = 0; > QTAILQ_REMOVE(&s->zpci_devs, pbdev, link); > g_hash_table_remove(s->zpci_table, &pbdev->idx); > + if (pbdev->iommu->dma_limit) { > + s390_end_dma_count(s, pbdev->iommu->dma_limit); > + } > qdev_unrealize(dev); > } > } Thomas