The patch adds a spapr-pci-vfio-host-bridge device type which is a PCI Host Bridge with VFIO support. The new device inherits from the spapr-pci-host-bridge device and adds the following properties: iommu - IOMMU group ID which represents a Partitionable Endpoint, QEMU/ppc64 uses a separate PHB for an IOMMU group so the guest kernel has to have PCI Domain support enabled. forceaddr (optional, 0 by default) - forces QEMU to copy device:function from the host address as certain guest drivers expect devices to appear in particular locations; mf (optional, 0 by default) - forces multifunction bit for the function #0 of a found device, only makes sense for multifunction devices and only with the forceaddr property set. It would not be required if there was a way to know in advance whether a device is multifunctional or not. scan (optional, 1 by default) - if non-zero, the new PHB walks through all non-bridge devices in the group and tries adding them to the PHB; if zero, all devices in the group have to be configured manually via the QEMU command line.
Examples of use: 1) Scan and add all devices from IOMMU group with ID=1 to QEMU's PHB #6: -device spapr-pci-vfio-host-bridge,id=DEVICENAME,iommu=1,index=6 2) Configure and Add 3 functions of a multifunctional device to QEMU: (the NEC PCI USB card is used as an example here): -device spapr-pci-vfio-host-bridge,id=USB,iommu=4,scan=0,index=7 \ -device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true -device vfio-pci,host=4:0:1.1,addr=1.1,bus=USB -device vfio-pci,host=4:0:1.2,addr=1.2,bus=USB Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- Changes: v5: * added handling of possible failure of spapr_vfio_new_table() v4: * moved IOMMU changes to separate patches * moved spapr-pci-vfio-host-bridge to new file --- hw/ppc/Makefile.objs | 2 +- hw/ppc/spapr_pci_vfio.c | 206 ++++++++++++++++++++++++++++++++++++++++++++ include/hw/pci-host/spapr.h | 13 +++ 3 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 hw/ppc/spapr_pci_vfio.c diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index ea747f0..2239192 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o # IBM pSeries (sPAPR) obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o -obj-$(CONFIG_PSERIES) += spapr_pci.o +obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_pci_vfio.o # PowerPC 4xx boards obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o obj-y += ppc4xx_pci.o diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c new file mode 100644 index 0000000..40f6673 --- /dev/null +++ b/hw/ppc/spapr_pci_vfio.c @@ -0,0 +1,206 @@ +/* + * QEMU sPAPR PCI host for VFIO + * + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <sys/types.h> +#include <dirent.h> + +#include "hw/hw.h" +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "hw/misc/vfio.h" +#include "hw/pci/pci_bus.h" +#include "trace.h" +#include "qemu/error-report.h" + +/* sPAPR VFIO */ +static Property spapr_phb_vfio_properties[] = { + DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), + DEFINE_PROP_UINT8("scan", sPAPRPHBVFIOState, scan, 1), + DEFINE_PROP_UINT8("mf", sPAPRPHBVFIOState, enable_multifunction, 0), + DEFINE_PROP_UINT8("forceaddr", sPAPRPHBVFIOState, force_addr, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_pci_vfio_scan(sPAPRPHBVFIOState *svphb, Error **errp) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(svphb); + char *iommupath; + DIR *dirp; + struct dirent *entry; + Error *error = NULL; + + if (!svphb->scan) { + trace_spapr_pci("autoscan disabled for ", svphb->phb.dtbusname); + return; + } + + iommupath = g_strdup_printf("/sys/kernel/iommu_groups/%d/devices/", + svphb->iommugroupid); + if (!iommupath) { + return; + } + + dirp = opendir(iommupath); + if (!dirp) { + error_report("spapr-vfio: vfio scan failed on opendir: %m"); + g_free(iommupath); + return; + } + + while ((entry = readdir(dirp)) != NULL) { + Error *err = NULL; + char *tmp; + FILE *deviceclassfile; + unsigned deviceclass = 0, domainid, busid, devid, fnid; + char addr[32]; + DeviceState *dev; + + if (sscanf(entry->d_name, "%X:%X:%X.%x", + &domainid, &busid, &devid, &fnid) != 4) { + continue; + } + + tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name); + trace_spapr_pci("Reading device class from ", tmp); + + deviceclassfile = fopen(tmp, "r"); + if (deviceclassfile) { + int ret = fscanf(deviceclassfile, "%x", &deviceclass); + fclose(deviceclassfile); + if (ret != 1) { + continue; + } + } + g_free(tmp); + + if (!deviceclass) { + continue; + } + if ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8)) { + /* Skip bridges */ + continue; + } + trace_spapr_pci("Creating device from ", entry->d_name); + + dev = qdev_create(&phb->bus->qbus, "vfio-pci"); + if (!dev) { + trace_spapr_pci("failed to create vfio-pci", entry->d_name); + continue; + } + object_property_parse(OBJECT(dev), entry->d_name, "host", &err); + if (err != NULL) { + object_unref(OBJECT(dev)); + continue; + } + if (svphb->force_addr) { + snprintf(addr, sizeof(addr), "%x.%x", devid, fnid); + err = NULL; + object_property_parse(OBJECT(dev), addr, "addr", &err); + if (err != NULL) { + object_unref(OBJECT(dev)); + continue; + } + } + if (svphb->enable_multifunction) { + qdev_prop_set_bit(dev, "multifunction", 1); + } + object_property_set_bool(OBJECT(dev), true, "realized", &error); + if (error) { + object_unref(OBJECT(dev)); + error_propagate(errp, error); + break; + } + } + closedir(dirp); + g_free(iommupath); +} + +static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) +{ + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); + struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) }; + int ret; + Error *error = NULL; + + if (svphb->iommugroupid == -1) { + error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); + return; + } + + svphb->phb.tcet = spapr_vfio_new_table(DEVICE(sphb), svphb->phb.dma_liobn); + + if (!svphb->phb.tcet) { + error_setg(errp, "spapr-vfio: failed to create VFIO TCE table"); + return; + } + + address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet), + sphb->dtbusname); + + ret = vfio_container_spapr_get_info(&svphb->phb.iommu_as, + sphb->dma_liobn, svphb->iommugroupid, + &info); + if (ret) { + error_setg_errno(errp, -ret, + "spapr-vfio: get info from container failed"); + return; + } + + svphb->phb.dma_window_start = info.dma32_window_start; + svphb->phb.dma_window_size = info.dma32_window_size; + + spapr_pci_vfio_scan(svphb, &error); + if (error) { + error_propagate(errp, error); + } +} + +static void spapr_phb_vfio_reset(DeviceState *qdev) +{ + /* Do nothing */ +} + +static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); + + dc->props = spapr_phb_vfio_properties; + dc->reset = spapr_phb_vfio_reset; + spc->finish_realize = spapr_phb_vfio_finish_realize; +} + +static const TypeInfo spapr_phb_vfio_info = { + .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE, + .parent = TYPE_SPAPR_PCI_HOST_BRIDGE, + .instance_size = sizeof(sPAPRPHBVFIOState), + .class_init = spapr_phb_vfio_class_init, + .class_size = sizeof(sPAPRPHBClass), +}; + +static void spapr_pci_vfio_register_types(void) +{ + type_register_static(&spapr_phb_vfio_info); +} + +type_init(spapr_pci_vfio_register_types) diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 0f428a1..18acb67 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -30,10 +30,14 @@ #define SPAPR_MSIX_MAX_DEVS 32 #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" +#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" #define SPAPR_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) +#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \ + OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) + #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \ OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE) #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \ @@ -41,6 +45,7 @@ typedef struct sPAPRPHBClass sPAPRPHBClass; typedef struct sPAPRPHBState sPAPRPHBState; +typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; struct sPAPRPHBClass { PCIHostBridgeClass parent_class; @@ -78,6 +83,14 @@ struct sPAPRPHBState { QLIST_ENTRY(sPAPRPHBState) list; }; +struct sPAPRPHBVFIOState { + sPAPRPHBState phb; + + struct VFIOContainer *container; + int32_t iommugroupid; + uint8_t scan, enable_multifunction, force_addr; +}; + #define SPAPR_PCI_BASE_BUID 0x800000020000000ULL #define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -- 1.8.4.rc4