On 03/20/2014 06:57 AM, Alex Williamson wrote: > On Wed, 2014-03-12 at 16:52 +1100, Alexey Kardashevskiy wrote: >> The patch adds a spapr-pci-vfio-host-bridge device type >> which is a PCI Host Bridge with VFIO support. The new device >> inherits from the spapr-pci-host-bridge device and adds >> the following properties: >> iommu - IOMMU group ID which represents a Partitionable >> Endpoint, QEMU/ppc64 uses a separate PHB for >> an IOMMU group so the guest kernel has to have >> PCI Domain support enabled. >> forceaddr (optional, 0 by default) - forces QEMU to copy >> device:function from the host address as >> certain guest drivers expect devices to appear in >> particular locations; >> mf (optional, 0 by default) - forces multifunction bit for >> the function #0 of a found device, only makes sense >> for multifunction devices and only with the forceaddr >> property set. It would not be required if there >> was a way to know in advance whether a device is >> multifunctional or not. >> scan (optional, 1 by default) - if non-zero, the new PHB walks >> through all non-bridge devices in the group and tries >> adding them to the PHB; if zero, all devices in the group >> have to be configured manually via the QEMU command line. >> >> Examples of use: >> 1) Scan and add all devices from IOMMU group with ID=1 to QEMU's PHB #6: >> -device spapr-pci-vfio-host-bridge,id=DEVICENAME,iommu=1,index=6 >> >> 2) Configure and Add 3 functions of a multifunctional device to QEMU: >> (the NEC PCI USB card is used as an example here): >> -device spapr-pci-vfio-host-bridge,id=USB,iommu=4,scan=0,index=7 \ >> -device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true >> -device vfio-pci,host=4:0:1.1,addr=1.1,bus=USB >> -device vfio-pci,host=4:0:1.2,addr=1.2,bus=USB > > I won't pretend to predict the reaction of QEMU device architects to > this, but it seems like the assembly we expect from config files or > outside utilities, ex. libvirt. I don't doubt this makes qemu > commandline usage more palatable, but it seems contrary to some of the > other things we do in QEMU with devices. If this is something we need, > why is it specific to spapr? IOMMU group can contain multiple devices > on any platform. On x86 we could do something similar with a p2p > bridge, switch, or root port.
At least at the moment devices are assigned to groups statically on SPAPR, they cannot be moved between groups at all, so it seems like a reasonable assumption that the user will not mind putting them all to the same QEMU instance. I should probably disable "scan" by default though, that would make more sense for libvirt. > BTW, the code skips bridges, but doesn't that mean you'll have a hard > time with forceaddr as you potentially try to overlay devfn from > multiple buses onto a single bus? > It also makes the value of this a bit > more questionable since it seems to fall apart so easily. Thanks, These "forceaddr" and "mf" are rather debug options - at the very beginning I used to have strong impression that USB NEC PCI device (2xOHCI and 1xEHCI) does not work properly if it is not multifunctional but I was wrong, just checked. So I'll just remove them as they do not help even me anymore and that's it. Thanks! > > Alex > >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >> --- >> Changes: >> v5: >> * added handling of possible failure of spapr_vfio_new_table() >> >> v4: >> * moved IOMMU changes to separate patches >> * moved spapr-pci-vfio-host-bridge to new file >> --- >> hw/ppc/Makefile.objs | 2 +- >> hw/ppc/spapr_pci_vfio.c | 206 >> ++++++++++++++++++++++++++++++++++++++++++++ >> include/hw/pci-host/spapr.h | 13 +++ >> 3 files changed, 220 insertions(+), 1 deletion(-) >> create mode 100644 hw/ppc/spapr_pci_vfio.c >> >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs >> index ea747f0..2239192 100644 >> --- a/hw/ppc/Makefile.objs >> +++ b/hw/ppc/Makefile.objs >> @@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o >> # IBM pSeries (sPAPR) >> obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o >> obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o >> -obj-$(CONFIG_PSERIES) += spapr_pci.o >> +obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_pci_vfio.o >> # PowerPC 4xx boards >> obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o >> obj-y += ppc4xx_pci.o >> diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c >> new file mode 100644 >> index 0000000..40f6673 >> --- /dev/null >> +++ b/hw/ppc/spapr_pci_vfio.c >> @@ -0,0 +1,206 @@ >> +/* >> + * QEMU sPAPR PCI host for VFIO >> + * >> + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> copy >> + * of this software and associated documentation files (the "Software"), to >> deal >> + * in the Software without restriction, including without limitation the >> rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell >> + * copies of the Software, and to permit persons to whom the Software is >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included >> in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> + * THE SOFTWARE. >> + */ >> +#include <sys/types.h> >> +#include <dirent.h> >> + >> +#include "hw/hw.h" >> +#include "hw/ppc/spapr.h" >> +#include "hw/pci-host/spapr.h" >> +#include "hw/misc/vfio.h" >> +#include "hw/pci/pci_bus.h" >> +#include "trace.h" >> +#include "qemu/error-report.h" >> + >> +/* sPAPR VFIO */ >> +static Property spapr_phb_vfio_properties[] = { >> + DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), >> + DEFINE_PROP_UINT8("scan", sPAPRPHBVFIOState, scan, 1), >> + DEFINE_PROP_UINT8("mf", sPAPRPHBVFIOState, enable_multifunction, 0), >> + DEFINE_PROP_UINT8("forceaddr", sPAPRPHBVFIOState, force_addr, 0), >> + DEFINE_PROP_END_OF_LIST(), >> +}; >> + >> +static void spapr_pci_vfio_scan(sPAPRPHBVFIOState *svphb, Error **errp) >> +{ >> + PCIHostState *phb = PCI_HOST_BRIDGE(svphb); >> + char *iommupath; >> + DIR *dirp; >> + struct dirent *entry; >> + Error *error = NULL; >> + >> + if (!svphb->scan) { >> + trace_spapr_pci("autoscan disabled for ", svphb->phb.dtbusname); >> + return; >> + } >> + >> + iommupath = g_strdup_printf("/sys/kernel/iommu_groups/%d/devices/", >> + svphb->iommugroupid); >> + if (!iommupath) { >> + return; >> + } >> + >> + dirp = opendir(iommupath); >> + if (!dirp) { >> + error_report("spapr-vfio: vfio scan failed on opendir: %m"); >> + g_free(iommupath); >> + return; >> + } >> + >> + while ((entry = readdir(dirp)) != NULL) { >> + Error *err = NULL; >> + char *tmp; >> + FILE *deviceclassfile; >> + unsigned deviceclass = 0, domainid, busid, devid, fnid; >> + char addr[32]; >> + DeviceState *dev; >> + >> + if (sscanf(entry->d_name, "%X:%X:%X.%x", >> + &domainid, &busid, &devid, &fnid) != 4) { >> + continue; >> + } >> + >> + tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name); >> + trace_spapr_pci("Reading device class from ", tmp); >> + >> + deviceclassfile = fopen(tmp, "r"); >> + if (deviceclassfile) { >> + int ret = fscanf(deviceclassfile, "%x", &deviceclass); >> + fclose(deviceclassfile); >> + if (ret != 1) { >> + continue; >> + } >> + } >> + g_free(tmp); >> + >> + if (!deviceclass) { >> + continue; >> + } >> + if ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8)) { >> + /* Skip bridges */ >> + continue; >> + } >> + trace_spapr_pci("Creating device from ", entry->d_name); >> + >> + dev = qdev_create(&phb->bus->qbus, "vfio-pci"); >> + if (!dev) { >> + trace_spapr_pci("failed to create vfio-pci", entry->d_name); >> + continue; >> + } >> + object_property_parse(OBJECT(dev), entry->d_name, "host", &err); >> + if (err != NULL) { >> + object_unref(OBJECT(dev)); >> + continue; >> + } >> + if (svphb->force_addr) { >> + snprintf(addr, sizeof(addr), "%x.%x", devid, fnid); >> + err = NULL; >> + object_property_parse(OBJECT(dev), addr, "addr", &err); >> + if (err != NULL) { >> + object_unref(OBJECT(dev)); >> + continue; >> + } >> + } >> + if (svphb->enable_multifunction) { >> + qdev_prop_set_bit(dev, "multifunction", 1); >> + } >> + object_property_set_bool(OBJECT(dev), true, "realized", &error); >> + if (error) { >> + object_unref(OBJECT(dev)); >> + error_propagate(errp, error); >> + break; >> + } >> + } >> + closedir(dirp); >> + g_free(iommupath); >> +} >> + >> +static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) >> +{ >> + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); >> + struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) }; >> + int ret; >> + Error *error = NULL; >> + >> + if (svphb->iommugroupid == -1) { >> + error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); >> + return; >> + } >> + >> + svphb->phb.tcet = spapr_vfio_new_table(DEVICE(sphb), >> svphb->phb.dma_liobn); >> + >> + if (!svphb->phb.tcet) { >> + error_setg(errp, "spapr-vfio: failed to create VFIO TCE table"); >> + return; >> + } >> + >> + address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet), >> + sphb->dtbusname); >> + >> + ret = vfio_container_spapr_get_info(&svphb->phb.iommu_as, >> + sphb->dma_liobn, >> svphb->iommugroupid, >> + &info); >> + if (ret) { >> + error_setg_errno(errp, -ret, >> + "spapr-vfio: get info from container failed"); >> + return; >> + } >> + >> + svphb->phb.dma_window_start = info.dma32_window_start; >> + svphb->phb.dma_window_size = info.dma32_window_size; >> + >> + spapr_pci_vfio_scan(svphb, &error); >> + if (error) { >> + error_propagate(errp, error); >> + } >> +} >> + >> +static void spapr_phb_vfio_reset(DeviceState *qdev) >> +{ >> + /* Do nothing */ >> +} >> + >> +static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(klass); >> + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); >> + >> + dc->props = spapr_phb_vfio_properties; >> + dc->reset = spapr_phb_vfio_reset; >> + spc->finish_realize = spapr_phb_vfio_finish_realize; >> +} >> + >> +static const TypeInfo spapr_phb_vfio_info = { >> + .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE, >> + .parent = TYPE_SPAPR_PCI_HOST_BRIDGE, >> + .instance_size = sizeof(sPAPRPHBVFIOState), >> + .class_init = spapr_phb_vfio_class_init, >> + .class_size = sizeof(sPAPRPHBClass), >> +}; >> + >> +static void spapr_pci_vfio_register_types(void) >> +{ >> + type_register_static(&spapr_phb_vfio_info); >> +} >> + >> +type_init(spapr_pci_vfio_register_types) >> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h >> index 0f428a1..18acb67 100644 >> --- a/include/hw/pci-host/spapr.h >> +++ b/include/hw/pci-host/spapr.h >> @@ -30,10 +30,14 @@ >> #define SPAPR_MSIX_MAX_DEVS 32 >> >> #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" >> +#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" >> >> #define SPAPR_PCI_HOST_BRIDGE(obj) \ >> OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) >> >> +#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \ >> + OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) >> + >> #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \ >> OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE) >> #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \ >> @@ -41,6 +45,7 @@ >> >> typedef struct sPAPRPHBClass sPAPRPHBClass; >> typedef struct sPAPRPHBState sPAPRPHBState; >> +typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; >> >> struct sPAPRPHBClass { >> PCIHostBridgeClass parent_class; >> @@ -78,6 +83,14 @@ struct sPAPRPHBState { >> QLIST_ENTRY(sPAPRPHBState) list; >> }; >> >> +struct sPAPRPHBVFIOState { >> + sPAPRPHBState phb; >> + >> + struct VFIOContainer *container; >> + int32_t iommugroupid; >> + uint8_t scan, enable_multifunction, force_addr; >> +}; >> + >> #define SPAPR_PCI_BASE_BUID 0x800000020000000ULL >> >> #define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL > > > -- Alexey