The patch adds a spapr-pci-vfio-host-bridge device type
which is a PCI Host Bridge with VFIO support. The new device
inherits from the spapr-pci-host-bridge device and adds an "iommu"
property which is an IOMMU id. This ID represents a minimal entity
for which IOMMU isolation can be guaranteed. In the SPAPR architecture, an IOMMU
group is called a Partitionable Endpoint (PE).

The current implementation supports one IOMMU id per QEMU VFIO PHB. Since
SPAPR allows multiple PHBs at no extra cost, this does not seem to
be a problem. This limitation may change in the future though.

Example of use:
Configure and add 3 functions of a multifunction device to QEMU
(the NEC PCI USB card is used as an example here):
-device spapr-pci-vfio-host-bridge,id=USB,iommu=4,index=7 \
-device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true \
-device vfio-pci,host=4:0:1.1,addr=1.1,bus=USB \
-device vfio-pci,host=4:0:1.2,addr=1.2,bus=USB

where:
* index=7 is a QEMU PHB index (used as source for MMIO/MSI/IO windows
offset);
* iommu=4 is an IOMMU id which can be found in sysfs:
[aik@vpl2 ~]$ cd /sys/bus/pci/devices/0004:00:00.0/
[aik@vpl2 0004:00:00.0]$ ls -l iommu_group
lrwxrwxrwx 1 root root 0 Jun  5 12:49 iommu_group -> ../../../kernel/iommu_groups/4

Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
---
Changes:
v8:
* s/vfio_container_spapr_get_info/vfio_container_ioctl/
* spapr_tce_new_table() now receives vfio_accel=true instead of
kvm_accel=false

v7:
* removed a bunch of properties from VFIO PHB such as "scan", "multifunction",
"force_addr" - let management software deal with them
* removed traces used in scan() (which is also removed)
* updated license
* disables in-kernel TCE table ("false" in spapr_tce_new_table())

v5:
* added handling of possible failure of spapr_vfio_new_table()

v4:
* moved IOMMU changes to separate patches
* moved spapr-pci-vfio-host-bridge to new file
---
 hw/ppc/Makefile.objs        |   3 ++
 hw/ppc/spapr_pci_vfio.c     | 102 ++++++++++++++++++++++++++++++++++++++++++++
 include/hw/pci-host/spapr.h |  11 +++++
 3 files changed, 116 insertions(+)
 create mode 100644 hw/ppc/spapr_pci_vfio.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index ea747f0..edd44d0 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o
+ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
+obj-y += spapr_pci_vfio.o
+endif
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 0000000..d3bddf2
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,102 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License,
+ *  or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "linux/vfio.h"
+#include "hw/misc/vfio.h"
+
+static Property spapr_phb_vfio_properties[] = { /* properties added by the VFIO PHB */
+    DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), /* IOMMU id, default -1 */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{ /* finish_realize hook of the VFIO PHB; failures are reported via errp */
+    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
+    struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+    int ret;
+    sPAPRTCETable *tcet;
+    uint32_t liobn = svphb->phb.dma_liobn;
+
+    if (svphb->iommugroupid == -1) { /* -1 is the "iommu" property default */
+        error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
+        return;
+    }
+
+    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+                               VFIO_CHECK_EXTENSION,
+                               (void *) VFIO_SPAPR_TCE_IOMMU);
+    if (ret != 1) { /* only a result of 1 is accepted as "supported" */
+        error_setg_errno(errp, -ret,
+                         "spapr-vfio: SPAPR extension is not supported");
+        return;
+    }
+
+    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+                               VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+    if (ret) { /* any non-zero result is treated as failure */
+        error_setg_errno(errp, -ret,
+                         "spapr-vfio: get info from container failed");
+        return;
+    }
+
+    tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
+                               SPAPR_TCE_PAGE_SHIFT,
+                               info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
+                               true); /* vfio_accel */
+    if (!tcet) {
+        error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
+        return;
+    }
+
+    /* Add the default 32-bit DMA window to iommu_root at the table's bus offset */
+    memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+                                spapr_tce_get_iommu(tcet));
+}
+
+static void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+    /* Intentionally empty; installed as dc->reset by the class init function */
+}
+
+static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
+{ /* Class initializer: installs the VFIO PHB's properties and hooks */
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
+
+    dc->props = spapr_phb_vfio_properties; /* the "iommu" property */
+    dc->reset = spapr_phb_vfio_reset; /* empty reset handler */
+    spc->finish_realize = spapr_phb_vfio_finish_realize; /* VFIO-specific realize step */
+}
+
+static const TypeInfo spapr_phb_vfio_info = { /* type description of the VFIO PHB */
+    .name          = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
+    .parent        = TYPE_SPAPR_PCI_HOST_BRIDGE, /* derives from the sPAPR PHB type */
+    .instance_size = sizeof(sPAPRPHBVFIOState),
+    .class_init    = spapr_phb_vfio_class_init,
+    .class_size    = sizeof(sPAPRPHBClass), /* reuses sPAPRPHBClass; no new class struct */
+};
+
+static void spapr_pci_vfio_register_types(void)
+{ /* Registers the VFIO PHB type described by spapr_phb_vfio_info */
+    type_register_static(&spapr_phb_vfio_info);
+}
+
+type_init(spapr_pci_vfio_register_types) /* hands the function above to type_init */
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0934518..6808e96 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -30,10 +30,14 @@
 #define SPAPR_MSIX_MAX_DEVS 32
 
 #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
 
 #define SPAPR_PCI_HOST_BRIDGE(obj) \
     OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
 
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+    OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) /* cast obj to sPAPRPHBVFIOState */
+
 #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
      OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
 #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
@@ -41,6 +45,7 @@
 
 typedef struct sPAPRPHBClass sPAPRPHBClass;
 typedef struct sPAPRPHBState sPAPRPHBState;
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
 
 struct sPAPRPHBClass {
     PCIHostBridgeClass parent_class;
@@ -76,6 +81,12 @@ struct sPAPRPHBState {
     QLIST_ENTRY(sPAPRPHBState) list;
 };
 
+struct sPAPRPHBVFIOState { /* instance state of the spapr-pci-vfio-host-bridge type */
+    sPAPRPHBState phb; /* embedded base PHB state (first member) */
+
+    int32_t iommugroupid; /* backs the "iommu" property; -1 means not set */
+};
+
 #define SPAPR_PCI_BASE_BUID          0x800000020000000ULL
 
 #define SPAPR_PCI_WINDOW_BASE        0x10000000000ULL
-- 
2.0.0


Reply via email to