On 05.06.14 07:50, Alexey Kardashevskiy wrote:
The patch adds a spapr-pci-vfio-host-bridge device type
which is a PCI Host Bridge with VFIO support. The new device
inherits from the spapr-pci-host-bridge device and adds an "iommu"
property which is an IOMMU id. This ID represents a minimal entity
for which IOMMU isolation can be guaranteed. In SPAPR architecture IOMMU
group is called a Partitionable Endpoint (PE).
Current implementation supports one IOMMU id per QEMU VFIO PHB. Since
SPAPR allows multiple PHB for no extra cost, this does not seem to
be a problem. This limitation may change in the future though.
Example of use:
Configure and Add 3 functions of a multifunctional device to QEMU:
(the NEC PCI USB card is used as an example here):
-device spapr-pci-vfio-host-bridge,id=USB,iommu=4,index=7 \
-device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true
-device vfio-pci,host=4:0:1.1,addr=1.1,bus=USB
-device vfio-pci,host=4:0:1.2,addr=1.2,bus=USB
where:
* index=7 is a QEMU PHB index (used as source for MMIO/MSI/IO windows
offset);
* iommu=4 is an IOMMU id which can be found in sysfs:
[aik@vpl2 ~]$ cd /sys/bus/pci/devices/0004:00:00.0/
[aik@vpl2 0004:00:00.0]$ ls -l iommu_group
lrwxrwxrwx 1 root root 0 Jun 5 12:49 iommu_group ->
../../../kernel/iommu_groups/4
Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
---
Changes:
v7:
* remove bunch of properties from VFIO PHB such as "scan", "multifunction",
"force_addr" - let management softwsare deal with it
* removed traces used in scan() (which is also removed)
* updated license
* disables in-kernel TCE table ("false" in spapr_tce_new_table())
v5:
* added handling of possible failure of spapr_vfio_new_table()
v4:
* moved IOMMU changes to separate patches
* moved spapr-pci-vfio-host-bridge to new file
---
hw/ppc/Makefile.objs | 3 ++
hw/ppc/spapr_pci_vfio.c | 93 +++++++++++++++++++++++++++++++++++++++++++++
include/hw/pci-host/spapr.h | 11 ++++++
3 files changed, 107 insertions(+)
create mode 100644 hw/ppc/spapr_pci_vfio.c
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index ea747f0..edd44d0 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o
obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
obj-$(CONFIG_PSERIES) += spapr_pci.o
+ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
+obj-y += spapr_pci_vfio.o
+endif
# PowerPC 4xx boards
obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 0000000..ec52a18
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,93 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/misc/vfio.h"
+
+static Property spapr_phb_vfio_properties[] = {
+ DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{
+ sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
+ struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+ int ret;
+ sPAPRTCETable *tcet;
+ uint32_t liobn = svphb->phb.dma_liobn;
+
+ if (svphb->iommugroupid == -1) {
+ error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
+ return;
+ }
+
+ ret = vfio_container_spapr_get_info(&svphb->phb.iommu_as,
+ svphb->iommugroupid,
+ &info);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "spapr-vfio: get info from container failed");
+ return;
+ }
+
+ tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
So what happens when the device gets a different offset? Will it write
into other devices or will DMAs just get ignored?
Alex
+ SPAPR_TCE_PAGE_SHIFT,
+ info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
+ false);
+ if (!tcet) {
+ error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
+ return;
+ }
+
+ /* Register default 32bit DMA window */
+ memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+ spapr_tce_get_iommu(tcet));
+}
+
+static void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+ /* Do nothing */
+}
+
+static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
+
+ dc->props = spapr_phb_vfio_properties;
+ dc->reset = spapr_phb_vfio_reset;
+ spc->finish_realize = spapr_phb_vfio_finish_realize;
+}
+
+static const TypeInfo spapr_phb_vfio_info = {
+ .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
+ .parent = TYPE_SPAPR_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(sPAPRPHBVFIOState),
+ .class_init = spapr_phb_vfio_class_init,
+ .class_size = sizeof(sPAPRPHBClass),
+};
+
+static void spapr_pci_vfio_register_types(void)
+{
+ type_register_static(&spapr_phb_vfio_info);
+}
+
+type_init(spapr_pci_vfio_register_types)
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0934518..6808e96 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -30,10 +30,14 @@
#define SPAPR_MSIX_MAX_DEVS 32
#define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
#define SPAPR_PCI_HOST_BRIDGE(obj) \
OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+ OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
+
#define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
#define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
@@ -41,6 +45,7 @@
typedef struct sPAPRPHBClass sPAPRPHBClass;
typedef struct sPAPRPHBState sPAPRPHBState;
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
struct sPAPRPHBClass {
PCIHostBridgeClass parent_class;
@@ -76,6 +81,12 @@ struct sPAPRPHBState {
QLIST_ENTRY(sPAPRPHBState) list;
};
+struct sPAPRPHBVFIOState {
+ sPAPRPHBState phb;
+
+ int32_t iommugroupid;
+};
+
#define SPAPR_PCI_BASE_BUID 0x800000020000000ULL
#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL