This adds a vfio_votify() callback to inform an IOMMU (and then its owner) that VFIO started using the IOMMU. This is used by the pseries machine to enable/disable in-kernel acceleration of TCE hypercalls.
Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- hw/ppc/spapr_iommu.c | 9 +++++++++ hw/ppc/spapr_pci.c | 14 ++++++++------ hw/vfio/common.c | 7 +++++++ include/exec/memory.h | 2 ++ include/hw/ppc/spapr.h | 4 ++++ 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 8a88a74..67a8356 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -136,6 +136,13 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, return ret; } +static int spapr_tce_vfio_notify(MemoryRegion *iommu, bool attached) +{ + sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); + + return spapr_tce_vfio_notify_owner(tcet->owner, tcet, attached); +} + static int spapr_tce_table_post_load(void *opaque, int version_id) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque); @@ -167,6 +174,7 @@ static const VMStateDescription vmstate_spapr_tce_table = { static MemoryRegionIOMMUOps spapr_iommu_ops = { .translate = spapr_tce_translate_iommu, + .vfio_notify = spapr_tce_vfio_notify, }; static int spapr_tce_table_realize(DeviceState *dev) @@ -235,6 +243,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn) tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE)); tcet->liobn = liobn; + tcet->owner = owner; snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn); object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index ee0fecf..b0cd148 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1084,6 +1084,14 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, return 0; } +int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet, + bool attached) +{ + spapr_tce_set_need_vfio(tcet, attached); + + return 0; +} + /* create OF node for pci device and required OF DT properties */ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, void *fdt, int node_offset) @@ -1118,12 +1126,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, void *fdt = NULL; int fdt_start_offset = 0, fdt_size; - if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { - sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn); - - spapr_tce_set_need_vfio(tcet, true); - } - if (dev->hotplugged) { fdt = create_device_tree(&fdt_size); fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 9bf4c3b..ca3fd47 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -384,6 +384,7 @@ static void vfio_listener_region_add(MemoryListener *listener, QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); + giommu->iommu->iommu_ops->vfio_notify(section->mr, true); memory_region_iommu_replay(giommu->iommu, &giommu->n, vfio_container_granularity(container), false); @@ -430,6 +431,7 @@ static void vfio_listener_region_del(MemoryListener *listener, VFIOContainer *container = container_of(listener, VFIOContainer, listener); hwaddr iova, end; int ret; + MemoryRegion *iommu = NULL; if (vfio_listener_skipped_section(section)) { trace_vfio_listener_region_del_skip( @@ -451,6 +453,7 @@ static void vfio_listener_region_del(MemoryListener *listener, QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { if (giommu->iommu == section->mr) { memory_region_unregister_iommu_notifier(&giommu->n); + iommu = giommu->iommu; QLIST_REMOVE(giommu, giommu_next); g_free(giommu); break; @@ -483,6 +486,10 @@ static void vfio_listener_region_del(MemoryListener *listener, "0x%"HWADDR_PRIx") = %d (%m)", container, iova, end - iova, ret); } + + if (iommu && iommu->iommu_ops && iommu->iommu_ops->vfio_notify) { + iommu->iommu_ops->vfio_notify(section->mr, false); + } } static const MemoryListener vfio_memory_listener = { diff --git a/include/exec/memory.h b/include/exec/memory.h index d5284c2..9f82629 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -150,6 +150,8 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; struct MemoryRegionIOMMUOps { /* Return a TLB entry that contains a given address. */ IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); + /* Called when VFIO starts/stops using this */ + int (*vfio_notify)(MemoryRegion *iommu, bool attached); }; typedef struct CoalescedMemoryRange CoalescedMemoryRange; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 8aa0c45..5d2f8f4 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -550,6 +550,7 @@ struct sPAPRTCETable { int fd; MemoryRegion root, iommu; struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */ + DeviceState *owner; QLIST_ENTRY(sPAPRTCETable) list; }; @@ -629,4 +630,7 @@ int spapr_rng_populate_dt(void *fdt); */ #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 +int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet, + bool attached); + #endif /* !defined (__HW_SPAPR_H__) */ -- 2.5.0.rc3