This implements a notification for a new IOMMU group attached to
sPAPR's logical IO bus (LIOBN) to enable in-kernel TCE acceleration.

This uses the new kernel KVM_CAP_SPAPR_TCE_VFIO capability to enable
in-kernel acceleration of TCE update requests, which will go via
the VFIO KVM device.

Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
---
 target/ppc/kvm_ppc.h |  6 ++++++
 hw/ppc/spapr_iommu.c | 34 ++++++++++++++++++++++++++++++++++
 target/ppc/kvm.c     |  7 ++++++-
 hw/ppc/trace-events  |  1 +
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index d6be38ecaf..2b985e1659 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -48,6 +48,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
+bool kvmppc_has_cap_spapr_vfio(void);
 #endif /* !CONFIG_USER_ONLY */
 bool kvmppc_has_cap_epr(void);
 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function);
@@ -231,6 +232,11 @@ static inline bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
     return true;
 }
 
+static inline bool kvmppc_has_cap_spapr_vfio(void)
+{
+    return false;
+}
+
 #endif /* !CONFIG_USER_ONLY */
 
 static inline bool kvmppc_has_cap_epr(void)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 5ccd785d5a..34baa881f0 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -17,6 +17,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include <sys/ioctl.h>
 #include "qemu/error-report.h"
 #include "hw/hw.h"
 #include "qemu/log.h"
@@ -173,6 +174,34 @@ static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
     }
 }
 
+static void spapr_tce_add_vfio_group(IOMMUMemoryRegion *iommu_mr,
+                                     int vfio_kvm_fd, int groupfd)
+{
+    sPAPRTCETable *tcet = container_of(iommu_mr, sPAPRTCETable, iommu);
+    struct kvm_vfio_spapr_tce param = {
+        .tablefd = tcet->fd,
+        .groupfd = groupfd,
+    };
+    struct kvm_device_attr attr = {
+        .group = KVM_DEV_VFIO_GROUP,
+        .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
+        .addr = (uint64_t)(unsigned long)&param,
+    };
+
+    if (!kvmppc_has_cap_spapr_vfio()) {
+        return;
+    }
+
+    if (param.tablefd != -1) {
+        if (ioctl(vfio_kvm_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+            error_report("vfio: failed to setup fd %d for a group with fd %d: %s",
+                         param.tablefd, param.groupfd, strerror(errno));
+            return;
+        }
+    }
+    trace_spapr_iommu_add_vfio_group(groupfd, param.tablefd);
+}
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
     sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -284,6 +313,10 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio)
 
     tcet->need_vfio = need_vfio;
 
+    if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) {
+        return;
+    }
+
     oldtable = tcet->table;
 
     tcet->table = spapr_tce_alloc_table(tcet->liobn,
@@ -643,6 +676,7 @@ static void spapr_iommu_memory_region_class_init(ObjectClass *klass, void *data)
     imrc->translate = spapr_tce_translate_iommu;
     imrc->get_min_page_size = spapr_tce_get_min_page_size;
     imrc->notify_flag_changed = spapr_tce_notify_flag_changed;
+    imrc->add_vfio_group = spapr_tce_add_vfio_group;
 }
 
 static const TypeInfo spapr_iommu_memory_region_info = {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 171d3d8040..5438252bdc 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -136,7 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
-    cap_spapr_vfio = false;
+    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -2474,6 +2474,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
     return cap_mmu_hash_v3;
 }
 
+bool kvmppc_has_cap_spapr_vfio(void)
+{
+    return cap_spapr_vfio;
+}
+
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
 {
     uint32_t host_pvr = mfpvr();
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index 4a6a6490fa..ab70d00582 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -36,6 +36,7 @@ spapr_iommu_ddw_query(uint64_t buid, uint32_t cfgaddr, unsigned wa, uint64_t win
 spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64_t req_size, uint64_t start, uint32_t liobn) "buid=0x%"PRIx64" addr=0x%"PRIx32", page size=0x%"PRIx64", requested=0x%"PRIx64", start addr=0x%"PRIx64", liobn=0x%"PRIx32
 spapr_iommu_ddw_remove(uint32_t liobn) "liobn=0x%"PRIx32
 spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=0x%"PRIx64" addr=0x%"PRIx32
+spapr_iommu_add_vfio_group(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
 
 # hw/ppc/spapr_drc.c
 spapr_drc_set_isolation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%"PRIx32
-- 
2.11.0


Reply via email to