POWER KVM supports an KVM_CAP_SPAPR_TCE capability which allows allocating
TCE tables in the host kernel memory and handle H_PUT_TCE requests
targeted to specific LIOBN (logical bus number) right in the host without
switching to QEMU. At the moment this is used for emulated devices only
and the handler only puts TCE to the table. If the in-kernel H_PUT_TCE
handler finds a LIOBN and corresponding table, it will put a TCE to
the table and complete hypercall execution. The user space will not be
notified.

Upcoming VFIO support is going to use the same sPAPRTCETable device class
so KVM_CAP_SPAPR_TCE is going to be used as well. That means that TCE
tables for VFIO are going to be allocated in the host as well.
However VFIO operates with real IOMMU tables and simple copying of
a TCE to the real hardware TCE table will not work as guest physical
to host physical address translation is requited.

So until the host kernel gets VFIO support for H_PUT_TCE, we better not
to register VFIO's TCE in the host.

This adds a place holder for KVM_CAP_SPAPR_TCE_VFIO capability. It is not
in upstream yet and being discussed so now it is always false which means
that in-kernel VFIO acceleration is not supported.

This adds a bool @vfio_accel flag to the sPAPRTCETable device telling
that sPAPRTCETable should not try allocating TCE table in the host kernel
for VFIO. The flag is false now as at the moment there is no VFIO.

This adds an vfio_accel parameter to spapr_tce_new_table(), the semantic
is the same. Since there is only emulated PCI and VIO now, the flag is set
to false. Upcoming VFIO support will set it to true.

This is a preparation patch so no change in behaviour is expected

Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
---
Changes:
v8:
* s/kvm_accel/vfio_accel/
* s/true/false/ in spapr_tce_new_table() invocations
* added  placeholder for cap_spapr_vfio
---
 hw/ppc/spapr_iommu.c   | 7 +++++--
 hw/ppc/spapr_pci.c     | 2 +-
 hw/ppc/spapr_vio.c     | 2 +-
 include/hw/ppc/spapr.h | 4 +++-
 target-ppc/kvm.c       | 7 +++++--
 target-ppc/kvm_ppc.h   | 6 ++++--
 6 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 3b6e373..855c37a 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -119,7 +119,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
         tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
                                               tcet->nb_table <<
                                               tcet->page_shift,
-                                              &tcet->fd);
+                                              &tcet->fd,
+                                              tcet->vfio_accel);
     }
 
     if (!tcet->table) {
@@ -143,7 +144,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
-                                   uint32_t nb_table)
+                                   uint32_t nb_table,
+                                   bool vfio_accel)
 {
     sPAPRTCETable *tcet;
 
@@ -162,6 +164,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
     tcet->bus_offset = bus_offset;
     tcet->page_shift = page_shift;
     tcet->nb_table = nb_table;
+    tcet->vfio_accel = vfio_accel;
 
     object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
 
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index ddfd8bb..658ddcb 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -658,7 +658,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, 
Error **errp)
     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                                0,
                                SPAPR_TCE_PAGE_SHIFT,
-                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
     if (!tcet) {
         error_setg(errp, "Unable to create TCE table for %s",
                    sphb->dtbusname);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 48b0125..5dfa145 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
                                         0,
                                         SPAPR_TCE_PAGE_SHIFT,
                                         pc->rtce_window_size >>
-                                        SPAPR_TCE_PAGE_SHIFT);
+                                        SPAPR_TCE_PAGE_SHIFT, false);
         address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
     }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 08c301f..fe1eff6 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -407,6 +407,7 @@ struct sPAPRTCETable {
     uint32_t page_shift;
     uint64_t *table;
     bool bypass;
+    bool vfio_accel;
     int fd;
     MemoryRegion iommu;
     QLIST_ENTRY(sPAPRTCETable) list;
@@ -418,7 +419,8 @@ int spapr_h_cas_compose_response(target_ulong addr, 
target_ulong size);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
-                                   uint32_t nb_table);
+                                   uint32_t nb_table,
+                                   bool vfio_accel);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index dfa5a26..f83bdbe 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -63,6 +63,7 @@ static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
 static int cap_spapr_multitce;
+static int cap_spapr_vfio;
 static int cap_hior;
 static int cap_one_reg;
 static int cap_epr;
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
+    cap_spapr_vfio = false;
     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void)
     return cap_spapr_multitce;
 }
 
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+                              bool vfio_accel)
 {
     struct kvm_create_spapr_tce args = {
         .liobn = liobn,
@@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t 
window_size, int *pfd)
      * destroying the table, which the upper layers -will- do
      */
     *pfd = -1;
-    if (!cap_spapr_tce) {
+    if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
         return NULL;
     }
 
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 412cc7f..1118122 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
 #ifndef CONFIG_USER_ONLY
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 bool kvmppc_spapr_use_multitce(void);
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+                              bool vfio_accel);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
@@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void)
 }
 
 static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
-                                            uint32_t window_size, int *fd)
+                                            uint32_t window_size, int *fd,
+                                            bool vfio_accel)
 {
     return NULL;
 }
-- 
2.0.0


Reply via email to