POWER KVM supports an KVM_CAP_SPAPR_TCE capability which allows allocating TCE tables in the host kernel memory and handle H_PUT_TCE requests targeted to specific LIOBN (logical bus number) right in the host without switching to QEMU. At the moment this is used for emulated devices only and the handler only puts TCE to the table. If the in-kernel H_PUT_TCE handler finds a LIOBN and corresponding table, it will put a TCE to the table and complete hypercall execution. The user space will not be notified.
Upcoming VFIO support is going to use the same sPAPRTCETable device class so KVM_CAP_SPAPR_TCE is going to be used as well. That means that TCE tables for VFIO are going to be allocated in the host as well. However VFIO operates with real IOMMU tables and simple copying of a TCE to the real hardware TCE table will not work as guest physical to host physical address translation is requited. So until the host kernel gets VFIO support for H_PUT_TCE, we better not to register VFIO's TCE in the host. This adds a place holder for KVM_CAP_SPAPR_TCE_VFIO capability. It is not in upstream yet and being discussed so now it is always false which means that in-kernel VFIO acceleration is not supported. This adds a bool @vfio_accel flag to the sPAPRTCETable device telling that sPAPRTCETable should not try allocating TCE table in the host kernel for VFIO. The flag is false now as at the moment there is no VFIO. This adds an vfio_accel parameter to spapr_tce_new_table(), the semantic is the same. Since there is only emulated PCI and VIO now, the flag is set to false. Upcoming VFIO support will set it to true. This is a preparation patch so no change in behaviour is expected Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- Changes: v8: * s/kvm_accel/vfio_accel/ * s/true/false/ in spapr_tce_new_table() invocations * added placeholder for cap_spapr_vfio --- hw/ppc/spapr_iommu.c | 7 +++++-- hw/ppc/spapr_pci.c | 2 +- hw/ppc/spapr_vio.c | 2 +- include/hw/ppc/spapr.h | 4 +++- target-ppc/kvm.c | 7 +++++-- target-ppc/kvm_ppc.h | 6 ++++-- 6 files changed, 19 insertions(+), 9 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 3b6e373..855c37a 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -119,7 +119,8 @@ static int spapr_tce_table_realize(DeviceState *dev) tcet->table = kvmppc_create_spapr_tce(tcet->liobn, tcet->nb_table << tcet->page_shift, - &tcet->fd); + &tcet->fd, + tcet->vfio_accel); } if (!tcet->table) { @@ -143,7 +144,8 @@ static int spapr_tce_table_realize(DeviceState *dev) sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, - uint32_t nb_table) + uint32_t nb_table, + bool vfio_accel) { sPAPRTCETable *tcet; @@ -162,6 +164,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, tcet->bus_offset = bus_offset; tcet->page_shift = page_shift; tcet->nb_table = nb_table; + tcet->vfio_accel = vfio_accel; object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index ddfd8bb..658ddcb 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -658,7 +658,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, 0, SPAPR_TCE_PAGE_SHIFT, - 0x40000000 >> SPAPR_TCE_PAGE_SHIFT); + 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false); if (!tcet) { error_setg(errp, "Unable to create TCE table for %s", sphb->dtbusname); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 48b0125..5dfa145 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) 0, SPAPR_TCE_PAGE_SHIFT, pc->rtce_window_size >> - SPAPR_TCE_PAGE_SHIFT); + SPAPR_TCE_PAGE_SHIFT, false); address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id); } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 08c301f..fe1eff6 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -407,6 +407,7 @@ struct sPAPRTCETable { uint32_t page_shift; uint64_t *table; bool bypass; + bool vfio_accel; int fd; MemoryRegion iommu; QLIST_ENTRY(sPAPRTCETable) list; @@ -418,7 +419,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, - uint32_t nb_table); + uint32_t nb_table, + bool vfio_accel); MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet); void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass); int spapr_dma_dt(void *fdt, int node_off, const char *propname, diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index dfa5a26..f83bdbe 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -63,6 +63,7 @@ static int cap_ppc_smt; static int cap_ppc_rma; static int cap_spapr_tce; static int cap_spapr_multitce; +static int cap_spapr_vfio; static int cap_hior; static int cap_one_reg; static int cap_epr; @@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s) cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); + cap_spapr_vfio = false; cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); @@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void) return cap_spapr_multitce; } -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, + bool vfio_accel) { struct kvm_create_spapr_tce args = { .liobn = liobn, @@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) * destroying the table, which the upper layers -will- do */ *pfd = -1; - if (!cap_spapr_tce) { + if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) { return NULL; } diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 412cc7f..1118122 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); #ifndef CONFIG_USER_ONLY off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem); bool kvmppc_spapr_use_multitce(void); -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd); +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, + bool vfio_accel); int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); int kvmppc_reset_htab(int shift_hint); uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); @@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void) } static inline void *kvmppc_create_spapr_tce(uint32_t liobn, - uint32_t window_size, int *fd) + uint32_t window_size, int *fd, + bool vfio_accel) { return NULL; } -- 2.0.0