On 29.09.2011, at 08:45, David Gibson wrote:

> The pseries machine of qemu implements the TCE mechanism used as a
> virtual IOMMU for the PAPR defined virtual IO devices.  Because the
> PAPR spec only defines a small DMA address space, the guest VIO
> drivers need to update TCE mappings very frequently - the virtual
> network device is particularly bad.  This means many slow exits to
> qemu to emulate the H_PUT_TCE hypercall.
> 
> Sufficiently recent kernels allow this to be mitigated by implementing
> H_PUT_TCE in the host kernel.  To make use of this, however, qemu
> needs to initialize the necessary TCE tables, and map them into itself
> so that the VIO device implementations can retrieve the mappings when
> they access guest memory (which is treated as a virtual DMA
> operation).
> 
> This patch adds the necessary calls to use the KVM TCE acceleration.
> If the kernel does not support acceleration, or there is some other
> error creating the accelerated TCE table, then it will still fall back
> to full userspace TCE implementation.
> 
> Signed-off-by: David Gibson <da...@gibson.dropbear.id.au>
> ---
> hw/spapr_vio.c       |    8 ++++++-
> hw/spapr_vio.h       |    1 +
> target-ppc/kvm.c     |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++
> target-ppc/kvm_ppc.h |   14 +++++++++++++
> 4 files changed, 76 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
> index 35818e1..1da3032 100644
> --- a/hw/spapr_vio.c
> +++ b/hw/spapr_vio.c
> @@ -165,7 +165,13 @@ static void rtce_init(VIOsPAPRDevice *dev)
>         * sizeof(VIOsPAPR_RTCE);
> 
>     if (size) {
> -        dev->rtce_table = g_malloc0(size);
> +        dev->rtce_table = kvmppc_create_spapr_tce(dev->reg,
> +                                                  dev->rtce_window_size,
> +                                                  &dev->kvmtce_fd);
> +
> +        if (!dev->rtce_table) {
> +            dev->rtce_table = g_malloc0(size);
> +        }
>     }
> }
> 
> diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
> index 4fe5f74..a325a5f 100644
> --- a/hw/spapr_vio.h
> +++ b/hw/spapr_vio.h
> @@ -57,6 +57,7 @@ typedef struct VIOsPAPRDevice {
>     target_ulong signal_state;
>     uint32_t rtce_window_size;
>     VIOsPAPR_RTCE *rtce_table;
> +    int kvmtce_fd;
>     VIOsPAPR_CRQ crq;
> } VIOsPAPRDevice;
> 
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 37ee902..866cf7f 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -28,6 +28,7 @@
> #include "kvm_ppc.h"
> #include "cpu.h"
> #include "device_tree.h"
> +#include "hw/sysbus.h"
> #include "hw/spapr.h"
> 
> #include "hw/sysbus.h"
> @@ -58,6 +59,7 @@ static int cap_ppc_smt = 0;
> #ifdef KVM_CAP_PPC_RMA
> static int cap_ppc_rma = 0;
> #endif
> +static int cap_spapr_tce = false;
> 
> /* XXX We have a race condition where we actually have a level triggered
>  *     interrupt, but the infrastructure can't expose that yet, so the guest
> @@ -87,6 +89,9 @@ int kvm_arch_init(KVMState *s)
> #ifdef KVM_CAP_PPC_RMA
>     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
> #endif
> +#ifdef KVM_CAP_SPAPR_TCE
> +    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
> +#endif
> 
>     if (!cap_interrupt_level) {
>         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
> @@ -792,6 +797,55 @@ off_t kvmppc_alloc_rma(const char *name)
> #endif
> }
> 
> +void *kvmppc_create_spapr_tce(target_ulong liobn, uint32_t window_size, int *pfd)
> +{
> +    struct kvm_create_spapr_tce args = {
> +        .liobn = liobn,
> +        .window_size = window_size,
> +    };
> +    long len;
> +    int fd;
> +    void *table;
> +
> +    if (!cap_spapr_tce) {
> +        return NULL;
> +    }
> +
> +    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
> +    if (fd < 0) {
> +        return NULL;
> +    }
> +
> +    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
> +    /* FIXME: round this up to page size */
> +
> +    table = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
> +    if (table == MAP_FAILED) {
> +        close(fd);
> +        return NULL;
> +    }
> +
> +    *pfd = fd;
> +    return table;
> +}
> +
> +int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)

Hrm. Is this ever called anywhere?


Alex


Reply via email to