On Mon, Jan 07, 2019 at 07:43:17PM +0100, Cédric Le Goater wrote:
> This is the basic framework for the new KVM device supporting the XIVE
> native exploitation mode. The user interface exposes a new capability
> and a new KVM device to be used by QEMU.
> 
> Internally, the interface to the new KVM device is protected with a
> new interrupt mode: KVMPPC_IRQ_XIVE.
> 
> Signed-off-by: Cédric Le Goater <c...@kaod.org>
> ---
>  arch/powerpc/include/asm/kvm_host.h   |   2 +
>  arch/powerpc/include/asm/kvm_ppc.h    |  21 ++
>  arch/powerpc/kvm/book3s_xive.h        |   3 +
>  include/uapi/linux/kvm.h              |   3 +
>  arch/powerpc/kvm/book3s.c             |   7 +-
>  arch/powerpc/kvm/book3s_xive_native.c | 332 ++++++++++++++++++++++++++
>  arch/powerpc/kvm/powerpc.c            |  30 +++
>  arch/powerpc/kvm/Makefile             |   2 +-
>  8 files changed, 398 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/kvm/book3s_xive_native.c
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 0f98f00da2ea..c522e8274ad9 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -220,6 +220,7 @@ extern struct kvm_device_ops kvm_xics_ops;
>  struct kvmppc_xive;
>  struct kvmppc_xive_vcpu;
>  extern struct kvm_device_ops kvm_xive_ops;
> +extern struct kvm_device_ops kvm_xive_native_ops;
>  
>  struct kvmppc_passthru_irqmap;
>  
> @@ -446,6 +447,7 @@ struct kvmppc_passthru_irqmap {
>  #define KVMPPC_IRQ_DEFAULT   0
>  #define KVMPPC_IRQ_MPIC              1
>  #define KVMPPC_IRQ_XICS              2 /* Includes a XIVE option */
> +#define KVMPPC_IRQ_XIVE              3 /* XIVE native exploitation mode */
>  
>  #define MMIO_HPTE_CACHE_SIZE 4
>  
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index eb0d79f0ca45..1bb313f238fe 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -591,6 +591,18 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
>  extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
>                              int level, bool line_status);
>  extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
> +
> +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
> +{
> +     return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
> +}
> +
> +extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
> +                                 struct kvm_vcpu *vcpu, u32 cpu);
> +extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
> +extern void kvmppc_xive_native_init_module(void);
> +extern void kvmppc_xive_native_exit_module(void);
> +
>  #else
>  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
>                                      u32 priority) { return -1; }
> @@ -614,6 +626,15 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
>  static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
>                                     int level, bool line_status) { return -ENODEV; }
>  static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
> +
> +static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
> +     { return 0; }
> +static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
> +                                               struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
> +static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
> +static inline void kvmppc_xive_native_init_module(void) { }
> +static inline void kvmppc_xive_native_exit_module(void) { }
> +
>  #endif /* CONFIG_KVM_XIVE */
>  
>  /*
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index 10c4aa5cd010..5f22415520b4 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -12,6 +12,9 @@
>  #ifdef CONFIG_KVM_XICS
>  #include "book3s_xics.h"
>  
> +#define KVMPPC_XIVE_FIRST_IRQ        0
> +#define KVMPPC_XIVE_NR_IRQS  KVMPPC_XICS_NR_IRQS
> +
>  /*
>   * State for one guest irq source.
>   *
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6d4ea4b6c922..52bf74a1616e 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_ARM_VM_IPA_SIZE 165
>  #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
>  #define KVM_CAP_HYPERV_CPUID 167
> +#define KVM_CAP_PPC_IRQ_XIVE 168
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -1211,6 +1212,8 @@ enum kvm_device_type {
>  #define KVM_DEV_TYPE_ARM_VGIC_V3     KVM_DEV_TYPE_ARM_VGIC_V3
>       KVM_DEV_TYPE_ARM_VGIC_ITS,
>  #define KVM_DEV_TYPE_ARM_VGIC_ITS    KVM_DEV_TYPE_ARM_VGIC_ITS
> +     KVM_DEV_TYPE_XIVE,
> +#define KVM_DEV_TYPE_XIVE            KVM_DEV_TYPE_XIVE
>       KVM_DEV_TYPE_MAX,
>  };
>  
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index bd1a677dd9e4..de7eed191107 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -1039,7 +1039,10 @@ static int kvmppc_book3s_init(void)
>  #ifdef CONFIG_KVM_XIVE
>       if (xive_enabled()) {
>               kvmppc_xive_init_module();
> +             kvmppc_xive_native_init_module();
>               kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
> +             kvm_register_device_ops(&kvm_xive_native_ops,
> +                                     KVM_DEV_TYPE_XIVE);
>       } else
>  #endif
>               kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
> @@ -1050,8 +1053,10 @@ static int kvmppc_book3s_init(void)
>  static void kvmppc_book3s_exit(void)
>  {
>  #ifdef CONFIG_KVM_XICS
> -     if (xive_enabled())
> +     if (xive_enabled()) {
>               kvmppc_xive_exit_module();
> +             kvmppc_xive_native_exit_module();
> +     }
>  #endif
>  #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
>       kvmppc_book3s_exit_pr();
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
> new file mode 100644
> index 000000000000..115143e76c45
> --- /dev/null
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -0,0 +1,332 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2017-2019, IBM Corporation.
> + */
> +
> +#define pr_fmt(fmt) "xive-kvm: " fmt
> +
> +#include <linux/anon_inodes.h>
> +#include <linux/kernel.h>
> +#include <linux/kvm_host.h>
> +#include <linux/err.h>
> +#include <linux/gfp.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <linux/percpu.h>
> +#include <linux/cpumask.h>
> +#include <asm/uaccess.h>
> +#include <asm/kvm_book3s.h>
> +#include <asm/kvm_ppc.h>
> +#include <asm/hvcall.h>
> +#include <asm/xics.h>
> +#include <asm/xive.h>
> +#include <asm/xive-regs.h>
> +#include <asm/debug.h>
> +#include <asm/debugfs.h>
> +#include <asm/time.h>
> +#include <asm/opal.h>
> +
> +#include <linux/debugfs.h>
> +#include <linux/seq_file.h>
> +
> +#include "book3s_xive.h"
> +
> +static void xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
> +{
> +     struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +     struct xive_q *q = &xc->queues[prio];
> +
> +     xive_native_disable_queue(xc->vp_id, q, prio);
> +     if (q->qpage) {
> +             put_page(virt_to_page(q->qpage));
> +             q->qpage = NULL;
> +     }
> +}
> +
> +void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
> +{
> +     struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +     int i;
> +
> +     if (!kvmppc_xive_enabled(vcpu))
> +             return;
> +
> +     if (!xc)
> +             return;
> +
> +     pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
> +
> +     /* Ensure no interrupt is still routed to that VP */
> +     xc->valid = false;
> +     kvmppc_xive_disable_vcpu_interrupts(vcpu);
> +
> +     /* Disable the VP */
> +     xive_native_disable_vp(xc->vp_id);
> +
> +     /* Free the queues & associated interrupts */
> +     for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
> +             /* Free the escalation irq */
> +             if (xc->esc_virq[i]) {
> +                     free_irq(xc->esc_virq[i], vcpu);
> +                     irq_dispose_mapping(xc->esc_virq[i]);
> +                     kfree(xc->esc_virq_names[i]);
> +                     xc->esc_virq[i] = 0;
> +             }
> +
> +             /* Free the queue */
> +             xive_native_cleanup_queue(vcpu, i);
> +     }
> +
> +     /* Free the VP */
> +     kfree(xc);
> +
> +     /* Cleanup the vcpu */
> +     vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
> +     vcpu->arch.xive_vcpu = NULL;
> +}
> +
> +int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
> +                                 struct kvm_vcpu *vcpu, u32 cpu)

Why do we need both a *vcpu pointer and a separate integer cpu number?

> +{
> +     struct kvmppc_xive *xive = dev->private;
> +     struct kvmppc_xive_vcpu *xc;
> +     int rc;
> +
> +     pr_devel("native_connect_vcpu(cpu=%d)\n", cpu);
> +
> +     if (dev->ops != &kvm_xive_native_ops) {
> +             pr_devel("Wrong ops !\n");
> +             return -EPERM;
> +     }
> +     if (xive->kvm != vcpu->kvm)
> +             return -EPERM;
> +     if (vcpu->arch.irq_type)

Please use an explicit == / != comparison against KVMPPC_IRQ_DEFAULT here, so
readers don't have to remember which symbolic value corresponds to 0.

> +             return -EBUSY;
> +     if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
> +             pr_devel("Duplicate !\n");
> +             return -EEXIST;
> +     }
> +     if (cpu >= KVM_MAX_VCPUS) {
> +             pr_devel("Out of bounds !\n");
> +             return -EINVAL;
> +     }
> +     xc = kzalloc(sizeof(*xc), GFP_KERNEL);
> +     if (!xc)
> +             return -ENOMEM;
> +
> +     mutex_lock(&vcpu->kvm->lock);
> +     vcpu->arch.xive_vcpu = xc;
> +     xc->xive = xive;
> +     xc->vcpu = vcpu;
> +     xc->server_num = cpu;
> +     xc->vp_id = xive->vp_base + cpu;
> +     xc->valid = true;
> +
> +     rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
> +     if (rc) {
> +             pr_err("Failed to get VP info from OPAL: %d\n", rc);
> +             goto bail;
> +     }
> +
> +     /*
> +      * Enable the VP first as the single escalation mode will
> +      * affect escalation interrupts numbering
> +      */
> +     rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
> +     if (rc) {
> +             pr_err("Failed to enable VP in OPAL: %d\n", rc);
> +             goto bail;
> +     }
> +
> +     /* Configure VCPU fields for use by assembly push/pull */
> +     vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
> +     vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
> +
> +     /* TODO: initialize queues ? */
> +
> +bail:
> +     vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
> +     mutex_unlock(&vcpu->kvm->lock);
> +     if (rc)
> +             kvmppc_xive_native_cleanup_vcpu(vcpu);
> +
> +     return rc;
> +}
> +
> +static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
> +                                    struct kvm_device_attr *attr)
> +{
> +     return -ENXIO;
> +}
> +
> +static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
> +                                    struct kvm_device_attr *attr)
> +{
> +     return -ENXIO;
> +}
> +
> +static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
> +                                    struct kvm_device_attr *attr)
> +{
> +     return -ENXIO;
> +}
> +
> +static void kvmppc_xive_native_free(struct kvm_device *dev)
> +{
> +     struct kvmppc_xive *xive = dev->private;
> +     struct kvm *kvm = xive->kvm;
> +     int i;
> +
> +     debugfs_remove(xive->dentry);
> +
> +     pr_devel("Destroying xive native for partition\n");
> +
> +     if (kvm)
> +             kvm->arch.xive = NULL;
> +
> +     /* Mask and free interrupts */
> +     for (i = 0; i <= xive->max_sbid; i++) {
> +             if (xive->src_blocks[i])
> +                     kvmppc_xive_free_sources(xive->src_blocks[i]);
> +             kfree(xive->src_blocks[i]);
> +             xive->src_blocks[i] = NULL;
> +     }
> +
> +     if (xive->vp_base != XIVE_INVALID_VP)
> +             xive_native_free_vp_block(xive->vp_base);
> +
> +     kfree(xive);
> +     kfree(dev);
> +}
> +
> +static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
> +{
> +     struct kvmppc_xive *xive;
> +     struct kvm *kvm = dev->kvm;
> +     int ret = 0;
> +
> +     pr_devel("Creating xive native for partition\n");
> +
> +     if (kvm->arch.xive)
> +             return -EEXIST;
> +
> +     xive = kzalloc(sizeof(*xive), GFP_KERNEL);
> +     if (!xive)
> +             return -ENOMEM;
> +
> +     dev->private = xive;
> +     xive->dev = dev;
> +     xive->kvm = kvm;
> +     kvm->arch.xive = xive;
> +
> +     /* We use the default queue size set by the host */
> +     xive->q_order = xive_native_default_eq_shift();
> +     if (xive->q_order < PAGE_SHIFT)
> +             xive->q_page_order = 0;
> +     else
> +             xive->q_page_order = xive->q_order - PAGE_SHIFT;
> +
> +     /* Allocate a bunch of VPs */
> +     xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
> +     pr_devel("VP_Base=%x\n", xive->vp_base);
> +
> +     if (xive->vp_base == XIVE_INVALID_VP)
> +             ret = -ENOMEM;
> +
> +     xive->single_escalation = xive_native_has_single_escalation();
> +
> +     if (ret)
> +             kfree(xive);
> +
> +     return ret;
> +}
> +
> +static int xive_native_debug_show(struct seq_file *m, void *private)
> +{
> +     struct kvmppc_xive *xive = m->private;
> +     struct kvm *kvm = xive->kvm;
> +     struct kvm_vcpu *vcpu;
> +     unsigned int i;
> +
> +     if (!kvm)
> +             return 0;
> +
> +     seq_puts(m, "=========\nVCPU state\n=========\n");
> +
> +     kvm_for_each_vcpu(i, vcpu, kvm) {
> +             struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +
> +             if (!xc)
> +                     continue;
> +
> +             seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
> +                        xc->server_num,
> +                        vcpu->arch.xive_saved_state.nsr,
> +                        vcpu->arch.xive_saved_state.cppr,
> +                        vcpu->arch.xive_saved_state.ipb,
> +                        vcpu->arch.xive_saved_state.pipr,
> +                        vcpu->arch.xive_saved_state.w01,
> +                        (u32) vcpu->arch.xive_cam_word);
> +
> +             kvmppc_xive_debug_show_queues(m, vcpu);
> +     }
> +
> +     return 0;
> +}
> +
> +static int xive_native_debug_open(struct inode *inode, struct file *file)
> +{
> +     return single_open(file, xive_native_debug_show, inode->i_private);
> +}
> +
> +static const struct file_operations xive_native_debug_fops = {
> +     .open = xive_native_debug_open,
> +     .read = seq_read,
> +     .llseek = seq_lseek,
> +     .release = single_release,
> +};
> +
> +static void xive_native_debugfs_init(struct kvmppc_xive *xive)
> +{
> +     char *name;
> +
> +     name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
> +     if (!name) {
> +             pr_err("%s: no memory for name\n", __func__);
> +             return;
> +     }
> +
> +     xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
> +                                        xive, &xive_native_debug_fops);
> +
> +     pr_debug("%s: created %s\n", __func__, name);
> +     kfree(name);
> +}
> +
> +static void kvmppc_xive_native_init(struct kvm_device *dev)
> +{
> +     struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
> +
> +     /* Register some debug interfaces */
> +     xive_native_debugfs_init(xive);
> +}
> +
> +struct kvm_device_ops kvm_xive_native_ops = {
> +     .name = "kvm-xive-native",
> +     .create = kvmppc_xive_native_create,
> +     .init = kvmppc_xive_native_init,
> +     .destroy = kvmppc_xive_native_free,
> +     .set_attr = kvmppc_xive_native_set_attr,
> +     .get_attr = kvmppc_xive_native_get_attr,
> +     .has_attr = kvmppc_xive_native_has_attr,
> +};
> +
> +void kvmppc_xive_native_init_module(void)
> +{
> +     ;
> +}
> +
> +void kvmppc_xive_native_exit_module(void)
> +{
> +     ;
> +}
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index b90a7d154180..01d526e15e9d 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -566,6 +566,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>       case KVM_CAP_PPC_ENABLE_HCALL:
>  #ifdef CONFIG_KVM_XICS
>       case KVM_CAP_IRQ_XICS:
> +#endif
> +#ifdef CONFIG_KVM_XIVE
> +     case KVM_CAP_PPC_IRQ_XIVE:
>  #endif
>       case KVM_CAP_PPC_GET_CPU_CHAR:
>               r = 1;
> @@ -753,6 +756,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>               else
>                       kvmppc_xics_free_icp(vcpu);
>               break;
> +     case KVMPPC_IRQ_XIVE:
> +             kvmppc_xive_native_cleanup_vcpu(vcpu);
> +             break;
>       }
>  
>       kvmppc_core_vcpu_free(vcpu);
> @@ -1941,6 +1947,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>               break;
>       }
>  #endif /* CONFIG_KVM_XICS */
> +#ifdef CONFIG_KVM_XIVE
> +     case KVM_CAP_PPC_IRQ_XIVE: {
> +             struct fd f;
> +             struct kvm_device *dev;
> +
> +             r = -EBADF;
> +             f = fdget(cap->args[0]);
> +             if (!f.file)
> +                     break;
> +
> +             r = -ENXIO;
> +             if (!xive_enabled())
> +                     break;
> +
> +             r = -EPERM;
> +             dev = kvm_device_from_filp(f.file);
> +             if (dev)
> +                     r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
> +                                                         cap->args[1]);
> +
> +             fdput(f);
> +             break;
> +     }
> +#endif /* CONFIG_KVM_XIVE */
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>       case KVM_CAP_PPC_FWNMI:
>               r = -EINVAL;
> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
> index 64f1135e7732..806cbe488410 100644
> --- a/arch/powerpc/kvm/Makefile
> +++ b/arch/powerpc/kvm/Makefile
> @@ -99,7 +99,7 @@ endif
>  kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
>       book3s_xics.o
>  
> -kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
> +kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
>  kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
>  
>  kvm-book3s_64-module-objs := \

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to