On Thu, 2015-03-19 at 15:55 +0100, Eric Auger wrote:
> This patch introduces a new KVM_DEV_VFIO_DEVICE group.
> 
> This is a new control channel which enables KVM to cooperate with
> viable VFIO devices.
> 
> The patch introduces 2 attributes for this group:
> KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ.
> Their purpose is to turn a VFIO device IRQ into a forwarded IRQ and
>  respectively unset the feature.
> 
> The generic part introduced here interact with VFIO, genirq, KVM while
> the architecture specific part mostly takes care of the virtual interrupt
> controller programming.
> 
> Architecture specific implementation is enabled when
> __KVM_HAVE_ARCH_KVM_VFIO_FORWARD is set. When not set those
> functions are void.
> 
> Signed-off-by: Eric Auger <eric.au...@linaro.org>
> 
> ---
> 
> v3 -> v4:
> - use new kvm_vfio_dev_irq struct
> - improve error handling according to Alex comments
> - full rework or generic/arch specific split to accomodate for
>   unforward that never fails
> - kvm_vfio_get_vfio_device and kvm_vfio_put_vfio_device removed from
>   that patch file and introduced before (since also used by Feng)
> - guard kvm_vfio_control_irq_forward call with
>   __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> 
> v2 -> v3:
> - add API comments in kvm_host.h
> - improve the commit message
> - create a private kvm_vfio_fwd_irq struct
> - fwd_irq_action replaced by a bool and removal of VFIO_IRQ_CLEANUP. This
>   latter action will be handled in vgic.
> - add a vfio_device handle argument to kvm_arch_set_fwd_state. The goal is
>   to move platform specific stuff in architecture specific code.
> - kvm_arch_set_fwd_state renamed into kvm_arch_vfio_set_forward
> - increment the ref counter each time we do an IRQ forwarding and decrement
>   this latter each time one IRQ forward is unset. Simplifies the whole
>   ref counting.
> - simplification of list handling: create, search, removal
> 
> v1 -> v2:
> - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> - original patch file separated into 2 parts: generic part moved in vfio.c
>   and ARM specific part(kvm_arch_set_fwd_state)
> ---
>  include/linux/kvm_host.h |  47 +++++++
>  virt/kvm/vfio.c          | 311 
> ++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 355 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f4e1829..8f17f87 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1042,6 +1042,15 @@ struct kvm_device_ops {
>                     unsigned long arg);
>  };
>  
> +/* internal self-contained structure describing a forwarded IRQ */
> +struct kvm_fwd_irq {
> +     struct kvm *kvm; /* VM to inject the GSI into */
> +     struct vfio_device *vdev; /* vfio device the IRQ belongs to */
> +     __u32 index; /* VFIO device IRQ index */
> +     __u32 subindex; /* VFIO device IRQ subindex */
> +     __u32 gsi; /* gsi, ie. virtual IRQ number */
> +};
> +
>  void kvm_device_get(struct kvm_device *dev);
>  void kvm_device_put(struct kvm_device *dev);
>  struct kvm_device *kvm_device_from_filp(struct file *filp);
> @@ -1065,6 +1074,44 @@ inline void kvm_arch_resume_guest(struct kvm *kvm) {}
>  
>  #endif
>  
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +/**
> + * kvm_arch_set_forward - Sets forwarding for a given IRQ
> + *
> + * @kvm: handle to the VM
> + * @host_irq: physical IRQ number
> + * @guest_irq: virtual IRQ number
> + * returns 0 on success, < 0 on failure
> + */
> +int kvm_arch_set_forward(struct kvm *kvm,
> +                      unsigned int host_irq, unsigned int guest_irq);
> +
> +/**
> + * kvm_arch_unset_forward - Unsets forwarding for a given IRQ
> + *
> + * @kvm: handle to the VM
> + * @host_irq: physical IRQ number
> + * @guest_irq: virtual IRQ number
> + * @active: returns whether the IRQ is active
> + */
> +void kvm_arch_unset_forward(struct kvm *kvm,
> +                         unsigned int host_irq,
> +                         unsigned int guest_irq,
> +                         bool *active);
> +
> +#else
> +static inline int kvm_arch_set_forward(struct kvm *kvm,
> +                                    unsigned int host_irq,
> +                                    unsigned int guest_irq)
> +{
> +     return -ENOENT;
> +}
> +static inline void kvm_arch_unset_forward(struct kvm *kvm,
> +                                       unsigned int host_irq,
> +                                       unsigned int guest_irq,
> +                                       bool *active) {}
> +#endif
> +
>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>  
>  static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index c995e51..4847597 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -19,14 +19,30 @@
>  #include <linux/uaccess.h>
>  #include <linux/vfio.h>
>  #include "vfio.h"
> +#include <linux/platform_device.h>
> +#include <linux/irq.h>
> +#include <linux/spinlock.h>
> +#include <linux/interrupt.h>
> +#include <linux/delay.h>
> +
> +#define DEBUG_FORWARD
> +#define DEBUG_UNFORWARD


These shouldn't be here

>  
>  struct kvm_vfio_group {
>       struct list_head node;
>       struct vfio_group *vfio_group;
>  };
>  
> +/* private linkable kvm_fwd_irq struct */
> +struct kvm_vfio_fwd_irq_node {
> +     struct list_head link;
> +     struct kvm_fwd_irq fwd_irq;
> +};
> +
>  struct kvm_vfio {
>       struct list_head group_list;
> +     /* list of registered VFIO forwarded IRQs */
> +     struct list_head fwd_node_list;
>       struct mutex lock;
>       bool noncoherent;
>  };
> @@ -320,12 +336,293 @@ static int kvm_vfio_set_group(struct kvm_device *dev, 
> long attr, u64 arg)
>       return -ENXIO;
>  }
>  
> +/**
> + * kvm_vfio_find_fwd_irq - checks whether a forwarded IRQ already is
> + * registered in the list of forwarded IRQs
> + *
> + * @kv: handle to the kvm-vfio device
> + * @fwd: handle to the forwarded irq struct
> + * In the positive returns the handle to its node in the kvm-vfio
> + * forwarded IRQ list, returns NULL otherwise.
> + * Must be called with kv->lock hold.
> + */
> +static struct kvm_vfio_fwd_irq_node *kvm_vfio_find_fwd_irq(
> +                             struct kvm_vfio *kv,
> +                             struct kvm_fwd_irq *fwd)
> +{
> +     struct kvm_vfio_fwd_irq_node *node;
> +
> +     list_for_each_entry(node, &kv->fwd_node_list, link) {
> +             if ((node->fwd_irq.index == fwd->index) &&
> +                 (node->fwd_irq.subindex == fwd->subindex) &&
> +                 (node->fwd_irq.vdev == fwd->vdev))
> +                     return node;
> +     }
> +     return NULL;
> +}
> +/**
> + * kvm_vfio_register_fwd_irq - Allocates, populates and registers a
> + * forwarded IRQ
> + *
> + * @kv: handle to the kvm-vfio device
> + * @fwd: handle to the forwarded irq struct
> + * In case of success returns a handle to the new list node,
> + * NULL otherwise.
> + * Must be called with kv->lock hold.
> + */
> +static struct kvm_vfio_fwd_irq_node *kvm_vfio_register_fwd_irq(
> +                             struct kvm_vfio *kv,
> +                             struct kvm_fwd_irq *fwd)
> +{
> +     struct kvm_vfio_fwd_irq_node *node;
> +
> +     node = kmalloc(sizeof(*node), GFP_KERNEL);
> +     if (!node)
> +             return ERR_PTR(-ENOMEM);
> +
> +     node->fwd_irq = *fwd;
> +
> +     list_add(&node->link, &kv->fwd_node_list);
> +
> +     return node;
> +}
> +
> +/**
> + * kvm_vfio_unregister_fwd_irq - unregisters and frees a forwarded IRQ
> + *
> + * @node: handle to the node struct
> + * Must be called with kv->lock hold.
> + */
> +static void kvm_vfio_unregister_fwd_irq(struct kvm_vfio_fwd_irq_node *node)
> +{
> +     list_del(&node->link);
> +     kvm_vfio_put_vfio_device(node->fwd_irq.vdev);
> +     kfree(node);
> +}
> +
> +static int kvm_vfio_platform_get_irq(struct vfio_device *vdev, int index)
> +{
> +     int hwirq;
> +     struct platform_device *platdev;
> +     struct device *dev = kvm_vfio_external_base_device(vdev);
> +
> +     if (dev->bus == &platform_bus_type) {
> +             platdev = to_platform_device(dev);
> +             hwirq = platform_get_irq(platdev, index);
> +             return hwirq;
> +     } else
> +             return -EINVAL;
> +}

This seems like it was intended to be bus_type agnostic, but it's got
platform in the name.

> +
> +/**
> + * kvm_vfio_set_forward - turns a VFIO device IRQ into a forwarded IRQ
> + * @kv: handle to the kvm-vfio device
> + * @fd: file descriptor of the vfio device the IRQ belongs to
> + * @fwd: handle to the forwarded irq struct
> + *
> + * Registers and turns an IRQ as forwarded. The operation only is allowed
> + * if the IRQ is not in progress (active at interrupt controller level or
> + * auto-masked by the handler). In case the user-space masked the IRQ,
> + * the operation will fail too.
> + * returns:
> + * -EAGAIN if the IRQ is in progress or VFIO masked
> + * -EEXIST if the IRQ is already registered as forwarded
> + * -ENOMEM on memory shortage
> + */
> +static int kvm_vfio_set_forward(struct kvm_vfio *kv, int fd,
> +                             struct kvm_fwd_irq *fwd)
> +{
> +     int ret = -EAGAIN;
> +     struct kvm_vfio_fwd_irq_node *node;
> +     struct vfio_platform_device *vpdev = vfio_device_data(fwd->vdev);
> +     int index = fwd->index;
> +     int host_irq = kvm_vfio_platform_get_irq(fwd->vdev, fwd->index);
> +     bool active;
> +
> +     kvm_arch_halt_guest(fwd->kvm);
> +
> +     disable_irq(host_irq);
> +
> +     active = kvm_vfio_external_is_active(vpdev, index);
> +
> +     if (active)
> +             goto out;
> +
> +     node = kvm_vfio_register_fwd_irq(kv, fwd);
> +     if (IS_ERR(node)) {
> +             ret = PTR_ERR(node);
> +             goto out;
> +     }
> +
> +     kvm_vfio_external_set_automasked(vpdev, index, false);
> +
> +     ret = kvm_arch_set_forward(fwd->kvm, host_irq, fwd->gsi);
> +
> +out:
> +     enable_irq(host_irq);
> +
> +     kvm_arch_resume_guest(fwd->kvm);
> +
> +     return ret;


If only we were passing around a vfio_device instead of a
vfio_platform_device and we had abstraction in place for the
kvm_vfio_external_foo callbacks...

> +}
> +
> +/**
> + * kvm_vfio_unset_forward - Sets a VFIO device IRQ as non forwarded
> + * @kv: handle to the kvm-vfio device
> + * @node: handle to the node to unset
> + *
> + * Calls the architecture specific implementation of set_forward and
> + * unregisters the IRQ from the forwarded IRQ list.
> + */
> +static void kvm_vfio_unset_forward(struct kvm_vfio *kv,
> +                                struct kvm_vfio_fwd_irq_node *node)
> +{
> +     struct kvm_fwd_irq fwd = node->fwd_irq;
> +     int host_irq = kvm_vfio_platform_get_irq(fwd.vdev, fwd.index);
> +     int index = fwd.index;
> +     struct vfio_platform_device *vpdev = vfio_device_data(fwd.vdev);
> +     bool active = false;
> +
> +     kvm_arch_halt_guest(fwd.kvm);
> +
> +     disable_irq(host_irq);
> +
> +     kvm_arch_unset_forward(fwd.kvm, host_irq, fwd.gsi, &active);
> +
> +     if (active)
> +             kvm_vfio_external_mask(vpdev, index);
> +
> +     kvm_vfio_external_set_automasked(vpdev, index, true);
> +     enable_irq(host_irq);
> +
> +     kvm_arch_resume_guest(fwd.kvm);
> +
> +     kvm_vfio_unregister_fwd_irq(node);

Same

> +}
> +
> +static int kvm_vfio_control_irq_forward(struct kvm_device *kdev, long attr,
> +                                     int32_t __user *argp)
> +{
> +     struct kvm_vfio_dev_irq user_fwd_irq;
> +     struct kvm_fwd_irq fwd;
> +     struct vfio_device *vdev;
> +     struct kvm_vfio *kv = kdev->private;
> +     struct kvm_vfio_fwd_irq_node *node;
> +     unsigned long minsz;
> +     int ret = 0;
> +     u32 *gsi = NULL;
> +     size_t size;
> +
> +     minsz = offsetofend(struct kvm_vfio_dev_irq, count);
> +
> +     if (copy_from_user(&user_fwd_irq, argp, minsz))
> +             return -EFAULT;
> +
> +     if (user_fwd_irq.argsz < minsz)
> +             return -EINVAL;
> +
> +     vdev = kvm_vfio_get_vfio_device(user_fwd_irq.fd);
> +     if (IS_ERR(vdev))
> +             return PTR_ERR(vdev);
> +
> +     ret = kvm_vfio_platform_get_irq(vdev, user_fwd_irq.index);
> +     if (ret < 0)
> +             goto error;
> +
> +     size = sizeof(__u32);
> +     if (user_fwd_irq.argsz - minsz < size) {
> +             ret = -EINVAL;
> +             goto error;
> +     }
> +
> +     gsi = memdup_user((void __user *)((unsigned long)argp + minsz), size);
> +     if (IS_ERR(gsi)) {
> +             ret = PTR_ERR(gsi);
> +             goto error;
> +     }
> +
> +     fwd.vdev =  vdev;
> +     fwd.kvm =  kdev->kvm;
> +     fwd.index = user_fwd_irq.index;
> +     fwd.subindex = 0;
> +     fwd.gsi = *gsi;
> +
> +     node = kvm_vfio_find_fwd_irq(kv, &fwd);
> +
> +     switch (attr) {
> +     case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> +             if (node) {
> +                     ret = -EEXIST;
> +                     goto error;
> +             }
> +             mutex_lock(&kv->lock);
> +             ret = kvm_vfio_set_forward(kv, user_fwd_irq.fd, &fwd);
> +             mutex_unlock(&kv->lock);
> +             break;
> +     case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> +             if (!node) {
> +                     ret = -ENOENT;
> +                     goto error;
> +             }
> +             mutex_lock(&kv->lock);
> +             kvm_vfio_unset_forward(kv, node);
> +             mutex_unlock(&kv->lock);
> +             kvm_vfio_put_vfio_device(vdev);
> +             ret = 0;
> +             break;
> +     }
> +
> +     kfree(gsi);
> +error:
> +     if (ret < 0)
> +             kvm_vfio_put_vfio_device(vdev);
> +     return ret;
> +}
> +
> +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
> +{
> +     int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
> +     int ret;
> +
> +     switch (attr) {
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +     case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> +     case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> +             ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
> +             break;
> +#endif
> +     default:
> +             ret = -ENXIO;
> +     }
> +     return ret;
> +}
> +
> +/**
> + * kvm_vfio_clean_fwd_irq - Unset forwarding state of all
> + * registered forwarded IRQs and free their list nodes.
> + * @kv: kvm-vfio device
> + *
> + * Loop on all registered device/IRQ combos, reset the non forwarded state,
> + * void the lists and release the reference
> + */
> +static int kvm_vfio_clean_fwd_irq(struct kvm_vfio *kv)
> +{
> +     struct kvm_vfio_fwd_irq_node *node, *tmp;
> +
> +     list_for_each_entry_safe(node, tmp, &kv->fwd_node_list, link) {
> +             kvm_vfio_unset_forward(kv, node);
> +     }
> +     return 0;
> +}
> +
>  static int kvm_vfio_set_attr(struct kvm_device *dev,
>                            struct kvm_device_attr *attr)
>  {
>       switch (attr->group) {
>       case KVM_DEV_VFIO_GROUP:
>               return kvm_vfio_set_group(dev, attr->attr, attr->addr);
> +     case KVM_DEV_VFIO_DEVICE:
> +             return kvm_vfio_set_device(dev, attr->attr, attr->addr);
>       }
>  
>       return -ENXIO;
> @@ -341,10 +638,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
>               case KVM_DEV_VFIO_GROUP_DEL:
>                       return 0;
>               }
> -
>               break;
> +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
> +     case KVM_DEV_VFIO_DEVICE:
> +             switch (attr->attr) {
> +             case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ:
> +             case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
> +                     return 0;
> +             }
> +             break;
> +#endif
>       }
> -
>       return -ENXIO;
>  }
>  
> @@ -358,7 +662,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
>               list_del(&kvg->node);
>               kfree(kvg);
>       }
> -
> +     kvm_vfio_clean_fwd_irq(kv);
>       kvm_vfio_update_coherency(dev);
>  
>       kfree(kv);
> @@ -390,6 +694,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 
> type)
>               return -ENOMEM;
>  
>       INIT_LIST_HEAD(&kv->group_list);
> +     INIT_LIST_HEAD(&kv->fwd_node_list);
>       mutex_init(&kv->lock);
>  
>       dev->private = kv;

It's so close to not tainting the kvm-vfio device with anything platform
specific, can't we provide that last bit of abstraction somehow?
Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to