This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it.  The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source.  The attribute number is the same as the interrupt
source number.

Signed-off-by: Paul Mackerras <pau...@samba.org>
---
 Documentation/virtual/kvm/api.txt          |    8 ++
 Documentation/virtual/kvm/devices/xics.txt |   66 +++++++++
 arch/powerpc/kvm/book3s_xics.c             |  206 +++++++++++++++++++++++++++-
 arch/powerpc/kvm/powerpc.c                 |   31 +++++
 include/linux/kvm_host.h                   |    1 +
 include/uapi/linux/kvm.h                   |   14 ++
 virt/kvm/kvm_main.c                        |   14 ++
 7 files changed, 335 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/xics.txt

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 54bb6ad..db230f8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2756,3 +2756,11 @@ Parameters: args[0] is the MPIC device fd
             args[1] is the MPIC CPU number for this vcpu
 
 This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Parameters: args[0] is the XICS device fd
+            args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt
new file mode 100644
index 0000000..4286493
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_GRP_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called "servers",
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_XICS
+capability for each vcpu, specifying the XICS device fd in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 4eb4f4b..eb58abf 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -11,6 +11,7 @@
 #include <linux/kvm_host.h>
 #include <linux/err.h>
 #include <linux/gfp.h>
+#include <linux/anon_inodes.h>
 
 #include <asm/uaccess.h>
 #include <asm/kvm_book3s.h>
@@ -890,8 +891,8 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
        kfree(name);
 }
 
-struct kvmppc_ics *kvmppc_xics_create_ics(struct kvmppc_xics *xics,
-                                         int irq)
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvmppc_xics *xics,
+                                                int irq)
 {
        struct kvmppc_ics *ics;
        int i, icsid;
@@ -1043,6 +1044,94 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
        return 0;
 }
 
+static int xics_get_source(struct kvm *kvm, long irq, u64 addr)
+{
+       int ret;
+       struct kvmppc_xics *xics = kvm->arch.xics;
+       struct kvmppc_ics *ics;
+       struct ics_irq_state *irqp;
+       u64 __user *ubufp = (u64 __user *) addr;
+       u16 idx;
+       u64 val, prio;
+
+       ics = kvmppc_xics_find_ics(xics, irq, &idx);
+       if (!ics)
+               return -ENOENT;
+
+       irqp = &ics->irq_state[idx];
+       mutex_lock(&ics->lock);
+       ret = -ENOENT;
+       if (irqp->exists) {
+               val = irqp->server;
+               prio = irqp->priority;
+               if (prio == MASKED) {
+                       val |= KVM_XICS_MASKED;
+                       prio = irqp->saved_priority;
+               }
+               val |= prio << KVM_XICS_PRIORITY_SHIFT;
+               if (irqp->asserted)
+                       val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
+               else if (irqp->masked_pending || irqp->resend)
+                       val |= KVM_XICS_PENDING;
+               ret = 0;
+       }
+       mutex_unlock(&ics->lock);
+
+       if (!ret && put_user(val, ubufp))
+               ret = -EFAULT;
+
+       return ret;
+}
+
+static int xics_set_source(struct kvm *kvm, long irq, u64 addr)
+{
+       struct kvmppc_xics *xics = kvm->arch.xics;
+       struct kvmppc_ics *ics;
+       struct ics_irq_state *irqp;
+       u64 __user *ubufp = (u64 __user *) addr;
+       u16 idx;
+       u64 val;
+       u8 prio;
+       u32 server;
+
+       if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+               return -ENOENT;
+
+       ics = kvmppc_xics_find_ics(xics, irq, &idx);
+       if (!ics) {
+               ics = kvmppc_xics_create_ics(xics, irq);
+               if (!ics)
+                       return -ENOMEM;
+       }
+       irqp = &ics->irq_state[idx];
+       if (get_user(val, ubufp))
+               return -EFAULT;
+
+       server = val & KVM_XICS_DESTINATION_MASK;
+       prio = val >> KVM_XICS_PRIORITY_SHIFT;
+       if (prio != MASKED && kvmppc_xics_find_server(kvm, server) == NULL)
+               return -EINVAL;
+
+       mutex_lock(&ics->lock);
+       irqp->server = server;
+       irqp->saved_priority = prio;
+       if (val & KVM_XICS_MASKED)
+               prio = MASKED;
+       irqp->priority = prio;
+       irqp->resend = 0;
+       irqp->masked_pending = 0;
+       irqp->asserted = 0;
+       if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
+               irqp->asserted = 1;
+       irqp->exists = 1;
+       mutex_unlock(&ics->lock);
+
+       if (val & KVM_XICS_PENDING)
+               icp_deliver_irq(xics, NULL, irqp->number);
+
+       return 0;
+}
+
 /* -- ioctls -- */
 
 int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
@@ -1069,14 +1158,70 @@ int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
        return r;
 }
 
-void kvmppc_xics_free(struct kvmppc_xics *xics)
+static int xics_set_attr(struct kvmppc_xics *xics, struct kvm_device_attr *attr)
+{
+       switch (attr->group) {
+       case KVM_DEV_XICS_GRP_SOURCES:
+               return xics_set_source(xics->kvm, attr->attr, attr->addr);
+       }
+       return -ENXIO;
+}
+
+static int xics_get_attr(struct kvmppc_xics *xics, struct kvm_device_attr *attr)
+{
+       switch (attr->group) {
+       case KVM_DEV_XICS_GRP_SOURCES:
+               return xics_get_source(xics->kvm, attr->attr, attr->addr);
+       }
+       return -ENXIO;
+}
+
+static int xics_has_attr(struct kvmppc_xics *xics, struct kvm_device_attr *attr)
+{
+       switch (attr->group) {
+       case KVM_DEV_XICS_GRP_SOURCES:
+               if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+                   attr->attr < KVMPPC_XICS_NR_IRQS)
+                       return 0;
+               break;
+       }
+       return -ENXIO;
+}
+
+static long kvm_xics_ioctl(struct file *filp, unsigned int ioctl,
+                          unsigned long arg)
+{
+       struct kvmppc_xics *xics = filp->private_data;
+       struct kvm_device_attr attr;
+       int (*accessor)(struct kvmppc_xics *xics, struct kvm_device_attr *attr);
+
+       switch (ioctl) {
+       case KVM_SET_DEVICE_ATTR:
+               accessor = xics_set_attr;
+               break;
+       case KVM_GET_DEVICE_ATTR:
+               accessor = xics_get_attr;
+               break;
+       case KVM_HAS_DEVICE_ATTR:
+               accessor = xics_has_attr;
+               break;
+       default:
+               return -ENOTTY;
+       }
+
+       if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+               return -EFAULT;
+
+       return accessor(xics, &attr);
+}
+
+static void kvmppc_xics_free(struct kvmppc_xics *xics)
 {
        int i;
 
        debugfs_remove(xics->dentry);
 
        if (xics->kvm) {
-               kvm_put_kvm(xics->kvm);
                xics->kvm->arch.xics = NULL;
                xics->kvm = NULL;
        }
@@ -1088,9 +1233,30 @@ void kvmppc_xics_free(struct kvmppc_xics *xics)
        kfree(xics);
 }
 
+static void kvmppc_xics_put(struct kvmppc_xics *xics)
+{
+       if (xics && atomic_dec_and_test(&xics->usecount))
+               kvmppc_xics_free(xics);
+}
+
+static int kvm_xics_release(struct inode *inode, struct file *filp)
+{
+       struct kvmppc_xics *xics = filp->private_data;
+       struct kvm *kvm = xics->kvm;    /* saved: the put below may free xics */
+       kvmppc_xics_put(xics);          /* drop the device fd's usecount */
+       kvm_put_kvm(kvm);               /* drop the VM ref taken at creation */
+       return 0;
+}
+
+static const struct file_operations kvm_xics_fops = {
+       .unlocked_ioctl = kvm_xics_ioctl,
+       .release = kvm_xics_release,
+};
+
 int kvm_create_xics(struct kvm *kvm, u32 type)
 {
        struct kvmppc_xics *xics;
+       int fd;
 
        /* Already there ? */
        if (kvm->arch.xics)
@@ -1100,6 +1266,12 @@ int kvm_create_xics(struct kvm *kvm, u32 type)
        if (!xics)
                return -ENOMEM;
 
+       fd = anon_inode_getfd("kvm-xics", &kvm_xics_fops, xics, O_RDWR);
+       if (fd < 0) {
+               kfree(xics);
+               return fd;
+       }
+
        xics->kvm = kvm;
        kvm->arch.xics = xics;
        xics_debugfs_init(xics);
@@ -1112,8 +1284,31 @@ int kvm_create_xics(struct kvm *kvm, u32 type)
        }
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
+       atomic_set(&xics->usecount, 1);
        kvm_get_kvm(kvm);
-       return 0;
+       return fd;
+}
+
+int kvmppc_xics_connect_vcpu(struct file *xics_filp, struct kvm_vcpu *vcpu,
+                            u32 xcpu)
+{
+       struct kvmppc_xics *xics = xics_filp->private_data;
+       int r = -EBUSY;
+
+       if (xics_filp->f_op != &kvm_xics_fops)
+               return -EPERM;
+       if (xics->kvm != vcpu->kvm)
+               return -EPERM;
+       if (vcpu->arch.irq_type)
+               return -EBUSY;
+
+       r = kvmppc_xics_create_icp(vcpu, xcpu);
+       if (!r) {
+               atomic_inc(&xics->usecount);
+               vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+       }
+
+       return r;
 }
 
 void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
@@ -1123,4 +1318,5 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
        kfree(vcpu->arch.icp);
        vcpu->arch.icp = NULL;
        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+       kvmppc_xics_put(vcpu->kvm->arch.xics);
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bc6d7cf..b6dd4aa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -341,6 +341,9 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_SPAPR_TCE:
        case KVM_CAP_PPC_ALLOC_HTAB:
        case KVM_CAP_PPC_RTAS:
+#ifdef CONFIG_KVM_XICS
+       case KVM_CAP_IRQ_XICS:
+#endif
                r = 1;
                break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -830,6 +833,21 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                break;
        }
 #endif
+#ifdef CONFIG_KVM_XICS
+       case KVM_CAP_IRQ_XICS: {
+               struct file *filp;
+
+               r = -EBADF;
+               filp = fget(cap->args[0]);
+               if (!filp)
+                       break;
+
+               r = kvmppc_xics_connect_vcpu(filp, vcpu, cap->args[1]);
+
+               fput(filp);
+               break;
+       }
+#endif /* CONFIG_KVM_XICS */
        default:
                r = -EINVAL;
                break;
@@ -1038,6 +1056,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
                break;
        }
 #endif /* CONFIG_PPC_BOOK3S_64 */
+
+       case KVM_IRQ_LINE: {
+               struct kvm *kvm = filp->private_data;
+               struct kvm_irq_level args;
+
+               r = -EFAULT;
+               if (copy_from_user(&args, argp, sizeof(args)))
+                       break;
+
+               /* Call all the interrupt controllers */
+               r = kvm_vm_ioctl_xics_irq(kvm, &args);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 852a3a1..41963e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1085,6 +1085,7 @@ static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 }
 
 int kvm_create_mpic(struct kvm *kvm, u32 type);
+extern int kvm_create_xics(struct kvm *kvm, u32 type);
 
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 #else
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 373f4aa..28b269e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -671,6 +671,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_DEVICE_CTRL 89
 #define KVM_CAP_IRQ_MPIC 90
 #define KVM_CAP_PPC_RTAS 91
+#define KVM_CAP_IRQ_XICS 92
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -940,6 +941,19 @@ struct kvm_device_attr {
 #define KVM_DEV_MPIC_GRP_REGISTER      2       /* 32-bit */
 #define KVM_DEV_MPIC_GRP_IRQ_ACTIVE    3       /* 32-bit */
 
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_TYPE_XICS              3
+#define KVM_DEV_XICS_GRP_SOURCES       1       /* 64-bit source attributes */
+
+/* Layout of 64-bit source attribute values */
+#define  KVM_XICS_DESTINATION_SHIFT    0
+#define  KVM_XICS_DESTINATION_MASK     0xffffffffULL
+#define  KVM_XICS_PRIORITY_SHIFT       32
+#define  KVM_XICS_PRIORITY_MASK                0xff
+#define  KVM_XICS_LEVEL_SENSITIVE      (1ULL << 40)
+#define  KVM_XICS_MASKED               (1ULL << 41)
+#define  KVM_XICS_PENDING              (1ULL << 42)
+
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE        _IOWR(KVMIO,  0xe0, struct kvm_create_device)
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ca3adf9..b97d3e9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2171,6 +2171,20 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                return kvm_create_mpic(kvm, cd->type);
        }
 #endif
+#ifdef CONFIG_KVM_XICS
+       case KVM_DEV_TYPE_XICS: {
+               int fd;
+
+               if (cd->flags & KVM_CREATE_DEVICE_TEST)
+                       return 0;
+
+               fd = kvm_create_xics(kvm, cd->type);
+               if (fd < 0)
+                       return fd;
+               cd->fd = fd;
+               return 0;
+       }
+#endif
        default:
                return -ENODEV;
        }
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to