The current code is geared towards using a user-mode (A)PIC.  This patch adds
an "irqdevice" abstraction and implements a "userint" model to handle the
duties of the original code.  Later, we can develop other irqdevice models
to handle objects like the LAPIC, IOAPIC, i8259, etc., as appropriate.

Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/kvm/Makefile    |    2 
 drivers/kvm/irqdevice.h |  176 +++++++++++++++++++++++++++++++++++++
 drivers/kvm/kvm.h       |   94 +++++++++++++++++++-
 drivers/kvm/kvm_main.c  |   58 +++++++++---
 drivers/kvm/svm.c       |  162 ++++++++++++++++++++++++----------
 drivers/kvm/userint.c   |  223 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c       |  161 +++++++++++++++++++++++++---------
 7 files changed, 769 insertions(+), 107 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..097d179
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,176 @@
+/*
+ * Defines an interface for an abstract interrupt controller.  The model
+ * consists of a unit with an arbitrary number of input lines N (IRQ0-(N-1)),
+ * an arbitrary number of output lines (INTR) (LINT, EXTINT, NMI, etc), and
+ * methods for completing an interrupt-acknowledge cycle (INTA).  A particular
+ * implementation of this model will define various policies, such as
+ * irq-to-vector translation, INTA/auto-EOI policy, etc.
+ *
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ *   Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+struct kvm_irqdevice;
+
+typedef enum {
+       kvm_irqpin_localint,
+       kvm_irqpin_extint,
+       kvm_irqpin_smi,
+       kvm_irqpin_nmi,
+       kvm_irqpin_invalid, /* must always be last */
+} kvm_irqpin_t;
+
+
+struct kvm_irqsink {
+       void (*set_intr)(struct kvm_irqsink *this,
+                        struct kvm_irqdevice *dev,
+                        kvm_irqpin_t pin);
+
+       void *private;
+};
+
+#define KVM_IRQACKDATA_VECTOR_VALID   (1 << 0)
+#define KVM_IRQACKDATA_VECTOR_PENDING (1 << 1)
+
+#define KVM_IRQACK_FLAG_PEEK          (1 << 0)
+
+struct kvm_irqack_data {
+       int flags;
+       int vector;
+};
+
+struct kvm_irqdevice {
+       int  (*ack)(struct kvm_irqdevice *this, int flags,
+                   struct kvm_irqack_data *data);
+       int  (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+       void (*destructor)(struct kvm_irqdevice *this);
+
+       void               *private;
+       struct kvm_irqsink  sink;
+};
+
+/**
+ * kvm_irqdevice_init - initialize the kvm_irqdevice for use
+ * @dev: The device
+ *
+ * Description: Zero-fills *dev (callbacks, private and sink all become NULL).
+ *              Call this before any derived implementation init function.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+       memset(dev, 0, sizeof(*dev));
+}
+
+/**
+ * kvm_irqdevice_ack - read and ack the highest priority vector from the device
+ * @dev: The device
+ * @flags: Modifies default behavior
+ *           [ KVM_IRQACK_FLAG_PEEK - Don't ack vector, just check status ]
+ * @data: A pointer to a kvm_irqack_data structure to hold the result
+ *
+ * Description: Read the highest priority pending vector from the device,
+ *              potentially invoking auto-EOI depending on device policy
+ *
+ *              Successful return indicates that the *data* structure is valid
+ *
+ *               data.flags -
+ *                  [KVM_IRQACKDATA_VECTOR_VALID - data.vector is valid]
+ *                  [KVM_IRQACKDATA_VECTOR_PENDING - more vectors are pending]
+ *
+ * Returns: (int) the value produced by the device's ack() callback
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_ack(struct kvm_irqdevice *dev, int flags,
+                                   struct kvm_irqack_data *data)
+{
+       return dev->ack(dev, flags, data);
+}
+
+/**
+ * kvm_irqdevice_set_pin - allows the caller to assert/deassert an IRQ
+ * @dev: The device
+ * @pin: The input pin to alter
+ * @level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Description: Allows the caller to assert/deassert an IRQ input pin to the
+ *              device according to device policy.
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+                                 int level)
+{
+       return dev->set_pin(dev, pin, level);
+}
+
+/**
+ * kvm_irqdevice_register_sink - registers a kvm_irqsink object
+ * @dev: The device
+ * @sink: The sink to register.  The structure is copied by value, so building
+ *        the object in transient storage is ok.
+ *
+ * Description: Registers a kvm_irqsink object as an INTR callback
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev,
+                                              const struct kvm_irqsink *sink)
+{
+       dev->sink = *sink;
+}
+
+/**
+ * kvm_irqdevice_destructor - destroys an irqdevice
+ * @dev: The device; left untouched when no destructor has been installed
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_destructor(struct kvm_irqdevice *dev)
+{
+       if (dev->destructor) /* NULL until a device model's init succeeds */
+               dev->destructor(dev);
+}
+
+/**
+ * kvm_irqdevice_set_intr - invokes a registered INTR callback
+ * @dev: The device
+ * @pin: Identifies the pin to alter -
+ *           [ kvm_irqpin_localint (default) - a vector is pending on this
+ *                                             device]
+ *           [ kvm_irqpin_extint - a vector is pending on an external device]
+ *           [ kvm_irqpin_smi - system-management-interrupt pin]
+ *           [ kvm_irqpin_nmi - non-maskable-interrupt pin]
+ *
+ * Description: Invokes a registered INTR callback (if present).  This
+ *              function is meant to be used privately by an irqdevice
+ *              implementation.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_set_intr(struct kvm_irqdevice *dev,
+                                         kvm_irqpin_t pin)
+{
+       struct kvm_irqsink *sink = &dev->sink;
+       if (sink->set_intr)
+               sink->set_intr(sink, dev, pin);
+}
+
+#endif /*  __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 3289801..1c8af97 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 
 #include "vmx.h"
+#include "irqdevice.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -160,6 +161,11 @@ struct vmcs {
 
 struct kvm_vcpu;
 
+int kvm_user_irqdev_init(struct kvm_irqdevice *dev);
+int kvm_user_irqdev_save(struct kvm_irqdevice *this, void *data);
+int kvm_user_irqdev_restore(struct kvm_irqdevice *this, void *data);
+int kvm_userint_init(struct kvm_vcpu *vcpu);
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -316,6 +322,18 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct 
kvm_io_bus *bus, gpa_t addr);
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
                             struct kvm_io_device *dev);
 
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ */
+struct kvm_vcpu_irq {
+       spinlock_t           lock;
+       struct kvm_irqdevice dev;
+       unsigned long        pending;  /* bitmap indexed by kvm_irqpin_t */
+       int                  deferred; /* vector to re-inject, -1 if none */
+};
+
 struct kvm_vcpu {
        struct kvm *kvm;
        union {
@@ -328,9 +346,7 @@ struct kvm_vcpu {
        u64 host_tsc;
        struct kvm_run *run;
        int interrupt_window_open;
-       unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-       unsigned long irq_pending[NR_IRQ_WORDS];
+       struct kvm_vcpu_irq irq;
        unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
        unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
@@ -408,6 +424,78 @@ struct kvm_vcpu {
        struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
 };
 
+/*
+ * These two functions are helpers for determining if a standard interrupt
+ * is pending to replace the old "if (vcpu->irq_summary)" logic.
+ */
+
+/*
+ * Returns the pending-pin bitmap; a deferred vector counts as localint.
+ * Assumes lock already held
+ */
+static inline int __kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu)
+{
+       unsigned long pending = vcpu->irq.pending; /* bitops need a long */
+
+       if (vcpu->irq.deferred != -1)
+               __set_bit(kvm_irqpin_localint, &pending);
+       return pending;
+}
+
+/* Locked wrapper: reads the pending-pin bitmap under vcpu->irq.lock */
+static inline int kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu)
+{
+       int ret = 0;
+       unsigned long flags; /* spin_lock_irqsave() needs unsigned long */
+
+       spin_lock_irqsave(&vcpu->irq.lock, flags);
+       ret = __kvm_vcpu_irq_pending(vcpu);
+       spin_unlock_irqrestore(&vcpu->irq.lock, flags);
+       return ret;
+}
+
+/*
+ * Fetches the next vector into *data, deferred one first; lock must be held
+ */
+static inline int kvm_vcpu_irq_pop(struct kvm_vcpu *vcpu,
+                                  struct kvm_irqack_data *data)
+{
+       int ret = 0;
+
+       if (vcpu->irq.deferred != -1) {
+               ret = kvm_irqdevice_ack(&vcpu->irq.dev, KVM_IRQACK_FLAG_PEEK,
+                                       data);
+               data->flags |= KVM_IRQACKDATA_VECTOR_VALID;
+               data->vector = vcpu->irq.deferred;
+               vcpu->irq.deferred = -1; /* the single deferred slot is free */
+       } else
+               ret = kvm_irqdevice_ack(&vcpu->irq.dev, 0, data);
+
+       /*
+        * No VECTOR_PENDING flag from the device: clear the localint status
+        */
+       if (!(data->flags & KVM_IRQACKDATA_VECTOR_PENDING))
+               __clear_bit(kvm_irqpin_localint, &vcpu->irq.pending);
+
+       return ret;
+}
+
+static inline void __kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq)
+{
+       BUG_ON(vcpu->irq.deferred != -1); /* We can only hold one deferred */
+
+       vcpu->irq.deferred = irq;
+}
+
+static inline void kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq)
+{
+       unsigned long flags; /* spin_lock_irqsave() needs unsigned long */
+
+       spin_lock_irqsave(&vcpu->irq.lock, flags); /* guards irq.deferred */
+       __kvm_vcpu_irq_push(vcpu, irq);
+       spin_unlock_irqrestore(&vcpu->irq.lock, flags);
+}
+
 struct kvm_mem_alias {
        gfn_t base_gfn;
        unsigned long npages;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 89b00e5..897b20f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -323,6 +323,11 @@ static struct kvm *kvm_create_vm(void)
                struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
                mutex_init(&vcpu->mutex);
+
+               memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+               spin_lock_init(&vcpu->irq.lock);
+               vcpu->irq.deferred = -1;
+
                vcpu->cpu = -1;
                vcpu->kvm = kvm;
                vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -390,6 +395,7 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
        vcpu_load(vcpu);
        kvm_mmu_destroy(vcpu);
        vcpu_put(vcpu);
+       kvm_irqdevice_destructor(&vcpu->irq.dev);
        kvm_arch_ops->vcpu_free(vcpu);
        free_page((unsigned long)vcpu->run);
        vcpu->run = NULL;
@@ -2009,8 +2015,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->efer = vcpu->shadow_efer;
        sregs->apic_base = vcpu->apic_base;
 
-       memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
-              sizeof sregs->interrupt_bitmap);
+       kvm_user_irqdev_save(&vcpu->irq.dev, &sregs->interrupt_bitmap);
 
        vcpu_put(vcpu);
 
@@ -2027,7 +2032,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                    struct kvm_sregs *sregs)
 {
        int mmu_reset_needed = 0;
-       int i;
        struct descriptor_table dt;
 
        vcpu_load(vcpu);
@@ -2064,12 +2068,8 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu 
*vcpu,
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
-       memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-              sizeof vcpu->irq_pending);
-       vcpu->irq_summary = 0;
-       for (i = 0; i < NR_IRQ_WORDS; ++i)
-               if (vcpu->irq_pending[i])
-                       __set_bit(i, &vcpu->irq_summary);
+       kvm_user_irqdev_restore(&vcpu->irq.dev,
+                               &sregs->interrupt_bitmap[0]);
 
        set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -2230,14 +2230,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu 
*vcpu,
 {
        if (irq->irq < 0 || irq->irq >= 256)
                return -EINVAL;
-       vcpu_load(vcpu);
-
-       set_bit(irq->irq, vcpu->irq_pending);
-       set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
 
-       vcpu_put(vcpu);
-
-       return 0;
+       return kvm_irqdevice_set_pin(&vcpu->irq.dev, irq->irq, 1);
 }
 
 static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2339,6 +2333,32 @@ out1:
 }
 
 /*
+ * This function will be invoked whenever the vcpu->irq.dev raises its INTR
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this,
+                         struct kvm_irqdevice *dev,
+                         kvm_irqpin_t pin)
+{
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vcpu->irq.lock, flags);
+       __set_bit(pin, &vcpu->irq.pending);
+       spin_unlock_irqrestore(&vcpu->irq.lock, flags);
+}
+
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+       struct kvm_irqsink sink = {
+               .set_intr   = kvm_vcpu_intr,
+               .private    = vcpu
+       };
+
+       kvm_irqdevice_register_sink(&vcpu->irq.dev, &sink);
+}
+
+/*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2385,6 +2405,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int 
n)
        if (r < 0)
                goto out_free_vcpus;
 
+       kvm_irqdevice_init(&vcpu->irq.dev);
+       kvm_vcpu_irqsink_init(vcpu);
+       r = kvm_userint_init(vcpu);
+       if (r < 0)
+               goto out_free_vcpus;
+
        kvm_arch_ops->vcpu_load(vcpu);
        r = kvm_mmu_setup(vcpu);
        if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b621403..a9d917a 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -106,24 +106,6 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
                                (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2;
 }
 
-static inline u8 pop_irq(struct kvm_vcpu *vcpu)
-{
-       int word_index = __ffs(vcpu->irq_summary);
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
-       int irq = word_index * BITS_PER_LONG + bit_index;
-
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-       if (!vcpu->irq_pending[word_index])
-               clear_bit(word_index, &vcpu->irq_summary);
-       return irq;
-}
-
-static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
-       set_bit(irq, vcpu->irq_pending);
-       set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
-}
-
 static inline void clgi(void)
 {
        asm volatile (SVM_CLGI);
@@ -904,7 +886,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
        int r;
 
        if (is_external_interrupt(exit_int_info))
-               push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
+               /*
+                * An exception was taken while we were trying to inject an
+                * IRQ.  We must defer the injection of the vector until
+                * the next window.
+                */
+               kvm_vcpu_irq_push(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
 
        spin_lock(&vcpu->kvm->lock);
 
@@ -1114,7 +1101,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
 {
        vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
        skip_emulated_instruction(vcpu);
-       if (vcpu->irq_summary)
+       if (kvm_vcpu_irq_pending(vcpu))
                return 1;
 
        kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1285,7 +1272,7 @@ static int interrupt_window_interception(struct kvm_vcpu 
*vcpu,
         * possible
         */
        if (kvm_run->request_interrupt_window &&
-           !vcpu->irq_summary) {
+           !kvm_vcpu_irq_pending(vcpu)) {
                ++vcpu->stat.irq_window_exits;
                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
                return 0;
@@ -1384,60 +1371,135 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
 }
 
 
-static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
-{
-       struct vmcb_control_area *control;
-
-       control = &vcpu->svm->vmcb->control;
-       control->int_vector = pop_irq(vcpu);
-       control->int_ctl &= ~V_INTR_PRIO_MASK;
-       control->int_ctl |= V_IRQ_MASK |
-               ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
-}
-
 static void kvm_reput_irq(struct kvm_vcpu *vcpu)
 {
        struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
 
        if (control->int_ctl & V_IRQ_MASK) {
                control->int_ctl &= ~V_IRQ_MASK;
-               push_irq(vcpu, control->int_vector);
+               kvm_vcpu_irq_push(vcpu, control->int_vector);
        }
 
        vcpu->interrupt_window_open =
                !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-                                      struct kvm_run *kvm_run)
+static void do_intr_requests(struct kvm_vcpu *vcpu,
+                           struct kvm_run *kvm_run,
+                           kvm_irqpin_t pin)
 {
        struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+       int pending = 0;
 
        vcpu->interrupt_window_open =
                (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
                 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-       if (vcpu->interrupt_window_open && vcpu->irq_summary)
+       if (vcpu->interrupt_window_open) {
                /*
-                * If interrupts enabled, and not blocked by sti or mov ss. 
Good.
+                * If interrupts enabled, and not blocked by sti or mov ss.
+                * Good.
                 */
-               kvm_do_inject_irq(vcpu);
+               struct kvm_irqack_data ack;
+               int r = 0;
+
+               memset(&ack, 0, sizeof(ack));
+
+               switch (pin) {
+               case kvm_irqpin_localint:
+                       r = kvm_vcpu_irq_pop(vcpu, &ack);
+                       break;
+               case kvm_irqpin_extint:
+                       printk(KERN_WARNING "KVM: external-interrupts not " \
+                              "handled yet\n");
+                       __clear_bit(pin, &vcpu->irq.pending);
+                       break;
+               case kvm_irqpin_nmi:
+                       /*
+                        * FIXME: Someday we will handle this using the
+                        * specific SVM NMI features.  For now, just inject
+                        * the NMI as a standard interrupt on vector 2
+                        */
+                       ack.flags |= KVM_IRQACKDATA_VECTOR_VALID;
+                       ack.vector = 2;
+                       __clear_bit(pin, &vcpu->irq.pending);
+                       break;
+               default:
+                       panic("KVM: unknown interrupt pin raised: %d\n", pin);
+                       break;
+               }
+
+               BUG_ON(r < 0);
+
+               if (ack.flags & KVM_IRQACKDATA_VECTOR_VALID) {
+                       control = &vcpu->svm->vmcb->control;
+                       control->int_vector = ack.vector;
+                       control->int_ctl &= ~V_INTR_PRIO_MASK;
+                       control->int_ctl |= V_IRQ_MASK |
+                               ((/*control->int_vector >> 4*/ 0xf) <<
+                                V_INTR_PRIO_SHIFT);
+               }
+       }
 
        /*
-        * Interrupts blocked.  Wait for unblock.
+        * Re-read the pending interrupt state.  If anything is still
+        * pending we need to cause an exit on the next window
         */
-       if (!vcpu->interrupt_window_open &&
-           (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+       pending = __kvm_vcpu_irq_pending(vcpu);
+
+       if (test_bit(pin, &pending) || kvm_run->request_interrupt_window)
+               /*
+                * Trigger a VMEXIT on the next IRQ window
+                */
                control->intercept |= 1ULL << INTERCEPT_VINTR;
-       } else
-               control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
+static void clear_pending_controls(struct kvm_vcpu *vcpu)
+{
+       struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+
+       control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+                                 struct kvm_run *kvm_run)
+{
+       unsigned long pending = __kvm_vcpu_irq_pending(vcpu);
+
+       clear_pending_controls(vcpu);
+
+       while (pending) { /* service raised pins, highest bit first */
+               kvm_irqpin_t pin = __fls(pending);
+
+               switch (pin) {
+               case kvm_irqpin_localint:
+               case kvm_irqpin_extint:
+               case kvm_irqpin_nmi:
+                       do_intr_requests(vcpu, kvm_run, pin);
+                       break;
+               case kvm_irqpin_smi:
+                       /* ignored (for now) */
+                       printk(KERN_WARNING "KVM: dropping unhandled SMI\n");
+                       __clear_bit(pin, &vcpu->irq.pending);
+                       break;
+               case kvm_irqpin_invalid:
+                       /* drop */
+                       break;
+               default:
+                       panic("KVM: unknown interrupt pin raised: %d\n", pin);
+                       break;
+               }
+
+               __clear_bit(pin, &pending); /* local snapshot only */
+       }
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
                              struct kvm_run *kvm_run)
 {
-       kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-                                                 vcpu->irq_summary == 0);
+       kvm_run->ready_for_interrupt_injection =
+               (vcpu->interrupt_window_open &&
+                !kvm_vcpu_irq_pending(vcpu));
        kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = vcpu->cr8;
        kvm_run->apic_base = vcpu->apic_base;
@@ -1452,7 +1514,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                          struct kvm_run *kvm_run)
 {
-       return (!vcpu->irq_summary &&
+       return (!kvm_vcpu_irq_pending(vcpu) &&
                kvm_run->request_interrupt_window &&
                vcpu->interrupt_window_open &&
                (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
@@ -1482,9 +1544,17 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
        int r;
 
 again:
+       spin_lock(&vcpu->irq.lock);
+
+       /*
+        * We must inject interrupts (if any) while the irq_lock
+        * is held
+        */
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
+       spin_unlock(&vcpu->irq.lock);
+
        clgi();
 
        pre_svm_run(vcpu);
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..a60707d
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,223 @@
+/*
+ * User Interrupts IRQ device
+ *
+ * This acts as an extension of an interrupt controller that exists elsewhere
+ * (typically in userspace/QEMU).  Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has
+ * already occurred.  Therefore, this PIC has the following unusual properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ *    upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped)
+ *    because it's an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code,
+ *     Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ *   Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses
+ * a summary field to accelerate lookups.  Assumes external locking
+ *---------------------------------------------------------------------
+ */
+
+struct bitarray {
+       unsigned long summary; /* 1 per word in pending */
+       unsigned long pending[NR_IRQ_WORDS];
+};
+
+static inline int bitarray_pending(struct bitarray *this)
+{
+       return this->summary != 0; /* 1 if any summary bit is set, else 0 */
+}
+
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+       int word, bit;
+
+       /* Returns the highest set bit number, or -1 if summary is empty */
+       if (!this->summary)
+               return -1;
+       word = __fls(this->summary);
+       bit  = __fls(this->pending[word]);
+       return word * BITS_PER_LONG + bit;
+}
+
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+       __set_bit(nr, this->pending); /* array decays to unsigned long * */
+       __set_bit(nr / BITS_PER_LONG, &this->summary); /* mark word in use */
+}
+
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+       int word = nr / BITS_PER_LONG;
+
+       __clear_bit(nr, this->pending); /* array decays to unsigned long * */
+       if (!this->pending[word]) /* word is empty: drop its summary bit */
+               __clear_bit(word, &this->summary);
+}
+
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+       return test_bit(nr, this->pending); /* non-zero when bit nr is set */
+}
+
+static inline int bitarray_test_and_set(struct bitarray *this, int nr, int val)
+{
+       /* test_bit() only promises non-zero: compare as strict booleans */
+       if (!!bitarray_test(this, nr) == !!val)
+               return 0; /* already in the requested state */
+
+       if (val)
+               bitarray_set(this, nr);
+       else
+               bitarray_clear(this, nr);
+       return 1; /* the bit changed */
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+struct kvm_user_irqdev {
+       spinlock_t      lock;
+       atomic_t        ref_count;
+       struct bitarray pending;
+};
+
+static int user_irqdev_ack(struct kvm_irqdevice *this, int flags,
+                          struct kvm_irqack_data *data)
+{
+       struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private;
+
+       spin_lock(&s->lock);
+
+       if (!(flags & KVM_IRQACK_FLAG_PEEK)) { /* PEEK never sets vector */
+               int irq = bitarray_findhighest(&s->pending);
+
+               if (irq > -1) {
+                       /*
+                        * Automatically clear the interrupt as the EOI
+                        * mechanism (if any) will take place in userspace
+                        */
+                       bitarray_clear(&s->pending, irq);
+
+                       data->flags |= KVM_IRQACKDATA_VECTOR_VALID;
+               }
+
+               data->vector = irq; /* -1 when nothing was pending */
+       }
+
+       if (bitarray_pending(&s->pending)) /* checked after any clear above */
+               data->flags |= KVM_IRQACKDATA_VECTOR_PENDING;
+
+       spin_unlock(&s->lock);
+
+       return 0;
+}
+
+static int user_irqdev_set_pin(struct kvm_irqdevice *this, int irq, int level)
+{
+       struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private;
+       int forward = 0;
+
+       if (irq < 0 || irq >= 256)
+               return -1; /* pins are the literal vectors 0-255 */
+
+       spin_lock(&s->lock);
+       if (level) /* auto-EOI device: deasserts are dropped */
+               forward = bitarray_test_and_set(&s->pending, irq, 1);
+       spin_unlock(&s->lock);
+       /* alert the higher layer software we have changes */
+       if (forward)
+               kvm_irqdevice_set_intr(this, kvm_irqpin_localint);
+       return 0;
+}
+
+static void user_irqdev_destructor(struct kvm_irqdevice *this)
+{
+       struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private;
+
+       if (atomic_dec_and_test(&s->ref_count))
+               kfree(s);
+}
+
+int kvm_user_irqdev_init(struct kvm_irqdevice *irqdev)
+{
+       struct kvm_user_irqdev *s;
+
+       s = kzalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+
+       spin_lock_init(&s->lock);
+
+       irqdev->ack         = user_irqdev_ack;
+       irqdev->set_pin     = user_irqdev_set_pin;
+       irqdev->destructor  = user_irqdev_destructor;
+
+       irqdev->private = s;
+       atomic_inc(&s->ref_count);
+
+       return 0;
+}
+
+int kvm_user_irqdev_save(struct kvm_irqdevice *this, void *data)
+{
+       struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private;
+
+       spin_lock(&s->lock);
+       memcpy(data, s->pending.pending, sizeof s->pending.pending);
+       spin_unlock(&s->lock);
+
+       return 0;
+}
+
+int kvm_user_irqdev_restore(struct kvm_irqdevice *this, void *data)
+{
+       struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private;
+       int i;
+       int forward = 0;
+
+       spin_lock(&s->lock);
+
+       /*
+        * walk the interrupt-bitmap and inject an IRQ for each bit found
+        */
+       for (i = 0; i < 256; ++i) {
+               int val  = test_bit(i, data);
+               forward |= bitarray_test_and_set(&s->pending, i, val);
+       }
+
+       spin_unlock(&s->lock);
+
+       /*
+        * alert the higher layer software we have changes
+        */
+       if (forward)
+               kvm_irqdevice_set_intr(this, kvm_irqpin_localint);
+
+       return 0;
+}
+
+int kvm_userint_init(struct kvm_vcpu *vcpu)
+{
+       return kvm_user_irqdev_init(&vcpu->irq.dev);
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 3411813..f0e5826 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1454,52 +1454,118 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
        vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
 }
 
-static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
+/*
+ * Try to deliver the interrupt raised on @pin to the guest.
+ *
+ * If the guest can take an interrupt right now (IF set, not blocked by
+ * sti/mov ss, and no event already queued in VM_ENTRY_INTR_INFO_FIELD), a
+ * vector is obtained for @pin and injected.  Afterwards, if @pin is still
+ * pending or userspace set kvm_run->request_interrupt_window, an exit on
+ * the next interrupt window is requested.
+ */
+static void do_intr_requests(struct kvm_vcpu *vcpu,
+                           struct kvm_run *kvm_run,
+                           kvm_irqpin_t pin)
 {
-       int word_index = __ffs(vcpu->irq_summary);
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
-       int irq = word_index * BITS_PER_LONG + bit_index;
-
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-       if (!vcpu->irq_pending[word_index])
-               clear_bit(word_index, &vcpu->irq_summary);
-
-       if (vcpu->rmode.active) {
-               inject_rmode_irq(vcpu, irq);
-               return;
-       }
-       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-                       irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
-}
-
-
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-                                      struct kvm_run *kvm_run)
-{
-       u32 cpu_based_vm_exec_control;
+       unsigned long pending; /* bitops operate on unsigned long words */
 
        vcpu->interrupt_window_open =
                ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
                 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
        if (vcpu->interrupt_window_open &&
-           vcpu->irq_summary &&
-           !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
+           !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) {
                /*
-                * If interrupts enabled, and not blocked by sti or mov ss. Good.
+                * If interrupts enabled, and not blocked by sti or mov ss.
+                * Good.
                 */
-               kvm_do_inject_irq(vcpu);
+               struct kvm_irqack_data ack;
+               int r = 0;
+
+               memset(&ack, 0, sizeof(ack));
+
+               switch (pin) {
+               case kvm_irqpin_localint:
+                       r = kvm_vcpu_irq_pop(vcpu, &ack);
+                       break;
+               case kvm_irqpin_extint:
+                       printk(KERN_WARNING "KVM: external-interrupts not " \
+                              "handled yet\n");
+                       __clear_bit(pin, &vcpu->irq.pending);
+                       break;
+               case kvm_irqpin_nmi:
+                       /*
+                        * FIXME: Someday we will handle this using the
+                        * specific VMX NMI features.  For now, just inject
+                        * the NMI as a standard interrupt on vector 2
+                        */
+                       ack.flags |= KVM_IRQACKDATA_VECTOR_VALID;
+                       ack.vector = 2;
+                       __clear_bit(pin, &vcpu->irq.pending);
+                       break;
+               default:
+                       panic("KVM: unknown interrupt pin raised: %d\n", pin);
+                       break;
+               }
+
+               BUG_ON(r < 0);
 
-       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-       if (!vcpu->interrupt_window_open &&
-           (vcpu->irq_summary || kvm_run->request_interrupt_window))
+               if (ack.flags & KVM_IRQACKDATA_VECTOR_VALID) {
+                       if (vcpu->rmode.active)
+                               inject_rmode_irq(vcpu, ack.vector);
+                       else
+                               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+                                            ack.vector |
+                                            INTR_TYPE_EXT_INTR |
+                                            INTR_INFO_VALID_MASK);
+               }
+       }
+
+       /*
+        * Re-read the pending interrupt state.  If anything is still
+        * pending we need to cause an exit on the next window
+        */
+       pending = __kvm_vcpu_irq_pending(vcpu);
+
+       if (test_bit(pin, &pending) || kvm_run->request_interrupt_window) {
                /*
-                * Interrupts blocked.  Wait for unblock.
+                * Trigger a VMEXIT on the next IRQ window
                 */
-               cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-       else
-               cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+               u32 cbvec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+               cbvec |= CPU_BASED_VIRTUAL_INTR_PENDING;
+               vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cbvec);
+       }
+}
+
+/*
+ * Clear CPU_BASED_VIRTUAL_INTR_PENDING in the CPU-based VM-execution
+ * controls of the current VMCS, leaving every other control bit as it was.
+ */
+static void clear_pending_controls(struct kvm_vcpu *vcpu)
+{
+       u32 exec_control;
+
+       exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+                      ~CPU_BASED_VIRTUAL_INTR_PENDING;
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+}
+
+/*
+ * Service every interrupt pin currently raised on @vcpu, highest-numbered
+ * pin first.
+ *
+ * CPU_BASED_VIRTUAL_INTR_PENDING is cleared up front; do_intr_requests()
+ * sets it again when a pin is still pending after its delivery attempt or
+ * when userspace asked for an interrupt window.
+ */
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+                                 struct kvm_run *kvm_run)
+{
+       /*
+        * Local snapshot of the raised pins.  It must be an unsigned long
+        * because __fls() and __clear_bit() operate on unsigned long words.
+        */
+       unsigned long pending = __kvm_vcpu_irq_pending(vcpu);
+
+       clear_pending_controls(vcpu);
+
+       while (pending) {
+               kvm_irqpin_t pin = __fls(pending);
+
+               switch (pin) {
+               case kvm_irqpin_localint:
+               case kvm_irqpin_extint:
+               case kvm_irqpin_nmi:
+                       do_intr_requests(vcpu, kvm_run, pin);
+                       break;
+               case kvm_irqpin_smi:
+                       /* ignored (for now) */
+                       printk(KERN_WARNING "KVM: dropping unhandled SMI\n");
+                       __clear_bit(pin, &vcpu->irq.pending);
+                       break;
+               case kvm_irqpin_invalid:
+                       /* drop */
+                       break;
+               default:
+                       panic("KVM: unknown interrupt pin raised: %d\n", pin);
+                       break;
+               }
+
+               /* done with this pin in the snapshot; move to the next one */
+               __clear_bit(pin, &pending);
+       }
 }
 
 static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
@@ -1554,9 +1620,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        if (is_external_interrupt(vect_info)) {
+               /*
+                * An exception was taken while we were trying to inject an
+                * IRQ.  We must defer the injection of the vector until
+                * the next window.
+                */
                int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-               set_bit(irq, vcpu->irq_pending);
-               set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+               kvm_vcpu_irq_push(vcpu, irq);
        }
 
        if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1872,8 +1942,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
        kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = vcpu->cr8;
        kvm_run->apic_base = vcpu->apic_base;
-       kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-                                                 vcpu->irq_summary == 0);
+       kvm_run->ready_for_interrupt_injection =
+               (vcpu->interrupt_window_open &&
+                !kvm_vcpu_irq_pending(vcpu));
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1884,7 +1955,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
         * possible
         */
        if (kvm_run->request_interrupt_window &&
-           !vcpu->irq_summary) {
+           !kvm_vcpu_irq_pending(vcpu)) {
                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
                ++vcpu->stat.irq_window_exits;
                return 0;
@@ -1895,7 +1966,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        skip_emulated_instruction(vcpu);
-       if (vcpu->irq_summary)
+       if (kvm_vcpu_irq_pending(vcpu))
                return 1;
 
        kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1965,7 +2036,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                          struct kvm_run *kvm_run)
 {
-       return (!vcpu->irq_summary &&
+       return (!kvm_vcpu_irq_pending(vcpu) &&
                kvm_run->request_interrupt_window &&
                vcpu->interrupt_window_open &&
                (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
@@ -1981,9 +2052,17 @@ preempted:
                kvm_guest_debug_pre(vcpu);
 
 again:
+       spin_lock(&vcpu->irq.lock);
+
+       /*
+        * We must inject interrupts (if any) while the irq.lock
+        * is held
+        */
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
+       spin_unlock(&vcpu->irq.lock);
+
        vmx_save_host_state(vcpu);
        kvm_load_guest_fpu(vcpu);
 


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to