Hi All,
  Attached are the first three patches in my queue.  The first two you are 
likely familiar with at this point (though I have made some more of the 
requested changes to 02-irqdevice.patch).  The last item 
(03-preemptible-cpu.patch) adds an implementation to the previously unused 
kvm_vcpu_intr() callback.  This acts as a functional example of the INTR 
callback mechanism as Avi requested.  Note that the work related to IF/NMI/TPR 
classification of interrupts happens later in my queue and is not mature enough 
to share yet, but hopefully soon.

Thoughts?
-Greg
KVM: Adds support for in-kernel mmio handlers

From:  <>

Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/kvm/kvm.h      |   31 ++++++++++++++++++
 drivers/kvm/kvm_main.c |   82 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fceeb84..181099f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -236,6 +236,36 @@ struct kvm_pio_request {
        int rep;
 };
 
+struct kvm_io_device {
+       unsigned long (*read)(struct kvm_io_device *this,
+                             gpa_t addr,
+                             int length);
+       void (*write)(struct kvm_io_device *this,
+                     gpa_t addr,
+                     int length,
+                     unsigned long val);
+       int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+
+       void             *private;
+};
+
+/*
+ * It would be nice to use something smarter than a linear search, TBD...
+ * Thankfully we don't expect many devices to register (famous last words :),
+ * so until then it will suffice.  At least it's abstracted so we can change
+ * in one place.
+ */
+struct kvm_io_bus {
+       int                   dev_count;
+#define NR_IOBUS_DEVS 6
+       struct kvm_io_device *devs[NR_IOBUS_DEVS];
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus);
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
+                            struct kvm_io_device *dev);
+
 struct kvm_vcpu {
        struct kvm *kvm;
        union {
@@ -345,6 +375,7 @@ struct kvm {
        unsigned long rmap_overflow;
        struct list_head vm_list;
        struct file *filp;
+       struct kvm_io_bus mmio_bus;
 };
 
 struct kvm_stat {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4473174..c3c0059 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -294,6 +294,7 @@ static struct kvm *kvm_create_vm(void)
 
        spin_lock_init(&kvm->lock);
        INIT_LIST_HEAD(&kvm->active_mmu_pages);
+       kvm_io_bus_init(&kvm->mmio_bus);
        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
@@ -1015,12 +1016,25 @@ static int emulator_write_std(unsigned long addr,
        return X86EMUL_UNHANDLEABLE;
 }
 
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, 
+                                               gpa_t addr)
+{
+       /*
+        * Note that it's important to have this wrapper function because 
+        * in the very near future we will be checking for MMIOs against 
+        * the LAPIC as well as the general MMIO bus 
+        */
+       return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+}
+
 static int emulator_read_emulated(unsigned long addr,
                                  unsigned long *val,
                                  unsigned int bytes,
                                  struct x86_emulate_ctxt *ctxt)
 {
-       struct kvm_vcpu *vcpu = ctxt->vcpu;
+       struct kvm_vcpu      *vcpu = ctxt->vcpu;
+       struct kvm_io_device *mmio_dev;
+       gpa_t                 gpa;
 
        if (vcpu->mmio_read_completed) {
                memcpy(val, vcpu->mmio_data, bytes);
@@ -1029,18 +1043,26 @@ static int emulator_read_emulated(unsigned long addr,
        } else if (emulator_read_std(addr, val, bytes, ctxt)
                   == X86EMUL_CONTINUE)
                return X86EMUL_CONTINUE;
-       else {
-               gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
-               if (gpa == UNMAPPED_GVA)
-                       return X86EMUL_PROPAGATE_FAULT;
-               vcpu->mmio_needed = 1;
-               vcpu->mmio_phys_addr = gpa;
-               vcpu->mmio_size = bytes;
-               vcpu->mmio_is_write = 0;
+       gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       if (gpa == UNMAPPED_GVA)
+               return X86EMUL_PROPAGATE_FAULT;
 
-               return X86EMUL_UNHANDLEABLE;
+       /*
+        * Is this MMIO handled locally? 
+        */
+       mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+       if (mmio_dev) {
+               *val = mmio_dev->read(mmio_dev, gpa, bytes);
+               return X86EMUL_CONTINUE;
        }
+
+       vcpu->mmio_needed = 1;
+       vcpu->mmio_phys_addr = gpa;
+       vcpu->mmio_size = bytes;
+       vcpu->mmio_is_write = 0;
+       
+       return X86EMUL_UNHANDLEABLE;
 }
 
 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1068,8 +1090,9 @@ static int emulator_write_emulated(unsigned long addr,
                                   unsigned int bytes,
                                   struct x86_emulate_ctxt *ctxt)
 {
-       struct kvm_vcpu *vcpu = ctxt->vcpu;
-       gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       struct kvm_vcpu      *vcpu = ctxt->vcpu;
+       struct kvm_io_device *mmio_dev;
+       gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
        if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
@@ -1077,6 +1100,15 @@ static int emulator_write_emulated(unsigned long addr,
        if (emulator_write_phys(vcpu, gpa, val, bytes))
                return X86EMUL_CONTINUE;
 
+       /*
+        * Is this MMIO handled locally?
+        */
+       mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+       if (mmio_dev) {
+               mmio_dev->write(mmio_dev, gpa, bytes, val);
+               return X86EMUL_CONTINUE;
+       }
+
        vcpu->mmio_needed = 1;
        vcpu->mmio_phys_addr = gpa;
        vcpu->mmio_size = bytes;
@@ -2911,6 +2943,32 @@ static int kvm_cpu_hotplug(struct notifier_block 
*notifier, unsigned long val,
        return NOTIFY_OK;
 }
 
+void kvm_io_bus_init(struct kvm_io_bus *bus)
+{
+       memset(bus, 0, sizeof(*bus));
+}
+
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+{
+       int i;
+
+       for (i = 0; i < bus->dev_count; i++) {
+               struct kvm_io_device *pos = bus->devs[i];
+
+               if (pos->in_range(pos, addr))
+                       return pos;
+       }
+
+       return NULL;
+}
+
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+{
+       BUG_ON(bus->dev_count >= (NR_IOBUS_DEVS-1));
+
+       bus->devs[bus->dev_count++] = dev;
+}
+
 static struct notifier_block kvm_cpu_notifier = {
        .notifier_call = kvm_cpu_hotplug,
        .priority = 20, /* must be > scheduler priority */
KVM: Add irqdevice object

From:  <>

The current code is geared towards using a user-mode (A)PIC.  This patch adds
an "irqdevice" abstraction, and implements a "userint" model to handle the
duties of the original code.  Later, we can develop other irqdevice models 
to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate

Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/kvm/Makefile    |    2 
 drivers/kvm/irqdevice.h |  170 ++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/kvm.h       |    9 +-
 drivers/kvm/kvm_main.c  |   57 ++++++++++---
 drivers/kvm/svm.c       |   33 ++++----
 drivers/kvm/userint.c   |  202 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c       |   29 +++----
 7 files changed, 449 insertions(+), 53 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..fe284bc
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,170 @@
+/*
+ * Defines an interface for an abstract interrupt controller.  The model 
+ * consists of a unit with an arbitrary number of input lines (IRQ0-N), an
+ * output line (INTR), and methods for completing an interrupt-acknowledge
+ * cycle (INTA).  A particular implementation of this model will define
+ * various policies, such as irq-to-vector translation, INTA/auto-EOI policy,
+ * etc.  
+ * 
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an 
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another 
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ *   Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+#define KVM_IRQFLAGS_NMI  (1 << 0)
+#define KVM_IRQFLAGS_PEEK (1 << 1)
+
+struct kvm_irqdevice;
+
+struct kvm_irqsink {
+       void (*raise_intr)(struct kvm_irqsink *this, 
+                          struct kvm_irqdevice *dev);
+
+       void *private;
+};
+
+struct kvm_irqdevice {
+       int  (*pending)(struct kvm_irqdevice *this, int flags);
+       int  (*read_vector)(struct kvm_irqdevice *this, int flags); 
+       int  (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+       int  (*summary)(struct kvm_irqdevice *this, void *data);
+       void (*destructor)(struct kvm_irqdevice *this);
+
+       void               *private;
+       struct kvm_irqsink  sink;
+};
+
+/**
+ * kvm_irqdevice_init - initialize the kvm_irqdevice for use
+ * @dev: The device
+ *
+ * Description: Initialize the kvm_irqdevice for use.  Should be called before 
+ *              calling any derived implementation init functions
+ * 
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+       memset(dev, 0, sizeof(*dev));
+}
+
+/**
+ * kvm_irqdevice_pending - efficiently determines if an interrupt is pending
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ *            [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ * 
+ * Description: Efficiently determines if an interrupt is pending on an 
+ *              irqdevice
+ *
+ * Returns: (int)
+ *    [0 = no interrupts pending (per "flags" criteria)]
+ *    [1 = one or more interrupts are pending]
+ */
+static inline int kvm_irqdevice_pending(struct kvm_irqdevice *dev, int flags)
+{
+       return dev->pending(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_read_vector - read the highest priority vector from the device
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ *            [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ *            [+ KVM_IRQFLAGS_PEEK: Do not auto-acknowledge interrupt]
+ *
+ * Description: Read the highest priority pending vector from the device, 
+ *              potentially invoking auto-EOI depending on device policy
+ *
+ * Returns: (int)
+ *   [ -1 = no interrupts pending (per "flags" criteria)]
+ *   [>=0 = the highest pending vector]
+ */
+static inline int kvm_irqdevice_read_vector(struct kvm_irqdevice *dev, 
+                                           int flags)
+{
+       return dev->read_vector(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_set_pin - allows the caller to assert/deassert an IRQ
+ * @dev: The device
+ * @pin: The input pin to alter
+ * @level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Description: Allows the caller to assert/deassert an IRQ input pin to the 
+ *              device according to device policy.
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+                                 int level)
+{
+       return dev->set_pin(dev, pin, level);
+}
+
+/**
+ * kvm_irqdevice_summary - loads a summary bitmask
+ * @dev: The device
+ * @data: A pointer to a region capable of holding a 256 bit bitmap
+ *
+ * Description: Loads a summary bitmask of all pending vectors (0-255)
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_summary(struct kvm_irqdevice *dev, void *data)
+{
+       return dev->summary(dev, data);
+}
+
+/**
+ * kvm_irqdevice_register_sink - registers a kvm_irqsink object
+ * @dev: The device
+ * @sink: The sink to register.  Data will be copied so building object from 
+ *        transient storage is ok.
+ *
+ * Description: Registers a kvm_irqsink object as an INTR callback
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev, 
+                                              const struct kvm_irqsink *sink)
+{
+       dev->sink = *sink;
+}
+
+/*
+ * kvm_irqdevice_raise_intr - invokes a registered INTR callback
+ * @dev: The device
+ *
+ * Description: Invokes a registered INTR callback (if present).  This
+ *              function is meant to be used privately by a irqdevice 
+ *              implementation. 
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_raise_intr(struct kvm_irqdevice *dev)
+{
+       struct kvm_irqsink *sink = &dev->sink;
+       if (sink->raise_intr)
+               sink->raise_intr(sink, dev);
+}
+
+#endif /*  __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 181099f..58966d9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 
 #include "vmx.h"
+#include "irqdevice.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -157,6 +158,8 @@ struct vmcs {
 
 struct kvm_vcpu;
 
+int kvm_userint_init(struct kvm_irqdevice *dev);
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -266,6 +269,8 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus 
*bus, gpa_t addr);
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
                             struct kvm_io_device *dev);
 
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
 struct kvm_vcpu {
        struct kvm *kvm;
        union {
@@ -278,9 +283,7 @@ struct kvm_vcpu {
        u64 host_tsc;
        struct kvm_run *run;
        int interrupt_window_open;
-       unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-       unsigned long irq_pending[NR_IRQ_WORDS];
+       struct kvm_irqdevice irq_dev;
        unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
        unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c3c0059..7e00412 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1989,8 +1989,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->efer = vcpu->shadow_efer;
        sregs->apic_base = vcpu->apic_base;
 
-       memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
-              sizeof sregs->interrupt_bitmap);
+       kvm_irqdevice_summary(&vcpu->irq_dev, &sregs->interrupt_bitmap);
 
        vcpu_put(vcpu);
 
@@ -2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu 
*vcpu,
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
-       memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-              sizeof vcpu->irq_pending);
-       vcpu->irq_summary = 0;
-       for (i = 0; i < NR_IRQ_WORDS; ++i)
-               if (vcpu->irq_pending[i])
-                       __set_bit(i, &vcpu->irq_summary);
-
+       /* walk the interrupt-bitmap and inject an IRQ for each bit found */
+       for (i = 0; i < 256; ++i)
+               if (test_bit(i, &sregs->interrupt_bitmap[0]))
+                       kvm_irqdevice_set_pin(&vcpu->irq_dev, i, 1);
+ 
        set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -2210,14 +2207,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu 
*vcpu,
 {
        if (irq->irq < 0 || irq->irq >= 256)
                return -EINVAL;
-       vcpu_load(vcpu);
-
-       set_bit(irq->irq, vcpu->irq_pending);
-       set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
 
-       vcpu_put(vcpu);
-
-       return 0;
+       return kvm_irqdevice_set_pin(&vcpu->irq_dev, irq->irq, 1);
 }
 
 static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2319,6 +2310,36 @@ out1:
 }
 
 /*
+ * This function will be invoked whenever the vcpu->irq_dev raises its INTR 
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this, 
+                         struct kvm_irqdevice *dev)
+{
+       /*
+        * Our irq device is requesting to interrupt the vcpu.  If it is
+        * currently running, we should inject a host IPI to force a VMEXIT 
+        */
+       
+       /*
+        * FIXME: Implement this or the CPU won't notice the interrupt until
+        * the next natural VMEXIT.  Note that this is how the system
+        * has always worked, so nothing is broken here.  This is a future
+        * enhancement
+        */
+}
+
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+       struct kvm_irqsink sink = {
+               .raise_intr = kvm_vcpu_intr,
+               .private    = vcpu
+       };
+       
+       kvm_irqdevice_register_sink(&vcpu->irq_dev, &sink);
+}
+
+/*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2364,6 +2385,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int 
n)
        if (r < 0)
                goto out_free_vcpus;
 
+       kvm_irqdevice_init(&vcpu->irq_dev);
+       kvm_vcpu_irqsink_init(vcpu);
+       kvm_userint_init(&vcpu->irq_dev);
+
        kvm_arch_ops->vcpu_load(vcpu);
        r = kvm_mmu_setup(vcpu);
        if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b7e1410..e59a548 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -108,20 +108,16 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-       int word_index = __ffs(vcpu->irq_summary);
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
-       int irq = word_index * BITS_PER_LONG + bit_index;
-
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-       if (!vcpu->irq_pending[word_index])
-               clear_bit(word_index, &vcpu->irq_summary);
-       return irq;
+       return kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
 }
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-       set_bit(irq, vcpu->irq_pending);
-       set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+       /* FIXME: We probably want to reserve the "set_pin" verb for
+        * actual interrupt requests, not for putting back something
+        * previously pending.  Lets revisit this
+        */
+       kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1);
 }
 
 static inline void clgi(void)
@@ -1092,7 +1088,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
 {
        vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
        skip_emulated_instruction(vcpu);
-       if (vcpu->irq_summary)
+       if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
                return 1;
 
        kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1263,7 +1259,7 @@ static int interrupt_window_interception(struct kvm_vcpu 
*vcpu,
         * possible
         */
        if (kvm_run->request_interrupt_window &&
-           !vcpu->irq_summary) {
+           !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
                ++kvm_stat.irq_window_exits;
                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
                return 0;
@@ -1399,7 +1395,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
                (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
                 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-       if (vcpu->interrupt_window_open && vcpu->irq_summary)
+       if (vcpu->interrupt_window_open && 
+           kvm_irqdevice_pending(&vcpu->irq_dev, 0))
                /*
                 * If interrupts enabled, and not blocked by sti or mov ss. 
Good.
                 */
@@ -1409,7 +1406,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
         * Interrupts blocked.  Wait for unblock.
         */
        if (!vcpu->interrupt_window_open &&
-           (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+           (kvm_irqdevice_pending(&vcpu->irq_dev, 0) || 
+            kvm_run->request_interrupt_window)) {
                control->intercept |= 1ULL << INTERCEPT_VINTR;
        } else
                control->intercept &= ~(1ULL << INTERCEPT_VINTR);
@@ -1418,8 +1416,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
                              struct kvm_run *kvm_run)
 {
-       kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-                                                 vcpu->irq_summary == 0);
+       kvm_run->ready_for_interrupt_injection = 
+               (vcpu->interrupt_window_open && 
+                !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
        kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = vcpu->cr8;
        kvm_run->apic_base = vcpu->apic_base;
@@ -1434,7 +1433,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                          struct kvm_run *kvm_run)
 {
-       return (!vcpu->irq_summary &&
+       return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
                kvm_run->request_interrupt_window &&
                vcpu->interrupt_window_open &&
                (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..8363060
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,202 @@
+/*
+ * User Interrupts IRQ device 
+ *
+ * This acts as an extension of an interrupt controller that exists elsewhere 
+ * (typically in userspace/QEMU).  Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has 
+ * already occurred.  Therefore, this PIC has the following unusual properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ *    upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped) 
+ *    because it's an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code, 
+ *     Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ *   Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses 
+ * a summary field to accelerate lookups.  Assumes external locking 
+ *---------------------------------------------------------------------
+ */
+
+struct bitarray {
+       unsigned long summary; /* 1 per word in pending */
+       unsigned long pending[NR_IRQ_WORDS];
+};
+
+static inline int bitarray_pending(struct bitarray *this)
+{
+       return this->summary ? 1 : 0;   
+}
+
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+       if (!this->summary)
+               return -1;
+       else {
+               int word_index = __fls(this->summary);
+               int bit_index  = __fls(this->pending[word_index]);
+               
+               return word_index * BITS_PER_LONG + bit_index;  
+       }
+}
+
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+       __set_bit(nr, &this->pending);
+       __set_bit(nr / BITS_PER_LONG, &this->summary); 
+} 
+
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+       int word = nr / BITS_PER_LONG;
+
+       __clear_bit(nr, &this->pending);
+       if (!this->pending[word])
+               __clear_bit(word, &this->summary);
+}
+
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+       return test_bit(nr, &this->pending);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+struct kvm_userint {
+       spinlock_t      lock;
+       struct bitarray irq_pending;
+       int             nmi_pending;
+};
+
+static int userint_pending(struct kvm_irqdevice *this, int flags)
+{
+       struct kvm_userint *s = (struct kvm_userint*)this->private;
+       int ret;
+
+       spin_lock_irq(&s->lock);
+
+       if (flags & KVM_IRQFLAGS_NMI)
+               ret = s->nmi_pending;
+       else
+               ret = bitarray_pending(&s->irq_pending);
+
+       spin_unlock_irq(&s->lock);
+
+       return ret;
+}
+
+static int userint_read_vector(struct kvm_irqdevice *this, int flags)
+{
+       struct kvm_userint *s = (struct kvm_userint*)this->private;
+       int          irq;
+
+       spin_lock_irq(&s->lock);
+
+       /*
+        * NMIs take priority, so if there is an NMI pending, or
+        * if we are filtering out NMIs, only consider them 
+        */
+       if (s->nmi_pending || (flags & KVM_IRQFLAGS_NMI))
+               irq = s->nmi_pending ? 2 : -1;
+       else
+               irq = bitarray_findhighest(&s->irq_pending);
+       
+       if ((irq > -1) && !(flags & KVM_IRQFLAGS_PEEK)) {
+               /*
+                * If the "peek" flag is not set, automatically clear the 
+                * interrupt as the EOI mechanism (if any) will take place 
+                * in userspace 
+                */
+               bitarray_clear(&s->irq_pending, irq);
+               if (irq == 2)
+                       s->nmi_pending = 0;
+       }
+
+       spin_unlock_irq(&s->lock);
+
+       return irq;
+}
+
+static int userint_set_pin(struct kvm_irqdevice* this, int irq, int level)
+{
+       struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+       if (!level)
+               return 0; /* We don't care about deasserts */
+
+       spin_lock_irq(&s->lock);
+
+       /*
+        * Update the local state 
+        */
+       bitarray_set(&s->irq_pending, irq);
+       if (irq == 2)
+               s->nmi_pending = 1;
+
+       spin_unlock_irq(&s->lock);
+
+       /*
+        * And then alert the higher layer software we have changes 
+        */
+       kvm_irqdevice_raise_intr(this);
+
+       return 0;
+}
+
+static int userint_summary(struct kvm_irqdevice* this, void *data)
+{      
+       struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+       spin_lock_irq(&s->lock);
+       memcpy(data, s->irq_pending.pending, sizeof s->irq_pending.pending);
+       spin_unlock_irq(&s->lock);
+
+       return 0;
+}
+
+static void userint_destructor(struct kvm_irqdevice *this)
+{
+       kfree(this->private);
+}
+
+int kvm_userint_init(struct kvm_irqdevice *dev)
+{
+       struct kvm_userint *s;
+
+       s = kzalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+           return -ENOMEM;
+
+       spin_lock_init(&s->lock);
+
+       dev->pending     = userint_pending;
+       dev->read_vector = userint_read_vector;
+       dev->set_pin     = userint_set_pin;
+       dev->summary     = userint_summary;
+       dev->destructor  = userint_destructor;
+
+       dev->private = s;
+
+       return 0;
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 61a6116..a0fdf02 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int 
irq)
 
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
-       int word_index = __ffs(vcpu->irq_summary);
-       int bit_index = __ffs(vcpu->irq_pending[word_index]);
-       int irq = word_index * BITS_PER_LONG + bit_index;
-
-       clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-       if (!vcpu->irq_pending[word_index])
-               clear_bit(word_index, &vcpu->irq_summary);
+       int irq = kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
+       BUG_ON(irq < 0);
 
        if (vcpu->rmode.active) {
                inject_rmode_irq(vcpu, irq);
@@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
                 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
        if (vcpu->interrupt_window_open &&
-           vcpu->irq_summary &&
+           kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
            !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
                /*
                 * If interrupts enabled, and not blocked by sti or mov ss. 
Good.
@@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
        cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
        if (!vcpu->interrupt_window_open &&
-           (vcpu->irq_summary || kvm_run->request_interrupt_window))
+           (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
+            kvm_run->request_interrupt_window))
                /*
                 * Interrupts blocked.  Wait for unblock.
                 */
@@ -1314,8 +1310,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 
        if (is_external_interrupt(vect_info)) {
                int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-               set_bit(irq, vcpu->irq_pending);
-               set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+               /* FIXME: Is this right? */
+               kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1); 
        }
 
        if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
        kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
        kvm_run->cr8 = vcpu->cr8;
        kvm_run->apic_base = vcpu->apic_base;
-       kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-                                                 vcpu->irq_summary == 0);
+       kvm_run->ready_for_interrupt_injection = 
+               (vcpu->interrupt_window_open && 
+                !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
         * possible
         */
        if (kvm_run->request_interrupt_window &&
-           !vcpu->irq_summary) {
+           !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
                ++kvm_stat.irq_window_exits;
                return 0;
@@ -1642,7 +1639,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        skip_emulated_instruction(vcpu);
-       if (vcpu->irq_summary)
+       if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
                return 1;
 
        kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
                                          struct kvm_run *kvm_run)
 {
-       return (!vcpu->irq_summary &&
+       return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
                kvm_run->request_interrupt_window &&
                vcpu->interrupt_window_open &&
                (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
KVM: Preemptible VCPU

From:  <>

This adds support for interrupting an executing CPU

Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/kvm/Makefile   |    2 -
 drivers/kvm/condvar.c  |  109 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/condvar.h  |   36 ++++++++++++++++
 drivers/kvm/kvm.h      |   12 +++++
 drivers/kvm/kvm_main.c |   47 ++++++++++++++++++---
 drivers/kvm/svm.c      |   35 +++++++++++++++
 drivers/kvm/vmx.c      |   35 +++++++++++++++
 7 files changed, 270 insertions(+), 6 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 540afbc..b3bef0e 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o condvar.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/condvar.c b/drivers/kvm/condvar.c
new file mode 100644
index 0000000..87e464a
--- /dev/null
+++ b/drivers/kvm/condvar.c
@@ -0,0 +1,109 @@
+/*
+ * Condition Variable
+ *
+ * Copyright (C) 2007, Novell
+ *
+ * Authors:
+ *   Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "condvar.h"
+
+void condvar_init(struct condvar *cv)
+{
+       wait_queue_head_t __head = __WAIT_QUEUE_HEAD_INITIALIZER(cv->queue);
+
+       memset(cv, 0, sizeof(*cv));
+       cv->queue = __head;
+}
+EXPORT_SYMBOL_GPL(condvar_init);
+
+/*
+ * Assumes the lock is already held
+ */
+int condvar_wait(struct condvar *cv, void *l, long timeout)
+{
+       DEFINE_WAIT(__wait);    
+       int _ret = 0;
+       
+       BUG_ON(!cv->lock_ops);
+
+       /*
+        * first place ourselves on the waitqueue before releasing the lock 
+        */
+       prepare_to_wait(&cv->queue, &__wait, TASK_UNINTERRUPTIBLE);
+       
+       /*
+        * now actually release the lock to unblock any potential signalers 
+        */
+       cv->lock_ops->unlock(l);
+       
+       /*
+        * finally, reschedule until we are re-awoken 
+        */ 
+       if (timeout > -1)
+               schedule_timeout(timeout);
+       else
+               schedule();
+       finish_wait(&cv->queue, &__wait);
+       
+       /*
+        * if we get here, it's because someone signaled us.
+        * reacquire the lock
+        */
+       cv->lock_ops->lock(l);
+       
+       return _ret;
+}
+EXPORT_SYMBOL_GPL(condvar_wait);
+
+/*
+ * Assumes the lock is already held
+ */
+int condvar_signal(struct condvar *cv)
+{
+       wake_up(&cv->queue);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(condvar_signal);
+
+/*
+ *------------------------------------------------------------------------
+ * spinlock_condvar
+ *
+ * spin_lock/spin_unlock can sometimes be implemented as macros, so 
+ * assigning them as function pointers directly is probably not going to 
+ * work.  Therefore we need these lightweight wrappers
+ *------------------------------------------------------------------------
+ */
+
+static void spinlock_condvar_lock(void *l)
+{
+       spinlock_t *lock = (spinlock_t*)l;
+
+       spin_lock(lock);
+} 
+
+static void spinlock_condvar_unlock(void *l)
+{
+       spinlock_t *lock = (spinlock_t*)l;
+
+       spin_unlock(lock);
+} 
+
+static struct cv_lock_ops spinlock_ops = {
+    .lock   = spinlock_condvar_lock,
+    .unlock = spinlock_condvar_unlock
+};
+
+void spinlock_condvar_init(struct condvar *cv)
+{
+       condvar_init(cv);
+
+       cv->lock_ops = &spinlock_ops;
+}
+
diff --git a/drivers/kvm/condvar.h b/drivers/kvm/condvar.h
new file mode 100644
index 0000000..58ed523
--- /dev/null
+++ b/drivers/kvm/condvar.h
@@ -0,0 +1,36 @@
+/*
+ * Condition Variable
+ *
+ * Copyright (C) 2007, Novell
+ *
+ * Authors:
+ *   Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+struct cv_lock_ops {
+       void (*lock)(void *);
+       void (*unlock)(void *);
+};
+
+struct condvar {
+       wait_queue_head_t   queue;
+       struct cv_lock_ops *lock_ops;
+};
+
+void condvar_init(struct condvar *cv);
+int condvar_wait(struct condvar *cv, void *l, long timeout);
+int condvar_signal(struct condvar *cv);
+
+/*
+ *------------------------------------------------------------------------
+ * spinlock_condvar
+ *------------------------------------------------------------------------
+ */
+
+void spinlock_condvar_init(struct condvar *cv);
+
+
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 58966d9..703ffe0 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
 
 #include "vmx.h"
 #include "irqdevice.h"
+#include "condvar.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -271,6 +272,16 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ */
+struct kvm_vcpu_irq {
+       spinlock_t          lock;
+       struct condvar      cv;
+       struct task_struct *task;
+       int                 pending;
+};
+
 struct kvm_vcpu {
        struct kvm *kvm;
        union {
@@ -284,6 +295,7 @@ struct kvm_vcpu {
        struct kvm_run *run;
        int interrupt_window_open;
        struct kvm_irqdevice irq_dev;
+       struct kvm_vcpu_irq irq;
        unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
        unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7e00412..ea3609e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -299,6 +299,11 @@ static struct kvm *kvm_create_vm(void)
                struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
                mutex_init(&vcpu->mutex);
+
+               memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+               spin_lock_init(&vcpu->irq.lock);
+               spinlock_condvar_init(&vcpu->irq.cv);
+
                vcpu->cpu = -1;
                vcpu->kvm = kvm;
                vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -2320,13 +2325,45 @@ static void kvm_vcpu_intr(struct kvm_irqsink *this,
         * Our irq device is requesting to interrupt the vcpu.  If it is
         * currently running, we should inject a host IPI to force a VMEXIT 
         */
-       
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+
        /*
-        * FIXME: Implement this or the CPU wont notice the interrupt until
-        * the next natural VMEXIT.  Note that this is how the system
-        * has always worked, so nothing is broken here.  This is a future
-        * enhancement
+        * HACK ALERT!
+        *
+        * We want to send a virtual interrupt signal to the task that owns
+        * the guest.  However, the signal will only force a VMEXIT (via
+        * a reschedule IPI) if the task is currently in GUEST mode.  There
+        * is a race condition between the time that we mark the vcpu as
+        * running and the time the system actually enters guest mode.  Since
+        * there doesn't appear to be any way to help with this situation from
+        * the VT hardware, we are forced to wait to make sure the guest 
+        * actually gets interrupted in a reasonable amount of time.  If it
+        * does not, we assume that the IPI failed because it was too early
+        * and must try again until it does.
+        *
+        * This condvar/spinlock/timeout/retry scheme eliminates the race in a safe
+        * manner, at the expense of making the INTR delivery synchronous
         */
+       spin_lock(&vcpu->irq.lock);
+       
+       if (vcpu->irq.task) {
+               struct timespec tmo = {
+                       .tv_sec  = 0,
+                       .tv_nsec = 100000 /* 100us */
+               };
+
+               BUG_ON(vcpu->irq.task == current);
+                       
+               while (vcpu->irq.task) {
+                       send_sig(SIGSTOP, vcpu->irq.task, 0);
+                       condvar_wait(&vcpu->irq.cv, &vcpu->irq.lock,
+                                    timespec_to_jiffies(&tmo));
+               }
+               
+               vcpu->irq.pending = 1;
+       }
+       
+       spin_unlock(&vcpu->irq.lock);
 }
 
 static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e59a548..6bc2fb1 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1463,9 +1463,25 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
        int r;
 
 again:
+       spin_lock(&vcpu->irq.lock);
+
+       /*
+        * Setting vcpu->irq.task signals to outsiders that the VCPU is
+        * effectively in GUEST mode, and therefore must be signalled
+        * to transition the task back to HOST mode if any new interrupts
+        * arrive.
+        */
+       vcpu->irq.task = current;
+
+       /*
+        * We also must inject interrupts (if any) while the irq_lock
+        * is held
+        */
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
+       spin_unlock(&vcpu->irq.lock);
+
        clgi();
 
        pre_svm_run(vcpu);
@@ -1617,6 +1633,25 @@ again:
        reload_tss(vcpu);
 
        /*
+        * Signal that we have transitioned back to host mode 
+        */
+       spin_lock(&vcpu->irq.lock);
+
+       vcpu->irq.task = NULL;
+       condvar_signal(&vcpu->irq.cv);
+
+       /*
+        * If irq.pending is asserted, someone has undoubtedly sent us a SIGSTOP
+        * signal.  Counter it with a SIGCONT
+        */
+       if(vcpu->irq.pending) {
+           send_sig(SIGCONT, current, 0);
+           vcpu->irq.pending = 0;
+       }
+
+       spin_unlock(&vcpu->irq.lock);
+
+       /*
         * Profile KVM exit RIPs:
         */
        if (unlikely(prof_on == KVM_PROFILING))
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a0fdf02..f7b716b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1748,9 +1748,25 @@ again:
        vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
 #endif
 
+       spin_lock(&vcpu->irq.lock);
+
+       /*
+        * Setting vcpu->irq.task signals to outsiders that the VMCS is
+        * effectively in GUEST mode, and therefore must be signalled
+        * to transition the task back to HOST mode if any new interrupts
+        * arrive.
+        */
+       vcpu->irq.task = current;
+
+       /*
+        * We also must inject interrupts (if any) while the irq_lock
+        * is held
+        */
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
+       spin_unlock(&vcpu->irq.lock);
+
        if (vcpu->guest_debug.enabled)
                kvm_guest_debug_pre(vcpu);
 
@@ -1911,6 +1927,25 @@ again:
 
        asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
+       /*
+        * Signal that we have transitioned back to host mode 
+        */
+       spin_lock(&vcpu->irq.lock);
+
+       vcpu->irq.task = NULL;
+       condvar_signal(&vcpu->irq.cv);
+
+       /*
+        * If irq.pending is asserted, someone has undoubtedly sent us a SIGSTOP
+        * signal.  Counter it with a SIGCONT
+        */
+       if(vcpu->irq.pending) {
+           send_sig(SIGCONT, current, 0);
+           vcpu->irq.pending = 0;
+       }
+
+       spin_unlock(&vcpu->irq.lock);
+
        if (fail) {
                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                kvm_run->fail_entry.hardware_entry_failure_reason

Attachment: series
Description: Binary data

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
kvm-devel mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to