The current code is geared towards using a user-mode (A)PIC. This patch adds
an "irqdevice" abstraction, and implements a "userint" model to handle the
duties of the original code. Later, we can develop other irqdevice models
to handle objects like LAPIC, IOAPIC, i8259, etc., as appropriate.
Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---
drivers/kvm/Makefile | 2
drivers/kvm/irqdevice.h | 176 +++++++++++++++++++++++++++++++++++++
drivers/kvm/kvm.h | 94 +++++++++++++++++++-
drivers/kvm/kvm_main.c | 58 +++++++++---
drivers/kvm/svm.c | 162 ++++++++++++++++++++++++----------
drivers/kvm/userint.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/kvm/vmx.c | 161 +++++++++++++++++++++++++---------
7 files changed, 769 insertions(+), 107 deletions(-)
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..097d179
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,176 @@
+/*
+ * Defines an interface for an abstract interrupt controller. The model
+ * consists of a unit with an arbitrary number of input lines N (IRQ0-(N-1)),
+ * an arbitrary number of output lines (INTR) (LINT, EXTINT, NMI, etc), and
+ * methods for completing an interrupt-acknowledge cycle (INTA). A particular
+ * implementation of this model will define various policies, such as
+ * irq-to-vector translation, INTA/auto-EOI policy, etc.
+ *
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ * Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+struct kvm_irqdevice;
+
+/* Output (INTR) pins that an irqdevice can raise towards its sink. */
+typedef enum {
+ kvm_irqpin_localint, /* a vector is pending on this device */
+ kvm_irqpin_extint, /* a vector is pending on an external device */
+ kvm_irqpin_smi, /* system-management-interrupt pin */
+ kvm_irqpin_nmi, /* non-maskable-interrupt pin */
+ kvm_irqpin_invalid, /* must always be last */
+} kvm_irqpin_t;
+
+
+/*
+ * Receiver of an irqdevice's INTR output. kvm_irqdevice_set_intr() calls
+ * set_intr (when it is non-NULL) with the sink itself, the raising device
+ * and the pin that was raised.
+ */
+struct kvm_irqsink {
+ void (*set_intr)(struct kvm_irqsink *this,
+ struct kvm_irqdevice *dev,
+ kvm_irqpin_t pin);
+
+ void *private; /* opaque context owned by whoever registered the sink */
+};
+
+/* Result bits reported in kvm_irqack_data.flags */
+#define KVM_IRQACKDATA_VECTOR_VALID (1 << 0) /* data.vector is valid */
+#define KVM_IRQACKDATA_VECTOR_PENDING (1 << 1) /* more vectors are pending */
+
+/* Request bits passed as the "flags" argument of kvm_irqdevice_ack() */
+#define KVM_IRQACK_FLAG_PEEK (1 << 0) /* don't ack vector, just check status */
+
+/* Out-parameter filled in by an irqdevice's ack() method. */
+struct kvm_irqack_data {
+ int flags; /* KVM_IRQACKDATA_* bits */
+ int vector; /* meaningful only when KVM_IRQACKDATA_VECTOR_VALID is set */
+};
+
+/*
+ * Abstract interrupt controller. An implementation fills in the three
+ * methods and its private pointer; callers go through the kvm_irqdevice_*()
+ * inline wrappers rather than invoking the methods directly.
+ */
+struct kvm_irqdevice {
+ /* read (and, unless peeking, acknowledge) the highest priority vector */
+ int (*ack)(struct kvm_irqdevice *this, int flags,
+ struct kvm_irqack_data *data);
+ /* assert (level = 1) or deassert (level = 0) an input pin */
+ int (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+ /* release whatever the implementation attached to this device */
+ void (*destructor)(struct kvm_irqdevice *this);
+
+ void *private; /* implementation-specific state */
+ struct kvm_irqsink sink; /* INTR callback, copied in by register_sink */
+};
+
+/**
+ * kvm_irqdevice_init - put a kvm_irqdevice into a known, empty state
+ * @dev: The device
+ *
+ * Description: Zero-fills the whole structure (methods, private pointer and
+ * sink). Must run before any derived implementation's own init
+ * function, because it would wipe whatever that init stored.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+	memset(dev, 0, sizeof *dev);
+}
+
+/**
+ * kvm_irqdevice_ack - read and ack the highest priority vector from the device
+ * @dev: The device
+ * @flags: Modifies default behavior
+ * [ KVM_IRQACK_FLAG_PEEK - Don't ack vector, just check status ]
+ * @data: A pointer to a kvm_irqack_data structure to hold the result
+ *
+ * Description: Forwards to the device's ack() method, which reads the
+ * highest priority pending vector and may perform an auto-EOI,
+ * depending on device policy.
+ *
+ * A successful return means the *data* structure is valid:
+ *
+ * data.flags -
+ * [KVM_IRQACKDATA_VECTOR_VALID - data.vector is valid]
+ * [KVM_IRQACKDATA_VECTOR_PENDING - more vectors are pending]
+ *
+ * Returns: (int)
+ * [-1 = failure]
+ * [ 0 = success]
+ */
+static inline int kvm_irqdevice_ack(struct kvm_irqdevice *dev, int flags,
+				    struct kvm_irqack_data *data)
+{
+	int rc = dev->ack(dev, flags, data);
+
+	return rc;
+}
+
+/**
+ * kvm_irqdevice_set_pin - assert or deassert one IRQ input of the device
+ * @dev: The device
+ * @pin: The input pin to alter
+ * @level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Description: Forwards the request to the device's set_pin() method; what
+ * an assert or deassert actually does is device policy.
+ *
+ * Returns: (int)
+ * [-1 = failure]
+ * [ 0 = success]
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+					int level)
+{
+	int rc = dev->set_pin(dev, pin, level);
+
+	return rc;
+}
+
+/**
+ * kvm_irqdevice_register_sink - registers a kvm_irqsink object
+ * @dev: The device
+ * @sink: The sink to register. Its contents are copied into the device, so
+ * building the object in transient (e.g. stack) storage is ok.
+ *
+ * Description: Installs a kvm_irqsink as the device's INTR callback,
+ * replacing whatever sink was stored before.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev,
+					       const struct kvm_irqsink *sink)
+{
+	struct kvm_irqsink *dst = &dev->sink;
+
+	dst->set_intr = sink->set_intr;
+	dst->private = sink->private;
+}
+
+/**
+ * kvm_irqdevice_destructor - destroys an irqdevice
+ * @dev: The device
+ *
+ * Description: Invokes the implementation's destructor() method, if one
+ * has been installed. A device that was only zero-initialized
+ * (kvm_irqdevice_init) and never had an implementation attached
+ * has a NULL destructor and is silently skipped.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_destructor(struct kvm_irqdevice *dev)
+{
+	/* nothing to tear down if no implementation was ever attached */
+	if (dev->destructor)
+		dev->destructor(dev);
+}
+
+/**
+ * kvm_irqdevice_set_intr - invokes a registered INTR callback
+ * @dev: The device
+ * @pin: Identifies the pin to alter -
+ * [ kvm_irqpin_localint (default) - a vector is pending on this
+ * device]
+ * [ kvm_irqpin_extint - a vector is pending on an external device]
+ * [ kvm_irqpin_smi - system-management-interrupt pin]
+ * [ kvm_irqpin_nmi - non-maskable-interrupt pin]
+ *
+ * Description: Invokes the registered INTR callback; does nothing when no
+ * callback has been registered. This function is meant to be
+ * used privately by an irqdevice implementation.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_set_intr(struct kvm_irqdevice *dev,
+					  kvm_irqpin_t pin)
+{
+	if (!dev->sink.set_intr)
+		return;
+
+	dev->sink.set_intr(&dev->sink, dev, pin);
+}
+
+#endif /* __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 3289801..1c8af97 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include "vmx.h"
+#include "irqdevice.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -160,6 +161,11 @@ struct vmcs {
struct kvm_vcpu;
+int kvm_user_irqdev_init(struct kvm_irqdevice *dev);
+int kvm_user_irqdev_save(struct kvm_irqdevice *this, void *data);
+int kvm_user_irqdev_restore(struct kvm_irqdevice *this, void *data);
+int kvm_userint_init(struct kvm_vcpu *vcpu);
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -316,6 +322,18 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct
kvm_io_bus *bus, gpa_t addr);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ *
+ * "pending" is handed by address to __set_bit()/__clear_bit()/test_bit(),
+ * which operate on unsigned long, so it must be an unsigned long.
+ */
+struct kvm_vcpu_irq {
+	spinlock_t lock;		/* guards pending and deferred */
+	struct kvm_irqdevice dev;	/* interrupt controller wired to this vcpu */
+	unsigned long pending;		/* one bit per raised kvm_irqpin_t pin */
+	int deferred;			/* vector awaiting re-injection, -1 if none */
+};
+
struct kvm_vcpu {
struct kvm *kvm;
union {
@@ -328,9 +346,7 @@ struct kvm_vcpu {
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
- unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
- unsigned long irq_pending[NR_IRQ_WORDS];
+ struct kvm_vcpu_irq irq;
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
@@ -408,6 +424,78 @@ struct kvm_vcpu {
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
};
+/*
+ * These two functions are helpers for determining if a standard interrupt
+ * is pending to replace the old "if (vcpu->irq_summary)" logic.
+ */
+
+/*
+ * __kvm_vcpu_irq_pending - snapshot of the raised-pin bitmask of a vcpu
+ *
+ * Returns vcpu->irq.pending, with the kvm_irqpin_localint bit additionally
+ * set whenever a deferred vector is parked in vcpu->irq.deferred (!= -1).
+ *
+ * Assumes lock already held
+ */
+static inline int __kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu)
+{
+	int pending = vcpu->irq.pending;
+
+	/*
+	 * Plain mask arithmetic rather than __set_bit(): the bitops helpers
+	 * take an unsigned long pointer and must not be aimed at an int.
+	 */
+	if (vcpu->irq.deferred != -1)
+		pending |= 1 << kvm_irqpin_localint;
+
+	return pending;
+}
+
+/*
+ * Locking wrapper around __kvm_vcpu_irq_pending(): takes vcpu->irq.lock
+ * (interrupt-safe) for the duration of the query and returns the same
+ * bitmask.
+ */
+static inline int kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;	/* spin_lock_irqsave() requires unsigned long */
+	int ret;
+
+	spin_lock_irqsave(&vcpu->irq.lock, flags);
+	ret = __kvm_vcpu_irq_pending(vcpu);
+	spin_unlock_irqrestore(&vcpu->irq.lock, flags);
+
+	return ret;
+}
+
+/*
+ * kvm_vcpu_irq_pop - fetch the next vector to inject into the vcpu
+ *
+ * A deferred vector (vcpu->irq.deferred != -1) takes precedence: the device
+ * is only peeked so that data->flags reflects whether more vectors are
+ * pending, the deferred vector is reported as valid, and the deferred slot
+ * is emptied. Otherwise the device is acked normally.
+ *
+ * Returns the result of kvm_irqdevice_ack().
+ *
+ * Assumes lock already held
+ */
+static inline int kvm_vcpu_irq_pop(struct kvm_vcpu *vcpu,
+ struct kvm_irqack_data *data)
+{
+ int ret = 0;
+
+ if (vcpu->irq.deferred != -1) {
+ /* peek only: the deferred vector was already taken from the device */
+ ret = kvm_irqdevice_ack(&vcpu->irq.dev, KVM_IRQACK_FLAG_PEEK,
+ data);
+ data->flags |= KVM_IRQACKDATA_VECTOR_VALID;
+ data->vector = vcpu->irq.deferred;
+ vcpu->irq.deferred = -1;
+ } else
+ ret = kvm_irqdevice_ack(&vcpu->irq.dev, 0, data);
+
+ /*
+ * If there are no more interrupts we must clear the status flag
+ */
+ if (!(data->flags & KVM_IRQACKDATA_VECTOR_PENDING))
+ __clear_bit(kvm_irqpin_localint, &vcpu->irq.pending);
+
+ return ret;
+}
+
+/*
+ * Park a vector in the single deferred slot so that the next
+ * kvm_vcpu_irq_pop() returns it. Unlocked variant; kvm_vcpu_irq_push()
+ * is the wrapper that takes vcpu->irq.lock.
+ */
+static inline void __kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq)
+{
+ BUG_ON(vcpu->irq.deferred != -1); /* We can only hold one deferred */
+
+ vcpu->irq.deferred = irq;
+}
+
+/*
+ * Locking wrapper around __kvm_vcpu_irq_push(): defers @irq for a later
+ * injection attempt while holding vcpu->irq.lock (interrupt-safe).
+ */
+static inline void kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq)
+{
+	unsigned long flags;	/* spin_lock_irqsave() requires unsigned long */
+
+	spin_lock_irqsave(&vcpu->irq.lock, flags);
+	__kvm_vcpu_irq_push(vcpu, irq);
+	spin_unlock_irqrestore(&vcpu->irq.lock, flags);
+}
+
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 89b00e5..897b20f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -323,6 +323,11 @@ static struct kvm *kvm_create_vm(void)
struct kvm_vcpu *vcpu = &kvm->vcpus[i];
mutex_init(&vcpu->mutex);
+
+ memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+ spin_lock_init(&vcpu->irq.lock);
+ vcpu->irq.deferred = -1;
+
vcpu->cpu = -1;
vcpu->kvm = kvm;
vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -390,6 +395,7 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_load(vcpu);
kvm_mmu_destroy(vcpu);
vcpu_put(vcpu);
+ kvm_irqdevice_destructor(&vcpu->irq.dev);
kvm_arch_ops->vcpu_free(vcpu);
free_page((unsigned long)vcpu->run);
vcpu->run = NULL;
@@ -2009,8 +2015,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->efer = vcpu->shadow_efer;
sregs->apic_base = vcpu->apic_base;
- memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
- sizeof sregs->interrupt_bitmap);
+ kvm_user_irqdev_save(&vcpu->irq.dev, &sregs->interrupt_bitmap);
vcpu_put(vcpu);
@@ -2027,7 +2032,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
int mmu_reset_needed = 0;
- int i;
struct descriptor_table dt;
vcpu_load(vcpu);
@@ -2064,12 +2068,8 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu
*vcpu,
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
- memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
- sizeof vcpu->irq_pending);
- vcpu->irq_summary = 0;
- for (i = 0; i < NR_IRQ_WORDS; ++i)
- if (vcpu->irq_pending[i])
- __set_bit(i, &vcpu->irq_summary);
+ kvm_user_irqdev_restore(&vcpu->irq.dev,
+ &sregs->interrupt_bitmap[0]);
set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -2230,14 +2230,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu
*vcpu,
{
if (irq->irq < 0 || irq->irq >= 256)
return -EINVAL;
- vcpu_load(vcpu);
-
- set_bit(irq->irq, vcpu->irq_pending);
- set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
- vcpu_put(vcpu);
-
- return 0;
+ return kvm_irqdevice_set_pin(&vcpu->irq.dev, irq->irq, 1);
}
static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2339,6 +2333,32 @@ out1:
}
/*
+ * This function will be invoked whenever the vcpu->irq.dev raises its INTR
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this,
+			  struct kvm_irqdevice *dev,
+			  kvm_irqpin_t pin)
+{
+	/* the sink's private pointer is the vcpu (see kvm_vcpu_irqsink_init) */
+	struct kvm_vcpu *vcpu = this->private;
+	unsigned long irqflags;
+
+	/* record the raised pin; it is consumed on the next guest entry */
+	spin_lock_irqsave(&vcpu->irq.lock, irqflags);
+	__set_bit(pin, &vcpu->irq.pending);
+	spin_unlock_irqrestore(&vcpu->irq.lock, irqflags);
+}
+
+/*
+ * Wire the vcpu up as the INTR sink of its own irqdevice. The sink is
+ * built on the stack; kvm_irqdevice_register_sink() copies it.
+ */
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_irqsink sink;
+
+	sink.set_intr = kvm_vcpu_intr;
+	sink.private = vcpu;
+
+	kvm_irqdevice_register_sink(&vcpu->irq.dev, &sink);
+}
+
+/*
* Creates some virtual cpus. Good luck creating more than one.
*/
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2385,6 +2405,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int
n)
if (r < 0)
goto out_free_vcpus;
+ kvm_irqdevice_init(&vcpu->irq.dev);
+ kvm_vcpu_irqsink_init(vcpu);
+ r = kvm_userint_init(vcpu);
+ if (r < 0)
+ goto out_free_vcpus;
+
kvm_arch_ops->vcpu_load(vcpu);
r = kvm_mmu_setup(vcpu);
if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b621403..a9d917a 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -106,24 +106,6 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
(cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2;
}
-static inline u8 pop_irq(struct kvm_vcpu *vcpu)
-{
- int word_index = __ffs(vcpu->irq_summary);
- int bit_index = __ffs(vcpu->irq_pending[word_index]);
- int irq = word_index * BITS_PER_LONG + bit_index;
-
- clear_bit(bit_index, &vcpu->irq_pending[word_index]);
- if (!vcpu->irq_pending[word_index])
- clear_bit(word_index, &vcpu->irq_summary);
- return irq;
-}
-
-static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
- set_bit(irq, vcpu->irq_pending);
- set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
-}
-
static inline void clgi(void)
{
asm volatile (SVM_CLGI);
@@ -904,7 +886,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
int r;
if (is_external_interrupt(exit_int_info))
- push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
+ /*
+ * An exception was taken while we were trying to inject an
+ * IRQ. We must defer the injection of the vector until
+ * the next window.
+ */
+ kvm_vcpu_irq_push(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
spin_lock(&vcpu->kvm->lock);
@@ -1114,7 +1101,7 @@ static int halt_interception(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
skip_emulated_instruction(vcpu);
- if (vcpu->irq_summary)
+ if (kvm_vcpu_irq_pending(vcpu))
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1285,7 +1272,7 @@ static int interrupt_window_interception(struct kvm_vcpu
*vcpu,
* possible
*/
if (kvm_run->request_interrupt_window &&
- !vcpu->irq_summary) {
+ !kvm_vcpu_irq_pending(vcpu)) {
++vcpu->stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0;
@@ -1384,60 +1371,135 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
}
-static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
-{
- struct vmcb_control_area *control;
-
- control = &vcpu->svm->vmcb->control;
- control->int_vector = pop_irq(vcpu);
- control->int_ctl &= ~V_INTR_PRIO_MASK;
- control->int_ctl |= V_IRQ_MASK |
- ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
-}
-
static void kvm_reput_irq(struct kvm_vcpu *vcpu)
{
struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
if (control->int_ctl & V_IRQ_MASK) {
control->int_ctl &= ~V_IRQ_MASK;
- push_irq(vcpu, control->int_vector);
+ kvm_vcpu_irq_push(vcpu, control->int_vector);
}
vcpu->interrupt_window_open =
!(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
}
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static void do_intr_requests(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run,
+ kvm_irqpin_t pin)
{
struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+ int pending = 0;
vcpu->interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
- if (vcpu->interrupt_window_open && vcpu->irq_summary)
+ if (vcpu->interrupt_window_open) {
/*
- * If interrupts enabled, and not blocked by sti or mov ss.
Good.
+ * If interrupts enabled, and not blocked by sti or mov ss.
+ * Good.
*/
- kvm_do_inject_irq(vcpu);
+ struct kvm_irqack_data ack;
+ int r = 0;
+
+ memset(&ack, 0, sizeof(ack));
+
+ switch (pin) {
+ case kvm_irqpin_localint:
+ r = kvm_vcpu_irq_pop(vcpu, &ack);
+ break;
+ case kvm_irqpin_extint:
+ printk(KERN_WARNING "KVM: external-interrupts not " \
+ "handled yet\n");
+ __clear_bit(pin, &vcpu->irq.pending);
+ break;
+ case kvm_irqpin_nmi:
+ /*
+ * FIXME: Someday we will handle this using the
+ * specific SVN NMI features. For now, just inject
+ * the NMI as a standard interrupt on vector 2
+ */
+ ack.flags |= KVM_IRQACKDATA_VECTOR_VALID;
+ ack.vector = 2;
+ __clear_bit(pin, &vcpu->irq.pending);
+ break;
+ default:
+ panic("KVM: unknown interrupt pin raised: %d\n", pin);
+ break;
+ }
+
+ BUG_ON(r < 0);
+
+ if (ack.flags & KVM_IRQACKDATA_VECTOR_VALID) {
+ control = &vcpu->svm->vmcb->control;
+ control->int_vector = ack.vector;
+ control->int_ctl &= ~V_INTR_PRIO_MASK;
+ control->int_ctl |= V_IRQ_MASK |
+ ((/*control->int_vector >> 4*/ 0xf) <<
+ V_INTR_PRIO_SHIFT);
+ }
+ }
/*
- * Interrupts blocked. Wait for unblock.
+ * Re-read the pending interrupt state. If anything is still
+ * pending we need to cause an exit on the next window
*/
- if (!vcpu->interrupt_window_open &&
- (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+ pending = __kvm_vcpu_irq_pending(vcpu);
+
+ if (test_bit(pin, &pending) || kvm_run->request_interrupt_window)
+ /*
+ * Trigger a VMEXIT on the next IRQ window
+ */
control->intercept |= 1ULL << INTERCEPT_VINTR;
- } else
- control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
+/*
+ * Stop intercepting the virtual-interrupt window; do_intr_requests()
+ * re-arms the intercept if something is still pending.
+ */
+static void clear_pending_controls(struct kvm_vcpu *vcpu)
+{
+	vcpu->svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
+/*
+ * do_interrupt_requests - service every raised irq pin before guest entry
+ *
+ * Takes a snapshot of the raised pins, clears the interrupt-window
+ * intercept, then walks the snapshot from the highest-numbered pin down,
+ * handing each pin to do_intr_requests() or dropping it.
+ *
+ * svm_vcpu_run() calls this with vcpu->irq.lock held, hence the unlocked
+ * __kvm_vcpu_irq_pending() and __clear_bit() accesses.
+ */
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+				  struct kvm_run *kvm_run)
+{
+	/* unsigned long: the snapshot is edited with __clear_bit() below */
+	unsigned long pending = __kvm_vcpu_irq_pending(vcpu);
+
+	clear_pending_controls(vcpu);
+
+	while (pending) {
+		kvm_irqpin_t pin = __fls(pending);
+
+		switch (pin) {
+		case kvm_irqpin_localint:
+		case kvm_irqpin_extint:
+		case kvm_irqpin_nmi:
+			do_intr_requests(vcpu, kvm_run, pin);
+			break;
+		case kvm_irqpin_smi:
+			/* ignored (for now) */
+			printk(KERN_WARNING "KVM: dropping unhandled SMI\n");
+			__clear_bit(pin, &vcpu->irq.pending);
+			break;
+		case kvm_irqpin_invalid:
+			/* drop */
+			break;
+		default:
+			panic("KVM: unknown interrupt pin raised: %d\n", pin);
+			break;
+		}
+
+		/* this pin has been looked at; move on to the next lower one */
+		__clear_bit(pin, &pending);
+	}
+}
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
- vcpu->irq_summary == 0);
+ kvm_run->ready_for_interrupt_injection =
+ (vcpu->interrupt_window_open &&
+ !kvm_vcpu_irq_pending(vcpu));
kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = vcpu->cr8;
kvm_run->apic_base = vcpu->apic_base;
@@ -1452,7 +1514,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- return (!vcpu->irq_summary &&
+ return (!kvm_vcpu_irq_pending(vcpu) &&
kvm_run->request_interrupt_window &&
vcpu->interrupt_window_open &&
(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
@@ -1482,9 +1544,17 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
int r;
again:
+ spin_lock(&vcpu->irq.lock);
+
+ /*
+ * We must inject interrupts (if any) while the irq_lock
+ * is held
+ */
if (!vcpu->mmio_read_completed)
do_interrupt_requests(vcpu, kvm_run);
+ spin_unlock(&vcpu->irq.lock);
+
clgi();
pre_svm_run(vcpu);
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..a60707d
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,223 @@
+/*
+ * User Interrupts IRQ device
+ *
+ * This acts as an extension of an interrupt controller that exists elsewhere
+ * (typically in userspace/QEMU). Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has
+ * already occurred. Therefore, this PIC has the following unusual properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ * upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped)
+ * because it's an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code,
+ * Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ * Gregory Haskins <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses
+ * a summary field to accelerate lookups. Assumes external locking
+ *---------------------------------------------------------------------
+ */
+
+/*
+ * Two-level bitmap: bit N of "summary" is set while word N of "pending"
+ * holds at least one set bit, so the highest pending bit is found with
+ * two __fls() calls instead of a scan.
+ */
+struct bitarray {
+ unsigned long summary; /* 1 per word in pending */
+ unsigned long pending[NR_IRQ_WORDS];
+};
+
+/* Returns 1 if any bit in the array is set, 0 otherwise. */
+static inline int bitarray_pending(struct bitarray *this)
+{
+	return this->summary != 0;
+}
+
+/*
+ * Returns the index of the highest set bit, or -1 when the array is empty.
+ * The summary word selects the highest non-empty word of "pending".
+ */
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+	int word, bit;
+
+	if (!this->summary)
+		return -1;
+
+	word = __fls(this->summary);
+	bit = __fls(this->pending[word]);
+
+	return word * BITS_PER_LONG + bit;
+}
+
+/*
+ * Set bit @nr and flag its word as non-empty in the summary.
+ * @nr must be below NR_IRQ_WORDS * BITS_PER_LONG.
+ */
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+	/* the array decays to the unsigned long pointer the bitops expect */
+	__set_bit(nr, this->pending);
+	__set_bit(nr / BITS_PER_LONG, &this->summary);
+}
+
+/*
+ * Clear bit @nr; once its word becomes empty the matching summary bit is
+ * cleared as well. @nr must be below NR_IRQ_WORDS * BITS_PER_LONG.
+ */
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+	int word = nr / BITS_PER_LONG;
+
+	/* the array decays to the unsigned long pointer the bitops expect */
+	__clear_bit(nr, this->pending);
+	if (!this->pending[word])
+		__clear_bit(word, &this->summary);
+}
+
+/* Returns non-zero if bit @nr is set. */
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+	/* the array decays to the unsigned long pointer the bitops expect */
+	return test_bit(nr, this->pending);
+}
+
+/*
+ * Force bit @nr to @val (set when non-zero, clear when zero).
+ * Returns 1 if the stored bit differed from @val and was updated,
+ * 0 if it already compared equal to @val.
+ */
+static inline int bitarray_test_and_set(struct bitarray *this, int nr, int val)
+{
+	if (bitarray_test(this, nr) == val)
+		return 0;
+
+	if (val)
+		bitarray_set(this, nr);
+	else
+		bitarray_clear(this, nr);
+
+	return 1;
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+/* Private state behind a "userint" kvm_irqdevice (irqdevice->private). */
+struct kvm_user_irqdev {
+ spinlock_t lock; /* guards the pending bitarray */
+ atomic_t ref_count; /* state is freed when this drops to zero */
+ struct bitarray pending; /* one bit per vector awaiting injection */
+};
+
+/*
+ * user_irqdev_ack - ack() method of the userint irqdevice
+ *
+ * Without KVM_IRQACK_FLAG_PEEK the highest pending vector is removed from
+ * the bitarray and reported via data->vector with
+ * KVM_IRQACKDATA_VECTOR_VALID; if nothing is pending, data->vector is -1
+ * and the VALID flag is left untouched. With PEEK, data->vector is not
+ * written at all. In both modes KVM_IRQACKDATA_VECTOR_PENDING is set when
+ * vectors remain afterwards. Flags are only ever OR-ed in, so the caller
+ * is expected to pass in cleared flags.
+ *
+ * Always returns 0.
+ */
+static int user_irqdev_ack(struct kvm_irqdevice *this, int flags,
+ struct kvm_irqack_data *data)
+{
+ struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private;
+
+ spin_lock(&s->lock);
+
+ if (!(flags & KVM_IRQACK_FLAG_PEEK)) {
+ int irq = bitarray_findhighest(&s->pending);
+
+ if (irq > -1) {
+ /*
+ * Automatically clear the interrupt as the EOI
+ * mechanism (if any) will take place in userspace
+ */
+ bitarray_clear(&s->pending, irq);
+
+ data->flags |= KVM_IRQACKDATA_VECTOR_VALID;
+ }
+
+ data->vector = irq;
+ }
+
+ /* evaluated after the clear above: reports what is left over */
+ if (bitarray_pending(&s->pending))
+ data->flags |= KVM_IRQACKDATA_VECTOR_PENDING;
+
+ spin_unlock(&s->lock);
+
+ return 0;
+}
+
+/*
+ * user_irqdev_set_pin - set_pin() method of the userint irqdevice
+ * @this: the irqdevice
+ * @irq: pin number; on this device pins are literal vectors
+ * @level: non-zero = assert, 0 = deassert
+ *
+ * Latches an asserted vector in the pending bitarray and, if it was not
+ * already latched, raises kvm_irqpin_localint on the registered sink.
+ * Deasserts are dropped, as this is an auto-EOI device.
+ *
+ * Returns 0 on success, -1 if @irq lies outside the pending bitarray.
+ */
+static int user_irqdev_set_pin(struct kvm_irqdevice *this, int irq, int level)
+{
+	struct kvm_user_irqdev *s = this->private;
+	int forward;
+
+	/* never index outside of the pending bitarray */
+	if (irq < 0 || irq >= (int)(NR_IRQ_WORDS * BITS_PER_LONG))
+		return -1;
+
+	/* auto-EOI device: only asserts are listened to */
+	if (!level)
+		return 0;
+
+	/*
+	 * Pass a normalized 1 rather than the raw level, so that any
+	 * non-zero level compares correctly against the stored bit.
+	 */
+	spin_lock(&s->lock);
+	forward = bitarray_test_and_set(&s->pending, irq, 1);
+	spin_unlock(&s->lock);
+
+	/*
+	 * alert the higher layer software we have changes
+	 */
+	if (forward)
+		kvm_irqdevice_set_intr(this, kvm_irqpin_localint);
+
+	return 0;
+}
+
+/*
+ * destructor() method of the userint irqdevice: drops one reference on the
+ * private state and frees it when that was the last one.
+ */
+static void user_irqdev_destructor(struct kvm_irqdevice *this)
+{
+	struct kvm_user_irqdev *s = this->private;
+
+	if (!atomic_dec_and_test(&s->ref_count))
+		return;
+
+	kfree(s);
+}
+
+/*
+ * kvm_user_irqdev_init - turn @irqdev into a userint device
+ *
+ * Allocates the zeroed private state, takes the initial reference on it and
+ * installs the userint ack/set_pin/destructor methods.
+ *
+ * Returns 0 on success or -ENOMEM if the state cannot be allocated, in
+ * which case @irqdev is left untouched.
+ */
+int kvm_user_irqdev_init(struct kvm_irqdevice *irqdev)
+{
+	struct kvm_user_irqdev *s = kzalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		return -ENOMEM;
+
+	spin_lock_init(&s->lock);
+	atomic_inc(&s->ref_count);	/* kzalloc() zeroed it: 0 -> 1 */
+
+	irqdev->private = s;
+	irqdev->ack = user_irqdev_ack;
+	irqdev->set_pin = user_irqdev_set_pin;
+	irqdev->destructor = user_irqdev_destructor;
+
+	return 0;
+}
+
+/*
+ * kvm_user_irqdev_save - copy the pending-vector bitmap out to @data
+ *
+ * @data must have room for the whole bitmap (NR_IRQ_WORDS unsigned longs).
+ * The summary word is not saved. Always returns 0.
+ */
+int kvm_user_irqdev_save(struct kvm_irqdevice *this, void *data)
+{
+	struct kvm_user_irqdev *s = this->private;
+	struct bitarray *bits = &s->pending;
+
+	spin_lock(&s->lock);
+	memcpy(data, bits->pending, sizeof(bits->pending));
+	spin_unlock(&s->lock);
+
+	return 0;
+}
+
+/*
+ * kvm_user_irqdev_restore - load the pending-vector bitmap from @data
+ *
+ * Every one of the 256 vectors is forced to the state found in @data, so
+ * vectors absent from @data are cleared as well. The sink is notified on
+ * kvm_irqpin_localint if at least one vector changed state.
+ * Always returns 0.
+ */
+int kvm_user_irqdev_restore(struct kvm_irqdevice *this, void *data)
+{
+	struct kvm_user_irqdev *s = this->private;
+	int changed = 0;
+	int vec;
+
+	spin_lock(&s->lock);
+
+	/* going bit by bit keeps the summary word in sync with the bitmap */
+	for (vec = 0; vec < 256; ++vec) {
+		int level = test_bit(vec, data);
+
+		if (bitarray_test_and_set(&s->pending, vec, level))
+			changed = 1;
+	}
+
+	spin_unlock(&s->lock);
+
+	/* tell the higher layer, outside of our lock, that state changed */
+	if (changed)
+		kvm_irqdevice_set_intr(this, kvm_irqpin_localint);
+
+	return 0;
+}
+
+/*
+ * Attach a userint irqdevice to the vcpu's embedded kvm_irqdevice.
+ * Returns whatever kvm_user_irqdev_init() returns.
+ */
+int kvm_userint_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_irqdevice *dev = &vcpu->irq.dev;
+
+	return kvm_user_irqdev_init(dev);
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 3411813..f0e5826 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1454,52 +1454,118 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu,
int irq)
vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
}
-static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
+static void do_intr_requests(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run,
+ kvm_irqpin_t pin)
{
- int word_index = __ffs(vcpu->irq_summary);
- int bit_index = __ffs(vcpu->irq_pending[word_index]);
- int irq = word_index * BITS_PER_LONG + bit_index;
-
- clear_bit(bit_index, &vcpu->irq_pending[word_index]);
- if (!vcpu->irq_pending[word_index])
- clear_bit(word_index, &vcpu->irq_summary);
-
- if (vcpu->rmode.active) {
- inject_rmode_irq(vcpu, irq);
- return;
- }
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
-}
-
-
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
-{
- u32 cpu_based_vm_exec_control;
+ int pending = 0;
vcpu->interrupt_window_open =
((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
if (vcpu->interrupt_window_open &&
- vcpu->irq_summary &&
- !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
+ !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) {
/*
- * If interrupts enabled, and not blocked by sti or mov ss.
Good.
+ * If interrupts enabled, and not blocked by sti or mov ss.
+ * Good.
*/
- kvm_do_inject_irq(vcpu);
+ struct kvm_irqack_data ack;
+ int r = 0;
+
+ memset(&ack, 0, sizeof(ack));
+
+ switch (pin) {
+ case kvm_irqpin_localint:
+ r = kvm_vcpu_irq_pop(vcpu, &ack);
+ break;
+ case kvm_irqpin_extint:
+ printk(KERN_WARNING "KVM: external-interrupts not " \
+ "handled yet\n");
+ __clear_bit(pin, &vcpu->irq.pending);
+ break;
+ case kvm_irqpin_nmi:
+ /*
+ * FIXME: Someday we will handle this using the
+ * specific VMX NMI features. For now, just inject
+ * the NMI as a standard interrupt on vector 2
+ */
+ ack.flags |= KVM_IRQACKDATA_VECTOR_VALID;
+ ack.vector = 2;
+ __clear_bit(pin, &vcpu->irq.pending);
+ break;
+ default:
+ panic("KVM: unknown interrupt pin raised: %d\n", pin);
+ break;
+ }
+
+ BUG_ON(r < 0);
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- if (!vcpu->interrupt_window_open &&
- (vcpu->irq_summary || kvm_run->request_interrupt_window))
+ if (ack.flags & KVM_IRQACKDATA_VECTOR_VALID) {
+ if (vcpu->rmode.active)
+ inject_rmode_irq(vcpu, ack.vector);
+ else
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ ack.vector |
+ INTR_TYPE_EXT_INTR |
+ INTR_INFO_VALID_MASK);
+ }
+ }
+
+ /*
+ * Re-read the pending interrupt state. If anything is still
+ * pending we need to cause an exit on the next window
+ */
+ pending = __kvm_vcpu_irq_pending(vcpu);
+
+ if (test_bit(pin, &pending) || kvm_run->request_interrupt_window) {
/*
- * Interrupts blocked. Wait for unblock.
+ * Trigger a VMEXIT on the next IRQ window
*/
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- else
- cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ u32 cbvec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cbvec |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cbvec);
+ }
+}
+
+/*
+ * Stop requesting an exit on the next interrupt window;
+ * do_intr_requests() re-arms the request if something is still pending.
+ */
+static void clear_pending_controls(struct kvm_vcpu *vcpu)
+{
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
+		     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+		     ~CPU_BASED_VIRTUAL_INTR_PENDING);
+}
+
+/*
+ * do_interrupt_requests - service every raised irq pin before guest entry
+ *
+ * Takes a snapshot of the raised pins, clears the interrupt-window exit
+ * request, then walks the snapshot from the highest-numbered pin down,
+ * handing each pin to do_intr_requests() or dropping it.
+ *
+ * The caller takes vcpu->irq.lock around this call, hence the unlocked
+ * __kvm_vcpu_irq_pending() and __clear_bit() accesses.
+ */
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+				  struct kvm_run *kvm_run)
+{
+	/* unsigned long: the snapshot is edited with __clear_bit() below */
+	unsigned long pending = __kvm_vcpu_irq_pending(vcpu);
+
+	clear_pending_controls(vcpu);
+
+	while (pending) {
+		kvm_irqpin_t pin = __fls(pending);
+
+		switch (pin) {
+		case kvm_irqpin_localint:
+		case kvm_irqpin_extint:
+		case kvm_irqpin_nmi:
+			do_intr_requests(vcpu, kvm_run, pin);
+			break;
+		case kvm_irqpin_smi:
+			/* ignored (for now) */
+			printk(KERN_WARNING "KVM: dropping unhandled SMI\n");
+			__clear_bit(pin, &vcpu->irq.pending);
+			break;
+		case kvm_irqpin_invalid:
+			/* drop */
+			break;
+		default:
+			panic("KVM: unknown interrupt pin raised: %d\n", pin);
+			break;
+		}
+
+		/* this pin has been looked at; move on to the next lower one */
+		__clear_bit(pin, &pending);
+	}
+}
static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
@@ -1554,9 +1620,13 @@ static int handle_exception(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
}
if (is_external_interrupt(vect_info)) {
+ /*
+ * An exception was taken while we were trying to inject an
+ * IRQ. We must defer the injection of the vector until
+ * the next window.
+ */
int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
- set_bit(irq, vcpu->irq_pending);
- set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+ kvm_vcpu_irq_push(vcpu, irq);
}
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1872,8 +1942,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = vcpu->cr8;
kvm_run->apic_base = vcpu->apic_base;
- kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
- vcpu->irq_summary == 0);
+ kvm_run->ready_for_interrupt_injection =
+ (vcpu->interrupt_window_open &&
+ !kvm_vcpu_irq_pending(vcpu));
}
static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1884,7 +1955,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
* possible
*/
if (kvm_run->request_interrupt_window &&
- !vcpu->irq_summary) {
+ !kvm_vcpu_irq_pending(vcpu)) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.irq_window_exits;
return 0;
@@ -1895,7 +1966,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
skip_emulated_instruction(vcpu);
- if (vcpu->irq_summary)
+ if (kvm_vcpu_irq_pending(vcpu))
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1965,7 +2036,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run,
struct kvm_vcpu *vcpu)
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- return (!vcpu->irq_summary &&
+ return (!kvm_vcpu_irq_pending(vcpu) &&
kvm_run->request_interrupt_window &&
vcpu->interrupt_window_open &&
(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
@@ -1981,9 +2052,17 @@ preempted:
kvm_guest_debug_pre(vcpu);
again:
+ spin_lock(&vcpu->irq.lock);
+
+ /*
+ * We must inject interrupts (if any) while the irq.lock
+ * is held
+ */
if (!vcpu->mmio_read_completed)
do_interrupt_requests(vcpu, kvm_run);
+ spin_unlock(&vcpu->irq.lock);
+
vmx_save_host_state(vcpu);
kvm_load_guest_fpu(vcpu);
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel