The current code is geared towards using a user-mode (A)PIC. This patch adds an "irqdevice" abstraction and implements a "userint" model to handle the duties of the original code. Later, we can develop other irqdevice models to handle objects such as the LAPIC, IOAPIC, i8259, etc., as appropriate.
Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]> --- drivers/kvm/Makefile | 2 drivers/kvm/irqdevice.h | 176 +++++++++++++++++++++++++++++++++++++ drivers/kvm/kvm.h | 94 +++++++++++++++++++- drivers/kvm/kvm_main.c | 58 +++++++++--- drivers/kvm/svm.c | 162 ++++++++++++++++++++++++---------- drivers/kvm/userint.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/vmx.c | 161 +++++++++++++++++++++++++--------- 7 files changed, 769 insertions(+), 107 deletions(-) diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile index c0a789f..540afbc 100644 --- a/drivers/kvm/Makefile +++ b/drivers/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -kvm-objs := kvm_main.o mmu.o x86_emulate.o +kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h new file mode 100644 index 0000000..097d179 --- /dev/null +++ b/drivers/kvm/irqdevice.h @@ -0,0 +1,176 @@ +/* + * Defines an interface for an abstract interrupt controller. The model + * consists of a unit with an arbitrary number of input lines N (IRQ0-(N-1)), + * an arbitrary number of output lines (INTR) (LINT, EXTINT, NMI, etc), and + * methods for completing an interrupt-acknowledge cycle (INTA). A particular + * implementation of this model will define various policies, such as + * irq-to-vector translation, INTA/auto-EOI policy, etc. + * + * In addition, the INTR callback mechanism allows the unit to be "wired" to + * an interruptible source in a very flexible manner. For instance, an + * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another + * interrupt controller (ala cascaded i8259s) + * + * Copyright (C) 2007 Novell + * + * Authors: + * Gregory Haskins <[EMAIL PROTECTED]> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#ifndef __IRQDEVICE_H +#define __IRQDEVICE_H + +struct kvm_irqdevice; + +typedef enum { + kvm_irqpin_localint, + kvm_irqpin_extint, + kvm_irqpin_smi, + kvm_irqpin_nmi, + kvm_irqpin_invalid, /* must always be last */ +} kvm_irqpin_t; + + +struct kvm_irqsink { + void (*set_intr)(struct kvm_irqsink *this, + struct kvm_irqdevice *dev, + kvm_irqpin_t pin); + + void *private; +}; + +#define KVM_IRQACKDATA_VECTOR_VALID (1 << 0) +#define KVM_IRQACKDATA_VECTOR_PENDING (1 << 1) + +#define KVM_IRQACK_FLAG_PEEK (1 << 0) + +struct kvm_irqack_data { + int flags; + int vector; +}; + +struct kvm_irqdevice { + int (*ack)(struct kvm_irqdevice *this, int flags, + struct kvm_irqack_data *data); + int (*set_pin)(struct kvm_irqdevice *this, int pin, int level); + void (*destructor)(struct kvm_irqdevice *this); + + void *private; + struct kvm_irqsink sink; +}; + +/** + * kvm_irqdevice_init - initialize the kvm_irqdevice for use + * @dev: The device + * + * Description: Initialize the kvm_irqdevice for use. 
Should be called before + * calling any derived implementation init functions + * + * Returns: (void) + */ +static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev) +{ + memset(dev, 0, sizeof(*dev)); +} + +/** + * kvm_irqdevice_ack - read and ack the highest priority vector from the device + * @dev: The device + * @flags: Modifies default behavior + * [ KVM_IRQACK_FLAG_PEEK - Dont ack vector, just check status ] + * @data: A pointer to a kvm_irqack_data structure to hold the result + * + * Description: Read the highest priority pending vector from the device, + * potentially invoking auto-EOI depending on device policy + * + * Successful return indicates that the *data* structure is valid + * + * data.flags - + * [KVM_IRQACKDATA_VECTOR_VALID - data.vector is valid] + * [KVM_IRQACKDATA_VECTOR_PENDING - more vectors are pending] + * + * Returns: (int) + * [-1 = failure] + * [ 0 = success] + */ +static inline int kvm_irqdevice_ack(struct kvm_irqdevice *dev, int flags, + struct kvm_irqack_data *data) +{ + return dev->ack(dev, flags, data); +} + +/** + * kvm_irqdevice_set_pin - allows the caller to assert/deassert an IRQ + * @dev: The device + * @pin: The input pin to alter + * @level: The value to set (1 = assert, 0 = deassert) + * + * Description: Allows the caller to assert/deassert an IRQ input pin to the + * device according to device policy. + * + * Returns: (int) + * [-1 = failure] + * [ 0 = success] + */ +static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin, + int level) +{ + return dev->set_pin(dev, pin, level); +} + +/** + * kvm_irqdevice_register_sink - registers an kvm_irqsink object + * @dev: The device + * @sink: The sink to register. Data will be copied so building object from + * transient storage is ok. 
+ * + * Description: Registers an kvm_irqsink object as an INTR callback + * + * Returns: (void) + */ +static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev, + const struct kvm_irqsink *sink) +{ + dev->sink = *sink; +} + +/** + * kvm_irqdevice_destructor - destroys an irqdevice + * @dev: The device + * + * Returns: (void) + */ +static inline void kvm_irqdevice_destructor(struct kvm_irqdevice *dev) +{ + dev->destructor(dev); +} + +/** + * kvm_irqdevice_set_intr - invokes a registered INTR callback + * @dev: The device + * @pin: Identifies the pin to alter - + * [ KVM_IRQPIN_LOCALINT (default) - an vector is pending on this + * device] + * [ KVM_IRQPIN_EXTINT - a vector is pending on an external device] + * [ KVM_IRQPIN_SMI - system-management-interrupt pin] + * [ KVM_IRQPIN_NMI - non-maskable-interrupt pin + * + * Description: Invokes a registered INTR callback (if present). This + * function is meant to be used privately by a irqdevice + * implementation. + * + * Returns: (void) + */ +static inline void kvm_irqdevice_set_intr(struct kvm_irqdevice *dev, + kvm_irqpin_t pin) +{ + struct kvm_irqsink *sink = &dev->sink; + if (sink->set_intr) + sink->set_intr(sink, dev, pin); +} + +#endif /* __IRQDEVICE_H */ diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 3289801..1c8af97 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -13,6 +13,7 @@ #include <linux/mm.h> #include "vmx.h" +#include "irqdevice.h" #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -160,6 +161,11 @@ struct vmcs { struct kvm_vcpu; +int kvm_user_irqdev_init(struct kvm_irqdevice *dev); +int kvm_user_irqdev_save(struct kvm_irqdevice *this, void *data); +int kvm_user_irqdev_restore(struct kvm_irqdevice *this, void *data); +int kvm_userint_init(struct kvm_vcpu *vcpu); + /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level * 32-bit). 
The kvm_mmu structure abstracts the details of the current mmu @@ -316,6 +322,18 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr); void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); +#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) + +/* + * structure for maintaining info for interrupting an executing VCPU + */ +struct kvm_vcpu_irq { + spinlock_t lock; + struct kvm_irqdevice dev; + int pending; + int deferred; +}; + struct kvm_vcpu { struct kvm *kvm; union { @@ -328,9 +346,7 @@ struct kvm_vcpu { u64 host_tsc; struct kvm_run *run; int interrupt_window_open; - unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ -#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) - unsigned long irq_pending[NR_IRQ_WORDS]; + struct kvm_vcpu_irq irq; unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ unsigned long rip; /* needs vcpu_load_rsp_rip() */ @@ -408,6 +424,78 @@ struct kvm_vcpu { struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; }; +/* + * These two functions are helpers for determining if a standard interrupt + * is pending to replace the old "if (vcpu->irq_summary)" logic. 
+ */ + +/* + * Assumes lock already held + */ +static inline int __kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu) +{ + int pending = vcpu->irq.pending; + + if (vcpu->irq.deferred != -1) + __set_bit(kvm_irqpin_localint, &pending); + + return pending; +} + +static inline int kvm_vcpu_irq_pending(struct kvm_vcpu *vcpu) +{ + int ret = 0; + int flags; + + spin_lock_irqsave(&vcpu->irq.lock, flags); + ret = __kvm_vcpu_irq_pending(vcpu); + spin_unlock_irqrestore(&vcpu->irq.lock, flags); + + return ret; +} + +/* + * Assumes lock already held + */ +static inline int kvm_vcpu_irq_pop(struct kvm_vcpu *vcpu, + struct kvm_irqack_data *data) +{ + int ret = 0; + + if (vcpu->irq.deferred != -1) { + ret = kvm_irqdevice_ack(&vcpu->irq.dev, KVM_IRQACK_FLAG_PEEK, + data); + data->flags |= KVM_IRQACKDATA_VECTOR_VALID; + data->vector = vcpu->irq.deferred; + vcpu->irq.deferred = -1; + } else + ret = kvm_irqdevice_ack(&vcpu->irq.dev, 0, data); + + /* + * If there are no more interrupts we must clear the status flag + */ + if (!(data->flags & KVM_IRQACKDATA_VECTOR_PENDING)) + __clear_bit(kvm_irqpin_localint, &vcpu->irq.pending); + + return ret; +} + +static inline void __kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq) +{ + BUG_ON(vcpu->irq.deferred != -1); /* We can only hold one deferred */ + + vcpu->irq.deferred = irq; +} + +static inline void kvm_vcpu_irq_push(struct kvm_vcpu *vcpu, int irq) +{ + int flags; + + spin_lock_irqsave(&vcpu->irq.lock, flags); + __kvm_vcpu_irq_push(vcpu, irq); + spin_unlock_irqrestore(&vcpu->irq.lock, flags); +} + struct kvm_mem_alias { gfn_t base_gfn; unsigned long npages; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 89b00e5..897b20f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -323,6 +323,11 @@ static struct kvm *kvm_create_vm(void) struct kvm_vcpu *vcpu = &kvm->vcpus[i]; mutex_init(&vcpu->mutex); + + memset(&vcpu->irq, 0, sizeof(vcpu->irq)); + spin_lock_init(&vcpu->irq.lock); + vcpu->irq.deferred = -1; + vcpu->cpu 
= -1; vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; @@ -390,6 +395,7 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) vcpu_load(vcpu); kvm_mmu_destroy(vcpu); vcpu_put(vcpu); + kvm_irqdevice_destructor(&vcpu->irq.dev); kvm_arch_ops->vcpu_free(vcpu); free_page((unsigned long)vcpu->run); vcpu->run = NULL; @@ -2009,8 +2015,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->efer = vcpu->shadow_efer; sregs->apic_base = vcpu->apic_base; - memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, - sizeof sregs->interrupt_bitmap); + kvm_user_irqdev_save(&vcpu->irq.dev, &sregs->interrupt_bitmap); vcpu_put(vcpu); @@ -2027,7 +2032,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { int mmu_reset_needed = 0; - int i; struct descriptor_table dt; vcpu_load(vcpu); @@ -2064,12 +2068,8 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, - sizeof vcpu->irq_pending); - vcpu->irq_summary = 0; - for (i = 0; i < NR_IRQ_WORDS; ++i) - if (vcpu->irq_pending[i]) - __set_bit(i, &vcpu->irq_summary); + kvm_user_irqdev_restore(&vcpu->irq.dev, + &sregs->interrupt_bitmap[0]); set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); @@ -2230,14 +2230,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, { if (irq->irq < 0 || irq->irq >= 256) return -EINVAL; - vcpu_load(vcpu); - - set_bit(irq->irq, vcpu->irq_pending); - set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary); - vcpu_put(vcpu); - - return 0; + return kvm_irqdevice_set_pin(&vcpu->irq.dev, irq->irq, 1); } static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, @@ -2339,6 +2333,32 @@ out1: } /* + * This function will be invoked whenever the vcpu->irq.dev raises its INTR + * line + */ +static void kvm_vcpu_intr(struct kvm_irqsink *this, + struct kvm_irqdevice *dev, + kvm_irqpin_t pin) +{ + struct kvm_vcpu *vcpu = 
(struct kvm_vcpu*)this->private; + unsigned long flags; + + spin_lock_irqsave(&vcpu->irq.lock, flags); + __set_bit(pin, &vcpu->irq.pending); + spin_unlock_irqrestore(&vcpu->irq.lock, flags); +} + +static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu) +{ + struct kvm_irqsink sink = { + .set_intr = kvm_vcpu_intr, + .private = vcpu + }; + + kvm_irqdevice_register_sink(&vcpu->irq.dev, &sink); +} + +/* * Creates some virtual cpus. Good luck creating more than one. */ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) @@ -2385,6 +2405,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (r < 0) goto out_free_vcpus; + kvm_irqdevice_init(&vcpu->irq.dev); + kvm_vcpu_irqsink_init(vcpu); + r = kvm_userint_init(vcpu); + if (r < 0) + goto out_free_vcpus; + kvm_arch_ops->vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); if (r >= 0) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index b621403..a9d917a 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -106,24 +106,6 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu) (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2; } -static inline u8 pop_irq(struct kvm_vcpu *vcpu) -{ - int word_index = __ffs(vcpu->irq_summary); - int bit_index = __ffs(vcpu->irq_pending[word_index]); - int irq = word_index * BITS_PER_LONG + bit_index; - - clear_bit(bit_index, &vcpu->irq_pending[word_index]); - if (!vcpu->irq_pending[word_index]) - clear_bit(word_index, &vcpu->irq_summary); - return irq; -} - -static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) -{ - set_bit(irq, vcpu->irq_pending); - set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); -} - static inline void clgi(void) { asm volatile (SVM_CLGI); @@ -904,7 +886,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; if (is_external_interrupt(exit_int_info)) - push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); + /* + * An exception was taken while we were trying to inject an + * IRQ. 
We must defer the injection of the vector until + * the next window. + */ + kvm_vcpu_irq_push(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); spin_lock(&vcpu->kvm->lock); @@ -1114,7 +1101,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; skip_emulated_instruction(vcpu); - if (vcpu->irq_summary) + if (kvm_vcpu_irq_pending(vcpu)) return 1; kvm_run->exit_reason = KVM_EXIT_HLT; @@ -1285,7 +1272,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, * possible */ if (kvm_run->request_interrupt_window && - !vcpu->irq_summary) { + !kvm_vcpu_irq_pending(vcpu)) { ++vcpu->stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; @@ -1384,60 +1371,135 @@ static void pre_svm_run(struct kvm_vcpu *vcpu) } -static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) -{ - struct vmcb_control_area *control; - - control = &vcpu->svm->vmcb->control; - control->int_vector = pop_irq(vcpu); - control->int_ctl &= ~V_INTR_PRIO_MASK; - control->int_ctl |= V_IRQ_MASK | - ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); -} - static void kvm_reput_irq(struct kvm_vcpu *vcpu) { struct vmcb_control_area *control = &vcpu->svm->vmcb->control; if (control->int_ctl & V_IRQ_MASK) { control->int_ctl &= ~V_IRQ_MASK; - push_irq(vcpu, control->int_vector); + kvm_vcpu_irq_push(vcpu, control->int_vector); } vcpu->interrupt_window_open = !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); } -static void do_interrupt_requests(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static void do_intr_requests(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run, + kvm_irqpin_t pin) { struct vmcb_control_area *control = &vcpu->svm->vmcb->control; + int pending = 0; vcpu->interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); - if (vcpu->interrupt_window_open && vcpu->irq_summary) + if (vcpu->interrupt_window_open) { /* 
- * If interrupts enabled, and not blocked by sti or mov ss. Good. + * If interrupts enabled, and not blocked by sti or mov ss. + * Good. */ - kvm_do_inject_irq(vcpu); + struct kvm_irqack_data ack; + int r = 0; + + memset(&ack, 0, sizeof(ack)); + + switch (pin) { + case kvm_irqpin_localint: + r = kvm_vcpu_irq_pop(vcpu, &ack); + break; + case kvm_irqpin_extint: + printk(KERN_WARNING "KVM: external-interrupts not " \ + "handled yet\n"); + __clear_bit(pin, &vcpu->irq.pending); + break; + case kvm_irqpin_nmi: + /* + * FIXME: Someday we will handle this using the + * specific SVM NMI features. For now, just inject + * the NMI as a standard interrupt on vector 2 + */ + ack.flags |= KVM_IRQACKDATA_VECTOR_VALID; + ack.vector = 2; + __clear_bit(pin, &vcpu->irq.pending); + break; + default: + panic("KVM: unknown interrupt pin raised: %d\n", pin); + break; + } + + BUG_ON(r < 0); + + if (ack.flags & KVM_IRQACKDATA_VECTOR_VALID) { + control = &vcpu->svm->vmcb->control; + control->int_vector = ack.vector; + control->int_ctl &= ~V_INTR_PRIO_MASK; + control->int_ctl |= V_IRQ_MASK | + ((/*control->int_vector >> 4*/ 0xf) << + V_INTR_PRIO_SHIFT); + } + } /* - * Interrupts blocked. Wait for unblock. + * Re-read the pending interrupt state. 
If anything is still + * pending we need to cause an exit on the next window */ - if (!vcpu->interrupt_window_open && - (vcpu->irq_summary || kvm_run->request_interrupt_window)) { + pending = __kvm_vcpu_irq_pending(vcpu); + + if (test_bit(pin, &pending) || kvm_run->request_interrupt_window) + /* + * Trigger a VMEXIT on the next IRQ window + */ control->intercept |= 1ULL << INTERCEPT_VINTR; - } else - control->intercept &= ~(1ULL << INTERCEPT_VINTR); +} + +static void clear_pending_controls(struct kvm_vcpu *vcpu) +{ + struct vmcb_control_area *control = &vcpu->svm->vmcb->control; + + control->intercept &= ~(1ULL << INTERCEPT_VINTR); +} + +static void do_interrupt_requests(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + int pending = __kvm_vcpu_irq_pending(vcpu); + + clear_pending_controls(vcpu); + + while (pending) { + kvm_irqpin_t pin = __fls(pending); + + switch (pin) { + case kvm_irqpin_localint: + case kvm_irqpin_extint: + case kvm_irqpin_nmi: + do_intr_requests(vcpu, kvm_run, pin); + break; + case kvm_irqpin_smi: + /* ignored (for now) */ + printk(KERN_WARNING "KVM: dropping unhandled SMI\n"); + __clear_bit(pin, &vcpu->irq.pending); + break; + case kvm_irqpin_invalid: + /* drop */ + break; + default: + panic("KVM: unknown interrupt pin raised: %d\n", pin); + break; + } + + __clear_bit(pin, &pending); + } } static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && - vcpu->irq_summary == 0); + kvm_run->ready_for_interrupt_injection = + (vcpu->interrupt_window_open && + !kvm_vcpu_irq_pending(vcpu)); kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; kvm_run->cr8 = vcpu->cr8; kvm_run->apic_base = vcpu->apic_base; @@ -1452,7 +1514,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - return (!vcpu->irq_summary && + return 
(!kvm_vcpu_irq_pending(vcpu) && kvm_run->request_interrupt_window && vcpu->interrupt_window_open && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); @@ -1482,9 +1544,17 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; again: + spin_lock(&vcpu->irq.lock); + + /* + * We must inject interrupts (if any) while the irq_lock + * is held + */ if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); + spin_unlock(&vcpu->irq.lock); + clgi(); pre_svm_run(vcpu); diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c new file mode 100644 index 0000000..a60707d --- /dev/null +++ b/drivers/kvm/userint.c @@ -0,0 +1,223 @@ +/* + * User Interrupts IRQ device + * + * This acts as an extension of an interrupt controller that exists elsewhere + * (typically in userspace/QEMU). Because this PIC is a pseudo device that + * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has + * already occurred. Therefore, this PIC has the following unusual properties: + * + * 1) It has 256 "pins" which are literal vectors (i.e. no translation) + * 2) It only supports "auto-EOI" behavior since it is expected that the + * upstream emulated PIC will handle the real EOIs (if applicable) + * 3) It only listens to "asserts" on the pins (deasserts are dropped) + * because it's an auto-EOI device anyway. + * + * Copyright (C) 2007 Novell + * + * bitarray code based on original vcpu->irq_pending code, + * Copyright (C) 2007 Qumranet + * + * Authors: + * Gregory Haskins <[EMAIL PROTECTED]> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "kvm.h" + +/* + *---------------------------------------------------------------------- + * optimized bitarray object - works like bitarrays in bitops, but uses + * a summary field to accelerate lookups. 
Assumes external locking + *--------------------------------------------------------------------- + */ + +struct bitarray { + unsigned long summary; /* 1 per word in pending */ + unsigned long pending[NR_IRQ_WORDS]; +}; + +static inline int bitarray_pending(struct bitarray *this) +{ + return this->summary ? 1 : 0; +} + +static inline int bitarray_findhighest(struct bitarray *this) +{ + if (!this->summary) + return -1; + else { + int word_index = __fls(this->summary); + int bit_index = __fls(this->pending[word_index]); + + return word_index * BITS_PER_LONG + bit_index; + } +} + +static inline void bitarray_set(struct bitarray *this, int nr) +{ + __set_bit(nr, &this->pending); + __set_bit(nr / BITS_PER_LONG, &this->summary); +} + +static inline void bitarray_clear(struct bitarray *this, int nr) +{ + int word = nr / BITS_PER_LONG; + + __clear_bit(nr, &this->pending); + if (!this->pending[word]) + __clear_bit(word, &this->summary); +} + +static inline int bitarray_test(struct bitarray *this, int nr) +{ + return test_bit(nr, &this->pending); +} + +static inline int bitarray_test_and_set(struct bitarray *this, int nr, int val) +{ + if (bitarray_test(this, nr) != val) { + if (val) + bitarray_set(this, nr); + else + bitarray_clear(this, nr); + return 1; + } + + return 0; +} + +/* + *---------------------------------------------------------------------- + * userint interface - provides the actual kvm_irqdevice implementation + *--------------------------------------------------------------------- + */ + +struct kvm_user_irqdev { + spinlock_t lock; + atomic_t ref_count; + struct bitarray pending; +}; + +static int user_irqdev_ack(struct kvm_irqdevice *this, int flags, + struct kvm_irqack_data *data) +{ + struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private; + + spin_lock(&s->lock); + + if (!(flags & KVM_IRQACK_FLAG_PEEK)) { + int irq = bitarray_findhighest(&s->pending); + + if (irq > -1) { + /* + * Automatically clear the interrupt as the EOI + * mechanism (if 
any) will take place in userspace + */ + bitarray_clear(&s->pending, irq); + + data->flags |= KVM_IRQACKDATA_VECTOR_VALID; + } + + data->vector = irq; + } + + if (bitarray_pending(&s->pending)) + data->flags |= KVM_IRQACKDATA_VECTOR_PENDING; + + spin_unlock(&s->lock); + + return 0; +} + +static int user_irqdev_set_pin(struct kvm_irqdevice *this, int irq, int level) +{ + struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private; + int forward = 0; + + spin_lock(&s->lock); + forward = bitarray_test_and_set(&s->pending, irq, level); + spin_unlock(&s->lock); + + /* + * alert the higher layer software we have changes + */ + if (forward) + kvm_irqdevice_set_intr(this, kvm_irqpin_localint); + + return 0; +} + +static void user_irqdev_destructor(struct kvm_irqdevice *this) +{ + struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private; + + if (atomic_dec_and_test(&s->ref_count)) + kfree(s); +} + +int kvm_user_irqdev_init(struct kvm_irqdevice *irqdev) +{ + struct kvm_user_irqdev *s; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + spin_lock_init(&s->lock); + + irqdev->ack = user_irqdev_ack; + irqdev->set_pin = user_irqdev_set_pin; + irqdev->destructor = user_irqdev_destructor; + + irqdev->private = s; + atomic_inc(&s->ref_count); + + return 0; +} + +int kvm_user_irqdev_save(struct kvm_irqdevice *this, void *data) +{ + struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private; + + spin_lock(&s->lock); + memcpy(data, s->pending.pending, sizeof s->pending.pending); + spin_unlock(&s->lock); + + return 0; +} + +int kvm_user_irqdev_restore(struct kvm_irqdevice *this, void *data) +{ + struct kvm_user_irqdev *s = (struct kvm_user_irqdev*)this->private; + int i; + int forward = 0; + + spin_lock(&s->lock); + + /* + * walk the interrupt-bitmap and inject an IRQ for each bit found + */ + for (i = 0; i < 256; ++i) { + int val = test_bit(i, data); + forward |= bitarray_test_and_set(&s->pending, i, val); + } + + spin_unlock(&s->lock); + + 
/* + * alert the higher layer software we have changes + */ + if (forward) + kvm_irqdevice_set_intr(this, kvm_irqpin_localint); + + return 0; +} + +int kvm_userint_init(struct kvm_vcpu *vcpu) +{ + return kvm_user_irqdev_init(&vcpu->irq.dev); +} + diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 3411813..f0e5826 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1454,52 +1454,118 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6)); } -static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) +static void do_intr_requests(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run, + kvm_irqpin_t pin) { - int word_index = __ffs(vcpu->irq_summary); - int bit_index = __ffs(vcpu->irq_pending[word_index]); - int irq = word_index * BITS_PER_LONG + bit_index; - - clear_bit(bit_index, &vcpu->irq_pending[word_index]); - if (!vcpu->irq_pending[word_index]) - clear_bit(word_index, &vcpu->irq_summary); - - if (vcpu->rmode.active) { - inject_rmode_irq(vcpu, irq); - return; - } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); -} - - -static void do_interrupt_requests(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - u32 cpu_based_vm_exec_control; + int pending = 0; vcpu->interrupt_window_open = ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); if (vcpu->interrupt_window_open && - vcpu->irq_summary && - !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) + !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) { /* - * If interrupts enabled, and not blocked by sti or mov ss. Good. + * If interrupts enabled, and not blocked by sti or mov ss. + * Good. 
*/ - kvm_do_inject_irq(vcpu); + struct kvm_irqack_data ack; + int r = 0; + + memset(&ack, 0, sizeof(ack)); + + switch (pin) { + case kvm_irqpin_localint: + r = kvm_vcpu_irq_pop(vcpu, &ack); + break; + case kvm_irqpin_extint: + printk(KERN_WARNING "KVM: external-interrupts not " \ + "handled yet\n"); + __clear_bit(pin, &vcpu->irq.pending); + break; + case kvm_irqpin_nmi: + /* + * FIXME: Someday we will handle this using the + * specific VMX NMI features. For now, just inject + * the NMI as a standard interrupt on vector 2 + */ + ack.flags |= KVM_IRQACKDATA_VECTOR_VALID; + ack.vector = 2; + __clear_bit(pin, &vcpu->irq.pending); + break; + default: + panic("KVM: unknown interrupt pin raised: %d\n", pin); + break; + } + + BUG_ON(r < 0); - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - if (!vcpu->interrupt_window_open && - (vcpu->irq_summary || kvm_run->request_interrupt_window)) + if (ack.flags & KVM_IRQACKDATA_VECTOR_VALID) { + if (vcpu->rmode.active) + inject_rmode_irq(vcpu, ack.vector); + else + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + ack.vector | + INTR_TYPE_EXT_INTR | + INTR_INFO_VALID_MASK); + } + } + + /* + * Re-read the pending interrupt state. If anything is still + * pending we need to cause an exit on the next window + */ + pending = __kvm_vcpu_irq_pending(vcpu); + + if (test_bit(pin, &pending) || kvm_run->request_interrupt_window) { /* - * Interrupts blocked. Wait for unblock. 
+ * Trigger a VMEXIT on the next IRQ window */ - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - else - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + u32 cbvec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cbvec |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cbvec); + } +} + +static void clear_pending_controls(struct kvm_vcpu *vcpu) +{ + u32 cbvec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cbvec &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cbvec); +} + +static void do_interrupt_requests(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + int pending = __kvm_vcpu_irq_pending(vcpu); + + clear_pending_controls(vcpu); + + while (pending) { + kvm_irqpin_t pin = __fls(pending); + + switch (pin) { + case kvm_irqpin_localint: + case kvm_irqpin_extint: + case kvm_irqpin_nmi: + do_intr_requests(vcpu, kvm_run, pin); + break; + case kvm_irqpin_smi: + /* ignored (for now) */ + printk(KERN_WARNING "KVM: dropping unhandled SMI\n"); + __clear_bit(pin, &vcpu->irq.pending); + break; + case kvm_irqpin_invalid: + /* drop */ + break; + default: + panic("KVM: unknown interrupt pin raised: %d\n", pin); + break; + } + + __clear_bit(pin, &pending); + } } static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) @@ -1554,9 +1620,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (is_external_interrupt(vect_info)) { + /* + * An exception was taken while we were trying to inject an + * IRQ. We must defer the injection of the vector until + * the next window. 
+ */ int irq = vect_info & VECTORING_INFO_VECTOR_MASK; - set_bit(irq, vcpu->irq_pending); - set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); + kvm_vcpu_irq_push(vcpu, irq); } if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ @@ -1872,8 +1942,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; kvm_run->cr8 = vcpu->cr8; kvm_run->apic_base = vcpu->apic_base; - kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && - vcpu->irq_summary == 0); + kvm_run->ready_for_interrupt_injection = + (vcpu->interrupt_window_open && + !kvm_vcpu_irq_pending(vcpu)); } static int handle_interrupt_window(struct kvm_vcpu *vcpu, @@ -1884,7 +1955,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, * possible */ if (kvm_run->request_interrupt_window && - !vcpu->irq_summary) { + !kvm_vcpu_irq_pending(vcpu)) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; ++vcpu->stat.irq_window_exits; return 0; @@ -1895,7 +1966,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { skip_emulated_instruction(vcpu); - if (vcpu->irq_summary) + if (kvm_vcpu_irq_pending(vcpu)) return 1; kvm_run->exit_reason = KVM_EXIT_HLT; @@ -1965,7 +2036,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - return (!vcpu->irq_summary && + return (!kvm_vcpu_irq_pending(vcpu) && kvm_run->request_interrupt_window && vcpu->interrupt_window_open && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); @@ -1981,9 +2052,17 @@ preempted: kvm_guest_debug_pre(vcpu); again: + spin_lock(&vcpu->irq.lock); + + /* + * We must inject interrupts (if any) while the irq.lock + * is held + */ if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); + spin_unlock(&vcpu->irq.lock); + vmx_save_host_state(vcpu); 
kvm_load_guest_fpu(vcpu); ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel