>>> On Thu, Apr 12, 2007 at 12:01 PM, in message <[EMAIL PROTECTED]>,
Gregory Haskins wrote: 
> >>> On Thu, Apr 12, 2007 at 10:14 AM, in message <[EMAIL PROTECTED]>,
> Avi Kivity <[EMAIL PROTECTED]> wrote: 
>> 
>> It's really subtle.
>> 
>> With respect to interrupts, VT^H^Hthe hardware provides an override over 
>> IF.  If an interrupt happens while this override is enabled, we exit 
>> guest mode regardless of the state of IF.  In effect interrupts are 
>> enabled but _delivery_ is disabled.  Once we exit guest mode, interrupts 
>> become disabled again until we set IF explicitly.
> 
> Here's a version that implements that idea.  Note that it's still a POC, as 
> kick_process() still needs to be exported and handled in 
> extern-module-compat (or whatever it's called).
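
For reference, the override described above maps to the "external-interrupt
exiting" pin-based VM-execution control on VT.  Here is a minimal sketch of
what enabling it looks like (illustration only, not part of this patch; it
assumes the vmcs_read32()/vmcs_write32() helpers and the PIN_BASED_*
constants as they appear in vmx.c/vmx.h):

static void vmx_enable_ext_intr_exiting(void)
{
	u32 pin_based = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);

	/*
	 * With this bit set, a physical interrupt forces a VMEXIT even
	 * while the guest runs with IF clear; actual delivery is deferred
	 * to the host, which is exactly the "enabled but delivery
	 * disabled" behavior described above.
	 */
	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
		     pin_based | PIN_BASED_EXT_INTR_MASK);
}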

Here are more refinements to this logic that make it actually usable.  It 
turns out that smp_call_function_single() is exported under at least 2.6.16 
(perhaps even earlier; I'm not sure), so I refactored the code to use it 
instead of kick_process() (thanks for the hint, Avi).  
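
The kick itself boils down to the pattern below.  This is a condensed,
hedged sketch of what kvm_vcpu_intr() in the patch does (the function names
here are mine, and it assumes the five-argument smp_call_function_single()
signature of that era):

#include <linux/sched.h>
#include <linux/smp.h>

/* The IPI that delivers this call is what forces the VMEXIT; the body
 * is intentionally empty. */
static void noop_kick(void *info)
{
}

static void kick_vcpu_cpu(struct task_struct *task)
{
	int cpu;

	preempt_disable();	/* keep our own CPU number stable */
	cpu = task_cpu(task);
	if (cpu != smp_processor_id())
		smp_call_function_single(cpu, noop_kick, NULL, 0, 0);
	preempt_enable();
}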

I also added checks so that the current task doesn't unnecessarily signal 
itself (this covers the case we would hit today, with the interrupt 
controller still in QEMU).  As an optimization, we also check for pending 
signals before VMENTER.
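
For completeness, the userspace counterpart would look something like the
hypothetical sketch below (not part of this patch): install a no-op handler
for the signal so that its delivery merely breaks the vcpu thread out of its
KVM_RUN ioctl with -EINTR instead of killing the process.  Note that glibc
reserves the lowest real-time signals for its own use, which is one more
reason the signal number should eventually be settable by userspace, per the
"Hardcoded for now" note in kvm.h:

#include <signal.h>
#include <stdio.h>
#include <string.h>

#define KVM_SIGNAL_VIRTUAL_INTERRUPT 33	/* mirrors kvm.h below */

/* Intentionally empty: the -EINTR side effect on ioctl() is the point. */
static void vcpu_kick_handler(int signo)
{
}

static void install_vcpu_kick_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = vcpu_kick_handler;
	sigemptyset(&sa.sa_mask);
	if (sigaction(KVM_SIGNAL_VIRTUAL_INTERRUPT, &sa, NULL) < 0)
		perror("sigaction");	/* glibc may reject reserved RT signals */
}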

Comments please.

Regards,
-Greg


KVM: Preemptible VCPU

From:  <>

This adds support for interrupting a VCPU that is executing in guest mode.

Signed-off-by: Gregory Haskins <[EMAIL PROTECTED]>
---

 drivers/kvm/kvm.h      |   14 +++++++++
 drivers/kvm/kvm_main.c |   77 ++++++++++++++++++++++++++++++++++++++++++++----
 drivers/kvm/svm.c      |   54 ++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c      |   58 +++++++++++++++++++++++++++++++++++-
 4 files changed, 195 insertions(+), 8 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 58966d9..56a3d58 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -271,6 +271,19 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 
+#define KVM_SIGNAL_VIRTUAL_INTERRUPT 33 /* Hardcoded for now */
+
+/*
+ * Bookkeeping for interrupting a VCPU that is currently executing
+ */
+struct kvm_vcpu_irq {
+       spinlock_t          lock;
+       struct task_struct *task;
+       int                 signo;
+       int                 guest_mode;
+       int                 pending;
+};
+
 struct kvm_vcpu {
        struct kvm *kvm;
        union {
@@ -284,6 +297,7 @@ struct kvm_vcpu {
        struct kvm_run *run;
        int interrupt_window_open;
        struct kvm_irqdevice irq_dev;
+       struct kvm_vcpu_irq irq;
        unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
        unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7e00412..3e12c9c 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -299,6 +299,14 @@ static struct kvm *kvm_create_vm(void)
                struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
                mutex_init(&vcpu->mutex);
+
+               memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+               spin_lock_init(&vcpu->irq.lock);
+               /*
+                * This should be settable by userspace someday
+                */
+               vcpu->irq.signo = KVM_SIGNAL_VIRTUAL_INTERRUPT;
+
                vcpu->cpu = -1;
                vcpu->kvm = kvm;
                vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -1866,6 +1874,19 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                kvm_arch_ops->decache_regs(vcpu);
        }
 
+       /*
+        * Make sure the current task is accurately recorded.  Most of the
+        * time it will be, so we first check whether an update is needed
+        * without taking the lock.  If there is a mismatch, we must acquire
+        * the lock and check again to eliminate races.
+        */
+       if (unlikely(vcpu->irq.task != current)) {
+               spin_lock(&vcpu->irq.lock);
+               if (vcpu->irq.task != current)
+                       vcpu->irq.task = current;
+               spin_unlock(&vcpu->irq.lock);
+       }
+
        r = kvm_arch_ops->run(vcpu, kvm_run);
 
 out:
@@ -2310,6 +2331,20 @@ out1:
 }
 
 /*
+ * This function is invoked whenever we want to interrupt a vcpu that is
+ * currently executing in guest mode.  It is a no-op for now, because the
+ * mere delivery of the IPI that executes this function accomplishes our
+ * goal: causing a VMEXIT.  We pass the vcpu (which contains
+ * vcpu->irq.task, etc.) for future use.
+ */
+static void kvm_vcpu_guest_intr(void *info)
+{
+#ifdef NOT_YET
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu*)info;
+#endif
+}
+
+/*
  * This function will be invoked whenever the vcpu->irq_dev raises its INTR 
  * line
  */
@@ -2320,13 +2355,43 @@ static void kvm_vcpu_intr(struct kvm_irqsink *this,
         * Our irq device is requesting to interrupt the vcpu.  If it is
         * currently running, we should inject a host IPI to force a VMEXIT 
         */
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+
+       spin_lock(&vcpu->irq.lock);
        
-       /*
-        * FIXME: Implement this or the CPU wont notice the interrupt until
-        * the next natural VMEXIT.  Note that this is how the system
-        * has always worked, so nothing is broken here.  This is a future
-        * enhancement
-        */
+       if (vcpu->irq.task && (vcpu->irq.task != current)) {
+               if (vcpu->irq.guest_mode) {
+                       /*
+                        * If we are in guest mode, we can optimize the IPI
+                        * by executing a function on the owning processor.
+                        */
+                       int cpu;
+
+                       /*
+                        * Not sure whether disabling preemption is needed,
+                        * since we already hold a spinlock; the
+                        * kick_process() code does this, so I copied it.
+                        */
+                       preempt_disable();
+                       cpu = task_cpu(vcpu->irq.task);
+                       BUG_ON(cpu == smp_processor_id());
+                       smp_call_function_single(cpu, 
+                                                kvm_vcpu_guest_intr,
+                                                vcpu, 0, 0); 
+                       preempt_enable();
+               } else
+                       /*
+                        * If we are not in guest mode, we must assume that
+                        * we could be blocked anywhere, including userspace.
+                        * Send a signal to give the task a chance to be
+                        * notified wherever it happens to be.
+                        */
+                       send_sig(vcpu->irq.signo, vcpu->irq.task, 0);
+
+               vcpu->irq.pending = 1;
+       }
+       
+       spin_unlock(&vcpu->irq.lock);
 }
 
 static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e59a548..aa92a7c 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1461,11 +1461,48 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        u16 gs_selector;
        u16 ldt_selector;
        int r;
+       unsigned long irq_flags;
 
 again:
+       /*
+        * We disable interrupts until the next VMEXIT to eliminate a race
+        * condition in the delivery of virtual interrupts.  Note that this
+        * is probably not as bad as it sounds: interrupts will still trigger
+        * a VMEXIT once we transition to GUEST mode (and thus leave this
+        * lock scope), even while they are disabled.
+        *
+        * FIXME: Do we need to do anything additional to mask IPIs/NMIs?
+        */
+       local_irq_save(irq_flags);
+
+       spin_lock(&vcpu->irq.lock);
+
+       /*
+        * If there are any signals pending (virtual interrupt related or 
+        * otherwise), don't even bother trying to enter guest mode...
+        */
+       if (signal_pending(current)) {
+           kvm_run->exit_reason = KVM_EXIT_INTR;
+           spin_unlock(&vcpu->irq.lock);
+           local_irq_restore(irq_flags);
+           return -EINTR;
+       }
+
+       /*
+        * There are optimizations we can make when signaling interrupts
+        * if we know the VCPU is in GUEST mode, so mark that here
+        */
+       vcpu->irq.guest_mode = 1;
+
+       /*
+        * We must also inject interrupts (if any) while the irq.lock
+        * is held.
+        */
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
+       spin_unlock(&vcpu->irq.lock);
+
        clgi();
 
        pre_svm_run(vcpu);
@@ -1597,6 +1634,13 @@ again:
 #endif
                : "cc", "memory" );
 
+       /*
+        * FIXME: We'd like to turn on interrupts ASAP, but is this so early
+        * that we will mess up the state of the CPU before we fully 
+        * transition from guest to host?
+        */
+       local_irq_restore(irq_flags);
+
        fx_save(vcpu->guest_fx_image);
        fx_restore(vcpu->host_fx_image);
 
@@ -1617,6 +1661,16 @@ again:
        reload_tss(vcpu);
 
        /*
+        * Signal that we have transitioned back to host mode 
+        */
+       spin_lock(&vcpu->irq.lock);
+
+       vcpu->irq.guest_mode = 0;
+       vcpu->irq.pending = 0;
+       
+       spin_unlock(&vcpu->irq.lock);
+
+       /*
         * Profile KVM exit RIPs:
         */
        if (unlikely(prof_on == KVM_PROFILING))
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a0fdf02..7d5a650 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1722,6 +1722,7 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        u16 fs_sel, gs_sel, ldt_sel;
        int fs_gs_ldt_reload_needed;
        int r;
+       unsigned long irq_flags;
 
 again:
        /*
@@ -1748,11 +1749,47 @@ again:
        vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
 #endif
 
+       if (vcpu->guest_debug.enabled)
+               kvm_guest_debug_pre(vcpu);
+
+       /*
+        * We disable interrupts until the next VMEXIT to eliminate a race
+        * condition in the delivery of virtual interrupts.  Note that this
+        * is probably not as bad as it sounds: interrupts will still trigger
+        * a VMEXIT once we transition to GUEST mode (and thus leave this
+        * lock scope), even while they are disabled.
+        *
+        * FIXME: Do we need to do anything additional to mask IPIs/NMIs?
+        */
+       local_irq_save(irq_flags);
+
+       spin_lock(&vcpu->irq.lock);
+       
+       /*
+        * If there are any signals pending (virtual interrupt related or 
+        * otherwise), don't even bother trying to enter guest mode...
+        */
+       if (signal_pending(current)) {
+           kvm_run->exit_reason = KVM_EXIT_INTR;
+           spin_unlock(&vcpu->irq.lock);
+           local_irq_restore(irq_flags);
+           return -EINTR;
+       }
+
+       /*
+        * There are optimizations we can make when signaling interrupts
+        * if we know the VCPU is in GUEST mode, so mark that here
+        */
+       vcpu->irq.guest_mode = 1;
+
+       /*
+        * We also must inject interrupts (if any) while the irq.lock
+        * is held
+        */
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
-       if (vcpu->guest_debug.enabled)
-               kvm_guest_debug_pre(vcpu);
+       spin_unlock(&vcpu->irq.lock);
 
        fx_save(vcpu->host_fx_image);
        fx_restore(vcpu->guest_fx_image);
@@ -1880,6 +1917,13 @@ again:
              : "cc", "memory" );
 
        /*
+        * FIXME: We'd like to turn on interrupts ASAP, but is this so early
+        * that we will mess up the state of the CPU before we fully 
+        * transition from guest to host?
+        */
+       local_irq_restore(irq_flags);
+
+       /*
         * Reload segment selectors ASAP. (it's needed for a functional
         * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64
         * relies on having 0 in %gs for the CPU PDA to work.)
@@ -1911,6 +1955,16 @@ again:
 
        asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
+       /*
+        * Signal that we have transitioned back to host mode 
+        */
+       spin_lock(&vcpu->irq.lock);
+
+       vcpu->irq.guest_mode = 0;
+       vcpu->irq.pending = 0;
+
+       spin_unlock(&vcpu->irq.lock);
+
        if (fail) {
                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                kvm_run->fail_entry.hardware_entry_failure_reason