Re: [PATCH v2 1/3] Introduce a workqueue to deliver PIT timer interrupts.

2010-06-15 Thread Gleb Natapov
On Mon, Jun 14, 2010 at 01:11:20PM -0400, Chris Lalancette wrote:
 We really want to kvm_set_irq during the hrtimer callback,
 but that is risky because that is during interrupt context.
 Instead, offload the work to a workqueue, which is a bit safer
 and should provide most of the same functionality.
 
 Signed-off-by: Chris Lalancette clala...@redhat.com
 ---
  arch/x86/kvm/i8254.c |  125 -
  arch/x86/kvm/i8254.h |4 +-
  arch/x86/kvm/irq.c   |1 -
  3 files changed, 74 insertions(+), 56 deletions(-)
 
 diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
 index 188d827..3bed8ac 100644
 --- a/arch/x86/kvm/i8254.c
 +++ b/arch/x86/kvm/i8254.c
 @@ -34,6 +34,7 @@
  
  #include <linux/kvm_host.h>
  #include <linux/slab.h>
 +#include <linux/workqueue.h>
  
  #include "irq.h"
  #include "i8254.h"
 @@ -244,11 +245,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier 
 *kian)
  {
   struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);
 - raw_spin_lock(&ps->inject_lock);
 + spin_lock(&ps->inject_lock);
   if (atomic_dec_return(&ps->pit_timer.pending) > 0)
   atomic_inc(&ps->pit_timer.pending);
   ps->irq_ack = 1;
 - raw_spin_unlock(&ps->inject_lock);
 + spin_unlock(&ps->inject_lock);
  }
  
  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 @@ -267,7 +268,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
  static void destroy_pit_timer(struct kvm_timer *pt)
  {
   pr_debug("execute del timer!\n");
 - hrtimer_cancel(&pt->timer);
 + if (hrtimer_cancel(&pt->timer))
 + cancel_work_sync(&pt->kvm->arch.vpit->expired);
  }
  
  static bool kpit_is_periodic(struct kvm_timer *ktimer)
 @@ -281,6 +283,58 @@ static struct kvm_timer_ops kpit_ops = {
   .is_periodic = kpit_is_periodic,
  };
  
 +static void pit_do_work(struct work_struct *work)
 +{
 + struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
 + struct kvm *kvm = pit->kvm;
 + struct kvm_vcpu *vcpu;
 + int i;
 + struct kvm_kpit_state *ps = &pit->pit_state;
 + int inject = 0;
 +
 + /* Try to inject pending interrupts when
 +  * last one has been acked.
 +  */
 + spin_lock(&ps->inject_lock);
 + if (ps->irq_ack) {
 + ps->irq_ack = 0;
 + inject = 1;
 + }
 + spin_unlock(&ps->inject_lock);
 + if (inject) {
 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 +
 + /*
 +  * Provides NMI watchdog support via Virtual Wire mode.
 +  * The route is: PIT -> PIC -> LVT0 in NMI mode.
 +  *
 +  * Note: Our Virtual Wire implementation is simplified, only
 +  * propagating PIT interrupts to all VCPUs when they have set
 +  * LVT0 to NMI delivery. Other PIC interrupts are just sent to
 +  * VCPU0, and only if its LVT0 is in EXTINT mode.
 +  */
 + if (kvm->arch.vapics_in_nmi_mode > 0)
 + kvm_for_each_vcpu(i, vcpu, kvm)
 + kvm_apic_nmi_wd_deliver(vcpu);
 + }
 +}
 +
 +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 +{
 + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 + struct kvm_pit *pt = ktimer->kvm->arch.vpit;
 +
 + if (ktimer->reinject)
 + queue_work(pt->wq, &pt->expired);
If ktimer->reinject is set to false by userspace, the PIT irq will never be
delivered, or am I missing something here?

Maybe we should consider using the return value from kvm_set_irq() for
coalescing detection, to simplify things. I once had a patch for that.
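
Something along these lines (just a sketch, not that old patch; the
pit_inject_tick() helper is hypothetical, and it assumes kvm_set_irq()
returns 0 when the interrupt was coalesced with one still pending and
> 0 when it was delivered):

/*
 * Sketch only: inject one PIT tick and detect coalescing from the
 * kvm_set_irq() return value instead of waiting for the ACK notifier.
 */
static void pit_inject_tick(struct kvm_pit *pit)
{
        struct kvm *kvm = pit->kvm;

        if (kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1) == 0)
                /* Coalesced: account for it so it can be reinjected later. */
                atomic_inc(&pit->pit_state.pit_timer.pending);
        kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
}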

 +
 + if (ktimer->t_ops->is_periodic(ktimer)) {
 + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 + return HRTIMER_RESTART;
 + } else
 + return HRTIMER_NORESTART;
 +}
 +
  static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int 
 is_period)
  {
   struct kvm_timer *pt = &ps->pit_timer;
 @@ -291,14 +345,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, 
 u32 val, int is_period)
   pr_debug("create pit timer, interval is %llu nsec\n", interval);
  
   /* TODO The new value only affected after the retriggered */
 - hrtimer_cancel(&pt->timer);
 + if (hrtimer_cancel(&pt->timer))
 + cancel_work_sync(&pt->kvm->arch.vpit->expired);
   pt->period = interval;
   ps->is_periodic = is_period;
 
 - pt->timer.function = kvm_timer_fn;
 + pt->timer.function = pit_timer_fn;
   pt->t_ops = &kpit_ops;
   pt->kvm = ps->pit->kvm;
 - pt->vcpu = pt->kvm->bsp_vcpu;
 
   atomic_set(&pt->pending, 0);
   ps->irq_ack = 1;
 @@ -626,7 +680,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
 flags)
  
   mutex_init(&pit->pit_state.lock);
   mutex_lock(&pit->pit_state.lock);
 - 

Re: [PATCH v2 1/3] Introduce a workqueue to deliver PIT timer interrupts.

2010-06-15 Thread Marcelo Tosatti
On Tue, Jun 15, 2010 at 08:11:45AM -0400, Chris Lalancette wrote:
 On 06/14/10 - 07:19:49PM, Marcelo Tosatti wrote:
  On Mon, Jun 14, 2010 at 01:11:20PM -0400, Chris Lalancette wrote:
   We really want to kvm_set_irq during the hrtimer callback,
   but that is risky because that is during interrupt context.
   Instead, offload the work to a workqueue, which is a bit safer
   and should provide most of the same functionality.
   
   Signed-off-by: Chris Lalancette clala...@redhat.com
   ---
arch/x86/kvm/i8254.c |  125 
   -
arch/x86/kvm/i8254.h |4 +-
arch/x86/kvm/irq.c   |1 -
3 files changed, 74 insertions(+), 56 deletions(-)
   
   diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
   index 188d827..3bed8ac 100644
   --- a/arch/x86/kvm/i8254.c
   +++ b/arch/x86/kvm/i8254.c
   @@ -34,6 +34,7 @@

#include <linux/kvm_host.h>
#include <linux/slab.h>
    +#include <linux/workqueue.h>
 
#include "irq.h"
#include "i8254.h"
   @@ -244,11 +245,11 @@ static void kvm_pit_ack_irq(struct 
   kvm_irq_ack_notifier *kian)
{
 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
  irq_ack_notifier);
    - raw_spin_lock(&ps->inject_lock);
    + spin_lock(&ps->inject_lock);
 if (atomic_dec_return(&ps->pit_timer.pending) > 0)
 atomic_inc(&ps->pit_timer.pending);
 ps->irq_ack = 1;
    - raw_spin_unlock(&ps->inject_lock);
    + spin_unlock(&ps->inject_lock);
}

void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
   @@ -267,7 +268,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
static void destroy_pit_timer(struct kvm_timer *pt)
{
 pr_debug("execute del timer!\n");
    - hrtimer_cancel(&pt->timer);
    + if (hrtimer_cancel(&pt->timer))
    + cancel_work_sync(&pt->kvm->arch.vpit->expired);
}

static bool kpit_is_periodic(struct kvm_timer *ktimer)
   @@ -281,6 +283,58 @@ static struct kvm_timer_ops kpit_ops = {
 .is_periodic = kpit_is_periodic,
};

   +static void pit_do_work(struct work_struct *work)
   +{
   + struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
    + struct kvm *kvm = pit->kvm;
    + struct kvm_vcpu *vcpu;
    + int i;
    + struct kvm_kpit_state *ps = &pit->pit_state;
   + int inject = 0;
   +
   + /* Try to inject pending interrupts when
   +  * last one has been acked.
   +  */
    + spin_lock(&ps->inject_lock);
    + if (ps->irq_ack) {
    + ps->irq_ack = 0;
    + inject = 1;
    + }
    + spin_unlock(&ps->inject_lock);
    + if (inject) {
    + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
    + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
   +
   + /*
   +  * Provides NMI watchdog support via Virtual Wire mode.
    +  * The route is: PIT -> PIC -> LVT0 in NMI mode.
   +  *
   +  * Note: Our Virtual Wire implementation is simplified, only
   +  * propagating PIT interrupts to all VCPUs when they have set
   +  * LVT0 to NMI delivery. Other PIC interrupts are just sent to
   +  * VCPU0, and only if its LVT0 is in EXTINT mode.
   +  */
    + if (kvm->arch.vapics_in_nmi_mode > 0)
   + kvm_for_each_vcpu(i, vcpu, kvm)
   + kvm_apic_nmi_wd_deliver(vcpu);
   + }
   +}
   +
   +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
   +{
   + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
    + struct kvm_pit *pt = ktimer->kvm->arch.vpit;
    +
    + if (ktimer->reinject)
    + queue_work(pt->wq, &pt->expired);
  
  The problem is queue_work only queues the work if it was not already
  queued. So multiple queue_work() calls can collapse into one executed
  job.
  
  You need to maintain a counter here in pit_timer_fn, and reinject at 
  some point (perhaps on ACK) if there are multiple interrupts pending.
 
 Ah, OK, so that's what the pending variable was all about.  I didn't quite
 understand that before.  I'll make this change.
 
 Is there any way in particular I can test this change?  Just fire up a RHEL-3
 guest and see if time drifts?  Something more targeted?

Firing up a RHEL-3 guest should be enough (one without lost interrupt
compensation).

  
   +
    + if (ktimer->t_ops->is_periodic(ktimer)) {
    + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
   + return HRTIMER_RESTART;
   + } else
   + return HRTIMER_NORESTART;
   +}
   +
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int 
   is_period)
{
 struct kvm_timer *pt = &ps->pit_timer;
   @@ -291,14 +345,14 @@ static void create_pit_timer(struct kvm_kpit_state 
   *ps, u32 val, int is_period)
 pr_debug("create pit timer, interval is %llu nsec\n", interval);

 /* TODO The new value only affected after the retriggered */
    - hrtimer_cancel(&pt->timer);
    + if (hrtimer_cancel(&pt->timer))
   + 

[PATCH v2 1/3] Introduce a workqueue to deliver PIT timer interrupts.

2010-06-14 Thread Chris Lalancette
We really want to kvm_set_irq during the hrtimer callback,
but that is risky because that is during interrupt context.
Instead, offload the work to a workqueue, which is a bit safer
and should provide most of the same functionality.

Signed-off-by: Chris Lalancette clala...@redhat.com
---
 arch/x86/kvm/i8254.c |  125 -
 arch/x86/kvm/i8254.h |4 +-
 arch/x86/kvm/irq.c   |1 -
 3 files changed, 74 insertions(+), 56 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 188d827..3bed8ac 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -34,6 +34,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/workqueue.h>
 
 #include "irq.h"
 #include "i8254.h"
@@ -244,11 +245,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier 
*kian)
 {
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 irq_ack_notifier);
-   raw_spin_lock(&ps->inject_lock);
+   spin_lock(&ps->inject_lock);
if (atomic_dec_return(&ps->pit_timer.pending) > 0)
atomic_inc(&ps->pit_timer.pending);
ps->irq_ack = 1;
-   raw_spin_unlock(&ps->inject_lock);
+   spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -267,7 +268,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 static void destroy_pit_timer(struct kvm_timer *pt)
 {
pr_debug("execute del timer!\n");
-   hrtimer_cancel(&pt->timer);
+   if (hrtimer_cancel(&pt->timer))
+   cancel_work_sync(&pt->kvm->arch.vpit->expired);
 }
 
 static bool kpit_is_periodic(struct kvm_timer *ktimer)
@@ -281,6 +283,58 @@ static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
 };
 
+static void pit_do_work(struct work_struct *work)
+{
+   struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
+   struct kvm *kvm = pit->kvm;
+   struct kvm_vcpu *vcpu;
+   int i;
+   struct kvm_kpit_state *ps = &pit->pit_state;
+   int inject = 0;
+
+   /* Try to inject pending interrupts when
+* last one has been acked.
+*/
+   spin_lock(&ps->inject_lock);
+   if (ps->irq_ack) {
+   ps->irq_ack = 0;
+   inject = 1;
+   }
+   spin_unlock(&ps->inject_lock);
+   if (inject) {
+   kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+   kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+
+   /*
+* Provides NMI watchdog support via Virtual Wire mode.
+* The route is: PIT -> PIC -> LVT0 in NMI mode.
+*
+* Note: Our Virtual Wire implementation is simplified, only
+* propagating PIT interrupts to all VCPUs when they have set
+* LVT0 to NMI delivery. Other PIC interrupts are just sent to
+* VCPU0, and only if its LVT0 is in EXTINT mode.
+*/
+   if (kvm->arch.vapics_in_nmi_mode > 0)
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   kvm_apic_nmi_wd_deliver(vcpu);
+   }
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+   struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+   struct kvm_pit *pt = ktimer->kvm->arch.vpit;
+
+   if (ktimer->reinject)
+   queue_work(pt->wq, &pt->expired);
+
+   if (ktimer->t_ops->is_periodic(ktimer)) {
+   hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+   return HRTIMER_RESTART;
+   } else
+   return HRTIMER_NORESTART;
+}
+
 static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 {
struct kvm_timer *pt = &ps->pit_timer;
@@ -291,14 +345,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, 
u32 val, int is_period)
pr_debug("create pit timer, interval is %llu nsec\n", interval);
 
/* TODO The new value only affected after the retriggered */
-   hrtimer_cancel(&pt->timer);
+   if (hrtimer_cancel(&pt->timer))
+   cancel_work_sync(&pt->kvm->arch.vpit->expired);
pt->period = interval;
ps->is_periodic = is_period;
 
-   pt->timer.function = kvm_timer_fn;
+   pt->timer.function = pit_timer_fn;
pt->t_ops = &kpit_ops;
pt->kvm = ps->pit->kvm;
-   pt->vcpu = pt->kvm->bsp_vcpu;
 
atomic_set(&pt->pending, 0);
ps->irq_ack = 1;
@@ -626,7 +680,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
-   raw_spin_lock_init(&pit->pit_state.inject_lock);
+   spin_lock_init(&pit->pit_state.inject_lock);
+
+   pit->wq = create_singlethread_workqueue("kvm-pit-wq");
+   if (!pit->wq) {
+   kfree(pit);
+   return NULL;
+   }
+   INIT_WORK(&pit->expired, pit_do_work);
 
kvm->arch.vpit = 
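
(For completeness: the workqueue created above would also need a matching
teardown on the PIT free path, something like the sketch below; the exact
placement in kvm_free_pit() is an assumption here, not part of this hunk.)

void kvm_free_pit(struct kvm *kvm)
{
        if (kvm->arch.vpit) {
                /* Drain any queued delivery before the pit goes away. */
                hrtimer_cancel(&kvm->arch.vpit->pit_state.pit_timer.timer);
                cancel_work_sync(&kvm->arch.vpit->expired);
                destroy_workqueue(kvm->arch.vpit->wq);
                kfree(kvm->arch.vpit);
        }
}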

Re: [PATCH v2 1/3] Introduce a workqueue to deliver PIT timer interrupts.

2010-06-14 Thread Marcelo Tosatti
On Mon, Jun 14, 2010 at 01:11:20PM -0400, Chris Lalancette wrote:
 We really want to kvm_set_irq during the hrtimer callback,
 but that is risky because that is during interrupt context.
 Instead, offload the work to a workqueue, which is a bit safer
 and should provide most of the same functionality.
 
 Signed-off-by: Chris Lalancette clala...@redhat.com
 ---
  arch/x86/kvm/i8254.c |  125 -
  arch/x86/kvm/i8254.h |4 +-
  arch/x86/kvm/irq.c   |1 -
  3 files changed, 74 insertions(+), 56 deletions(-)
 
 diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
 index 188d827..3bed8ac 100644
 --- a/arch/x86/kvm/i8254.c
 +++ b/arch/x86/kvm/i8254.c
 @@ -34,6 +34,7 @@
  
  #include <linux/kvm_host.h>
  #include <linux/slab.h>
 +#include <linux/workqueue.h>
  
  #include "irq.h"
  #include "i8254.h"
 @@ -244,11 +245,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier 
 *kian)
  {
   struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);
 - raw_spin_lock(&ps->inject_lock);
 + spin_lock(&ps->inject_lock);
   if (atomic_dec_return(&ps->pit_timer.pending) > 0)
   atomic_inc(&ps->pit_timer.pending);
   ps->irq_ack = 1;
 - raw_spin_unlock(&ps->inject_lock);
 + spin_unlock(&ps->inject_lock);
  }
  
  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 @@ -267,7 +268,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
  static void destroy_pit_timer(struct kvm_timer *pt)
  {
   pr_debug("execute del timer!\n");
 - hrtimer_cancel(&pt->timer);
 + if (hrtimer_cancel(&pt->timer))
 + cancel_work_sync(&pt->kvm->arch.vpit->expired);
  }
  
  static bool kpit_is_periodic(struct kvm_timer *ktimer)
 @@ -281,6 +283,58 @@ static struct kvm_timer_ops kpit_ops = {
   .is_periodic = kpit_is_periodic,
  };
  
 +static void pit_do_work(struct work_struct *work)
 +{
 + struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
 + struct kvm *kvm = pit->kvm;
 + struct kvm_vcpu *vcpu;
 + int i;
 + struct kvm_kpit_state *ps = &pit->pit_state;
 + int inject = 0;
 +
 + /* Try to inject pending interrupts when
 +  * last one has been acked.
 +  */
 + spin_lock(&ps->inject_lock);
 + if (ps->irq_ack) {
 + ps->irq_ack = 0;
 + inject = 1;
 + }
 + spin_unlock(&ps->inject_lock);
 + if (inject) {
 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 +
 + /*
 +  * Provides NMI watchdog support via Virtual Wire mode.
 +  * The route is: PIT -> PIC -> LVT0 in NMI mode.
 +  *
 +  * Note: Our Virtual Wire implementation is simplified, only
 +  * propagating PIT interrupts to all VCPUs when they have set
 +  * LVT0 to NMI delivery. Other PIC interrupts are just sent to
 +  * VCPU0, and only if its LVT0 is in EXTINT mode.
 +  */
 + if (kvm->arch.vapics_in_nmi_mode > 0)
 + kvm_for_each_vcpu(i, vcpu, kvm)
 + kvm_apic_nmi_wd_deliver(vcpu);
 + }
 +}
 +
 +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 +{
 + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 + struct kvm_pit *pt = ktimer->kvm->arch.vpit;
 +
 + if (ktimer->reinject)
 + queue_work(pt->wq, &pt->expired);

The problem is queue_work only queues the work if it was not already
queued. So multiple queue_work() calls can collapse into one executed
job.

You need to maintain a counter here in pit_timer_fn, and reinject at 
some point (perhaps on ACK) if there are multiple interrupts pending.
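
Roughly, as a sketch (it reuses the existing pending counter and the pit
backpointer in kvm_kpit_state; the details here are assumptions, not the
final patch):

/*
 * Count ticks in the hrtimer callback so that collapsed queue_work()
 * calls cannot lose interrupts.
 */
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
        struct kvm_pit *pt = ktimer->kvm->arch.vpit;

        if (ktimer->reinject) {
                atomic_inc(&ktimer->pending);   /* one unit of work per tick */
                queue_work(pt->wq, &pt->expired);
        }

        if (ktimer->t_ops->is_periodic(ktimer)) {
                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
                return HRTIMER_RESTART;
        }
        return HRTIMER_NORESTART;
}

/*
 * On ACK, requeue the work while ticks remain pending, so every
 * collapsed tick is eventually reinjected.
 */
static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
                                                 irq_ack_notifier);

        spin_lock(&ps->inject_lock);
        ps->irq_ack = 1;
        if (atomic_dec_return(&ps->pit_timer.pending) > 0)
                queue_work(ps->pit->wq, &ps->pit->expired);
        spin_unlock(&ps->inject_lock);
}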

 +
 + if (ktimer->t_ops->is_periodic(ktimer)) {
 + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 + return HRTIMER_RESTART;
 + } else
 + return HRTIMER_NORESTART;
 +}
 +
  static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int 
 is_period)
  {
   struct kvm_timer *pt = &ps->pit_timer;
 @@ -291,14 +345,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, 
 u32 val, int is_period)
   pr_debug("create pit timer, interval is %llu nsec\n", interval);
  
   /* TODO The new value only affected after the retriggered */
 - hrtimer_cancel(&pt->timer);
 + if (hrtimer_cancel(&pt->timer))
 + cancel_work_sync(&pt->kvm->arch.vpit->expired);

There can be a queued work instance even if the hrtimer is not active,
so cancel_work_sync should be unconditional.
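
I.e., unconditionally (a sketch of the suggested change):

static void destroy_pit_timer(struct kvm_timer *pt)
{
        pr_debug("execute del timer!\n");
        hrtimer_cancel(&pt->timer);
        cancel_work_sync(&pt->kvm->arch.vpit->expired);
}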
