Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Avi Kivity
On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
 I took a stub at implementing PV EOI using shared memory.
 This should reduce the number of exits an interrupt
 causes as much as by half.

 A partially complete draft for both host and guest parts
 is below.

 The idea is simple: there's a bit, per APIC, in guest memory,
 that tells the guest that it does not need EOI.
 We set it before injecting an interrupt and clear
 before injecting a nested one. Guest tests it using
 a test and clear operation - this is necessary
 so that host can detect interrupt nesting -
 and if set, it can skip the EOI MSR.

 There's a new MSR to set the address of said register
 in guest memory. Otherwise not much changes:
 - Guest EOI is not required
 - ISR is automatically cleared before injection

 Some things are incomplete: add feature negotiation
 options, qemu support for said options.
 No testing was done beyond compiling the kernel.

 I would appreciate early feedback.

 Signed-off-by: Michael S. Tsirkin m...@redhat.com

 --

 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
 index d854101..8430f41 100644
 --- a/arch/x86/include/asm/apic.h
 +++ b/arch/x86/include/asm/apic.h
 @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 
 0; }
  
  #endif /* CONFIG_X86_LOCAL_APIC */
  
 +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
 +
  static inline void ack_APIC_irq(void)
  {
 + if (__test_and_clear_bit(0, __get_cpu_var(apic_eoi)))
 + return;
 +

While __test_and_clear_bit() is implemented in a single instruction,
it's not required to be.  Better have the instruction there explicitly.

   /*
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index e216ba0..0ee1472 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
   u64 length;
   u64 status;
   } osvw;
 +
 + struct {
 + u64 msr_val;
 + struct gfn_to_hva_cache data;
 + int vector;
 + } eoi;
  };

Needs to be cleared on INIT.

  

 @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
  smp_processor_id());
   }
  
 + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
 +MSR_KVM_EOI_ENABLED);
 +

Clear on kexec.

   if (has_steal_clock)
   kvm_register_steal_time();
  }
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 8584322..9e38e12 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct 
 kvm_lapic_irq *irq)
   irq-level, irq-trig_mode);
  }
  
 -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
 +{
 +
 + return kvm_write_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, val,
 +   sizeof(val));
 +}
 +
 +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
 +{
 +
 + return kvm_read_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, val,
 +   sizeof(*val));
 +}
 +
 +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
 +{
 + return (vcpu-arch.eoi.msr_val  MSR_KVM_EOI_ENABLED);
 +}
 +
 +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
 +{
 + u32 val;
 + if (eoi_get_user(vcpu, val)  0)
 + apic_debug(Can't read EOI MSR value: 0x%llx\n,
 +(unsigned long long)vcpi-arch.eoi.msr_val);
 + if (!(val  0x1))
 + vcpu-arch.eoi.vector = -1;
 + return vcpu-arch.eoi.vector;
 +}
 +
 +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
 +{
 + BUG_ON(vcpu-arch.eoi.vector != -1);
 + if (eoi_put_user(vcpu, 0x1)  0) {
 + apic_debug(Can't set EOI MSR value: 0x%llx\n,
 +(unsigned long long)vcpi-arch.eoi.msr_val);
 + return;
 + }
 + vcpu-arch.eoi.vector = vector;
 +}
 +
 +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
 +{
 + int vector;
 + vector = vcpu-arch.eoi.vector;
 + if (vector != -1  eoi_put_user(vcpu, 0x0)  0) {
 + apic_debug(Can't clear EOI MSR value: 0x%llx\n,
 +(unsigned long long)vcpi-arch.eoi.msr_val);
 + return -1;
 + }
 + vcpu-arch.eoi.vector = -1;
 + return vector;
 +}



 +
 +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
  {
   int result;
  
 @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic 
 *apic)
   return result;
  }
  
 +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 +{
 + int vector;
 + if (eoi_enabled(apic-vcpu)) {
 + vector = eoi_get_pending_vector(apic-vcpu);
 + if (vector != -1)
 +  

Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Avi Kivity
On 04/10/2012 05:26 PM, Michael S. Tsirkin wrote:
 u64 status;
 } osvw;
   +
   + struct {
   + u64 msr_val;
   + struct gfn_to_hva_cache data;
   + int vector;
   + } eoi;
};
  
  Needs to be cleared on INIT.

 You mean kvm_arch_vcpu_reset?

Yes, or kvm_lapic_reset().


  
   @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
smp_processor_id());
 }

   + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
   +MSR_KVM_EOI_ENABLED);
   +
  
  Clear on kexec.

 With register_reboot_notifier?

Yes, we already clear some kvm msrs there.


   - apic_set_vector(vector, apic-regs + APIC_ISR);
   + if (eoi_enabled(vcpu)) {
   + /* Anything pending? If yes disable eoi optimization. */
   + if (unlikely(apic_find_highest_isr(apic) = 0)) {
   + int v = eoi_clr_pending_vector(vcpu);
  
  ISR != pending, that's IRR.  If ISR vector has lower priority than the
  new vector, then we don't need to disable eoi avoidance.

 Yes. But we can and it's easier than figuring out priorities.
 I am guessing such collisions are rare, right?

It's pretty easy, if there is something in IRR but
kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.

 I'll add a trace to make sure.

   + if (v != -1)
   + apic_set_vector(v, apic-regs + APIC_ISR);
   + } else {
   + eoi_set_pending_vector(vcpu, vector);
   + set_isr = false;
  
  Weird.  Just set it normally.  Remember that reading the ISR needs to
  return the correct value.

 Marcelo said linux does not normally read ISR - not true?

It's true and it's irrelevant.  We aren't coding a feature to what linux
does now, but for what linux or another guest may do in the future.

 Note this has no effect if the PV optimization is not enabled.

  We need to process the avoided EOI before any APIC read/writes, to be
  sure the guest sees the updated values.  Same for IOAPIC, EOI affects
  remote_irr.  That may been we need to sample it after every exit, or
  perhaps disable the feature for level-triggered interrupts.

 Disabling would be very sad.  Can we sample on remote irr read?

That can be done from another vcpu.  Why do we care about
level-triggered interrupts?  Everything uses MSI or edge-triggered
IOAPIC interrupts these days.


-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Michael S. Tsirkin
On Tue, Apr 10, 2012 at 05:33:18PM +0300, Avi Kivity wrote:
 On 04/10/2012 05:26 PM, Michael S. Tsirkin wrote:
u64 status;
} osvw;
+
+   struct {
+   u64 msr_val;
+   struct gfn_to_hva_cache data;
+   int vector;
+   } eoi;
 };
   
   Needs to be cleared on INIT.
 
  You mean kvm_arch_vcpu_reset?
 
 Yes, or kvm_lapic_reset().
 
 
   
@@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
   smp_processor_id());
}
 
+   wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
+  MSR_KVM_EOI_ENABLED);
+
   
   Clear on kexec.
 
  With register_reboot_notifier?
 
 Yes, we already clear some kvm msrs there.
 
 
-   apic_set_vector(vector, apic-regs + APIC_ISR);
+   if (eoi_enabled(vcpu)) {
+   /* Anything pending? If yes disable eoi optimization. */
+   if (unlikely(apic_find_highest_isr(apic) = 0)) {
+   int v = eoi_clr_pending_vector(vcpu);
   
   ISR != pending, that's IRR.  If ISR vector has lower priority than the
   new vector, then we don't need to disable eoi avoidance.
 
  Yes. But we can and it's easier than figuring out priorities.
  I am guessing such collisions are rare, right?
 
 It's pretty easy, if there is something in IRR but
 kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.

I only see kvm_apic_has_interrupt - is this what you mean?

  I'll add a trace to make sure.
 
+   if (v != -1)
+   apic_set_vector(v, apic-regs + 
APIC_ISR);
+   } else {
+   eoi_set_pending_vector(vcpu, vector);
+   set_isr = false;
   
   Weird.  Just set it normally.  Remember that reading the ISR needs to
   return the correct value.
 
  Marcelo said linux does not normally read ISR - not true?
 
 It's true and it's irrelevant.  We aren't coding a feature to what linux
 does now, but for what linux or another guest may do in the future.

Right. So you think reading ISR has value
in combination with PV EOI for future guests?
I'm not arguing either way just curious.

  Note this has no effect if the PV optimization is not enabled.
 
   We need to process the avoided EOI before any APIC read/writes, to be
   sure the guest sees the updated values.  Same for IOAPIC, EOI affects
   remote_irr.  That may been we need to sample it after every exit, or
   perhaps disable the feature for level-triggered interrupts.
 
  Disabling would be very sad.  Can we sample on remote irr read?
 
 That can be done from another vcpu.

We still can handle it, right? Where's the code that handles that read?

 Why do we care about
 level-triggered interrupts?  Everything uses MSI or edge-triggered
 IOAPIC interrupts these days.

Well lots of emulated devices don't yet.
They probably should but it's nice to be able to
test with e.g. e1000 emulation not just virtio.

Besides, kvm_get_apic_interrupt
simply doesn't know about the triggering mode at the moment.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Michael S. Tsirkin
On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
 On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
  I took a stub at implementing PV EOI using shared memory.
  This should reduce the number of exits an interrupt
  causes as much as by half.
 
  A partially complete draft for both host and guest parts
  is below.
 
  The idea is simple: there's a bit, per APIC, in guest memory,
  that tells the guest that it does not need EOI.
  We set it before injecting an interrupt and clear
  before injecting a nested one. Guest tests it using
  a test and clear operation - this is necessary
  so that host can detect interrupt nesting -
  and if set, it can skip the EOI MSR.
 
  There's a new MSR to set the address of said register
  in guest memory. Otherwise not much changes:
  - Guest EOI is not required
  - ISR is automatically cleared before injection
 
  Some things are incomplete: add feature negotiation
  options, qemu support for said options.
  No testing was done beyond compiling the kernel.
 
  I would appreciate early feedback.
 
  Signed-off-by: Michael S. Tsirkin m...@redhat.com
 
  --
 
  diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
  index d854101..8430f41 100644
  --- a/arch/x86/include/asm/apic.h
  +++ b/arch/x86/include/asm/apic.h
  @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { 
  return 0; }
   
   #endif /* CONFIG_X86_LOCAL_APIC */
   
  +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
  +
   static inline void ack_APIC_irq(void)
   {
  +   if (__test_and_clear_bit(0, __get_cpu_var(apic_eoi)))
  +   return;
  +
 
 While __test_and_clear_bit() is implemented in a single instruction,
 it's not required to be.  Better have the instruction there explicitly.
 
  /*
   * ack_APIC_irq() actually gets compiled as a single instruction
   * ... yummie.
  diff --git a/arch/x86/include/asm/kvm_host.h 
  b/arch/x86/include/asm/kvm_host.h
  index e216ba0..0ee1472 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
  u64 length;
  u64 status;
  } osvw;
  +
  +   struct {
  +   u64 msr_val;
  +   struct gfn_to_hva_cache data;
  +   int vector;
  +   } eoi;
   };
 
 Needs to be cleared on INIT.

You mean kvm_arch_vcpu_reset?

   
 
  @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
 smp_processor_id());
  }
   
  +   wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
  +  MSR_KVM_EOI_ENABLED);
  +
 
 Clear on kexec.

With register_reboot_notifier?

  if (has_steal_clock)
  kvm_register_steal_time();
   }
  diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
  index 8584322..9e38e12 100644
  --- a/arch/x86/kvm/lapic.c
  +++ b/arch/x86/kvm/lapic.c
  @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct 
  kvm_lapic_irq *irq)
  irq-level, irq-trig_mode);
   }
   
  -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
  +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
  +{
  +
  +   return kvm_write_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, val,
  + sizeof(val));
  +}
  +
  +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
  +{
  +
  +   return kvm_read_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, val,
  + sizeof(*val));
  +}
  +
  +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
  +{
  +   return (vcpu-arch.eoi.msr_val  MSR_KVM_EOI_ENABLED);
  +}
  +
  +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
  +{
  +   u32 val;
  +   if (eoi_get_user(vcpu, val)  0)
  +   apic_debug(Can't read EOI MSR value: 0x%llx\n,
  +  (unsigned long long)vcpi-arch.eoi.msr_val);
  +   if (!(val  0x1))
  +   vcpu-arch.eoi.vector = -1;
  +   return vcpu-arch.eoi.vector;
  +}
  +
  +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
  +{
  +   BUG_ON(vcpu-arch.eoi.vector != -1);
  +   if (eoi_put_user(vcpu, 0x1)  0) {
  +   apic_debug(Can't set EOI MSR value: 0x%llx\n,
  +  (unsigned long long)vcpi-arch.eoi.msr_val);
  +   return;
  +   }
  +   vcpu-arch.eoi.vector = vector;
  +}
  +
  +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
  +{
  +   int vector;
  +   vector = vcpu-arch.eoi.vector;
  +   if (vector != -1  eoi_put_user(vcpu, 0x0)  0) {
  +   apic_debug(Can't clear EOI MSR value: 0x%llx\n,
  +  (unsigned long long)vcpi-arch.eoi.msr_val);
  +   return -1;
  +   }
  +   vcpu-arch.eoi.vector = -1;
  +   return vector;
  +}
 
 
 
  +
  +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
   {
  int result;
   
  @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct 
  kvm_lapic *apic)
  return result;
   }
   
  +static inline int 

Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Avi Kivity
On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
  
   Yes. But we can and it's easier than figuring out priorities.
   I am guessing such collisions are rare, right?
  
  It's pretty easy, if there is something in IRR but
  kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.

 I only see kvm_apic_has_interrupt - is this what you mean?

Yes, sorry.

It's not clear whether to do the check in kvm_apic_has_interrupt() or
kvm_apic_get_interrupt() - the latter is called only after interrupts
are enabled, so it looks like a better place (EOIs while interrupts are
disabled have no effect).  But need to make sure those functions are
actually called, since they're protected by KVM_REQ_EVENT.

   I'll add a trace to make sure.
  
 + if (v != -1)
 + apic_set_vector(v, apic-regs + 
 APIC_ISR);
 + } else {
 + eoi_set_pending_vector(vcpu, vector);
 + set_isr = false;

Weird.  Just set it normally.  Remember that reading the ISR needs to
return the correct value.
  
   Marcelo said linux does not normally read ISR - not true?
  
  It's true and it's irrelevant.  We aren't coding a feature to what linux
  does now, but for what linux or another guest may do in the future.

 Right. So you think reading ISR has value
 in combination with PV EOI for future guests?
 I'm not arguing either way just curious.

I don't.  But we need to preserve the same interface the APIC has
presented for thousands of years (well, almost).


   Note this has no effect if the PV optimization is not enabled.
  
We need to process the avoided EOI before any APIC read/writes, to be
sure the guest sees the updated values.  Same for IOAPIC, EOI affects
remote_irr.  That may been we need to sample it after every exit, or
perhaps disable the feature for level-triggered interrupts.
  
   Disabling would be very sad.  Can we sample on remote irr read?
  
  That can be done from another vcpu.

 We still can handle it, right? Where's the code that handles that read?

Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c


  Why do we care about
  level-triggered interrupts?  Everything uses MSI or edge-triggered
  IOAPIC interrupts these days.

 Well lots of emulated devices don't yet.
 They probably should but it's nice to be able to
 test with e.g. e1000 emulation not just virtio.


e1000 doesn't support msi?


 Besides, kvm_get_apic_interrupt
 simply doesn't know about the triggering mode at the moment.



-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Michael S. Tsirkin
On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote:
 On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
   
Yes. But we can and it's easier than figuring out priorities.
I am guessing such collisions are rare, right?
   
   It's pretty easy, if there is something in IRR but
   kvm_lapic_has_interrupt() returns -1, then we need to disable eoi 
   avoidance.
 
  I only see kvm_apic_has_interrupt - is this what you mean?
 
 Yes, sorry.
 
 It's not clear whether to do the check in kvm_apic_has_interrupt() or
 kvm_apic_get_interrupt() - the latter is called only after interrupts
 are enabled, so it looks like a better place (EOIs while interrupts are
 disabled have no effect).  But need to make sure those functions are
 actually called, since they're protected by KVM_REQ_EVENT.

Sorry not sure what you mean by make sure - read the code carefully?

I'll add a trace to make sure.
   
  +   if (v != -1)
  +   apic_set_vector(v, apic-regs + 
  APIC_ISR);
  +   } else {
  +   eoi_set_pending_vector(vcpu, vector);
  +   set_isr = false;
 
 Weird.  Just set it normally.  Remember that reading the ISR needs to
 return the correct value.
   
Marcelo said linux does not normally read ISR - not true?
   
   It's true and it's irrelevant.  We aren't coding a feature to what linux
   does now, but for what linux or another guest may do in the future.
 
  Right. So you think reading ISR has value
  in combination with PV EOI for future guests?
  I'm not arguing either way just curious.
 
 I don't.  But we need to preserve the same interface the APIC has
 presented for thousands of years (well, almost).


Talk about overstatements :)

 
Note this has no effect if the PV optimization is not enabled.
   
 We need to process the avoided EOI before any APIC read/writes, to be
 sure the guest sees the updated values.  Same for IOAPIC, EOI affects
 remote_irr.  That may been we need to sample it after every exit, or
 perhaps disable the feature for level-triggered interrupts.
   
Disabling would be very sad.  Can we sample on remote irr read?
   
   That can be done from another vcpu.
 
  We still can handle it, right? Where's the code that handles that read?
 
 Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c

Hmm. Disabling for level handles the ack notifiers
issue as well, which I forgot about.
It's a tough call. You think looking at
TMR in kvm_get_apic_interrupt is safe?

 
   Why do we care about
   level-triggered interrupts?  Everything uses MSI or edge-triggered
   IOAPIC interrupts these days.
 
  Well lots of emulated devices don't yet.
  They probably should but it's nice to be able to
  test with e.g. e1000 emulation not just virtio.
 
 
 e1000 doesn't support msi?

qemu emulation doesn't.

 
  Besides, kvm_get_apic_interrupt
  simply doesn't know about the triggering mode at the moment.
 
 
 
 -- 
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Avi Kivity
On 04/10/2012 06:14 PM, Michael S. Tsirkin wrote:
 On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote:
  On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:

 Yes. But we can and it's easier than figuring out priorities.
 I am guessing such collisions are rare, right?

It's pretty easy, if there is something in IRR but
kvm_lapic_has_interrupt() returns -1, then we need to disable eoi 
avoidance.
  
   I only see kvm_apic_has_interrupt - is this what you mean?
  
  Yes, sorry.
  
  It's not clear whether to do the check in kvm_apic_has_interrupt() or
  kvm_apic_get_interrupt() - the latter is called only after interrupts
  are enabled, so it looks like a better place (EOIs while interrupts are
  disabled have no effect).  But need to make sure those functions are
  actually called, since they're protected by KVM_REQ_EVENT.

 Sorry not sure what you mean by make sure - read the code carefully?

Yes.  And I mean, get called at the right time.

  
  Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c

 Hmm. Disabling for level handles the ack notifiers
 issue as well, which I forgot about.
 It's a tough call. You think looking at
 TMR in kvm_get_apic_interrupt is safe?

Yes, it's read only from the guest point of view IIRC.

  
Why do we care about
level-triggered interrupts?  Everything uses MSI or edge-triggered
IOAPIC interrupts these days.
  
   Well lots of emulated devices don't yet.
   They probably should but it's nice to be able to
   test with e.g. e1000 emulation not just virtio.
  
  
  e1000 doesn't support msi?

 qemu emulation doesn't.


Can be changed if someone's really interested.  But really, avoiding
EOIs for e1000 won't help it much.

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Michael S. Tsirkin
On Tue, Apr 10, 2012 at 07:08:26PM +0300, Avi Kivity wrote:
 On 04/10/2012 06:14 PM, Michael S. Tsirkin wrote:
  On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote:
   On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
 
  Yes. But we can and it's easier than figuring out priorities.
  I am guessing such collisions are rare, right?
 
 It's pretty easy, if there is something in IRR but
 kvm_lapic_has_interrupt() returns -1, then we need to disable eoi 
 avoidance.
   
I only see kvm_apic_has_interrupt - is this what you mean?
   
   Yes, sorry.
   
   It's not clear whether to do the check in kvm_apic_has_interrupt() or
   kvm_apic_get_interrupt() - the latter is called only after interrupts
   are enabled, so it looks like a better place (EOIs while interrupts are
   disabled have no effect).  But need to make sure those functions are
   actually called, since they're protected by KVM_REQ_EVENT.
 
  Sorry not sure what you mean by make sure - read the code carefully?
 
 Yes.  And I mean, get called at the right time.

OK, Review will help here.

   
   Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c
 
  Hmm. Disabling for level handles the ack notifiers
  issue as well, which I forgot about.
  It's a tough call. You think looking at
  TMR in kvm_get_apic_interrupt is safe?
 
 Yes, it's read only from the guest point of view IIRC.
 
   
 Why do we care about
 level-triggered interrupts?  Everything uses MSI or edge-triggered
 IOAPIC interrupts these days.
   
Well lots of emulated devices don't yet.
They probably should but it's nice to be able to
test with e.g. e1000 emulation not just virtio.
   
   
   e1000 doesn't support msi?
 
  qemu emulation doesn't.
 
 
 Can be changed if someone's really interested.  But really, avoiding
 EOIs for e1000 won't help it much.

It will help test EOI avoidance.

 -- 
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Gleb Natapov
Heh, I was working on that too.

On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote:
 On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
  On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
   I took a stub at implementing PV EOI using shared memory.
   This should reduce the number of exits an interrupt
   causes as much as by half.
  
   A partially complete draft for both host and guest parts
   is below.
  
   The idea is simple: there's a bit, per APIC, in guest memory,
   that tells the guest that it does not need EOI.
   We set it before injecting an interrupt and clear
   before injecting a nested one. Guest tests it using
   a test and clear operation - this is necessary
   so that host can detect interrupt nesting -
   and if set, it can skip the EOI MSR.
  
   There's a new MSR to set the address of said register
   in guest memory. Otherwise not much changes:
   - Guest EOI is not required
   - ISR is automatically cleared before injection
  
   Some things are incomplete: add feature negotiation
   options, qemu support for said options.
   No testing was done beyond compiling the kernel.
  
   I would appreciate early feedback.
  
   Signed-off-by: Michael S. Tsirkin m...@redhat.com
  
   --
  
   diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
   index d854101..8430f41 100644
   --- a/arch/x86/include/asm/apic.h
   +++ b/arch/x86/include/asm/apic.h
   @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { 
   return 0; }

#endif /* CONFIG_X86_LOCAL_APIC */

   +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
   +
static inline void ack_APIC_irq(void)
{
   + if (__test_and_clear_bit(0, __get_cpu_var(apic_eoi)))
   + return;
   +
  
  While __test_and_clear_bit() is implemented in a single instruction,
  it's not required to be.  Better have the instruction there explicitly.
  
 /*
  * ack_APIC_irq() actually gets compiled as a single instruction
  * ... yummie.
   diff --git a/arch/x86/include/asm/kvm_host.h 
   b/arch/x86/include/asm/kvm_host.h
   index e216ba0..0ee1472 100644
   --- a/arch/x86/include/asm/kvm_host.h
   +++ b/arch/x86/include/asm/kvm_host.h
   @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
 u64 length;
 u64 status;
 } osvw;
   +
   + struct {
   + u64 msr_val;
   + struct gfn_to_hva_cache data;
   + int vector;
   + } eoi;
};
  
  Needs to be cleared on INIT.
 
 You mean kvm_arch_vcpu_reset?
 

  
   @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
smp_processor_id());
 }

   + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
   +MSR_KVM_EOI_ENABLED);
   +
  
  Clear on kexec.
 
 With register_reboot_notifier?
 
 if (has_steal_clock)
 kvm_register_steal_time();
}
   diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
   index 8584322..9e38e12 100644
   --- a/arch/x86/kvm/lapic.c
   +++ b/arch/x86/kvm/lapic.c
   @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct 
   kvm_lapic_irq *irq)
 irq-level, irq-trig_mode);
}

   -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
   +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
   +{
   +
   + return kvm_write_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, val,
   +   sizeof(val));
   +}
   +
   +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
   +{
   +
   + return kvm_read_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, val,
   +   sizeof(*val));
   +}
   +
   +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
   +{
   + return (vcpu-arch.eoi.msr_val  MSR_KVM_EOI_ENABLED);
   +}
   +
   +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
   +{
   + u32 val;
   + if (eoi_get_user(vcpu, val)  0)
   + apic_debug(Can't read EOI MSR value: 0x%llx\n,
   +(unsigned long long)vcpi-arch.eoi.msr_val);
   + if (!(val  0x1))
   + vcpu-arch.eoi.vector = -1;
   + return vcpu-arch.eoi.vector;
   +}
   +
   +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
   +{
   + BUG_ON(vcpu-arch.eoi.vector != -1);
   + if (eoi_put_user(vcpu, 0x1)  0) {
   + apic_debug(Can't set EOI MSR value: 0x%llx\n,
   +(unsigned long long)vcpi-arch.eoi.msr_val);
   + return;
   + }
   + vcpu-arch.eoi.vector = vector;
   +}
   +
   +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
   +{
   + int vector;
   + vector = vcpu-arch.eoi.vector;
   + if (vector != -1  eoi_put_user(vcpu, 0x0)  0) {
   + apic_debug(Can't clear EOI MSR value: 0x%llx\n,
   +(unsigned long long)vcpi-arch.eoi.msr_val);
   + return -1;
   + }
   + vcpu-arch.eoi.vector = -1;
   + return vector;
   +}
  
  
  
   +
   +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
{
 int 

Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Michael S. Tsirkin
On Tue, Apr 10, 2012 at 08:59:21PM +0300, Gleb Natapov wrote:
 Heh, I was working on that too.
 
 On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote:
  On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
   On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
I took a stub at implementing PV EOI using shared memory.
This should reduce the number of exits an interrupt
causes as much as by half.
   
A partially complete draft for both host and guest parts
is below.
   
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear
before injecting a nested one. Guest tests it using
a test and clear operation - this is necessary
so that host can detect interrupt nesting -
and if set, it can skip the EOI MSR.
   
There's a new MSR to set the address of said register
in guest memory. Otherwise not much changes:
- Guest EOI is not required
- ISR is automatically cleared before injection
   
Some things are incomplete: add feature negotiation
options, qemu support for said options.
No testing was done beyond compiling the kernel.
   
I would appreciate early feedback.
   
Signed-off-by: Michael S. Tsirkin m...@redhat.com
   
--
   
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d854101..8430f41 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { 
return 0; }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
+
 static inline void ack_APIC_irq(void)
 {
+   if (__test_and_clear_bit(0, __get_cpu_var(apic_eoi)))
+   return;
+
   
   While __test_and_clear_bit() is implemented in a single instruction,
   it's not required to be.  Better have the instruction there explicitly.
   
/*
 * ack_APIC_irq() actually gets compiled as a single instruction
 * ... yummie.
diff --git a/arch/x86/include/asm/kvm_host.h 
b/arch/x86/include/asm/kvm_host.h
index e216ba0..0ee1472 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
u64 length;
u64 status;
} osvw;
+
+   struct {
+   u64 msr_val;
+   struct gfn_to_hva_cache data;
+   int vector;
+   } eoi;
 };
   
   Needs to be cleared on INIT.
  
  You mean kvm_arch_vcpu_reset?
  
 
   
@@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
   smp_processor_id());
}
 
+   wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
+  MSR_KVM_EOI_ENABLED);
+
   
   Clear on kexec.
  
  With register_reboot_notifier?
  
if (has_steal_clock)
kvm_register_steal_time();
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8584322..9e38e12 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct 
kvm_lapic_irq *irq)
irq-level, irq-trig_mode);
 }
 
-static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
+{
+
+   return kvm_write_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, 
val,
+ sizeof(val));
+}
+
+static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+   return kvm_read_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, 
val,
+ sizeof(*val));
+}
+
+static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
+{
+   return (vcpu-arch.eoi.msr_val  MSR_KVM_EOI_ENABLED);
+}
+
+static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
+{
+   u32 val;
+   if (eoi_get_user(vcpu, val)  0)
+   apic_debug(Can't read EOI MSR value: 0x%llx\n,
+  (unsigned long long)vcpi-arch.eoi.msr_val);
+   if (!(val  0x1))
+   vcpu-arch.eoi.vector = -1;
+   return vcpu-arch.eoi.vector;
+}
+
+static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
+{
+   BUG_ON(vcpu-arch.eoi.vector != -1);
+   if (eoi_put_user(vcpu, 0x1)  0) {
+   apic_debug(Can't set EOI MSR value: 0x%llx\n,
+  (unsigned long long)vcpi-arch.eoi.msr_val);
+   return;
+   }
+   vcpu-arch.eoi.vector = vector;
+}
+
+static int 

Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Gleb Natapov
On Tue, Apr 10, 2012 at 10:30:04PM +0300, Michael S. Tsirkin wrote:
 On Tue, Apr 10, 2012 at 08:59:21PM +0300, Gleb Natapov wrote:
  Heh, I was working on that too.
  
  On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote:
   On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
 I took a stub at implementing PV EOI using shared memory.
 This should reduce the number of exits an interrupt
 causes as much as by half.

 A partially complete draft for both host and guest parts
 is below.

 The idea is simple: there's a bit, per APIC, in guest memory,
 that tells the guest that it does not need EOI.
 We set it before injecting an interrupt and clear
 before injecting a nested one. Guest tests it using
 a test and clear operation - this is necessary
 so that host can detect interrupt nesting -
 and if set, it can skip the EOI MSR.

 There's a new MSR to set the address of said register
 in guest memory. Otherwise not much changes:
 - Guest EOI is not required
 - ISR is automatically cleared before injection

 Some things are incomplete: add feature negotiation
 options, qemu support for said options.
 No testing was done beyond compiling the kernel.

 I would appreciate early feedback.

 Signed-off-by: Michael S. Tsirkin m...@redhat.com

 --

 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
 index d854101..8430f41 100644
 --- a/arch/x86/include/asm/apic.h
 +++ b/arch/x86/include/asm/apic.h
 @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) 
 { return 0; }
  
  #endif /* CONFIG_X86_LOCAL_APIC */
  
 +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
 +
  static inline void ack_APIC_irq(void)
  {
 + if (__test_and_clear_bit(0, __get_cpu_var(apic_eoi)))
 + return;
 +

While __test_and_clear_bit() is implemented in a single instruction,
it's not required to be.  Better have the instruction there explicitly.

   /*
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
 diff --git a/arch/x86/include/asm/kvm_host.h 
 b/arch/x86/include/asm/kvm_host.h
 index e216ba0..0ee1472 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
   u64 length;
   u64 status;
   } osvw;
 +
 + struct {
 + u64 msr_val;
 + struct gfn_to_hva_cache data;
 + int vector;
 + } eoi;
  };

Needs to be cleared on INIT.
   
   You mean kvm_arch_vcpu_reset?
   
  

 @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
  smp_processor_id());
   }
  
 + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
 +MSR_KVM_EOI_ENABLED);
 +

Clear on kexec.
   
   With register_reboot_notifier?
   
   if (has_steal_clock)
   kvm_register_steal_time();
  }
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 8584322..9e38e12 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, 
 struct kvm_lapic_irq *irq)
   irq-level, irq-trig_mode);
  }
  
 -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
 +{
 +
 + return kvm_write_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, 
 val,
 +   sizeof(val));
 +}
 +
 +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
 +{
 +
 + return kvm_read_guest_cached(vcpu-kvm, vcpu-arch.eoi.data, 
 val,
 +   sizeof(*val));
 +}
 +
 +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
 +{
 + return (vcpu-arch.eoi.msr_val  MSR_KVM_EOI_ENABLED);
 +}
 +
 +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
 +{
 + u32 val;
 + if (eoi_get_user(vcpu, val)  0)
 + apic_debug(Can't read EOI MSR value: 0x%llx\n,
 +(unsigned long long)vcpi-arch.eoi.msr_val);
 + if (!(val  0x1))
 + vcpu-arch.eoi.vector = -1;
 + return vcpu-arch.eoi.vector;
 +}
 +
 +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
 +{
 + BUG_ON(vcpu-arch.eoi.vector != -1);
 + if (eoi_put_user(vcpu, 0x1)  0) {
 + apic_debug(Can't set EOI MSR value: 0x%llx\n,
 +(unsigned long 

Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Michael S. Tsirkin
On Tue, Apr 10, 2012 at 10:33:54PM +0300, Gleb Natapov wrote:
  We don't try to match what HV does 100% anyway.
  
 We should. The same code will be used for HV.

Only where it makes sense, that is where the functionality
is sufficiently similar.

   We have to notify IOAPIC about EOI ASAP. It
   may hold another interrupt for us that has to be delivered.
  
  You mean the ack notifiers? We can skip just for the vectors
  which have ack notifiers or only if there are no notifiers.
  
 No. I mean:
 
 if (!ent-fields.mask  (ioapic-irr  (1  i)))
 ioapic_service(ioapic, i);

Hmm.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

2012-04-10 Thread Gleb Natapov
On Tue, Apr 10, 2012 at 10:40:14PM +0300, Michael S. Tsirkin wrote:
 On Tue, Apr 10, 2012 at 10:33:54PM +0300, Gleb Natapov wrote:
   We don't try to match what HV does 100% anyway.
   
  We should. The same code will be used for HV.
 
 Only where it makes sense, that is where the functionality
 is sufficiently similar.
 
You can sprinkle additional ifs in the code, but I do not see the point.

We have to notify IOAPIC about EOI ASAP. It
may hold another interrupt for us that has to be delivered.
   
   You mean the ack notifiers? We can skip just for the vectors
   which have ack notifiers or only if there are no notifiers.
   
  No. I mean:
  
  if (!ent-fields.mask  (ioapic-irr  (1  i)))
  ioapic_service(ioapic, i);
 
 Hmm.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html