Re: [PATCHv5 5/5] kvm: host side for eoi optimization

2012-05-31 Thread Gleb Natapov
On Tue, May 22, 2012 at 05:05:47PM +0300, Michael S. Tsirkin wrote:
 Implementation of PV EOI using shared memory.
 This reduces the number of exits an interrupt
 causes by as much as half.
 
 The idea is simple: there's a bit, per APIC, in guest memory,
 that tells the guest that it does not need EOI.
 We set it before injecting an interrupt and clear it
 before injecting a nested one. The guest tests it using
 a test-and-clear operation - this is necessary
 so that the host can detect interrupt nesting -
 and, if it was set, skips the EOI MSR write.
 
 There's a new MSR to set the address of said register
 in guest memory. Otherwise not much changed:
 - Guest EOI is not required
 - Register is tested & ISR is automatically cleared on exit
 
 For testing results see description of previous patch
 'kvm_para: guest side for eoi avoidance'.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---
  arch/x86/include/asm/kvm_host.h |   12 
  arch/x86/kvm/cpuid.c|1 +
  arch/x86/kvm/lapic.c|  135 +-
  arch/x86/kvm/lapic.h|2 +
  arch/x86/kvm/trace.h|   34 ++
  arch/x86/kvm/x86.c  |5 ++
  6 files changed, 185 insertions(+), 4 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 32297f2..3ded418 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -174,6 +174,13 @@ enum {
  
  /* apic attention bits */
  #define KVM_APIC_CHECK_VAPIC 0
 +/*
 + * The following bit is set with PV-EOI, unset on EOI.
 + * We detect PV-EOI changes by guest by comparing
 + * this bit with PV-EOI in guest memory.
 + * See the implementation in apic_update_pv_eoi.
 + */ 
 +#define KVM_APIC_PV_EOI_PENDING  1
  
  /*
   * We don't want allocation failures within the mmu code, so we preallocate
 @@ -485,6 +492,11 @@ struct kvm_vcpu_arch {
   u64 length;
   u64 status;
   } osvw;
 +
 + struct {
 + u64 msr_val;
 + struct gfn_to_hva_cache data;
 + } pv_eoi;
  };
  
  struct kvm_lpage_info {
 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
 index eba8345..a9f155a 100644
 --- a/arch/x86/kvm/cpuid.c
 +++ b/arch/x86/kvm/cpuid.c
 @@ -408,6 +408,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	     (1 << KVM_FEATURE_NOP_IO_DELAY) |
 	     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 	     (1 << KVM_FEATURE_ASYNC_PF) |
 +	     (1 << KVM_FEATURE_PV_EOI) |
 	     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
  
   if (sched_info_on())
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 0d2985d..9d804e7 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -309,6 +309,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 			irq->level, irq->trig_mode);
  }
  
 +static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 +{
 +
 +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 +				      sizeof(val));
 +}
 +
 +static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 +{
 +
 +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 +				     sizeof(*val));
 +}
 +
 +static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 +{
 +	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 +}
 +
 +static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 +{
 +	u8 val;
 +	if (pv_eoi_get_user(vcpu, &val) < 0)
 +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
 +			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 +	return val & 0x1;
 +}
 +
 +static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 +{
 +	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
 +			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 +		return;
 +	}
 +	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 +}
 +
 +static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 +{
 +	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
 +			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 +		return;
 +	}
 +	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 +}
 +
  static inline int apic_find_highest_isr(struct kvm_lapic *apic)
  {
   int result;
 @@ -525,15 +573,18 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
  }
  
 -static void apic_set_eoi(struct kvm_lapic *apic)
 +static int apic_set_eoi(struct kvm_lapic *apic)
  {
   int vector = apic_find_highest_isr(apic);
 +
 + trace_kvm_eoi(apic, vector);

Re: [PATCHv5 5/5] kvm: host side for eoi optimization

2012-05-31 Thread Michael S. Tsirkin
On Thu, May 31, 2012 at 12:57:10PM +0300, Gleb Natapov wrote:
  @@ -1345,17 +1437,40 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
  	apic_set_tpr(vcpu->arch.apic, data & 0xff);
   }
   
  +/*
  + * apic_sync_pv_eoi_to_guest - called before vmentry
  + *
  + * Detect whether it's safe to enable PV EOI and
  + * if yes do so.
  + */
  +static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
  +   struct kvm_lapic *apic)
  +{
  +   if (!pv_eoi_enabled(vcpu) ||
  +   /* IRR set or many bits in ISR: could be nested. */
 +   unlikely(apic->irr_pending) ||
 +   unlikely(apic->isr_count != 1) ||
 Remind me why pv_eoi should not be set if there is more than one ISR bit set?

There's a comment below: it might be safe but
we do not bother: no easy way to know which interrupt
has higher priority.

In my testing more than one bit almost never happens in practice so not
worth optimizing for.


 
  +   /* Cache not set: safe but we don't bother. */
 +   unlikely(apic->isr_cache == -1) ||
 +   /* Need EOI to update ioapic. */
 +   unlikely(kvm_ioapic_handles_vector(vcpu->kvm, apic->isr_cache)))
  +   return;
  +
 +   pv_eoi_set_pending(apic->vcpu);
  +}
  +
 apic_sync_pv_eoi_to_guest() is not paired with
 apic_sync_pv_eoi_from_guest() if event injection is canceled.
 You can enter guest with stale pv_eoi bit.

Never. The pv_eoi bit is cleared on each exit.
It will stay cleared unless we set it here.
I will add a comment.
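
For reference, the exit-side half of this handshake works roughly as sketched
below. The names (apic_sync_pv_eoi_from_guest, pv_eoi_get_pending,
pv_eoi_clr_pending, apic_set_eoi, KVM_APIC_PV_EOI_PENDING) are the ones used
in this patch and thread; the body is an illustrative reconstruction, not the
posted hunk.

static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;

	if (!pv_eoi_enabled(vcpu) ||
	    !test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		return;

	/* Read the shared byte back from guest memory. */
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear both the guest-memory bit and the host-side attention bit:
	 * this is the "cleared on each exit" behaviour referred to above.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	/* The guest cleared the bit, i.e. it skipped the EOI MSR write,
	 * so complete the EOI on its behalf. */
	apic_set_eoi(apic);
}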



Re: [PATCHv5 5/5] kvm: host side for eoi optimization

2012-05-31 Thread Gleb Natapov
On Thu, May 31, 2012 at 01:11:11PM +0300, Michael S. Tsirkin wrote:
 On Thu, May 31, 2012 at 12:57:10PM +0300, Gleb Natapov wrote:
   @@ -1345,17 +1437,40 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
   	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

   +/*
   + * apic_sync_pv_eoi_to_guest - called before vmentry
   + *
   + * Detect whether it's safe to enable PV EOI and
   + * if yes do so.
   + */
   +static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
   + struct kvm_lapic *apic)
   +{
   + if (!pv_eoi_enabled(vcpu) ||
   + /* IRR set or many bits in ISR: could be nested. */
  + unlikely(apic->irr_pending) ||
  + unlikely(apic->isr_count != 1) ||
  Remind me why pv_eoi should not be set if there is more than one ISR bit set?
 
 There's a comment below: it might be safe but
 we do not bother: no easy way to know which interrupt
 has higher priority.
Last injected interrupt has highest priority.

 
 In my testing more than one bit almost never happens in practice so not
 worth optimizing for.
 
 
  
   + /* Cache not set: safe but we don't bother. */
  + unlikely(apic->isr_cache == -1) ||
  + /* Need EOI to update ioapic. */
  + unlikely(kvm_ioapic_handles_vector(vcpu->kvm, apic->isr_cache)))
   + return;
   +
  + pv_eoi_set_pending(apic->vcpu);
   +}
   +
  apic_sync_pv_eoi_to_guest() is not paired with
  apic_sync_pv_eoi_from_guest() if event injection is canceled.
  You can enter guest with stale pv_eoi bit.
 
 Never. The pv_eoi bit is cleared on each exit.
There will be no exit since there will be no entry. Search for
goto out after kvm_lapic_sync_to_vapic().

 It will stay cleared unless we set it here.
 I will add a comment.

--
Gleb.


Re: [PATCHv5 5/5] kvm: host side for eoi optimization

2012-05-31 Thread Michael S. Tsirkin
On Thu, May 31, 2012 at 01:15:13PM +0300, Gleb Natapov wrote:
+   /* Cache not set: safe but we don't bother. */
+   unlikely(apic->isr_cache == -1) ||
+   /* Need EOI to update ioapic. */
+   unlikely(kvm_ioapic_handles_vector(vcpu->kvm, apic->isr_cache)))
+   return;
+
+   pv_eoi_set_pending(apic->vcpu);
+}
+
   apic_sync_pv_eoi_to_guest() is not paired with
   apic_sync_pv_eoi_from_guest() if event injection is canceled.
   You can enter guest with stale pv_eoi bit.
  
  Never. The pv_eoi bit is cleared on each exit.
 There will be no exit since there will be no entry. Search for
 goto out after kvm_lapic_sync_to_vapic().

I think you've found a bug, thanks a bunch.
I have fixed it, but the x86 guys asked me not to
post more patches until the merge window closes :(
So I'll sit on a fix for several days.
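
To make the stale-bit scenario concrete, the entry path being discussed looks
roughly like this (a heavily abridged sketch of vcpu_enter_guest() in x86.c;
must_cancel_entry is a placeholder for the real bail-out conditions, and the
kvm_lapic_sync_from_vapic() call on the cancel path is only one possible shape
of a fix, not the patch that was eventually posted):

	kvm_lapic_sync_to_vapic(vcpu);	/* apic_sync_pv_eoi_to_guest() may
					 * set the PV EOI bit in guest
					 * memory here */

	if (must_cancel_entry) {
		kvm_x86_ops->cancel_injection(vcpu);
		/*
		 * Without a matching sync here there is no vmexit to clear
		 * the bit set above, so it stays stale across the next
		 * entry. Running the exit-side sync on this path would
		 * restore the pairing:
		 */
		kvm_lapic_sync_from_vapic(vcpu);
		goto out;
	}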


[PATCHv5 5/5] kvm: host side for eoi optimization

2012-05-22 Thread Michael S. Tsirkin
Implementation of PV EOI using shared memory.
This reduces the number of exits an interrupt
causes by as much as half.

The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear it
before injecting a nested one. The guest tests it using
a test-and-clear operation - this is necessary
so that the host can detect interrupt nesting -
and, if it was set, skips the EOI MSR write.

There's a new MSR to set the address of said register
in guest memory. Otherwise not much changed:
- Guest EOI is not required
- Register is tested & ISR is automatically cleared on exit

For testing results see description of previous patch
'kvm_para: guest side for eoi avoidance'.
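
For context, the guest half of this protocol (added by the previous patch in
the series) boils down to a test-and-clear on the shared word before writing
the EOI register. The sketch below is purely illustrative; the identifiers are
assumptions, not the actual guest code:

/* Per-CPU word whose address the guest registers via the new MSR. */
static DEFINE_PER_CPU(unsigned long, kvm_pv_eoi_word);

static void kvm_pv_eoi_write(u32 reg, u32 val)
{
	/*
	 * Test and clear in one operation: clearing the bit is what lets
	 * the host detect that a nested interrupt was injected meanwhile.
	 */
	if (__test_and_clear_bit(0, &__get_cpu_var(kvm_pv_eoi_word)))
		return;				/* host granted EOI avoidance */
	apic_write(APIC_EOI, APIC_EOI_ACK);	/* fall back to a real EOI */
}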

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 arch/x86/include/asm/kvm_host.h |   12 
 arch/x86/kvm/cpuid.c|1 +
 arch/x86/kvm/lapic.c|  135 +-
 arch/x86/kvm/lapic.h|2 +
 arch/x86/kvm/trace.h|   34 ++
 arch/x86/kvm/x86.c  |5 ++
 6 files changed, 185 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32297f2..3ded418 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -174,6 +174,13 @@ enum {
 
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC   0
+/*
+ * The following bit is set with PV-EOI, unset on EOI.
+ * We detect PV-EOI changes by guest by comparing
+ * this bit with PV-EOI in guest memory.
+ * See the implementation in apic_update_pv_eoi.
+ */ 
+#define KVM_APIC_PV_EOI_PENDING1
 
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
@@ -485,6 +492,11 @@ struct kvm_vcpu_arch {
u64 length;
u64 status;
} osvw;
+
+   struct {
+   u64 msr_val;
+   struct gfn_to_hva_cache data;
+   } pv_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index eba8345..a9f155a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -408,6 +408,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	     (1 << KVM_FEATURE_NOP_IO_DELAY) |
 	     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 	     (1 << KVM_FEATURE_ASYNC_PF) |
+	     (1 << KVM_FEATURE_PV_EOI) |
 	     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
 
if (sched_info_on())
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0d2985d..9d804e7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -309,6 +309,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 			irq->level, irq->trig_mode);
 }
 
+static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
+{
+
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
+				      sizeof(val));
+}
+
+static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
+{
+
+	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
+				     sizeof(*val));
+}
+
+static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
+}
+
+static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
+{
+	u8 val;
+	if (pv_eoi_get_user(vcpu, &val) < 0)
+		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+	return val & 0x1;
+}
+
+static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
+{
+	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
+		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+		return;
+	}
+	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+}
+
+static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
+{
+	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
+		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+		return;
+	}
+	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+}
+
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 {
int result;
@@ -525,15 +573,18 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
-static void apic_set_eoi(struct kvm_lapic *apic)
+static int apic_set_eoi(struct kvm_lapic *apic)
 {
int vector = apic_find_highest_isr(apic);
+
+   trace_kvm_eoi(apic, vector);
+
/*
 * Not every write EOI will has corresponding ISR,
 * one example is when Kernel check timer on