Re: [PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-05 Thread Paolo Bonzini
Il 05/08/2014 06:42, Wanpeng Li ha scritto:
 After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
 if L1 asks us to), Acknowledge interrupt on exit behavior can be
 emulated. To do so, KVM will ask the APIC for the interrupt vector
 during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
 kvm_get_apic_interrupt would return -1 and give the following WARNING:
 
 Call Trace:
  [81493563] dump_stack+0x49/0x5e
  [8103f0eb] warn_slowpath_common+0x7c/0x96
  [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [8103f11a] warn_slowpath_null+0x15/0x17
  [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
  [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
  [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
  [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
  [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]
 
 When APIC-v is enabled, all interrupts to L1 are delivered through APIC-v.
 But when L2 is running, an external interrupt will cause an L1 vmexit with
 reason "external interrupt". L1 will then pick up the interrupt through
 vmcs12. When L1 acks the interrupt, APIC-v hardware will still perform the
 vEOI update, because APIC-v is enabled while L1 is running. The problem
 is that the interrupt was not delivered through APIC-v hardware, so
 SVI/RVI/vPPR are not set, but the hardware requires them when performing
 the vEOI update. The solution is that, when L1 tries to pick up the
 interrupt from vmcs12, the hypervisor updates SVI/RVI/vPPR so that the
 subsequent vEOI and vPPR updates are performed correctly.
 
 Also, since the interrupt is delivered through vmcs12, APIC-v hardware will
 not clear vIRR, and the hypervisor needs to clear it before L1 runs.
 
 Suggested-by: Paolo Bonzini pbonz...@redhat.com
 Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
 Tested-by: Liu, RongrongX rongrongx@intel.com
 Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
 ---
 v1 -> v2:
  * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt, 
apic_set_isr and apic_clear_irr)
 
  arch/x86/kvm/irq.c   |  2 +-
  arch/x86/kvm/lapic.c | 52 
 +++-
  2 files changed, 40 insertions(+), 14 deletions(-)
 
 diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
 index bd0da43..a1ec6a5 100644
 --- a/arch/x86/kvm/irq.c
 +++ b/arch/x86/kvm/irq.c
 @@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
  
   vector = kvm_cpu_get_extint(v);
  
 - if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
 + if (vector != -1)
   return vector;  /* PIC */
  
   return kvm_get_apic_interrupt(v);   /* APIC */
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 3855103..08e8a89 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct 
 kvm_lapic *apic)
  
  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  {
 - apic->irr_pending = false;
 + struct kvm_vcpu *vcpu;
 +
 + vcpu = apic->vcpu;
 +
   apic_clear_vector(vec, apic->regs + APIC_IRR);
 - if (apic_search_irr(apic) != -1)
 - apic->irr_pending = true;
 + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
 + /* try to update RVI */
 + kvm_make_request(KVM_REQ_EVENT, vcpu);
 + else {
 + vec = apic_search_irr(apic);
 + apic->irr_pending = (vec != -1);
 + }
  }
  
  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
  {
 - /* Note that we never get here with APIC virtualization enabled.  */
 + struct kvm_vcpu *vcpu;
 +
 + if (__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
 + return;
 +
 + vcpu = apic-vcpu;
  
 - if (!__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
 - ++apic-isr_count;
 - BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
   /*
 -  * ISR (in service register) bit is set when injecting an interrupt.
 -  * The highest vector is injected. Thus the latest bit set matches
 -  * the highest bit in ISR.
 +  * With APIC virtualization enabled, all caching is disabled
 +  * because the processor can modify ISR under the hood.  Instead
 +  * just set SVI.
*/
 - apic-highest_isr_cache = vec;
 + if (unlikely(kvm_apic_vid_enabled(vcpu-kvm)))
 + kvm_x86_ops-hwapic_isr_update(vcpu-kvm, vec);
 + else {
 + ++apic-isr_count;
 + BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
 + /*
 +  * ISR (in service register) bit is set when injecting an 
 interrupt.
 +  * The highest vector is injected. Thus the latest bit set 
 matches
 +  * the highest bit in ISR.
 +  */
 + apic-highest_isr_cache = vec;
 +   

Re: [PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-05 Thread Felipe Reyes

Hi,

On 08/05/2014 01:04 PM, Paolo Bonzini wrote:

Il 05/08/2014 06:42, Wanpeng Li ha scritto:

After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
if L1 asks us to), Acknowledge interrupt on exit behavior can be
emulated. To do so, KVM will ask the APIC for the interrupt vector
during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
kvm_get_apic_interrupt would return -1 and give the following WARNING:

Call Trace:
  [81493563] dump_stack+0x49/0x5e
  [8103f0eb] warn_slowpath_common+0x7c/0x96
  [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [8103f11a] warn_slowpath_null+0x15/0x17
  [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
  [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
  [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
  [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
  [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]

When APIC-v is enabled, all interrupts to L1 are delivered through APIC-v.
But when L2 is running, an external interrupt will cause an L1 vmexit with
reason "external interrupt". L1 will then pick up the interrupt through
vmcs12. When L1 acks the interrupt, APIC-v hardware will still perform the
vEOI update, because APIC-v is enabled while L1 is running. The problem
is that the interrupt was not delivered through APIC-v hardware, so
SVI/RVI/vPPR are not set, but the hardware requires them when performing
the vEOI update. The solution is that, when L1 tries to pick up the
interrupt from vmcs12, the hypervisor updates SVI/RVI/vPPR so that the
subsequent vEOI and vPPR updates are performed correctly.

Also, since the interrupt is delivered through vmcs12, APIC-v hardware will
not clear vIRR, and the hypervisor needs to clear it before L1 runs.

Suggested-by: Paolo Bonzini pbonz...@redhat.com
Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
Tested-by: Liu, RongrongX rongrongx@intel.com
Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
---
v1 -> v2:
  * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt,
apic_set_isr and apic_clear_irr)

  arch/x86/kvm/irq.c   |  2 +-
  arch/x86/kvm/lapic.c | 52 +++-
  2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bd0da43..a1ec6a5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)

vector = kvm_cpu_get_extint(v);

-   if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+   if (vector != -1)
return vector;  /* PIC */

return kvm_get_apic_interrupt(v);   /* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3855103..08e8a89 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic 
*apic)

  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  {
-   apic->irr_pending = false;
+   struct kvm_vcpu *vcpu;
+
+   vcpu = apic->vcpu;
+
apic_clear_vector(vec, apic->regs + APIC_IRR);
-   if (apic_search_irr(apic) != -1)
-   apic->irr_pending = true;
+   if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+   /* try to update RVI */
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   else {
+   vec = apic_search_irr(apic);
+   apic->irr_pending = (vec != -1);
+   }
  }

  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
  {
-   /* Note that we never get here with APIC virtualization enabled.  */
+   struct kvm_vcpu *vcpu;
+
+   if (__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
+   return;
+
+   vcpu = apic-vcpu;

-   if (!__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
-   ++apic-isr_count;
-   BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
/*
-* ISR (in service register) bit is set when injecting an interrupt.
-* The highest vector is injected. Thus the latest bit set matches
-* the highest bit in ISR.
+* With APIC virtualization enabled, all caching is disabled
+* because the processor can modify ISR under the hood.  Instead
+* just set SVI.
 */
-   apic-highest_isr_cache = vec;
+   if (unlikely(kvm_apic_vid_enabled(vcpu-kvm)))
+   kvm_x86_ops-hwapic_isr_update(vcpu-kvm, vec);
+   else {
+   ++apic-isr_count;
+   BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
+   /*
+* ISR (in service register) bit is set when injecting an 
interrupt.
+* The highest vector is injected. Thus the latest bit set 
matches
+* the highest bit in ISR.
+*/
+   

Re: [PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-05 Thread Wanpeng Li
On Tue, Aug 05, 2014 at 02:39:05PM +0200, Felipe Reyes wrote:
Hi,

On 08/05/2014 01:04 PM, Paolo Bonzini wrote:
Il 05/08/2014 06:42, Wanpeng Li ha scritto:
After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
if L1 asks us to), Acknowledge interrupt on exit behavior can be
emulated. To do so, KVM will ask the APIC for the interrupt vector
during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
kvm_get_apic_interrupt would return -1 and give the following WARNING:

Call Trace:
  [81493563] dump_stack+0x49/0x5e
  [8103f0eb] warn_slowpath_common+0x7c/0x96
  [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [8103f11a] warn_slowpath_null+0x15/0x17
  [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
  [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
  [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
  [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
  [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]

When APIC-v is enabled, all interrupts to L1 are delivered through APIC-v.
But when L2 is running, an external interrupt will cause an L1 vmexit with
reason "external interrupt". L1 will then pick up the interrupt through
vmcs12. When L1 acks the interrupt, APIC-v hardware will still perform the
vEOI update, because APIC-v is enabled while L1 is running. The problem
is that the interrupt was not delivered through APIC-v hardware, so
SVI/RVI/vPPR are not set, but the hardware requires them when performing
the vEOI update. The solution is that, when L1 tries to pick up the
interrupt from vmcs12, the hypervisor updates SVI/RVI/vPPR so that the
subsequent vEOI and vPPR updates are performed correctly.

Also, since the interrupt is delivered through vmcs12, APIC-v hardware will
not clear vIRR, and the hypervisor needs to clear it before L1 runs.

Suggested-by: Paolo Bonzini pbonz...@redhat.com
Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
Tested-by: Liu, RongrongX rongrongx@intel.com
Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
---
v1 -> v2:
  * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt,
apic_set_isr and apic_clear_irr)

  arch/x86/kvm/irq.c   |  2 +-
  arch/x86/kvm/lapic.c | 52 
 +++-
  2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bd0da43..a1ec6a5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)

 vector = kvm_cpu_get_extint(v);

-if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+if (vector != -1)
 return vector;  /* PIC */

 return kvm_get_apic_interrupt(v);   /* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3855103..08e8a89 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct 
kvm_lapic *apic)

  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  {
-apic->irr_pending = false;
+struct kvm_vcpu *vcpu;
+
+vcpu = apic->vcpu;
+
 apic_clear_vector(vec, apic->regs + APIC_IRR);
-if (apic_search_irr(apic) != -1)
-apic->irr_pending = true;
+if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+/* try to update RVI */
+kvm_make_request(KVM_REQ_EVENT, vcpu);
+else {
+vec = apic_search_irr(apic);
+apic->irr_pending = (vec != -1);
+}
  }

  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
  {
-/* Note that we never get here with APIC virtualization enabled.  */
+struct kvm_vcpu *vcpu;
+
+if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
+return;
+
+vcpu = apic->vcpu;

-if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
-++apic->isr_count;
-BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 /*
- * ISR (in service register) bit is set when injecting an interrupt.
- * The highest vector is injected. Thus the latest bit set matches
- * the highest bit in ISR.
+ * With APIC virtualization enabled, all caching is disabled
+ * because the processor can modify ISR under the hood.  Instead
+ * just set SVI.
  */
-apic->highest_isr_cache = vec;
+if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
+else {
+++apic->isr_count;
+BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
+/*
+ * ISR (in service register) bit is set when injecting an interrupt.
+ * The highest vector is injected. Thus the latest bit set matches
+ * the highest bit in ISR.
+ */
+apic->highest_isr_cache = vec;
+}
  }

  static inline int 

[PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-04 Thread Wanpeng Li
After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
if L1 asks us to), Acknowledge interrupt on exit behavior can be
emulated. To do so, KVM will ask the APIC for the interrupt vector
during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
kvm_get_apic_interrupt would return -1 and give the following WARNING:

Call Trace:
 [81493563] dump_stack+0x49/0x5e
 [8103f0eb] warn_slowpath_common+0x7c/0x96
 [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
 [8103f11a] warn_slowpath_null+0x15/0x17
 [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
 [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
 [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
 [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
 [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
 [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]

When APIC-v is enabled, all interrupts to L1 are delivered through APIC-v.
But when L2 is running, an external interrupt will cause an L1 vmexit with
reason "external interrupt". L1 will then pick up the interrupt through
vmcs12. When L1 acks the interrupt, APIC-v hardware will still perform the
vEOI update, because APIC-v is enabled while L1 is running. The problem
is that the interrupt was not delivered through APIC-v hardware, so
SVI/RVI/vPPR are not set, but the hardware requires them when performing
the vEOI update. The solution is that, when L1 tries to pick up the
interrupt from vmcs12, the hypervisor updates SVI/RVI/vPPR so that the
subsequent vEOI and vPPR updates are performed correctly.

Also, since the interrupt is delivered through vmcs12, APIC-v hardware will
not clear vIRR, and the hypervisor needs to clear it before L1 runs.

Suggested-by: Paolo Bonzini pbonz...@redhat.com
Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
Tested-by: Liu, RongrongX rongrongx@intel.com
Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
---
v1 -> v2:
 * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt, 
   apic_set_isr and apic_clear_irr)

 arch/x86/kvm/irq.c   |  2 +-
 arch/x86/kvm/lapic.c | 52 +++-
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bd0da43..a1ec6a5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 
vector = kvm_cpu_get_extint(v);
 
-   if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+   if (vector != -1)
return vector;  /* PIC */
 
return kvm_get_apic_interrupt(v);   /* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3855103..08e8a89 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic 
*apic)
 
 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 {
-   apic->irr_pending = false;
+   struct kvm_vcpu *vcpu;
+
+   vcpu = apic->vcpu;
+
apic_clear_vector(vec, apic->regs + APIC_IRR);
-   if (apic_search_irr(apic) != -1)
-   apic->irr_pending = true;
+   if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+   /* try to update RVI */
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   else {
+   vec = apic_search_irr(apic);
+   apic->irr_pending = (vec != -1);
+   }
 }
 
 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 {
-   /* Note that we never get here with APIC virtualization enabled.  */
+   struct kvm_vcpu *vcpu;
+
+   if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
+   return;
+
+   vcpu = apic->vcpu;
 
-   if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
-   ++apic->isr_count;
-   BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
/*
-* ISR (in service register) bit is set when injecting an interrupt.
-* The highest vector is injected. Thus the latest bit set matches
-* the highest bit in ISR.
+* With APIC virtualization enabled, all caching is disabled
+* because the processor can modify ISR under the hood.  Instead
+* just set SVI.
 */
-   apic->highest_isr_cache = vec;
+   if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+   kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
+   else {
+   ++apic->isr_count;
+   BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
+   /*
+* ISR (in service register) bit is set when injecting an interrupt.
+* The highest vector is injected. Thus the latest bit set matches
+* the highest bit in ISR.
+*/
+   apic->highest_isr_cache = vec;
+   }
 }
 
 static inline int apic_find_highest_isr(struct kvm_lapic