On Wed, 2018-10-03 at 17:26 +0200, Paolo Bonzini wrote:
> Commit b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 introduced a check on
> the interrupt-window and NMI-window CPU execution controls in order to
> inject an external interrupt vmexit before the first guest instruction
> executes.  However, when APIC virtualization is enabled the host does not
> need a vmexit in order to inject an interrupt at the next interrupt window;
> instead, it just places the interrupt vector in RVI and the processor will
> inject it as soon as possible.  Therefore, on machines with APICv it is
> not enough to check the CPU execution controls: the same scenario can also
> happen if RVI>0.
> 
> Fixes: b5861e5cf2fcf83031ea3e26b0a69d887adf7d21
> Cc: Nikita Leshchenko <[email protected]>
> Cc: Sean Christopherson <[email protected]>
> Cc: Liran Alon <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>

Reviewed-by: Nikita Leshenko <[email protected]>

> ---
>  arch/x86/kvm/vmx.c | 38 ++++++++++++++++++++++++++------------
>  1 file changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6ef2d5b139b9..c7ae8ea87bc4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6162,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
>       nested_mark_vmcs12_pages_dirty(vcpu);
>  }
>  
> +static u8 vmx_get_rvi(void)
> +{
> +     return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +}
> +
>  static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  {
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -6174,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
>               WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
>               return false;
>  
> -     rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +     rvi = vmx_get_rvi();
>  
>       vapic_page = kmap(vmx->nested.virtual_apic_page);
>       vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
> @@ -10349,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>       return max_irr;
>  }
>  
> +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
> +{
> +     u8 rvi = vmx_get_rvi();
> +     u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
> +
> +     return ((rvi & 0xf0) > (vppr & 0xf0));
> +}
> +
>  static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
>  {
>       if (!kvm_vcpu_apicv_active(vcpu))
> @@ -12593,10 +12606,13 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
>       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>       bool from_vmentry = !!exit_qual;
>       u32 dummy_exit_qual;
> -     u32 vmcs01_cpu_exec_ctrl;
> +     bool evaluate_pending_interrupts;
>       int r = 0;
>  
> -     vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +     evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
> +             (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
> +     if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
> +             evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
>  
>       enter_guest_mode(vcpu);
>  
> @@ -12644,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
>        * to L1 or delivered directly to L2 (e.g. In case L1 don't
>        * intercept EXTERNAL_INTERRUPT).
>        *
> -      * Usually this would be handled by L0 requesting a
> -      * IRQ/NMI window by setting VMCS accordingly. However,
> -      * this setting was done on VMCS01 and now VMCS02 is active
> -      * instead. Thus, we force L0 to perform pending event
> -      * evaluation by requesting a KVM_REQ_EVENT.
> -      */
> -     if (vmcs01_cpu_exec_ctrl &
> -             (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
> +      * Usually this would be handled by the processor noticing an
> +      * IRQ/NMI window request, or checking RVI during evaluation of
> +      * pending virtual interrupts.  However, this setting was done
> +      * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
> +      * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
> +      */
> +     if (unlikely(evaluate_pending_interrupts))
>               kvm_make_request(KVM_REQ_EVENT, vcpu);
> -     }
>  
>       /*
>        * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
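
For readers skimming the patch: the new vmx_has_apicv_interrupt() helper
compares interrupt priority classes, i.e. the upper nibble of RVI against
the upper nibble of the virtual PPR, which is how APICv decides whether a
pending virtual interrupt is actually deliverable. A minimal standalone
sketch of that comparison (illustrative only, not KVM code; the helper
name and the sample values are made up):

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * The priority class of a vector, or of PPR, is bits 7:4.  A pending
	 * vector is deliverable only when its class is strictly higher than
	 * the class currently held in the processor priority register.
	 * Mirrors the (rvi & 0xf0) > (vppr & 0xf0) test in the patch.
	 */
	static int apicv_interrupt_deliverable(uint8_t rvi, uint8_t vppr)
	{
		return (rvi & 0xf0) > (vppr & 0xf0);
	}

	int main(void)
	{
		printf("%d\n", apicv_interrupt_deliverable(0x61, 0x50)); /* 1: class 6 > class 5 */
		printf("%d\n", apicv_interrupt_deliverable(0x51, 0x50)); /* 0: same class, blocked */
		return 0;
	}

In the first case the pending vector would be injected by the processor
without any window exiting, which is exactly the situation where
enter_vmx_non_root_mode() now has to request KVM_REQ_EVENT even though
neither the interrupt-window nor the NMI-window control is set.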
