Allow a pending and an injected exception to co-exist when both
are raised.

Add a 'kvm_deliver_pending_exception' function which 'merges' the pending
and injected exceptions, or delivers a VM exit carrying both for the case
in which L1 intercepts the pending exception.

The latter is done by vendor code using the new nested callback
'deliver_exception_as_vmexit'.

kvm_deliver_pending_exception is called after each VM exit and prior
to VM entry, which ensures that during userspace VM exits only an
injected exception can be in a raised state.
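
In rough outline, the new flow added to x86.c below is (simplified
pseudocode of the hunks that follow, not the literal code):

    kvm_deliver_pending_exception(vcpu):
        if there is no pending exception:
            return 0
        kvm_do_deliver_pending_exception(vcpu)
        if the merge left a pending #DF:
            deliver it with a second call

    kvm_do_deliver_pending_exception(vcpu):
        if is_guest_mode(vcpu):
            try nested_ops->deliver_exception_as_vmexit(vcpu)
            (returns -EBUSY while a nested run is pending)
        if there is no injected exception:
            deliver the payload and move pending -> injected
        else if the injected exception is #DF:
            request a triple fault
        else:
            merge the two into a pending #DF per SDM Table 5-5, or
            drop the injected exception and queue the pending one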

Signed-off-by: Maxim Levitsky <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |   9 ++
 arch/x86/kvm/svm/nested.c       |  27 ++--
 arch/x86/kvm/svm/svm.c          |   2 +-
 arch/x86/kvm/vmx/nested.c       |  58 ++++----
 arch/x86/kvm/vmx/vmx.c          |   2 +-
 arch/x86/kvm/x86.c              | 233 ++++++++++++++++++--------------
 6 files changed, 181 insertions(+), 150 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3b2fd276e8d5..a9b9cd030d9a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1346,6 +1346,15 @@ struct kvm_x86_ops {
 
 struct kvm_x86_nested_ops {
        int (*check_events)(struct kvm_vcpu *vcpu);
+
+       /*
+        * Deliver a pending exception as a VM exit if L1 intercepts it.
+        * Returns -EBUSY if L1 does intercept the exception but it is
+        * not possible to deliver it right now (for example, when a
+        * nested run is pending).
+        */
+       int (*deliver_exception_as_vmexit)(struct kvm_vcpu *vcpu);
+
        bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
        void (*triple_fault)(struct kvm_vcpu *vcpu);
        int (*get_state)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 7adad9b6dcad..ff745d59ffcf 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1061,21 +1061,6 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       if (vcpu->arch.pending_exception.valid) {
-               /*
-                * Only a pending nested run can block a pending exception.
-                * Otherwise an injected NMI/interrupt should either be
-                * lost or delivered to the nested hypervisor in the EXITINTINFO
-                * vmcb field, while delivering the pending exception.
-                */
-               if (svm->nested.nested_run_pending)
-                        return -EBUSY;
-               if (!nested_exit_on_exception(svm))
-                       return 0;
-               nested_svm_inject_exception_vmexit(svm);
-               return 0;
-       }
-
        if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
                if (block_nested_events)
                        return -EBUSY;
@@ -1107,6 +1092,17 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+int svm_deliver_nested_exception_as_vmexit(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (svm->nested.nested_run_pending)
+               return -EBUSY;
+       if (nested_exit_on_exception(svm))
+               nested_svm_inject_exception_vmexit(svm);
+       return 0;
+}
+
 int nested_svm_exit_special(struct vcpu_svm *svm)
 {
        u32 exit_code = svm->vmcb->control.exit_code;
@@ -1321,6 +1317,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 struct kvm_x86_nested_ops svm_nested_ops = {
        .check_events = svm_check_nested_events,
        .triple_fault = nested_svm_triple_fault,
+       .deliver_exception_as_vmexit = svm_deliver_nested_exception_as_vmexit,
        .get_nested_state_pages = svm_get_nested_state_pages,
        .get_state = svm_get_nested_state,
        .set_state = svm_set_nested_state,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 90b541138c5a..b89e48574c39 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -363,7 +363,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
        bool has_error_code = vcpu->arch.injected_exception.has_error_code;
        u32 error_code = vcpu->arch.injected_exception.error_code;
 
-       kvm_deliver_exception_payload(vcpu);
+       WARN_ON_ONCE(vcpu->arch.pending_exception.valid);
 
        if (nr == BP_VECTOR && !nrips) {
                unsigned long rip, old_rip = kvm_rip_read(vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5d54fecff9a7..1c09b132c55c 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3768,7 +3768,6 @@ static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
 static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       unsigned long exit_qual;
        bool block_nested_events =
            vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
        bool mtf_pending = vmx->nested.mtf_pending;
@@ -3804,41 +3803,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       /*
-        * Process any exceptions that are not debug traps before MTF.
-        *
-        * Note that only a pending nested run can block a pending exception.
-        * Otherwise an injected NMI/interrupt should either be
-        * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
-        * while delivering the pending exception.
-        */
-
-       if (vcpu->arch.pending_exception.valid && !vmx_pending_dbg_trap(vcpu)) {
-               if (vmx->nested.nested_run_pending)
-                       return -EBUSY;
-               if (!nested_vmx_check_exception(vcpu, &exit_qual))
-                       goto no_vmexit;
-               nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-               return 0;
-       }
-
        if (mtf_pending) {
                if (block_nested_events)
                        return -EBUSY;
+
                nested_vmx_update_pending_dbg(vcpu);
                nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
                return 0;
        }
 
-       if (vcpu->arch.pending_exception.valid) {
-               if (vmx->nested.nested_run_pending)
-                       return -EBUSY;
-               if (!nested_vmx_check_exception(vcpu, &exit_qual))
-                       goto no_vmexit;
-               nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-               return 0;
-       }
-
        if (nested_vmx_preemption_timer_pending(vcpu)) {
                if (block_nested_events)
                        return -EBUSY;
@@ -3884,6 +3857,34 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int nested_vmx_deliver_exception_as_vmexit(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long exit_qual;
+
+       if (vmx->nested.nested_run_pending)
+               return -EBUSY;
+
+       if (vmx->nested.mtf_pending && vmx_pending_dbg_trap(vcpu)) {
+               /*
+                * A pending monitor trap takes precedence over a pending
+                * debug exception, which is 'stashed' into
+                * 'GUEST_PENDING_DBG_EXCEPTIONS'.
+                */
+
+               nested_vmx_update_pending_dbg(vcpu);
+               vmx->nested.mtf_pending = false;
+               nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
+               return 0;
+       }
+       if (vcpu->arch.pending_exception.valid) {
+               if (nested_vmx_check_exception(vcpu, &exit_qual))
+                       nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+               return 0;
+       }
+       return 0;
+}
+
 static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
 {
        ktime_t remaining =
@@ -6603,6 +6604,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
 
 struct kvm_x86_nested_ops vmx_nested_ops = {
        .check_events = vmx_check_nested_events,
+       .deliver_exception_as_vmexit = nested_vmx_deliver_exception_as_vmexit,
        .hv_timer_pending = nested_vmx_preemption_timer_pending,
        .triple_fault = nested_vmx_triple_fault,
        .get_state = vmx_get_nested_state,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a9b241d2b271..fc6bc40d47b0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1682,7 +1682,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
        u32 error_code = vcpu->arch.injected_exception.error_code;
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-       kvm_deliver_exception_payload(vcpu);
+       WARN_ON_ONCE(vcpu->arch.pending_exception.valid);
 
        if (has_error_code) {
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 493d87b0c2d5..a363204f37be 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -535,86 +535,30 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
 
 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
-               unsigned nr, bool has_error, u32 error_code,
-               bool has_payload, unsigned long payload, bool reinject)
+                                  unsigned int nr, bool has_error, u32 error_code,
+                                  bool has_payload, unsigned long payload,
+                                  bool reinject)
 {
-       u32 prev_nr;
-       int class1, class2;
-
+       struct kvm_queued_exception *exc;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
 
-       if (!vcpu->arch.pending_exception.valid && !vcpu->arch.injected_exception.valid) {
-       queue:
-               if (reinject) {
-                       /*
-                        * On vmentry, vcpu->arch.exception.pending is only
-                        * true if an event injection was blocked by
-                        * nested_run_pending.  In that case, however,
-                        * vcpu_enter_guest requests an immediate exit,
-                        * and the guest shouldn't proceed far enough to
-                        * need reinjection.
-                        */
-                       WARN_ON_ONCE(vcpu->arch.pending_exception.valid);
-                       if (WARN_ON_ONCE(has_payload)) {
-                               /*
-                                * A reinjected event has already
-                                * delivered its payload.
-                                */
-                               has_payload = false;
-                               payload = 0;
-                       }
-
-                       vcpu->arch.injected_exception.valid = true;
-                       vcpu->arch.injected_exception.has_error_code = has_error;
-                       vcpu->arch.injected_exception.nr = nr;
-                       vcpu->arch.injected_exception.error_code = error_code;
+       WARN_ON_ONCE(vcpu->arch.pending_exception.valid);
+       WARN_ON_ONCE(reinject && vcpu->arch.injected_exception.valid);
 
-               } else {
-                       vcpu->arch.pending_exception.valid = true;
-                       vcpu->arch.injected_exception.valid = false;
-                       vcpu->arch.pending_exception.has_error_code = has_error;
-                       vcpu->arch.pending_exception.nr = nr;
-                       vcpu->arch.pending_exception.error_code = error_code;
-               }
-
-               vcpu->arch.exception_payload.valid = has_payload;
-               vcpu->arch.exception_payload.value = payload;
-               if (!is_guest_mode(vcpu))
-                       kvm_deliver_exception_payload(vcpu);
-               return;
-       }
-
-       /* to check exception */
-       prev_nr = vcpu->arch.injected_exception.nr;
-       if (prev_nr == DF_VECTOR) {
-               /* triple fault -> shutdown */
-               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-               return;
-       }
-       class1 = exception_class(prev_nr);
-       class2 = exception_class(nr);
-       if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
-               || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
-               /*
-                * Generate double fault per SDM Table 5-5.  Set
-                * exception.pending = true so that the double fault
-                * can trigger a nested vmexit.
-                */
-               vcpu->arch.pending_exception.valid = true;
-               vcpu->arch.injected_exception.valid = false;
-               vcpu->arch.pending_exception.has_error_code = true;
-               vcpu->arch.pending_exception.nr = DF_VECTOR;
-               vcpu->arch.pending_exception.error_code = 0;
+       exc = reinject ? &vcpu->arch.injected_exception :
+                        &vcpu->arch.pending_exception;
+       exc->valid = true;
+       exc->nr = nr;
+       exc->has_error_code = has_error;
+       exc->error_code = error_code;
 
-               vcpu->arch.exception_payload.valid = false;
-               vcpu->arch.exception_payload.value = 0;
-       } else
-               /* replace previous exception with a new one in a hope
-                  that instruction re-execution will regenerate lost
-                  exception */
-               goto queue;
+       /* A re-injected exception has its payload already delivered */
+       WARN_ON_ONCE(reinject && has_payload);
+       vcpu->arch.exception_payload.valid = has_payload;
+       vcpu->arch.exception_payload.value = payload;
 }
 
+
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
        kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
@@ -641,6 +585,95 @@ static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
                               true, payload, false);
 }
 
+static int kvm_do_deliver_pending_exception(struct kvm_vcpu *vcpu)
+{
+       int class1, class2, ret;
+
+       /* Try to deliver the current pending exception as a VM exit */
+       if (is_guest_mode(vcpu)) {
+               ret = kvm_x86_ops.nested_ops->deliver_exception_as_vmexit(vcpu);
+               if (ret || !vcpu->arch.pending_exception.valid)
+                       return ret;
+       }
+
+       /* No injected exception, so just deliver the payload and inject it */
+       if (!vcpu->arch.injected_exception.valid) {
+               trace_kvm_inj_exception(vcpu->arch.pending_exception.nr,
+                                       vcpu->arch.pending_exception.has_error_code,
+                                       vcpu->arch.pending_exception.error_code);
+queue:
+               /* Intel SDM 17.3.1.1 */
+               if (exception_type(vcpu->arch.pending_exception.nr) == EXCPT_FAULT)
+                       __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+                                            X86_EFLAGS_RF);
+
+               kvm_deliver_exception_payload(vcpu);
+
+       /*
+        * Intel SDM 17.2.4: the processor clears the GD flag upon
+        * entering the debug exception handler, to allow the handler
+        * access to the debug registers.
+        */
+               if (vcpu->arch.pending_exception.nr == DB_VECTOR) {
+                       if (vcpu->arch.dr7 & DR7_GD) {
+                               vcpu->arch.dr7 &= ~DR7_GD;
+                               kvm_update_dr7(vcpu);
+                       }
+               }
+
+               if (vcpu->arch.pending_exception.error_code && !is_protmode(vcpu))
+                       vcpu->arch.pending_exception.error_code = false;
+
+               vcpu->arch.injected_exception = vcpu->arch.pending_exception;
+               vcpu->arch.pending_exception.valid = false;
+               return 0;
+       }
+
+       /* Convert a pending exception and an injected #DF to a triple fault */
+       if (vcpu->arch.injected_exception.nr == DF_VECTOR) {
+               /* triple fault -> shutdown */
+               vcpu->arch.injected_exception.valid = false;
+               vcpu->arch.pending_exception.valid = false;
+               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+               return 0;
+       }
+
+       class1 = exception_class(vcpu->arch.injected_exception.nr);
+       class2 = exception_class(vcpu->arch.pending_exception.nr);
+
+       if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+               || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+
+               /* Generate double fault per SDM Table 5-5. */
+               vcpu->arch.injected_exception.valid = false;
+               vcpu->arch.pending_exception.valid = true;
+               vcpu->arch.pending_exception.has_error_code = true;
+               vcpu->arch.pending_exception.nr = DF_VECTOR;
+               vcpu->arch.pending_exception.error_code = 0;
+               vcpu->arch.exception_payload.valid = false;
+       } else
+               /* Drop the injected exception and replace it with the pending one */
+               goto queue;
+
+       return 0;
+}
+
+static int kvm_deliver_pending_exception(struct kvm_vcpu *vcpu)
+{
+       int ret = 0;
+
+       if (!vcpu->arch.pending_exception.valid)
+               return ret;
+
+       ret = kvm_do_deliver_pending_exception(vcpu);
+
+       if (ret || !vcpu->arch.pending_exception.valid)
+               return ret;
+
+       WARN_ON_ONCE(vcpu->arch.pending_exception.nr != DF_VECTOR);
+       return kvm_do_deliver_pending_exception(vcpu);
+}
+
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 {
        if (err)
@@ -4297,6 +4330,12 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
            vcpu->arch.pending_exception.valid && vcpu->arch.exception_payload.valid)
                kvm_deliver_exception_payload(vcpu);
 
+       /*
+        * Currently we merge the pending and the injected exceptions after
+        * each VM exit, which can fail only when a nested run is pending,
+        * in which case only an injected exception (from us or L1) is possible.
+        */
+
        WARN_ON_ONCE(vcpu->arch.pending_exception.valid &&
                     vcpu->arch.injected_exception.valid);
 
@@ -8401,8 +8440,6 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
 
 static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->arch.injected_exception.error_code && !is_protmode(vcpu))
-               vcpu->arch.injected_exception.error_code = false;
        static_call(kvm_x86_queue_exception)(vcpu);
 }
 
@@ -8411,8 +8448,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
        int r;
        bool can_inject = true;
 
-       /* try to reinject previous events if any */
+       r = kvm_deliver_pending_exception(vcpu);
+       if (r < 0)
+               goto busy;
+
+       WARN_ON_ONCE(vcpu->arch.pending_exception.valid);
 
+       /* try to reinject previous events if any */
        if (vcpu->arch.injected_exception.valid) {
                kvm_inject_exception(vcpu);
                can_inject = false;
@@ -8431,7 +8473,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
         * serviced prior to recognizing any new events in order to
         * fully complete the previous instruction.
         */
-       else if (!vcpu->arch.pending_exception.valid) {
+       else {
                if (vcpu->arch.nmi_injected) {
                        static_call(kvm_x86_set_nmi)(vcpu);
                        can_inject = false;
@@ -8441,9 +8483,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
                }
        }
 
-       WARN_ON_ONCE(vcpu->arch.pending_exception.valid &&
-                    vcpu->arch.injected_exception.valid);
-
        /*
         * Call check_nested_events() even if we reinjected a previous event
         * in order for caller to determine if it should require immediate-exit
@@ -8456,30 +8495,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
                        goto busy;
        }
 
-       /* try to inject new event if pending */
-       if (vcpu->arch.pending_exception.valid) {
-               trace_kvm_inj_exception(vcpu->arch.pending_exception.nr,
-                                       vcpu->arch.pending_exception.has_error_code,
-                                       vcpu->arch.pending_exception.error_code);
-
-               vcpu->arch.injected_exception = vcpu->arch.pending_exception;
-               vcpu->arch.pending_exception.valid = false;
-
-               if (exception_type(vcpu->arch.injected_exception.nr) == EXCPT_FAULT)
-                       __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
-                                            X86_EFLAGS_RF);
-
-               if (vcpu->arch.injected_exception.nr == DB_VECTOR) {
-                       kvm_deliver_exception_payload(vcpu);
-                       if (vcpu->arch.dr7 & DR7_GD) {
-                               vcpu->arch.dr7 &= ~DR7_GD;
-                               kvm_update_dr7(vcpu);
-                       }
-               }
-
-               kvm_inject_exception(vcpu);
-               can_inject = false;
-       }
 
        /*
         * Finally, inject interrupt events.  If an event cannot be injected
@@ -9270,6 +9285,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                kvm_lapic_sync_from_vapic(vcpu);
 
        r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
+
+       /*
+        * Deliver the pending exception so that the state of having a pending
+        * and an injected exception is not visible to userspace.
+        */
+
+       kvm_deliver_pending_exception(vcpu);
+
        return r;
 
 cancel_injection:
@@ -11014,7 +11037,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
        if (vcpu->arch.pv.pv_unhalted)
                return true;
 
-       if (vcpu->arch.pending_exception.valid)
+       if (vcpu->arch.pending_exception.valid || vcpu->arch.injected_exception.valid)
                return true;
 
        if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
-- 
2.26.2
