On Thu, Feb 08, 2024 at 01:26:33AM +0800, Xin Li wrote:
>Set VMX nested exception bit in the VM-entry interruption information
>VMCS field when injecting a nested exception using FRED event delivery
>to ensure:
>  1) The nested exception is injected on a correct stack level.
>  2) The nested bit defined in FRED stack frame is set.
>
>The event stack level used by FRED event delivery depends on whether the
>event was a nested exception encountered during delivery of another event,
>because a nested exception is "regarded" as happening on ring 0.  E.g.,
>when #PF is configured to use stack level 1 in IA32_FRED_STKLVLS MSR:
>  - nested #PF will be delivered on stack level 1 when encountered in
>    ring 3.
>  - normal #PF will be delivered on stack level 0 when encountered in
>    ring 3.
>
>The VMX nested-exception support ensures the correct event stack level is
>chosen when a VM entry injects a nested exception.
>
>Signed-off-by: Xin Li <xin3...@intel.com>
>Tested-by: Shan Kang <shan.k...@intel.com>
>---
>
>Changes since v1:
>* Set the nested flag when there is an original interrupt (Chao Gao).
>---
> arch/x86/include/asm/kvm_host.h |  6 +++--
> arch/x86/include/asm/vmx.h      |  5 ++--
> arch/x86/kvm/svm/svm.c          |  4 +--
> arch/x86/kvm/vmx/vmx.c          |  8 ++++--
> arch/x86/kvm/x86.c              | 46 ++++++++++++++++++++++++++-------
> arch/x86/kvm/x86.h              |  1 +
> 6 files changed, 53 insertions(+), 17 deletions(-)
>
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index 0d88873eba63..ef278ee0b6ca 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -736,6 +736,7 @@ struct kvm_queued_exception {
>       u32 error_code;
>       unsigned long payload;
>       bool has_payload;
>+      bool nested;

"nested" may be lost after migration.

> };
> 
> struct kvm_vcpu_arch {
>@@ -2060,8 +2061,9 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
> void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
> void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 
> error_code);
> void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long 
> payload);
>-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
>-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 
>error_code);
>+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool nested);
>+void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr,
>+                           u32 error_code, bool nested);
> void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception 
> *fault);
> void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
>                                   struct x86_exception *fault);
>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>index 6b796c5c9c2b..68af74e48788 100644
>--- a/arch/x86/include/asm/vmx.h
>+++ b/arch/x86/include/asm/vmx.h
>@@ -134,7 +134,7 @@
> #define VMX_BASIC_DUAL_MONITOR_TREATMENT      BIT_ULL(49)
> #define VMX_BASIC_INOUT                               BIT_ULL(54)
> #define VMX_BASIC_TRUE_CTLS                   BIT_ULL(55)
>-
>+#define VMX_BASIC_NESTED_EXCEPTION            BIT_ULL(58)

This definition is not used in this patch.

> 
> /* VMX_MISC bits and bitmasks */
> #define VMX_MISC_INTEL_PT                     BIT_ULL(14)
>@@ -407,8 +407,9 @@ enum vmcs_field {
> #define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
> #define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
> #define INTR_INFO_UNBLOCK_NMI         0x1000          /* 12 */
>+#define INTR_INFO_NESTED_EXCEPTION_MASK       0x2000          /* 13 */
> #define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
>-#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
>+#define INTR_INFO_RESVD_BITS_MASK       0x7fffd000
> 
> #define VECTORING_INFO_VECTOR_MASK            INTR_INFO_VECTOR_MASK
> #define VECTORING_INFO_TYPE_MASK              INTR_INFO_INTR_TYPE_MASK
>diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>index e90b429c84f1..c220b690a37c 100644
>--- a/arch/x86/kvm/svm/svm.c
>+++ b/arch/x86/kvm/svm/svm.c
>@@ -4057,10 +4057,10 @@ static void svm_complete_interrupts(struct kvm_vcpu 
>*vcpu)
> 
>               if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
>                       u32 err = svm->vmcb->control.exit_int_info_err;
>-                      kvm_requeue_exception_e(vcpu, vector, err);
>+                      kvm_requeue_exception_e(vcpu, vector, err, false);
> 
>               } else
>-                      kvm_requeue_exception(vcpu, vector);
>+                      kvm_requeue_exception(vcpu, vector, false);
>               break;
>       case SVM_EXITINTINFO_TYPE_INTR:
>               kvm_queue_interrupt(vcpu, vector, false);
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index f622fb90a098..1f265d526daf 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -1891,6 +1891,8 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu)
>                               event_data = to_vmx(vcpu)->fred_xfd_event_data;
> 
>                       vmcs_write64(INJECTED_EVENT_DATA, event_data);
>+
>+                      intr_info |= ex->nested ? 
>INTR_INFO_NESTED_EXCEPTION_MASK : 0;
>               }
>       }
> 
>@@ -7281,9 +7283,11 @@ static void __vmx_complete_interrupts(struct kvm_vcpu 
>*vcpu, bool vectoring)
>               }
> 
>               if (event_id & INTR_INFO_DELIVER_CODE_MASK)
>-                      kvm_requeue_exception_e(vcpu, vector, 
>vmcs_read32(error_code_field));
>+                      kvm_requeue_exception_e(vcpu, vector, 
>vmcs_read32(error_code_field),
>+                                              event_id & 
>INTR_INFO_NESTED_EXCEPTION_MASK);
>               else
>-                      kvm_requeue_exception(vcpu, vector);
>+                      kvm_requeue_exception(vcpu, vector,
>+                                            event_id & 
>INTR_INFO_NESTED_EXCEPTION_MASK);
>               break;
>       case INTR_TYPE_SOFT_INTR:
>               vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 00c0062726ae..725819262085 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -645,7 +645,8 @@ static void kvm_leave_nested(struct kvm_vcpu *vcpu)
> 
> static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
>               unsigned nr, bool has_error, u32 error_code,
>-              bool has_payload, unsigned long payload, bool reinject)
>+              bool has_payload, unsigned long payload,
>+              bool reinject, bool nested)
> {
>       u32 prev_nr;
>       int class1, class2;
>@@ -696,6 +697,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
>                       vcpu->arch.exception.pending = true;
>                       vcpu->arch.exception.injected = false;
>               }
>+
>+              vcpu->arch.exception.nested = vcpu->arch.exception.nested ||
>+                                            (kvm_is_fred_enabled(vcpu) &&
>+                                             ((reinject && nested) ||
>+                                              vcpu->arch.nmi_injected ||
>+                                              vcpu->arch.interrupt.injected));

You can set the nested flag regardless of FRED because the sole place using
such information (vmx_inject_exception()) is guarded by kvm_is_fred_enabled()
already.

I would also drop the check about @reinject to make @reinject and @nested
orthogonal (i.e., avoid the artificial rule that nested interrupts should be
queued by "reinject" only).

So, how about:
                if (vcpu->arch.nmi_injected || vcpu->arch.interrupt.injected ||
                    nested)
                        vcpu->arch.exception.nested = true;

>+
>               vcpu->arch.exception.has_error_code = has_error;
>               vcpu->arch.exception.vector = nr;
>               vcpu->arch.exception.error_code = error_code;
>@@ -725,8 +733,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
>               vcpu->arch.exception.injected = false;
>               vcpu->arch.exception.pending = false;
> 
>+              /*
>+               * A #DF is NOT a nested event per its definition, however per
>+               * FRED spec 5.0 Appendix B, its delivery determines the new
>+               * stack level as is done for events occurring when CPL = 0.
>+               */
>+              vcpu->arch.exception.nested = false;
>+
>               kvm_queue_exception_e(vcpu, DF_VECTOR, 0);
>       } else {
>+              /*
>+               * FRED spec 5.0 Appendix B: delivery of a nested exception
>+               * determines the new stack level as is done for events
>+               * occurring when CPL = 0.
>+               *
>+               * IOW, FRED event delivery of an event encountered in ring 3
>+               * normally uses stack level 0 unconditionally.  However, if
>+               * the event is an exception nested on any earlier event,
>+               * delivery of the nested exception will consult the FRED MSR
>+               * IA32_FRED_STKLVLS to determine which stack level to use.
>+               */
>+              vcpu->arch.exception.nested = kvm_is_fred_enabled(vcpu);

As said above, the nested flag can be set regardless of FRED.

Reply via email to