Save/restore the nested flag of an exception during VM save/restore
and live migration, so that the correct event stack level is chosen
when a nested exception is injected through FRED event delivery.

The event stack level used by FRED event delivery depends on whether
the event is a nested exception, i.e. one encountered during delivery
of an earlier event, because a nested exception is treated as if it
occurred in ring 0.  E.g., when #PF is configured to use stack level 1
in the IA32_FRED_STKLVLS MSR:
  - a nested #PF is delivered on the stack pointed to by the
    IA32_FRED_RSP1 MSR, whether encountered in ring 3 or ring 0.
  - a normal #PF is delivered on the stack pointed to by the
    IA32_FRED_RSP0 MSR when encountered in ring 3.
  - a normal #PF is delivered on the stack pointed to by the
    IA32_FRED_RSP1 MSR when encountered in ring 0.

As such, QEMU needs to track whether an exception is nested across VM
context save/restore and live migration.

Signed-off-by: Xin Li (Intel) <x...@zytor.com>
---
 linux-headers/asm-x86/kvm.h |  4 +++-
 linux-headers/linux/kvm.h   |  1 +
 target/i386/cpu.c           |  1 +
 target/i386/cpu.h           |  1 +
 target/i386/kvm/kvm.c       | 35 +++++++++++++++++++++++++++++++++++
 target/i386/kvm/kvm_i386.h  |  1 +
 target/i386/machine.c       |  1 +
 7 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index f0c1a730d9..f494509b94 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -324,6 +324,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SMM                0x00000008
 #define KVM_VCPUEVENT_VALID_PAYLOAD    0x00000010
 #define KVM_VCPUEVENT_VALID_TRIPLE_FAULT       0x00000020
+#define KVM_VCPUEVENT_VALID_NESTED_FLAG        0x00000040
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS      0x01
@@ -361,7 +362,8 @@ struct kvm_vcpu_events {
        struct {
                __u8 pending;
        } triple_fault;
-       __u8 reserved[26];
+       __u8 reserved[25];
+       __u8 exception_is_nested;
        __u8 exception_has_payload;
        __u64 exception_payload;
 };
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 32c5885a3c..521ec3af37 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -952,6 +952,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_ARM_EL2 240
 #define KVM_CAP_ARM_EL2_E2H0 241
 #define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_EXCEPTION_NESTED_FLAG 243
 
 struct kvm_irq_routing_irqchip {
        __u32 irqchip;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 251d5760a0..4483bf9d10 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -8723,6 +8723,7 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
     env->exception_injected = 0;
     env->exception_has_payload = false;
     env->exception_payload = 0;
+    env->exception_is_nested = false;
     env->nmi_injected = false;
     env->triple_fault_pending = false;
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index f977fc49a7..a9116bfd2c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2097,6 +2097,7 @@ typedef struct CPUArchState {
     uint8_t has_error_code;
     uint8_t exception_has_payload;
     uint64_t exception_payload;
+    uint8_t exception_is_nested;
     uint8_t triple_fault_pending;
     uint32_t ins_len;
     uint32_t sipi_vector;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 369626f8c8..db4af9ec2d 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -174,6 +174,7 @@ static int has_xsave2;
 static int has_xcrs;
 static int has_sregs2;
 static int has_exception_payload;
+static int has_exception_nested_flag;
 static int has_triple_fault_event;
 
 static bool has_msr_mcg_ext_ctl;
@@ -259,6 +260,11 @@ bool kvm_has_exception_payload(void)
     return has_exception_payload;
 }
 
+bool kvm_has_exception_nested_flag(void)
+{
+    return has_exception_nested_flag;
+}
+
 static bool kvm_x2apic_api_set_flags(uint64_t flags)
 {
     KVMState *s = KVM_STATE(current_accel());
@@ -3075,6 +3081,21 @@ static int kvm_vm_enable_exception_payload(KVMState *s)
     return ret;
 }
 
+static int kvm_vm_enable_exception_nested_flag(KVMState *s)
+{
+    int ret = 0;
+    has_exception_nested_flag = kvm_check_extension(s, KVM_CAP_EXCEPTION_NESTED_FLAG);
+    if (has_exception_nested_flag) {
+        ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_NESTED_FLAG, 0, true);
+        if (ret < 0) {
+            error_report("kvm: Failed to enable exception nested flag cap: %s",
+                         strerror(-ret));
+        }
+    }
+
+    return ret;
+}
+
 static int kvm_vm_enable_triple_fault_event(KVMState *s)
 {
     int ret = 0;
@@ -3255,6 +3276,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
         return ret;
     }
 
+    ret = kvm_vm_enable_exception_nested_flag(s);
+    if (ret < 0) {
+        return ret;
+    }
+
     ret = kvm_vm_enable_triple_fault_event(s);
     if (ret < 0) {
         return ret;
@@ -5041,6 +5067,10 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level)
         events.exception_has_payload = env->exception_has_payload;
         events.exception_payload = env->exception_payload;
     }
+    if (has_exception_nested_flag) {
+        events.flags |= KVM_VCPUEVENT_VALID_NESTED_FLAG;
+        events.exception_is_nested = env->exception_is_nested;
+    }
     events.exception.nr = env->exception_nr;
     events.exception.injected = env->exception_injected;
     events.exception.has_error_code = env->has_error_code;
@@ -5109,6 +5139,11 @@ static int kvm_get_vcpu_events(X86CPU *cpu)
         env->exception_pending = 0;
         env->exception_has_payload = false;
     }
+    if (events.flags & KVM_VCPUEVENT_VALID_NESTED_FLAG) {
+        env->exception_is_nested = events.exception_is_nested;
+    } else {
+        env->exception_is_nested = false;
+    }
     env->exception_injected = events.exception.injected;
     env->exception_nr =
         (env->exception_pending || env->exception_injected) ?
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 5f83e8850a..7e765b6833 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -54,6 +54,7 @@ typedef struct KvmCpuidInfo {
 bool kvm_is_vm_type_supported(int type);
 bool kvm_has_adjust_clock_stable(void);
 bool kvm_has_exception_payload(void);
+bool kvm_has_exception_nested_flag(void);
 void kvm_synchronize_all_tsc(void);
 
 void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
diff --git a/target/i386/machine.c b/target/i386/machine.c
index dd2dac1d44..a452d2c97e 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -458,6 +458,7 @@ static const VMStateDescription vmstate_exception_info = {
         VMSTATE_UINT8(env.exception_injected, X86CPU),
         VMSTATE_UINT8(env.exception_has_payload, X86CPU),
         VMSTATE_UINT64(env.exception_payload, X86CPU),
+        VMSTATE_UINT8(env.exception_is_nested, X86CPU),
         VMSTATE_END_OF_LIST()
     }
 };

base-commit: 9e601684dc24a521bb1d23215a63e5c6e79ea0bb
-- 
2.50.1


Reply via email to