Commit 5bea5123cbf0 ("KVM: VMX: check nested state and CR4.VMXE against
SMM") introduced a check in vmx_set_cr4() that forbids setting VMXE from SMM.
The check is correct; however, there is a special case when RSM is executed
to leave SMM: rsm_enter_protected_mode() is called with HF_SMM_MASK still
set, so if VMXE was set before entering SMM, we fail to return from SMM.

Resolve the issue by temporarily dropping HF_SMM_MASK around set_cr4() calls
when ops->set_cr() is called from RSM.

Reported-by: Jon Doron <ari...@gmail.com>
Suggested-by: Liran Alon <liran.a...@oracle.com>
Fixes: 5bea5123cbf0 ("KVM: VMX: check nested state and CR4.VMXE against SMM")
Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com>
---
- Instead of putting the temporary HF_SMM_MASK drop in
  rsm_enter_protected_mode() (as was suggested by Liran), move it to
  emulator_set_cr(), modifying its interface. emulate.c seems to be free of
  vcpu specifics at the moment, and we may want to keep it that way.
- It seems that Hyper-V+UEFI on KVM is still broken: I'm observing sporadic
  hangs even with this patch. These hangs, however, seem to be unrelated to
  RSM.
---
 arch/x86/include/asm/kvm_emulate.h |  3 ++-
 arch/x86/kvm/emulate.c             | 27 ++++++++++++++-------------
 arch/x86/kvm/x86.c                 | 12 +++++++++++-
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 93c4bf598fb0..6c33caa82fa5 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -203,7 +203,8 @@ struct x86_emulate_ops {
        void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
        void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
        ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
-       int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
+       int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val,
+                     bool from_rsm);
        int (*cpl)(struct x86_emulate_ctxt *ctxt);
        int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
        int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c338984c850d..a6204105d4d7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2413,7 +2413,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                cr3 &= ~0xfff;
        }
 
-       bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+       bad = ctxt->ops->set_cr(ctxt, 3, cr3, true);
        if (bad)
                return X86EMUL_UNHANDLEABLE;
 
@@ -2422,20 +2422,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
         * Then enable protected mode.  However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
-       bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+       bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE, true);
        if (bad)
                return X86EMUL_UNHANDLEABLE;
 
-       bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+       bad = ctxt->ops->set_cr(ctxt, 0, cr0, true);
        if (bad)
                return X86EMUL_UNHANDLEABLE;
 
        if (cr4 & X86_CR4_PCIDE) {
-               bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+               bad = ctxt->ops->set_cr(ctxt, 4, cr4, true);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
-                       bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+                       bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid, true);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }
@@ -2581,7 +2581,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 
                /* Zero CR4.PCIDE before CR0.PG.  */
                if (cr4 & X86_CR4_PCIDE) {
-                       ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+                       ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE, true);
                        cr4 &= ~X86_CR4_PCIDE;
                }
 
@@ -2595,11 +2595,12 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
        /* For the 64-bit case, this will clear EFER.LMA.  */
        cr0 = ctxt->ops->get_cr(ctxt, 0);
        if (cr0 & X86_CR0_PE)
-               ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+               ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE),
+                                 true);
 
        /* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
        if (cr4 & X86_CR4_PAE)
-               ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+               ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE, true);
 
        /* And finally go back to 32-bit mode.  */
        efer = 0;
@@ -3131,7 +3132,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
        int ret;
        u8 cpl;
 
-       if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
+       if (ctxt->ops->set_cr(ctxt, 3, tss->cr3, false))
                return emulate_gp(ctxt, 0);
        ctxt->_eip = tss->eip;
        ctxt->eflags = tss->eflags | 2;
@@ -3331,7 +3332,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
                write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
        }
 
-       ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
+       ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS, false);
        ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
 
        if (has_error_code) {
@@ -3633,7 +3634,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
 
 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
-       if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+       if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val, false))
                return emulate_gp(ctxt, 0);
 
        /* Disable writeback. */
@@ -3766,7 +3767,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
 
        cr0 = ctxt->ops->get_cr(ctxt, 0);
        cr0 &= ~X86_CR0_TS;
-       ctxt->ops->set_cr(ctxt, 0, cr0);
+       ctxt->ops->set_cr(ctxt, 0, cr0, false);
        return X86EMUL_CONTINUE;
 }
 
@@ -3866,7 +3867,7 @@ static int em_smsw(struct x86_emulate_ctxt *ctxt)
 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 {
        ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
-                         | (ctxt->src.val & 0x0f));
+                         | (ctxt->src.val & 0x0f), false);
        ctxt->dst.type = OP_NONE;
        return X86EMUL_CONTINUE;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a419656521b6..f2745e3170b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5739,7 +5739,8 @@ static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
        return value;
 }
 
-static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
+static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val,
+                          bool from_rsm)
 {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        int res = 0;
@@ -5755,7 +5756,16 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
                res = kvm_set_cr3(vcpu, val);
                break;
        case 4:
+               /*
+                * set_cr4() may refuse to set certain flags (e.g. VMXE) in
+                * SMM, but we're actually leaving it; temporarily drop
+                * HF_SMM_MASK when setting CR4.
+                */
+               if (from_rsm)
+                       vcpu->arch.hflags &= ~HF_SMM_MASK;
                res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
+               if (from_rsm)
+                       vcpu->arch.hflags |= HF_SMM_MASK;
                break;
        case 8:
                res = kvm_set_cr8(vcpu, val);
-- 
2.20.1

Reply via email to