From: Marc Orr <marc...@google.com>

commit acff78477b9b4f26ecdf65733a4ed77fe837e9dc upstream.

The nested_vmx_prepare_msr_bitmap() function doesn't directly guard the
x2APIC MSR intercepts with the "virtualize x2APIC mode" MSR. As a
result, we discovered the potential for a buggy or malicious L1 to get
access to L0's x2APIC MSRs, via an L2, as follows.

1. L1 executes WRMSR(IA32_SPEC_CTRL, 1). This causes the spec_ctrl
variable, in nested_vmx_prepare_msr_bitmap() to become true.
2. L1 disables "virtualize x2APIC mode" in VMCS12.
3. L1 enables "APIC-register virtualization" in VMCS12.

Now, KVM will set VMCS02's x2APIC MSR intercepts from VMCS12, and then
set "virtualize x2APIC mode" to 0 in VMCS02. Oops.

This patch closes the leak by explicitly guarding VMCS02's x2APIC MSR
intercepts with VMCS12's "virtualize x2APIC mode" control.

The scenario outlined above and fix prescribed here, were verified with
a related patch in kvm-unit-tests titled "Add leak scenario to
virt_x2apic_mode_test".

Note, it looks like this issue may have been introduced inadvertently
during a merge---see 15303ba5d1cd.

Signed-off-by: Marc Orr <marc...@google.com>
Reviewed-by: Jim Mattson <jmatt...@google.com>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>

---
 arch/x86/kvm/vmx.c |   72 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 28 deletions(-)

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11582,6 +11582,17 @@ static int nested_vmx_check_tpr_shadow_c
        return 0;
 }
 
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+       int msr;
+
+       for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+               unsigned word = msr / BITS_PER_LONG;
+
+               msr_bitmap[word] = ~0;
+               msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+       }
+}
+
 /*
  * Merge L0's and L1's MSR bitmap, return false to indicate that
  * we do not use the hardware.
@@ -11623,39 +11634,44 @@ static inline bool nested_vmx_prepare_ms
                return false;
 
        msr_bitmap_l1 = (unsigned long *)kmap(page);
-       if (nested_cpu_has_apic_reg_virt(vmcs12)) {
-               /*
-                * L0 need not intercept reads for MSRs between 0x800 and 
0x8ff, it
-                * just lets the processor take the value from the virtual-APIC 
page;
-                * take those 256 bits directly from the L1 bitmap.
-                */
-               for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-                       unsigned word = msr / BITS_PER_LONG;
-                       msr_bitmap_l0[word] = msr_bitmap_l1[word];
-                       msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
-               }
-       } else {
-               for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-                       unsigned word = msr / BITS_PER_LONG;
-                       msr_bitmap_l0[word] = ~0;
-                       msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
-               }
-       }
 
-       nested_vmx_disable_intercept_for_msr(
-               msr_bitmap_l1, msr_bitmap_l0,
-               X2APIC_MSR(APIC_TASKPRI),
-               MSR_TYPE_W);
+       /*
+        * To keep the control flow simple, pay eight 8-byte writes (sixteen
+        * 4-byte writes on 32-bit systems) up front to enable intercepts for
+        * the x2APIC MSR range and selectively disable them below.
+        */
+       enable_x2apic_msr_intercepts(msr_bitmap_l0);
+
+       if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+               if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+                       /*
+                        * L0 need not intercept reads for MSRs between 0x800
+                        * and 0x8ff, it just lets the processor take the value
+                        * from the virtual-APIC page; take those 256 bits
+                        * directly from the L1 bitmap.
+                        */
+                       for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+                               unsigned word = msr / BITS_PER_LONG;
+
+                               msr_bitmap_l0[word] = msr_bitmap_l1[word];
+                       }
+               }
 
-       if (nested_cpu_has_vid(vmcs12)) {
-               nested_vmx_disable_intercept_for_msr(
-                       msr_bitmap_l1, msr_bitmap_l0,
-                       X2APIC_MSR(APIC_EOI),
-                       MSR_TYPE_W);
                nested_vmx_disable_intercept_for_msr(
                        msr_bitmap_l1, msr_bitmap_l0,
-                       X2APIC_MSR(APIC_SELF_IPI),
+                       X2APIC_MSR(APIC_TASKPRI),
                        MSR_TYPE_W);
+
+               if (nested_cpu_has_vid(vmcs12)) {
+                       nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
+                               X2APIC_MSR(APIC_EOI),
+                               MSR_TYPE_W);
+                       nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
+                               X2APIC_MSR(APIC_SELF_IPI),
+                               MSR_TYPE_W);
+               }
        }
 
        if (spec_ctrl)


Reply via email to