4.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Paolo Bonzini <pbonz...@redhat.com>

commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6 upstream.

Place the MSR bitmap in struct loaded_vmcs, and update it in place
every time the x2apic or APICv state can change.  This is rare and
the loop can handle 64 MSRs per iteration, in a similar fashion as
nested_vmx_prepare_msr_bitmap.

This prepares for choosing, on a per-VM basis, whether to intercept
the SPEC_CTRL and PRED_CMD MSRs.

Suggested-by: Jim Mattson <jmatt...@google.com>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: David Woodhouse <d...@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
[bwh: Backported to 4.4:
 - APICv support looked different
 - We still need to intercept the APIC_ID MSR
 - Adjust context]
Signed-off-by: Ben Hutchings <ben.hutchi...@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 arch/x86/kvm/vmx.c |  254 +++++++++++++++++++++++------------------------------
 1 file changed, 112 insertions(+), 142 deletions(-)

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -109,6 +109,14 @@ static u64 __read_mostly host_xss;
 static bool __read_mostly enable_pml = 1;
 module_param_named(pml, enable_pml, bool, S_IRUGO);
 
+#define MSR_TYPE_R     1
+#define MSR_TYPE_W     2
+#define MSR_TYPE_RW    3
+
+#define MSR_BITMAP_MODE_X2APIC         1
+#define MSR_BITMAP_MODE_X2APIC_APICV   2
+#define MSR_BITMAP_MODE_LM             4
+
 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
@@ -188,6 +196,7 @@ struct loaded_vmcs {
        struct vmcs *vmcs;
        int cpu;
        int launched;
+       unsigned long *msr_bitmap;
        struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -423,8 +432,6 @@ struct nested_vmx {
        u16 posted_intr_nv;
        u64 msr_ia32_feature_control;
 
-       unsigned long *msr_bitmap;
-
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
 
@@ -525,6 +532,7 @@ struct vcpu_vmx {
        unsigned long         host_rsp;
        u8                    fail;
        bool                  nmi_known_unmasked;
+       u8                    msr_bitmap_mode;
        u32                   exit_intr_info;
        u32                   idt_vectoring_info;
        ulong                 rflags;
@@ -883,6 +891,7 @@ static void vmx_sync_pir_to_irr_dummy(st
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 static int alloc_identity_pagetable(struct kvm *kvm);
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -902,10 +911,6 @@ static DEFINE_PER_CPU(spinlock_t, blocke
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2346,27 +2351,6 @@ static void move_msr_up(struct vcpu_vmx
        vmx->guest_msrs[from] = tmp;
 }
 
-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
-{
-       unsigned long *msr_bitmap;
-
-       if (is_guest_mode(vcpu))
-               msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
-       else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
-               if (is_long_mode(vcpu))
-                       msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
-               else
-                       msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
-       } else {
-               if (is_long_mode(vcpu))
-                       msr_bitmap = vmx_msr_bitmap_longmode;
-               else
-                       msr_bitmap = vmx_msr_bitmap_legacy;
-       }
-
-       vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-}
-
 /*
  * Set up the vmcs to automatically save and restore system
  * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
@@ -2407,7 +2391,7 @@ static void setup_msrs(struct vcpu_vmx *
        vmx->save_nmsrs = save_nmsrs;
 
        if (cpu_has_vmx_msr_bitmap())
-               vmx_set_msr_bitmap(&vmx->vcpu);
+               vmx_update_msr_bitmap(&vmx->vcpu);
 }
 
 /*
@@ -3360,6 +3344,8 @@ static void free_loaded_vmcs(struct load
        loaded_vmcs_clear(loaded_vmcs);
        free_vmcs(loaded_vmcs->vmcs);
        loaded_vmcs->vmcs = NULL;
+       if (loaded_vmcs->msr_bitmap)
+               free_page((unsigned long)loaded_vmcs->msr_bitmap);
 }
 
 static struct vmcs *alloc_vmcs(void)
@@ -3374,7 +3360,18 @@ static int alloc_loaded_vmcs(struct load
                return -ENOMEM;
 
        loaded_vmcs_init(loaded_vmcs);
+
+       if (cpu_has_vmx_msr_bitmap()) {
+               loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+               if (!loaded_vmcs->msr_bitmap)
+                       goto out_vmcs;
+               memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+       }
        return 0;
+
+out_vmcs:
+       free_loaded_vmcs(loaded_vmcs);
+       return -ENOMEM;
 }
 
 static void free_kvm_area(void)
@@ -4373,10 +4370,8 @@ static void free_vpid(int vpid)
        spin_unlock(&vmx_vpid_lock);
 }
 
-#define MSR_TYPE_R     1
-#define MSR_TYPE_W     2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
-                                               u32 msr, int type)
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+                                                         u32 msr, int type)
 {
        int f = sizeof(unsigned long);
 
@@ -4410,8 +4405,8 @@ static void __vmx_disable_intercept_for_
        }
 }
 
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
-                                               u32 msr, int type)
+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+                                                        u32 msr, int type)
 {
        int f = sizeof(unsigned long);
 
@@ -4491,37 +4486,76 @@ static void nested_vmx_disable_intercept
        }
 }
 
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+                                                     u32 msr, int type, bool value)
 {
-       if (!longmode_only)
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
-                                               msr, MSR_TYPE_R | MSR_TYPE_W);
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
-                                               msr, MSR_TYPE_R | MSR_TYPE_W);
+       if (value)
+               vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+       else
+               vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
 }
 
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
 {
-       __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                       msr, MSR_TYPE_R);
-       __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                       msr, MSR_TYPE_R);
+       u8 mode = 0;
+
+       if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+               mode |= MSR_BITMAP_MODE_X2APIC;
+               if (enable_apicv)
+                       mode |= MSR_BITMAP_MODE_X2APIC_APICV;
+       }
+
+       if (is_long_mode(vcpu))
+               mode |= MSR_BITMAP_MODE_LM;
+
+       return mode;
 }
 
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+
+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
+                                        u8 mode)
 {
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                       msr, MSR_TYPE_R);
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                       msr, MSR_TYPE_R);
+       int msr;
+
+       for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+               unsigned word = msr / BITS_PER_LONG;
+               msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
+               msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+       }
+
+       if (mode & MSR_BITMAP_MODE_X2APIC) {
+               /*
+                * TPR reads and writes can be virtualized even if virtual interrupt
+                * delivery is not in use.
+                */
+               vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+               if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
+                       vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_ID), MSR_TYPE_R);
+                       vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
+                       vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+                       vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+               }
+       }
 }
 
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
 {
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                       msr, MSR_TYPE_W);
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                       msr, MSR_TYPE_W);
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       u8 mode = vmx_msr_bitmap_mode(vcpu);
+       u8 changed = mode ^ vmx->msr_bitmap_mode;
+
+       if (!changed)
+               return;
+
+       vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
+                                 !(mode & MSR_BITMAP_MODE_LM));
+
+       if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
+               vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+
+       vmx->msr_bitmap_mode = mode;
 }
 
 static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
@@ -4842,7 +4876,7 @@ static int vmx_vcpu_setup(struct vcpu_vm
                vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
        }
        if (cpu_has_vmx_msr_bitmap())
-               vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
+               vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
 
        vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
@@ -6183,7 +6217,7 @@ static void wakeup_handler(void)
 
 static __init int hardware_setup(void)
 {
-       int r = -ENOMEM, i, msr;
+       int r = -ENOMEM, i;
 
        rdmsrl_safe(MSR_EFER, &host_efer);
 
@@ -6198,31 +6232,13 @@ static __init int hardware_setup(void)
        if (!vmx_io_bitmap_b)
                goto out;
 
-       vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy)
-               goto out1;
-
-       vmx_msr_bitmap_legacy_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy_x2apic)
-               goto out2;
-
-       vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode)
-               goto out3;
-
-       vmx_msr_bitmap_longmode_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode_x2apic)
-               goto out4;
-
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
-               goto out6;
+               goto out1;
 
        vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmwrite_bitmap)
-               goto out7;
+               goto out2;
 
        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -6231,12 +6247,9 @@ static __init int hardware_setup(void)
 
        memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
-       memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-       memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
-               goto out8;
+               goto out3;
        }
 
        if (boot_cpu_has(X86_FEATURE_NX))
@@ -6302,38 +6315,8 @@ static __init int hardware_setup(void)
                kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
        }
 
-       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-
-       memcpy(vmx_msr_bitmap_legacy_x2apic,
-                       vmx_msr_bitmap_legacy, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_longmode_x2apic,
-                       vmx_msr_bitmap_longmode, PAGE_SIZE);
-
        set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
-       if (enable_apicv) {
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       vmx_disable_intercept_msr_read_x2apic(msr);
-
-               /* According SDM, in x2apic mode, the whole id reg is used.
-                * But in KVM, it only use the highest eight bits. Need to
-                * intercept it */
-               vmx_enable_intercept_msr_read_x2apic(0x802);
-               /* TMCCT */
-               vmx_enable_intercept_msr_read_x2apic(0x839);
-               /* TPR */
-               vmx_disable_intercept_msr_write_x2apic(0x808);
-               /* EOI */
-               vmx_disable_intercept_msr_write_x2apic(0x80b);
-               /* SELF-IPI */
-               vmx_disable_intercept_msr_write_x2apic(0x83f);
-       }
-
        if (enable_ept) {
                kvm_mmu_set_mask_ptes(0ull,
                        (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -6364,18 +6347,10 @@ static __init int hardware_setup(void)
 
        return alloc_kvm_area();
 
-out8:
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-out7:
-       free_page((unsigned long)vmx_vmread_bitmap);
-out6:
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
 out3:
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+       free_page((unsigned long)vmx_vmwrite_bitmap);
 out2:
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
+       free_page((unsigned long)vmx_vmread_bitmap);
 out1:
        free_page((unsigned long)vmx_io_bitmap_b);
 out:
@@ -6386,10 +6361,6 @@ out:
 
 static __exit void hardware_unsetup(void)
 {
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
        free_page((unsigned long)vmx_io_bitmap_b);
        free_page((unsigned long)vmx_io_bitmap_a);
        free_page((unsigned long)vmx_vmwrite_bitmap);
@@ -6753,13 +6724,6 @@ static int handle_vmon(struct kvm_vcpu *
        if (r < 0)
                goto out_vmcs02;
 
-       if (cpu_has_vmx_msr_bitmap()) {
-               vmx->nested.msr_bitmap =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-               if (!vmx->nested.msr_bitmap)
-                       goto out_msr_bitmap;
-       }
-
        if (enable_shadow_vmcs) {
                shadow_vmcs = alloc_vmcs();
                if (!shadow_vmcs)
@@ -6784,9 +6748,6 @@ static int handle_vmon(struct kvm_vcpu *
        return 1;
 
 out_shadow_vmcs:
-       free_page((unsigned long)vmx->nested.msr_bitmap);
-
-out_msr_bitmap:
        free_loaded_vmcs(&vmx->nested.vmcs02);
 
 out_vmcs02:
@@ -6860,10 +6821,6 @@ static void free_nested(struct vcpu_vmx
        vmx->nested.vmxon = false;
        free_vpid(vmx->nested.vpid02);
        nested_release_vmcs12(vmx);
-       if (vmx->nested.msr_bitmap) {
-               free_page((unsigned long)vmx->nested.msr_bitmap);
-               vmx->nested.msr_bitmap = NULL;
-       }
        if (enable_shadow_vmcs)
                free_vmcs(vmx->nested.current_shadow_vmcs);
        /* Unpin physical memory we referred to in the vmcs02 */
@@ -8200,7 +8157,7 @@ static void vmx_set_virtual_x2apic_mode(
        }
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
 
-       vmx_set_msr_bitmap(vcpu);
+       vmx_update_msr_bitmap(vcpu);
 }
 
 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
@@ -8780,6 +8737,7 @@ static struct kvm_vcpu *vmx_create_vcpu(
 {
        int err;
        struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       unsigned long *msr_bitmap;
        int cpu;
 
        if (!vmx)
@@ -8820,6 +8778,15 @@ static struct kvm_vcpu *vmx_create_vcpu(
        if (err < 0)
                goto free_msrs;
 
+       msr_bitmap = vmx->vmcs01.msr_bitmap;
+       vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+       vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+       vmx->msr_bitmap_mode = 0;
+
        vmx->loaded_vmcs = &vmx->vmcs01;
        cpu = get_cpu();
        vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -9204,7 +9171,7 @@ static inline bool nested_vmx_merge_msr_
        int msr;
        struct page *page;
        unsigned long *msr_bitmap_l1;
-       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
 
        /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
@@ -9715,6 +9682,9 @@ static void prepare_vmcs02(struct kvm_vc
        else
                vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
 
+       if (cpu_has_vmx_msr_bitmap())
+               vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
        if (enable_vpid) {
                /*
                 * There is no direct mapping between vpid02 and vpid12, the
@@ -10415,7 +10385,7 @@ static void load_vmcs12_host_state(struc
        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
        if (cpu_has_vmx_msr_bitmap())
-               vmx_set_msr_bitmap(vcpu);
+               vmx_update_msr_bitmap(vcpu);
 
        if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
                                vmcs12->vm_exit_msr_load_count))


Reply via email to