From: Ashok Raj <ashok....@intel.com>

Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
barriers on switching between VMs to avoid inter-VM Spectre v2 attacks.
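For reference, the barrier itself is a single command write to
IA32_PRED_CMD (MSR 0x49). A minimal sketch of what
indirect_branch_prediction_barrier() boils down to is below (simplified:
the in-tree helper uses alternative_msr_write() keyed on
X86_FEATURE_IBPB, so the WRMSR is patched out on CPUs without IBPB):

	#include <asm/msr.h>		/* wrmsrl() */
	#include <asm/msr-index.h>	/* MSR_IA32_PRED_CMD, PRED_CMD_IBPB */

	static inline void indirect_branch_prediction_barrier(void)
	{
		/*
		 * PRED_CMD_IBPB is bit 0 of IA32_PRED_CMD. The write is a
		 * one-shot command: it flushes indirect branch predictor
		 * state so branches executed before the barrier (e.g. by
		 * another guest) cannot steer predictions after it. The
		 * MSR holds no state and does not support reads.
		 */
		wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
	}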
[peterz: rebase and changelog rewrite]
[karahmed: - vmx: expose PRED_CMD whenever it is available
           - svm: only pass through IBPB if it is available]
[dwmw2: - vmx: allow X86_FEATURE_AMD_PRED_CMD too]
Cc: Asit Mallick <asit.k.mall...@intel.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Arjan Van De Ven <arjan.van.de....@intel.com>
Cc: Tim Chen <tim.c.c...@linux.intel.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Andrea Arcangeli <aarca...@redhat.com>
Cc: Andi Kleen <a...@linux.intel.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Jun Nakajima <jun.nakaj...@intel.com>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Greg KH <gre...@linuxfoundation.org>
Cc: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: Ashok Raj <ashok....@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok....@intel.com
Signed-off-by: David Woodhouse <d...@amazon.co.uk>
Signed-off-by: KarimAllah Ahmed <karah...@amazon.de>
---
 arch/x86/kvm/svm.c | 14 ++++++++++++++
 arch/x86/kvm/vmx.c | 11 +++++++++++
 2 files changed, 25 insertions(+)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2744b973..cfdb9ab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -529,6 +529,7 @@ struct svm_cpu_data {
 	struct kvm_ldttss_desc *tss_desc;
 
 	struct page *save_area;
+	struct vmcb *current_vmcb;
 };
 
 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
 
 		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
 	}
+
+	if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+		set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
 }
 
 static void add_msr_offset(u32 offset)
@@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
+	/*
+	 * The vmcb page can be recycled, causing a false negative in
+	 * svm_vcpu_load(). So do a full IBPB now.
+	 */
+	indirect_branch_prediction_barrier();
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 	int i;
 
 	if (unlikely(cpu != vcpu->cpu)) {
@@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
+	if (sd->current_vmcb != svm->vmcb) {
+		sd->current_vmcb = svm->vmcb;
+		indirect_branch_prediction_barrier();
+	}
 	avic_vcpu_load(vcpu, cpu);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d1e25db..1e45bb3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
 		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
 		vmcs_load(vmx->loaded_vmcs->vmcs);
+		indirect_branch_prediction_barrier();
 	}
 
 	if (!already_loaded) {
@@ -6791,6 +6792,16 @@ static __init int hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 48;
 	}
 
+	/*
+	 * The AMD_PRED_CMD bit might be exposed by hypervisors on Intel
+	 * chips which only want to expose PRED_CMD to guests and not
+	 * SPEC_CTRL. Because PRED_CMD is one-shot write-only, while
+	 * SPEC_CTRL requires storage, live migration support, etc.
+	 */
+	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+	    boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+		vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
+
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-- 
2.7.4
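A note on the passthrough mechanics: on VMX, "disabling the intercept"
means clearing the read/write bits for the MSR in the 4K MSR bitmap
referenced by the VMCS, so guest RDMSR/WRMSR of IA32_PRED_CMD runs
without a VM exit. Below is a simplified sketch with a hypothetical
helper name (offsets per the SDM's MSR-bitmap layout; the in-tree
__vmx_disable_intercept_for_msr() additionally splits read vs. write
by an MSR type argument):

	#include <linux/bitops.h>	/* __clear_bit() */

	/* Clear both intercepts for one MSR in a 4K VMX MSR bitmap. */
	static void msr_bitmap_disable_intercept(unsigned long *bitmap, u32 msr)
	{
		int f = sizeof(unsigned long);

		if (msr <= 0x1fff) {		/* low MSRs 0x0 - 0x1fff */
			__clear_bit(msr, bitmap + 0x000 / f);	/* read  */
			__clear_bit(msr, bitmap + 0x800 / f);	/* write */
		} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
			msr &= 0x1fff;		/* high MSRs */
			__clear_bit(msr, bitmap + 0x400 / f);	/* read  */
			__clear_bit(msr, bitmap + 0xc00 / f);	/* write */
		}
	}

The SVM hunk is the same idea for the MSR permission map: in hardware a
set MSRPM bit means intercept, and KVM's set_msr_interception(msrpm,
MSR_IA32_PRED_CMD, 1, 1) clears both permission bits, giving the guest
direct access.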