Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 1:49 PM, Daniel Kiperwrote: > On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote: >> On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote: >> > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: >> > > >> > > Windows use IBRS and Microsoft don't have any plans to switch to >> > > retpoline. >> > > Running a Windows guest should be a pretty common use-case no? >> > > >> > > In addition, your handle of the first WRMSR intercept could be different. >> > > It could signal you to start doing the following: >> > > 1. Disable intercept on SPEC_CTRL MSR. >> > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. >> > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. >> > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) >> > > >> > > That way, you will both have fastest option as long as guest don't use >> > > IBRS >> > > and also won't have the 3% performance hit compared to Konrad's proposal. >> > > >> > > Am I missing something? >> > >> > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part >> > of the 3% speedup you observe is because in the above, the vmentry path >> > doesn't need to *read* the host's value and store it; the host is >> > expected to restore it for itself anyway? >> >> Yes for at least the purpose of correctness. That is based on what I have >> heard is that you when you transition to a higher ring you have to write 1, >> then write zero >> when you transition back to lower rings. That is it is like a knob. >> >> But then I heard that on some CPUs it is more like reset button and >> just writting 1 to IBRS is fine. But again, correctness here. >> >> > >> > I'd actually quite like to repeat the benchmark on the new fixed >> > microcode, if anyone has it yet, to see if that read/swap slowness is >> > still quite as excessive. 
I'm certainly not ruling this out, but I'm >> > just a little wary of premature optimisation, and I'd like to make sure >> > we have everything *else* in the KVM patches right first. >> > >> > The fact that the save-and-restrict macros I have in the tip of my >> > working tree at the moment are horrid and causing 0-day nastygrams, >> > probably doesn't help persuade me to favour the approach ;) >> > >> > ... hm, the CPU actually has separate MSR save/restore lists for >> > entry/exit, doesn't it? Is there any way to sanely make use of that and >> > do the restoration manually on vmentry but let it be automatic on >> > vmexit, by having it *only* in the guest's MSR-store area to be saved >> > on exit and restored on exit, but *not* in the host's MSR-store area? > > s/on exit and restored on exit/on exit and restored on entry/? > > Additionally, AIUI there is no "host's MSR-store area". > >> Oh. That sounds sounds interesting > > That is possible but you have to split add_atomic_switch_msr() accordingly. > >> > Reading the code and comparing with the SDM, I can't see where we're >> > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested >> > case... >> >> Right. We (well Daniel Kiper, CC-ed) implemented it for this and >> that is where we got the numbers. >> >> Daniel, you OK posting it here? Granted with the caveats thta it won't even >> compile against upstream as it was based on a distro kernel. > > Please look below... > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index aa9bc4f..e7c0f8b 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO); > > extern const ulong vmx_return; > > -#define NR_AUTOLOAD_MSRS 8 > -#define VMCS02_POOL_SIZE 1 > +#define NR_AUTOLOAD_MSRS 8 > +#define NR_AUTOSTORE_MSRS NR_AUTOLOAD_MSRS > + > +#define VMCS02_POOL_SIZE 1 I think you accidentally resurrected VMCS02_POOL_SIZE. 
> struct vmcs { > u32 revision_id; > @@ -504,6 +506,10 @@ struct vcpu_vmx { > struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; > struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; > } msr_autoload; > + struct msr_autostore { > + unsigned nr; > + struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS]; > + } msr_autostore; > struct { > int loaded; > u16 fs_sel, gs_sel, ldt_sel; > @@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx > *vmx, unsigned msr, > m->host[i].value = host_val; > } > > +static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr) > +{ > + unsigned i; > + struct msr_autostore *m = >msr_autostore; > + > + for (i = 0; i < m->nr; ++i) > + if (m->guest[i].index == msr) > + return; > + > + if (i == NR_AUTOSTORE_MSRS) { > + pr_err("Not enough msr store entries. Can't add msr %x\n", > msr); > + BUG(); I wouldn't panic the host for this.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 1:49 PM, Daniel Kiper wrote: > On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote: >> On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote: >> > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: >> > > >> > > Windows use IBRS and Microsoft don't have any plans to switch to >> > > retpoline. >> > > Running a Windows guest should be a pretty common use-case no? >> > > >> > > In addition, your handle of the first WRMSR intercept could be different. >> > > It could signal you to start doing the following: >> > > 1. Disable intercept on SPEC_CTRL MSR. >> > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. >> > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. >> > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) >> > > >> > > That way, you will both have fastest option as long as guest don't use >> > > IBRS >> > > and also won't have the 3% performance hit compared to Konrad's proposal. >> > > >> > > Am I missing something? >> > >> > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part >> > of the 3% speedup you observe is because in the above, the vmentry path >> > doesn't need to *read* the host's value and store it; the host is >> > expected to restore it for itself anyway? >> >> Yes for at least the purpose of correctness. That is based on what I have >> heard is that you when you transition to a higher ring you have to write 1, >> then write zero >> when you transition back to lower rings. That is it is like a knob. >> >> But then I heard that on some CPUs it is more like reset button and >> just writting 1 to IBRS is fine. But again, correctness here. >> >> > >> > I'd actually quite like to repeat the benchmark on the new fixed >> > microcode, if anyone has it yet, to see if that read/swap slowness is >> > still quite as excessive. 
I'm certainly not ruling this out, but I'm >> > just a little wary of premature optimisation, and I'd like to make sure >> > we have everything *else* in the KVM patches right first. >> > >> > The fact that the save-and-restrict macros I have in the tip of my >> > working tree at the moment are horrid and causing 0-day nastygrams, >> > probably doesn't help persuade me to favour the approach ;) >> > >> > ... hm, the CPU actually has separate MSR save/restore lists for >> > entry/exit, doesn't it? Is there any way to sanely make use of that and >> > do the restoration manually on vmentry but let it be automatic on >> > vmexit, by having it *only* in the guest's MSR-store area to be saved >> > on exit and restored on exit, but *not* in the host's MSR-store area? > > s/on exit and restored on exit/on exit and restored on entry/? > > Additionally, AIUI there is no "host's MSR-store area". > >> Oh. That sounds sounds interesting > > That is possible but you have to split add_atomic_switch_msr() accordingly. > >> > Reading the code and comparing with the SDM, I can't see where we're >> > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested >> > case... >> >> Right. We (well Daniel Kiper, CC-ed) implemented it for this and >> that is where we got the numbers. >> >> Daniel, you OK posting it here? Granted with the caveats thta it won't even >> compile against upstream as it was based on a distro kernel. > > Please look below... > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index aa9bc4f..e7c0f8b 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO); > > extern const ulong vmx_return; > > -#define NR_AUTOLOAD_MSRS 8 > -#define VMCS02_POOL_SIZE 1 > +#define NR_AUTOLOAD_MSRS 8 > +#define NR_AUTOSTORE_MSRS NR_AUTOLOAD_MSRS > + > +#define VMCS02_POOL_SIZE 1 I think you accidentally resurrected VMCS02_POOL_SIZE. 
> struct vmcs { > u32 revision_id; > @@ -504,6 +506,10 @@ struct vcpu_vmx { > struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; > struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; > } msr_autoload; > + struct msr_autostore { > + unsigned nr; > + struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS]; > + } msr_autostore; > struct { > int loaded; > u16 fs_sel, gs_sel, ldt_sel; > @@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx > *vmx, unsigned msr, > m->host[i].value = host_val; > } > > +static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr) > +{ > + unsigned i; > + struct msr_autostore *m = >msr_autostore; > + > + for (i = 0; i < m->nr; ++i) > + if (m->guest[i].index == msr) > + return; > + > + if (i == NR_AUTOSTORE_MSRS) { > + pr_err("Not enough msr store entries. Can't add msr %x\n", > msr); > + BUG(); I wouldn't panic the host for this. add_atomic_switch_msr() just emits a
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote: > On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote: > > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: > > > > > > Windows use IBRS and Microsoft don't have any plans to switch to > > > retpoline. > > > Running a Windows guest should be a pretty common use-case no? > > > > > > In addition, your handle of the first WRMSR intercept could be different. > > > It could signal you to start doing the following: > > > 1. Disable intercept on SPEC_CTRL MSR. > > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. > > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. > > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) > > > > > > That way, you will both have fastest option as long as guest don't use > > > IBRS > > > and also won't have the 3% performance hit compared to Konrad's proposal. > > > > > > Am I missing something? > > > > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part > > of the 3% speedup you observe is because in the above, the vmentry path > > doesn't need to *read* the host's value and store it; the host is > > expected to restore it for itself anyway? > > Yes for at least the purpose of correctness. That is based on what I have > heard is that you when you transition to a higher ring you have to write 1, > then write zero > when you transition back to lower rings. That is it is like a knob. > > But then I heard that on some CPUs it is more like reset button and > just writing 1 to IBRS is fine. But again, correctness here. > > > > > I'd actually quite like to repeat the benchmark on the new fixed > > microcode, if anyone has it yet, to see if that read/swap slowness is > > still quite as excessive. I'm certainly not ruling this out, but I'm > > just a little wary of premature optimisation, and I'd like to make sure > > we have everything *else* in the KVM patches right first. 
> > > > The fact that the save-and-restrict macros I have in the tip of my > > working tree at the moment are horrid and causing 0-day nastygrams, > > probably doesn't help persuade me to favour the approach ;) > > > > ... hm, the CPU actually has separate MSR save/restore lists for > > entry/exit, doesn't it? Is there any way to sanely make use of that and > > do the restoration manually on vmentry but let it be automatic on > > vmexit, by having it *only* in the guest's MSR-store area to be saved > > on exit and restored on exit, but *not* in the host's MSR-store area? s/on exit and restored on exit/on exit and restored on entry/? Additionally, AIUI there is no "host's MSR-store area". > Oh. That sounds interesting That is possible but you have to split add_atomic_switch_msr() accordingly. > > Reading the code and comparing with the SDM, I can't see where we're > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > > case... > > Right. We (well Daniel Kiper, CC-ed) implemented it for this and > that is where we got the numbers. > > Daniel, you OK posting it here? Granted with the caveats that it won't even > compile against upstream as it was based on a distro kernel. Please look below... 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa9bc4f..e7c0f8b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; -#define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 +#define NR_AUTOLOAD_MSRS 8 +#define NR_AUTOSTORE_MSRS NR_AUTOLOAD_MSRS + +#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -504,6 +506,10 @@ struct vcpu_vmx { struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; } msr_autoload; + struct msr_autostore { + unsigned nr; + struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS]; + } msr_autostore; struct { int loaded; u16 fs_sel, gs_sel, ldt_sel; @@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr) +{ + unsigned i; + struct msr_autostore *m = >msr_autostore; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) + return; + + if (i == NR_AUTOSTORE_MSRS) { + pr_err("Not enough msr store entries. Can't add msr %x\n", msr); + BUG(); + } + + m->guest[i].index = msr; + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, ++m->nr); +} + +static u64 get_msr_vmcs_store(struct vcpu_vmx *vmx, unsigned msr) +{ + unsigned i; + struct msr_autostore *m = >msr_autostore; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) +
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote: > On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote: > > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: > > > > > > Windows use IBRS and Microsoft don't have any plans to switch to > > > retpoline. > > > Running a Windows guest should be a pretty common use-case no? > > > > > > In addition, your handle of the first WRMSR intercept could be different. > > > It could signal you to start doing the following: > > > 1. Disable intercept on SPEC_CTRL MSR. > > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. > > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. > > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) > > > > > > That way, you will both have fastest option as long as guest don't use > > > IBRS > > > and also won't have the 3% performance hit compared to Konrad's proposal. > > > > > > Am I missing something? > > > > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part > > of the 3% speedup you observe is because in the above, the vmentry path > > doesn't need to *read* the host's value and store it; the host is > > expected to restore it for itself anyway? > > Yes for at least the purpose of correctness. That is based on what I have > heard is that you when you transition to a higher ring you have to write 1, > then write zero > when you transition back to lower rings. That is it is like a knob. > > But then I heard that on some CPUs it is more like reset button and > just writing 1 to IBRS is fine. But again, correctness here. > > > > > I'd actually quite like to repeat the benchmark on the new fixed > > microcode, if anyone has it yet, to see if that read/swap slowness is > > still quite as excessive. I'm certainly not ruling this out, but I'm > > just a little wary of premature optimisation, and I'd like to make sure > > we have everything *else* in the KVM patches right first. 
> > > > The fact that the save-and-restrict macros I have in the tip of my > > working tree at the moment are horrid and causing 0-day nastygrams, > > probably doesn't help persuade me to favour the approach ;) > > > > ... hm, the CPU actually has separate MSR save/restore lists for > > entry/exit, doesn't it? Is there any way to sanely make use of that and > > do the restoration manually on vmentry but let it be automatic on > > vmexit, by having it *only* in the guest's MSR-store area to be saved > > on exit and restored on exit, but *not* in the host's MSR-store area? s/on exit and restored on exit/on exit and restored on entry/? Additionally, AIUI there is no "host's MSR-store area". > Oh. That sounds interesting That is possible but you have to split add_atomic_switch_msr() accordingly. > > Reading the code and comparing with the SDM, I can't see where we're > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > > case... > > Right. We (well Daniel Kiper, CC-ed) implemented it for this and > that is where we got the numbers. > > Daniel, you OK posting it here? Granted with the caveats that it won't even > compile against upstream as it was based on a distro kernel. Please look below... 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa9bc4f..e7c0f8b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; -#define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 +#define NR_AUTOLOAD_MSRS 8 +#define NR_AUTOSTORE_MSRS NR_AUTOLOAD_MSRS + +#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -504,6 +506,10 @@ struct vcpu_vmx { struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; } msr_autoload; + struct msr_autostore { + unsigned nr; + struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS]; + } msr_autostore; struct { int loaded; u16 fs_sel, gs_sel, ldt_sel; @@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr) +{ + unsigned i; + struct msr_autostore *m = >msr_autostore; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) + return; + + if (i == NR_AUTOSTORE_MSRS) { + pr_err("Not enough msr store entries. Can't add msr %x\n", msr); + BUG(); + } + + m->guest[i].index = msr; + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, ++m->nr); +} + +static u64 get_msr_vmcs_store(struct vcpu_vmx *vmx, unsigned msr) +{ + unsigned i; + struct msr_autostore *m = >msr_autostore; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) +
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 11:16 AM, Konrad Rzeszutek Wilkwrote: > On Mon, Jan 29, 2018 at 10:43:22AM -0800, Jim Mattson wrote: >> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed >> wrote: >> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >> > guests >> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using >> > a >> > retpoline+IBPB based approach. >> > >> > To avoid the overhead of atomically saving and restoring the >> > MSR_IA32_SPEC_CTRL >> > for guests that do not actually use the MSR, only add_atomic_switch_msr >> > when a >> > non-zero is written to it. >> > >> > Cc: Asit Mallick >> > Cc: Arjan Van De Ven >> > Cc: Dave Hansen >> > Cc: Andi Kleen >> > Cc: Andrea Arcangeli >> > Cc: Linus Torvalds >> > Cc: Tim Chen >> > Cc: Thomas Gleixner >> > Cc: Dan Williams >> > Cc: Jun Nakajima >> > Cc: Paolo Bonzini >> > Cc: David Woodhouse >> > Cc: Greg KH >> > Cc: Andy Lutomirski >> > Signed-off-by: KarimAllah Ahmed >> > Signed-off-by: Ashok Raj >> > --- >> > arch/x86/kvm/cpuid.c | 4 +++- >> > arch/x86/kvm/cpuid.h | 1 + >> > arch/x86/kvm/vmx.c | 63 >> > >> > 3 files changed, 67 insertions(+), 1 deletion(-) >> > >> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> > index 0099e10..dc78095 100644 >> > --- a/arch/x86/kvm/cpuid.c >> > +++ b/arch/x86/kvm/cpuid.c >> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) >> > /* These are scattered features in cpufeatures.h. */ >> > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 >> > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 >> > +#define KVM_CPUID_BIT_SPEC_CTRL 26 >> > #define KF(x) bit(KVM_CPUID_BIT_##x) >> > >> > int kvm_update_cpuid(struct kvm_vcpu *vcpu) >> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct >> > kvm_cpuid_entry2 *entry, u32 function, >> > >> > /* cpuid 7.0.edx*/ >> > const u32 kvm_cpuid_7_0_edx_x86_features = >> > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); >> > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ >> > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? 
KF(SPEC_CTRL) : 0); >> >> Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to >> pass through for existing CPUs (26 and 27)? >> >> > >> > /* all calls to cpuid_count() should be made on the same cpu */ >> > get_cpu(); >> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h >> > index cdc70a3..dcfe227 100644 >> > --- a/arch/x86/kvm/cpuid.h >> > +++ b/arch/x86/kvm/cpuid.h >> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { >> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, >> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, >> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, >> > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, >> > }; >> > >> > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned >> > x86_feature) >> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> > index aa8638a..1b743a0 100644 >> > --- a/arch/x86/kvm/vmx.c >> > +++ b/arch/x86/kvm/vmx.c >> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, >> > bool masked); >> > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, >> > u16 error_code); >> > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); >> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long >> > *msr_bitmap, >> > + u32 msr, int >> > type); >> > + >> > >> > static DEFINE_PER_CPU(struct vmcs *, vmxarea); >> > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); >> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx >> > *vmx, unsigned msr, >> > m->host[i].value = host_val; >> > } >> > >> > +/* do not touch guest_val and host_val if the msr is not found */ >> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, >> > + u64 *guest_val, u64 *host_val) >> > +{ >> > + unsigned i; >> > + struct msr_autoload *m = >msr_autoload; >> > + >> > + for (i = 0; i < m->nr; ++i) >> > + if (m->guest[i].index == msr) >> > + break; >> > + >> > + if (i == m->nr) >> > + return 1; >> > + >> > + if (guest_val) >> > 
+ *guest_val = m->guest[i].value; >> > + if (host_val) >> > +
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 11:16 AM, Konrad Rzeszutek Wilk wrote: > On Mon, Jan 29, 2018 at 10:43:22AM -0800, Jim Mattson wrote: >> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed >> wrote: >> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >> > guests >> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using >> > a >> > retpoline+IBPB based approach. >> > >> > To avoid the overhead of atomically saving and restoring the >> > MSR_IA32_SPEC_CTRL >> > for guests that do not actually use the MSR, only add_atomic_switch_msr >> > when a >> > non-zero is written to it. >> > >> > Cc: Asit Mallick >> > Cc: Arjan Van De Ven >> > Cc: Dave Hansen >> > Cc: Andi Kleen >> > Cc: Andrea Arcangeli >> > Cc: Linus Torvalds >> > Cc: Tim Chen >> > Cc: Thomas Gleixner >> > Cc: Dan Williams >> > Cc: Jun Nakajima >> > Cc: Paolo Bonzini >> > Cc: David Woodhouse >> > Cc: Greg KH >> > Cc: Andy Lutomirski >> > Signed-off-by: KarimAllah Ahmed >> > Signed-off-by: Ashok Raj >> > --- >> > arch/x86/kvm/cpuid.c | 4 +++- >> > arch/x86/kvm/cpuid.h | 1 + >> > arch/x86/kvm/vmx.c | 63 >> > >> > 3 files changed, 67 insertions(+), 1 deletion(-) >> > >> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> > index 0099e10..dc78095 100644 >> > --- a/arch/x86/kvm/cpuid.c >> > +++ b/arch/x86/kvm/cpuid.c >> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) >> > /* These are scattered features in cpufeatures.h. */ >> > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 >> > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 >> > +#define KVM_CPUID_BIT_SPEC_CTRL 26 >> > #define KF(x) bit(KVM_CPUID_BIT_##x) >> > >> > int kvm_update_cpuid(struct kvm_vcpu *vcpu) >> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct >> > kvm_cpuid_entry2 *entry, u32 function, >> > >> > /* cpuid 7.0.edx*/ >> > const u32 kvm_cpuid_7_0_edx_x86_features = >> > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); >> > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ >> > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? 
KF(SPEC_CTRL) : 0); >> >> Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to >> pass through for existing CPUs (26 and 27)? >> >> > >> > /* all calls to cpuid_count() should be made on the same cpu */ >> > get_cpu(); >> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h >> > index cdc70a3..dcfe227 100644 >> > --- a/arch/x86/kvm/cpuid.h >> > +++ b/arch/x86/kvm/cpuid.h >> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { >> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, >> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, >> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, >> > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, >> > }; >> > >> > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned >> > x86_feature) >> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> > index aa8638a..1b743a0 100644 >> > --- a/arch/x86/kvm/vmx.c >> > +++ b/arch/x86/kvm/vmx.c >> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, >> > bool masked); >> > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, >> > u16 error_code); >> > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); >> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long >> > *msr_bitmap, >> > + u32 msr, int >> > type); >> > + >> > >> > static DEFINE_PER_CPU(struct vmcs *, vmxarea); >> > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); >> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx >> > *vmx, unsigned msr, >> > m->host[i].value = host_val; >> > } >> > >> > +/* do not touch guest_val and host_val if the msr is not found */ >> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, >> > + u64 *guest_val, u64 *host_val) >> > +{ >> > + unsigned i; >> > + struct msr_autoload *m = >msr_autoload; >> > + >> > + for (i = 0; i < m->nr; ++i) >> > + if (m->guest[i].index == msr) >> > + break; >> > + >> > + if (i == m->nr) >> > + return 1; >> > + >> > + if (guest_val) >> > 
+ *guest_val = m->guest[i].value; >> > + if (host_val) >> > + *host_val = m->host[i].value; >> > + >> > + return 0; >> > +} >> > + >> > static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) >> > { >> > u64 guest_efer = vmx->vcpu.arch.efer; >> > @@ -3203,7 +3228,9 @@ static inline bool >> > vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, >> > */ >> > static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> > { >> > +
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 10:43:22AM -0800, Jim Mattson wrote: > On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmedwrote: > > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for > > guests > > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a > > retpoline+IBPB based approach. > > > > To avoid the overhead of atomically saving and restoring the > > MSR_IA32_SPEC_CTRL > > for guests that do not actually use the MSR, only add_atomic_switch_msr > > when a > > non-zero is written to it. > > > > Cc: Asit Mallick > > Cc: Arjan Van De Ven > > Cc: Dave Hansen > > Cc: Andi Kleen > > Cc: Andrea Arcangeli > > Cc: Linus Torvalds > > Cc: Tim Chen > > Cc: Thomas Gleixner > > Cc: Dan Williams > > Cc: Jun Nakajima > > Cc: Paolo Bonzini > > Cc: David Woodhouse > > Cc: Greg KH > > Cc: Andy Lutomirski > > Signed-off-by: KarimAllah Ahmed > > Signed-off-by: Ashok Raj > > --- > > arch/x86/kvm/cpuid.c | 4 +++- > > arch/x86/kvm/cpuid.h | 1 + > > arch/x86/kvm/vmx.c | 63 > > > > 3 files changed, 67 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index 0099e10..dc78095 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > > /* These are scattered features in cpufeatures.h. */ > > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 > > +#define KVM_CPUID_BIT_SPEC_CTRL 26 > > #define KF(x) bit(KVM_CPUID_BIT_##x) > > > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) > > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct > > kvm_cpuid_entry2 *entry, u32 function, > > > > /* cpuid 7.0.edx*/ > > const u32 kvm_cpuid_7_0_edx_x86_features = > > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); > > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ > > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); > > Isn't 'boot_cpu_has()' superflous here? 
And aren't there two bits to > pass through for existing CPUs (26 and 27)? > > > > > /* all calls to cpuid_count() should be made on the same cpu */ > > get_cpu(); > > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > > index cdc70a3..dcfe227 100644 > > --- a/arch/x86/kvm/cpuid.h > > +++ b/arch/x86/kvm/cpuid.h > > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, > > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > > }; > > > > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned > > x86_feature) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index aa8638a..1b743a0 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, > > bool masked); > > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > > u16 error_code); > > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long > > *msr_bitmap, > > + u32 msr, int > > type); > > + > > > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx > > *vmx, unsigned msr, > > m->host[i].value = host_val; > > } > > > > +/* do not touch guest_val and host_val if the msr is not found */ > > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, > > + u64 *guest_val, u64 *host_val) > > +{ > > + unsigned i; > > + struct msr_autoload *m = >msr_autoload; > > + > > + for (i = 0; i < m->nr; ++i) > > + if (m->guest[i].index == msr) > > + break; > > + > > + if (i == m->nr) > > + return 1; > > + > > + if (guest_val) > > + *guest_val = m->guest[i].value; > > + if (host_val) > > + *host_val = m->host[i].value; > > + > > + return 0; > > +} > 
> + > > static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) > > { > > u64 guest_efer = vmx->vcpu.arch.efer; > > @@
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 10:43:22AM -0800, Jim Mattson wrote: > On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: > > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for > > guests > > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a > > retpoline+IBPB based approach. > > > > To avoid the overhead of atomically saving and restoring the > > MSR_IA32_SPEC_CTRL > > for guests that do not actually use the MSR, only add_atomic_switch_msr > > when a > > non-zero is written to it. > > > > Cc: Asit Mallick > > Cc: Arjan Van De Ven > > Cc: Dave Hansen > > Cc: Andi Kleen > > Cc: Andrea Arcangeli > > Cc: Linus Torvalds > > Cc: Tim Chen > > Cc: Thomas Gleixner > > Cc: Dan Williams > > Cc: Jun Nakajima > > Cc: Paolo Bonzini > > Cc: David Woodhouse > > Cc: Greg KH > > Cc: Andy Lutomirski > > Signed-off-by: KarimAllah Ahmed > > Signed-off-by: Ashok Raj > > --- > > arch/x86/kvm/cpuid.c | 4 +++- > > arch/x86/kvm/cpuid.h | 1 + > > arch/x86/kvm/vmx.c | 63 > > > > 3 files changed, 67 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index 0099e10..dc78095 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > > /* These are scattered features in cpufeatures.h. */ > > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 > > +#define KVM_CPUID_BIT_SPEC_CTRL 26 > > #define KF(x) bit(KVM_CPUID_BIT_##x) > > > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) > > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct > > kvm_cpuid_entry2 *entry, u32 function, > > > > /* cpuid 7.0.edx*/ > > const u32 kvm_cpuid_7_0_edx_x86_features = > > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); > > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ > > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); > > Isn't 'boot_cpu_has()' superflous here? 
And aren't there two bits to > pass through for existing CPUs (26 and 27)? > > > > > /* all calls to cpuid_count() should be made on the same cpu */ > > get_cpu(); > > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > > index cdc70a3..dcfe227 100644 > > --- a/arch/x86/kvm/cpuid.h > > +++ b/arch/x86/kvm/cpuid.h > > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, > > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > > }; > > > > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned > > x86_feature) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index aa8638a..1b743a0 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, > > bool masked); > > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > > u16 error_code); > > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long > > *msr_bitmap, > > + u32 msr, int > > type); > > + > > > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx > > *vmx, unsigned msr, > > m->host[i].value = host_val; > > } > > > > +/* do not touch guest_val and host_val if the msr is not found */ > > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, > > + u64 *guest_val, u64 *host_val) > > +{ > > + unsigned i; > > + struct msr_autoload *m = >msr_autoload; > > + > > + for (i = 0; i < m->nr; ++i) > > + if (m->guest[i].index == msr) > > + break; > > + > > + if (i == m->nr) > > + return 1; > > + > > + if (guest_val) > > + *guest_val = m->guest[i].value; > > + if (host_val) > > + *host_val = m->host[i].value; > > + > > + return 0; > > +} > 
> + > > static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) > > { > > u64 guest_efer = vmx->vcpu.arch.efer; > > @@ -3203,7 +3228,9 @@ static inline bool > > vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, > > */ > > static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > { > > + u64 spec_ctrl = 0; > > struct shared_msr_entry *msr; > > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > > > switch (msr_info->index) { > > #ifdef CONFIG_X86_64 > > @@ -3223,6
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 01/29/2018 08:04 PM, Jim Mattson wrote: Can I assume you'll send out a new version with the fixes? Yes, I am currently doing some tests and once I am done I will send a new round. ... and the typo is already fixed in 'ibpb-wip' :) On Mon, Jan 29, 2018 at 11:01 AM, David Woodhousewrote: (Top-posting; sorry.) Much of that is already fixed during our day, in http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb I forgot to fix up the wrong-MSR typo though, and we do still need to address reset. On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote: On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only add_atomic_switch_msr when a non-zero is written to it. Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Dave Hansen Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Linus Torvalds Cc: Tim Chen Cc: Thomas Gleixner Cc: Dan Williams Cc: Jun Nakajima Cc: Paolo Bonzini Cc: David Woodhouse Cc: Greg KH Cc: Andy Lutomirski Signed-off-by: KarimAllah Ahmed Signed-off-by: Ashok Raj --- arch/x86/kvm/cpuid.c | 4 +++- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/vmx.c | 63 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10..dc78095 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) /* These are scattered features in cpufeatures.h. 
*/ #define KVM_CPUID_BIT_AVX512_4VNNIW 2 #define KVM_CPUID_BIT_AVX512_4FMAPS 3 +#define KVM_CPUID_BIT_SPEC_CTRL 26 #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to pass through for existing CPUs (26 and 27)? /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3..dcfe227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa8638a..1b743a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +/* do not touch guest_val and host_val if the msr is not found */ +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 
*guest_val, u64 *host_val) +{ + unsigned i; + struct msr_autoload *m = &vmx->msr_autoload; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) + break; + + if (i == m->nr) + return 1; + + if (guest_val) + *guest_val = m->guest[i].value; + if (host_val) + *host_val = m->host[i].value; + + return 0; +} + static bool update_transition_efer(struct
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 01/29/2018 08:04 PM, Jim Mattson wrote: Can I assume you'll send out a new version with the fixes? Yes, I am currently doing some tests and once I am done I will send a new round. ... and the typo is already fixed in 'ibpb-wip' :) On Mon, Jan 29, 2018 at 11:01 AM, David Woodhouse wrote: (Top-posting; sorry.) Much of that is already fixed during our day, in http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb I forgot to fix up the wrong-MSR typo though, and we do still need to address reset. On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote: On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only add_atomic_switch_msr when a non-zero is written to it. Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Dave Hansen Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Linus Torvalds Cc: Tim Chen Cc: Thomas Gleixner Cc: Dan Williams Cc: Jun Nakajima Cc: Paolo Bonzini Cc: David Woodhouse Cc: Greg KH Cc: Andy Lutomirski Signed-off-by: KarimAllah Ahmed Signed-off-by: Ashok Raj --- arch/x86/kvm/cpuid.c | 4 +++- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/vmx.c | 63 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10..dc78095 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) /* These are scattered features in cpufeatures.h. 
*/ #define KVM_CPUID_BIT_AVX512_4VNNIW 2 #define KVM_CPUID_BIT_AVX512_4FMAPS 3 +#define KVM_CPUID_BIT_SPEC_CTRL 26 #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to pass through for existing CPUs (26 and 27)? /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3..dcfe227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa8638a..1b743a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +/* do not touch guest_val and host_val if the msr is not found */ +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 
*guest_val, u64 *host_val) +{ + unsigned i; + struct msr_autoload *m = &vmx->msr_autoload; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) + break; + + if (i == m->nr) + return 1; + + if (guest_val) + *guest_val = m->guest[i].value; + if (host_val) + *host_val = m->host[i].value; + + return 0; +} + static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { u64 guest_efer = vmx->vcpu.arch.efer; @@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, */ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + u64 spec_ctrl = 0; struct shared_msr_entry *msr; + struct vcpu_vmx *vmx = to_vmx(vcpu); switch
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
Can I assume you'll send out a new version with the fixes? On Mon, Jan 29, 2018 at 11:01 AM, David Woodhousewrote: > > (Top-posting; sorry.) > > Much of that is already fixed during our day, in > http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb > > I forgot to fix up the wrong-MSR typo though, and we do still need to address > reset. > > On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote: >> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: >> > >> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >> > guests >> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using >> > a >> > retpoline+IBPB based approach. >> > >> > To avoid the overhead of atomically saving and restoring the >> > MSR_IA32_SPEC_CTRL >> > for guests that do not actually use the MSR, only add_atomic_switch_msr >> > when a >> > non-zero is written to it. >> > >> > Cc: Asit Mallick >> > Cc: Arjan Van De Ven >> > Cc: Dave Hansen >> > Cc: Andi Kleen >> > Cc: Andrea Arcangeli >> > Cc: Linus Torvalds >> > Cc: Tim Chen >> > Cc: Thomas Gleixner >> > Cc: Dan Williams >> > Cc: Jun Nakajima >> > Cc: Paolo Bonzini >> > Cc: David Woodhouse >> > Cc: Greg KH >> > Cc: Andy Lutomirski >> > Signed-off-by: KarimAllah Ahmed >> > Signed-off-by: Ashok Raj >> > --- >> > arch/x86/kvm/cpuid.c | 4 +++- >> > arch/x86/kvm/cpuid.h | 1 + >> > arch/x86/kvm/vmx.c | 63 >> > >> > 3 files changed, 67 insertions(+), 1 deletion(-) >> > >> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> > index 0099e10..dc78095 100644 >> > --- a/arch/x86/kvm/cpuid.c >> > +++ b/arch/x86/kvm/cpuid.c >> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) >> > /* These are scattered features in cpufeatures.h. 
*/ >> > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 >> > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 >> > +#define KVM_CPUID_BIT_SPEC_CTRL 26 >> > #define KF(x) bit(KVM_CPUID_BIT_##x) >> > >> > int kvm_update_cpuid(struct kvm_vcpu *vcpu) >> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct >> > kvm_cpuid_entry2 *entry, u32 function, >> > >> > /* cpuid 7.0.edx*/ >> > const u32 kvm_cpuid_7_0_edx_x86_features = >> > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); >> > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ >> > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); >> Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to >> pass through for existing CPUs (26 and 27)? >> >> > >> > >> > /* all calls to cpuid_count() should be made on the same cpu */ >> > get_cpu(); >> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h >> > index cdc70a3..dcfe227 100644 >> > --- a/arch/x86/kvm/cpuid.h >> > +++ b/arch/x86/kvm/cpuid.h >> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { >> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, >> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, >> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, >> > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, >> > }; >> > >> > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned >> > x86_feature) >> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> > index aa8638a..1b743a0 100644 >> > --- a/arch/x86/kvm/vmx.c >> > +++ b/arch/x86/kvm/vmx.c >> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, >> > bool masked); >> > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, >> > u16 error_code); >> > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); >> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long >> > *msr_bitmap, >> > + u32 msr, int >> > type); >> > + >> > >> > static DEFINE_PER_CPU(struct vmcs *, vmxarea); >> > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); >> > @@ -2007,6 +2010,28 @@ 
static void add_atomic_switch_msr(struct vcpu_vmx >> > *vmx, unsigned msr, >> > m->host[i].value = host_val; >> > } >> > >> > +/* do not touch guest_val and host_val if the msr is not found */ >> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, >> > + u64 *guest_val, u64 *host_val) >> > +{ >> > + unsigned i; >> > + struct msr_autoload *m = &vmx->msr_autoload; >> > + >> > + for (i = 0; i < m->nr; ++i) >> > + if
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
Can I assume you'll send out a new version with the fixes? On Mon, Jan 29, 2018 at 11:01 AM, David Woodhouse wrote: > > (Top-posting; sorry.) > > Much of that is already fixed during our day, in > http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb > > I forgot to fix up the wrong-MSR typo though, and we do still need to address > reset. > > On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote: >> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: >> > >> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >> > guests >> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using >> > a >> > retpoline+IBPB based approach. >> > >> > To avoid the overhead of atomically saving and restoring the >> > MSR_IA32_SPEC_CTRL >> > for guests that do not actually use the MSR, only add_atomic_switch_msr >> > when a >> > non-zero is written to it. >> > >> > Cc: Asit Mallick >> > Cc: Arjan Van De Ven >> > Cc: Dave Hansen >> > Cc: Andi Kleen >> > Cc: Andrea Arcangeli >> > Cc: Linus Torvalds >> > Cc: Tim Chen >> > Cc: Thomas Gleixner >> > Cc: Dan Williams >> > Cc: Jun Nakajima >> > Cc: Paolo Bonzini >> > Cc: David Woodhouse >> > Cc: Greg KH >> > Cc: Andy Lutomirski >> > Signed-off-by: KarimAllah Ahmed >> > Signed-off-by: Ashok Raj >> > --- >> > arch/x86/kvm/cpuid.c | 4 +++- >> > arch/x86/kvm/cpuid.h | 1 + >> > arch/x86/kvm/vmx.c | 63 >> > >> > 3 files changed, 67 insertions(+), 1 deletion(-) >> > >> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> > index 0099e10..dc78095 100644 >> > --- a/arch/x86/kvm/cpuid.c >> > +++ b/arch/x86/kvm/cpuid.c >> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) >> > /* These are scattered features in cpufeatures.h. 
*/ >> > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 >> > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 >> > +#define KVM_CPUID_BIT_SPEC_CTRL 26 >> > #define KF(x) bit(KVM_CPUID_BIT_##x) >> > >> > int kvm_update_cpuid(struct kvm_vcpu *vcpu) >> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct >> > kvm_cpuid_entry2 *entry, u32 function, >> > >> > /* cpuid 7.0.edx*/ >> > const u32 kvm_cpuid_7_0_edx_x86_features = >> > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); >> > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ >> > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); >> Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to >> pass through for existing CPUs (26 and 27)? >> >> > >> > >> > /* all calls to cpuid_count() should be made on the same cpu */ >> > get_cpu(); >> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h >> > index cdc70a3..dcfe227 100644 >> > --- a/arch/x86/kvm/cpuid.h >> > +++ b/arch/x86/kvm/cpuid.h >> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { >> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, >> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, >> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, >> > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, >> > }; >> > >> > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned >> > x86_feature) >> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> > index aa8638a..1b743a0 100644 >> > --- a/arch/x86/kvm/vmx.c >> > +++ b/arch/x86/kvm/vmx.c >> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, >> > bool masked); >> > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, >> > u16 error_code); >> > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); >> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long >> > *msr_bitmap, >> > + u32 msr, int >> > type); >> > + >> > >> > static DEFINE_PER_CPU(struct vmcs *, vmxarea); >> > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); >> > @@ -2007,6 +2010,28 @@ 
static void add_atomic_switch_msr(struct vcpu_vmx >> > *vmx, unsigned msr, >> > m->host[i].value = host_val; >> > } >> > >> > +/* do not touch guest_val and host_val if the msr is not found */ >> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, >> > + u64 *guest_val, u64 *host_val) >> > +{ >> > + unsigned i; >> > + struct msr_autoload *m = &vmx->msr_autoload; >> > + >> > + for (i = 0; i < m->nr; ++i) >> > + if (m->guest[i].index == msr) >> > + break; >> > + >> > + if (i == m->nr) >> > + return 1; >> > + >> > + if (guest_val) >> > + *guest_val = m->guest[i].value; >> > + if (host_val) >> > + *host_val = m->host[i].value; >> > + >> > + return 0; >> > +} >> > + >> > static bool update_transition_efer(struct
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
(Top-posting; sorry.) Much of that is already fixed during our day, in http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb I forgot to fix up the wrong-MSR typo though, and we do still need to address reset. On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote: > On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: > > > > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for > > guests > > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a > > retpoline+IBPB based approach. > > > > To avoid the overhead of atomically saving and restoring the > > MSR_IA32_SPEC_CTRL > > for guests that do not actually use the MSR, only add_atomic_switch_msr > > when a > > non-zero is written to it. > > > > Cc: Asit Mallick> > Cc: Arjan Van De Ven > > Cc: Dave Hansen > > Cc: Andi Kleen > > Cc: Andrea Arcangeli > > Cc: Linus Torvalds > > Cc: Tim Chen > > Cc: Thomas Gleixner > > Cc: Dan Williams > > Cc: Jun Nakajima > > Cc: Paolo Bonzini > > Cc: David Woodhouse > > Cc: Greg KH > > Cc: Andy Lutomirski > > Signed-off-by: KarimAllah Ahmed > > Signed-off-by: Ashok Raj > > --- > > arch/x86/kvm/cpuid.c | 4 +++- > > arch/x86/kvm/cpuid.h | 1 + > > arch/x86/kvm/vmx.c | 63 > > > > 3 files changed, 67 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index 0099e10..dc78095 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > > /* These are scattered features in cpufeatures.h. 
*/ > > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 > > +#define KVM_CPUID_BIT_SPEC_CTRL 26 > > #define KF(x) bit(KVM_CPUID_BIT_##x) > > > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) > > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct > > kvm_cpuid_entry2 *entry, u32 function, > > > > /* cpuid 7.0.edx*/ > > const u32 kvm_cpuid_7_0_edx_x86_features = > > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); > > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ > > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); > Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to > pass through for existing CPUs (26 and 27)? > > > > > > > /* all calls to cpuid_count() should be made on the same cpu */ > > get_cpu(); > > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > > index cdc70a3..dcfe227 100644 > > --- a/arch/x86/kvm/cpuid.h > > +++ b/arch/x86/kvm/cpuid.h > > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, > > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > > }; > > > > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned > > x86_feature) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index aa8638a..1b743a0 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, > > bool masked); > > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > > u16 error_code); > > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long > > *msr_bitmap, > > + u32 msr, int > > type); > > + > > > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx > 
> *vmx, unsigned msr, > > m->host[i].value = host_val; > > } > > > > +/* do not touch guest_val and host_val if the msr is not found */ > > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, > > + u64 *guest_val, u64 *host_val) > > +{ > > + unsigned i; > > + struct msr_autoload *m = &vmx->msr_autoload; > > + > > + for (i = 0; i < m->nr; ++i) > > + if (m->guest[i].index == msr) > > + break; > > + > > + if (i == m->nr) > > + return 1; > > + > > + if (guest_val) > > + *guest_val = m->guest[i].value; > > + if (host_val) > > +
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
(Top-posting; sorry.) Much of that is already fixed during our day, in http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb I forgot to fix up the wrong-MSR typo though, and we do still need to address reset. On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote: > On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: > > > > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for > > guests > > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a > > retpoline+IBPB based approach. > > > > To avoid the overhead of atomically saving and restoring the > > MSR_IA32_SPEC_CTRL > > for guests that do not actually use the MSR, only add_atomic_switch_msr > > when a > > non-zero is written to it. > > > > Cc: Asit Mallick > > Cc: Arjan Van De Ven > > Cc: Dave Hansen > > Cc: Andi Kleen > > Cc: Andrea Arcangeli > > Cc: Linus Torvalds > > Cc: Tim Chen > > Cc: Thomas Gleixner > > Cc: Dan Williams > > Cc: Jun Nakajima > > Cc: Paolo Bonzini > > Cc: David Woodhouse > > Cc: Greg KH > > Cc: Andy Lutomirski > > Signed-off-by: KarimAllah Ahmed > > Signed-off-by: Ashok Raj > > --- > > arch/x86/kvm/cpuid.c | 4 +++- > > arch/x86/kvm/cpuid.h | 1 + > > arch/x86/kvm/vmx.c | 63 > > > > 3 files changed, 67 insertions(+), 1 deletion(-) > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index 0099e10..dc78095 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > > /* These are scattered features in cpufeatures.h. 
*/ > > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 > > +#define KVM_CPUID_BIT_SPEC_CTRL 26 > > #define KF(x) bit(KVM_CPUID_BIT_##x) > > > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) > > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct > > kvm_cpuid_entry2 *entry, u32 function, > > > > /* cpuid 7.0.edx*/ > > const u32 kvm_cpuid_7_0_edx_x86_features = > > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); > > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ > > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); > Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to > pass through for existing CPUs (26 and 27)? > > > > > > > /* all calls to cpuid_count() should be made on the same cpu */ > > get_cpu(); > > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > > index cdc70a3..dcfe227 100644 > > --- a/arch/x86/kvm/cpuid.h > > +++ b/arch/x86/kvm/cpuid.h > > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, > > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > > }; > > > > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned > > x86_feature) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index aa8638a..1b743a0 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, > > bool masked); > > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > > u16 error_code); > > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long > > *msr_bitmap, > > + u32 msr, int > > type); > > + > > > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx > 
> *vmx, unsigned msr, > > m->host[i].value = host_val; > > } > > > > +/* do not touch guest_val and host_val if the msr is not found */ > > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, > > + u64 *guest_val, u64 *host_val) > > +{ > > + unsigned i; > > + struct msr_autoload *m = &vmx->msr_autoload; > > + > > + for (i = 0; i < m->nr; ++i) > > + if (m->guest[i].index == msr) > > + break; > > + > > + if (i == m->nr) > > + return 1; > > + > > + if (guest_val) > > + *guest_val = m->guest[i].value; > > + if (host_val) > > + *host_val = m->host[i].value; > > + > > + return 0; > > +} > > + > > static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) > > { > > u64 guest_efer = vmx->vcpu.arch.efer; > > @@ -3203,7 +3228,9 @@ static inline bool > > vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, > > */ > > static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > > { > > + u64 spec_ctrl = 0; > > struct shared_msr_entry *msr; > > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > > > switch
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmedwrote: > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a > retpoline+IBPB based approach. > > To avoid the overhead of atomically saving and restoring the > MSR_IA32_SPEC_CTRL > for guests that do not actually use the MSR, only add_atomic_switch_msr when a > non-zero is written to it. > > Cc: Asit Mallick > Cc: Arjan Van De Ven > Cc: Dave Hansen > Cc: Andi Kleen > Cc: Andrea Arcangeli > Cc: Linus Torvalds > Cc: Tim Chen > Cc: Thomas Gleixner > Cc: Dan Williams > Cc: Jun Nakajima > Cc: Paolo Bonzini > Cc: David Woodhouse > Cc: Greg KH > Cc: Andy Lutomirski > Signed-off-by: KarimAllah Ahmed > Signed-off-by: Ashok Raj > --- > arch/x86/kvm/cpuid.c | 4 +++- > arch/x86/kvm/cpuid.h | 1 + > arch/x86/kvm/vmx.c | 63 > > 3 files changed, 67 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 0099e10..dc78095 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > /* These are scattered features in cpufeatures.h. */ > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 > +#define KVM_CPUID_BIT_SPEC_CTRL 26 > #define KF(x) bit(KVM_CPUID_BIT_##x) > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 > *entry, u32 function, > > /* cpuid 7.0.edx*/ > const u32 kvm_cpuid_7_0_edx_x86_features = > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to pass through for existing CPUs (26 and 27)? 
> > /* all calls to cpuid_count() should be made on the same cpu */ > get_cpu(); > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > index cdc70a3..dcfe227 100644 > --- a/arch/x86/kvm/cpuid.h > +++ b/arch/x86/kvm/cpuid.h > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > }; > > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned > x86_feature) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index aa8638a..1b743a0 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool > masked); > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > u16 error_code); > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long > *msr_bitmap, > + u32 msr, int type); > + > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx > *vmx, unsigned msr, > m->host[i].value = host_val; > } > > +/* do not touch guest_val and host_val if the msr is not found */ > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, > + u64 *guest_val, u64 *host_val) > +{ > + unsigned i; > + struct msr_autoload *m = >msr_autoload; > + > + for (i = 0; i < m->nr; ++i) > + if (m->guest[i].index == msr) > + break; > + > + if (i == m->nr) > + return 1; > + > + if (guest_val) > + *guest_val = m->guest[i].value; > + if (host_val) > + *host_val = m->host[i].value; > + > + return 0; > +} > + > static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) > { > u64 guest_efer = vmx->vcpu.arch.efer; > @@ -3203,7 +3228,9 @@ static inline bool 
vmx_feature_control_msr_valid(struct > kvm_vcpu *vcpu, > */ > static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > { > + u64 spec_ctrl = 0; > struct shared_msr_entry *msr; > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > switch
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed wrote: > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a > retpoline+IBPB based approach. > > To avoid the overhead of atomically saving and restoring the > MSR_IA32_SPEC_CTRL > for guests that do not actually use the MSR, only add_atomic_switch_msr when a > non-zero is written to it. > > Cc: Asit Mallick > Cc: Arjan Van De Ven > Cc: Dave Hansen > Cc: Andi Kleen > Cc: Andrea Arcangeli > Cc: Linus Torvalds > Cc: Tim Chen > Cc: Thomas Gleixner > Cc: Dan Williams > Cc: Jun Nakajima > Cc: Paolo Bonzini > Cc: David Woodhouse > Cc: Greg KH > Cc: Andy Lutomirski > Signed-off-by: KarimAllah Ahmed > Signed-off-by: Ashok Raj > --- > arch/x86/kvm/cpuid.c | 4 +++- > arch/x86/kvm/cpuid.h | 1 + > arch/x86/kvm/vmx.c | 63 > > 3 files changed, 67 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 0099e10..dc78095 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > /* These are scattered features in cpufeatures.h. */ > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 > +#define KVM_CPUID_BIT_SPEC_CTRL 26 > #define KF(x) bit(KVM_CPUID_BIT_##x) > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 > *entry, u32 function, > > /* cpuid 7.0.edx*/ > const u32 kvm_cpuid_7_0_edx_x86_features = > - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); > + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ > + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to pass through for existing CPUs (26 and 27)? 
> > /* all calls to cpuid_count() should be made on the same cpu */ > get_cpu(); > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > index cdc70a3..dcfe227 100644 > --- a/arch/x86/kvm/cpuid.h > +++ b/arch/x86/kvm/cpuid.h > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, > + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > }; > > static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned > x86_feature) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index aa8638a..1b743a0 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool > masked); > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > u16 error_code); > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long > *msr_bitmap, > + u32 msr, int type); > + > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx > *vmx, unsigned msr, > m->host[i].value = host_val; > } > > +/* do not touch guest_val and host_val if the msr is not found */ > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, > + u64 *guest_val, u64 *host_val) > +{ > + unsigned i; > + struct msr_autoload *m = >msr_autoload; > + > + for (i = 0; i < m->nr; ++i) > + if (m->guest[i].index == msr) > + break; > + > + if (i == m->nr) > + return 1; > + > + if (guest_val) > + *guest_val = m->guest[i].value; > + if (host_val) > + *host_val = m->host[i].value; > + > + return 0; > +} > + > static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) > { > u64 guest_efer = vmx->vcpu.arch.efer; > @@ -3203,7 +3228,9 @@ static inline bool 
vmx_feature_control_msr_valid(struct > kvm_vcpu *vcpu, > */ > static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > { > + u64 spec_ctrl = 0; > struct shared_msr_entry *msr; > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > switch (msr_info->index) { > #ifdef CONFIG_X86_64 > @@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct > msr_data *msr_info) > case MSR_IA32_TSC: > msr_info->data = guest_read_tsc(vcpu); > break; > + case MSR_IA32_SPEC_CTRL: > + if (!msr_info->host_initiated && > + !guest_cpuid_has(vcpu,
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 08:46:03AM +0000, David Woodhouse wrote: > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: > > > > Windows use IBRS and Microsoft don't have any plans to switch to retpoline. > > Running a Windows guest should be a pretty common use-case no? > > > > In addition, your handle of the first WRMSR intercept could be different. > > It could signal you to start doing the following: > > 1. Disable intercept on SPEC_CTRL MSR. > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) > > > > That way, you will both have fastest option as long as guest don't use IBRS > > and also won't have the 3% performance hit compared to Konrad's proposal. > > > > Am I missing something? > > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part > of the 3% speedup you observe is because in the above, the vmentry path > doesn't need to *read* the host's value and store it; the host is > expected to restore it for itself anyway? Yes for at least the purpose of correctness. That is based on what I have heard, which is that when you transition to a higher ring you have to write 1, then write zero when you transition back to lower rings. That is, it is like a knob. But then I heard that on some CPUs it is more like a reset button and just writing 1 to IBRS is fine. But again, correctness here. > > I'd actually quite like to repeat the benchmark on the new fixed > microcode, if anyone has it yet, to see if that read/swap slowness is > still quite as excessive. I'm certainly not ruling this out, but I'm > just a little wary of premature optimisation, and I'd like to make sure > we have everything *else* in the KVM patches right first. 
> > The fact that the save-and-restrict macros I have in the tip of my > working tree at the moment are horrid and causing 0-day nastygrams, > probably doesn't help persuade me to favour the approach ;) > > ... hm, the CPU actually has separate MSR save/restore lists for > entry/exit, doesn't it? Is there any way to sanely make use of that and > do the restoration manually on vmentry but let it be automatic on > vmexit, by having it *only* in the guest's MSR-store area to be saved > on exit and restored on exit, but *not* in the host's MSR-store area? Oh. That sounds interesting > > Reading the code and comparing with the SDM, I can't see where we're > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > case... Right. We (well Daniel Kiper, CC-ed) implemented it for this and that is where we got the numbers. Daniel, you OK posting it here? Granted with the caveat that it won't even compile against upstream as it was based on a distro kernel.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 08:46:03AM +0000, David Woodhouse wrote: > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: > > > > Windows use IBRS and Microsoft don't have any plans to switch to retpoline. > > Running a Windows guest should be a pretty common use-case no? > > > > In addition, your handle of the first WRMSR intercept could be different. > > It could signal you to start doing the following: > > 1. Disable intercept on SPEC_CTRL MSR. > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) > > > > That way, you will both have fastest option as long as guest don't use IBRS > > and also won't have the 3% performance hit compared to Konrad's proposal. > > > > Am I missing something? > > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part > of the 3% speedup you observe is because in the above, the vmentry path > doesn't need to *read* the host's value and store it; the host is > expected to restore it for itself anyway? Yes for at least the purpose of correctness. That is based on what I have heard, which is that when you transition to a higher ring you have to write 1, then write zero when you transition back to lower rings. That is, it is like a knob. But then I heard that on some CPUs it is more like a reset button and just writing 1 to IBRS is fine. But again, correctness here. > > I'd actually quite like to repeat the benchmark on the new fixed > microcode, if anyone has it yet, to see if that read/swap slowness is > still quite as excessive. I'm certainly not ruling this out, but I'm > just a little wary of premature optimisation, and I'd like to make sure > we have everything *else* in the KVM patches right first. 
> > The fact that the save-and-restrict macros I have in the tip of my > working tree at the moment are horrid and causing 0-day nastygrams, > probably doesn't help persuade me to favour the approach ;) > > ... hm, the CPU actually has separate MSR save/restore lists for > entry/exit, doesn't it? Is there any way to sanely make use of that and > do the restoration manually on vmentry but let it be automatic on > vmexit, by having it *only* in the guest's MSR-store area to be saved > on exit and restored on exit, but *not* in the host's MSR-store area? Oh. That sounds interesting > > Reading the code and comparing with the SDM, I can't see where we're > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > case... Right. We (well Daniel Kiper, CC-ed) implemented it for this and that is where we got the numbers. Daniel, you OK posting it here? Granted with the caveat that it won't even compile against upstream as it was based on a distro kernel.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 10:37:44AM +, David Woodhouse wrote: > On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote: > > On 01/29/2018 09:46 AM, David Woodhouse wrote: > > > Reading the code and comparing with the SDM, I can't see where we're > > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > > > case... > > Hmmm ... you are probably right! I think all users of this interface > > always trap + update save area and never passthrough the MSR. That is > > why only LOAD is needed *so far*. > > > > Okay, let me sort this out in v3 then. > > I'm starting to think a variant of Ashok's patch might actually be the > simpler approach, and not "premature optimisation". Especially if we > need to support the !cpu_has_vmx_msr_bitmaps() case? > > Start with vmx->spec_ctrl set to zero. When first touched, make it > passthrough (but not atomically switched) and set a flag (e.g. > "spec_ctrl_live") which triggers the 'restore_branch_speculation' and > 'save_and_restrict_branch_speculation' behaviours. Except don't use > those macros. Those can look something like > > /* If this vCPU has touched SPEC_CTRL then restore its value if needed */ > if (vmx->spec_ctrl_live && vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > /* vmentry is serialising on affected CPUs, so the conditional branch is > safe */ > > > ... and, respectively, ... > > /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have > zero */ > if (vmx->spec_ctrl_live) { > rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > if (vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, 0); > } > > > Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the > pass-through MSR bitmap directly, in the case that it exists? Or the cpuid_flag as that would determine whether the MSR bitmap intercept is set or not.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, Jan 29, 2018 at 10:37:44AM +, David Woodhouse wrote: > On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote: > > On 01/29/2018 09:46 AM, David Woodhouse wrote: > > > Reading the code and comparing with the SDM, I can't see where we're > > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > > > case... > > Hmmm ... you are probably right! I think all users of this interface > > always trap + update save area and never passthrough the MSR. That is > > why only LOAD is needed *so far*. > > > > Okay, let me sort this out in v3 then. > > I'm starting to think a variant of Ashok's patch might actually be the > simpler approach, and not "premature optimisation". Especially if we > need to support the !cpu_has_vmx_msr_bitmaps() case? > > Start with vmx->spec_ctrl set to zero. When first touched, make it > passthrough (but not atomically switched) and set a flag (e.g. > "spec_ctrl_live") which triggers the 'restore_branch_speculation' and > 'save_and_restrict_branch_speculation' behaviours. Except don't use > those macros. Those can look something like > > /* If this vCPU has touched SPEC_CTRL then restore its value if needed */ > if (vmx->spec_ctrl_live && vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > /* vmentry is serialising on affected CPUs, so the conditional branch is > safe */ > > > ... and, respectively, ... > > /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have > zero */ > if (vmx->spec_ctrl_live) { > rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > if (vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, 0); > } > > > Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the > pass-through MSR bitmap directly, in the case that it exists? Or the cpuid_flag as that would determine whether the MSR bitmap intercept is set or not.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 29/01/2018 11:37, David Woodhouse wrote: > On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote: >> On 01/29/2018 09:46 AM, David Woodhouse wrote: >>> Reading the code and comparing with the SDM, I can't see where we're >>> ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested >>> case... >> Hmmm ... you are probably right! I think all users of this interface >> always trap + update save area and never passthrough the MSR. That is >> why only LOAD is needed *so far*. >> >> Okay, let me sort this out in v3 then. > > I'm starting to think a variant of Ashok's patch might actually be the > simpler approach, and not "premature optimisation". Especially if we > need to support the !cpu_has_vmx_msr_bitmaps() case? That case is awfully slow anyway, it doesn't matter, but the direct-access flag would simply be always 0 if you have no MSR bitmaps. > Start with vmx->spec_ctrl set to zero. When first touched, make it > passthrough (but not atomically switched) and set a flag (e.g. > "spec_ctrl_live") which triggers the 'restore_branch_speculation' and > 'save_and_restrict_branch_speculation' behaviours. Except don't use > those macros. Those can look something like > > /* If this vCPU has touched SPEC_CTRL then restore its value if needed */ > if (vmx->spec_ctrl_live && vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > /* vmentry is serialising on affected CPUs, so the conditional branch is > safe */ > > > ... and, respectively, ... > > /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have > zero */ > if (vmx->spec_ctrl_live) { > rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > if (vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, 0); > } > > Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the > pass-through MSR bitmap directly, in the case that it exists? Probably a cache miss, or even a TLB miss if you're unlucky, so the separate flag is okay. Paolo
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 29/01/2018 11:37, David Woodhouse wrote: > On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote: >> On 01/29/2018 09:46 AM, David Woodhouse wrote: >>> Reading the code and comparing with the SDM, I can't see where we're >>> ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested >>> case... >> Hmmm ... you are probably right! I think all users of this interface >> always trap + update save area and never passthrough the MSR. That is >> why only LOAD is needed *so far*. >> >> Okay, let me sort this out in v3 then. > > I'm starting to think a variant of Ashok's patch might actually be the > simpler approach, and not "premature optimisation". Especially if we > need to support the !cpu_has_vmx_msr_bitmaps() case? That case is awfully slow anyway, it doesn't matter, but the direct-access flag would simply be always 0 if you have no MSR bitmaps. > Start with vmx->spec_ctrl set to zero. When first touched, make it > passthrough (but not atomically switched) and set a flag (e.g. > "spec_ctrl_live") which triggers the 'restore_branch_speculation' and > 'save_and_restrict_branch_speculation' behaviours. Except don't use > those macros. Those can look something like > > /* If this vCPU has touched SPEC_CTRL then restore its value if needed */ > if (vmx->spec_ctrl_live && vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > /* vmentry is serialising on affected CPUs, so the conditional branch is > safe */ > > > ... and, respectively, ... > > /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have > zero */ > if (vmx->spec_ctrl_live) { > rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); > if (vmx->spec_ctrl) > wrmsrl(MSR_IA32_SPEC_CTRL, 0); > } > > Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the > pass-through MSR bitmap directly, in the case that it exists? Probably a cache miss, or even a TLB miss if you're unlucky, so the separate flag is okay. Paolo
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 29/01/2018 09:46, David Woodhouse wrote: > I'd actually quite like to repeat the benchmark on the new fixed > microcode, if anyone has it yet, to see if that read/swap slowness is > still quite as excessive. I'm certainly not ruling this out, but I'm > just a little wary of premature optimisation, and I'd like to make sure > we have everything *else* in the KVM patches right first. > > The fact that the save-and-restrict macros I have in the tip of my > working tree at the moment are horrid and causing 0-day nastygrams, > probably doesn't help persuade me to favour the approach ;) > > ... hm, the CPU actually has separate MSR save/restore lists for > entry/exit, doesn't it? Is there any way to sanely make use of that and > do the restoration manually on vmentry but let it be automatic on > vmexit, by having it *only* in the guest's MSR-store area to be saved > on exit and restored on exit, but *not* in the host's MSR-store area? Right now we don't even use the store-on-vmexit list at all, so the Simplest Patch That Can Possibly Work is definitely the one using rdmsr/wrmsr. It's not really premature optimization---though it doesn't hurt that it isn't awfully slow. Paolo
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 29/01/2018 09:46, David Woodhouse wrote: > I'd actually quite like to repeat the benchmark on the new fixed > microcode, if anyone has it yet, to see if that read/swap slowness is > still quite as excessive. I'm certainly not ruling this out, but I'm > just a little wary of premature optimisation, and I'd like to make sure > we have everything *else* in the KVM patches right first. > > The fact that the save-and-restrict macros I have in the tip of my > working tree at the moment are horrid and causing 0-day nastygrams, > probably doesn't help persuade me to favour the approach ;) > > ... hm, the CPU actually has separate MSR save/restore lists for > entry/exit, doesn't it? Is there any way to sanely make use of that and > do the restoration manually on vmentry but let it be automatic on > vmexit, by having it *only* in the guest's MSR-store area to be saved > on exit and restored on exit, but *not* in the host's MSR-store area? Right now we don't even use the store-on-vmexit list at all, so the Simplest Patch That Can Possibly Work is definitely the one using rdmsr/wrmsr. It's not really premature optimization---though it doesn't hurt that it isn't awfully slow. Paolo
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote: > On 01/29/2018 09:46 AM, David Woodhouse wrote: > > Reading the code and comparing with the SDM, I can't see where we're > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > > case... > Hmmm ... you are probably right! I think all users of this interface > always trap + update save area and never passthrough the MSR. That is > why only LOAD is needed *so far*. > > Okay, let me sort this out in v3 then. I'm starting to think a variant of Ashok's patch might actually be the simpler approach, and not "premature optimisation". Especially if we need to support the !cpu_has_vmx_msr_bitmaps() case? Start with vmx->spec_ctrl set to zero. When first touched, make it passthrough (but not atomically switched) and set a flag (e.g. "spec_ctrl_live") which triggers the 'restore_branch_speculation' and 'save_and_restrict_branch_speculation' behaviours. Except don't use those macros. Those can look something like /* If this vCPU has touched SPEC_CTRL then restore its value if needed */ if (vmx->spec_ctrl_live && vmx->spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); /* vmentry is serialising on affected CPUs, so the conditional branch is safe */ ... and, respectively, ... /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have zero */ if (vmx->spec_ctrl_live) { rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); if (vmx->spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, 0); } Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the pass-through MSR bitmap directly, in the case that it exists? smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote: > On 01/29/2018 09:46 AM, David Woodhouse wrote: > > Reading the code and comparing with the SDM, I can't see where we're > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested > > case... > Hmmm ... you are probably right! I think all users of this interface > always trap + update save area and never passthrough the MSR. That is > why only LOAD is needed *so far*. > > Okay, let me sort this out in v3 then. I'm starting to think a variant of Ashok's patch might actually be the simpler approach, and not "premature optimisation". Especially if we need to support the !cpu_has_vmx_msr_bitmaps() case? Start with vmx->spec_ctrl set to zero. When first touched, make it passthrough (but not atomically switched) and set a flag (e.g. "spec_ctrl_live") which triggers the 'restore_branch_speculation' and 'save_and_restrict_branch_speculation' behaviours. Except don't use those macros. Those can look something like /* If this vCPU has touched SPEC_CTRL then restore its value if needed */ if (vmx->spec_ctrl_live && vmx->spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); /* vmentry is serialising on affected CPUs, so the conditional branch is safe */ ... and, respectively, ... /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have zero */ if (vmx->spec_ctrl_live) { rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); if (vmx->spec_ctrl) wrmsrl(MSR_IA32_SPEC_CTRL, 0); } Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the pass-through MSR bitmap directly, in the case that it exists? smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 01/29/2018 09:46 AM, David Woodhouse wrote: On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: Windows use IBRS and Microsoft don't have any plans to switch to retpoline. Running a Windows guest should be a pretty common use-case no? In addition, your handle of the first WRMSR intercept could be different. It could signal you to start doing the following: 1. Disable intercept on SPEC_CTRL MSR. 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) That way, you will both have fastest option as long as guest don't use IBRS and also won't have the 3% performance hit compared to Konrad's proposal. Am I missing something? Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part of the 3% speedup you observe is because in the above, the vmentry path doesn't need to *read* the host's value and store it; the host is expected to restore it for itself anyway? I'd actually quite like to repeat the benchmark on the new fixed microcode, if anyone has it yet, to see if that read/swap slowness is still quite as excessive. I'm certainly not ruling this out, but I'm just a little wary of premature optimisation, and I'd like to make sure we have everything *else* in the KVM patches right first. The fact that the save-and-restrict macros I have in the tip of my working tree at the moment are horrid and causing 0-day nastygrams, probably doesn't help persuade me to favour the approach ;) ... hm, the CPU actually has separate MSR save/restore lists for entry/exit, doesn't it? Is there any way to sanely make use of that and do the restoration manually on vmentry but let it be automatic on vmexit, by having it *only* in the guest's MSR-store area to be saved on exit and restored on exit, but *not* in the host's MSR-store area? 
Reading the code and comparing with the SDM, I can't see where we're ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested case... Hmmm ... you are probably right! I think all users of this interface always trap + update save area and never passthrough the MSR. That is why only LOAD is needed *so far*. Okay, let me sort this out in v3 then. Amazon Development Center Germany GmbH Berlin - Dresden - Aachen main office: Krausenstr. 38, 10117 Berlin Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger Ust-ID: DE289237879 Eingetragen am Amtsgericht Charlottenburg HRB 149173 B
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 01/29/2018 09:46 AM, David Woodhouse wrote: On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: Windows use IBRS and Microsoft don't have any plans to switch to retpoline. Running a Windows guest should be a pretty common use-case no? In addition, your handle of the first WRMSR intercept could be different. It could signal you to start doing the following: 1. Disable intercept on SPEC_CTRL MSR. 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) That way, you will both have fastest option as long as guest don't use IBRS and also won't have the 3% performance hit compared to Konrad's proposal. Am I missing something? Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part of the 3% speedup you observe is because in the above, the vmentry path doesn't need to *read* the host's value and store it; the host is expected to restore it for itself anyway? I'd actually quite like to repeat the benchmark on the new fixed microcode, if anyone has it yet, to see if that read/swap slowness is still quite as excessive. I'm certainly not ruling this out, but I'm just a little wary of premature optimisation, and I'd like to make sure we have everything *else* in the KVM patches right first. The fact that the save-and-restrict macros I have in the tip of my working tree at the moment are horrid and causing 0-day nastygrams, probably doesn't help persuade me to favour the approach ;) ... hm, the CPU actually has separate MSR save/restore lists for entry/exit, doesn't it? Is there any way to sanely make use of that and do the restoration manually on vmentry but let it be automatic on vmexit, by having it *only* in the guest's MSR-store area to be saved on exit and restored on exit, but *not* in the host's MSR-store area? 
Reading the code and comparing with the SDM, I can't see where we're ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested case... Hmmm ... you are probably right! I think all users of this interface always trap + update save area and never passthrough the MSR. That is why only LOAD is needed *so far*. Okay, let me sort this out in v3 then. Amazon Development Center Germany GmbH Berlin - Dresden - Aachen main office: Krausenstr. 38, 10117 Berlin Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger Ust-ID: DE289237879 Eingetragen am Amtsgericht Charlottenburg HRB 149173 B
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: > > Windows use IBRS and Microsoft don't have any plans to switch to retpoline. > Running a Windows guest should be a pretty common use-case no? > > In addition, your handle of the first WRMSR intercept could be different. > It could signal you to start doing the following: > 1. Disable intercept on SPEC_CTRL MSR. > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) > > That way, you will both have fastest option as long as guest don't use IBRS > and also won't have the 3% performance hit compared to Konrad's proposal. > > Am I missing something? Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part of the 3% speedup you observe is because in the above, the vmentry path doesn't need to *read* the host's value and store it; the host is expected to restore it for itself anyway? I'd actually quite like to repeat the benchmark on the new fixed microcode, if anyone has it yet, to see if that read/swap slowness is still quite as excessive. I'm certainly not ruling this out, but I'm just a little wary of premature optimisation, and I'd like to make sure we have everything *else* in the KVM patches right first. The fact that the save-and-restrict macros I have in the tip of my working tree at the moment are horrid and causing 0-day nastygrams, probably doesn't help persuade me to favour the approach ;) ... hm, the CPU actually has separate MSR save/restore lists for entry/exit, doesn't it? Is there any way to sanely make use of that and do the restoration manually on vmentry but let it be automatic on vmexit, by having it *only* in the guest's MSR-store area to be saved on exit and restored on exit, but *not* in the host's MSR-store area? 
Reading the code and comparing with the SDM, I can't see where we're ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested case... smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote: > > Windows use IBRS and Microsoft don't have any plans to switch to retpoline. > Running a Windows guest should be a pretty common use-case no? > > In addition, your handle of the first WRMSR intercept could be different. > It could signal you to start doing the following: > 1. Disable intercept on SPEC_CTRL MSR. > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) > > That way, you will both have fastest option as long as guest don't use IBRS > and also won't have the 3% performance hit compared to Konrad's proposal. > > Am I missing something? Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part of the 3% speedup you observe is because in the above, the vmentry path doesn't need to *read* the host's value and store it; the host is expected to restore it for itself anyway? I'd actually quite like to repeat the benchmark on the new fixed microcode, if anyone has it yet, to see if that read/swap slowness is still quite as excessive. I'm certainly not ruling this out, but I'm just a little wary of premature optimisation, and I'd like to make sure we have everything *else* in the KVM patches right first. The fact that the save-and-restrict macros I have in the tip of my working tree at the moment are horrid and causing 0-day nastygrams, probably doesn't help persuade me to favour the approach ;) ... hm, the CPU actually has separate MSR save/restore lists for entry/exit, doesn't it? Is there any way to sanely make use of that and do the restoration manually on vmentry but let it be automatic on vmexit, by having it *only* in the guest's MSR-store area to be saved on exit and restored on exit, but *not* in the host's MSR-store area? 
Reading the code and comparing with the SDM, I can't see where we're ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested case... smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 01/28/2018 09:21 PM, Konrad Rzeszutek Wilk wrote: On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmedwrote: Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only add_atomic_switch_msr when a non-zero is written to it. We tried this and found that it was about 3% slower that doing the old way of rdmsr and wrmsr. I actually have not measured the performance difference between using the atomic_switch vs just just doing rdmsr/wrmsr. I was mostly focused on not saving and restoring when the guest does not actually use the MSRs. Interesting data point though, I will update the code to use rdmsr/wrmsr and see if I see it in my hardware (I am using a skylake processor). But that was also with the host doing IBRS as well. On what type of hardware did you run this? Ccing Daniel. Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Dave Hansen Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Linus Torvalds Cc: Tim Chen Cc: Thomas Gleixner Cc: Dan Williams Cc: Jun Nakajima Cc: Paolo Bonzini Cc: David Woodhouse Cc: Greg KH Cc: Andy Lutomirski Signed-off-by: KarimAllah Ahmed Signed-off-by: Ashok Raj --- arch/x86/kvm/cpuid.c | 4 +++- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/vmx.c | 63 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10..dc78095 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) /* These are scattered features in cpufeatures.h. 
*/ #define KVM_CPUID_BIT_AVX512_4VNNIW 2 #define KVM_CPUID_BIT_AVX512_4FMAPS 3 +#define KVM_CPUID_BIT_SPEC_CTRL 26 #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3..dcfe227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa8638a..1b743a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +/* do not touch guest_val and host_val if the msr is not found */ +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 *guest_val, u64 *host_val) +{ + unsigned i; + struct msr_autoload *m = &vmx->msr_autoload; + + for (i = 0; i < m->nr; ++i) + 
if (m->guest[i].index == msr) + break; + + if (i == m->nr) + return 1; + + if (guest_val) + *guest_val = m->guest[i].value; + if (host_val) + *host_val = m->host[i].value; + + return 0; +} + static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { u64 guest_efer = vmx->vcpu.arch.efer; @@ -3203,7 +3228,9 @@ static inline bool
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On 01/28/2018 09:21 PM, Konrad Rzeszutek Wilk wrote: On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed wrote: Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only add_atomic_switch_msr when a non-zero is written to it. We tried this and found that it was about 3% slower that doing the old way of rdmsr and wrmsr. I actually have not measured the performance difference between using the atomic_switch vs just just doing rdmsr/wrmsr. I was mostly focused on not saving and restoring when the guest does not actually use the MSRs. Interesting data point though, I will update the code to use rdmsr/wrmsr and see if I see it in my hardware (I am using a skylake processor). But that was also with the host doing IBRS as well. On what type of hardware did you run this? Ccing Daniel. Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Dave Hansen Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Linus Torvalds Cc: Tim Chen Cc: Thomas Gleixner Cc: Dan Williams Cc: Jun Nakajima Cc: Paolo Bonzini Cc: David Woodhouse Cc: Greg KH Cc: Andy Lutomirski Signed-off-by: KarimAllah Ahmed Signed-off-by: Ashok Raj --- arch/x86/kvm/cpuid.c | 4 +++- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/vmx.c | 63 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10..dc78095 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) /* These are scattered features in cpufeatures.h. 
*/ #define KVM_CPUID_BIT_AVX512_4VNNIW 2 #define KVM_CPUID_BIT_AVX512_4FMAPS 3 +#define KVM_CPUID_BIT_SPEC_CTRL 26 #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3..dcfe227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa8638a..1b743a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +/* do not touch guest_val and host_val if the msr is not found */ +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 *guest_val, u64 *host_val) +{ + unsigned i; + struct msr_autoload *m = &vmx->msr_autoload; + + for (i = 0; i < m->nr; ++i) + 
if (m->guest[i].index == msr) + break; + + if (i == m->nr) + return 1; + + if (guest_val) + *guest_val = m->guest[i].value; + if (host_val) + *host_val = m->host[i].value; + + return 0; +} + static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { u64 guest_efer = vmx->vcpu.arch.efer; @@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, */ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + u64 spec_ctrl = 0; struct shared_msr_entry *msr; + struct vcpu_vmx *vmx = to_vmx(vcpu); switch (msr_info->index) { #ifdef CONFIG_X86_64 @@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
- dw...@infradead.org wrote: > On Sun, 2018-01-28 at 15:21 -0500, Konrad Rzeszutek Wilk wrote: > > >To avoid the overhead of atomically saving and restoring the > MSR_IA32_SPEC_CTRL > > >for guests that do not actually use the MSR, only > add_atomic_switch_msr when a > > >non-zero is written to it. > > > > > > We tried this and found that it was about 3% slower that doing the > > old way of rdmsr and wrmsr. > > > > But that was also with the host doing IBRS as well. > > The common case will be that neither host nor guest are doing IBRS. > Until the guest touches the MSR we do absolutely *nothing* with it, > which is definitely the fastest option. Windows use IBRS and Microsoft don't have any plans to switch to retpoline. Running a Windows guest should be a pretty common use-case no? In addition, your handle of the first WRMSR intercept could be different. It could signal you to start doing the following: 1. Disable intercept on SPEC_CTRL MSR. 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) That way, you will both have fastest option as long as guest don't use IBRS and also won't have the 3% performance hit compared to Konrad's proposal. Am I missing something? -Liran
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
- dw...@infradead.org wrote: > On Sun, 2018-01-28 at 15:21 -0500, Konrad Rzeszutek Wilk wrote: > > >To avoid the overhead of atomically saving and restoring the > MSR_IA32_SPEC_CTRL > > >for guests that do not actually use the MSR, only > add_atomic_switch_msr when a > > >non-zero is written to it. > > > > > > We tried this and found that it was about 3% slower that doing the > > old way of rdmsr and wrmsr. > > > > But that was also with the host doing IBRS as well. > > The common case will be that neither host nor guest are doing IBRS. > Until the guest touches the MSR we do absolutely *nothing* with it, > which is definitely the fastest option. Windows use IBRS and Microsoft don't have any plans to switch to retpoline. Running a Windows guest should be a pretty common use-case no? In addition, your handle of the first WRMSR intercept could be different. It could signal you to start doing the following: 1. Disable intercept on SPEC_CTRL MSR. 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR. 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value. (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1) That way, you will both have fastest option as long as guest don't use IBRS and also won't have the 3% performance hit compared to Konrad's proposal. Am I missing something? -Liran
RE: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
>> >> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: >> > >> > Do you mean that the host would intercept the guest WRMSR and do >> > WRMSR itself? I would suggest that doing so is inconsistent with the >> > docs. As specified, doing WRMSR to write 1 to IBRS does *not* >> > protect the guest. >> >> I believe it does. Guest kernel is protected from any guest userspace >> predictions learned before IBRS was last set to 1 in *any* mode, >> including host. > > the specification requires you to write a 1 on each transition to higher > privilege. Right. Andy's concern was about VMX non-root (i.e. guest) ring 0 attempting to write IBRS but it's trapped and actually happens in the host. As long as it *remains* set when the host re-enters the vCPU that should be fine. -- dwmw2
RE: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
>> >> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: >> > >> > Do you mean that the host would intercept the guest WRMSR and do >> > WRMSR itself? I would suggest that doing so is inconsistent with the >> > docs. As specified, doing WRMSR to write 1 to IBRS does *not* >> > protect the guest. >> >> I believe it does. Guest kernel is protected from any guest userspace >> predictions learned before IBRS was last set to 1 in *any* mode, >> including host. > > the specification requires you to write a 1 on each transition to higher > privilege. Right. Andy's concern was about VMX non-root (i.e. guest) ring 0 attempting to write IBRS but it's trapped and actually happens in the host. As long as it *remains* set when the host re-enters the vCPU that should be fine. -- dwmw2
RE: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
> > On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: > > > > Do you mean that the host would intercept the guest WRMSR and do > > WRMSR itself? I would suggest that doing so is inconsistent with the > > docs. As specified, doing WRMSR to write 1 to IBRS does *not* > > protect the guest. > > I believe it does. Guest kernel is protected from any guest userspace > predictions learned before IBRS was last set to 1 in *any* mode, > including host. the specification requires you to write a 1 on each transition to higher privilege. > > > For that matter, what are the semantics of VMRESUME doing a write to > > IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest > > context? the guest ring 3 wouldn't have had time to do anything evil in the mean time so the vmresume write is valid. (anything else would be unworkable)
RE: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
> > On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: > > > > Do you mean that the host would intercept the guest WRMSR and do > > WRMSR itself? I would suggest that doing so is inconsistent with the > > docs. As specified, doing WRMSR to write 1 to IBRS does *not* > > protect the guest. > > I believe it does. Guest kernel is protected from any guest userspace > predictions learned before IBRS was last set to 1 in *any* mode, > including host. the specification requires you to write a 1 on each transition to higher privilege. > > > For that matter, what are the semantics of VMRESUME doing a write to > > IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest > > context? the guest ring 3 wouldn't have had time to do anything evil in the mean time so the vmresume write is valid. (anything else would be unworkable)
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 12:53 -0800, Andy Lutomirski wrote: > > > I believe it does. Guest kernel is protected from any guest userspace > > predictions learned before IBRS was last set to 1 in *any* mode, > > including host. > > Hmm, you're probably right. > > I would love to know what awful hack Intel did that resulted in these > semantics. I am not convinced I ever really want to know. I just want it all to go away in a future CPU with a SPCTR_NO bit in IA32_ARCH_CAPABILITIES. (Not the IBRS_ALL interim hack). I think it's a mixture of ongoing checking, and a barrier. And perhaps varying proportions of each, in different CPU generations. By defining it thus, they can actually implement it *either* way. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 12:53 -0800, Andy Lutomirski wrote: > > > I believe it does. Guest kernel is protected from any guest userspace > > predictions learned before IBRS was last set to 1 in *any* mode, > > including host. > > Hmm, you're probably right. > > I would love to know what awful hack Intel did that resulted in these > semantics. I am not convinced I ever really want to know. I just want it all to go away in a future CPU with a SPCTR_NO bit in IA32_ARCH_CAPABILITIES. (Not the IBRS_ALL interim hack). I think it's a mixture of ongoing checking, and a barrier. And perhaps varying proportions of each, in different CPU generations. By defining it thus, they can actually implement it *either* way. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
> On Jan 28, 2018, at 12:44 PM, David Woodhouse wrote: > >> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: >> >> Do you mean that the host would intercept the guest WRMSR and do >> WRMSR itself? I would suggest that doing so is inconsistent with the >> docs. As specified, doing WRMSR to write 1 to IBRS does *not* >> protect the guest. > > I believe it does. Guest kernel is protected from any guest userspace > predictions learned before IBRS was last set to 1 in *any* mode, > including host. Hmm, you're probably right. I would love to know what awful hack Intel did that resulted in these semantics. > >> For that matter, what are the semantics of VMRESUME doing a write to >> IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest >> context? > > Why does it matter? We *have* confirmed, FWIW, that VMRESUME writing 1 > to IBRS as part of its MSR switch when it was already 1 is not > optimised away and *is* treated as writing IBRS=1 again. That's good news.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
> On Jan 28, 2018, at 12:44 PM, David Woodhouse wrote: > >> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: >> >> Do you mean that the host would intercept the guest WRMSR and do >> WRMSR itself? I would suggest that doing so is inconsistent with the >> docs. As specified, doing WRMSR to write 1 to IBRS does *not* >> protect the guest. > > I believe it does. Guest kernel is protected from any guest userspace > predictions learned before IBRS was last set to 1 in *any* mode, > including host. Hmm, you're probably right. I would love to know what awful hack Intel did that resulted in these semantics. > >> For that matter, what are the semantics of VMRESUME doing a write to >> IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest >> context? > > Why does it matter? We *have* confirmed, FWIW, that VMRESUME writing 1 > to IBRS as part of its MSR switch when it was already 1 is not > optimised away and *is* treated as writing IBRS=1 again. That's good news.
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: > > Do you mean that the host would intercept the guest WRMSR and do > WRMSR itself? I would suggest that doing so is inconsistent with the > docs. As specified, doing WRMSR to write 1 to IBRS does *not* > protect the guest. I believe it does. Guest kernel is protected from any guest userspace predictions learned before IBRS was last set to 1 in *any* mode, including host. > For that matter, what are the semantics of VMRESUME doing a write to > IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest > context? Why does it matter? We *have* confirmed, FWIW, that VMRESUME writing 1 to IBRS as part of its MSR switch when it was already 1 is not optimised away and *is* treated as writing IBRS=1 again. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote: > > Do you mean that the host would intercept the guest WRMSR and do > WRMSR itself? I would suggest that doing so is inconsistent with the > docs. As specified, doing WRMSR to write 1 to IBRS does *not* > protect the guest. I believe it does. Guest kernel is protected from any guest userspace predictions learned before IBRS was last set to 1 in *any* mode, including host. > For that matter, what are the semantics of VMRESUME doing a write to > IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest > context? Why does it matter? We *have* confirmed, FWIW, that VMRESUME writing 1 to IBRS as part of its MSR switch when it was already 1 is not optimised away and *is* treated as writing IBRS=1 again. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
> On Jan 28, 2018, at 12:21 PM, Konrad Rzeszutek Wilk wrote: > >> On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed >> wrote: >> Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >> guests >> that will only mitigate Spectre V2 through IBRS+IBPB and will not be >> using a >> retpoline+IBPB based approach. >> >> To avoid the overhead of atomically saving and restoring the >> MSR_IA32_SPEC_CTRL >> for guests that do not actually use the MSR, only add_atomic_switch_msr >> when a >> non-zero is written to it. > > > We tried this and found that it was about 3% slower that doing the old way of > rdmsr and wrmsr. > Do you mean that the host would intercept the guest WRMSR and do WRMSR itself? I would suggest that doing so is inconsistent with the docs. As specified, doing WRMSR to write 1 to IBRS does *not* protect the guest. For that matter, what are the semantics of VMRESUME doing a write to IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest context?
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
> On Jan 28, 2018, at 12:21 PM, Konrad Rzeszutek Wilk > wrote: > >> On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed >> wrote: >> Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >> guests >> that will only mitigate Spectre V2 through IBRS+IBPB and will not be >> using a >> retpoline+IBPB based approach. >> >> To avoid the overhead of atomically saving and restoring the >> MSR_IA32_SPEC_CTRL >> for guests that do not actually use the MSR, only add_atomic_switch_msr >> when a >> non-zero is written to it. > > > We tried this and found that it was about 3% slower that doing the old way of > rdmsr and wrmsr. > Do you mean that the host would intercept the guest WRMSR and do WRMSR itself? I would suggest that doing so is inconsistent with the docs. As specified, doing WRMSR to write 1 to IBRS does *not* protect the guest. For that matter, what are the semantics of VMRESUME doing a write to IBRS as part of its MSR switch? Is it treated as IBRS=1 from guest context?
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 15:21 -0500, Konrad Rzeszutek Wilk wrote: > >To avoid the overhead of atomically saving and restoring the > >MSR_IA32_SPEC_CTRL > >for guests that do not actually use the MSR, only add_atomic_switch_msr when > >a > >non-zero is written to it. > > > We tried this and found that it was about 3% slower that doing the > old way of rdmsr and wrmsr. > > But that was also with the host doing IBRS as well. The common case will be that neither host nor guest are doing IBRS. Until the guest touches the MSR we do absolutely *nothing* with it, which is definitely the fastest option. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On Sun, 2018-01-28 at 15:21 -0500, Konrad Rzeszutek Wilk wrote: > >To avoid the overhead of atomically saving and restoring the > >MSR_IA32_SPEC_CTRL > >for guests that do not actually use the MSR, only add_atomic_switch_msr when > >a > >non-zero is written to it. > > > We tried this and found that it was about 3% slower that doing the > old way of rdmsr and wrmsr. > > But that was also with the host doing IBRS as well. The common case will be that neither host nor guest are doing IBRS. Until the guest touches the MSR we do absolutely *nothing* with it, which is definitely the fastest option. smime.p7s Description: S/MIME cryptographic signature
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmedwrote: >Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >guests >that will only mitigate Spectre V2 through IBRS+IBPB and will not be >using a >retpoline+IBPB based approach. > >To avoid the overhead of atomically saving and restoring the >MSR_IA32_SPEC_CTRL >for guests that do not actually use the MSR, only add_atomic_switch_msr >when a >non-zero is written to it. We tried this and found that it was about 3% slower that doing the old way of rdmsr and wrmsr. But that was also with the host doing IBRS as well. On what type of hardware did you run this? Ccing Daniel. > >Cc: Asit Mallick >Cc: Arjan Van De Ven >Cc: Dave Hansen >Cc: Andi Kleen >Cc: Andrea Arcangeli >Cc: Linus Torvalds >Cc: Tim Chen >Cc: Thomas Gleixner >Cc: Dan Williams >Cc: Jun Nakajima >Cc: Paolo Bonzini >Cc: David Woodhouse >Cc: Greg KH >Cc: Andy Lutomirski >Signed-off-by: KarimAllah Ahmed >Signed-off-by: Ashok Raj >--- > arch/x86/kvm/cpuid.c | 4 +++- > arch/x86/kvm/cpuid.h | 1 + >arch/x86/kvm/vmx.c | 63 > > 3 files changed, 67 insertions(+), 1 deletion(-) > >diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >index 0099e10..dc78095 100644 >--- a/arch/x86/kvm/cpuid.c >+++ b/arch/x86/kvm/cpuid.c >@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > /* These are scattered features in cpufeatures.h. */ > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 >+#define KVM_CPUID_BIT_SPEC_CTRL 26 > #define KF(x) bit(KVM_CPUID_BIT_##x) > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) >@@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct >kvm_cpuid_entry2 *entry, u32 function, > > /* cpuid 7.0.edx*/ > const u32 kvm_cpuid_7_0_edx_x86_features = >- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); >+ KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ >+ (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? 
KF(SPEC_CTRL) : 0); > > /* all calls to cpuid_count() should be made on the same cpu */ > get_cpu(); >diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h >index cdc70a3..dcfe227 100644 >--- a/arch/x86/kvm/cpuid.h >+++ b/arch/x86/kvm/cpuid.h >@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, >+ [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > }; > >static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned >x86_feature) >diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >index aa8638a..1b743a0 100644 >--- a/arch/x86/kvm/vmx.c >+++ b/arch/x86/kvm/vmx.c >@@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, >bool masked); > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > u16 error_code); > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); >+static void __always_inline vmx_disable_intercept_for_msr(unsigned >long *msr_bitmap, >+u32 msr, int type); >+ > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); >@@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct >vcpu_vmx *vmx, unsigned msr, > m->host[i].value = host_val; > } > >+/* do not touch guest_val and host_val if the msr is not found */ >+static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, >+u64 *guest_val, u64 *host_val) >+{ >+ unsigned i; >+ struct msr_autoload *m = &vmx->msr_autoload; >+ >+ for (i = 0; i < m->nr; ++i) >+ if (m->guest[i].index == msr) >+ break; >+ >+ if (i == m->nr) >+ return 1; >+ >+ if (guest_val) >+ *guest_val = m->guest[i].value; >+ if (host_val) >+ *host_val = m->host[i].value; >+ >+ return 0; >+} >+ >static bool update_transition_efer(struct vcpu_vmx *vmx, int >efer_offset) > { > u64 guest_efer = vmx->vcpu.arch.efer; >@@ -3203,7 +3228,9 @@ static inline bool >vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, > 
*/ >static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data >*msr_info) > { >+ u64 spec_ctrl = 0; > struct shared_msr_entry *msr; >+ struct vcpu_vmx *vmx = to_vmx(vcpu); > > switch (msr_info->index) { > #ifdef CONFIG_X86_64 >@@
Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed wrote: >Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for >guests >that will only mitigate Spectre V2 through IBRS+IBPB and will not be >using a >retpoline+IBPB based approach. > >To avoid the overhead of atomically saving and restoring the >MSR_IA32_SPEC_CTRL >for guests that do not actually use the MSR, only add_atomic_switch_msr >when a >non-zero is written to it. We tried this and found that it was about 3% slower that doing the old way of rdmsr and wrmsr. But that was also with the host doing IBRS as well. On what type of hardware did you run this? Ccing Daniel. > >Cc: Asit Mallick >Cc: Arjan Van De Ven >Cc: Dave Hansen >Cc: Andi Kleen >Cc: Andrea Arcangeli >Cc: Linus Torvalds >Cc: Tim Chen >Cc: Thomas Gleixner >Cc: Dan Williams >Cc: Jun Nakajima >Cc: Paolo Bonzini >Cc: David Woodhouse >Cc: Greg KH >Cc: Andy Lutomirski >Signed-off-by: KarimAllah Ahmed >Signed-off-by: Ashok Raj >--- > arch/x86/kvm/cpuid.c | 4 +++- > arch/x86/kvm/cpuid.h | 1 + >arch/x86/kvm/vmx.c | 63 > > 3 files changed, 67 insertions(+), 1 deletion(-) > >diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >index 0099e10..dc78095 100644 >--- a/arch/x86/kvm/cpuid.c >+++ b/arch/x86/kvm/cpuid.c >@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) > /* These are scattered features in cpufeatures.h. */ > #define KVM_CPUID_BIT_AVX512_4VNNIW 2 > #define KVM_CPUID_BIT_AVX512_4FMAPS 3 >+#define KVM_CPUID_BIT_SPEC_CTRL 26 > #define KF(x) bit(KVM_CPUID_BIT_##x) > > int kvm_update_cpuid(struct kvm_vcpu *vcpu) >@@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct >kvm_cpuid_entry2 *entry, u32 function, > > /* cpuid 7.0.edx*/ > const u32 kvm_cpuid_7_0_edx_x86_features = >- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); >+ KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ >+ (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? 
KF(SPEC_CTRL) : 0); > > /* all calls to cpuid_count() should be made on the same cpu */ > get_cpu(); >diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h >index cdc70a3..dcfe227 100644 >--- a/arch/x86/kvm/cpuid.h >+++ b/arch/x86/kvm/cpuid.h >@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, > [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, >+ [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > }; > >static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned >x86_feature) >diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >index aa8638a..1b743a0 100644 >--- a/arch/x86/kvm/vmx.c >+++ b/arch/x86/kvm/vmx.c >@@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, >bool masked); > static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > u16 error_code); > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); >+static void __always_inline vmx_disable_intercept_for_msr(unsigned >long *msr_bitmap, >+u32 msr, int type); >+ > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); >@@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct >vcpu_vmx *vmx, unsigned msr, > m->host[i].value = host_val; > } > >+/* do not touch guest_val and host_val if the msr is not found */ >+static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, >+u64 *guest_val, u64 *host_val) >+{ >+ unsigned i; >+ struct msr_autoload *m = &vmx->msr_autoload; >+ >+ for (i = 0; i < m->nr; ++i) >+ if (m->guest[i].index == msr) >+ break; >+ >+ if (i == m->nr) >+ return 1; >+ >+ if (guest_val) >+ *guest_val = m->guest[i].value; >+ if (host_val) >+ *host_val = m->host[i].value; >+ >+ return 0; >+} >+ >static bool update_transition_efer(struct vcpu_vmx *vmx, int >efer_offset) > { > u64 guest_efer = vmx->vcpu.arch.efer; >@@ -3203,7 +3228,9 @@ static inline bool >vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, > 
*/ >static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data >*msr_info) > { >+ u64 spec_ctrl = 0; > struct shared_msr_entry *msr; >+ struct vcpu_vmx *vmx = to_vmx(vcpu); > > switch (msr_info->index) { > #ifdef CONFIG_X86_64 >@@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, >struct msr_data *msr_info) > case MSR_IA32_TSC: > msr_info->data = guest_read_tsc(vcpu); > break; >+ case MSR_IA32_SPEC_CTRL: >+ if (!msr_info->host_initiated && >+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) >+ return 1; >+ >+
[PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only add_atomic_switch_msr when a non-zero is written to it. Cc: Asit MallickCc: Arjan Van De Ven Cc: Dave Hansen Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Linus Torvalds Cc: Tim Chen Cc: Thomas Gleixner Cc: Dan Williams Cc: Jun Nakajima Cc: Paolo Bonzini Cc: David Woodhouse Cc: Greg KH Cc: Andy Lutomirski Signed-off-by: KarimAllah Ahmed Signed-off-by: Ashok Raj --- arch/x86/kvm/cpuid.c | 4 +++- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/vmx.c | 63 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10..dc78095 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) /* These are scattered features in cpufeatures.h. */ #define KVM_CPUID_BIT_AVX512_4VNNIW 2 #define KVM_CPUID_BIT_AVX512_4FMAPS 3 +#define KVM_CPUID_BIT_SPEC_CTRL 26 #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? 
KF(SPEC_CTRL) : 0); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3..dcfe227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa8638a..1b743a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +/* do not touch guest_val and host_val if the msr is not found */ +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 *guest_val, u64 *host_val) +{ + unsigned i; + struct msr_autoload *m = &vmx->msr_autoload; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) + break; + + if (i == m->nr) + return 1; + + if (guest_val) + *guest_val = m->guest[i].value; + if (host_val) + *host_val = m->host[i].value; + + return 0; +} + static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { u64 guest_efer = vmx->vcpu.arch.efer; @@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, */ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + u64 
spec_ctrl = 0; struct shared_msr_entry *msr; + struct vcpu_vmx *vmx = to_vmx(vcpu); switch (msr_info->index) { #ifdef CONFIG_X86_64 @@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + +
[PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL
Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach. To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only add_atomic_switch_msr when a non-zero is written to it. Cc: Asit Mallick Cc: Arjan Van De Ven Cc: Dave Hansen Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Linus Torvalds Cc: Tim Chen Cc: Thomas Gleixner Cc: Dan Williams Cc: Jun Nakajima Cc: Paolo Bonzini Cc: David Woodhouse Cc: Greg KH Cc: Andy Lutomirski Signed-off-by: KarimAllah Ahmed Signed-off-by: Ashok Raj --- arch/x86/kvm/cpuid.c | 4 +++- arch/x86/kvm/cpuid.h | 1 + arch/x86/kvm/vmx.c | 63 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10..dc78095 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void) /* These are scattered features in cpufeatures.h. */ #define KVM_CPUID_BIT_AVX512_4VNNIW 2 #define KVM_CPUID_BIT_AVX512_4FMAPS 3 +#define KVM_CPUID_BIT_SPEC_CTRL 26 #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \ + (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? 
KF(SPEC_CTRL) : 0); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3..dcfe227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aa8638a..1b743a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } +/* do not touch guest_val and host_val if the msr is not found */ +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, + u64 *guest_val, u64 *host_val) +{ + unsigned i; + struct msr_autoload *m = &vmx->msr_autoload; + + for (i = 0; i < m->nr; ++i) + if (m->guest[i].index == msr) + break; + + if (i == m->nr) + return 1; + + if (guest_val) + *guest_val = m->guest[i].value; + if (host_val) + *host_val = m->host[i].value; + + return 0; +} + static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { u64 guest_efer = vmx->vcpu.arch.efer; @@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, */ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + u64 
spec_ctrl = 0; struct shared_msr_entry *msr; + struct vcpu_vmx *vmx = to_vmx(vcpu); switch (msr_info->index) { #ifdef CONFIG_X86_64 @@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + /* +* If the MSR is not in the atomic list yet, then it was never +* written to. So the MSR value will be '0'. +*/ + read_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, &spec_ctrl, NULL); + + msr_info->data = spec_ctrl; + break; case MSR_IA32_SYSENTER_CS: