Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Jim Mattson
On Mon, Jan 29, 2018 at 1:49 PM, Daniel Kiper  wrote:
> On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote:
>> On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote:
>> > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote:
>> > >
>> > > Windows uses IBRS and Microsoft doesn't have any plans to switch to
>> > > retpoline.
>> > > Running a Windows guest should be a pretty common use-case, no?
>> > >
>> > > In addition, your handling of the first WRMSR intercept could be different.
>> > > It could signal you to start doing the following:
>> > > 1. Disable intercept on SPEC_CTRL MSR.
>> > > 2. On VMEntry, write vCPU SPEC_CTRL value into physical MSR.
>> > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
>> > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)
>> > >
>> > > That way, you will both have the fastest option as long as the guest doesn't use
>> > > IBRS,
>> > > and also won't have the 3% performance hit compared to Konrad's proposal.
>> > >
>> > > Am I missing something?
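
For concreteness, the entry/exit handling Liran describes above would be along these lines (an illustration only, not code from this thread; 'vmx->spec_ctrl' as a per-vCPU copy of the guest value and 'host_ibrs_enabled' as a host policy flag are assumed names):

	/* VM-entry path, once the SPEC_CTRL intercept has been disabled: */
	if (vmx->spec_ctrl)
		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

	/* VM-exit path: capture whatever the guest last wrote ... */
	rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
	/* ... and, if the host itself relies on IBRS, re-arm it right away. */
	if (host_ibrs_enabled)
		wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);	/* IBRS is bit 0 */
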
>> >
>> > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part
>> > of the 3% speedup you observe is because in the above, the vmentry path
>> > doesn't need to *read* the host's value and store it; the host is
>> > expected to restore it for itself anyway?
>>
>> Yes, at least for the purpose of correctness. Based on what I have
>> heard, when you transition to a higher ring you have to write 1,
>> then write zero
>> when you transition back to lower rings. That is, it is like a knob.
>>
>> But then I heard that on some CPUs it is more like a reset button and
>> just writing 1 to IBRS is fine. But again, correctness here.
>>
>> >
>> > I'd actually quite like to repeat the benchmark on the new fixed
>> > microcode, if anyone has it yet, to see if that read/swap slowness is
>> > still quite as excessive. I'm certainly not ruling this out, but I'm
>> > just a little wary of premature optimisation, and I'd like to make sure
>> > we have everything *else* in the KVM patches right first.
>> >
>> > The fact that the save-and-restrict macros I have in the tip of my
>> > working tree at the moment are horrid and causing 0-day nastygrams,
>> > probably doesn't help persuade me to favour the approach ;)
>> >
>> > ... hm, the CPU actually has separate MSR save/restore lists for
>> > entry/exit, doesn't it? Is there any way to sanely make use of that and
>> > do the restoration manually on vmentry but let it be automatic on
>> > vmexit, by having it *only* in the guest's MSR-store area to be saved
>> > on exit and restored on exit, but *not* in the host's MSR-store area?
>
> s/on exit and restored on exit/on exit and restored on entry/?
>
> Additionally, AIUI there is no "host's MSR-store area".
>
>> Oh. That sounds interesting
>
> That is possible but you have to split add_atomic_switch_msr() accordingly.
>
>> > Reading the code and comparing with the SDM, I can't see where we're
>> > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
>> > case...
>>
>> Right. We (well Daniel Kiper, CC-ed) implemented it for this and
>> that is where we got the numbers.
>>
>> Daniel, you OK posting it here? Granted, with the caveat that it won't even
>> compile against upstream as it was based on a distro kernel.
>
> Please look below...
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index aa9bc4f..e7c0f8b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO);
>
>  extern const ulong vmx_return;
>
> -#define NR_AUTOLOAD_MSRS 8
> -#define VMCS02_POOL_SIZE 1
> +#define NR_AUTOLOAD_MSRS   8
> +#define NR_AUTOSTORE_MSRS  NR_AUTOLOAD_MSRS
> +
> +#define VMCS02_POOL_SIZE   1

I think you accidentally resurrected VMCS02_POOL_SIZE.

>  struct vmcs {
> u32 revision_id;
> @@ -504,6 +506,10 @@ struct vcpu_vmx {
> struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
> struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
> } msr_autoload;
> +   struct msr_autostore {
> +   unsigned nr;
> +   struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS];
> +   } msr_autostore;
> struct {
> int   loaded;
> u16   fs_sel, gs_sel, ldt_sel;
> @@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
> *vmx, unsigned msr,
> m->host[i].value = host_val;
>  }
>
> +static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr)
> +{
> +   unsigned i;
> +   struct msr_autostore *m = &vmx->msr_autostore;
> +
> +   for (i = 0; i < m->nr; ++i)
> +   if (m->guest[i].index == msr)
> +   return;
> +
> +   if (i == NR_AUTOSTORE_MSRS) {
> +   pr_err("Not enough msr store entries. Can't add msr %x\n", 
> msr);
> +   BUG();

I wouldn't panic the host for this. add_atomic_switch_msr() just emits a
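
The reply is truncated here; presumably the point is that add_atomic_switch_msr() only warns and bails out instead of taking the host down. A non-fatal variant of the new helper in that spirit could look like this (a sketch only; the use of WARN_ONCE() and the message text are assumptions, not code from the thread):

	static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr)
	{
		unsigned i;
		struct msr_autostore *m = &vmx->msr_autostore;

		for (i = 0; i < m->nr; ++i)
			if (m->guest[i].index == msr)
				return;

		if (i == NR_AUTOSTORE_MSRS) {
			/* Warn once and refuse the request instead of BUG()ing the host. */
			WARN_ONCE(1, "Not enough MSR store entries, can't add MSR %x\n", msr);
			return;
		}

		m->guest[i].index = msr;
		vmcs_write32(VM_EXIT_MSR_STORE_COUNT, ++m->nr);
	}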

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Daniel Kiper
On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote:
> On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote:
> > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote:
> > >
> > > Windows uses IBRS and Microsoft doesn't have any plans to switch to
> > > retpoline.
> > > Running a Windows guest should be a pretty common use-case, no?
> > >
> > > In addition, your handling of the first WRMSR intercept could be different.
> > > It could signal you to start doing the following:
> > > 1. Disable intercept on SPEC_CTRL MSR.
> > > 2. On VMEntry, write vCPU SPEC_CTRL value into physical MSR.
> > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
> > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)
> > >
> > > That way, you will both have the fastest option as long as the guest doesn't use
> > > IBRS,
> > > and also won't have the 3% performance hit compared to Konrad's proposal.
> > >
> > > Am I missing something?
> >
> > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part
> > of the 3% speedup you observe is because in the above, the vmentry path
> > doesn't need to *read* the host's value and store it; the host is
> > expected to restore it for itself anyway?
>
> Yes, at least for the purpose of correctness. Based on what I have
> heard, when you transition to a higher ring you have to write 1,
> then write zero
> when you transition back to lower rings. That is, it is like a knob.
>
> But then I heard that on some CPUs it is more like a reset button and
> just writing 1 to IBRS is fine. But again, correctness here.
>
> >
> > I'd actually quite like to repeat the benchmark on the new fixed
> > microcode, if anyone has it yet, to see if that read/swap slowness is
> > still quite as excessive. I'm certainly not ruling this out, but I'm
> > just a little wary of premature optimisation, and I'd like to make sure
> > we have everything *else* in the KVM patches right first.
> >
> > The fact that the save-and-restrict macros I have in the tip of my
> > working tree at the moment are horrid and causing 0-day nastygrams,
> > probably doesn't help persuade me to favour the approach ;)
> >
> > ... hm, the CPU actually has separate MSR save/restore lists for
> > entry/exit, doesn't it? Is there any way to sanely make use of that and
> > do the restoration manually on vmentry but let it be automatic on
> > vmexit, by having it *only* in the guest's MSR-store area to be saved
> > on exit and restored on exit, but *not* in the host's MSR-store area?

s/on exit and restored on exit/on exit and restored on entry/?

Additionally, AIUI there is no "host's MSR-store area".

> Oh. That sounds interesting

That is possible but you have to split add_atomic_switch_msr() accordingly.

> > Reading the code and comparing with the SDM, I can't see where we're
> > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
> > case...
>
> Right. We (well Daniel Kiper, CC-ed) implemented it for this and
> that is where we got the numbers.
>
> Daniel, you OK posting it here? Granted, with the caveat that it won't even
> compile against upstream as it was based on a distro kernel.

Please look below...

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa9bc4f..e7c0f8b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO);

 extern const ulong vmx_return;

-#define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
+#define NR_AUTOLOAD_MSRS   8
+#define NR_AUTOSTORE_MSRS  NR_AUTOLOAD_MSRS
+
+#define VMCS02_POOL_SIZE   1

 struct vmcs {
u32 revision_id;
@@ -504,6 +506,10 @@ struct vcpu_vmx {
struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
} msr_autoload;
+   struct msr_autostore {
+   unsigned nr;
+   struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS];
+   } msr_autostore;
struct {
int   loaded;
u16   fs_sel, gs_sel, ldt_sel;
@@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, 
unsigned msr,
m->host[i].value = host_val;
 }

+static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr)
+{
+   unsigned i;
+   struct msr_autostore *m = &vmx->msr_autostore;
+
+   for (i = 0; i < m->nr; ++i)
+   if (m->guest[i].index == msr)
+   return;
+
+   if (i == NR_AUTOSTORE_MSRS) {
+   pr_err("Not enough msr store entries. Can't add msr %x\n", msr);
+   BUG();
+   }
+
+   m->guest[i].index = msr;
+   vmcs_write32(VM_EXIT_MSR_STORE_COUNT, ++m->nr);
+}
+
+static u64 get_msr_vmcs_store(struct vcpu_vmx *vmx, unsigned msr)
+{
+   unsigned i;
+   struct msr_autostore *m = &vmx->msr_autostore;
+
+   for (i = 0; i < m->nr; ++i)
+   if (m->guest[i].index == msr)
+ 
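
get_msr_vmcs_store() is cut off above; a plausible completion, consistent with the lookup loop and with how the other autoload helpers behave, would be something like the following (a reconstruction for readability, not the verbatim code from the distro tree):

	static u64 get_msr_vmcs_store(struct vcpu_vmx *vmx, unsigned msr)
	{
		unsigned i;
		struct msr_autostore *m = &vmx->msr_autostore;

		for (i = 0; i < m->nr; ++i)
			if (m->guest[i].index == msr)
				/* Value the CPU stored here on the last VM-exit. */
				return m->guest[i].value;

		/* Callers are expected to have added the MSR via add_atomic_store_msr(). */
		WARN_ONCE(1, "MSR %x not found in the VM-exit MSR-store area\n", msr);
		return 0;
	}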

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Jim Mattson
On Mon, Jan 29, 2018 at 11:16 AM, Konrad Rzeszutek Wilk
 wrote:
> On Mon, Jan 29, 2018 at 10:43:22AM -0800, Jim Mattson wrote:
>> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  
>> wrote:
>> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for 
>> > guests
>> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using 
>> > a
>> > retpoline+IBPB based approach.
>> >
>> > To avoid the overhead of atomically saving and restoring the 
>> > MSR_IA32_SPEC_CTRL
>> > for guests that do not actually use the MSR, only add_atomic_switch_msr 
>> > when a
>> > non-zero is written to it.
>> >
>> > Cc: Asit Mallick 
>> > Cc: Arjan Van De Ven 
>> > Cc: Dave Hansen 
>> > Cc: Andi Kleen 
>> > Cc: Andrea Arcangeli 
>> > Cc: Linus Torvalds 
>> > Cc: Tim Chen 
>> > Cc: Thomas Gleixner 
>> > Cc: Dan Williams 
>> > Cc: Jun Nakajima 
>> > Cc: Paolo Bonzini 
>> > Cc: David Woodhouse 
>> > Cc: Greg KH 
>> > Cc: Andy Lutomirski 
>> > Signed-off-by: KarimAllah Ahmed 
>> > Signed-off-by: Ashok Raj 
>> > ---
>> >  arch/x86/kvm/cpuid.c |  4 +++-
>> >  arch/x86/kvm/cpuid.h |  1 +
>> >  arch/x86/kvm/vmx.c   | 63 
>> > 
>> >  3 files changed, 67 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> > index 0099e10..dc78095 100644
>> > --- a/arch/x86/kvm/cpuid.c
>> > +++ b/arch/x86/kvm/cpuid.c
>> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
>> >  /* These are scattered features in cpufeatures.h. */
>> >  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
>> >  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
>> > +#define KVM_CPUID_BIT_SPEC_CTRL 26
>> >  #define KF(x) bit(KVM_CPUID_BIT_##x)
>> >
>> >  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct 
>> > kvm_cpuid_entry2 *entry, u32 function,
>> >
>> > /* cpuid 7.0.edx*/
>> > const u32 kvm_cpuid_7_0_edx_x86_features =
>> > -   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
>> > +   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
>> > +   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
>>
>> Isn't 'boot_cpu_has()' superfluous here? And aren't there two bits to
>> pass through for existing CPUs (26 and 27)?
>>
>> >
>> > /* all calls to cpuid_count() should be made on the same cpu */
>> > get_cpu();
>> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
>> > index cdc70a3..dcfe227 100644
>> > --- a/arch/x86/kvm/cpuid.h
>> > +++ b/arch/x86/kvm/cpuid.h
>> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
>> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
>> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
>> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
>> > +   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
>> >  };
>> >
>> >  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
>> > x86_feature)
>> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> > index aa8638a..1b743a0 100644
>> > --- a/arch/x86/kvm/vmx.c
>> > +++ b/arch/x86/kvm/vmx.c
>> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, 
>> > bool masked);
>> >  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
>> > u16 error_code);
>> >  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
>> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
>> > *msr_bitmap,
>> > + u32 msr, int 
>> > type);
>> > +
>> >
>> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
>> >  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
>> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
>> > *vmx, unsigned msr,
>> > m->host[i].value = host_val;
>> >  }
>> >
>> > +/* do not touch guest_val and host_val if the msr is not found */
>> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>> > + u64 *guest_val, u64 *host_val)
>> > +{
>> > +   unsigned i;
>> > +   struct msr_autoload *m = &vmx->msr_autoload;
>> > +
>> > +   for (i = 0; i < m->nr; ++i)
>> > +   if (m->guest[i].index == msr)
>> > +   break;
>> > +
>> > +   if (i == m->nr)
>> > +   return 1;
>> > +
>> > +   if (guest_val)
>> > +   *guest_val = m->guest[i].value;
>> > +   if (host_val)
>> > +   *host_val = m->host[i].value;
>> > +
>> > +   return 0;
>> > +}
>> > +
>> >  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
>> >  {
>> > u64 guest_efer = vmx->vcpu.arch.efer;
>> > @@ -3203,7 +3228,9 @@ static inline bool 
>> > vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
>> >   */
>> >  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> >  {
>> > +   
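
On the two CPUID.(EAX=7,ECX=0):EDX bits Jim mentions: bit 26 enumerates IBRS/IBPB (what the patch calls SPEC_CTRL) and bit 27 enumerates STIBP, so passing both through would look roughly like this (a sketch; KVM_CPUID_BIT_STIBP is an assumed name, and dropping boot_cpu_has() assumes the usual cpuid_mask() filtering against host capabilities still applies):

	/* These are scattered features in cpufeatures.h. */
	#define KVM_CPUID_BIT_AVX512_4VNNIW 2
	#define KVM_CPUID_BIT_AVX512_4FMAPS 3
	#define KVM_CPUID_BIT_SPEC_CTRL     26
	#define KVM_CPUID_BIT_STIBP         27	/* assumed name, for illustration */

	/* cpuid 7.0.edx */
	const u32 kvm_cpuid_7_0_edx_x86_features =
		KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | KF(SPEC_CTRL) | KF(STIBP);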

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Konrad Rzeszutek Wilk
On Mon, Jan 29, 2018 at 10:43:22AM -0800, Jim Mattson wrote:
> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  wrote:
> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for 
> > guests
> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
> > retpoline+IBPB based approach.
> >
> > To avoid the overhead of atomically saving and restoring the 
> > MSR_IA32_SPEC_CTRL
> > for guests that do not actually use the MSR, only call add_atomic_switch_msr()
> > when a
> > non-zero value is written to it.
> >
> > Cc: Asit Mallick 
> > Cc: Arjan Van De Ven 
> > Cc: Dave Hansen 
> > Cc: Andi Kleen 
> > Cc: Andrea Arcangeli 
> > Cc: Linus Torvalds 
> > Cc: Tim Chen 
> > Cc: Thomas Gleixner 
> > Cc: Dan Williams 
> > Cc: Jun Nakajima 
> > Cc: Paolo Bonzini 
> > Cc: David Woodhouse 
> > Cc: Greg KH 
> > Cc: Andy Lutomirski 
> > Signed-off-by: KarimAllah Ahmed 
> > Signed-off-by: Ashok Raj 
> > ---
> >  arch/x86/kvm/cpuid.c |  4 +++-
> >  arch/x86/kvm/cpuid.h |  1 +
> >  arch/x86/kvm/vmx.c   | 63 
> > 
> >  3 files changed, 67 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 0099e10..dc78095 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
> >  /* These are scattered features in cpufeatures.h. */
> >  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
> >  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
> > +#define KVM_CPUID_BIT_SPEC_CTRL 26
> >  #define KF(x) bit(KVM_CPUID_BIT_##x)
> >
> >  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct 
> > kvm_cpuid_entry2 *entry, u32 function,
> >
> > /* cpuid 7.0.edx*/
> > const u32 kvm_cpuid_7_0_edx_x86_features =
> > -   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
> > +   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
> > +   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
> 
> Isn't 'boot_cpu_has()' superfluous here? And aren't there two bits to
> pass through for existing CPUs (26 and 27)?
> 
> >
> > /* all calls to cpuid_count() should be made on the same cpu */
> > get_cpu();
> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> > index cdc70a3..dcfe227 100644
> > --- a/arch/x86/kvm/cpuid.h
> > +++ b/arch/x86/kvm/cpuid.h
> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
> > +   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
> >  };
> >
> >  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
> > x86_feature)
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index aa8638a..1b743a0 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, 
> > bool masked);
> >  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> > u16 error_code);
> >  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
> > *msr_bitmap,
> > + u32 msr, int 
> > type);
> > +
> >
> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
> >  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
> > *vmx, unsigned msr,
> > m->host[i].value = host_val;
> >  }
> >
> > +/* do not touch guest_val and host_val if the msr is not found */
> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> > + u64 *guest_val, u64 *host_val)
> > +{
> > +   unsigned i;
> > +   struct msr_autoload *m = &vmx->msr_autoload;
> > +
> > +   for (i = 0; i < m->nr; ++i)
> > +   if (m->guest[i].index == msr)
> > +   break;
> > +
> > +   if (i == m->nr)
> > +   return 1;
> > +
> > +   if (guest_val)
> > +   *guest_val = m->guest[i].value;
> > +   if (host_val)
> > +   *host_val = m->host[i].value;
> > +
> > +   return 0;
> > +}
> > +
> >  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
> >  {
> > u64 guest_efer = vmx->vcpu.arch.efer;
> > @@ -3203,7 +3228,9 @@ static inline bool 
> > vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
> >   */
> >  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  {
> > +   u64 spec_ctrl = 0;
> > struct shared_msr_entry *msr;
> > +   struct vcpu_vmx *vmx = to_vmx(vcpu);
> >
> > switch (msr_info->index) {
> >  #ifdef CONFIG_X86_64
> > @@ -3223,6 
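
The lazy arming described in the commit message -- paying for the atomic save/restore only once the guest actually writes a non-zero value -- would sit in the vmx_set_msr() WRMSR path roughly as follows (a sketch only: the guest_cpuid_has_spec_ctrl() helper name, the reserved-bit check and the assumption that the host runs with SPEC_CTRL == 0 are illustrative, not the posted code):

	case MSR_IA32_SPEC_CTRL:
		if (!guest_cpuid_has_spec_ctrl(vcpu))	/* assumed helper name */
			return 1;

		/* Only IBRS (bit 0) and STIBP (bit 1) are defined so far. */
		if (msr_info->data & ~3ULL)
			return 1;

		/*
		 * Do nothing until the guest writes a non-zero value; from then
		 * on the MSR is switched atomically on every entry/exit, with 0
		 * restored for the host on VM-exit.
		 */
		if (msr_info->data)
			add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
					      msr_info->data, 0);
		break;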

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread KarimAllah Ahmed

On 01/29/2018 08:04 PM, Jim Mattson wrote:

Can I assume you'll send out a new version with the fixes?


Yes, I am currently doing some tests and once I am done I will send a 
new round.


... and the typo is already fixed in 'ibpb-wip' :)



On Mon, Jan 29, 2018 at 11:01 AM, David Woodhouse  wrote:


(Top-posting; sorry.)

Much of that is already fixed during our day, in
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb

I forgot to fix up the wrong-MSR typo though, and we do still need to address 
reset.

On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote:

On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  wrote:


Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests
that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
retpoline+IBPB based approach.

To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL
for guests that do not actually use the MSR, only call add_atomic_switch_msr() when a
non-zero value is written to it.

Cc: Asit Mallick 
Cc: Arjan Van De Ven 
Cc: Dave Hansen 
Cc: Andi Kleen 
Cc: Andrea Arcangeli 
Cc: Linus Torvalds 
Cc: Tim Chen 
Cc: Thomas Gleixner 
Cc: Dan Williams 
Cc: Jun Nakajima 
Cc: Paolo Bonzini 
Cc: David Woodhouse 
Cc: Greg KH 
Cc: Andy Lutomirski 
Signed-off-by: KarimAllah Ahmed 
Signed-off-by: Ashok Raj 
---
  arch/x86/kvm/cpuid.c |  4 +++-
  arch/x86/kvm/cpuid.h |  1 +
  arch/x86/kvm/vmx.c   | 63 
  3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0099e10..dc78095 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
  /* These are scattered features in cpufeatures.h. */
  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
+#define KVM_CPUID_BIT_SPEC_CTRL 26
  #define KF(x) bit(KVM_CPUID_BIT_##x)

  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
@@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,

 /* cpuid 7.0.edx*/
 const u32 kvm_cpuid_7_0_edx_x86_features =
-   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
+   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);

Isn't 'boot_cpu_has()' superfluous here? And aren't there two bits to
pass through for existing CPUs (26 and 27)?




 /* all calls to cpuid_count() should be made on the same cpu */
 get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3..dcfe227 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
 [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
 [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
+   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
  };

  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
x86_feature)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa8638a..1b743a0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool 
masked);
  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
 u16 error_code);
  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
*msr_bitmap,
+ u32 msr, int type);
+

  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, 
unsigned msr,
 m->host[i].value = host_val;
  }

+/* do not touch guest_val and host_val if the msr is not found */
+static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
+ u64 *guest_val, u64 *host_val)
+{
+   unsigned i;
+   struct msr_autoload *m = &vmx->msr_autoload;
+
+   for (i = 0; i < m->nr; ++i)
+   if (m->guest[i].index == msr)
+   break;
+
+   if (i == m->nr)
+   return 1;
+
+   if (guest_val)
+   *guest_val = m->guest[i].value;
+   if (host_val)
+   *host_val = m->host[i].value;
+
+   return 0;
+}
+
  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
  {
 u64 guest_efer = vmx->vcpu.arch.efer;
@@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct 
kvm_vcpu *vcpu,
   */
  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
+   u64 spec_ctrl = 0;
 struct shared_msr_entry *msr;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);

 switch 
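
The RDMSR side presumably pairs with the read_atomic_switch_msr() helper above: spec_ctrl starts at 0 (what a guest that never wrote the MSR should read back) and is only overwritten if an autoload entry already exists. A sketch of how that helper could be used inside the switch, not the patch's verbatim code:

	case MSR_IA32_SPEC_CTRL:
		/* Leaves spec_ctrl at 0 unless the guest armed the MSR earlier. */
		read_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, &spec_ctrl, NULL);
		msr_info->data = spec_ctrl;
		break;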

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Jim Mattson
Can I assume you'll send out a new version with the fixes?

On Mon, Jan 29, 2018 at 11:01 AM, David Woodhouse  wrote:
>
> (Top-posting; sorry.)
>
> Much of that is already fixed during our day, in
> http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
>
> I forgot to fix up the wrong-MSR typo though, and we do still need to address 
> reset.
>
> On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote:
>> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  wrote:
>> >
>> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for 
>> > guests
>> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using 
>> > a
>> > retpoline+IBPB based approach.
>> >
>> > To avoid the overhead of atomically saving and restoring the 
>> > MSR_IA32_SPEC_CTRL
>> > for guests that do not actually use the MSR, only call add_atomic_switch_msr()
>> > when a
>> > non-zero value is written to it.
>> >
>> > Cc: Asit Mallick 
>> > Cc: Arjan Van De Ven 
>> > Cc: Dave Hansen 
>> > Cc: Andi Kleen 
>> > Cc: Andrea Arcangeli 
>> > Cc: Linus Torvalds 
>> > Cc: Tim Chen 
>> > Cc: Thomas Gleixner 
>> > Cc: Dan Williams 
>> > Cc: Jun Nakajima 
>> > Cc: Paolo Bonzini 
>> > Cc: David Woodhouse 
>> > Cc: Greg KH 
>> > Cc: Andy Lutomirski 
>> > Signed-off-by: KarimAllah Ahmed 
>> > Signed-off-by: Ashok Raj 
>> > ---
>> >  arch/x86/kvm/cpuid.c |  4 +++-
>> >  arch/x86/kvm/cpuid.h |  1 +
>> >  arch/x86/kvm/vmx.c   | 63 
>> > 
>> >  3 files changed, 67 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> > index 0099e10..dc78095 100644
>> > --- a/arch/x86/kvm/cpuid.c
>> > +++ b/arch/x86/kvm/cpuid.c
>> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
>> >  /* These are scattered features in cpufeatures.h. */
>> >  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
>> >  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
>> > +#define KVM_CPUID_BIT_SPEC_CTRL 26
>> >  #define KF(x) bit(KVM_CPUID_BIT_##x)
>> >
>> >  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct 
>> > kvm_cpuid_entry2 *entry, u32 function,
>> >
>> > /* cpuid 7.0.edx*/
>> > const u32 kvm_cpuid_7_0_edx_x86_features =
>> > -   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
>> > +   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
>> > +   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
>> Isn't 'boot_cpu_has()' superfluous here? And aren't there two bits to
>> pass through for existing CPUs (26 and 27)?
>>
>> >
>> >
>> > /* all calls to cpuid_count() should be made on the same cpu */
>> > get_cpu();
>> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
>> > index cdc70a3..dcfe227 100644
>> > --- a/arch/x86/kvm/cpuid.h
>> > +++ b/arch/x86/kvm/cpuid.h
>> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
>> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
>> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
>> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
>> > +   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
>> >  };
>> >
>> >  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
>> > x86_feature)
>> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> > index aa8638a..1b743a0 100644
>> > --- a/arch/x86/kvm/vmx.c
>> > +++ b/arch/x86/kvm/vmx.c
>> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, 
>> > bool masked);
>> >  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
>> > u16 error_code);
>> >  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
>> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
>> > *msr_bitmap,
>> > + u32 msr, int 
>> > type);
>> > +
>> >
>> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
>> >  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
>> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
>> > *vmx, unsigned msr,
>> > m->host[i].value = host_val;
>> >  }
>> >
>> > +/* do not touch guest_val and host_val if the msr is not found */
>> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>> > + u64 *guest_val, u64 *host_val)
>> > +{
>> > +   unsigned i;
>> > +   struct msr_autoload *m = &vmx->msr_autoload;
>> > +
>> > +   for (i = 0; i < m->nr; ++i)
>> > +   if (m->guest[i].index == msr)
>> > +   break;
>> > +
>> > +   if (i == m->nr)
>> > +   return 1;
>> > +
>> > +   if (guest_val)
>> > +   *guest_val = m->guest[i].value;
>> > +   if (host_val)
>> > +   *host_val = m->host[i].value;
>> > +
>> > +   return 0;
>> > +}
>> > +
>> >  static bool update_transition_efer(struct 

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread David Woodhouse

(Top-posting; sorry.)

Much of that is already fixed during our day, in 
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb

I forgot to fix up the wrong-MSR typo though, and we do still need to address 
reset.

On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote:
> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  wrote:
> > 
> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for 
> > guests
> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
> > retpoline+IBPB based approach.
> > 
> > To avoid the overhead of atomically saving and restoring the 
> > MSR_IA32_SPEC_CTRL
> > for guests that do not actually use the MSR, only call add_atomic_switch_msr()
> > when a
> > non-zero value is written to it.
> > 
> > Cc: Asit Mallick 
> > Cc: Arjan Van De Ven 
> > Cc: Dave Hansen 
> > Cc: Andi Kleen 
> > Cc: Andrea Arcangeli 
> > Cc: Linus Torvalds 
> > Cc: Tim Chen 
> > Cc: Thomas Gleixner 
> > Cc: Dan Williams 
> > Cc: Jun Nakajima 
> > Cc: Paolo Bonzini 
> > Cc: David Woodhouse 
> > Cc: Greg KH 
> > Cc: Andy Lutomirski 
> > Signed-off-by: KarimAllah Ahmed 
> > Signed-off-by: Ashok Raj 
> > ---
> >  arch/x86/kvm/cpuid.c |  4 +++-
> >  arch/x86/kvm/cpuid.h |  1 +
> >  arch/x86/kvm/vmx.c   | 63 
> > 
> >  3 files changed, 67 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 0099e10..dc78095 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
> >  /* These are scattered features in cpufeatures.h. */
> >  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
> >  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
> > +#define KVM_CPUID_BIT_SPEC_CTRL 26
> >  #define KF(x) bit(KVM_CPUID_BIT_##x)
> > 
> >  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct 
> > kvm_cpuid_entry2 *entry, u32 function,
> > 
> > /* cpuid 7.0.edx*/
> > const u32 kvm_cpuid_7_0_edx_x86_features =
> > -   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
> > +   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
> > +   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
> Isn't 'boot_cpu_has()' superflous here? And aren't there two bits to
> pass through for existing CPUs (26 and 27)?
> 
> > 
> > 
> > /* all calls to cpuid_count() should be made on the same cpu */
> > get_cpu();
> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> > index cdc70a3..dcfe227 100644
> > --- a/arch/x86/kvm/cpuid.h
> > +++ b/arch/x86/kvm/cpuid.h
> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
> > +   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
> >  };
> > 
> >  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
> > x86_feature)
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index aa8638a..1b743a0 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, 
> > bool masked);
> >  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> > u16 error_code);
> >  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
> > *msr_bitmap,
> > + u32 msr, int 
> > type);
> > +
> > 
> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
> >  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
> > *vmx, unsigned msr,
> > m->host[i].value = host_val;
> >  }
> > 
> > +/* do not touch guest_val and host_val if the msr is not found */
> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> > + u64 *guest_val, u64 *host_val)
> > +{
> > +   unsigned i;
> > +   struct msr_autoload *m = >msr_autoload;
> > +
> > +   for (i = 0; i < m->nr; ++i)
> > +   if (m->guest[i].index == msr)
> > +   break;
> > +
> > +   if (i == m->nr)
> > +   return 1;
> > +
> > +   if (guest_val)
> > +   *guest_val = m->guest[i].value;
> > +   if (host_val)
> > + 

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread David Woodhouse

(Top-posting; sorry.)

Much of that is already fixed during our day, in 
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb

I forgot to fix up the wrong-MSR typo though, and we do still need to address 
reset.

On Mon, 2018-01-29 at 10:43 -0800, Jim Mattson wrote:
> On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  wrote:
> > 
> > Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for 
> > guests
> > that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
> > retpoline+IBPB based approach.
> > 
> > To avoid the overhead of atomically saving and restoring the 
> > MSR_IA32_SPEC_CTRL
> > for guests that do not actually use the MSR, only add_atomic_switch_msr 
> > when a
> > non-zero is written to it.
> > 
> > Cc: Asit Mallick 
> > Cc: Arjan Van De Ven 
> > Cc: Dave Hansen 
> > Cc: Andi Kleen 
> > Cc: Andrea Arcangeli 
> > Cc: Linus Torvalds 
> > Cc: Tim Chen 
> > Cc: Thomas Gleixner 
> > Cc: Dan Williams 
> > Cc: Jun Nakajima 
> > Cc: Paolo Bonzini 
> > Cc: David Woodhouse 
> > Cc: Greg KH 
> > Cc: Andy Lutomirski 
> > Signed-off-by: KarimAllah Ahmed 
> > Signed-off-by: Ashok Raj 
> > ---
> >  arch/x86/kvm/cpuid.c |  4 +++-
> >  arch/x86/kvm/cpuid.h |  1 +
> >  arch/x86/kvm/vmx.c   | 63 
> > 
> >  3 files changed, 67 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 0099e10..dc78095 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
> >  /* These are scattered features in cpufeatures.h. */
> >  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
> >  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
> > +#define KVM_CPUID_BIT_SPEC_CTRL 26
> >  #define KF(x) bit(KVM_CPUID_BIT_##x)
> > 
> >  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct 
> > kvm_cpuid_entry2 *entry, u32 function,
> > 
> > /* cpuid 7.0.edx*/
> > const u32 kvm_cpuid_7_0_edx_x86_features =
> > -   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
> > +   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
> > +   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
> Isn't 'boot_cpu_has()' superfluous here? And aren't there two bits to
> pass through for existing CPUs (26 and 27)?
> 
> > 
> > 
> > /* all calls to cpuid_count() should be made on the same cpu */
> > get_cpu();
> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> > index cdc70a3..dcfe227 100644
> > --- a/arch/x86/kvm/cpuid.h
> > +++ b/arch/x86/kvm/cpuid.h
> > @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> > [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
> > [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
> > [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
> > +   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
> >  };
> > 
> >  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
> > x86_feature)
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index aa8638a..1b743a0 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, 
> > bool masked);
> >  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> > u16 error_code);
> >  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
> > +static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
> > *msr_bitmap,
> > + u32 msr, int 
> > type);
> > +
> > 
> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
> >  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> > @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
> > *vmx, unsigned msr,
> > m->host[i].value = host_val;
> >  }
> > 
> > +/* do not touch guest_val and host_val if the msr is not found */
> > +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> > + u64 *guest_val, u64 *host_val)
> > +{
> > +   unsigned i;
> > +   struct msr_autoload *m = &vmx->msr_autoload;
> > +
> > +   for (i = 0; i < m->nr; ++i)
> > +   if (m->guest[i].index == msr)
> > +   break;
> > +
> > +   if (i == m->nr)
> > +   return 1;
> > +
> > +   if (guest_val)
> > +   *guest_val = m->guest[i].value;
> > +   if (host_val)
> > +   *host_val = m->host[i].value;
> > +
> > +   return 0;
> > +}
> > +
> >  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
> >  {
> > u64 guest_efer = vmx->vcpu.arch.efer;
> > @@ -3203,7 +3228,9 @@ static inline bool 
> > vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
> >   */
> >  static int vmx_get_msr(struct kvm_vcpu *vcpu, 

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Jim Mattson
On Sun, Jan 28, 2018 at 11:29 AM, KarimAllah Ahmed  wrote:
> Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests
> that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
> retpoline+IBPB based approach.
>
> To avoid the overhead of atomically saving and restoring the 
> MSR_IA32_SPEC_CTRL
> for guests that do not actually use the MSR, only add_atomic_switch_msr when a
> non-zero is written to it.
>
> Cc: Asit Mallick 
> Cc: Arjan Van De Ven 
> Cc: Dave Hansen 
> Cc: Andi Kleen 
> Cc: Andrea Arcangeli 
> Cc: Linus Torvalds 
> Cc: Tim Chen 
> Cc: Thomas Gleixner 
> Cc: Dan Williams 
> Cc: Jun Nakajima 
> Cc: Paolo Bonzini 
> Cc: David Woodhouse 
> Cc: Greg KH 
> Cc: Andy Lutomirski 
> Signed-off-by: KarimAllah Ahmed 
> Signed-off-by: Ashok Raj 
> ---
>  arch/x86/kvm/cpuid.c |  4 +++-
>  arch/x86/kvm/cpuid.h |  1 +
>  arch/x86/kvm/vmx.c   | 63 
> 
>  3 files changed, 67 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 0099e10..dc78095 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
>  /* These are scattered features in cpufeatures.h. */
>  #define KVM_CPUID_BIT_AVX512_4VNNIW 2
>  #define KVM_CPUID_BIT_AVX512_4FMAPS 3
> +#define KVM_CPUID_BIT_SPEC_CTRL 26
>  #define KF(x) bit(KVM_CPUID_BIT_##x)
>
>  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> @@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
> *entry, u32 function,
>
> /* cpuid 7.0.edx*/
> const u32 kvm_cpuid_7_0_edx_x86_features =
> -   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
> +   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
> +   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);

Isn't 'boot_cpu_has()' superfluous here? And aren't there two bits to
pass through for existing CPUs (26 and 27)?
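
A hedged sketch of what addressing both points might look like (illustrative
only, not from the posted patch; the STIBP define and dropping the
boot_cpu_has() check are assumptions drawn from this comment):

 /* Both scattered CPUID.(EAX=7,ECX=0):EDX bits: 26 = IBRS/IBPB (SPEC_CTRL),
  * 27 = STIBP on existing CPUs; KF() comes from the patch above. */
 #define KVM_CPUID_BIT_SPEC_CTRL 26
 #define KVM_CPUID_BIT_STIBP     27

 const u32 kvm_cpuid_7_0_edx_x86_features =
 	KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) |
 	KF(SPEC_CTRL) | KF(STIBP);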

>
> /* all calls to cpuid_count() should be made on the same cpu */
> get_cpu();
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index cdc70a3..dcfe227 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
> [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
> [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
> +   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
>  };
>
>  static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned 
> x86_feature)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index aa8638a..1b743a0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool 
> masked);
>  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> u16 error_code);
>  static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
> +static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
> *msr_bitmap,
> + u32 msr, int type);
> +
>
>  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
>  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> @@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx 
> *vmx, unsigned msr,
> m->host[i].value = host_val;
>  }
>
> +/* do not touch guest_val and host_val if the msr is not found */
> +static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
> + u64 *guest_val, u64 *host_val)
> +{
> +   unsigned i;
> +   struct msr_autoload *m = &vmx->msr_autoload;
> +
> +   for (i = 0; i < m->nr; ++i)
> +   if (m->guest[i].index == msr)
> +   break;
> +
> +   if (i == m->nr)
> +   return 1;
> +
> +   if (guest_val)
> +   *guest_val = m->guest[i].value;
> +   if (host_val)
> +   *host_val = m->host[i].value;
> +
> +   return 0;
> +}
> +
>  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
>  {
> u64 guest_efer = vmx->vcpu.arch.efer;
> @@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct 
> kvm_vcpu *vcpu,
>   */
>  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  {
> +   u64 spec_ctrl = 0;
> struct shared_msr_entry *msr;
> +   struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> switch (msr_info->index) {
>  #ifdef CONFIG_X86_64
> @@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct 
> msr_data *msr_info)
> case MSR_IA32_TSC:
> msr_info->data = guest_read_tsc(vcpu);
> break;
> +   case MSR_IA32_SPEC_CTRL:
> +   if (!msr_info->host_initiated &&
> +   !guest_cpuid_has(vcpu, 

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Konrad Rzeszutek Wilk
On Mon, Jan 29, 2018 at 08:46:03AM +, David Woodhouse wrote:
> On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote:
> > 
> > Windows use IBRS and Microsoft don't have any plans to switch to retpoline.
> > Running a Windows guest should be a pretty common use-case no?
> > 
> > In addition, your handle of the first WRMSR intercept could be different.
> > It could signal you to start doing the following:
> > 1. Disable intercept on SPEC_CTRL MSR.
> > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR.
> > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
> > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)
> > 
> > That way, you will both have fastest option as long as guest don't use IBRS
> > and also won't have the 3% performance hit compared to Konrad's proposal.
> > 
> > Am I missing something?
> 
> Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part
> of the 3% speedup you observe is because in the above, the vmentry path
> doesn't need to *read* the host's value and store it; the host is
> expected to restore it for itself anyway?

Yes, at least for the purpose of correctness. What I have heard is that when
you transition to a higher ring you have to write 1, then write zero when you
transition back to lower rings. That is, it is like a knob.

But then I heard that on some CPUs it is more like a reset button and just
writing 1 to IBRS is fine. But again, correctness here.

> 
> I'd actually quite like to repeat the benchmark on the new fixed
> microcode, if anyone has it yet, to see if that read/swap slowness is
> still quite as excessive. I'm certainly not ruling this out, but I'm
> just a little wary of premature optimisation, and I'd like to make sure
> we have everything *else* in the KVM patches right first.
> 
> The fact that the save-and-restrict macros I have in the tip of my
> working tree at the moment are horrid and causing 0-day nastygrams,
> probably doesn't help persuade me to favour the approach ;)
> 
> ... hm, the CPU actually has separate MSR save/restore lists for
> entry/exit, doesn't it? Is there any way to sanely make use of that and
> do the restoration manually on vmentry but let it be automatic on
> vmexit, by having it *only* in the guest's MSR-store area to be saved
> on exit and restored on exit, but *not* in the host's MSR-store area?

Oh. That sounds interesting.
> 
> Reading the code and comparing with the SDM, I can't see where we're
> ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
> case...

Right. We (well Daniel Kiper, CC-ed) implemented it for this and
that is where we got the numbers.

Daniel, you OK posting it here? Granted with the caveat that it won't even
compile against upstream as it was based on a distro kernel.




Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Konrad Rzeszutek Wilk
On Mon, Jan 29, 2018 at 10:37:44AM +, David Woodhouse wrote:
> On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote:
> > On 01/29/2018 09:46 AM, David Woodhouse wrote:
> > > Reading the code and comparing with the SDM, I can't see where we're
> > > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
> > > case...
> > Hmmm ... you are probably right! I think all users of this interface
> > always trap + update save area and never passthrough the MSR. That is
> > why only LOAD is needed *so far*.
> > 
> > Okay, let me sort this out in v3 then.
> 
> I'm starting to think a variant of Ashok's patch might actually be the
> simpler approach, and not "premature optimisation". Especially if we
> need to support the !cpu_has_vmx_msr_bitmaps() case?
> 
> Start with vmx->spec_ctrl set to zero. When first touched, make it
> passthrough (but not atomically switched) and set a flag (e.g.
> "spec_ctrl_live") which triggers the 'restore_branch_speculation' and
> 'save_and_restrict_branch_speculation' behaviours. Except don't use
> those macros. Those can look something like
> 
>  /* If this vCPU has touched SPEC_CTRL then restore its value if needed */
>  if (vmx->spec_ctrl_live && vmx->spec_ctrl)
>      wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>  /* vmentry is serialising on affected CPUs, so the conditional branch is 
> safe */
> 
> 
> ... and, respectively, ...
> 
>  /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have 
> zero */
>  if (vmx->spec_ctrl_live) {
>  rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>  if (vmx->spec_ctrl)
>  wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>  }
> 
> 
> Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the
> pass-through MSR bitmap directly, in the case that it exists? 

Or the cpuid_flag as that would determine whether the MSR bitmap intercept
is set or not.




Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Paolo Bonzini
On 29/01/2018 11:37, David Woodhouse wrote:
> On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote:
>> On 01/29/2018 09:46 AM, David Woodhouse wrote:
>>> Reading the code and comparing with the SDM, I can't see where we're
>>> ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
>>> case...
>> Hmmm ... you are probably right! I think all users of this interface
>> always trap + update save area and never passthrough the MSR. That is
>> why only LOAD is needed *so far*.
>>
>> Okay, let me sort this out in v3 then.
> 
> I'm starting to think a variant of Ashok's patch might actually be the
> simpler approach, and not "premature optimisation". Especially if we
> need to support the !cpu_has_vmx_msr_bitmaps() case?

That case is awfully slow anyway, it doesn't matter, but the
direct-access flag would simply be always 0 if you have no MSR bitmaps.

> Start with vmx->spec_ctrl set to zero. When first touched, make it
> passthrough (but not atomically switched) and set a flag (e.g.
> "spec_ctrl_live") which triggers the 'restore_branch_speculation' and
> 'save_and_restrict_branch_speculation' behaviours. Except don't use
> those macros. Those can look something like
> 
>  /* If this vCPU has touched SPEC_CTRL then restore its value if needed */
>  if (vmx->spec_ctrl_live && vmx->spec_ctrl)
>      wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>  /* vmentry is serialising on affected CPUs, so the conditional branch is 
> safe */
> 
> 
> ... and, respectively, ...
> 
>  /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have 
> zero */
>  if (vmx->spec_ctrl_live) {
>  rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>  if (vmx->spec_ctrl)
>  wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>  }
> 
> Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the
> pass-through MSR bitmap directly, in the case that it exists? 

Probably a cache miss, or even a TLB miss if you're unlucky, so the
separate flag is okay.

Paolo


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread Paolo Bonzini
On 29/01/2018 09:46, David Woodhouse wrote:
> I'd actually quite like to repeat the benchmark on the new fixed
> microcode, if anyone has it yet, to see if that read/swap slowness is
> still quite as excessive. I'm certainly not ruling this out, but I'm
> just a little wary of premature optimisation, and I'd like to make sure
> we have everything *else* in the KVM patches right first.
> 
> The fact that the save-and-restrict macros I have in the tip of my
> working tree at the moment are horrid and causing 0-day nastygrams,
> probably doesn't help persuade me to favour the approach ;)
> 
> ... hm, the CPU actually has separate MSR save/restore lists for
> entry/exit, doesn't it? Is there any way to sanely make use of that and
> do the restoration manually on vmentry but let it be automatic on
> vmexit, by having it *only* in the guest's MSR-store area to be saved
> on exit and restored on exit, but *not* in the host's MSR-store area?

Right now we don't even use the store-on-vmexit list at all, so the
Simplest Patch That Can Possibly Work is definitely the one using
rdmsr/wrmsr.  It's not really premature optimization---though it doesn't
hurt that it isn't awfully slow.

Paolo


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread David Woodhouse
On Mon, 2018-01-29 at 10:43 +0100, KarimAllah Ahmed wrote:
> On 01/29/2018 09:46 AM, David Woodhouse wrote:
> > Reading the code and comparing with the SDM, I can't see where we're
> > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
> > case...
> Hmmm ... you are probably right! I think all users of this interface
> always trap + update save area and never passthrough the MSR. That is
> why only LOAD is needed *so far*.
> 
> Okay, let me sort this out in v3 then.

I'm starting to think a variant of Ashok's patch might actually be the
simpler approach, and not "premature optimisation". Especially if we
need to support the !cpu_has_vmx_msr_bitmaps() case?

Start with vmx->spec_ctrl set to zero. When first touched, make it
passthrough (but not atomically switched) and set a flag (e.g.
"spec_ctrl_live") which triggers the 'restore_branch_speculation' and
'save_and_restrict_branch_speculation' behaviours. Except don't use
those macros. Those can look something like

 /* If this vCPU has touched SPEC_CTRL then restore its value if needed */
 if (vmx->spec_ctrl_live && vmx->spec_ctrl)
     wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 /* vmentry is serialising on affected CPUs, so the conditional branch is safe 
*/


... and, respectively, ...

 /* If this vCPU has touched SPEC_CTRL then save its value and ensure we have 
zero */
 if (vmx->spec_ctrl_live) {
 rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 if (vmx->spec_ctrl)
 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 }


Perhaps we can ditch the separate 'spec_ctrl_live' flag and check the
pass-through MSR bitmap directly, in the case that it exists? 
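
For concreteness, a minimal sketch of that first-touch handling (illustrative
only, not the posted patch): spec_ctrl, spec_ctrl_live and the msr_bitmap
argument are assumed names; vmx_disable_intercept_for_msr() and MSR_TYPE_RW
are taken from the code quoted earlier in the thread.

 /* Sketch: the first guest WRMSR to SPEC_CTRL caches the value, marks the
  * MSR "live", and stops intercepting it where MSR bitmaps are available. */
 static void vmx_spec_ctrl_first_touch(struct vcpu_vmx *vmx,
 				      unsigned long *msr_bitmap, u64 data)
 {
 	vmx->spec_ctrl = data;
 	if (!vmx->spec_ctrl_live) {
 		vmx->spec_ctrl_live = true;
 		if (cpu_has_vmx_msr_bitmap())
 			vmx_disable_intercept_for_msr(msr_bitmap,
 						      MSR_IA32_SPEC_CTRL,
 						      MSR_TYPE_RW);
 	}
 }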



Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread KarimAllah Ahmed

On 01/29/2018 09:46 AM, David Woodhouse wrote:

On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote:


Windows use IBRS and Microsoft don't have any plans to switch to retpoline.
Running a Windows guest should be a pretty common use-case no?

In addition, your handle of the first WRMSR intercept could be different.
It could signal you to start doing the following:
1. Disable intercept on SPEC_CTRL MSR.
2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR.
3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
(And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)

That way, you will both have fastest option as long as guest don't use IBRS
and also won't have the 3% performance hit compared to Konrad's proposal.

Am I missing something?


Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part
of the 3% speedup you observe is because in the above, the vmentry path
doesn't need to *read* the host's value and store it; the host is
expected to restore it for itself anyway?

I'd actually quite like to repeat the benchmark on the new fixed
microcode, if anyone has it yet, to see if that read/swap slowness is
still quite as excessive. I'm certainly not ruling this out, but I'm
just a little wary of premature optimisation, and I'd like to make sure
we have everything *else* in the KVM patches right first.

The fact that the save-and-restrict macros I have in the tip of my
working tree at the moment are horrid and causing 0-day nastygrams,
probably doesn't help persuade me to favour the approach ;)

... hm, the CPU actually has separate MSR save/restore lists for
entry/exit, doesn't it? Is there any way to sanely make use of that and
do the restoration manually on vmentry but let it be automatic on
vmexit, by having it *only* in the guest's MSR-store area to be saved
on exit and restored on exit, but *not* in the host's MSR-store area?

Reading the code and comparing with the SDM, I can't see where we're
ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
case...


Hmmm ... you are probably right! I think all users of this interface
always trap + update save area and never passthrough the MSR. That is
why only LOAD is needed *so far*.

Okay, let me sort this out in v3 then.




Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-29 Thread David Woodhouse
On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote:
> 
> Windows use IBRS and Microsoft don't have any plans to switch to retpoline.
> Running a Windows guest should be a pretty common use-case no?
> 
> In addition, your handle of the first WRMSR intercept could be different.
> It could signal you to start doing the following:
> 1. Disable intercept on SPEC_CTRL MSR.
> 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR.
> 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
> (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)
> 
> That way, you will both have fastest option as long as guest don't use IBRS
> and also won't have the 3% performance hit compared to Konrad's proposal.
> 
> Am I missing something?

Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part
of the 3% speedup you observe is because in the above, the vmentry path
doesn't need to *read* the host's value and store it; the host is
expected to restore it for itself anyway?

I'd actually quite like to repeat the benchmark on the new fixed
microcode, if anyone has it yet, to see if that read/swap slowness is
still quite as excessive. I'm certainly not ruling this out, but I'm
just a little wary of premature optimisation, and I'd like to make sure
we have everything *else* in the KVM patches right first.

The fact that the save-and-restrict macros I have in the tip of my
working tree at the moment are horrid and causing 0-day nastygrams,
probably doesn't help persuade me to favour the approach ;)

... hm, the CPU actually has separate MSR save/restore lists for
entry/exit, doesn't it? Is there any way to sanely make use of that and
do the restoration manually on vmentry but let it be automatic on
vmexit, by having it *only* in the guest's MSR-store area to be saved
on exit and restored on exit, but *not* in the host's MSR-store area?

Reading the code and comparing with the SDM, I can't see where we're
ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
case...
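
For reference, a hedged sketch of the exit-only arrangement being asked about:
point the VM-exit MSR-store area at the guest half of the existing autoload
array so the CPU saves the guest value automatically on exit, while the
entry-side load is done manually. The helper name is made up; the VMCS field
names are the SDM ones.

 static void vmx_setup_exit_msr_store(struct vcpu_vmx *vmx)
 {
 	/* Guest MSR values get stored here on VM-exit; VM-entry does not
 	 * reload from this area, so the entry side stays under software
 	 * control. */
 	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autoload.nr);
 }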



Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread KarimAllah Ahmed

On 01/28/2018 09:21 PM, Konrad Rzeszutek Wilk wrote:

On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed  wrote:

Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for
guests
that will only mitigate Spectre V2 through IBRS+IBPB and will not be
using a
retpoline+IBPB based approach.

To avoid the overhead of atomically saving and restoring the
MSR_IA32_SPEC_CTRL
for guests that do not actually use the MSR, only add_atomic_switch_msr
when a
non-zero is written to it.



We tried this and found that it was about 3% slower than doing the old way of
rdmsr and wrmsr.


I actually have not measured the performance difference between using 
the atomic_switch vs just doing rdmsr/wrmsr. I was mostly focused 
on not saving and restoring when the guest does not actually use the MSRs.


Interesting data point though, I will update the code to use rdmsr/wrmsr 
and see if I see it in my hardware (I am using a skylake processor).




But that was also with the host doing IBRS  as well.

On what type of hardware did you run this?

Ccing Daniel.


Cc: Asit Mallick 
Cc: Arjan Van De Ven 
Cc: Dave Hansen 
Cc: Andi Kleen 
Cc: Andrea Arcangeli 
Cc: Linus Torvalds 
Cc: Tim Chen 
Cc: Thomas Gleixner 
Cc: Dan Williams 
Cc: Jun Nakajima 
Cc: Paolo Bonzini 
Cc: David Woodhouse 
Cc: Greg KH 
Cc: Andy Lutomirski 
Signed-off-by: KarimAllah Ahmed 
Signed-off-by: Ashok Raj 
---
arch/x86/kvm/cpuid.c |  4 +++-
arch/x86/kvm/cpuid.h |  1 +
arch/x86/kvm/vmx.c   | 63

3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0099e10..dc78095 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
/* These are scattered features in cpufeatures.h. */
#define KVM_CPUID_BIT_AVX512_4VNNIW 2
#define KVM_CPUID_BIT_AVX512_4FMAPS 3
+#define KVM_CPUID_BIT_SPEC_CTRL 26
#define KF(x) bit(KVM_CPUID_BIT_##x)

int kvm_update_cpuid(struct kvm_vcpu *vcpu)
@@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct
kvm_cpuid_entry2 *entry, u32 function,

/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
-   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
+   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);

/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3..dcfe227 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
[CPUID_7_ECX] = { 7, 0, CPUID_ECX},
[CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
+   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
};

static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned
x86_feature)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa8638a..1b743a0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu,
bool masked);
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
u16 error_code);
static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+static void __always_inline vmx_disable_intercept_for_msr(unsigned
long *msr_bitmap,
+ u32 msr, int type);
+

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct
vcpu_vmx *vmx, unsigned msr,
m->host[i].value = host_val;
}

+/* do not touch guest_val and host_val if the msr is not found */
+static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
+ u64 *guest_val, u64 *host_val)
+{
+   unsigned i;
+   struct msr_autoload *m = &vmx->msr_autoload;
+
+   for (i = 0; i < m->nr; ++i)
+   if (m->guest[i].index == msr)
+   break;
+
+   if (i == m->nr)
+   return 1;
+
+   if (guest_val)
+   *guest_val = m->guest[i].value;
+   if (host_val)
+   *host_val = m->host[i].value;
+
+   return 0;
+}
+
static bool update_transition_efer(struct vcpu_vmx *vmx, int
efer_offset)
{
u64 guest_efer = vmx->vcpu.arch.efer;
@@ -3203,7 +3228,9 @@ static inline bool
vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
  */
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data
*msr_info)
{
+   u64 spec_ctrl = 0;
struct shared_msr_entry *msr;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);

switch (msr_info->index) {
#ifdef CONFIG_X86_64
@@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
struct msr_data *msr_info)

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread Liran Alon

- dw...@infradead.org wrote:

> On Sun, 2018-01-28 at 15:21 -0500, Konrad Rzeszutek Wilk wrote:
> > >To avoid the overhead of atomically saving and restoring the
> MSR_IA32_SPEC_CTRL
> > >for guests that do not actually use the MSR, only
> add_atomic_switch_msr when a
> > >non-zero is written to it.
> > 
> > 
> > We tried this and found that it was about 3% slower that doing the
> > old way of rdmsr and wrmsr.
> > 
> > But that was also with the host doing IBRS  as well.
> 
> The common case will be that neither host nor guest are doing IBRS.
> Until the guest touches the MSR we do absolutely *nothing* with it,
> which is definitely the fastest option.

Windows uses IBRS and Microsoft doesn't have any plans to switch to retpoline.
Running a Windows guest should be a pretty common use-case, no?

In addition, your handling of the first WRMSR intercept could be different.
It could signal you to start doing the following:
1. Disable intercept on SPEC_CTRL MSR.
2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR.
3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
(And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)

That way, you will both have the fastest option as long as the guest doesn't use IBRS
and also won't have the 3% performance hit compared to Konrad's proposal.

Am I missing something?

-Liran
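
A hedged sketch of steps 2 and 3 above (illustrative only; the spec_ctrl field
and the host_ibrs flag are assumptions, and bit 0 of the MSR is IBRS):

 static void guest_spec_ctrl_vmentry(struct vcpu_vmx *vmx)
 {
 	/* Step 2: load the vCPU's SPEC_CTRL value before entering the guest. */
 	wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 }

 static void guest_spec_ctrl_vmexit(struct vcpu_vmx *vmx, bool host_ibrs)
 {
 	/* Step 3: save whatever the guest left in SPEC_CTRL... */
 	rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 	/* ...then restore the host's policy: IBRS (bit 0) if the host uses it. */
 	wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs ? 1 : 0);
 }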


RE: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread David Woodhouse

>>
>> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote:
>> >
>> > Do you mean that the host would intercept the guest WRMSR and do
>> > WRMSR itself?  I would suggest that doing so is inconsistent with the
>> > docs.  As specified, doing WRMSR to write 1 to IBRS does *not*
>> > protect the guest.
>>
>> I believe it does. Guest kernel is protected from any guest userspace
>> predictions learned before IBRS was last set to 1 in *any* mode,
>> including host.
>
> the specification requires you to write a 1 on each transition to higher
> privilege.

Right. Andy's concern was about VMX non-root (i.e. guest) ring 0
attempting to write IBRS, but the write is trapped and actually happens in
the host.

As long as it *remains* set when the host re-enters the vCPU, that should
be fine.


-- 
dwmw2



RE: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread Van De Ven, Arjan
> 
> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote:
> >
> > Do you mean that the host would intercept the guest WRMSR and do
> > WRMSR itself?  I would suggest that doing so is inconsistent with the
> > docs.  As specified, doing WRMSR to write 1 to IBRS does *not*
> > protect the guest.
> 
> I believe it does. Guest kernel is protected from any guest userspace
> predictions learned before IBRS was last set to 1 in *any* mode,
> including host.

The specification requires you to write a 1 on each transition to higher
privilege.


> 
> > For that matter, what are the semantics of VMRESUME doing a write to
> > IBRS as part of its MSR switch?  Is it treated as IBRS=1 from guest
> > context?

The guest ring 3 wouldn't have had time to do anything evil in the meantime, so
the VMRESUME write is valid. (Anything else would be unworkable.)
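
For reference, the kind of host-side IBRS toggling described above would look
roughly like this. This is an illustrative sketch only, not code from this
thread: the hook names are made up, while MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS
and X86_FEATURE_SPEC_CTRL are the constants from the contemporary kernel
patches.

	static inline void ibrs_enter_kernel(void)
	{
		/* Transition to higher privilege: write IBRS=1. */
		if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
			native_wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);
	}

	static inline void ibrs_exit_to_user(void)
	{
		/* Returning to lower privilege: drop IBRS again. */
		if (static_cpu_has(X86_FEATURE_SPEC_CTRL))
			native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
	}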



Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread David Woodhouse
On Sun, 2018-01-28 at 12:53 -0800, Andy Lutomirski wrote:
> 
> > I believe it does. Guest kernel is protected from any guest userspace
> > predictions learned before IBRS was last set to 1 in *any* mode,
> > including host.
> 
> Hmm, you're probably right.
> 
> I would love to know what awful hack Intel did that resulted in these 
> semantics.

I am not convinced I ever really want to know. I just want it all to go
away in a future CPU with a SPCTR_NO bit in IA32_ARCH_CAPABILITIES.
(Not the IBRS_ALL interim hack).

I think it's a mixture of ongoing checking, and a barrier. And perhaps
varying proportions of each, in different CPU generations. By defining
it thus, they can actually implement it *either* way.


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread Andy Lutomirski



> On Jan 28, 2018, at 12:44 PM, David Woodhouse  wrote:
> 
>> On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote:
>> 
>> Do you mean that the host would intercept the guest WRMSR and do
>> WRMSR itself?  I would suggest that doing so is inconsistent with the
>> docs.  As specified, doing WRMSR to write 1 to IBRS does *not*
>> protect the guest.
> 
> I believe it does. Guest kernel is protected from any guest userspace
> predictions learned before IBRS was last set to 1 in *any* mode,
> including host.

Hmm, you're probably right.

I would love to know what awful hack Intel did that resulted in these semantics.

> 
>> For that matter, what are the semantics of VMRESUME doing a write to
>> IBRS as part of its MSR switch?  Is it treated as IBRS=1 from guest
>> context?
> 
> Why does it matter? We *have* confirmed, FWIW, that VMRESUME writing 1
> to IBRS as part of its MSR switch when it was already 1 is not
> optimised away and *is* treated as writing IBRS=1 again.

That's good news.

Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread David Woodhouse
On Sun, 2018-01-28 at 12:40 -0800, Andy Lutomirski wrote:
> 
> Do you mean that the host would intercept the guest WRMSR and do
> WRMSR itself?  I would suggest that doing so is inconsistent with the
> docs.  As specified, doing WRMSR to write 1 to IBRS does *not*
> protect the guest.

I believe it does. Guest kernel is protected from any guest userspace
predictions learned before IBRS was last set to 1 in *any* mode,
including host.

> For that matter, what are the semantics of VMRESUME doing a write to
> IBRS as part of its MSR switch?  Is it treated as IBRS=1 from guest
> context?

Why does it matter? We *have* confirmed, FWIW, that VMRESUME writing 1
to IBRS as part of its MSR switch when it was already 1 is not
optimised away and *is* treated as writing IBRS=1 again.


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread Andy Lutomirski

> On Jan 28, 2018, at 12:21 PM, Konrad Rzeszutek Wilk  
> wrote:
> 
>> On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed  
>> wrote:
>> Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for
>> guests
>> that will only mitigate Spectre V2 through IBRS+IBPB and will not be
>> using a
>> retpoline+IBPB based approach.
>> 
>> To avoid the overhead of atomically saving and restoring the
>> MSR_IA32_SPEC_CTRL
>> for guests that do not actually use the MSR, only add_atomic_switch_msr
>> when a
>> non-zero is written to it.
> 
> 
> We tried this and found that it was about 3% slower than doing the old way of
> rdmsr and wrmsr.
> 

Do you mean that the host would intercept the guest WRMSR and do WRMSR itself?  
I would suggest that doing so is inconsistent with the docs.  As specified, 
doing WRMSR to write 1 to IBRS does *not* protect the guest.

For that matter, what are the semantics of VMRESUME doing a write to IBRS as 
part of its MSR switch?  Is it treated as IBRS=1 from guest context?


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread David Woodhouse
On Sun, 2018-01-28 at 15:21 -0500, Konrad Rzeszutek Wilk wrote:
> >To avoid the overhead of atomically saving and restoring the 
> >MSR_IA32_SPEC_CTRL
> >for guests that do not actually use the MSR, only add_atomic_switch_msr when 
> >a
> >non-zero is written to it.
> 
> 
> > We tried this and found that it was about 3% slower than doing the
> old way of rdmsr and wrmsr.
> 
> But that was also with the host doing IBRS  as well.

The common case will be that neither host nor guest are doing IBRS.
Until the guest touches the MSR we do absolutely *nothing* with it,
which is definitely the fastest option.


Re: [PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread Konrad Rzeszutek Wilk
On January 28, 2018 2:29:10 PM EST, KarimAllah Ahmed  wrote:
>Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for
>guests
>that will only mitigate Spectre V2 through IBRS+IBPB and will not be
>using a
>retpoline+IBPB based approach.
>
>To avoid the overhead of atomically saving and restoring the
>MSR_IA32_SPEC_CTRL
>for guests that do not actually use the MSR, only add_atomic_switch_msr
>when a
>non-zero is written to it.


We tried this and found that it was about 3% slower than doing the old way of
rdmsr and wrmsr.

But that was also with the host doing IBRS as well.

On what type of hardware did you run this?

Ccing Daniel.
>
>Cc: Asit Mallick 
>Cc: Arjan Van De Ven 
>Cc: Dave Hansen 
>Cc: Andi Kleen 
>Cc: Andrea Arcangeli 
>Cc: Linus Torvalds 
>Cc: Tim Chen 
>Cc: Thomas Gleixner 
>Cc: Dan Williams 
>Cc: Jun Nakajima 
>Cc: Paolo Bonzini 
>Cc: David Woodhouse 
>Cc: Greg KH 
>Cc: Andy Lutomirski 
>Signed-off-by: KarimAllah Ahmed 
>Signed-off-by: Ashok Raj 
>---
> arch/x86/kvm/cpuid.c |  4 +++-
> arch/x86/kvm/cpuid.h |  1 +
>arch/x86/kvm/vmx.c   | 63
>
> 3 files changed, 67 insertions(+), 1 deletion(-)
>
>diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>index 0099e10..dc78095 100644
>--- a/arch/x86/kvm/cpuid.c
>+++ b/arch/x86/kvm/cpuid.c
>@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
> /* These are scattered features in cpufeatures.h. */
> #define KVM_CPUID_BIT_AVX512_4VNNIW 2
> #define KVM_CPUID_BIT_AVX512_4FMAPS 3
>+#define KVM_CPUID_BIT_SPEC_CTRL 26
> #define KF(x) bit(KVM_CPUID_BIT_##x)
> 
> int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>@@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct
>kvm_cpuid_entry2 *entry, u32 function,
> 
>   /* cpuid 7.0.edx*/
>   const u32 kvm_cpuid_7_0_edx_x86_features =
>-  KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
>+  KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
>+  (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
> 
>   /* all calls to cpuid_count() should be made on the same cpu */
>   get_cpu();
>diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
>index cdc70a3..dcfe227 100644
>--- a/arch/x86/kvm/cpuid.h
>+++ b/arch/x86/kvm/cpuid.h
>@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
>   [CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
>   [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
>   [CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
>+  [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
> };
> 
>static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned
>x86_feature)
>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>index aa8638a..1b743a0 100644
>--- a/arch/x86/kvm/vmx.c
>+++ b/arch/x86/kvm/vmx.c
>@@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu,
>bool masked);
> static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
>   u16 error_code);
> static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
>+static void __always_inline vmx_disable_intercept_for_msr(unsigned
>long *msr_bitmap,
>+u32 msr, int type);
>+
> 
> static DEFINE_PER_CPU(struct vmcs *, vmxarea);
> static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
>@@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct
>vcpu_vmx *vmx, unsigned msr,
>   m->host[i].value = host_val;
> }
> 
>+/* do not touch guest_val and host_val if the msr is not found */
>+static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
>+u64 *guest_val, u64 *host_val)
>+{
>+  unsigned i;
>+  struct msr_autoload *m = &vmx->msr_autoload;
>+
>+  for (i = 0; i < m->nr; ++i)
>+  if (m->guest[i].index == msr)
>+  break;
>+
>+  if (i == m->nr)
>+  return 1;
>+
>+  if (guest_val)
>+  *guest_val = m->guest[i].value;
>+  if (host_val)
>+  *host_val = m->host[i].value;
>+
>+  return 0;
>+}
>+
>static bool update_transition_efer(struct vcpu_vmx *vmx, int
>efer_offset)
> {
>   u64 guest_efer = vmx->vcpu.arch.efer;
>@@ -3203,7 +3228,9 @@ static inline bool
>vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
>  */
>static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data
>*msr_info)
> {
>+  u64 spec_ctrl = 0;
>   struct shared_msr_entry *msr;
>+  struct vcpu_vmx *vmx = to_vmx(vcpu);
> 
>   switch (msr_info->index) {
> #ifdef CONFIG_X86_64
>@@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
>struct msr_data *msr_info)
>   case MSR_IA32_TSC:
>   msr_info->data = guest_read_tsc(vcpu);
>   break;
>+  case MSR_IA32_SPEC_CTRL:
>+  if (!msr_info->host_initiated &&
>+  !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
>+  return 1;
>+
>+ 

[PATCH] x86: vmx: Allow direct access to MSR_IA32_SPEC_CTRL

2018-01-28 Thread KarimAllah Ahmed
Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests
that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a
retpoline+IBPB based approach.

To avoid the overhead of atomically saving and restoring the MSR_IA32_SPEC_CTRL
for guests that do not actually use the MSR, only add_atomic_switch_msr when a
non-zero is written to it.
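
The patch as archived below is truncated before its vmx_set_msr() hunk, but
based on this description the write side would look roughly like the
following. This is a sketch, not the actual hunk; the guard on msr_info->data
and the host value of 0 are assumptions, while guest_cpuid_has() and
add_atomic_switch_msr() match the helpers visible in the quoted code.

	case MSR_IA32_SPEC_CTRL:
		if (!msr_info->host_initiated &&
		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
			return 1;

		/*
		 * Put the MSR on the atomic VM-entry/VM-exit switch list
		 * only once the guest writes a non-zero value, so guests
		 * that never touch it keep the cheap path.
		 */
		if (msr_info->data)
			add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
					      msr_info->data, 0);
		break;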

Cc: Asit Mallick 
Cc: Arjan Van De Ven 
Cc: Dave Hansen 
Cc: Andi Kleen 
Cc: Andrea Arcangeli 
Cc: Linus Torvalds 
Cc: Tim Chen 
Cc: Thomas Gleixner 
Cc: Dan Williams 
Cc: Jun Nakajima 
Cc: Paolo Bonzini 
Cc: David Woodhouse 
Cc: Greg KH 
Cc: Andy Lutomirski 
Signed-off-by: KarimAllah Ahmed 
Signed-off-by: Ashok Raj 
---
 arch/x86/kvm/cpuid.c |  4 +++-
 arch/x86/kvm/cpuid.h |  1 +
 arch/x86/kvm/vmx.c   | 63 
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0099e10..dc78095 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -70,6 +70,7 @@ u64 kvm_supported_xcr0(void)
 /* These are scattered features in cpufeatures.h. */
 #define KVM_CPUID_BIT_AVX512_4VNNIW 2
 #define KVM_CPUID_BIT_AVX512_4FMAPS 3
+#define KVM_CPUID_BIT_SPEC_CTRL 26
 #define KF(x) bit(KVM_CPUID_BIT_##x)
 
 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
@@ -392,7 +393,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
 
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
-   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+   KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) | \
+   (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ? KF(SPEC_CTRL) : 0);
 
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3..dcfe227 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_000A_EDX] = {0x800a, 0, CPUID_EDX},
[CPUID_7_ECX] = { 7, 0, CPUID_ECX},
[CPUID_8000_0007_EBX] = {0x8007, 0, CPUID_EBX},
+   [CPUID_7_EDX] = { 7, 0, CPUID_EDX},
 };
 
 static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa8638a..1b743a0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -920,6 +920,9 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool 
masked);
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
u16 error_code);
 static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long 
*msr_bitmap,
+ u32 msr, int type);
+
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2007,6 +2010,28 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, 
unsigned msr,
m->host[i].value = host_val;
 }
 
+/* do not touch guest_val and host_val if the msr is not found */
+static int read_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
+ u64 *guest_val, u64 *host_val)
+{
+   unsigned i;
+   struct msr_autoload *m = &vmx->msr_autoload;
+
+   for (i = 0; i < m->nr; ++i)
+   if (m->guest[i].index == msr)
+   break;
+
+   if (i == m->nr)
+   return 1;
+
+   if (guest_val)
+   *guest_val = m->guest[i].value;
+   if (host_val)
+   *host_val = m->host[i].value;
+
+   return 0;
+}
+
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
u64 guest_efer = vmx->vcpu.arch.efer;
@@ -3203,7 +3228,9 @@ static inline bool vmx_feature_control_msr_valid(struct 
kvm_vcpu *vcpu,
  */
 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
+   u64 spec_ctrl = 0;
struct shared_msr_entry *msr;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
switch (msr_info->index) {
 #ifdef CONFIG_X86_64
@@ -3223,6 +3250,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
case MSR_IA32_TSC:
msr_info->data = guest_read_tsc(vcpu);
break;
+   case MSR_IA32_SPEC_CTRL:
+   if (!msr_info->host_initiated &&
+   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+   return 1;
+
+   /*
+* If the MSR is not in the atomic list yet, then it was never
+* written to. So the MSR value will be '0'.
+*/
+   read_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, &spec_ctrl, NULL);
+
+   msr_info->data = spec_ctrl;
+   break;
case MSR_IA32_SYSENTER_CS: