On Mon, Jan 29, 2018 at 12:31:13PM -0500, Konrad Rzeszutek Wilk wrote:
> On Mon, Jan 29, 2018 at 08:46:03AM +0000, David Woodhouse wrote:
> > On Sun, 2018-01-28 at 16:39 -0800, Liran Alon wrote:
> > >
> > > Windows use IBRS and Microsoft don't have any plans to switch to 
> > > retpoline.
> > > Running a Windows guest should be a pretty common use-case no?
> > >
> > > In addition, your handle of the first WRMSR intercept could be different.
> > > It could signal you to start doing the following:
> > > 1. Disable intercept on SPEC_CTRL MSR.
> > > 2. On VMEntry, Write vCPU SPEC_CTRL value into physical MSR.
> > > 3. On VMExit, read physical MSR into vCPU SPEC_CTRL value.
> > > (And if IBRS is used at host, also set physical SPEC_CTRL MSR here to 1)
> > >
> > > That way, you will both have fastest option as long as guest don't use 
> > > IBRS
> > > and also won't have the 3% performance hit compared to Konrad's proposal.
> > >
> > > Am I missing something?
> >
> > Reads from the SPEC_CTRL MSR are strangely slow. I suspect a large part
> > of the 3% speedup you observe is because in the above, the vmentry path
> > doesn't need to *read* the host's value and store it; the host is
> > expected to restore it for itself anyway?
>
> Yes for at least the purpose of correctness. That is based on what I have
> heard is that you when you transition to a higher ring you have to write 1, 
> then write zero
> when you transition back to lower rings. That is it is like a knob.
>
> But then I heard that on some CPUs it is more like reset button and
> just writting 1 to IBRS is fine. But again, correctness here.
>
> >
> > I'd actually quite like to repeat the benchmark on the new fixed
> > microcode, if anyone has it yet, to see if that read/swap slowness is
> > still quite as excessive. I'm certainly not ruling this out, but I'm
> > just a little wary of premature optimisation, and I'd like to make sure
> > we have everything *else* in the KVM patches right first.
> >
> > The fact that the save-and-restrict macros I have in the tip of my
> > working tree at the moment are horrid and causing 0-day nastygrams,
> > probably doesn't help persuade me to favour the approach ;)
> >
> > ... hm, the CPU actually has separate MSR save/restore lists for
> > entry/exit, doesn't it? Is there any way to sanely make use of that and
> > do the restoration manually on vmentry but let it be automatic on
> > vmexit, by having it *only* in the guest's MSR-store area to be saved
> > on exit and restored on exit, but *not* in the host's MSR-store area?

s/on exit and restored on exit/on exit and restored on entry/?

Additionally, AIUI there is no "host's MSR-store area".

> Oh. That sounds sounds interesting

That is possible but you have to split add_atomic_switch_msr() accordingly.

> > Reading the code and comparing with the SDM, I can't see where we're
> > ever setting VM_EXIT_MSR_STORE_{ADDR,COUNT} except in the nested
> > case...
>
> Right. We (well Daniel Kiper, CC-ed) implemented it for this and
> that is where we got the numbers.
>
> Daniel, you OK posting it here? Granted with the caveats thta it won't even
> compile against upstream as it was based on a distro kernel.

Please look below...

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa9bc4f..e7c0f8b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -162,8 +162,10 @@ module_param(ple_window_max, int, S_IRUGO);

 extern const ulong vmx_return;

-#define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
+#define NR_AUTOLOAD_MSRS       8
+#define NR_AUTOSTORE_MSRS      NR_AUTOLOAD_MSRS
+
+#define VMCS02_POOL_SIZE       1

 struct vmcs {
        u32 revision_id;
@@ -504,6 +506,10 @@ struct vcpu_vmx {
                struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
                struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
        } msr_autoload;
+       struct msr_autostore {
+               unsigned nr;
+               struct vmx_msr_entry guest[NR_AUTOSTORE_MSRS];
+       } msr_autostore;
        struct {
                int           loaded;
                u16           fs_sel, gs_sel, ldt_sel;
@@ -1704,6 +1710,37 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, 
unsigned msr,
        m->host[i].value = host_val;
 }

+static void add_atomic_store_msr(struct vcpu_vmx *vmx, unsigned msr)
+{
+       unsigned i;
+       struct msr_autostore *m = &vmx->msr_autostore;
+
+       for (i = 0; i < m->nr; ++i)
+               if (m->guest[i].index == msr)
+                       return;
+
+       if (i == NR_AUTOSTORE_MSRS) {
+               pr_err("Not enough msr store entries. Can't add msr %x\n", msr);
+               BUG();
+       }
+
+       m->guest[i].index = msr;
+       vmcs_write32(VM_EXIT_MSR_STORE_COUNT, ++m->nr);
+}
+
+static u64 get_msr_vmcs_store(struct vcpu_vmx *vmx, unsigned msr)
+{
+       unsigned i;
+       struct msr_autostore *m = &vmx->msr_autostore;
+
+       for (i = 0; i < m->nr; ++i)
+               if (m->guest[i].index == msr)
+                       return m->guest[i].value;
+
+       pr_err("Can't find msr %x in VMCS store\n", msr);
+       BUG();
+}
+
 static void reload_tss(void)
 {
        /*
@@ -4716,6 +4753,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 #endif

        vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+       vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest));
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
        vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
@@ -8192,8 +8230,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)

        vmx->__launched = vmx->loaded_vmcs->launched;

-       if (ibrs_inuse)
-               wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+       if (ibrs_inuse) {
+               add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, vmx->spec_ctrl,
+                                     SPEC_CTRL_FEATURE_ENABLE_IBRS);
+               add_atomic_store_msr(vmx, MSR_IA32_SPEC_CTRL);
+       }

        asm(
                /* Store host registers */
@@ -8317,12 +8358,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu 
*vcpu)
 #endif
              );

-       if (ibrs_inuse) {
-               rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-               wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_ENABLE_IBRS);
-       }
        stuff_RSB();

+       if (ibrs_inuse)
+               vmx->spec_ctrl = get_msr_vmcs_store(vmx, MSR_IA32_SPEC_CTRL);
+
        /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
        if (debugctlmsr)
                update_debugctlmsr(debugctlmsr);

By the way, I saw in one version of patches that you use own rdmsrl()/wrmsrl()
functions to save/restore IBRS instead of using common ones (I mean
native_rdmsrl()/native_wrmsrl(), safe__rdmsrl()/safe_wrmsrl(), etc.) available
in the kernel. Could you explain why do you do that?

Daniel

Reply via email to