When the host enables XSAVE/XRSTOR, the patch exposes the XSAVE/XRSTOR
related CPUID leaves to the guest by fixing up kvm_emulate_cpuid(), and
allows the guest to set CR4.OSXSAVE to enable XSAVE.
The patch also adds per-vcpu host/guest xstate images and masks, and
switches the host/guest extended state (FPU/SSE/YMM) with XSAVE/XRSTOR
instead of FXSAVE/FXRSTOR when the host supports XSAVE.

Signed-off-by: Dexuan Cui <dexuan....@intel.com>
---
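A rough, illustrative sketch (not part of the patch; guest_enable_xsave()
is made up for illustration) of the guest-side sequence these changes
service: the guest finds XSAVE via the fixed-up CPUID leaf 1, sets
CR4.OSXSAVE, and loads XCR0 with XSETBV, which then lands in the new
handle_xsetbv() exit handler:

        static void guest_enable_xsave(void)
        {
                u64 xcr0 = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;

                write_cr4(read_cr4() | X86_CR4_OSXSAVE);

                /* xsetbv: XCR[ECX] <- EDX:EAX (ECX = 0 selects XCR0). */
                asm volatile(".byte 0x0f,0x01,0xd1"
                        : : "a" ((u32)xcr0), "d" ((u32)(xcr0 >> 32)), "c" (0));
        }
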
 arch/x86/include/asm/kvm_host.h |   15 +--
 arch/x86/include/asm/vmx.h      |    1 +
 arch/x86/include/asm/xsave.h    |    3 +
 arch/x86/kvm/vmx.c              |   24 +++++
 arch/x86/kvm/x86.c              |  217 +++++++++++++++++++++++++++++++++++++--
 5 files changed, 242 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f0007b..60be1a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
        struct i387_fxsave_struct host_fx_image;
        struct i387_fxsave_struct guest_fx_image;
 
+       struct xsave_struct *host_xstate_image;
+       struct xsave_struct *guest_xstate_image;
+       uint64_t host_xstate_mask;
+       uint64_t guest_xstate_mask;
+
        gva_t mmio_fault_cr2;
        struct kvm_pio_request pio;
        void *pio_data;
@@ -718,16 +723,6 @@ static inline unsigned long read_msr(unsigned long msr)
 }
 #endif
 
-static inline void kvm_fx_save(struct i387_fxsave_struct *image)
-{
-       asm("fxsave (%0)":: "r" (image));
-}
-
-static inline void kvm_fx_restore(struct i387_fxsave_struct *image)
-{
-       asm("fxrstor (%0)":: "r" (image));
-}
-
 static inline void kvm_fx_finit(void)
 {
        asm("finit");
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fb9a080..842286b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -260,6 +260,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD             54
+#define EXIT_REASON_XSETBV             55
 
 /*
  * Interruption-information format
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index ddc04cc..ada81a2 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -13,6 +13,9 @@
 
 #define FXSAVE_SIZE    512
 
+#define XSTATE_YMM_SIZE 256
+#define XSTATE_YMM_OFFSET (512 + 64)
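+/*
+ * YMM state follows the 512-byte legacy area and the 64-byte xsave header,
+ * hence the 512 + 64 offset.
+ */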
+
 /*
  * These are the features that the OS can handle currently.
  */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 875b785..a72d024 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -35,6 +35,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -2517,6 +2519,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
        if (enable_ept)
                vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
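+       /* Let the guest own CR4.OSXSAVE so toggling it does not cause an exit. */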
+       if (cpu_has_xsave)
+               vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;
        vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
        tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
@@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+       u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+               (u32)kvm_register_read(vcpu, VCPU_REGS_RAX);
+       u64 host_bv = vcpu->arch.host_xstate_mask;
+
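+       /*
+        * Reject feature bits the host does not support, a cleared x87 bit
+        * (bit 0 must stay set), or YMM enabled without SSE.
+        */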
+       if (((new_bv ^ host_bv) & ~host_bv) || !(new_bv & 1))
+               goto err;
+       if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE))
+               goto err;
+       vcpu->arch.guest_xstate_mask = new_bv;
+       xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+       skip_emulated_instruction(vcpu);
+       return 1;
+err:
+       kvm_inject_gp(vcpu, 0);
+       return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
@@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
+       [EXIT_REASON_XSETBV]                  = handle_xsetbv,
        [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
        [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
        [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b2ce1d..2af3fbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -52,6 +52,8 @@
 #include <asm/desc.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS                                              \
@@ -62,6 +64,7 @@
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+                         | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)       \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -4017,6 +4020,36 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
 
+static struct kmem_cache *kvm_xstate_cachep;
+static unsigned int kvm_xstate_size;
+
+static int kvm_alloc_xstate_cachep(void)
+{
+       u32 eax, ebx, ecx, edx;
+
+       if (!cpu_has_xsave)
+               return 0;
+
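+       /*
+        * CPUID.0xD sub-leaf 0: EBX reports the xsave area size required for
+        * the feature bits currently enabled in the host's XCR0.
+        */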
+       cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+       kvm_xstate_size = ebx;
+       kvm_xstate_cachep =
+               kmem_cache_create("kvm_vcpu_xstate", kvm_xstate_size,
+                       __alignof__(union thread_xstate), 0, NULL);
+       if (!kvm_xstate_cachep)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void kvm_free_xstate_cachep(void)
+{
+       if (!kvm_xstate_cachep)
+               return;
+
+       kmem_cache_destroy(kvm_xstate_cachep);
+       kvm_xstate_cachep = NULL;
+}
+
 int kvm_arch_init(void *opaque)
 {
        int r;
@@ -4039,6 +4072,10 @@ int kvm_arch_init(void *opaque)
                goto out;
        }
 
+       r = kvm_alloc_xstate_cachep();
+       if (r)
+               goto out;
+
        r = kvm_mmu_module_init();
        if (r)
                goto out;
@@ -4058,6 +4095,7 @@ int kvm_arch_init(void *opaque)
        return 0;
 
 out:
+       kvm_free_xstate_cachep();
        return r;
 }
 
@@ -4070,6 +4108,7 @@ void kvm_arch_exit(void)
                                            CPUFREQ_TRANSITION_NOTIFIER);
        kvm_x86_ops = NULL;
        kvm_mmu_module_exit();
+       kvm_free_xstate_cachep();
 }
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -4307,6 +4346,65 @@ not_found:
        return 36;
 }
 
+#define bitmaskof(idx)  (1U << ((idx) & 31))
+static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
+{
+       u32 eax, ebx, ecx, edx;
+
+       if (func != 0 && func != 1 && func != 0xd)
+               return;
+
+       eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+       ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+       ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+
+       switch (func) {
+       case 0:
+               /* fixup the Maximum Input Value */
+               if (cpu_has_xsave && eax < 0xd)
+                       eax = 0xd;
+               break;
+       case 1:
+               ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) |
+                       bitmaskof(X86_FEATURE_OSXSAVE));
+               if (!cpu_has_xsave)
+                       break;
+               ecx |= bitmaskof(X86_FEATURE_XSAVE);
+               if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+                       ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
+               break;
+       case 0xd:
+               eax = ebx = ecx = edx = 0;
+               if (!cpu_has_xsave)
+                       break;
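+               /*
+                * Sub-leaf 0 reports the supported xstate mask (EAX/EDX) and
+                * the save-area sizes (EBX/ECX); sub-leaf 2 reports the size
+                * and offset of the YMM state within the xsave area.
+                */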
+               switch (idx) {
+               case 0:
+                       eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK;
+                       /* FP/SSE + XSAVE.HEADER + YMM. */
+                       ecx = 512 + 64;
+                       if (eax & XSTATE_YMM)
+                               ecx += XSTATE_YMM_SIZE;
+                       ebx = ecx;
+                       break;
+               case 2:
+                       if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM))
+                               break;
+                       eax = XSTATE_YMM_SIZE;
+                       ebx = XSTATE_YMM_OFFSET;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       }
+
+       kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
+       kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
+       kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
        u32 function, index;
@@ -4325,6 +4423,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
                kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
                kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
        }
+
+       kvm_emulate_cpuid_fixup(vcpu, function, index);
+
        kvm_x86_ops->skip_emulated_instruction(vcpu);
        trace_kvm_cpuid(function,
                        kvm_register_read(vcpu, VCPU_REGS_RAX),
@@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        return 0;
 }
 
+#ifdef CONFIG_X86_64
+#define REX_PREFIX  "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
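+/*
+ * The .byte sequences encode xsave (0x0f,0xae,0x27 => xsave (%rdi)/(%edi))
+ * and xrstor (0x0f,0xae,0x2f => xrstor (%rdi)/(%edi)); REX_PREFIX selects
+ * the 64-bit forms on x86-64.  Raw opcodes are presumably used so the file
+ * still builds with assemblers that lack the xsave/xrstor mnemonics.
+ */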
+static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
+               : "memory");
+               vcpu->arch.host_xstate_mask =
+                       xgetbv(XCR_XFEATURE_ENABLED_MASK);
+       } else {
+               asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
+       }
+}
+
+static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
+               : "memory");
+               vcpu->arch.guest_xstate_mask =
+                       xgetbv(XCR_XFEATURE_ENABLED_MASK);
+       } else {
+               asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+       }
+}
+
+static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
+       } else {
+               asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
+       }
+}
+
+static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
+       } else {
+               asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+       }
+}
+
 void fx_init(struct kvm_vcpu *vcpu)
 {
        unsigned after_mxcsr_mask;
@@ -5102,17 +5257,21 @@ void fx_init(struct kvm_vcpu *vcpu)
         * allocate ram with GFP_KERNEL.
         */
        if (!used_math())
-               kvm_fx_save(&vcpu->arch.host_fx_image);
+               kvm_fx_save_host(vcpu);
 
        /* Initialize guest FPU by resetting ours and saving into guest's */
        preempt_disable();
-       kvm_fx_save(&vcpu->arch.host_fx_image);
+       kvm_fx_save_host(vcpu);
        kvm_fx_finit();
-       kvm_fx_save(&vcpu->arch.guest_fx_image);
-       kvm_fx_restore(&vcpu->arch.host_fx_image);
+       kvm_fx_save_guest(vcpu);
+       kvm_fx_restore_host(vcpu);
        preempt_enable();
 
        vcpu->arch.cr0 |= X86_CR0_ET;
+
+       if (cpu_has_xsave)
+               return;
+
        after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
        vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
        memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
@@ -5126,8 +5285,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
                return;
 
        vcpu->guest_fpu_loaded = 1;
-       kvm_fx_save(&vcpu->arch.host_fx_image);
-       kvm_fx_restore(&vcpu->arch.guest_fx_image);
+       kvm_fx_save_host(vcpu);
+       kvm_fx_restore_guest(vcpu);
        trace_kvm_fpu(1);
 }
 
@@ -5137,13 +5296,50 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
                return;
 
        vcpu->guest_fpu_loaded = 0;
-       kvm_fx_save(&vcpu->arch.guest_fx_image);
-       kvm_fx_restore(&vcpu->arch.host_fx_image);
+       kvm_fx_save_guest(vcpu);
+       kvm_fx_restore_host(vcpu);
        ++vcpu->stat.fpu_reload;
        set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
        trace_kvm_fpu(0);
 }
 
+static void kvm_arch_vcpu_destroy_xstate_image(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.guest_xstate_image)
+               kmem_cache_free(kvm_xstate_cachep,
+                       vcpu->arch.guest_xstate_image);
+       if (vcpu->arch.host_xstate_image)
+               kmem_cache_free(kvm_xstate_cachep,
+                       vcpu->arch.host_xstate_image);
+       vcpu->arch.guest_xstate_image = NULL;
+       vcpu->arch.host_xstate_image = NULL;
+}
+
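+/*
+ * The xstate images are allocated lazily from kvm_arch_vcpu_reset(), which
+ * runs in a context where GFP_KERNEL allocations are fine.
+ */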
+static int kvm_arch_vcpu_create_xstate_image(struct kvm_vcpu *vcpu)
+{
+       if (!cpu_has_xsave)
+               return 0;
+
+       if (!vcpu->arch.guest_xstate_image) {
+               vcpu->arch.guest_xstate_image =
+                       kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL);
+               if (!vcpu->arch.guest_xstate_image)
+                       goto err;
+       }
+       if (!vcpu->arch.host_xstate_image) {
+               vcpu->arch.host_xstate_image =
+                       kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL);
+               if (!vcpu->arch.host_xstate_image)
+                       goto err;
+       }
+
+       return 0;
+
+err:
+       kvm_arch_vcpu_destroy_xstate_image(vcpu);
+       return -ENOMEM;
+}
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
        if (vcpu->arch.time_page) {
@@ -5152,6 +5348,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
        }
 
        kvm_x86_ops->vcpu_free(vcpu);
+       kvm_arch_vcpu_destroy_xstate_image(vcpu);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -5189,6 +5386,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        vcpu_put(vcpu);
 
        kvm_x86_ops->vcpu_free(vcpu);
+       kvm_arch_vcpu_destroy_xstate_image(vcpu);
 }
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -5201,6 +5399,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.dr6 = DR6_FIXED_1;
        vcpu->arch.dr7 = DR7_FIXED_1;
 
+       if (kvm_arch_vcpu_create_xstate_image(vcpu) < 0)
+               return -ENOMEM;
+
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-- 
1.6.0
