On 04/29/2010 08:22 AM, Dexuan Cui wrote:
When the host enables XSAVE/XRSTOR, the patch exposes the XSAVE/XRSTOR
related CPUID leaves to the guest by fixing up kvm_emulate_cpuid(), and
allows the guest to set CR4.OSXSAVE to enable XSAVE.
The patch adds per-vcpu host/guest xstate images/masks and enhances the
current FXSAVE/FXRSTOR with the new XSAVE/XRSTOR on the host xstate
(FPU/SSE/YMM) switch.


  5 files changed, 242 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f0007b..60be1a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
        struct i387_fxsave_struct host_fx_image;
        struct i387_fxsave_struct guest_fx_image;

+       struct xsave_struct *host_xstate_image;
+       struct xsave_struct *guest_xstate_image;
+       uint64_t host_xstate_mask;

Does host_xstate_mask need to be per-vcpu, or can it be global?

+       uint64_t guest_xstate_mask;

Can be called xcr0, like other shadow registers.

+
        gva_t mmio_fault_cr2;
        struct kvm_pio_request pio;
        void *pio_data;


@@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
        return 1;
  }

+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+       u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX)) |
+               kvm_register_read(vcpu, VCPU_REGS_RAX);

Missing shift? The value read from RDX holds the high 32 bits of the mask,
so it needs a << 32 before being or'ed with the RAX value.

Probably worthwhile to create a helper for reading/writing edx:eax into a u64.

+       u64 host_bv = vcpu->arch.host_xstate_mask;

What about ecx?

+
+       if (((new_bv ^ host_bv)&  ~host_bv)

Isn't (new_bv & ~host_bv) equivalent? (guest cannot exceed host...)

  || !(new_bv&  1))

Symbolic value or comment.

+               goto err;
+       if ((host_bv&  XSTATE_YMM&  new_bv)&&  !(new_bv&  XSTATE_SSE))

host_bv unneeded, I think.

+               goto err;
+       vcpu->arch.guest_xstate_mask = new_bv;
+       xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);

Can't we run with the host xcr0? isn't it guaranteed to be a superset of the guest xcr0?

+       skip_emulated_instruction(vcpu);
+       return 1;
+err:
+       kvm_inject_gp(vcpu, 0);

Need to #UD in some circumstances.

+       return 1;
+}
+
  static int handle_apic_access(struct kvm_vcpu *vcpu)
  {
        unsigned long exit_qualification;
@@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu 
*vcpu) = {
        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
+       [EXIT_REASON_XSETBV]                  = handle_xsetbv,
        [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
        [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
        [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b2ce1d..2af3fbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -52,6 +52,8 @@
  #include<asm/desc.h>
  #include<asm/mtrr.h>
  #include<asm/mce.h>
+#include<asm/i387.h>
+#include<asm/xcr.h>

  #define MAX_IO_MSRS 256
  #define CR0_RESERVED_BITS                                             \
@@ -62,6 +64,7 @@
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+                         | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)       \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

It also depends on the guest cpuid value. Please add it outside the macro; it's confusing to read something that looks like a constant but isn't.

  int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -4307,6 +4346,65 @@ not_found:
        return 36;
  }

+/* Bit mask for a CPUID feature bit index within its 32-bit register. */
+#define bitmaskof(idx)  (1U<<  ((idx)&  31))
+/*
+ * Post-process the CPUID output already loaded into the vcpu's
+ * eax/ebx/ecx/edx registers so that the XSAVE-related leaves
+ * (0, 1 and 0xd) reflect the host's XSAVE support and the guest's
+ * CR4.OSXSAVE setting.  Leaves other than 0, 1 and 0xd are left
+ * untouched.
+ */
+static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
+{
+       u32 eax, ebx, ecx, edx;
+
+       if (func != 0&&  func != 1&&  func != 0xd)
+               return;
+
+       eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+       ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+       ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+
+       switch (func) {
+       case 0:
+               /* fixup the Maximum Input Value */
+               if (cpu_has_xsave&&  eax<  0xd)
+                       eax = 0xd;
+               break;
+       case 1:
+               /* Hide XSAVE/OSXSAVE first; re-expose only when the host
+                * supports XSAVE. */
+               ecx&= ~(bitmaskof(X86_FEATURE_XSAVE) |
+                       bitmaskof(X86_FEATURE_OSXSAVE));
+               if (!cpu_has_xsave)
+                       break;
+               ecx |= bitmaskof(X86_FEATURE_XSAVE);
+               /* OSXSAVE mirrors the guest's CR4.OSXSAVE setting. */
+               if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
+                       ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
+               break;
+       case 0xd:
+               /* Leaf 0xd enumerates the supported xstate components. */
+               eax = ebx = ecx = edx = 0;
+               if (!cpu_has_xsave)
+                       break;
+               switch (idx) {
+               case 0:
+                       /* Supported xstate feature bits, clipped to what
+                        * KVM handles (XCNTXT_MASK). */
+                       eax = vcpu->arch.host_xstate_mask&  XCNTXT_MASK;
+                       /* FP/SSE + XSAVE.HEADER + YMM. */
+                       ecx = 512 + 64;
+                       if (eax&  XSTATE_YMM)
+                               ecx += XSTATE_YMM_SIZE;
+                       ebx = ecx;
+                       break;
+               case 2:
+                       /* YMM component size/offset in the XSAVE area,
+                        * reported only when the host exposes YMM state. */
+                       if (!(vcpu->arch.host_xstate_mask&  XSTATE_YMM))
+                               break;
+                       eax = XSTATE_YMM_SIZE;
+                       ebx = XSTATE_YMM_OFFSET;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       }
+
+       kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
+       kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
+       kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+}

This should be part of KVM_GET_SUPPORTED_CPUID.

@@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        return 0;
  }

+#ifdef CONFIG_X86_64
+#define REX_PREFIX  "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+/*
+ * Save the host's extended FPU state before loading the guest image.
+ * With XSAVE, the hand-assembled bytes appear to encode XSAVE (%rdi)
+ * (0f ae /4) with an all-ones edx:eax component mask, and the current
+ * XCR0 is recorded in host_xstate_mask; without XSAVE, fall back to
+ * legacy FXSAVE into host_fx_image.
+ */
+static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
+               : "memory");
+               vcpu->arch.host_xstate_mask =
+                       xgetbv(XCR_XFEATURE_ENABLED_MASK);
+       } else {
+               asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
+       }
+}
+
+/*
+ * Save the guest's extended FPU state on switch back to the host.
+ * Mirrors kvm_fx_save_host() but targets the guest image/mask:
+ * XSAVE with an all-ones edx:eax component mask plus an XCR0 snapshot
+ * when the host supports XSAVE, legacy FXSAVE otherwise.
+ */
+static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
+               : "memory");
+               vcpu->arch.guest_xstate_mask =
+                       xgetbv(XCR_XFEATURE_ENABLED_MASK);
+       } else {
+               asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+       }
+}
+
+/*
+ * Restore the host's extended FPU state.  XCR0 is written back first so
+ * the following XRSTOR (the hand-assembled 0f ae /5 opcode, on (%rdi))
+ * operates under the saved host component mask; without XSAVE, fall
+ * back to legacy FXRSTOR from host_fx_image.
+ */
+static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
+       } else {
+               asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
+       }
+}
+
+/*
+ * Restore the guest's extended FPU state before entering the guest.
+ * Mirrors kvm_fx_restore_host(): set XCR0 to the guest's mask, then
+ * XRSTOR the guest image; legacy FXRSTOR when the host lacks XSAVE.
+ */
+static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
+{
+       if (cpu_has_xsave) {
+               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+               asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+               : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
+       } else {
+               asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+       }
+}
+


This mostly duplicates the standard x86 fpu code. I have a patch somewhere that abstracts it out, I'll dig it up and send it out.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to