>>> On 03.11.15 at 07:27, <shuai.r...@linux.intel.com> wrote:
> This patch uses xsaves/xrstors/xsavec instead of xsaveopt/xrstor
> to perform the xsave_area switching, so that Xen itself
> can benefit from them when available.
> 
> Since xsaves/xrstors/xsavec only use the compact format, add format
> conversion support for guest OS migration.
> 
> Also, PV guests will not support xsaves/xrstors.
> 
> Signed-off-by: Shuai Ruan <shuai.r...@linux.intel.com>
> ---
>  xen/arch/x86/domain.c        |   7 ++
>  xen/arch/x86/domctl.c        |  30 +++++-
>  xen/arch/x86/hvm/hvm.c       |  18 +++-
>  xen/arch/x86/i387.c          |   4 +
>  xen/arch/x86/traps.c         |   6 +-
>  xen/arch/x86/xstate.c        | 242 +++++++++++++++++++++++++++++++++++++------
>  xen/include/asm-x86/xstate.h |   2 +
>  7 files changed, 265 insertions(+), 44 deletions(-)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index fe3be30..108d4f8 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -883,7 +883,12 @@ int arch_set_info_guest(
>      {
>          memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
>          if ( v->arch.xsave_area )
> +        {
>               v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
> +             if ( cpu_has_xsaves || cpu_has_xsavec )
> +                  v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP_SSE |
> +                                                           XSTATE_COMPACTION_ENABLED;
> +        }

So here you nicely extend the existing conditional.

> @@ -1568,6 +1573,8 @@ static void __context_switch(void)
>              if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
>                  BUG();
>          }
> +        if ( cpu_has_xsaves && has_hvm_container_vcpu(n) )
> +            set_msr_xss(n->arch.hvm_vcpu.msr_xss);

Why not also inside that conditional here (the previous if() checks
cpu_has_xsave, which you surely depend on)? Agreed, the difference
is minor on modern CPUs, but I wanted to ask anyway, i.e. an
explanation will do; no need to re-submit just because of this.
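
To illustrate what I mean (an untested sketch only; the xcr0 line is
paraphrased from the surrounding context and isn't part of your hunk):

        if ( cpu_has_xsave )
        {
            u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;

            if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
                BUG();

            /* XSAVES implies XSAVE, so the new check can nest here. */
            if ( cpu_has_xsaves && has_hvm_container_vcpu(n) )
                set_msr_xss(n->arch.hvm_vcpu.msr_xss);
        }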

> @@ -158,6 +334,20 @@ void xsave(struct vcpu *v, uint64_t mask)
>          ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
>  }
>  
> +#define XSTATE_FIXUP ".section .fixup,\"ax\"      \n"        \
> +                     "2: mov %5,%%ecx             \n"        \
> +                     "   xor %1,%1                \n"        \
> +                     "   rep stosb                \n"        \
> +                     "   lea %2,%0                \n"        \
> +                     "   mov %3,%1                \n"        \
> +                     "   jmp 1b                   \n"        \
> +                     ".previous                   \n"        \
> +                     _ASM_EXTABLE(1b, 2b)                    \
> +                     : "+&D" (ptr), "+&a" (lmask)            \
> +                     : "m" (*ptr), "g" (lmask), "d" (hmask), \
> +                       "m" (xsave_cntxt_size)                \
> +                     : "ecx"
> +
>  void xrstor(struct vcpu *v, uint64_t mask)
>  {
>      uint32_t hmask = mask >> 32;
> @@ -187,39 +377,22 @@ void xrstor(struct vcpu *v, uint64_t mask)
>      switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
>      {
>      default:
> -        asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
> -                       ".section .fixup,\"ax\"      \n"
> -                       "2: mov %5,%%ecx             \n"
> -                       "   xor %1,%1                \n"
> -                       "   rep stosb                \n"
> -                       "   lea %2,%0                \n"
> -                       "   mov %3,%1                \n"
> -                       "   jmp 1b                   \n"
> -                       ".previous                   \n"
> -                       _ASM_EXTABLE(1b, 2b)
> -                       : "+&D" (ptr), "+&a" (lmask)
> -                       : "m" (*ptr), "g" (lmask), "d" (hmask),
> -                         "m" (xsave_cntxt_size)
> -                       : "ecx" );
> +        alternative_input("1: "".byte 0x48,0x0f,0xae,0x2f",
> +                          ".byte 0x48,0x0f,0xc7,0x1f",
> +                          X86_FEATURE_XSAVES,
> +                          "D" (ptr), "m" (*ptr), "a" (lmask), "d" (hmask));
> +        asm volatile (XSTATE_FIXUP);
>          break;
>      case 4: case 2:
> -        asm volatile ( "1: .byte 0x0f,0xae,0x2f\n"
> -                       ".section .fixup,\"ax\" \n"
> -                       "2: mov %5,%%ecx        \n"
> -                       "   xor %1,%1           \n"
> -                       "   rep stosb           \n"
> -                       "   lea %2,%0           \n"
> -                       "   mov %3,%1           \n"
> -                       "   jmp 1b              \n"
> -                       ".previous              \n"
> -                       _ASM_EXTABLE(1b, 2b)
> -                       : "+&D" (ptr), "+&a" (lmask)
> -                       : "m" (*ptr), "g" (lmask), "d" (hmask),
> -                         "m" (xsave_cntxt_size)
> -                       : "ecx" );
> +        alternative_input("1: "".byte 0x0f,0xae,0x2f",
> +                          ".byte 0x0f,0xc7,0x1f",
> +                          X86_FEATURE_XSAVES,
> +                          "D" (ptr), "m" (*ptr), "a" (lmask), "d" (hmask));
> +        asm volatile (XSTATE_FIXUP);
>          break;
>      }
>  }
> +#undef XSTATE_FIXUP

Repeating my comment on v8: "I wonder whether at least for the
restore side alternative asm wouldn't result in better readable code
and at the same time in a smaller patch." Did you at least look into
that option?
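
To illustrate, an (untested) sketch for the 64-bit case, assuming the
C-level ALTERNATIVE() string macro from asm-x86/alternative.h can be
used directly here; the fixup and the extable entry then stay in the
same asm() as the label they reference:

        asm volatile ( "1: "
                       ALTERNATIVE(".byte 0x48,0x0f,0xae,0x2f", /* xrstor  */
                                   ".byte 0x48,0x0f,0xc7,0x1f", /* xrstors */
                                   X86_FEATURE_XSAVES) "\n"
                       ".section .fixup,\"ax\"      \n"
                       "2: mov %5,%%ecx             \n"
                       "   xor %1,%1                \n"
                       "   rep stosb                \n"
                       "   lea %2,%0                \n"
                       "   mov %3,%1                \n"
                       "   jmp 1b                   \n"
                       ".previous                   \n"
                       _ASM_EXTABLE(1b, 2b)
                       : "+&D" (ptr), "+&a" (lmask)
                       : "m" (*ptr), "g" (lmask), "d" (hmask),
                         "m" (xsave_cntxt_size)
                       : "ecx" );

The 2-/4-byte word size case would look the same, just without the
REX.W prefix byte, and the separate XSTATE_FIXUP asm() (which
references a label defined in a different asm statement) would go
away entirely.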

> @@ -343,11 +516,18 @@ void xstate_init(struct cpuinfo_x86 *c)
>  
>      /* Mask out features not currently understood by Xen. */
>      eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) |
> -            cpufeat_mask(X86_FEATURE_XSAVEC));
> +            cpufeat_mask(X86_FEATURE_XSAVEC) |
> +            cpufeat_mask(X86_FEATURE_XGETBV1) |
> +            cpufeat_mask(X86_FEATURE_XSAVES));
>  
>      c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] = eax;
>  
>      BUG_ON(eax != boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)]);
> +
> +    if ( setup_xstate_features(bsp) && bsp )
> +        BUG();

Well, not what I had hoped for, but okay.

Jan
