>>> On 22.06.18 at 12:57,  wrote:
> Other than FXRSTOR, XRSTOR allows for setting components to their
> initial state. Utilize this to clear register state immediately after
> having saved a vCPU's state (which we don't defer past
> __context_switch()), considering that
> - this supposedly reduces power consumption,
> - this might even free up physical registers,
> - we don't normally save/restore FPU state for a vCPU on every context
>   switch (in some initial measurements I've observed an approximate
>   50:50 relation between the two on a not overly heavily loaded system;
>   it's clear anyway that this is heavily dependent on what exactly a
>   vCPU is used for).
> 
> Signed-off-by: Jan Beulich <jbeul...@suse.com>
> ---
> RFC since the full performance effect is still not very clear.

I don't mind this being objected to, but at least I'd like to know whether
it's worthwhile for me to carry.

Jan

> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -33,6 +33,7 @@ static inline void fpu_xrstor(struct vcp
>      ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
>      ASSERT(ok);
>      xrstor(v, mask);
> +    v->arch.xstate_dirty = mask;
>      ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
>      ASSERT(ok);
>  }
> @@ -148,6 +149,9 @@ static inline void fpu_xsave(struct vcpu
>      ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
>      ASSERT(ok);
>      xsave(v, mask);
> +    xstate_load_init(v->arch.xstate_dirty &
> +                     v->arch.xsave_area->xsave_hdr.xstate_bv);
> +    v->arch.xstate_dirty = 0;
>      ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
>      ASSERT(ok);
>  }
> --- a/xen/arch/x86/spec_ctrl.c
> +++ b/xen/arch/x86/spec_ctrl.c
> @@ -616,7 +616,7 @@ void __init init_speculation_mitigations
>  
>      /* Check whether Eager FPU should be enabled by default. */
>      if ( opt_eager_fpu == -1 )
> -        opt_eager_fpu = should_use_eager_fpu();
> +        opt_eager_fpu = !cpu_has_xsave && should_use_eager_fpu();
>  
>      /* (Re)init BSP state now that default_spec_ctrl_flags has been 
> calculated. */
>      init_shadow_spec_ctrl_state();
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -734,6 +734,7 @@ int handle_xsetbv(u32 index, u64 new_bv)
>              cr0 &= ~X86_CR0_TS;
>          }
>          xrstor(curr, mask);
> +        curr->arch.xstate_dirty |= mask;
>          if ( cr0 & X86_CR0_TS )
>              write_cr0(cr0);
>      }
> @@ -774,12 +775,19 @@ uint64_t read_bndcfgu(void)
>      return xstate->xsave_hdr.xstate_bv & X86_XCR0_BNDCSR ? bndcsr->bndcfgu : 
> 0;
>  }
>  
> +void xstate_load_init(uint64_t mask)
> +{
> +    struct vcpu *v = idle_vcpu[smp_processor_id()];
> +    struct xsave_struct *xstate = v->arch.xsave_area;
> +
> +    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
> +    xrstor(v, mask);
> +}
> +
>  void xstate_set_init(uint64_t mask)
>  {
>      unsigned long cr0 = read_cr0();
>      unsigned long xcr0 = this_cpu(xcr0);
> -    struct vcpu *v = idle_vcpu[smp_processor_id()];
> -    struct xsave_struct *xstate = v->arch.xsave_area;
>  
>      if ( ~xfeature_mask & mask )
>      {
> @@ -792,8 +800,7 @@ void xstate_set_init(uint64_t mask)
>  
>      clts();
>  
> -    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
> -    xrstor(v, mask);
> +    xstate_load_init(mask);
>  
>      if ( cr0 & X86_CR0_TS )
>          write_cr0(cr0);
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -559,6 +559,11 @@ struct arch_vcpu
>       * it explicitly enables it via xcr0.
>       */
>      uint64_t xcr0_accum;
> +    /*
> +     * Accumulated set of components which may currently be dirty, and hence
> +     * should be cleared immediately after saving state.
> +     */
> +    uint64_t xstate_dirty;
>      /* This variable determines whether nonlazy extended state has been used,
>       * and thus should be saved/restored. */
>      bool_t nonlazy_xstate_used;
> --- a/xen/include/asm-x86/xstate.h
> +++ b/xen/include/asm-x86/xstate.h
> @@ -95,6 +95,7 @@ uint64_t get_msr_xss(void);
>  uint64_t read_bndcfgu(void);
>  void xsave(struct vcpu *v, uint64_t mask);
>  void xrstor(struct vcpu *v, uint64_t mask);
> +void xstate_load_init(uint64_t mask);
>  void xstate_set_init(uint64_t mask);
>  bool xsave_enabled(const struct vcpu *v);
>  int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum,
> 
> 
> 
> 




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to