>>> On 03.11.15 at 07:27, <shuai.r...@linux.intel.com> wrote: > This patch uses xsaves/xrstors/xsavec instead of xsaveopt/xrstor > to perform the xsave_area switching so that xen itself > can benefit from them when available. > > For xsaves/xrstors/xsavec only use compact format. Add format conversion > support when perform guest os migration. > > Also, pv guest will not support xsaves/xrstors. > > Signed-off-by: Shuai Ruan <shuai.r...@linux.intel.com> > --- > xen/arch/x86/domain.c | 7 ++ > xen/arch/x86/domctl.c | 30 +++++- > xen/arch/x86/hvm/hvm.c | 18 +++- > xen/arch/x86/i387.c | 4 + > xen/arch/x86/traps.c | 6 +- > xen/arch/x86/xstate.c | 242 > +++++++++++++++++++++++++++++++++++++------ > xen/include/asm-x86/xstate.h | 2 + > 7 files changed, 265 insertions(+), 44 deletions(-) > > diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c > index fe3be30..108d4f8 100644 > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -883,7 +883,12 @@ int arch_set_info_guest( > { > memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt)); > if ( v->arch.xsave_area ) > + { > v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; > + if ( cpu_has_xsaves || cpu_has_xsavec ) > + v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP_SSE | > + > XSTATE_COMPACTION_ENABLED; > + }
So here you nicely extend the existing conditional. > @@ -1568,6 +1573,8 @@ static void __context_switch(void) > if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) ) > BUG(); > } > + if ( cpu_has_xsaves && has_hvm_container_vcpu(n) ) > + set_msr_xss(n->arch.hvm_vcpu.msr_xss); Why not also here (the previous if() uses cpu_has_xsave, which you surely depend on)? Agreed the difference is minor for modern CPUs, but I wanted to ask anyway. I.e. an explanation will do, no need to re-submit just because of this. > @@ -158,6 +334,20 @@ void xsave(struct vcpu *v, uint64_t mask) > ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size; > } > > +#define XSTATE_FIXUP ".section .fixup,\"ax\" \n" \ > + "2: mov %5,%%ecx \n" \ > + " xor %1,%1 \n" \ > + " rep stosb \n" \ > + " lea %2,%0 \n" \ > + " mov %3,%1 \n" \ > + " jmp 1b \n" \ > + ".previous \n" \ > + _ASM_EXTABLE(1b, 2b) \ > + : "+&D" (ptr), "+&a" (lmask) \ > + : "m" (*ptr), "g" (lmask), "d" (hmask), \ > + "m" (xsave_cntxt_size) \ > + : "ecx" > + > void xrstor(struct vcpu *v, uint64_t mask) > { > uint32_t hmask = mask >> 32; > @@ -187,39 +377,22 @@ void xrstor(struct vcpu *v, uint64_t mask) > switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) ) > { > default: > - asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n" > - ".section .fixup,\"ax\" \n" > - "2: mov %5,%%ecx \n" > - " xor %1,%1 \n" > - " rep stosb \n" > - " lea %2,%0 \n" > - " mov %3,%1 \n" > - " jmp 1b \n" > - ".previous \n" > - _ASM_EXTABLE(1b, 2b) > - : "+&D" (ptr), "+&a" (lmask) > - : "m" (*ptr), "g" (lmask), "d" (hmask), > - "m" (xsave_cntxt_size) > - : "ecx" ); > + alternative_input("1: "".byte 0x48,0x0f,0xae,0x2f", > + ".byte 0x48,0x0f,0xc7,0x1f", > + X86_FEATURE_XSAVES, > + "D" (ptr), "m" (*ptr), "a" (lmask), "d" (hmask)); > + asm volatile (XSTATE_FIXUP); > break; > case 4: case 2: > - asm volatile ( "1: .byte 0x0f,0xae,0x2f\n" > - ".section .fixup,\"ax\" \n" > - "2: mov %5,%%ecx \n" > - " xor %1,%1 \n" > - " rep stosb \n" > - " lea %2,%0 \n" > - " mov %3,%1 \n" > - " jmp 1b \n" > - ".previous \n" > - _ASM_EXTABLE(1b, 2b) > - : "+&D" (ptr), "+&a" (lmask) > - : "m" (*ptr), "g" (lmask), "d" (hmask), > - "m" (xsave_cntxt_size) > - : "ecx" ); > + alternative_input("1: "".byte 0x0f,0xae,0x2f", > + ".byte 0x0f,0xc7,0x1f", > + X86_FEATURE_XSAVES, > + "D" (ptr), "m" (*ptr), "a" (lmask), "d" (hmask)); > + asm volatile (XSTATE_FIXUP); > break; > } > } > +#undef XSTATE_FIXUP Repeating my comment on v8: "I wonder whether at least for the restore side alternative asm wouldn't result in better readable code and at the same time in a smaller patch." Did you at least look into that option? > @@ -343,11 +516,18 @@ void xstate_init(struct cpuinfo_x86 *c) > > /* Mask out features not currently understood by Xen. */ > eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) | > - cpufeat_mask(X86_FEATURE_XSAVEC)); > + cpufeat_mask(X86_FEATURE_XSAVEC) | > + cpufeat_mask(X86_FEATURE_XGETBV1) | > + cpufeat_mask(X86_FEATURE_XSAVES)); > > c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] = eax; > > BUG_ON(eax != > boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)]); > + > + if ( setup_xstate_features(bsp) && bsp ) > + BUG(); Well, not what I had hoped for, but okay. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel