On 13/01/17 15:31, Jan Beulich wrote: > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -676,6 +676,16 @@ do{ asm volatile ( > #define __emulate_1op_8byte(_op, _dst, _eflags) > #endif /* __i386__ */ > > +#define emulate_stub(dst, src...) do { \ > + unsigned long tmp; \ > + asm volatile ( _PRE_EFLAGS("[efl]", "[msk]", "[tmp]") \ > + "call *%[stub];" \ > + _POST_EFLAGS("[efl]", "[msk]", "[tmp]") \ > + : dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs._eflags) \ > + : [stub] "r" (stub.func), \ > + [msk] "i" (EFLAGS_MASK), ## src ); \ > +} while (0) > + > /* Fetch next part of the instruction being emulated. */ > #define insn_fetch_bytes(_size) \ > ({ unsigned long _x = 0, _ip = state->ip; \ > @@ -2295,7 +2305,10 @@ x86_decode( > } > } > else > + { > + ASSERT(op_bytes == 4); > vex.b = 1; > + } > switch ( b ) > { > case 0x62: > @@ -5866,6 +5879,67 @@ x86_emulate( > break; > #endif > > + case X86EMUL_OPC_VEX(0x0f38, 0xf2): /* andn r/m,r,r */ > + case X86EMUL_OPC_VEX(0x0f38, 0xf7): /* bextr r,r/m,r */ > + { > + uint8_t *buf = get_stub(stub); > + typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]); > + > + host_and_vcpu_must_have(bmi1); > + generate_exception_if(vex.l, EXC_UD);
The manual also states #UD if VEX.W is set. > + > + buf[0] = 0xc4; > + *pvex = vex; > + pvex->b = 1; > + pvex->r = 1; > + pvex->reg = ~0; /* rAX */ > + buf[3] = b; > + buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ > + buf[5] = 0xc3; > + > + src.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7), > + &_regs, 0); Given this construct, and several GPR-encoded vex instructions, how about a decode_vex_gpr() wrapper? > + emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" > (*src.reg)); > + > + put_stub(stub); > + break; > + } > + > + case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */ > + { > + uint8_t *buf = get_stub(stub); > + typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]); > + > + switch ( modrm_reg & 7 ) > + { > + case 1: /* blsr r,r/m */ > + case 2: /* blsmsk r,r/m */ > + case 3: /* blsi r,r/m */ > + host_and_vcpu_must_have(bmi1); > + break; > + default: > + goto cannot_emulate; > + } > + > + generate_exception_if(vex.l, EXC_UD); > + > + buf[0] = 0xc4; > + *pvex = vex; > + pvex->b = 1; > + pvex->r = 1; > + pvex->reg = ~0; /* rAX */ > + buf[3] = b; > + buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ > + buf[5] = 0xc3; > + > + dst.reg = decode_register(~vex.reg & (mode_64bit() ? 
0xf : 7), > + &_regs, 0); > + emulate_stub("=&a" (dst.val), "c" (&src.val)); > + > + put_stub(stub); > + break; > + } > + > + case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */ > + case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */ > + { > --- a/xen/include/asm-x86/cpufeature.h > +++ b/xen/include/asm-x86/cpufeature.h > @@ -57,6 +57,7 @@ > #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) > #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) > #define cpu_has_lwp boot_cpu_has(X86_FEATURE_LWP) > +#define cpu_has_bmi1 boot_cpu_has(X86_FEATURE_BMI1) > #define cpu_has_mpx boot_cpu_has(X86_FEATURE_MPX) > #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) > #define cpu_has_rdtscp boot_cpu_has(X86_FEATURE_RDTSCP) After trying this out, we clearly need to alter the position on VEX prefixes. VEX encoded GPR instructions don't fall within the previous assumptions made about the dependencies of VEX instructions. ~Andrew diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index 6212e4f..d4210d5 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -234,9 +234,11 @@ def crunch_numbers(state): XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES, AVX, MPX, PKU, LWP], - # AVX is taken to mean hardware support for VEX encoded instructions, - # 256bit registers, and the instructions themselves. Each of these - # subsequent instruction groups may only be VEX encoded. + # AVX is taken to mean hardware support for 256bit registers, and the + # instructions themselves. It does not relate to the VEX prefix (in + # particular, most BMI{1,2} instructions may only be VEX encoded but + # operate on GPRs rather than YMM registers and can be used without + # enabling xstate). AVX: [FMA, FMA4, F16C, AVX2, XOP], # CX16 is only encodable in Long Mode. LAHF_LM indicates that the _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org https://lists.xen.org/xen-devel