On 13/01/17 15:31, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -676,6 +676,16 @@ do{ asm volatile (
>  #define __emulate_1op_8byte(_op, _dst, _eflags)
>  #endif /* __i386__ */
>  
> +#define emulate_stub(dst, src...) do {                                  \
> +    unsigned long tmp;                                                  \
> +    asm volatile ( _PRE_EFLAGS("[efl]", "[msk]", "[tmp]")               \
> +                   "call *%[stub];"                                     \
> +                   _POST_EFLAGS("[efl]", "[msk]", "[tmp]")              \
> +                   : dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs._eflags) \
> +                   : [stub] "r" (stub.func),                            \
> +                     [msk] "i" (EFLAGS_MASK), ## src );                 \
> +} while (0)
> +
>  /* Fetch next part of the instruction being emulated. */
>  #define insn_fetch_bytes(_size)                                         \
>  ({ unsigned long _x = 0, _ip = state->ip;                               \
> @@ -2295,7 +2305,10 @@ x86_decode(
>                          }
>                      }
>                      else
> +                    {
> +                        ASSERT(op_bytes == 4);
>                          vex.b = 1;
> +                    }
>                      switch ( b )
>                      {
>                      case 0x62:
> @@ -5866,6 +5879,67 @@ x86_emulate(
>          break;
>  #endif
>  
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
> +    {
> +        uint8_t *buf = get_stub(stub);
> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
> +
> +        host_and_vcpu_must_have(bmi1);
> +        generate_exception_if(vex.l, EXC_UD);

The manual also states #UD if VEX.W is set.
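
i.e. presumably an extra check next to the vex.l one, along the lines
of (untested sketch):

    generate_exception_if(vex.w, EXC_UD);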

> +
> +        buf[0] = 0xc4;
> +        *pvex = vex;
> +        pvex->b = 1;
> +        pvex->r = 1;
> +        pvex->reg = ~0; /* rAX */
> +        buf[3] = b;
> +        buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
> +        buf[5] = 0xc3;
> +
> +        src.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
> +                                  &_regs, 0);

Given this construct, and that several VEX-encoded GPR instructions
need the same decode, how about a decode_vex_gpr() wrapper?
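
Something like the below is what I had in mind (an untested sketch;
the name and exact signature are only suggestions):

    /* Map the (inverted) VEX.vvvv field to the GPR it designates. */
    static void *decode_vex_gpr(unsigned int vex_reg,
                                struct cpu_user_regs *regs,
                                const struct x86_emulate_ctxt *ctxt)
    {
        /* mode_64bit() picks up ctxt from the local scope. */
        return decode_register(~vex_reg & (mode_64bit() ? 0xf : 7), regs, 0);
    }

with the call sites then becoming e.g.

    src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);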

> +        emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
> +
> +        put_stub(stub);
> +        break;
> +    }
> +
> +    case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */
> +    {
> +        uint8_t *buf = get_stub(stub);
> +        typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
> +
> +        switch ( modrm_reg & 7 )
> +        {
> +        case 1: /* blsr r,r/m */
> +        case 2: /* blsmsk r,r/m */
> +        case 3: /* blsi r,r/m */
> +            host_and_vcpu_must_have(bmi1);
> +            break;
> +        default:
> +            goto cannot_emulate;
> +        }
> +
> +        generate_exception_if(vex.l, EXC_UD);
> +
> +        buf[0] = 0xc4;
> +        *pvex = vex;
> +        pvex->b = 1;
> +        pvex->r = 1;
> +        pvex->reg = ~0; /* rAX */
> +        buf[3] = b;
> +        buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
> +        buf[5] = 0xc3;
> +
> +        dst.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
> +                                  &_regs, 0);
> +        emulate_stub("=&a" (dst.val), "c" (&src.val));
> +
> +        put_stub(stub);
> +        break;
> +    }
> +
>      case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
>      case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
>      {
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -57,6 +57,7 @@
>  #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
>  #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
> +#define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
>  #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
>  #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)

After trying this out, we clearly need to alter the position on VEX
prefixes.  VEX-encoded GPR instructions don't fall within the previous
assumptions made about the dependencies of VEX instructions.

~Andrew

diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 6212e4f..d4210d5 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -234,9 +234,11 @@ def crunch_numbers(state):
         XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
                 AVX, MPX, PKU, LWP],
 
-        # AVX is taken to mean hardware support for VEX encoded instructions,
-        # 256bit registers, and the instructions themselves.  Each of these
-        # subsequent instruction groups may only be VEX encoded.
+        # AVX is taken to mean hardware support for 256bit registers, and the
+        # instructions themselves.  It does not relate to the VEX prefix (in
+        # particular, most BMI{1,2} instructions may only be VEX encoded, but
+        # operate on GPRs rather than YMM registers and can be used without
+        # enabling xstate).
         AVX: [FMA, FMA4, F16C, AVX2, XOP],
 
         # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the

