X86_SPECIAL_ZExtOp0 and X86_SPECIAL_ZExtOp2 are poorly named; they are a hack
that is needed by scalar insertion and extraction instructions, and not really
related to zero extension: for PEXTR the zero extension is done by the 
generation
functions, for PINSR the high bits are not used at all and in fact are *not*
filled with zeroes when loaded into s->T1.

Rename the values to match the effect described in the manual, and explain
better in the comments.

Reviewed-by: Richard Henderson <richard.hender...@linaro.org>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 target/i386/tcg/decode-new.c.inc | 16 ++++++++--------
 target/i386/tcg/decode-new.h     | 17 +++++++++++++----
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 5eb2e9d0224..00fdb243857 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -153,8 +153,8 @@
 #define xchg .special = X86_SPECIAL_Locked,
 #define lock .special = X86_SPECIAL_HasLock,
 #define mmx .special = X86_SPECIAL_MMX,
-#define zext0 .special = X86_SPECIAL_ZExtOp0,
-#define zext2 .special = X86_SPECIAL_ZExtOp2,
+#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
+#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
 #define avx_movx .special = X86_SPECIAL_AVXExtMov,
 
 #define vex1 .vex_class = 1,
@@ -632,13 +632,13 @@ static const X86OpEntry opcodes_0F3A[256] = {
     [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 chk(W0) 
cpuid(AVX) p_66),
     [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 chk(W0) 
cpuid(AVX) p_66),
 
-    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) 
zext0 p_66),
-    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) 
zext0 p_66),
+    [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) 
op0_Rd p_66),
+    [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) 
op0_Rd p_66),
     [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) 
p_66),
     [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) 
p_66),
     [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,xh, V,x,  I,b,  vex11 chk(W0) 
cpuid(F16C) p_66),
 
-    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) 
zext2 p_66),
+    [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) 
op2_Ry p_66),
     [0x21] = X86_OP_GROUP0(VINSERTPS),
     [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) 
p_66),
 
@@ -1883,17 +1883,17 @@ static void disas_insn_new(DisasContext *s, CPUState 
*cpu, int b)
     case X86_SPECIAL_HasLock:
         break;
 
-    case X86_SPECIAL_ZExtOp0:
+    case X86_SPECIAL_Op0_Rd:
         assert(decode.op[0].unit == X86_OP_INT);
         if (!decode.op[0].has_ea) {
             decode.op[0].ot = MO_32;
         }
         break;
 
-    case X86_SPECIAL_ZExtOp2:
+    case X86_SPECIAL_Op2_Ry:
         assert(decode.op[2].unit == X86_OP_INT);
         if (!decode.op[2].has_ea) {
-            decode.op[2].ot = MO_32;
+            decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
         }
         break;
 
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 611bfddd957..b253f7457ae 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -165,11 +165,20 @@ typedef enum X86InsnSpecial {
     X86_SPECIAL_Locked,
 
     /*
-     * Register operand 0/2 is zero extended to 32 bits.  Rd/Mb or Rd/Mw
-     * in the manual.
+     * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits
+     * (and writeback zero-extends it to 64 bits if applicable).  PREFIX_DATA
+     * does not trigger 16-bit writeback and, as a side effect, high-byte
+     * registers are never used.
      */
-    X86_SPECIAL_ZExtOp0,
-    X86_SPECIAL_ZExtOp2,
+    X86_SPECIAL_Op0_Rd,
+
+    /*
+     * Ry/Mb in the manual (PINSRB).  However, the high bits are never used by
+     * the instruction in either the register or memory cases; the *real* 
effect
+     * of this modifier is that high-byte registers are never used, even 
without
+     * a REX prefix.  Therefore, PINSRW does not need it despite having Ry/Mw.
+     */
+    X86_SPECIAL_Op2_Ry,
 
     /*
      * Register operand 2 is extended to full width, while a memory operand
-- 
2.43.0


Reply via email to