These are additional simple integer instructions that are present in both MMX and SSE/AVX, and whose opcode ranges have no holes that were later occupied by newer instructions.
Simple, non-saturating operations are implemented using gvec; apart from this, there is not much to talk about. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/tcg/decode-new.c.inc | 28 ++++++++ target/i386/tcg/emit.c.inc | 113 +++++++++++++++++++++++++++++++ target/i386/tcg/translate.c | 4 +- 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index f20587c096..59f5637583 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -264,6 +264,34 @@ static const X86OpEntry opcodes_0F[256] = { [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x, vex4 p_66 avx2_256), [0x6e] = X86_OP_ENTRY3(MOVD_to, V,x, None,None, E,y, vex5 mmx p_00_66), /* wrong dest Vy on SDM! */ [0x6f] = X86_OP_GROUP3(0F6F, V,x, None,None, W,x, vex5 mmx p_00_66_f3), + + /* Incorrectly missing from 2-17 */ + [0xd8] = X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd9] = X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xda] = X86_OP_ENTRY3(PMINUB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdb] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdc] = X86_OP_ENTRY3(PADDUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdd] = X86_OP_ENTRY3(PADDUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xde] = X86_OP_ENTRY3(PMAXUB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdf] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + + [0xe8] = X86_OP_ENTRY3(PSUBSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe9] = X86_OP_ENTRY3(PSUBSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xea] = X86_OP_ENTRY3(PMINSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xeb] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xec] = X86_OP_ENTRY3(PADDSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xed] = X86_OP_ENTRY3(PADDSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xee] = 
X86_OP_ENTRY3(PMAXSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xef] = X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + + [0xf8] = X86_OP_ENTRY3(PSUBB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf9] = X86_OP_ENTRY3(PSUBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfa] = X86_OP_ENTRY3(PSUBD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfb] = X86_OP_ENTRY3(PSUBQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfc] = X86_OP_ENTRY3(PADDB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfd] = X86_OP_ENTRY3(PADDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfe] = X86_OP_ENTRY3(PADDD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + /* 0xff = UD0 */ }; static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 3f89d3cf50..1ba7a45668 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -290,6 +290,20 @@ BINARY_INT_MMX(PUNPCKHWD, punpckhwd) BINARY_INT_MMX(PUNPCKHDQ, punpckhdq) BINARY_INT_MMX(PACKSSDW, packssdw) +BINARY_INT_MMX(PSUBUSB, psubusb) +BINARY_INT_MMX(PSUBUSW, psubusw) +BINARY_INT_MMX(PMINUB, pminub) +BINARY_INT_MMX(PADDUSB, paddusb) +BINARY_INT_MMX(PADDUSW, paddusw) +BINARY_INT_MMX(PMAXUB, pmaxub) + +BINARY_INT_MMX(PSUBSB, psubsb) +BINARY_INT_MMX(PSUBSW, psubsw) +BINARY_INT_MMX(PMINSW, pminsw) +BINARY_INT_MMX(PADDSB, paddsb) +BINARY_INT_MMX(PADDSW, paddsw) +BINARY_INT_MMX(PMAXSW, pmaxsw) + /* Instructions with no MMX equivalent. 
*/ #define BINARY_INT_SSE(uname, lname) \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ @@ -526,6 +540,51 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) } +static void gen_PADDB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_add(MO_8, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PADDW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_add(MO_16, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PADDD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_add(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PAND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_and(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_andc(MO_64, + decode->op[0].offset, decode->op[2].offset, + decode->op[1].offset, vec_len, vec_len); +} + static void gen_PCMPGTB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { int vec_len = sse_vec_len(s, decode); @@ -571,6 +630,60 @@ static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) gen_helper_pext(s->T0, s->T0, s->T1); } +static void gen_POR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_or(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void 
gen_PXOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_xor(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PSUBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_sub(MO_8, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PSUBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_sub(MO_16, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PSUBD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_sub(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + +static void gen_PSUBQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_sub(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index cf18e12d38..11c17258eb 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4666,7 +4666,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #ifndef CONFIG_USER_ONLY use_new &= b <= limit; #endif - if (use_new && (b >= 0x160 && b <= 0x16f)) { + if (use_new && + ((b >= 0x160 && b <= 0x16f) || + (b >= 0x1d8 && b <= 0x1ff && (b & 8)))) { return disas_insn_new(s, cpu, b + 0x100); } break; -- 2.37.2