The more complicated opcodes here are 0F D6-D7, 0F E6-E7 and 0F F7; the others are trivial.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/tcg/decode-new.c.inc | 39 +++++++++++++ target/i386/tcg/emit.c.inc | 99 +++++++++++++++++++++++++++++--- target/i386/tcg/translate.c | 4 +- 3 files changed, 133 insertions(+), 9 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 0e2da85934..e9a9981a7f 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -242,6 +242,18 @@ static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui } } +static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry movq[4] = { + {}, + X86_OP_ENTRY3(MOVQ, W,x, None, None, V,q, vex5), + X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q), + X86_OP_ENTRY3(MOVq_dq, P,q, None, None, U,q), + }; + + *entry = *decode_by_prefix(s, movq); +} + static const X86OpEntry opcodes_0F38_00toEF[240] = { }; @@ -396,6 +408,33 @@ static const X86OpEntry opcodes_0F[256] = { [0x7e] = X86_OP_GROUP0(0F7E), [0x7f] = X86_OP_GROUP3(0F6F, W,x, None,None, V,x, vex5 mmx p_00_66_f3), + [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), + [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd6] = X86_OP_GROUP0(0FD6), + [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66), /* MOVNTQ/MOVNTDQ */ + + [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 
mmx avx2_256 p_00_66), + [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe6] = X86_OP_ENTRY2(VCVTpd_dq, V,x, W,x, vex2 p_66_f3_f2), + [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */ + + [0xf0] = X86_OP_ENTRY3(LDDQU, V,x, None,None, M,x, vex4_unal cpuid(SSE3) p_f2), + [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66), + /* Incorrectly missing from 2-17 */ [0xd8] = X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), [0xd9] = X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index fb01035d06..c90f933093 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -403,6 +403,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod } HORIZONTAL_FP_SSE(VHADD, hadd) HORIZONTAL_FP_SSE(VHSUB, hsub) +HORIZONTAL_FP_SSE(VADDSUB, addsub) /* * 00 = p* Pq, Qq (if mmx not NULL; no VEX) @@ -462,6 +463,24 @@ BINARY_INT_MMX(PADDSB, paddsb) BINARY_INT_MMX(PADDSW, paddsw) BINARY_INT_MMX(PMAXSW, pmaxsw) +BINARY_INT_MMX(PAVGB, pavgb) +BINARY_INT_MMX(PAVGW, pavgw) +BINARY_INT_MMX(PMADDWD, pmaddwd) +BINARY_INT_MMX(PMULHUW, pmulhuw) +BINARY_INT_MMX(PMULHW, pmulhw) +BINARY_INT_MMX(PMULLW, pmullw) +BINARY_INT_MMX(PMULUDQ, pmuludq) +BINARY_INT_MMX(PSADBW, psadbw) + +BINARY_INT_MMX(PSLLW_r, psllw) 
+BINARY_INT_MMX(PSLLD_r, pslld) +BINARY_INT_MMX(PSLLQ_r, psllq) +BINARY_INT_MMX(PSRLW_r, psrlw) +BINARY_INT_MMX(PSRLD_r, psrld) +BINARY_INT_MMX(PSRLQ_r, psrlq) +BINARY_INT_MMX(PSRAW_r, psraw) +BINARY_INT_MMX(PSRAD_r, psrad) + /* Instructions with no MMX equivalent. */ #define BINARY_INT_SSE(uname, lname) \ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ @@ -680,6 +699,24 @@ static void gen_EMMS_VZERO(DisasContext *s, CPUX86State *env, X86DecodedInsn *de } } +static void gen_LDDQU(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_load_sse(s, s->T0, decode->op[0].ot, decode->op[0].offset); +} + +static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); + gen_extu(s->aflag, s->A0); + gen_add_A0_ds_seg(s); + + if (s->prefix & PREFIX_DATA) { + gen_helper_maskmov_xmm(cpu_env, s->ptr1, s->ptr2, s->A0); + } else { + gen_helper_maskmov_mmx(cpu_env, s->ptr1, s->ptr2, s->A0); + } +} + static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -756,14 +793,26 @@ static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - int vec_len = sse_vec_len(s, decode); - int lo_ofs = decode->op[0].offset - - xmm_offset(decode->op[0].ot) - + xmm_offset(MO_64); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset); - tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, lo_ofs); + + if (decode->op[0].has_ea) { + gen_op_st_v(s, MO_64, s->tmp1_i64, s->A0); + } else { + int vec_len = sse_vec_len(s, decode); + int lo_ofs = decode->op[0].offset + - xmm_offset(decode->op[0].ot) + + xmm_offset(MO_64); + + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + tcg_gen_st_i64(s->tmp1_i64, cpu_env, lo_ofs); + } +} + +static void 
gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_enter_mmx(cpu_env); + /* Otherwise the same as any other movq. */ + return gen_MOVQ(s, env, decode); } static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -816,6 +865,15 @@ static void gen_PADDD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) decode->op[2].offset, vec_len, vec_len); } +static void gen_PADDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = sse_vec_len(s, decode); + + tcg_gen_gvec_add(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->op[2].offset, vec_len, vec_len); +} + static void gen_PAND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { int vec_len = sse_vec_len(s, decode); @@ -906,6 +964,16 @@ static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) gen_helper_pext(s->T0, s->T0, s->T1); } +static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (s->prefix & PREFIX_DATA) { + gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr2); + } else { + gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr2); + } + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); +} + static void gen_POR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { int vec_len = sse_vec_len(s, decode); @@ -1202,6 +1270,23 @@ static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec gen_helper_cvtsd2ss, gen_helper_cvtss2sd); } +static void gen_VCVTpd_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + SSEFunc_0_epp fn = NULL; + switch (sse_prefix(s)) { + case 0x66: + fn = s->vex_l ? gen_helper_cvttpd2dq_ymm : gen_helper_cvttpd2dq_xmm; + break; + case 0xf3: + fn = s->vex_l ? gen_helper_cvtdq2pd_ymm : gen_helper_cvtdq2pd_xmm; + break; + case 0xf2: + fn = s->vex_l ? 
gen_helper_cvtpd2dq_ymm : gen_helper_cvtpd2dq_xmm; + break; + } + fn(cpu_env, s->ptr0, s->ptr2); +} + static void gen_VCVTps_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { SSEFunc_0_epp fn = NULL; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 45287dfea2..d15e988891 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4668,8 +4668,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #endif if (use_new && ((b >= 0x150 && b <= 0x17f) || - (b >= 0x1d8 && b <= 0x1ff && (b & 8)))) { - return disas_insn_new(s, cpu, b + 0x100); + (b >= 0x1d0 && b <= 0x1ff))) { + return disas_insn_new(s, cpu, b); } break; case 0xf3: -- 2.37.2