Make these helpers suitable for use with tcg_gen_gvec_* functions. --- target/i386/ops_sse.h | 27 +++++++++++++++++---------- target/i386/ops_sse_header.h | 4 ++-- target/i386/translate.c | 18 ++++++++---------- 3 files changed, 27 insertions(+), 22 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 1661bd7c64..384a835662 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -485,22 +485,29 @@ void glue(helper_pavgw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc) glue(clear_high, SUFFIX)(d, oprsz, maxsz); } -void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmuludq, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc) { - d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0); -#if SHIFT == 1 - d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2); -#endif + const intptr_t oprsz = simd_oprsz(desc); + const intptr_t maxsz = simd_maxsz(desc); + + for (intptr_t i = 0; i * sizeof(uint64_t) < oprsz; ++i) { + const uint64_t t = (uint64_t)a->L(2 * i) * (uint64_t)b->L(2 * i); + d->Q(i) = t; + } + glue(clear_high, SUFFIX)(d, oprsz, maxsz); } -void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmaddwd, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc) { - int i; + const intptr_t oprsz = simd_oprsz(desc); + const intptr_t maxsz = simd_maxsz(desc); - for (i = 0; i < (2 << SHIFT); i++) { - d->L(i) = (int16_t)s->W(2 * i) * (int16_t)d->W(2 * i) + - (int16_t)s->W(2 * i + 1) * (int16_t)d->W(2 * i + 1); + for (intptr_t i = 0; i * sizeof(uint32_t) < oprsz; ++i) { + const int32_t t0 = (int32_t)a->W(2 * i + 0) * (int32_t)b->W(2 * i + 0); + const int32_t t1 = (int32_t)a->W(2 * i + 1) * (int32_t)b->W(2 * i + 1); + d->L(i) = t0 + t1; } + glue(clear_high, SUFFIX)(d, oprsz, maxsz); } #if SHIFT == 0 diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index b5e8aae897..18d39ca649 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -71,8 +71,8 @@ DEF_HELPER_3(glue(pavgusb, SUFFIX), void, env, Reg, Reg) #endif DEF_HELPER_4(glue(pavgw, SUFFIX), void, Reg, Reg, Reg, i32) -DEF_HELPER_3(glue(pmuludq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaddwd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pmuludq, SUFFIX), void, Reg, Reg, Reg, i32) +DEF_HELPER_4(glue(pmaddwd, SUFFIX), void, Reg, Reg, Reg, i32) DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg) DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl) diff --git a/target/i386/translate.c b/target/i386/translate.c index 77b2e18f34..55607db09c 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -2806,8 +2806,6 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = { [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq }, [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */ - [0xf4] = MMX_OP2(pmuludq), - [0xf5] = MMX_OP2(pmaddwd), [0xf6] = MMX_OP2(psadbw), [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx, (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */ @@ -6129,10 +6127,10 @@ DEF_GEN_INSN3_GVEC(vpmulhuw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw DEF_GEN_INSN3_HELPER_EPP(pmuldq, pmuldq_xmm, Vdq, Vdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmuldq, pmuldq_xmm, Vdq, Hdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmuldq, pmuldq_xmm, Vqq, Hqq, Wqq) -DEF_GEN_INSN3_HELPER_EPP(pmuludq, pmuludq_mmx, Pq, Pq, Qq) -DEF_GEN_INSN3_HELPER_EPP(pmuludq, pmuludq_xmm, Vdq, Vdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmuludq, pmuludq_xmm, Vdq, Hdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmuludq, pmuludq_xmm, Vqq, Hqq, Wqq) +DEF_GEN_INSN3_GVEC(pmuludq, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmuludq_mmx) +DEF_GEN_INSN3_GVEC(pmuludq, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmuludq_xmm) +DEF_GEN_INSN3_GVEC(vpmuludq, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmuludq_xmm) +DEF_GEN_INSN3_GVEC(vpmuludq, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmuludq_xmm) DEF_GEN_INSN3_HELPER_EPP(pmulhrsw, pmulhrsw_mmx, Pq, Pq, Qq) DEF_GEN_INSN3_HELPER_EPP(pmulhrsw, pmulhrsw_xmm, Vdq, Vdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmulhrsw, pmulhrsw_xmm, Vdq, Hdq, Wdq) @@ -6147,10 +6145,10 @@ DEF_GEN_INSN3_HELPER_EPP(mulss, mulss, Vd, Vd, Wd) DEF_GEN_INSN3_HELPER_EPP(vmulss, mulss, Vd, Hd, Wd) DEF_GEN_INSN3_HELPER_EPP(mulsd, mulsd, Vq, Vq, Wq) DEF_GEN_INSN3_HELPER_EPP(vmulsd, mulsd, Vq, Hq, Wq) -DEF_GEN_INSN3_HELPER_EPP(pmaddwd, pmaddwd_mmx, Pq, Pq, Qq) -DEF_GEN_INSN3_HELPER_EPP(pmaddwd, pmaddwd_xmm, Vdq, Vdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmaddwd, pmaddwd_xmm, Vdq, Hdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmaddwd, pmaddwd_xmm, Vqq, Hqq, Wqq) +DEF_GEN_INSN3_GVEC(pmaddwd, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmaddwd_mmx) +DEF_GEN_INSN3_GVEC(pmaddwd, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmaddwd_xmm) +DEF_GEN_INSN3_GVEC(vpmaddwd, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmaddwd_xmm) +DEF_GEN_INSN3_GVEC(vpmaddwd, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmaddwd_xmm) DEF_GEN_INSN3_HELPER_EPP(pmaddubsw, pmaddubsw_mmx, Pq, Pq, Qq) DEF_GEN_INSN3_HELPER_EPP(pmaddubsw, pmaddubsw_xmm, Vdq, Vdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmaddubsw, pmaddubsw_xmm, Vdq, Hdq, Wdq) -- 2.20.1