Make these helpers suitable for use with tcg_gen_gvec_* functions. Signed-off-by: Jan Bobek <jan.bo...@gmail.com> --- target/i386/ops_sse.h | 42 ++++++++++++++++++++++++++++++------ target/i386/ops_sse_header.h | 6 +++--- target/i386/translate.c | 27 +++++++++++------------ 3 files changed, 51 insertions(+), 24 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 168e581c0c..6ec116573b 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -412,20 +412,50 @@ static inline int satsw(int x) } } -#define FMULLW(a, b) ((a) * (b)) #define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16) -#define FMULHUW(a, b) ((a) * (b) >> 16) -#define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16) #define FAVG(a, b) (((a) + (b) + 1) >> 1) #endif -SSE_HELPER_W(helper_pmullw, FMULLW) +void glue(helper_pmullw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc) +{ + const intptr_t oprsz = simd_oprsz(desc); + const intptr_t maxsz = simd_maxsz(desc); + + for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) { + const uint32_t t = (uint32_t)a->W(i) * (uint32_t)b->W(i); + d->W(i) = t; + } + glue(clear_high, SUFFIX)(d, oprsz, maxsz); +} + #if SHIFT == 0 SSE_HELPER_W(helper_pmulhrw, FMULHRW) #endif -SSE_HELPER_W(helper_pmulhuw, FMULHUW) -SSE_HELPER_W(helper_pmulhw, FMULHW) + +void glue(helper_pmulhuw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc) +{ + const intptr_t oprsz = simd_oprsz(desc); + const intptr_t maxsz = simd_maxsz(desc); + + for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) { + const uint32_t t = (uint32_t)a->W(i) * (uint32_t)b->W(i); + d->W(i) = t >> 16; + } + glue(clear_high, SUFFIX)(d, oprsz, maxsz); +} + +void glue(helper_pmulhw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc) +{ + const intptr_t oprsz = simd_oprsz(desc); + const intptr_t maxsz = simd_maxsz(desc); + + for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) { + const int32_t t = (int32_t)a->W(i) * (int32_t)b->W(i); + d->W(i) = t >> 16; + } + glue(clear_high, SUFFIX)(d, oprsz, maxsz); +} SSE_HELPER_B(helper_pavgb, FAVG) SSE_HELPER_W(helper_pavgw, FAVG) diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 724692a689..7e6411fc82 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -58,12 +58,12 @@ DEF_HELPER_3(glue(pslldqi, SUFFIX), void, Reg, Reg, i32) DEF_HELPER_3(glue(psrldqi, SUFFIX), void, Reg, Reg, i32) #endif -DEF_HELPER_3(glue(pmullw, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pmullw, SUFFIX), void, Reg, Reg, Reg, i32) #if SHIFT == 0 DEF_HELPER_3(glue(pmulhrw, SUFFIX), void, env, Reg, Reg) #endif -DEF_HELPER_3(glue(pmulhuw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmulhw, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pmulhuw, SUFFIX), void, Reg, Reg, Reg, i32) +DEF_HELPER_4(glue(pmulhw, SUFFIX), void, Reg, Reg, Reg, i32) DEF_HELPER_3(glue(pavgb, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(pavgw, SUFFIX), void, env, Reg, Reg) diff --git a/target/i386/translate.c b/target/i386/translate.c index 03f7c6e450..79f8c1ddac 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -2801,13 +2801,10 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = { [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps }, - [0xd5] = MMX_OP2(pmullw), [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */ [0xe0] = MMX_OP2(pavgb), [0xe3] = MMX_OP2(pavgw), - [0xe4] = MMX_OP2(pmulhuw), - [0xe5] = MMX_OP2(pmulhw), [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq }, [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */ @@ -6116,21 +6113,21 @@ DEF_GEN_INSN3_HELPER_EPP(addsubpd, addsubpd, Vdq, Vdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vaddsubpd, addsubpd, Vdq, Hdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vaddsubpd, addsubpd, Vqq, Hqq, Wqq) -DEF_GEN_INSN3_HELPER_EPP(pmullw, pmullw_mmx, Pq, Pq, Qq) -DEF_GEN_INSN3_HELPER_EPP(pmullw, pmullw_xmm, Vdq, Vdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmullw, pmullw_xmm, Vdq, Hdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmullw, pmullw_xmm, Vqq, Hqq, Wqq) +DEF_GEN_INSN3_GVEC(pmullw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmullw_mmx) +DEF_GEN_INSN3_GVEC(pmullw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmullw_xmm) +DEF_GEN_INSN3_GVEC(vpmullw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmullw_xmm) +DEF_GEN_INSN3_GVEC(vpmullw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmullw_xmm) DEF_GEN_INSN3_HELPER_EPP(pmulld, pmulld_xmm, Vdq, Vdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmulld, pmulld_xmm, Vdq, Hdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmulld, pmulld_xmm, Vqq, Hqq, Wqq) -DEF_GEN_INSN3_HELPER_EPP(pmulhw, pmulhw_mmx, Pq, Pq, Qq) -DEF_GEN_INSN3_HELPER_EPP(pmulhw, pmulhw_xmm, Vdq, Vdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmulhw, pmulhw_xmm, Vdq, Hdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmulhw, pmulhw_xmm, Vqq, Hqq, Wqq) -DEF_GEN_INSN3_HELPER_EPP(pmulhuw, pmulhuw_mmx, Pq, Pq, Qq) -DEF_GEN_INSN3_HELPER_EPP(pmulhuw, pmulhuw_xmm, Vdq, Vdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmulhuw, pmulhuw_xmm, Vdq, Hdq, Wdq) -DEF_GEN_INSN3_HELPER_EPP(vpmulhuw, pmulhuw_xmm, Vqq, Hqq, Wqq) +DEF_GEN_INSN3_GVEC(pmulhw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmulhw_mmx) +DEF_GEN_INSN3_GVEC(pmulhw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhw_xmm) +DEF_GEN_INSN3_GVEC(vpmulhw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhw_xmm) +DEF_GEN_INSN3_GVEC(vpmulhw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhw_xmm) +DEF_GEN_INSN3_GVEC(pmulhuw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmulhuw_mmx) +DEF_GEN_INSN3_GVEC(pmulhuw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw_xmm) +DEF_GEN_INSN3_GVEC(vpmulhuw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw_xmm) +DEF_GEN_INSN3_GVEC(vpmulhuw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw_xmm) DEF_GEN_INSN3_HELPER_EPP(pmuldq, pmuldq_xmm, Vdq, Vdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmuldq, pmuldq_xmm, Vdq, Hdq, Wdq) DEF_GEN_INSN3_HELPER_EPP(vpmuldq, pmuldq_xmm, Vqq, Hqq, Wqq) -- 2.20.1