A set of shuffle operations that operate on complete 256-bit registers. The integer and floating-point variants have identical semantics.
Signed-off-by: Paul Brook <p...@nowt.org>
---
 target/i386/ops_sse.h        | 66 ++++++++++++++++++++++++++++++++++++
 target/i386/ops_sse_header.h |  3 ++
 target/i386/tcg/translate.c  |  9 +++++
 3 files changed, 78 insertions(+)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 14a2d1bf78..04d2006cd8 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3407,6 +3407,72 @@ void helper_vzeroupper_hi8(CPUX86State *env)
     }
 }
 #endif
+
+/*
+ * VPERM2F128 / VPERM2I128: assemble each 128-bit half of d from one of the
+ * four 128-bit halves of v and s, as selected by the immediate.
+ *
+ * order[1:0] selects the low half of d and order[5:4] the high half:
+ *   0 = v low, 1 = v high, 2 = s low, 3 = s high.
+ * order[3] / order[7] force the low / high half of d to zero instead.
+ */
+void helper_vpermdq_ymm(CPUX86State *env,
+                        Reg *d, Reg *v, Reg *s, uint32_t order)
+{
+    Reg *lo_src = (order & 0x02) ? s : v;
+    Reg *hi_src = (order & 0x20) ? s : v;
+    int lo_idx = (order & 0x01) ? 2 : 0;
+    int hi_idx = (order & 0x10) ? 2 : 0;
+    uint64_t r0, r1, r2, r3;
+
+    /* Read every source element before writing, in case d aliases v or s. */
+    r0 = (order & 0x08) ? 0 : lo_src->Q(lo_idx);
+    r1 = (order & 0x08) ? 0 : lo_src->Q(lo_idx + 1);
+    r2 = (order & 0x80) ? 0 : hi_src->Q(hi_idx);
+    r3 = (order & 0x80) ? 0 : hi_src->Q(hi_idx + 1);
+
+    d->Q(0) = r0;
+    d->Q(1) = r1;
+    d->Q(2) = r2;
+    d->Q(3) = r3;
+}
+
+/*
+ * VPERMQ / VPERMPD: each 64-bit element i of d is the element of s chosen
+ * by the 2-bit field order[2*i+1:2*i].
+ */
+void helper_vpermq_ymm(CPUX86State *env, Reg *d, Reg *s, uint32_t order)
+{
+    uint64_t r0, r1, r2, r3;
+
+    /* Read every source element before writing, in case d aliases s. */
+    r0 = s->Q(order & 3);
+    r1 = s->Q((order >> 2) & 3);
+    r2 = s->Q((order >> 4) & 3);
+    r3 = s->Q((order >> 6) & 3);
+    d->Q(0) = r0;
+    d->Q(1) = r1;
+    d->Q(2) = r2;
+    d->Q(3) = r3;
+}
+
+/*
+ * VPERMD / VPERMPS: each 32-bit element i of d is the element of s indexed
+ * by the low three bits of element i of v.
+ */
+void helper_vpermd_ymm(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+    uint32_t r[8];
+    int i;
+
+    /* Gather into a temporary first, in case d aliases v or s. */
+    for (i = 0; i < 8; i++) {
+        r[i] = s->L(v->L(i) & 7);
+    }
+    for (i = 0; i < 8; i++) {
+        d->L(i) = r[i];
+    }
+}
 #endif
 #endif

diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index e5d8ea9bb7..099e6e8ffc 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -457,6 +457,9 @@ DEF_HELPER_1(vzeroupper, void, env)
 DEF_HELPER_1(vzeroall_hi8, void, env)
 DEF_HELPER_1(vzeroupper_hi8, void, env)
 #endif
+DEF_HELPER_5(vpermdq_ymm, void, env, Reg, Reg, Reg, i32)
+DEF_HELPER_4(vpermq_ymm, void, env, Reg, Reg, i32) +DEF_HELPER_4(vpermd_ymm, void, env, Reg, Reg, Reg) #endif #endif diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index fe1ab58d07..5a11d3c083 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3258,6 +3258,8 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = { [0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX), [0x14] = BLENDV_OP(blendvps, SSE41, 0), [0x15] = BLENDV_OP(blendvpd, SSE41, 0), +#define gen_helper_vpermd_xmm NULL + [0x16] = BINARY_OP(vpermd, AVX, SSE_OPF_AVX2), /* vpermps */ [0x17] = CMP_OP(ptest, SSE41), /* TODO:Some vbroadcast variants require AVX2 */ [0x18] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR), /* vbroadcastss */ @@ -3287,6 +3289,7 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = { [0x33] = UNARY_OP(pmovzxwd, SSE41, SSE_OPF_MMX), [0x34] = UNARY_OP(pmovzxwq, SSE41, SSE_OPF_MMX), [0x35] = UNARY_OP(pmovzxdq, SSE41, SSE_OPF_MMX), + [0x36] = BINARY_OP(vpermd, AVX, SSE_OPF_AVX2), /* vpermd */ [0x37] = BINARY_OP(pcmpgtq, SSE41, SSE_OPF_MMX), [0x38] = BINARY_OP(pminsb, SSE41, SSE_OPF_MMX), [0x39] = BINARY_OP(pminsd, SSE41, SSE_OPF_MMX), @@ -3329,8 +3332,13 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] = { /* prefix [66] 0f 3a */ static const struct SSEOpHelper_table7 sse_op_table7[256] = { +#define gen_helper_vpermq_xmm NULL + [0x00] = UNARY_OP(vpermq, AVX, SSE_OPF_AVX2), + [0x01] = UNARY_OP(vpermq, AVX, SSE_OPF_AVX2), /* vpermpd */ [0x04] = UNARY_OP(vpermilps_imm, AVX, 0), [0x05] = UNARY_OP(vpermilpd_imm, AVX, 0), +#define gen_helper_vpermdq_xmm NULL + [0x06] = BINARY_OP(vpermdq, AVX, 0), /* vperm2f128 */ [0x08] = UNARY_OP(roundps, SSE41, 0), [0x09] = UNARY_OP(roundpd, SSE41, 0), #define gen_helper_roundss_ymm NULL @@ -3353,6 +3361,7 @@ static const struct SSEOpHelper_table7 sse_op_table7[256] = { [0x41] = BINARY_OP(dppd, SSE41, 0), [0x42] = BINARY_OP(mpsadbw, SSE41, SSE_OPF_MMX), [0x44] = 
BINARY_OP(pclmulqdq, PCLMULQDQ, 0), + [0x46] = BINARY_OP(vpermdq, AVX, SSE_OPF_AVX2), /* vperm2i128 */ #define gen_helper_pcmpestrm_ymm NULL [0x60] = CMP_OP(pcmpestrm, SSE42), #define gen_helper_pcmpestri_ymm NULL -- 2.36.0