On 9/12/22 00:04, Paolo Bonzini wrote:
These are mostly floating-point SSE operations. The odd ones out
are MOVMSK and CVTxx2yy, the others are straightforward.
Unary operations are a bit special in AVX because they have 2 operands
for PD/PS operands (VEX.vvvv must be 1111b), and 3 operands for SD/SS.
They are handled using X86_OP_GROUP3 for compactness.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 32 ++++++
target/i386/tcg/emit.c.inc | 175 +++++++++++++++++++++++++++++++
target/i386/tcg/translate.c | 2 +-
3 files changed, 208 insertions(+), 1 deletion(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 59f5637583..5a94e05d71 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -243,7 +243,30 @@ static void decode_0F3A(DisasContext *s, CPUX86State *env,
X86OpEntry *entry, ui
*entry = opcodes_0F3A[*b];
}
+static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
+ entry->op1 = X86_TYPE_None;
+ entry->s1 = X86_SIZE_None;
+ }
+ switch (*b) {
+ case 0x51: entry->gen = gen_VSQRT; break;
+ case 0x52: entry->gen = gen_VRSQRT; break;
+ case 0x53: entry->gen = gen_VRCP; break;
+ case 0x5A: entry->gen = gen_VCVTfp2fp; break;
+ }
+}
I wonder if a .special would be cleaner here, but I guess this isn't horrible.
+ [0x54] = X86_OP_ENTRY3(VAND, V,x, H,x, W,x, vex4 p_00_66),
+ [0x55] = X86_OP_ENTRY3(VANDN, V,x, H,x, W,x, vex4 p_00_66),
+ [0x56] = X86_OP_ENTRY3(VOR, V,x, H,x, W,x, vex4 p_00_66),
+ [0x57] = X86_OP_ENTRY3(VXOR, V,x, H,x, W,x, vex4 p_00_66),
Just reuse PAND et al with a comment? I see there's a define later, but why?
+static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn
*decode)
+{
+ gen_unary_fp_sse(s, env, decode,
+ gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm,
+ gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm,
+ gen_helper_cvtsd2ss, gen_helper_cvtss2sd);
+}
+
+static void gen_VCVTps_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn
*decode)
+{
+ SSEFunc_0_epp fn = NULL;
+ switch (sse_prefix(s)) {
+ case 0x00:
+ fn = s->vex_l ? gen_helper_cvtdq2ps_ymm : gen_helper_cvtdq2ps_xmm;
+ break;
+ case 0x66:
+ fn = s->vex_l ? gen_helper_cvtps2dq_ymm : gen_helper_cvtps2dq_xmm;
+ break;
+ case 0xf3:
+ fn = s->vex_l ? gen_helper_cvttps2dq_ymm : gen_helper_cvttps2dq_xmm;
+ break;
+ }
+ fn(cpu_env, s->ptr0, s->ptr2);
+}
Only use of sse_prefix? We directly look at prefix bits elsewhere...
Also, while these are all converts, it doesn't seem conceptually different from
decode_group_twobyte_6F. Why are we waiting until generation in this case?
r~