[Qemu-devel] [RFC PATCH v3 23/46] target/i386: introduce operand for direct-only r/m field

2019-08-14 Thread Jan Bobek
Many operands can only decode successfully if the ModR/M byte has the
direct form (i.e. MOD=3). Capture this common aspect by introducing a
special direct-only r/m field operand.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index e4515e81df..c918065b96 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4822,6 +4822,43 @@ INSNOP_FINALIZE(modrm_rm)
 {
 }
 
+/*
+ * modrm_rm_direct
+ *
+ * Equivalent of modrm_rm, but only decodes successfully if
+ * the ModR/M byte has the direct form (i.e. MOD=3).
+ */
+typedef insnop_arg_t(modrm_rm) insnop_arg_t(modrm_rm_direct);
+typedef struct {
+insnop_ctxt_t(modrm_rm) rm;
+} insnop_ctxt_t(modrm_rm_direct);
+
+INSNOP_INIT(modrm_rm_direct)
+{
+int ret;
+insnop_ctxt_t(modrm_mod) modctxt;
+
+ret = insnop_init(modrm_mod)(&modctxt, env, s, modrm, 0);
+if (!ret) {
+const int mod = insnop_prepare(modrm_mod)(&modctxt, env, s, modrm, 0);
+if (mod == 3) {
+ret = insnop_init(modrm_rm)(&ctxt->rm, env, s, modrm, is_write);
+} else {
+ret = 1;
+}
+insnop_finalize(modrm_mod)(&modctxt, env, s, modrm, 0, mod);
+}
+return ret;
+}
+INSNOP_PREPARE(modrm_rm_direct)
+{
+return insnop_prepare(modrm_rm)(&ctxt->rm, env, s, modrm, is_write);
+}
+INSNOP_FINALIZE(modrm_rm_direct)
+{
+insnop_finalize(modrm_rm)(&ctxt->rm, env, s, modrm, is_write, arg);
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 16/46] target/i386: introduce instruction operand infrastructure

2019-08-14 Thread Jan Bobek
insnop_arg_t, insnop_ctxt_t and init, prepare and finalize functions
form the basis of instruction operand decoding. Introduce macros for
defining a generic instruction operand; use cases for operand decoding
will be introduced later.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 0cffa2226b..9d00b36406 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4548,6 +4548,34 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
CkCpuidFeat feat)
 }
 }
 
+/*
+ * Instruction operand
+ */
+#define insnop_arg_t(opT)    insnop_ ## opT ## _arg_t
+#define insnop_ctxt_t(opT)   insnop_ ## opT ## _ctxt_t
+#define insnop_init(opT)     insnop_ ## opT ## _init
+#define insnop_prepare(opT)  insnop_ ## opT ## _prepare
+#define insnop_finalize(opT) insnop_ ## opT ## _finalize
+
+#define INSNOP_INIT(opT)\
+static int insnop_init(opT)(insnop_ctxt_t(opT) *ctxt,   \
+CPUX86State *env,   \
+DisasContext *s,\
+int modrm, bool is_write)
+
+#define INSNOP_PREPARE(opT) \
+static insnop_arg_t(opT) insnop_prepare(opT)(insnop_ctxt_t(opT) *ctxt, \
+ CPUX86State *env,  \
+ DisasContext *s,   \
+ int modrm, bool is_write)
+
+#define INSNOP_FINALIZE(opT)\
+static void insnop_finalize(opT)(insnop_ctxt_t(opT) *ctxt,  \
+ CPUX86State *env,  \
+ DisasContext *s,   \
+ int modrm, bool is_write,  \
+ insnop_arg_t(opT) arg)
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 18/46] target/i386: introduce generic either-or operand

2019-08-14 Thread Jan Bobek
The either-or operand attempts to decode one operand, and if it fails,
it falls back to a second operand. It is unifying, meaning that
insnop_arg_t of the second operand must be implicitly castable to
insnop_arg_t of the first operand.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 46 +
 1 file changed, 46 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 8989e6504c..a0b883c680 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4596,6 +4596,52 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
CkCpuidFeat feat)
 insnop_finalize(opT2)(ctxt, env, s, modrm, is_write, arg);  \
 }
 
+/*
+ * Generic unifying either-or operand
+ */
+#define DEF_INSNOP_EITHER(opT, opT1, opT2)  \
+typedef insnop_arg_t(opT1) insnop_arg_t(opT);   \
+typedef struct {\
+bool is_ ## opT1;   \
+union { \
+insnop_ctxt_t(opT1) ctxt_ ## opT1;  \
+insnop_ctxt_t(opT2) ctxt_ ## opT2;  \
+};  \
+} insnop_ctxt_t(opT);   \
+\
+INSNOP_INIT(opT)\
+{   \
+int ret = insnop_init(opT1)(&ctxt->ctxt_ ## opT1,   \
+env, s, modrm, is_write);   \
+if (!ret) { \
+ctxt->is_ ## opT1 = 1;  \
+return 0;   \
+}   \
+ret = insnop_init(opT2)(&ctxt->ctxt_ ## opT2,   \
+env, s, modrm, is_write);   \
+if (!ret) { \
+ctxt->is_ ## opT1 = 0;  \
+return 0;   \
+}   \
+return ret; \
+}   \
+INSNOP_PREPARE(opT) \
+{   \
+return (ctxt->is_ ## opT1   \
+? insnop_prepare(opT1)(&ctxt->ctxt_ ## opT1,\
+   env, s, modrm, is_write) \
+: insnop_prepare(opT2)(&ctxt->ctxt_ ## opT2,\
+   env, s, modrm, is_write));   \
+}   \
+INSNOP_FINALIZE(opT)\
+{   \
+(ctxt->is_ ## opT1  \
+ ? insnop_finalize(opT1)(&ctxt->ctxt_ ## opT1,  \
+ env, s, modrm, is_write, arg)  \
+ : insnop_finalize(opT2)(&ctxt->ctxt_ ## opT2,  \
+ env, s, modrm, is_write, arg));\
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 21/46] target/i386: introduce modrm operand

2019-08-14 Thread Jan Bobek
This permits the ModR/M byte to be passed raw into the code generator,
effectively allowing to short-circuit the operand decoding mechanism
and do the decoding work manually in the code generator.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7fc5149d29..25c25a30fb 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4740,6 +4740,26 @@ INSNOP_FINALIZE(tcg_temp_i64)
 tcg_temp_free_i64(arg);
 }
 
+/*
+ * modrm
+ *
+ * Operand whose value is the ModR/M byte.
+ */
+typedef int insnop_arg_t(modrm);
+typedef struct {} insnop_ctxt_t(modrm);
+
+INSNOP_INIT(modrm)
+{
+return 0;
+}
+INSNOP_PREPARE(modrm)
+{
+return modrm;
+}
+INSNOP_FINALIZE(modrm)
+{
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 46/46] target/i386: introduce SSE3 instructions to sse-opcode.inc.h

2019-08-14 Thread Jan Bobek
Add all the SSE3 instruction entries to sse-opcode.inc.h.

Signed-off-by: Jan Bobek 
---
 target/i386/sse-opcode.inc.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/target/i386/sse-opcode.inc.h b/target/i386/sse-opcode.inc.h
index efa67b7ce2..0cfe6fbe31 100644
--- a/target/i386/sse-opcode.inc.h
+++ b/target/i386/sse-opcode.inc.h
@@ -133,6 +133,14 @@ OPCODE(movmskps, LEG(NP, 0F, 1, 0x50), SSE, WR, Gq, Udq)
 OPCODE(movmskpd, LEG(66, 0F, 0, 0x50), SSE2, WR, Gd, Udq)
 /* 66 REX.W 0F 50 /r: MOVMSKPD r64, xmm */
 OPCODE(movmskpd, LEG(66, 0F, 1, 0x50), SSE2, WR, Gq, Udq)
+/* F2 0F F0 /r: LDDQU xmm1, m128 */
+OPCODE(lddqu, LEG(F2, 0F, 0, 0xf0), SSE3, WR, Vdq, Mdq)
+/* F3 0F 16 /r: MOVSHDUP xmm1, xmm2/m128 */
+OPCODE(movshdup, LEG(F3, 0F, 0, 0x16), SSE3, WR, Vdq, Wdq)
+/* F3 0F 12 /r: MOVSLDUP xmm1, xmm2/m128 */
+OPCODE(movsldup, LEG(F3, 0F, 0, 0x12), SSE3, WR, Vdq, Wdq)
+/* F2 0F 12 /r: MOVDDUP xmm1, xmm2/m64 */
+OPCODE(movddup, LEG(F2, 0F, 0, 0x12), SSE3, WR, Vdq, Wq)
 /* NP 0F FC /r: PADDB mm, mm/m64 */
 OPCODE(paddb, LEG(NP, 0F, 0, 0xfc), MMX, WRR, Pq, Pq, Qq)
 /* 66 0F FC /r: PADDB xmm1, xmm2/m128 */
@@ -173,6 +181,10 @@ OPCODE(addpd, LEG(66, 0F, 0, 0x58), SSE2, WRR, Vdq, Vdq, 
Wdq)
 OPCODE(addss, LEG(F3, 0F, 0, 0x58), SSE, WRR, Vd, Vd, Wd)
 /* F2 0F 58 /r: ADDSD xmm1, xmm2/m64 */
 OPCODE(addsd, LEG(F2, 0F, 0, 0x58), SSE2, WRR, Vq, Vq, Wq)
+/* F2 0F 7C /r: HADDPS xmm1, xmm2/m128 */
+OPCODE(haddps, LEG(F2, 0F, 0, 0x7c), SSE3, WRR, Vdq, Vdq, Wdq)
+/* 66 0F 7C /r: HADDPD xmm1, xmm2/m128 */
+OPCODE(haddpd, LEG(66, 0F, 0, 0x7c), SSE3, WRR, Vdq, Vdq, Wdq)
 /* NP 0F F8 /r: PSUBB mm, mm/m64 */
 OPCODE(psubb, LEG(NP, 0F, 0, 0xf8), MMX, WRR, Pq, Pq, Qq)
 /* 66 0F F8 /r: PSUBB xmm1, xmm2/m128 */
@@ -213,6 +225,14 @@ OPCODE(subpd, LEG(66, 0F, 0, 0x5c), SSE2, WRR, Vdq, Vdq, 
Wdq)
 OPCODE(subss, LEG(F3, 0F, 0, 0x5c), SSE, WRR, Vd, Vd, Wd)
 /* F2 0F 5C /r: SUBSD xmm1, xmm2/m64 */
 OPCODE(subsd, LEG(F2, 0F, 0, 0x5c), SSE2, WRR, Vq, Vq, Wq)
+/* F2 0F 7D /r: HSUBPS xmm1, xmm2/m128 */
+OPCODE(hsubps, LEG(F2, 0F, 0, 0x7d), SSE3, WRR, Vdq, Vdq, Wdq)
+/* 66 0F 7D /r: HSUBPD xmm1, xmm2/m128 */
+OPCODE(hsubpd, LEG(66, 0F, 0, 0x7d), SSE3, WRR, Vdq, Vdq, Wdq)
+/* F2 0F D0 /r: ADDSUBPS xmm1, xmm2/m128 */
+OPCODE(addsubps, LEG(F2, 0F, 0, 0xd0), SSE3, WRR, Vdq, Vdq, Wdq)
+/* 66 0F D0 /r: ADDSUBPD xmm1, xmm2/m128 */
+OPCODE(addsubpd, LEG(66, 0F, 0, 0xd0), SSE3, WRR, Vdq, Vdq, Wdq)
 /* NP 0F D5 /r: PMULLW mm, mm/m64 */
 OPCODE(pmullw, LEG(NP, 0F, 0, 0xd5), MMX, WRR, Pq, Pq, Qq)
 /* 66 0F D5 /r: PMULLW xmm1, xmm2/m128 */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 06/46] target/i386: Simplify gen_exception arguments

2019-08-14 Thread Jan Bobek
From: Richard Henderson 

We can compute cur_eip from values present within DisasContext.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 89 -
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 40a4844b64..7532d65778 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -1272,10 +1272,10 @@ static void gen_helper_fp_arith_STN_ST0(int op, int 
opreg)
 }
 }
 
-static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
+static void gen_exception(DisasContext *s, int trapno)
 {
 gen_update_cc_op(s);
-gen_jmp_im(s, cur_eip);
+gen_jmp_im(s, s->pc_start - s->cs_base);
 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
 s->base.is_jmp = DISAS_NORETURN;
 }
@@ -1284,7 +1284,7 @@ static void gen_exception(DisasContext *s, int trapno, 
target_ulong cur_eip)
the instruction is known, but it isn't allowed in the current cpu mode.  */
 static void gen_illegal_opcode(DisasContext *s)
 {
-gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
+gen_exception(s, EXCP06_ILLOP);
 }
 
 /* if d == OR_TMP0, it means memory operand (address in A0) */
@@ -3040,8 +3040,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 [0xdf] = AESNI_OP(aeskeygenassist),
 };
 
-static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-target_ulong pc_start)
+static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
@@ -3076,7 +3075,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 /* simple MMX/SSE operation */
 if (s->flags & HF_TS_MASK) {
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 return;
 }
 if (s->flags & HF_EM_MASK) {
@@ -4515,7 +4514,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->vex_l = 0;
 s->vex_v = 0;
 if (sigsetjmp(s->jmpbuf, 0) != 0) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 return s->pc;
 }
 
@@ -5854,7 +5853,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
 /* XXX: what to do if illegal op ? */
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 break;
 }
 modrm = x86_ldub_code(env, s);
@@ -6572,7 +6571,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 set_cc_op(s, CC_OP_EFLAGS);
 } else if (s->vm86) {
 if (s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag - 1));
 set_cc_op(s, CC_OP_EFLAGS);
@@ -6694,7 +6693,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9c: /* pushf */
 gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
 if (s->vm86 && s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 gen_update_cc_op(s);
 gen_helper_read_eflags(s->T0, cpu_env);
@@ -6704,7 +6703,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9d: /* popf */
 gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
 if (s->vm86 && s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 ot = gen_pop_T0(s);
 if (s->cpl == 0) {
@@ -7021,7 +7020,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 goto illegal_op;
 val = x86_ldub_code(env, s);
 if (val == 0) {
-gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+gen_exception(s, EXCP00_DIVZ);
 } else {
 gen_helper_aam(cpu_env, tcg_const_i32(val));
 set_cc_op(s, CC_OP_LOGICB);
@@ -7055,7 +7054,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9b: /* fwait */
 if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
 (HF_MP_MASK | HF_TS_MASK)) {
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 } else {
 gen_helper_fwait(cpu_env);
 }
@@ -7066,7 +7065,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xcd: /* int N */
 val = x86_ldub_code(env, s);
 if (s->vm86 && s->iopl != 3) {
-

[Qemu-devel] [RFC PATCH v3 15/46] target/i386: introduce function ck_cpuid

2019-08-14 Thread Jan Bobek
Introduce a helper function to take care of instruction CPUID checks.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 48 +
 1 file changed, 48 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 6296a02991..0cffa2226b 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4500,6 +4500,54 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 #define tcg_gen_gvec_cmpgt(vece, dofs, aofs, bofs, oprsz, maxsz)\
 tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, maxsz)
 
+typedef enum {
+CK_CPUID_MMX = 1,
+CK_CPUID_3DNOW,
+CK_CPUID_SSE,
+CK_CPUID_SSE2,
+CK_CPUID_CLFLUSH,
+CK_CPUID_SSE3,
+CK_CPUID_SSSE3,
+CK_CPUID_SSE4_1,
+CK_CPUID_SSE4_2,
+CK_CPUID_SSE4A,
+CK_CPUID_AVX,
+CK_CPUID_AVX2,
+} CkCpuidFeat;
+
+static int ck_cpuid(CPUX86State *env, DisasContext *s, CkCpuidFeat feat)
+{
+switch (feat) {
+case CK_CPUID_MMX:
+return !(s->cpuid_features & CPUID_MMX)
+|| !(s->cpuid_ext2_features & CPUID_EXT2_MMX);
+case CK_CPUID_3DNOW:
+return !(s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
+case CK_CPUID_SSE:
+return !(s->cpuid_features & CPUID_SSE);
+case CK_CPUID_SSE2:
+return !(s->cpuid_features & CPUID_SSE2);
+case CK_CPUID_CLFLUSH:
+return !(s->cpuid_features & CPUID_CLFLUSH);
+case CK_CPUID_SSE3:
+return !(s->cpuid_ext_features & CPUID_EXT_SSE3);
+case CK_CPUID_SSSE3:
+return !(s->cpuid_ext_features & CPUID_EXT_SSSE3);
+case CK_CPUID_SSE4_1:
+return !(s->cpuid_ext_features & CPUID_EXT_SSE41);
+case CK_CPUID_SSE4_2:
+return !(s->cpuid_ext_features & CPUID_EXT_SSE42);
+case CK_CPUID_SSE4A:
+return !(s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
+case CK_CPUID_AVX:
+return !(s->cpuid_ext_features & CPUID_EXT_AVX);
+case CK_CPUID_AVX2:
+return !(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
+default:
+g_assert_not_reached();
+}
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 45/46] target/i386: introduce SSE3 code generators

2019-08-14 Thread Jan Bobek
Introduce code generators required by SSE3 instructions.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 64 +
 1 file changed, 64 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c72138014a..9da3fbb611 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -5627,6 +5627,63 @@ GEN_INSN2(movmskpd, Gq, Udq)
 tcg_temp_free_i32(arg1_r32);
 }
 
+GEN_INSN2(lddqu, Vdq, Mdq)
+{
+assert(arg2 == s->A0);
+gen_ldo_env_A0(s, arg1);
+}
+
+GEN_INSN2(movshdup, Vdq, Wdq)
+{
+const TCGv_i32 r32 = tcg_temp_new_i32();
+
+tcg_gen_ld_i32(r32, cpu_env, arg2 + offsetof(ZMMReg, ZMM_L(1)));
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(0)));
+if (arg1 != arg2) {
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(1)));
+}
+
+tcg_gen_ld_i32(r32, cpu_env, arg2 + offsetof(ZMMReg, ZMM_L(3)));
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(2)));
+if (arg1 != arg2) {
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(3)));
+}
+
+tcg_temp_free_i32(r32);
+}
+
+GEN_INSN2(movsldup, Vdq, Wdq)
+{
+const TCGv_i32 r32 = tcg_temp_new_i32();
+
+tcg_gen_ld_i32(r32, cpu_env, arg2 + offsetof(ZMMReg, ZMM_L(0)));
+if (arg1 != arg2) {
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(0)));
+}
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(1)));
+
+tcg_gen_ld_i32(r32, cpu_env, arg2 + offsetof(ZMMReg, ZMM_L(2)));
+if (arg1 != arg2) {
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(2)));
+}
+tcg_gen_st_i32(r32, cpu_env, arg1 + offsetof(ZMMReg, ZMM_L(3)));
+
+tcg_temp_free_i32(r32);
+}
+
+GEN_INSN2(movddup, Vdq, Wq)
+{
+const TCGv_i64 r64 = tcg_temp_new_i64();
+
+tcg_gen_ld_i64(r64, cpu_env, arg2 + offsetof(ZMMReg, ZMM_Q(0)));
+if (arg1 != arg2) {
+tcg_gen_st_i64(r64, cpu_env, arg1 + offsetof(ZMMReg, ZMM_Q(0)));
+}
+tcg_gen_st_i64(r64, cpu_env, arg1 + offsetof(ZMMReg, ZMM_Q(1)));
+
+tcg_temp_free_i64(r64);
+}
+
 DEF_GEN_INSN3_GVEC_MM(paddb, add, Pq, Pq, Qq, MO_8)
 DEF_GEN_INSN3_GVEC_XMM(paddb, add, Vdq, Vdq, Wdq, MO_8)
 DEF_GEN_INSN3_GVEC_MM(paddw, add, Pq, Pq, Qq, MO_16)
@@ -5647,6 +5704,8 @@ DEF_GEN_INSN3_HELPER_EPP(addps, addps, Vdq, Vdq, Wdq)
 DEF_GEN_INSN3_HELPER_EPP(addss, addss, Vd, Vd, Wd)
 DEF_GEN_INSN3_HELPER_EPP(addpd, addpd, Vdq, Vdq, Wdq)
 DEF_GEN_INSN3_HELPER_EPP(addsd, addsd, Vq, Vq, Wq)
+DEF_GEN_INSN3_HELPER_EPP(haddps, haddps, Vdq, Vdq, Wdq)
+DEF_GEN_INSN3_HELPER_EPP(haddpd, haddpd, Vdq, Vdq, Wdq)
 
 DEF_GEN_INSN3_GVEC_MM(psubb, sub, Pq, Pq, Qq, MO_8)
 DEF_GEN_INSN3_GVEC_XMM(psubb, sub, Vdq, Vdq, Wdq, MO_8)
@@ -5668,6 +5727,11 @@ DEF_GEN_INSN3_HELPER_EPP(subps, subps, Vdq, Vdq, Wdq)
 DEF_GEN_INSN3_HELPER_EPP(subpd, subpd, Vdq, Vdq, Wdq)
 DEF_GEN_INSN3_HELPER_EPP(subss, subss, Vd, Vd, Wd)
 DEF_GEN_INSN3_HELPER_EPP(subsd, subsd, Vq, Vq, Wq)
+DEF_GEN_INSN3_HELPER_EPP(hsubps, hsubps, Vdq, Vdq, Wdq)
+DEF_GEN_INSN3_HELPER_EPP(hsubpd, hsubpd, Vdq, Vdq, Wdq)
+
+DEF_GEN_INSN3_HELPER_EPP(addsubps, addsubps, Vdq, Vdq, Wdq)
+DEF_GEN_INSN3_HELPER_EPP(addsubpd, addsubpd, Vdq, Vdq, Wdq)
 
 DEF_GEN_INSN3_HELPER_EPP(pmullw, pmullw_mmx, Pq, Pq, Qq)
 DEF_GEN_INSN3_HELPER_EPP(pmullw, pmullw_xmm, Vdq, Vdq, Wdq)
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 20/46] target/i386: introduce tcg_temp operands

2019-08-14 Thread Jan Bobek
TCG temporary operands allocate a 32-bit or 64-bit TCG temporary, and
later automatically free it.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 44 +
 1 file changed, 44 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 99f46be34e..7fc5149d29 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4696,6 +4696,50 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
CkCpuidFeat feat)
 insnop_finalize(opTarg)(&ctxt->arg, env, s, modrm, is_write, arg); \
 }
 
+/*
+ * tcg_temp_i32
+ *
+ * Operand which allocates a 32-bit TCG temporary and frees it
+ * automatically after use.
+ */
+typedef TCGv_i32 insnop_arg_t(tcg_temp_i32);
+typedef struct {} insnop_ctxt_t(tcg_temp_i32);
+
+INSNOP_INIT(tcg_temp_i32)
+{
+return 0;
+}
+INSNOP_PREPARE(tcg_temp_i32)
+{
+return tcg_temp_new_i32();
+}
+INSNOP_FINALIZE(tcg_temp_i32)
+{
+tcg_temp_free_i32(arg);
+}
+
+/*
+ * tcg_temp_i64
+ *
+ * Operand which allocates a 64-bit TCG temporary and frees it
+ * automatically after use.
+ */
+typedef TCGv_i64 insnop_arg_t(tcg_temp_i64);
+typedef struct {} insnop_ctxt_t(tcg_temp_i64);
+
+INSNOP_INIT(tcg_temp_i64)
+{
+return 0;
+}
+INSNOP_PREPARE(tcg_temp_i64)
+{
+return tcg_temp_new_i64();
+}
+INSNOP_FINALIZE(tcg_temp_i64)
+{
+tcg_temp_free_i64(arg);
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 05/46] target/i386: use prefix from DisasContext

2019-08-14 Thread Jan Bobek
Reduce scope of the local variable prefixes to enforce use of prefix
from DisasContext instead.

Suggested-by: Richard Henderson 
Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 113 
 1 file changed, 57 insertions(+), 56 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index bb13877df7..40a4844b64 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4491,7 +4491,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 {
 CPUX86State *env = cpu->env_ptr;
-int b, prefixes;
+int b;
 int shift;
 TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
@@ -4499,6 +4499,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 target_ulong pc_start = s->base.pc_next;
 
 {
+int prefixes;
 TCGMemOp aflag, dflag;
 
 s->pc_start = s->pc = pc_start;
@@ -6356,7 +6357,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa4: /* movsS */
 case 0xa5:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_movs(s, ot);
@@ -6366,7 +6367,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xaa: /* stosS */
 case 0xab:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_stos(s, ot);
@@ -6375,7 +6376,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xac: /* lodsS */
 case 0xad:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_lods(s, ot);
@@ -6384,9 +6385,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xae: /* scasS */
 case 0xaf:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & PREFIX_REPNZ) {
+if (s->prefix & PREFIX_REPNZ) {
 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
-} else if (prefixes & PREFIX_REPZ) {
+} else if (s->prefix & PREFIX_REPZ) {
 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
 } else {
 gen_scas(s, ot);
@@ -6396,9 +6397,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa6: /* cmpsS */
 case 0xa7:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & PREFIX_REPNZ) {
+if (s->prefix & PREFIX_REPNZ) {
 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
-} else if (prefixes & PREFIX_REPZ) {
+} else if (s->prefix & PREFIX_REPZ) {
 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
 } else {
 gen_cmps(s, ot);
@@ -6409,8 +6410,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
 gen_check_io(s, ot, pc_start - s->cs_base, 
- SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ SVM_IOIO_TYPE_MASK | svm_is_rep(s->prefix) | 4);
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_ins(s, ot);
@@ -6424,8 +6425,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
 gen_check_io(s, ot, pc_start - s->cs_base,
- svm_is_rep(prefixes) | 4);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ svm_is_rep(s->prefix) | 4);
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_outs(s, ot);
@@ -6444,7 +6445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 val = x86_ldub_code(env, s);
 tcg_gen_movi_tl(s->T0, val);
 gen_check_io(s, ot, pc_start - s->cs_base,
- SVM_IOIO_TYPE_MASK | svm_is_rep(pref

[Qemu-devel] [RFC PATCH v3 35/46] target/i386: introduce MMX translators

2019-08-14 Thread Jan Bobek
Use the translator macros to define instruction translators required
by MMX instructions.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 76c27d0380..4fecb0d240 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -5457,6 +5457,15 @@ static void translate_insn0()(
 }   \
 }
 
+DEF_TRANSLATE_INSN2(Ed, Pq)
+DEF_TRANSLATE_INSN2(Eq, Pq)
+DEF_TRANSLATE_INSN2(Gd, Nq)
+DEF_TRANSLATE_INSN2(Gq, Nq)
+DEF_TRANSLATE_INSN2(Pq, Ed)
+DEF_TRANSLATE_INSN2(Pq, Eq)
+DEF_TRANSLATE_INSN2(Pq, Qq)
+DEF_TRANSLATE_INSN2(Qq, Pq)
+
 #define DEF_TRANSLATE_INSN3(opT1, opT2, opT3)   \
 static void translate_insn3(opT1, opT2, opT3)(  \
 CPUX86State *env, DisasContext *s, int modrm,   \
@@ -5501,6 +5510,13 @@ static void translate_insn0()(
 }   \
 }
 
+DEF_TRANSLATE_INSN3(Gd, Nq, Ib)
+DEF_TRANSLATE_INSN3(Gq, Nq, Ib)
+DEF_TRANSLATE_INSN3(Nq, Nq, Ib)
+DEF_TRANSLATE_INSN3(Pq, Pq, Qd)
+DEF_TRANSLATE_INSN3(Pq, Pq, Qq)
+DEF_TRANSLATE_INSN3(Pq, Qq, Ib)
+
 #define DEF_TRANSLATE_INSN4(opT1, opT2, opT3, opT4) \
 static void translate_insn4(opT1, opT2, opT3, opT4)(\
 CPUX86State *env, DisasContext *s, int modrm,   \
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 17/46] target/i386: introduce generic operand alias

2019-08-14 Thread Jan Bobek
It turns out it is useful to be able to declare operand name
aliases. Introduce a macro to capture this functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9d00b36406..8989e6504c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4576,6 +4576,26 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
CkCpuidFeat feat)
  int modrm, bool is_write,  \
  insnop_arg_t(opT) arg)
 
+/*
+ * Operand alias
+ */
+#define DEF_INSNOP_ALIAS(opT, opT2) \
+typedef insnop_arg_t(opT2) insnop_arg_t(opT);   \
+typedef insnop_ctxt_t(opT2) insnop_ctxt_t(opT); \
+\
+INSNOP_INIT(opT)\
+{   \
+return insnop_init(opT2)(ctxt, env, s, modrm, is_write);\
+}   \
+INSNOP_PREPARE(opT) \
+{   \
+return insnop_prepare(opT2)(ctxt, env, s, modrm, is_write); \
+}   \
+INSNOP_FINALIZE(opT)\
+{   \
+insnop_finalize(opT2)(ctxt, env, s, modrm, is_write, arg);  \
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 11/46] target/i386: introduce gen_(ld, st)d_env_A0

2019-08-14 Thread Jan Bobek
Similar in spirit to the already present gen_(ld,st)(q,o)_env_A0, it
will prove useful in later commits for smaller-sized vector loads.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c5ec309fe2..258351fce3 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2652,6 +2652,18 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
 gen_jmp_tb(s, eip, 0);
 }
 
+static inline void gen_ldd_env_A0(DisasContext *s, int offset)
+{
+tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+tcg_gen_st_i32(s->tmp2_i32, cpu_env, offset);
+}
+
+static inline void gen_std_env_A0(DisasContext *s, int offset)
+{
+tcg_gen_ld_i32(s->tmp2_i32, cpu_env, offset);
+tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+}
+
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 09/46] target/i386: make variable is_xmm const

2019-08-14 Thread Jan Bobek
The variable is_xmm does not change value after assignment, so make
this fact explicit by marking it const.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 8bf39b73c4..c5ec309fe2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3042,7 +3042,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
-int op1_offset, op2_offset, is_xmm, val;
+int op1_offset, op2_offset, val;
 int modrm, mod, rm, reg;
 SSEFunc_0_epp sse_fn_epp;
 SSEFunc_0_eppi sse_fn_eppi;
@@ -3056,20 +3056,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 : s->prefix & PREFIX_REPZ ? 2
 : s->prefix & PREFIX_REPNZ ? 3
 : 0;
+const int is_xmm =
+(0x10 <= b && b <= 0x5f)
+|| b == 0xc6
+|| b == 0xc2
+|| !!b1;
 sse_fn_epp = sse_op_table1[b][b1];
 if (!sse_fn_epp) {
 goto unknown_op;
 }
-if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
-is_xmm = 1;
-} else {
-if (b1 == 0) {
-/* MMX case */
-is_xmm = 0;
-} else {
-is_xmm = 1;
-}
-}
 /* simple MMX/SSE operation */
 if (s->flags & HF_TS_MASK) {
 gen_exception(s, EXCP07_PREX);
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 14/46] target/i386: introduce mnemonic aliases for several gvec operations

2019-08-14 Thread Jan Bobek
It is helpful to introduce aliases for some general gvec operations as
it makes a couple of instruction code generators simpler (added
later).

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index e9741cd7f7..6296a02991 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4493,6 +4493,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 
+#define tcg_gen_gvec_andn(vece, dofs, aofs, bofs, oprsz, maxsz) \
+tcg_gen_gvec_andc(vece, dofs, bofs, aofs, oprsz, maxsz)
+#define tcg_gen_gvec_cmpeq(vece, dofs, aofs, bofs, oprsz, maxsz)\
+tcg_gen_gvec_cmp(TCG_COND_EQ, vece, dofs, aofs, bofs, oprsz, maxsz)
+#define tcg_gen_gvec_cmpgt(vece, dofs, aofs, bofs, oprsz, maxsz)\
+tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, maxsz)
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 22/46] target/i386: introduce operands for decoding modrm fields

2019-08-14 Thread Jan Bobek
The old code uses bitshifts and bitwise-and all over the place for
decoding ModR/M fields. Avoid doing that by introducing proper
decoding operands.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 62 +
 1 file changed, 62 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 25c25a30fb..e4515e81df 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4760,6 +4760,68 @@ INSNOP_FINALIZE(modrm)
 {
 }
 
+/*
+ * modrm_mod
+ *
+ * Operand whose value is the MOD field of the ModR/M byte.
+ */
+typedef int insnop_arg_t(modrm_mod);
+typedef struct {} insnop_ctxt_t(modrm_mod);
+
+INSNOP_INIT(modrm_mod)
+{
+return 0;
+}
+INSNOP_PREPARE(modrm_mod)
+{
+return (modrm >> 6) & 3;
+}
+INSNOP_FINALIZE(modrm_mod)
+{
+}
+
+/*
+ * modrm_reg
+ *
+ * Operand whose value is the REG field of the ModR/M byte, extended
+ * with the REX.R bit if REX prefix is present.
+ */
+typedef int insnop_arg_t(modrm_reg);
+typedef struct {} insnop_ctxt_t(modrm_reg);
+
+INSNOP_INIT(modrm_reg)
+{
+return 0;
+}
+INSNOP_PREPARE(modrm_reg)
+{
+return ((modrm >> 3) & 7) | REX_R(s);
+}
+INSNOP_FINALIZE(modrm_reg)
+{
+}
+
+/*
+ * modrm_rm
+ *
+ * Operand whose value is the RM field of the ModR/M byte, extended
+ * with the REX.B bit if REX prefix is present.
+ */
+typedef int insnop_arg_t(modrm_rm);
+typedef struct {} insnop_ctxt_t(modrm_rm);
+
+INSNOP_INIT(modrm_rm)
+{
+return 0;
+}
+INSNOP_PREPARE(modrm_rm)
+{
+return (modrm & 7) | REX_B(s);
+}
+INSNOP_FINALIZE(modrm_rm)
+{
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 07/46] target/i386: use pc_start from DisasContext

2019-08-14 Thread Jan Bobek
The variable pc_start is already a member of DisasContext. Remove the
superfluous local variable.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 131 
 1 file changed, 65 insertions(+), 66 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7532d65778..b1ba2fc3e5 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4495,13 +4495,12 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
-target_ulong pc_start = s->base.pc_next;
 
 {
 int prefixes;
 TCGMemOp aflag, dflag;
 
-s->pc_start = s->pc = pc_start;
+s->pc_start = s->pc = s->base.pc_next;
 s->override = -1;
 #ifdef TARGET_X86_64
 s->rex_x = 0;
@@ -6357,7 +6356,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa5:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_movs(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_movs(s, ot);
 }
@@ -6367,7 +6366,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xab:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_stos(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_stos(s, ot);
 }
@@ -6376,7 +6375,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xad:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_lods(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_lods(s, ot);
 }
@@ -6385,9 +6384,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xaf:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & PREFIX_REPNZ) {
-gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+gen_repz_scas(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 
1);
 } else if (s->prefix & PREFIX_REPZ) {
-gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+gen_repz_scas(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 
0);
 } else {
 gen_scas(s, ot);
 }
@@ -6397,9 +6396,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa7:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & PREFIX_REPNZ) {
-gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+gen_repz_cmps(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 
1);
 } else if (s->prefix & PREFIX_REPZ) {
-gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+gen_repz_cmps(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 
0);
 } else {
 gen_cmps(s, ot);
 }
@@ -6408,10 +6407,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 case 0x6d:
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
-gen_check_io(s, ot, pc_start - s->cs_base, 
+gen_check_io(s, ot, s->pc_start - s->cs_base,
  SVM_IOIO_TYPE_MASK | svm_is_rep(s->prefix) | 4);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_ins(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_ins(s, ot);
 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6423,10 +6422,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 case 0x6f:
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
-gen_check_io(s, ot, pc_start - s->cs_base,
+gen_check_io(s, ot, s->pc_start - s->cs_base,
  svm_is_rep(s->prefix) | 4);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_outs(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_outs(s, ot);
 

[Qemu-devel] [RFC PATCH v3 08/46] target/i386: make variable b1 const

2019-08-14 Thread Jan Bobek
The variable b1 does not change value once assigned. Make this fact
explicit by marking it const.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index b1ba2fc3e5..8bf39b73c4 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3042,7 +3042,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
-int b1, op1_offset, op2_offset, is_xmm, val;
+int op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
 SSEFunc_0_epp sse_fn_epp;
 SSEFunc_0_eppi sse_fn_eppi;
@@ -3051,14 +3051,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 TCGMemOp ot;
 
 b &= 0xff;
-if (s->prefix & PREFIX_DATA)
-b1 = 1;
-else if (s->prefix & PREFIX_REPZ)
-b1 = 2;
-else if (s->prefix & PREFIX_REPNZ)
-b1 = 3;
-else
-b1 = 0;
+const int b1 =
+s->prefix & PREFIX_DATA ? 1
+: s->prefix & PREFIX_REPZ ? 2
+: s->prefix & PREFIX_REPNZ ? 3
+: 0;
 sse_fn_epp = sse_op_table1[b][b1];
 if (!sse_fn_epp) {
 goto unknown_op;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 12/46] target/i386: introduce gen_sse_ng

2019-08-14 Thread Jan Bobek
This function serves as the point-of-intercept for all newly
implemented instructions. If no new implementation exists, fall back
to gen_sse.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 258351fce3..fdc7cb0054 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4489,6 +4489,33 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 }
 }
 
+static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
+{
+enum {
+P_NP = 0,
+P_66 = 1 << (0 + 8),
+P_F3 = 1 << (1 + 8),
+P_F2 = 1 << (2 + 8),
+W_0  = 0 << (3 + 8),
+W_1  = 1 << (3 + 8),
+M_NA = 0,
+M_0F = 1 << (4 + 8),
+};
+
+switch ((b & 0xff) | M_0F
+| (s->prefix & PREFIX_DATA ? P_66 : 0)
+| (s->prefix & PREFIX_REPZ ? P_F3 : 0)
+| (s->prefix & PREFIX_REPNZ ? P_F2 : 0)
+| (REX_W(s) > 0 ? W_1 : W_0)) {
+
+default:
+gen_sse(env, s, b);
+return;
+}
+
+g_assert_not_reached();
+}
+
 /* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
@@ -8379,7 +8406,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x1c2:
 case 0x1c4 ... 0x1c6:
 case 0x1d0 ... 0x1fe:
-gen_sse(env, s, b);
+gen_sse_ng(env, s, b);
 break;
 default:
 goto unknown_op;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 04/46] target/i386: use dflag from DisasContext

2019-08-14 Thread Jan Bobek
There already is a variable dflag in DisasContext, so reduce the scope
of the local variable dflag to enforce use of the one in DisasContext.

Suggested-by: Richard Henderson 
Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 184 
 1 file changed, 92 insertions(+), 92 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index bda96277e4..bb13877df7 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4493,13 +4493,13 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 CPUX86State *env = cpu->env_ptr;
 int b, prefixes;
 int shift;
-TCGMemOp ot, dflag;
+TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
 
 {
-TCGMemOp aflag;
+TCGMemOp aflag, dflag;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
@@ -4686,7 +4686,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 op = (b >> 3) & 7;
 f = (b >> 1) & 3;
 
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 switch(f) {
 case 0: /* OP Ev, Gv */
@@ -4744,7 +4744,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 {
 int val;
 
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -4781,16 +4781,16 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 /**/
 /* inc, dec, and other misc arith */
 case 0x40 ... 0x47: /* inc Gv */
-ot = dflag;
+ot = s->dflag;
 gen_inc(s, ot, OR_EAX + (b & 7), 1);
 break;
 case 0x48 ... 0x4f: /* dec Gv */
-ot = dflag;
+ot = s->dflag;
 gen_inc(s, ot, OR_EAX + (b & 7), -1);
 break;
 case 0xf6: /* GRP3 */
 case 0xf7:
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -5022,7 +5022,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 
 case 0xfe: /* GRP4 */
 case 0xff: /* GRP5 */
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -5036,10 +5036,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 /* operand size for jumps is 64 bit */
 ot = MO_64;
 } else if (op == 3 || op == 5) {
-ot = dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
+ot = s->dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
 } else if (op == 6) {
 /* default push size is 64 bit */
-ot = mo_pushpop(s, dflag);
+ot = mo_pushpop(s, s->dflag);
 }
 }
 if (mod != 3) {
@@ -5067,7 +5067,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 break;
 case 2: /* call Ev */
 /* XXX: optimize if memory (no 'and' is necessary) */
-if (dflag == MO_16) {
+if (s->dflag == MO_16) {
 tcg_gen_ext16u_tl(s->T0, s->T0);
 }
 next_eip = s->pc - s->cs_base;
@@ -5085,19 +5085,19 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 if (s->pe && !s->vm86) {
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
-   tcg_const_i32(dflag - 1),
+   tcg_const_i32(s->dflag - 1),
tcg_const_tl(s->pc - s->cs_base));
 } else {
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
-  tcg_const_i32(dflag - 1),
+  tcg_const_i32(s->dflag - 1),
   tcg_const_i32(s->pc - s->cs_base));
 }
 tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
 gen_jr(s, s->tmp4);
 break;
 case 4: /* jmp Ev */
-if (dflag == MO_16) {
+if (s->dflag == MO_16) {
 tcg_gen_ext16u_tl(s->T0, s->T0);
 }
 gen_op_jmp_v(s->T0);
@@ -5130,7 +5130,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 
 case 0x84: /* test Ev, Gv */
 case 0x85:
-ot = mo_b_d(b, dflag);
+

[Qemu-devel] [RFC PATCH v3 13/46] target/i386: disable unused function warning temporarily

2019-08-14 Thread Jan Bobek
Some functions added later are generated by preprocessor macros and
end up being unused (e.g. not all operands can serve as a destination
operand). Disable unused function warnings for the new code until I
figure out how I want to solve this particular issue.

Note: This changeset is intended for development only and shall not be
included in the final patch series.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index fdc7cb0054..e9741cd7f7 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4489,6 +4489,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 }
 }
 
+/* XXX TODO get rid of this eventually */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
@@ -4515,6 +4519,7 @@ static void gen_sse_ng(CPUX86State *env, DisasContext *s, 
int b)
 
 g_assert_not_reached();
 }
+#pragma GCC diagnostic pop
 
 /* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 02/46] target/i386: Push rex_w into DisasContext

2019-08-14 Thread Jan Bobek
From: Richard Henderson 

Treat this the same as we already do for other rex bits.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index d74dbfd585..c0866c2797 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -44,11 +44,13 @@
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
 #define REX_R(s) ((s)->rex_r)
+#define REX_W(s) ((s)->rex_w)
 #else
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
 #define REX_R(s) 0
+#define REX_W(s) -1
 #endif
 
 #ifdef TARGET_X86_64
@@ -100,7 +102,7 @@ typedef struct DisasContext {
 #ifdef TARGET_X86_64
 int lma;/* long mode active */
 int code64; /* 64 bit code segment */
-int rex_x, rex_b, rex_r;
+int rex_x, rex_b, rex_r, rex_w;
 #endif
 int vex_l;  /* vex vector length */
 int vex_v;  /* vex vvvv register, without 1's complement.  */
@@ -4495,7 +4497,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
-int rex_w;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
@@ -4503,6 +4504,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rex_x = 0;
 s->rex_b = 0;
 s->rex_r = 0;
+s->rex_w = -1;
 s->x86_64_hregs = false;
 #endif
 s->rip_offset = 0; /* for relative ip address */
@@ -4514,7 +4516,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 }
 
 prefixes = 0;
-rex_w = -1;
 
  next_byte:
 b = x86_ldub_code(env, s);
@@ -4557,7 +4558,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x40 ... 0x4f:
 if (CODE64(s)) {
 /* REX prefix */
-rex_w = (b >> 3) & 1;
+s->rex_w = (b >> 3) & 1;
 s->rex_r = (b & 0x4) << 1;
 s->rex_x = (b & 0x2) << 2;
 s->rex_b = (b & 0x1) << 3;
@@ -4606,7 +4607,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rex_b = (~vex2 >> 2) & 8;
 #endif
 vex3 = x86_ldub_code(env, s);
-rex_w = (vex3 >> 7) & 1;
+#ifdef TARGET_X86_64
+s->rex_w = (vex3 >> 7) & 1;
+#endif
 switch (vex2 & 0x1f) {
 case 0x01: /* Implied 0f leading opcode bytes.  */
 b = x86_ldub_code(env, s) | 0x100;
@@ -4631,9 +4634,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 /* Post-process prefixes.  */
 if (CODE64(s)) {
 /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
-   data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
+   data with REX_W, and 16-bit data with 0x66; REX_W takes precedence
over 0x66 if both are present.  */
-dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
+dflag = (REX_W(s) > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : 
MO_32);
 /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
 aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
 } else {
@@ -5029,7 +5032,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 /* operand size for jumps is 64 bit */
 ot = MO_64;
 } else if (op == 3 || op == 5) {
-ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
+ot = dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
 } else if (op == 6) {
 /* default push size is 64 bit */
 ot = mo_pushpop(s, dflag);
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 01/46] target/i386: Push rex_r into DisasContext

2019-08-14 Thread Jan Bobek
From: Richard Henderson 

Treat this value the same as we do for rex_b and rex_x.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 85 +
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 03150a86e2..d74dbfd585 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -43,10 +43,12 @@
 #define CODE64(s) ((s)->code64)
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
+#define REX_R(s) ((s)->rex_r)
 #else
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
+#define REX_R(s) 0
 #endif
 
 #ifdef TARGET_X86_64
@@ -98,7 +100,7 @@ typedef struct DisasContext {
 #ifdef TARGET_X86_64
 int lma;/* long mode active */
 int code64; /* 64 bit code segment */
-int rex_x, rex_b;
+int rex_x, rex_b, rex_r;
 #endif
 int vex_l;  /* vex vector length */
 int vex_v;  /* vex vvvv register, without 1's complement.  */
@@ -3037,7 +3039,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 };
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-target_ulong pc_start, int rex_r)
+target_ulong pc_start)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
@@ -3107,8 +3109,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 
 modrm = x86_ldub_code(env, s);
 reg = ((modrm >> 3) & 7);
-if (is_xmm)
-reg |= rex_r;
+if (is_xmm) {
+reg |= REX_R(s);
+}
 mod = (modrm >> 6) & 3;
 if (sse_fn_epp == SSE_SPECIAL) {
 b |= (b1 << 8);
@@ -3642,7 +3645,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 tcg_gen_ld16u_tl(s->T0, cpu_env,
 
offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
 }
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 gen_op_mov_reg_v(s, ot, reg, s->T0);
 break;
 case 0x1d6: /* movq ea, xmm */
@@ -3686,7 +3689,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
  offsetof(CPUX86State, fpregs[rm].mmx));
 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
 }
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
 break;
 
@@ -3698,7 +3701,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 modrm = x86_ldub_code(env, s);
 rm = modrm & 7;
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 mod = (modrm >> 6) & 3;
 if (b1 >= 2) {
 goto unknown_op;
@@ -3774,7 +3777,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* Various integer extensions at 0f 38 f[0-f].  */
 b = modrm | (b1 << 8);
 modrm = x86_ldub_code(env, s);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 
 switch (b) {
 case 0x3f0: /* crc32 Gd,Eb */
@@ -4128,7 +4131,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 b = modrm;
 modrm = x86_ldub_code(env, s);
 rm = modrm & 7;
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 mod = (modrm >> 6) & 3;
 if (b1 >= 2) {
 goto unknown_op;
@@ -4148,7 +4151,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 rm = (modrm & 7) | REX_B(s);
 if (mod != 3)
 gen_lea_modrm(env, s, modrm);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 val = x86_ldub_code(env, s);
 switch (b) {
 case 0x14: /* pextrb */
@@ -4317,7 +4320,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* Various integer extensions at 0f 3a f[0-f].  */
 b = modrm | (b1 << 8);
 modrm = x86_ldub_code(env, s);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 
 switch (b) {
 case 0x3f0: /* rorx Gy,Ey, Ib */
@@ -4491,14 +4494,15 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 TCGMemOp ot, aflag, dflag;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
-int rex_w, rex_r;
 target_ulong pc_start = s->base.pc_next;
+int rex_w;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
 #ifdef TARGET_X86_64
 s->rex_x = 0;
 s->rex_b = 0;
+s->rex_r = 0;
 s->x86_64_hregs = false;
 #endif
 s->rip_offset = 

[Qemu-devel] [RFC PATCH v3 03/46] target/i386: reduce scope of variable aflag

2019-08-14 Thread Jan Bobek
The variable aflag is not used in most of disas_insn; make this clear
by explicitly reducing its scope to the block where it is used.

Suggested-by: Richard Henderson 
Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c0866c2797..bda96277e4 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4493,11 +4493,14 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 CPUX86State *env = cpu->env_ptr;
 int b, prefixes;
 int shift;
-TCGMemOp ot, aflag, dflag;
+TCGMemOp ot, dflag;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
 
+{
+TCGMemOp aflag;
+
 s->pc_start = s->pc = pc_start;
 s->override = -1;
 #ifdef TARGET_X86_64
@@ -4657,6 +4660,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->prefix = prefixes;
 s->aflag = aflag;
 s->dflag = dflag;
+}
 
 /* now check op code */
  reswitch:
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 10/46] target/i386: add vector register file alignment constraints

2019-08-14 Thread Jan Bobek
gvec operations require that all vectors be aligned on a 16-byte
boundary; make sure the MM/XMM/YMM/ZMM register file is aligned as
necessary.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/cpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8b3dc5533e..cb407b86ba 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1199,9 +1199,9 @@ typedef struct CPUX86State {
 float_status mmx_status; /* for 3DNow! float ops */
 float_status sse_status;
 uint32_t mxcsr;
-ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
-ZMMReg xmm_t0;
-MMXReg mmx_t0;
+ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] QEMU_ALIGNED(16);
+ZMMReg xmm_t0 QEMU_ALIGNED(16);
+MMXReg mmx_t0 QEMU_ALIGNED(8);
 
 XMMReg ymmh_regs[CPU_NB_REGS];
 
-- 
2.20.1




[Qemu-devel] [RFC PATCH v3 00/46] rewrite MMX/SSE/SSE2/SSE3 instruction translation

2019-08-14 Thread Jan Bobek
The previous version can be found at [1]. Changes compared to v2:

  - Expanded the instruction operand infrastructure a bit; I am now
fairly confident that it is powerful enough to accommodate all
the use cases I will need. It's still a bit clunky to work with at
times, but I am happy with it for now.

  - Reduced the number of various INSN_* (now called OPCODE_*) macro
variants using variadic macros.

  - Implemented translation for instructions up to SSE3.

Cheers,
 -Jan

References:
  1. https://lists.nongnu.org/archive/html/qemu-devel/2019-08/msg01790.html

Jan Bobek (43):
  target/i386: reduce scope of variable aflag
  target/i386: use dflag from DisasContext
  target/i386: use prefix from DisasContext
  target/i386: use pc_start from DisasContext
  target/i386: make variable b1 const
  target/i386: make variable is_xmm const
  target/i386: add vector register file alignment constraints
  target/i386: introduce gen_(ld,st)d_env_A0
  target/i386: introduce gen_sse_ng
  target/i386: disable unused function warning temporarily
  target/i386: introduce mnemonic aliases for several gvec operations
  target/i386: introduce function ck_cpuid
  target/i386: introduce instruction operand infrastructure
  target/i386: introduce generic operand alias
  target/i386: introduce generic either-or operand
  target/i386: introduce generic load-store operand
  target/i386: introduce tcg_temp operands
  target/i386: introduce modrm operand
  target/i386: introduce operands for decoding modrm fields
  target/i386: introduce operand for direct-only r/m field
  target/i386: introduce operand vex_v
  target/i386: introduce Ib (immediate) operand
  target/i386: introduce M* (memptr) operands
  target/i386: introduce G*, R*, E* (general register) operands
  target/i386: introduce P*, N*, Q* (MMX) operands
  target/i386: introduce H*, V*, U*, W* (SSE/AVX) operands
  target/i386: introduce code generators
  target/i386: introduce helper-based code generator macros
  target/i386: introduce gvec-based code generator macros
  target/i386: introduce sse-opcode.inc.h
  target/i386: introduce instruction translator macros
  target/i386: introduce MMX translators
  target/i386: introduce MMX code generators
  target/i386: introduce MMX instructions to sse-opcode.inc.h
  target/i386: introduce SSE translators
  target/i386: introduce SSE code generators
  target/i386: introduce SSE instructions to sse-opcode.inc.h
  target/i386: introduce SSE2 translators
  target/i386: introduce SSE2 code generators
  target/i386: introduce SSE2 instructions to sse-opcode.inc.h
  target/i386: introduce SSE3 translators
  target/i386: introduce SSE3 code generators
  target/i386: introduce SSE3 instructions to sse-opcode.inc.h

Richard Henderson (3):
  target/i386: Push rex_r into DisasContext
  target/i386: Push rex_w into DisasContext
  target/i386: Simplify gen_exception arguments

 target/i386/cpu.h|6 +-
 target/i386/sse-opcode.inc.h |  699 +
 target/i386/translate.c  | 2808 ++
 3 files changed, 3189 insertions(+), 324 deletions(-)
 create mode 100644 target/i386/sse-opcode.inc.h

-- 
2.20.1




Re: [Qemu-devel] [RFC PATCH v2 21/39] target/i386: introduce insn.h

2019-08-14 Thread Jan Bobek
On 8/13/19 2:00 AM, Richard Henderson wrote:
> On 8/10/19 5:12 AM, Jan Bobek wrote:
>> This header is intended to eventually list all supported instructions
>> along with some useful details (e.g. mnemonics, opcode, operands etc.)
>> It shall be used (along with some preprocessor magic) anytime we need
>> to automatically generate code for every instruction.
>>
>> Signed-off-by: Jan Bobek 
>> ---
>>  target/i386/insn.h | 87 ++
>>  1 file changed, 87 insertions(+)
>>  create mode 100644 target/i386/insn.h
> 
> Things that are included multiple times should be named *.inc.h.  There are
> quite a few that don't follow this in the tree, but we are slowly fixing 
> those.
> 
> Though even "insn.inc.h" isn't particularly descriptive, and definitely
> overstates the case.  Maybe sse-opcode.inc.h?  While it's not only sse, it is
> used by gen_sse_ng().

"sse-opcode.inc.h" isn't 100 % accurate, as you point out, but it looks good enough for now.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v2 23/39] target/i386: introduce instruction translator macros

2019-08-14 Thread Jan Bobek
On 8/13/19 2:30 AM, Richard Henderson wrote:
> On 8/10/19 5:12 AM, Jan Bobek wrote:
>> +#define CASES_LEG_NP_0F_W0(opcode)  \
>> +case opcode | M_0F | W_0:
>> +#define CASES_LEG_NP_0F_W1(opcode)  \
>> +case opcode | M_0F | W_1:
>> +#define CASES_LEG_F3_0F_W0(opcode)  \
>> +case opcode | M_0F | P_F3 | W_0:
>> +#define CASES_LEG_F3_0F_W1(opcode)  \
>> +case opcode | M_0F | P_F3 | W_1:
>> +
>> +#define LEG(p, m, w)\
>> +CASES_LEG_ ## p ## _ ## m ## _W ## w
>> +#define INSN(mnem, cases, opcode, feat) \
>> +cases(opcode)   \
> 
> It appears as if you don't need the CASES_* macros here.
> 
> #define LEG(p, m, w, op) \
>case P_##p | M_##m | W_##w | op
> 
> #define INSN(mnem, leg, feat) \
>leg: translate_insn(env, s, CK_CPUID_##feat, gen_insn(mnem));
> 
> so long as P_NP is in the enumeration above with value 0.
> 
> Unless there's some other reason that opcode needs to stay separate?

I was thinking ahead with the CASES_* macros here: if I have LIG
and/or WIG in the VEX prefix, I'll need more than one case label,
but only one label in other cases. However, that's not a reason
for the opcode to be separate, and I think I like it stashed with
the rest of the prefix fields better.

-Jan
 



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v2 16/39] target/i386: introduce instruction operand infrastructure

2019-08-14 Thread Jan Bobek
On 8/13/19 2:07 AM, Richard Henderson wrote:
> On 8/10/19 5:12 AM, Jan Bobek wrote:
>> +#define INSNOP_INIT(opT, init_stmt)\
>> +static int insnop_init(opT)(CPUX86State *env, DisasContext *s, \
>> +int modrm, insnop_t(opT) *op)  \
>> +{  \
>> +init_stmt; \
>> +}
> ...
>> +#define INSNOP_INIT_FAILreturn 1
>> +#define INSNOP_INIT_OK(x)   return ((*(op) = (x)), 0)
> 
> Return bool and true on success.

So, the reason why I did this "inverted" logic (0 = success, 1 =
failure) is because I was anticipating I might need to differentiate
between two or more different failures, in which case returning
different non-zero values for different error cases makes perfect
sense. I have not made use of it yet, but I'd rather hold on to this
idiom at least for now, until I am 100 % sure it really is
unnecessary.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v2 00/39] rewrite MMX/SSE instruction translation

2019-08-11 Thread Jan Bobek
On 8/10/19 7:35 PM, Richard Henderson wrote:
> On 8/9/19 9:12 PM, Jan Bobek wrote:
>> This is a v2 of the patch series posted in [1]. Patches 1-9 are just
>> cleanups; patches 10-39 are something actually interesting. Compared
>> to v1, I started using preprocessor more extensively to generate
>> repetitive boilerplate code; opinions/alternatives are welcome and
>> appreciated.
> 
> This is tricky.  I'm not keen on code entirely expanded via macros because it
> becomes extremely difficult to debug.  All statements get recorded at the same
> line of the location of the expansion, which makes the gdb "step" command
> finish the entire function because there is no next line.
> 
> Once upon a time I wrote some code that's extremely macro crazy:
> 
> https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=soft-fp/op-common.h;hb=HEAD
> 
> It has been extremely difficult to maintain over the years.

Thank you, that's exactly the feedback I'm looking for! I've played
with the preprocessor in the past just to try out what's possible, but
I've never maintained code that uses it as extensively as this
series. It didn't occur to me that there would be a problem with
stepping it in gdb, for example, but now it seems obvious.

> We have just recently gotten rid of some of the macros in the softmmu code
> 
> https://patchwork.ozlabs.org/project/qemu-devel/list/?series=105441
> 
> replacing most of them with inline functions.

I'll have to look at it and see how exactly it's done; perhaps I'll
find something that's applicable to my case, too.

> A lot of what you have needs very little adjustment to address the debugging
> problem.  E.g.
> 
>> +#define INSNOP_INIT(opT, init_stmt)\
>> +static int insnop_init(opT)(CPUX86State *env, DisasContext *s, \
>> +int modrm, insnop_t(opT) *op)  \
>> +{  \
>> +init_stmt; \
>> +}
> 
>> +INSNOP(
>> +M, TCGv,
>> +do {
>> +if (decode_modrm_mod(env, s, modrm) == 3) {
>> +INSNOP_INIT_FAIL;
>> +} else {
>> +INSNOP_INIT_OK(s->A0);
>> +}
>> +} while (0),
>> +do {
>> +assert(*op == s->A0);
>> +gen_lea_modrm(env, s, modrm);
>> +} while (0),
>> +INSNOP_FINALIZE_NOOP)
> 
> Rearrange this as
> 
> #define INSNOP_INIT(OPT) \
> static bool insnop_##OPT##_init(CPUX86State *env, DisasContext *s, \
> int modrm, insnop_##OPT##_t *op)
> 
> #define INSNOP_PREPARE(OPT) \
> static void insnop_##OPT##_prepare(CPUX86State *env, DisasContext *s, \
>int modrm, insnop_##OPT##_t *op)
> 
> INSNOP_INIT(M)
> {
> if (decode_modrm_mod(env, s, modrm) == 3) {
> INSNOP_INIT_FAIL;
> } else {
> INSNOP_INIT_OK(s->A0);
> }
> }
> 
> INSNOP_PREPARE(M)
> {
> assert(*op == s->A0);
> gen_lea_modrm(env, s, modrm);
> }
> 
> etc and suddenly the entire expansion does not occur on a single line.

That makes complete sense, thank you! I'll keep the debugging issue in
mind.

> Further specific commentary to follow.

Looking forward to it!

-Jan



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [RFC PATCH v2 37/39] target/i386: introduce SSE code generators

2019-08-09 Thread Jan Bobek
Introduce code generators required by SSE instructions.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 440 
 1 file changed, 440 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 12d2ac2eb5..681fa1aee2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4943,6 +4943,9 @@ INSNOP_LDST_UNIFY(Wdq, Udq, UdqMdq)
 #define GEN_INSN_WR_GVEC_MM(mnem, gvec, opW1, opR1, vece)   \
 GEN_INSN_WR_GVEC(mnem, gvec, opW1, opR1, vece,  \
  sizeof(MMXReg), sizeof(MMXReg))
+#define GEN_INSN_WR_GVEC_XMM(mnem, gvec, opW1, opR1, vece)  \
+GEN_INSN_WR_GVEC(mnem, gvec, opW1, opR1, vece,  \
+ sizeof(XMMReg), sizeof(XMMReg))
 
 #define GEN_INSN_WRR_GVEC(mnem, gvec, opW1, opR1, opR2, vece, oprsz, maxsz) \
 static void gen_insn_wrr(mnem, opW1, opR1, opR2)(   \
@@ -4954,6 +4957,9 @@ INSNOP_LDST_UNIFY(Wdq, Udq, UdqMdq)
 #define GEN_INSN_WRR_GVEC_MM(mnem, gvec, opW1, opR1, opR2, vece)\
 GEN_INSN_WRR_GVEC(mnem, gvec, opW1, opR1, opR2, vece,   \
   sizeof(MMXReg), sizeof(MMXReg))
+#define GEN_INSN_WRR_GVEC_XMM(mnem, gvec, opW1, opR1, opR2, vece)   \
+GEN_INSN_WRR_GVEC(mnem, gvec, opW1, opR1, opR2, vece,   \
+  sizeof(XMMReg), sizeof(XMMReg))
 
 static void gen_insn_wr(movq, Eq, Pq)(CPUX86State *env, DisasContext *s,
   insnop_t(Eq) ret, insnop_t(Pq) arg1)
@@ -4986,6 +4992,101 @@ static void gen_insn_wr(movd, Pq, Ed)(CPUX86State *env, 
DisasContext *s,
 
 GEN_INSN_WR_GVEC_MM(movq, mov, Pq, Qq, MO_64)
 GEN_INSN_WR_GVEC_MM(movq, mov, Qq, Pq, MO_64)
+GEN_INSN_WR_GVEC_XMM(movaps, mov, Vdq, Wdq, MO_64)
+GEN_INSN_WR_GVEC_XMM(movaps, mov, Wdq, Vdq, MO_64)
+GEN_INSN_WR_GVEC_XMM(movups, mov, Vdq, Wdq, MO_64)
+GEN_INSN_WR_GVEC_XMM(movups, mov, Wdq, Vdq, MO_64)
+
+static void gen_insn_wr(movss, Wd, Vd)(CPUX86State *env, DisasContext *s,
+   insnop_t(Wd) ret, insnop_t(Vd) arg1)
+{
+const size_t ofs = offsetof(ZMMReg, ZMM_L(0));
+gen_op_movl(s, ret + ofs, arg1 + ofs);
+}
+
+static void gen_insn_wrr(movss, Vdq, Vdq, UdMd)(CPUX86State *env,
+DisasContext *s,
+insnop_t(Vdq) ret,
+insnop_t(Vdq) arg1,
+insnop_t(UdMd) arg2)
+{
+assert(ret == arg1);
+
+if (arg2.is_mem) {
+const size_t ofs0 = offsetof(ZMMReg, ZMM_Q(0));
+const size_t ofs1 = offsetof(ZMMReg, ZMM_Q(1));
+
+tcg_gen_movi_i64(s->tmp1_i64, 0);
+tcg_gen_st_i64(s->tmp1_i64, cpu_env, ret + ofs0);
+tcg_gen_st_i64(s->tmp1_i64, cpu_env, ret + ofs1);
+}
+gen_insn_wr(movss, Wd, Vd)(env, s, ret, arg2.op_reg);
+}
+
+static void gen_insn_wr(movlps, Mq, Vq)(CPUX86State *env, DisasContext *s,
+insnop_t(Mq) ret, insnop_t(Vq) arg1)
+{
+assert(ret == s->A0);
+gen_stq_env_A0(s, arg1 + offsetof(ZMMReg, ZMM_Q(0)));
+}
+
+static void gen_insn_wr(movhlps, Vq, UdqMq)(CPUX86State *env,
+DisasContext *s,
+insnop_t(Vq) ret,
+insnop_t(UdqMq) arg1)
+{
+const size_t dofs = offsetof(ZMMReg, ZMM_Q(0));
+const size_t aofs = offsetof(ZMMReg, ZMM_Q(arg1.is_mem ? 0 : 1));
+gen_op_movq(s, ret + dofs, arg1.op_reg + aofs);
+}
+
+static void gen_insn_wr(movhps, Mq, Vdq)(CPUX86State *env, DisasContext *s,
+ insnop_t(Mq) ret, insnop_t(Vdq) arg1)
+{
+assert(ret == s->A0);
+gen_stq_env_A0(s, arg1 + offsetof(ZMMReg, ZMM_Q(1)));
+}
+
+static void gen_insn_wrr(movlhps, Vdq, Vq, UqMq)(CPUX86State *env,
+ DisasContext *s,
+ insnop_t(Vdq) ret,
+ insnop_t(Vq) arg1,
+ insnop_t(UqMq) arg2)
+{
+assert(ret == arg1);
+
+const size_t dofs = offsetof(ZMMReg, ZMM_Q(1));
+const size_t aofs = offsetof(ZMMReg, ZMM_Q(0));
+gen_op_movq(s, ret + dofs, arg2.op_reg + aofs);
+}
+
+static void gen_insn_wr(pmovmskb, Gd, Nq)(CPUX86State *env, DisasContext *s,
+  insnop_t(Gd) ret, insnop_t(Nq) arg1)
+{
+tcg_gen_addi_ptr(s->ptr0, cpu_env, arg1);
+gen_helper_pmovmskb_mmx(ret, cpu_env, s->ptr0);
+}
+
+static void gen_insn_wr(pmovmskb, Gq, Nq)(CPUX86State *env, DisasContext *s,
+  insnop_t(Gq) ret, insnop_t(Nq) arg1)
+{
+gen_insn_wr(pmovmskb, Gd, Nq)(env, s, s->tmp2_i32, arg1);
+tcg_gen_extu_i32_i64(ret, s->tmp2_i32);
+}

[Qemu-devel] [RFC PATCH v2 38/39] target/i386: introduce SSE instructions to insn.h

2019-08-09 Thread Jan Bobek
Add all the SSE instruction entries to insn.h.

Signed-off-by: Jan Bobek 
---
 target/i386/insn.h | 158 +
 1 file changed, 158 insertions(+)

diff --git a/target/i386/insn.h b/target/i386/insn.h
index 6506ff3137..6e0c75b9f7 100644
--- a/target/i386/insn.h
+++ b/target/i386/insn.h
@@ -78,6 +78,36 @@ INSN_WR(movq, LEG(NP, 0F, 1), 0x7e, MMX, Eq, Pq)
 INSN_WR(movq, LEG(NP, 0F, 0), 0x6f, MMX, Pq, Qq)
 /* NP 0F 7F /r: MOVQ mm/m64, mm */
 INSN_WR(movq, LEG(NP, 0F, 0), 0x7f, MMX, Qq, Pq)
+/* NP 0F 28 /r: MOVAPS xmm1, xmm2/m128 */
+INSN_WR(movaps, LEG(NP, 0F, 0), 0x28, SSE, Vdq, Wdq)
+/* NP 0F 29 /r: MOVAPS xmm2/m128, xmm1 */
+INSN_WR(movaps, LEG(NP, 0F, 0), 0x29, SSE, Wdq, Vdq)
+/* NP 0F 10 /r: MOVUPS xmm1, xmm2/m128 */
+INSN_WR(movups, LEG(NP, 0F, 0), 0x10, SSE, Vdq, Wdq)
+/* NP 0F 11 /r: MOVUPS xmm2/m128, xmm1 */
+INSN_WR(movups, LEG(NP, 0F, 0), 0x11, SSE, Wdq, Vdq)
+/* F3 0F 10 /r: MOVSS xmm1, xmm2/m32 */
+INSN_WRR(movss, LEG(F3, 0F, 0), 0x10, SSE, Vdq, Vdq, UdMd)
+/* F3 0F 11 /r: MOVSS xmm2/m32, xmm1 */
+INSN_WR(movss, LEG(F3, 0F, 0), 0x11, SSE, Wd, Vd)
+/* NP 0F 12 /r: MOVHLPS xmm1, xmm2 */
+/* NP 0F 12 /r: MOVLPS xmm1, m64 */
+INSN_WR(movhlps, LEG(NP, 0F, 0), 0x12, SSE, Vq, UdqMq)
+/* 0F 13 /r: MOVLPS m64, xmm1 */
+INSN_WR(movlps, LEG(NP, 0F, 0), 0x13, SSE, Mq, Vq)
+/* NP 0F 16 /r: MOVLHPS xmm1, xmm2 */
+/* NP 0F 16 /r: MOVHPS xmm1, m64 */
+INSN_WRR(movlhps, LEG(NP, 0F, 0), 0x16, SSE, Vdq, Vq, UqMq)
+/* NP 0F 17 /r: MOVHPS m64, xmm1 */
+INSN_WR(movhps, LEG(NP, 0F, 0), 0x17, SSE, Mq, Vdq)
+/* NP 0F D7 /r: PMOVMSKB r32, mm */
+INSN_WR(pmovmskb, LEG(NP, 0F, 0), 0xd7, SSE, Gd, Nq)
+/* NP REX.W 0F D7 /r: PMOVMSKB r64, mm */
+INSN_WR(pmovmskb, LEG(NP, 0F, 1), 0xd7, SSE, Gq, Nq)
+/* NP 0F 50 /r: MOVMSKPS r32, xmm */
+INSN_WR(movmskps, LEG(NP, 0F, 0), 0x50, SSE, Gd, Udq)
+/* NP REX.W 0F 50 /r: MOVMSKPS r64, xmm */
+INSN_WR(movmskps, LEG(NP, 0F, 1), 0x50, SSE, Gq, Udq)
 /* NP 0F FC /r: PADDB mm, mm/m64 */
 INSN_WRR(paddb, LEG(NP, 0F, 0), 0xfc, MMX, Pq, Pq, Qq)
 /* NP 0F FD /r: PADDW mm, mm/m64 */
@@ -92,6 +122,10 @@ INSN_WRR(paddsw, LEG(NP, 0F, 0), 0xed, MMX, Pq, Pq, Qq)
 INSN_WRR(paddusb, LEG(NP, 0F, 0), 0xdc, MMX, Pq, Pq, Qq)
 /* NP 0F DD /r: PADDUSW mm,mm/m64 */
 INSN_WRR(paddusw, LEG(NP, 0F, 0), 0xdd, MMX, Pq, Pq, Qq)
+/* NP 0F 58 /r: ADDPS xmm1, xmm2/m128 */
+INSN_WRR(addps, LEG(NP, 0F, 0), 0x58, SSE, Vdq, Vdq, Wdq)
+/* F3 0F 58 /r: ADDSS xmm1, xmm2/m32 */
+INSN_WRR(addss, LEG(F3, 0F, 0), 0x58, SSE, Vd, Vd, Wd)
 /* NP 0F F8 /r: PSUBB mm, mm/m64 */
 INSN_WRR(psubb, LEG(NP, 0F, 0), 0xf8, MMX, Pq, Pq, Qq)
 /* NP 0F F9 /r: PSUBW mm, mm/m64 */
@@ -106,12 +140,60 @@ INSN_WRR(psubsw, LEG(NP, 0F, 0), 0xe9, MMX, Pq, Pq, Qq)
 INSN_WRR(psubusb, LEG(NP, 0F, 0), 0xd8, MMX, Pq, Pq, Qq)
 /* NP 0F D9 /r: PSUBUSW mm, mm/m64 */
 INSN_WRR(psubusw, LEG(NP, 0F, 0), 0xd9, MMX, Pq, Pq, Qq)
+/* NP 0F 5C /r: SUBPS xmm1, xmm2/m128 */
+INSN_WRR(subps, LEG(NP, 0F, 0), 0x5c, SSE, Vdq, Vdq, Wdq)
+/* F3 0F 5C /r: SUBSS xmm1, xmm2/m32 */
+INSN_WRR(subss, LEG(F3, 0F, 0), 0x5c, SSE, Vd, Vd, Wd)
 /* NP 0F D5 /r: PMULLW mm, mm/m64 */
 INSN_WRR(pmullw, LEG(NP, 0F, 0), 0xd5, MMX, Pq, Pq, Qq)
 /* NP 0F E5 /r: PMULHW mm, mm/m64 */
 INSN_WRR(pmulhw, LEG(NP, 0F, 0), 0xe5, MMX, Pq, Pq, Qq)
+/* NP 0F E4 /r: PMULHUW mm1, mm2/m64 */
+INSN_WRR(pmulhuw, LEG(NP, 0F, 0), 0xe4, SSE, Pq, Pq, Qq)
+/* NP 0F 59 /r: MULPS xmm1, xmm2/m128 */
+INSN_WRR(mulps, LEG(NP, 0F, 0), 0x59, SSE, Vdq, Vdq, Wdq)
+/* F3 0F 59 /r: MULSS xmm1,xmm2/m32 */
+INSN_WRR(mulss, LEG(F3, 0F, 0), 0x59, SSE, Vd, Vd, Wd)
 /* NP 0F F5 /r: PMADDWD mm, mm/m64 */
 INSN_WRR(pmaddwd, LEG(NP, 0F, 0), 0xf5, MMX, Pq, Pq, Qq)
+/* NP 0F 5E /r: DIVPS xmm1, xmm2/m128 */
+INSN_WRR(divps, LEG(NP, 0F, 0), 0x5e, SSE, Vdq, Vdq, Wdq)
+/* F3 0F 5E /r: DIVSS xmm1, xmm2/m32 */
+INSN_WRR(divss, LEG(F3, 0F, 0), 0x5e, SSE, Vd, Vd, Wd)
+/* NP 0F 53 /r: RCPPS xmm1, xmm2/m128 */
+INSN_WR(rcpps, LEG(NP, 0F, 0), 0x53, SSE, Vdq, Wdq)
+/* F3 0F 53 /r: RCPSS xmm1, xmm2/m32 */
+INSN_WR(rcpss, LEG(F3, 0F, 0), 0x53, SSE, Vd, Wd)
+/* NP 0F 51 /r: SQRTPS xmm1, xmm2/m128 */
+INSN_WR(sqrtps, LEG(NP, 0F, 0), 0x51, SSE, Vdq, Wdq)
+/* F3 0F 51 /r: SQRTSS xmm1, xmm2/m32 */
+INSN_WR(sqrtss, LEG(F3, 0F, 0), 0x51, SSE, Vd, Wd)
+/* NP 0F 52 /r: RSQRTPS xmm1, xmm2/m128 */
+INSN_WR(rsqrtps, LEG(NP, 0F, 0), 0x52, SSE, Vdq, Wdq)
+/* F3 0F 52 /r: RSQRTSS xmm1, xmm2/m32 */
+INSN_WR(rsqrtss, LEG(F3, 0F, 0), 0x52, SSE, Vd, Wd)
+/* NP 0F DA /r: PMINUB mm1, mm2/m64 */
+INSN_WRR(pminub, LEG(NP, 0F, 0), 0xda, SSE, Pq, Pq, Qq)
+/* NP 0F EA /r: PMINSW mm1, mm2/m64 */
+INSN_WRR(pminsw, LEG(NP, 0F, 0), 0xea, SSE, Pq, Pq, Qq)
+/* NP 0F 5D /r: MINPS xmm1, xmm2/m128 */
+INSN_WRR(minps, LEG(NP, 0F, 0), 0x5d, SSE, Vdq, Vdq, Wdq)
+/* F3 0F 5D /r: MINSS xmm1,xmm2/m32 */
+INSN_WRR(minss, LEG(F3, 0F, 0), 0x5d, SSE, Vd, Vd, Wd)
+/* NP 0F DE /r: PMAXUB mm1, mm2/m64 */
+INSN_WRR(pmaxub, LEG(NP, 0F, 0), 0xde, SSE, Pq, Pq, Qq)
+/* NP 0F EE /r: PMAXSW mm1, mm2/m64 */
+INSN_WRR(pmaxsw, LEG(NP, 0F, 0), 0xee, SSE, Pq, Pq, Qq

[Qemu-devel] [RFC PATCH v2 30/39] target/i386: introduce gvec-based code generator macros

2019-08-09 Thread Jan Bobek
Code generators defined using these macros rely on a gvec operation
(i.e. tcg_gen_gvec_*).

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index d721bb5142..36f2579654 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -23,6 +23,7 @@
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg-op.h"
+#include "tcg-op-gvec.h"
 #include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 
@@ -4882,6 +4883,22 @@ INSNOP_LDST_UNIFY(Qq, Nq, NqMq)
 gen_helper_ ## helper(cpu_env, s->ptr0, s->ptr1);   \
 }
 
+#define GEN_INSN_WR_GVEC(mnem, gvec, opW1, opR1, vece, oprsz, maxsz)\
+static void gen_insn_wr(mnem, opW1, opR1)(  \
+CPUX86State *env, DisasContext *s, insnop_t(opW1) ret,  \
+insnop_t(opR1) arg1)\
+{   \
+tcg_gen_gvec_ ## gvec(vece, ret, arg1, oprsz, maxsz);   \
+}
+
+#define GEN_INSN_WRR_GVEC(mnem, gvec, opW1, opR1, opR2, vece, oprsz, maxsz) \
+static void gen_insn_wrr(mnem, opW1, opR1, opR2)(   \
+CPUX86State *env, DisasContext *s, insnop_t(opW1) ret,  \
+insnop_t(opR1) arg1, insnop_t(opR2) arg2)   \
+{   \
+tcg_gen_gvec_ ## gvec(vece, ret, arg1, arg2, oprsz, maxsz); \
+}
+
 /*
  * Instruction translators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 39/39] target/i386: introduce memory-pointer operand read/write workarounds

2019-08-09 Thread Jan Bobek
The memory-pointer operand has a known limitation (see the commit
introducing M* operands for details); the workaround involves
declaring write-memory operands as read-memory instead.

Note: This changeset is intended for development only and shall not be
included in the final patch series.

Signed-off-by: Jan Bobek 
---
 target/i386/insn.h | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/target/i386/insn.h b/target/i386/insn.h
index 6e0c75b9f7..b61a4182f6 100644
--- a/target/i386/insn.h
+++ b/target/i386/insn.h
@@ -94,12 +94,14 @@ INSN_WR(movss, LEG(F3, 0F, 0), 0x11, SSE, Wd, Vd)
 /* NP 0F 12 /r: MOVLPS xmm1, m64 */
 INSN_WR(movhlps, LEG(NP, 0F, 0), 0x12, SSE, Vq, UdqMq)
 /* 0F 13 /r: MOVLPS m64, xmm1 */
-INSN_WR(movlps, LEG(NP, 0F, 0), 0x13, SSE, Mq, Vq)
+/* FIXME this is hacked, should be INSN_WR */
+INSN_RR(movlps, LEG(NP, 0F, 0), 0x13, SSE, Mq, Vq)
 /* NP 0F 16 /r: MOVLHPS xmm1, xmm2 */
 /* NP 0F 16 /r: MOVHPS xmm1, m64 */
 INSN_WRR(movlhps, LEG(NP, 0F, 0), 0x16, SSE, Vdq, Vq, UqMq)
 /* NP 0F 17 /r: MOVHPS m64, xmm1 */
-INSN_WR(movhps, LEG(NP, 0F, 0), 0x17, SSE, Mq, Vdq)
+/* FIXME this is hacked, should be INSN_WR */
+INSN_RR(movhps, LEG(NP, 0F, 0), 0x17, SSE, Mq, Vdq)
 /* NP 0F D7 /r: PMOVMSKB r32, mm */
 INSN_WR(pmovmskb, LEG(NP, 0F, 0), 0xd7, SSE, Gd, Nq)
 /* NP REX.W 0F D7 /r: PMOVMSKB r64, mm */
@@ -299,9 +301,11 @@ INSN_WR(cvttss2si, LEG(F3, 0F, 1), 0x2c, SSE, Gq, Wd)
 /* NP 0F F7 /r: MASKMOVQ mm1, mm2 */
 INSN_RR(maskmovq, LEG(NP, 0F, 0), 0xf7, SSE, Pq, Nq)
 /* NP 0F 2B /r: MOVNTPS m128, xmm1 */
-INSN_WR(movntps, LEG(NP, 0F, 0), 0x2b, SSE, Mdq, Vdq)
+/* FIXME this is hacked, should be INSN_WR */
+INSN_RR(movntps, LEG(NP, 0F, 0), 0x2b, SSE, Mdq, Vdq)
 /* NP 0F E7 /r: MOVNTQ m64, mm */
-INSN_WR(movntq, LEG(NP, 0F, 0), 0xe7, SSE, Mq, Pq)
+/* FIXME this is hacked, should be INSN_WR */
+INSN_RR(movntq, LEG(NP, 0F, 0), 0xe7, SSE, Mq, Pq)
 /* NP 0F 77: EMMS */
 INSN(emms, LEG(NP, 0F, 0), 0x77, MMX)
 
@@ -340,7 +344,8 @@ INSN_GRP_BEGIN(grp15_LEG_NP)
 /* NP 0F AE /2: LDMXCSR m32 */
 INSN_GRPMEMB_R(grp15_LEG_NP, ldmxcsr, 2, SSE, Md)
 /* NP 0F AE /3: STMXCSR m32 */
-INSN_GRPMEMB_W(grp15_LEG_NP, stmxcsr, 3, SSE, Md)
+/* FIXME this is hacked, should be INSN_GRPMEMB_W */
+INSN_GRPMEMB_R(grp15_LEG_NP, stmxcsr, 3, SSE, Md)
 INSN_GRP_END(grp15_LEG_NP)
 
 INSN_GRP(grp16_LEG_NP, LEG(NP, 0F, 0), 0x18)
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 25/39] target/i386: introduce M* (memptr) operands

2019-08-09 Thread Jan Bobek
The memory-pointer operand decodes the indirect form of ModR/M byte,
loads the effective address into a register and passes that register
as the operand.

Note: This operand has a known flaw: if an instruction is writing to
memory (rather than reading), this operand cannot and will not load
the effective address into the register (as it should). The current
workaround is to declare the memory operand as read (rather than
write); this flaw will be addressed in the next iteration.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index b8e6eaebb4..301dc4eddf 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4705,6 +4705,31 @@ INSNOP(Ib, int8_t,  \
(*op = x86_ldub_code(env, s)),   \
INSNOP_FINALIZE_INVALID)
 
+/*
+ * Memory-pointer operand
+ */
+INSNOP(
+M, TCGv,
+do {
+if (decode_modrm_mod(env, s, modrm) == 3) {
+INSNOP_INIT_FAIL;
+} else {
+INSNOP_INIT_OK(s->A0);
+}
+} while (0),
+do {
+assert(*op == s->A0);
+gen_lea_modrm(env, s, modrm);
+} while (0),
+INSNOP_FINALIZE_NOOP)
+
+INSNOP_ALIAS(Mb, M)
+INSNOP_ALIAS(Mw, M)
+INSNOP_ALIAS(Mq, M)
+INSNOP_ALIAS(Md, M)
+INSNOP_ALIAS(Mdq, M)
+INSNOP_ALIAS(Mqq, M)
+
 /*
  * Code generators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 23/39] target/i386: introduce instruction translator macros

2019-08-09 Thread Jan Bobek
Instruction "translators" are responsible for decoding and loading
instruction operands, calling the passed-in code generator, and
storing the operands back (if applicable). Once a translator returns,
the instruction has been translated to TCG ops, hence the name.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 288 
 1 file changed, 288 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 30180d1c25..0da064d5fd 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4715,6 +4715,222 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
 #define gen_insn_wrrr(mnem, opW1, opR1, opR2, opR3) \
 gen_ ## mnem ## _ ## opW1 ## opR1 ## opR2 ## opR3
 
+/*
+ * Instruction translators
+ */
+#define translate_insn_r(opR1)  \
+translate_insn_r_ ## opR1
+#define translate_insn_rr(opR1, opR2)   \
+translate_insn_rr_ ## opR1 ## opR2
+#define translate_insn_w(opW1)  \
+translate_insn_w_ ## opW1
+#define translate_insn_wr(opW1, opR1)   \
+translate_insn_wr_ ## opW1 ## opR1
+#define translate_insn_wrr(opW1, opR1, opR2)\
+translate_insn_wrr_ ## opW1 ## opR1 ## opR2
+#define translate_insn_wrrr(opW1, opR1, opR2, opR3) \
+translate_insn_wrrr_ ## opW1 ## opR1 ## opR2 ## opR3
+#define translate_group(grpname)\
+translate_group_ ## grpname
+
+static void translate_insn(
+CPUX86State *env, DisasContext *s, int ck_cpuid_feat,
+void (*gen_insn_fp)(CPUX86State *, DisasContext *))
+{
+if (ck_cpuid(env, s, ck_cpuid_feat)) {
+gen_illegal_opcode(s);
+return;
+}
+
+(*gen_insn_fp)(env, s);
+}
+
+#define TRANSLATE_INSN_R(opR1)  \
+static void translate_insn_r(opR1)( \
+CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
+void (*gen_insn_fp)(CPUX86State *, DisasContext *, insnop_t(opR1))) \
+{   \
+insnop_t(opR1) arg1;\
+\
+if (ck_cpuid(env, s, ck_cpuid_feat) \
+|| insnop_init(opR1)(env, s, modrm, &arg1)) {   \
+gen_illegal_opcode(s);  \
+return; \
+}   \
+\
+insnop_prepare(opR1)(env, s, modrm, &arg1); \
+(*gen_insn_fp)(env, s, arg1);   \
+}
+
+#define TRANSLATE_INSN_RR(opR1, opR2)   \
+static void translate_insn_rr(opR1, opR2)(  \
+CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
+void (*gen_insn_fp)(CPUX86State *, DisasContext *, insnop_t(opR1), \
+insnop_t(opR2)))\
+{   \
+insnop_t(opR1) arg1;\
+insnop_t(opR2) arg2;\
+\
+if (ck_cpuid(env, s, ck_cpuid_feat) \
+|| insnop_init(opR1)(env, s, modrm, &arg1)  \
+|| insnop_init(opR2)(env, s, modrm, &arg2)) {   \
+gen_illegal_opcode(s);  \
+return; \
+}   \
+\
+insnop_prepare(opR1)(env, s, modrm, &arg1); \
+insnop_prepare(opR2)(env, s, modrm, &arg2); \
+(*gen_insn_fp)(env, s, arg1, arg2); \
+}
+
+#define TRANSLATE_INSN_W(opW1)  \
+static void translate_insn_w(opW1)( \
+CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
+void (*gen_insn_fp)(CPUX86State *, DisasContext *, insnop_t(opW1))) \
+{   \
+insnop_t(opW1) ret; \
+\
+if (ck_cpuid(env, s, ck_cpuid_feat) \
+|| insnop_init(opW1)(env

[Qemu-devel] [RFC PATCH v2 34/39] target/i386: introduce V*, U*, W* (SSE/AVX) operands

2019-08-09 Thread Jan Bobek
These address the SSE/AVX-technology register file. Offset of the
entire corresponding register is passed as the operand value,
regardless of operand-size suffix.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 45 +
 1 file changed, 45 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index aa6fb8b013..97614e5941 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4839,6 +4839,51 @@ INSNOP_LDST(NqMq, Nq, Mq, offsetof(CPUX86State, mmx_t0),
 INSNOP_LDST_UNIFY(Qd, Nd, NdMd)
 INSNOP_LDST_UNIFY(Qq, Nq, NqMq)
 
+/*
+ * SSE/AVX registers
+ */
+#define INSNOP_INIT_XMM(xmmid_fp)   \
+do {\
+const int xmmid = xmmid_fp(env, s, modrm);  \
+INSNOP_INIT_OK(offsetof(CPUX86State, xmm_regs[xmmid])); \
+} while (0)
+
+#define INSNOP_XMM(opT, init_stmt) \
+INSNOP(opT, uint32_t,  \
+   init_stmt,  \
+   INSNOP_PREPARE_NOOP,\
+   INSNOP_FINALIZE_NOOP)
+
+INSNOP_XMM(V, INSNOP_INIT_XMM(decode_modrm_reg_rexr))
+INSNOP_ALIAS(Vd, V)
+INSNOP_ALIAS(Vq, V)
+INSNOP_ALIAS(Vdq, V)
+INSNOP_ALIAS(Vqq, V)
+
+INSNOP_XMM(U, INSNOP_INIT_DIRECT_ONLY(INSNOP_INIT_XMM(decode_modrm_rm_rexb)))
+INSNOP_ALIAS(Ud, U)
+INSNOP_ALIAS(Uq, U)
+INSNOP_ALIAS(Udq, U)
+INSNOP_ALIAS(Uqq, U)
+
+INSNOP_LDST(UdMd, Ud, Md, offsetof(CPUX86State, xmm_t0),
+(assert(ptr == s->A0),
+ gen_ldd_env_A0(s, reg + offsetof(ZMMReg, ZMM_L(0)))),
+(assert(ptr == s->A0),
+ gen_std_env_A0(s, reg + offsetof(ZMMReg, ZMM_L(0)))))
+INSNOP_LDST(UqMq, Uq, Mq, offsetof(CPUX86State, xmm_t0),
+(assert(ptr == s->A0),
+ gen_ldq_env_A0(s, reg + offsetof(ZMMReg, ZMM_Q(0)))),
+(assert(ptr == s->A0),
+ gen_stq_env_A0(s, reg + offsetof(ZMMReg, ZMM_Q(0)))))
+INSNOP_LDST(UdqMdq, Udq, Mdq, offsetof(CPUX86State, xmm_t0),
+(assert(ptr == s->A0), gen_ldo_env_A0(s, reg)),
+(assert(ptr == s->A0), gen_sto_env_A0(s, reg)))
+
+INSNOP_LDST_UNIFY(Wd, Ud, UdMd)
+INSNOP_LDST_UNIFY(Wq, Uq, UqMq)
+INSNOP_LDST_UNIFY(Wdq, Udq, UdqMdq)
+
 /*
  * Code generators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 33/39] target/i386: introduce MMX instructions to insn.h

2019-08-09 Thread Jan Bobek
Add all MMX instruction entries to insn.h.

Signed-off-by: Jan Bobek 
---
 target/i386/insn.h | 131 +
 1 file changed, 131 insertions(+)

diff --git a/target/i386/insn.h b/target/i386/insn.h
index 4b48c0c0e1..6506ff3137 100644
--- a/target/i386/insn.h
+++ b/target/i386/insn.h
@@ -66,6 +66,137 @@
 #   define INSN_GRP_END(grpname)
 #endif /* INSN_GRP_END */
 
+/* NP 0F 6E /r: MOVD mm,r/m32 */
+INSN_WR(movd, LEG(NP, 0F, 0), 0x6e, MMX, Pq, Ed)
+/* NP 0F 7E /r: MOVD r/m32,mm */
+INSN_WR(movd, LEG(NP, 0F, 0), 0x7e, MMX, Ed, Pq)
+/* NP REX.W + 0F 6E /r: MOVQ mm,r/m64 */
+INSN_WR(movq, LEG(NP, 0F, 1), 0x6e, MMX, Pq, Eq)
+/* NP REX.W + 0F 7E /r: MOVQ r/m64,mm */
+INSN_WR(movq, LEG(NP, 0F, 1), 0x7e, MMX, Eq, Pq)
+/* NP 0F 6F /r: MOVQ mm, mm/m64 */
+INSN_WR(movq, LEG(NP, 0F, 0), 0x6f, MMX, Pq, Qq)
+/* NP 0F 7F /r: MOVQ mm/m64, mm */
+INSN_WR(movq, LEG(NP, 0F, 0), 0x7f, MMX, Qq, Pq)
+/* NP 0F FC /r: PADDB mm, mm/m64 */
+INSN_WRR(paddb, LEG(NP, 0F, 0), 0xfc, MMX, Pq, Pq, Qq)
+/* NP 0F FD /r: PADDW mm, mm/m64 */
+INSN_WRR(paddw, LEG(NP, 0F, 0), 0xfd, MMX, Pq, Pq, Qq)
+/* NP 0F FE /r: PADDD mm, mm/m64 */
+INSN_WRR(paddd, LEG(NP, 0F, 0), 0xfe, MMX, Pq, Pq, Qq)
+/* NP 0F EC /r: PADDSB mm, mm/m64 */
+INSN_WRR(paddsb, LEG(NP, 0F, 0), 0xec, MMX, Pq, Pq, Qq)
+/* NP 0F ED /r: PADDSW mm, mm/m64 */
+INSN_WRR(paddsw, LEG(NP, 0F, 0), 0xed, MMX, Pq, Pq, Qq)
+/* NP 0F DC /r: PADDUSB mm,mm/m64 */
+INSN_WRR(paddusb, LEG(NP, 0F, 0), 0xdc, MMX, Pq, Pq, Qq)
+/* NP 0F DD /r: PADDUSW mm,mm/m64 */
+INSN_WRR(paddusw, LEG(NP, 0F, 0), 0xdd, MMX, Pq, Pq, Qq)
+/* NP 0F F8 /r: PSUBB mm, mm/m64 */
+INSN_WRR(psubb, LEG(NP, 0F, 0), 0xf8, MMX, Pq, Pq, Qq)
+/* NP 0F F9 /r: PSUBW mm, mm/m64 */
+INSN_WRR(psubw, LEG(NP, 0F, 0), 0xf9, MMX, Pq, Pq, Qq)
+/* NP 0F FA /r: PSUBD mm, mm/m64 */
+INSN_WRR(psubd, LEG(NP, 0F, 0), 0xfa, MMX, Pq, Pq, Qq)
+/* NP 0F E8 /r: PSUBSB mm, mm/m64 */
+INSN_WRR(psubsb, LEG(NP, 0F, 0), 0xe8, MMX, Pq, Pq, Qq)
+/* NP 0F E9 /r: PSUBSW mm, mm/m64 */
+INSN_WRR(psubsw, LEG(NP, 0F, 0), 0xe9, MMX, Pq, Pq, Qq)
+/* NP 0F D8 /r: PSUBUSB mm, mm/m64 */
+INSN_WRR(psubusb, LEG(NP, 0F, 0), 0xd8, MMX, Pq, Pq, Qq)
+/* NP 0F D9 /r: PSUBUSW mm, mm/m64 */
+INSN_WRR(psubusw, LEG(NP, 0F, 0), 0xd9, MMX, Pq, Pq, Qq)
+/* NP 0F D5 /r: PMULLW mm, mm/m64 */
+INSN_WRR(pmullw, LEG(NP, 0F, 0), 0xd5, MMX, Pq, Pq, Qq)
+/* NP 0F E5 /r: PMULHW mm, mm/m64 */
+INSN_WRR(pmulhw, LEG(NP, 0F, 0), 0xe5, MMX, Pq, Pq, Qq)
+/* NP 0F F5 /r: PMADDWD mm, mm/m64 */
+INSN_WRR(pmaddwd, LEG(NP, 0F, 0), 0xf5, MMX, Pq, Pq, Qq)
+/* NP 0F 74 /r: PCMPEQB mm,mm/m64 */
+INSN_WRR(pcmpeqb, LEG(NP, 0F, 0), 0x74, MMX, Pq, Pq, Qq)
+/* NP 0F 75 /r: PCMPEQW mm,mm/m64 */
+INSN_WRR(pcmpeqw, LEG(NP, 0F, 0), 0x75, MMX, Pq, Pq, Qq)
+/* NP 0F 76 /r: PCMPEQD mm,mm/m64 */
+INSN_WRR(pcmpeqd, LEG(NP, 0F, 0), 0x76, MMX, Pq, Pq, Qq)
+/* NP 0F 64 /r: PCMPGTB mm,mm/m64 */
+INSN_WRR(pcmpgtb, LEG(NP, 0F, 0), 0x64, MMX, Pq, Pq, Qq)
+/* NP 0F 65 /r: PCMPGTW mm,mm/m64 */
+INSN_WRR(pcmpgtw, LEG(NP, 0F, 0), 0x65, MMX, Pq, Pq, Qq)
+/* NP 0F 66 /r: PCMPGTD mm,mm/m64 */
+INSN_WRR(pcmpgtd, LEG(NP, 0F, 0), 0x66, MMX, Pq, Pq, Qq)
+/* NP 0F DB /r: PAND mm, mm/m64 */
+INSN_WRR(pand, LEG(NP, 0F, 0), 0xdb, MMX, Pq, Pq, Qq)
+/* NP 0F DF /r: PANDN mm, mm/m64 */
+INSN_WRR(pandn, LEG(NP, 0F, 0), 0xdf, MMX, Pq, Pq, Qq)
+/* NP 0F EB /r: POR mm, mm/m64 */
+INSN_WRR(por, LEG(NP, 0F, 0), 0xeb, MMX, Pq, Pq, Qq)
+/* NP 0F EF /r: PXOR mm, mm/m64 */
+INSN_WRR(pxor, LEG(NP, 0F, 0), 0xef, MMX, Pq, Pq, Qq)
+/* NP 0F F1 /r: PSLLW mm, mm/m64 */
+INSN_WRR(psllw, LEG(NP, 0F, 0), 0xf1, MMX, Pq, Pq, Qq)
+/* NP 0F F2 /r: PSLLD mm, mm/m64 */
+INSN_WRR(pslld, LEG(NP, 0F, 0), 0xf2, MMX, Pq, Pq, Qq)
+/* NP 0F F3 /r: PSLLQ mm, mm/m64 */
+INSN_WRR(psllq, LEG(NP, 0F, 0), 0xf3, MMX, Pq, Pq, Qq)
+/* NP 0F D1 /r: PSRLW mm, mm/m64 */
+INSN_WRR(psrlw, LEG(NP, 0F, 0), 0xd1, MMX, Pq, Pq, Qq)
+/* NP 0F D2 /r: PSRLD mm, mm/m64 */
+INSN_WRR(psrld, LEG(NP, 0F, 0), 0xd2, MMX, Pq, Pq, Qq)
+/* NP 0F D3 /r: PSRLQ mm, mm/m64 */
+INSN_WRR(psrlq, LEG(NP, 0F, 0), 0xd3, MMX, Pq, Pq, Qq)
+/* NP 0F E1 /r: PSRAW mm,mm/m64 */
+INSN_WRR(psraw, LEG(NP, 0F, 0), 0xe1, MMX, Pq, Pq, Qq)
+/* NP 0F E2 /r: PSRAD mm,mm/m64 */
+INSN_WRR(psrad, LEG(NP, 0F, 0), 0xe2, MMX, Pq, Pq, Qq)
+/* NP 0F 63 /r: PACKSSWB mm1, mm2/m64 */
+INSN_WRR(packsswb, LEG(NP, 0F, 0), 0x63, MMX, Pq, Pq, Qq)
+/* NP 0F 6B /r: PACKSSDW mm1, mm2/m64 */
+INSN_WRR(packssdw, LEG(NP, 0F, 0), 0x6b, MMX, Pq, Pq, Qq)
+/* NP 0F 67 /r: PACKUSWB mm, mm/m64 */
+INSN_WRR(packuswb, LEG(NP, 0F, 0), 0x67, MMX, Pq, Pq, Qq)
+/* NP 0F 68 /r: PUNPCKHBW mm, mm/m64 */
+INSN_WRR(punpckhbw, LEG(NP, 0F, 0), 0x68, MMX, Pq, Pq, Qq)
+/* NP 0F 69 /r: PUNPCKHWD mm, mm/m64 */
+INSN_WRR(punpckhwd, LEG(NP, 0F, 0), 0x69, MMX, Pq, Pq, Qq)
+/* NP 0F 6A /r: PUNPCKHDQ mm, mm/m64 */
+INSN_WRR(punpckhdq, LEG(NP, 0F, 0), 0x6a, MMX, Pq, Pq, Qq)
+/* NP 0F 60 /r: PUNPCKLBW mm, mm/m32 */
+INSN_WRR(punpcklbw, LEG(NP, 0F, 0), 0x60, MMX, Pq, Pq, Qd)
+/* NP 0F 61 /r: PUNPCKLWD mm, mm/m32 */
+INSN_WRR(punpcklwd, LEG(NP, 0F

[Qemu-devel] [RFC PATCH v2 24/39] target/i386: introduce Ib (immediate) operand

2019-08-09 Thread Jan Bobek
Introduce the immediate-byte operand, which loads a byte from the
instruction stream and passes its value as the operand.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 0da064d5fd..b8e6eaebb4 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4697,6 +4697,14 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
 insnop_finalize(opTrm)(env, s, modrm, ); \
 } while (0))
 
+/*
+ * Immediate operand
+ */
+INSNOP(Ib, int8_t,  \
+   INSNOP_INIT_OK(*op), \
+   (*op = x86_ldub_code(env, s)),   \
+   INSNOP_FINALIZE_INVALID)
+
 /*
  * Code generators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 36/39] target/i386: introduce SSE translators

2019-08-09 Thread Jan Bobek
Use the translator macros to define translators required by SSE
instructions.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 5802b324f0..12d2ac2eb5 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -5110,6 +5110,9 @@ static void translate_insn(
 (*gen_insn_fp)(env, s, arg1);   \
 }
 
+TRANSLATE_INSN_R(Mb)
+TRANSLATE_INSN_R(Md)
+
 #define TRANSLATE_INSN_RR(opR1, opR2)   \
 static void translate_insn_rr(opR1, opR2)(  \
 CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
@@ -5131,6 +5134,13 @@ static void translate_insn(
 (*gen_insn_fp)(env, s, arg1, arg2); \
 }
 
+TRANSLATE_INSN_RR(Pq, Nq)
+TRANSLATE_INSN_RR(Mq, Pq)
+TRANSLATE_INSN_RR(Vd, Wd)
+TRANSLATE_INSN_RR(Mq, Vq)
+TRANSLATE_INSN_RR(Mq, Vdq)
+TRANSLATE_INSN_RR(Mdq, Vdq)
+
 #define TRANSLATE_INSN_W(opW1)  \
 static void translate_insn_w(opW1)( \
 CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
@@ -5178,6 +5188,20 @@ TRANSLATE_INSN_WR(Qq, Pq)
 TRANSLATE_INSN_WR(Gd, Nq)
 TRANSLATE_INSN_WR(Gq, Nq)
 
+TRANSLATE_INSN_WR(Vd, Wd)
+TRANSLATE_INSN_WR(Vdq, Wdq)
+TRANSLATE_INSN_WR(Vq, UdqMq)
+TRANSLATE_INSN_WR(Wd, Vd)
+TRANSLATE_INSN_WR(Wdq, Vdq)
+TRANSLATE_INSN_WR(Gd, Udq)
+TRANSLATE_INSN_WR(Gq, Udq)
+TRANSLATE_INSN_WR(Vdq, Qq)
+TRANSLATE_INSN_WR(Vd, Ed)
+TRANSLATE_INSN_WR(Vd, Eq)
+TRANSLATE_INSN_WR(Pq, Wq)
+TRANSLATE_INSN_WR(Gd, Wd)
+TRANSLATE_INSN_WR(Gq, Wd)
+
 #define TRANSLATE_INSN_WRR(opW1, opR1, opR2)\
 static void translate_insn_wrr(opW1, opR1, opR2)(   \
 CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
@@ -5209,6 +5233,11 @@ TRANSLATE_INSN_WRR(Gd, Nq, Ib)
 TRANSLATE_INSN_WRR(Gq, Nq, Ib)
 TRANSLATE_INSN_WRR(Nq, Nq, Ib)
 
+TRANSLATE_INSN_WRR(Vd, Vd, Wd)
+TRANSLATE_INSN_WRR(Vdq, Vdq, Wdq)
+TRANSLATE_INSN_WRR(Vdq, Vq, UqMq)
+TRANSLATE_INSN_WRR(Vdq, Vdq, UdMd)
+
 #define TRANSLATE_INSN_WRRR(opW1, opR1, opR2, opR3) \
 static void translate_insn_wrrr(opW1, opR1, opR2, opR3)(\
 CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
@@ -5236,6 +5265,10 @@ TRANSLATE_INSN_WRR(Nq, Nq, Ib)
 insnop_finalize(opW1)(env, s, modrm, &ret); \
 }
 
+TRANSLATE_INSN_WRRR(Vd, Vd, Wd, Ib)
+TRANSLATE_INSN_WRRR(Vdq, Vdq, Wdq, Ib)
+TRANSLATE_INSN_WRRR(Pq, Pq, RdMw, Ib)
+
 #define INSN_GRP_BEGIN(grpname) \
 static void translate_group(grpname)(   \
 CPUX86State *env, DisasContext *s, int modrm)   \
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 22/39] target/i386: introduce code generators

2019-08-09 Thread Jan Bobek
In this context, "code generators" are functions that receive decoded
instruction operands and emit TCG ops implementing the correct
instruction functionality. Introduce the naming macros first, actual
generator macros will be added later.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index ebb68fef0b..30180d1c25 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4697,6 +4697,24 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
 insnop_finalize(opTrm)(env, s, modrm, ); \
 } while (0))
 
+/*
+ * Code generators
+ */
+#define gen_insn(mnem)  \
+gen_ ## mnem
+#define gen_insn_r(mnem, opR1)  \
+gen_ ## mnem ## _ ## opR1
+#define gen_insn_rr(mnem, opR1, opR2)   \
+gen_ ## mnem ## _ ## opR1 ## opR2
+#define gen_insn_w(mnem, opW1)  \
+gen_ ## mnem ## _ ## opW1
+#define gen_insn_wr(mnem, opW1, opR1)   \
+gen_ ## mnem ## _ ## opW1 ## opR1
+#define gen_insn_wrr(mnem, opW1, opR1, opR2)\
+gen_ ## mnem ## _ ## opW1 ## opR1 ## opR2
+#define gen_insn_wrrr(mnem, opW1, opR1, opR2, opR3) \
+gen_ ## mnem ## _ ## opW1 ## opR1 ## opR2 ## opR3
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 31/39] target/i386: introduce MMX translators

2019-08-09 Thread Jan Bobek
Use the translator macros to define instruction translators required
by MMX instructions.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 36f2579654..3475727380 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -5005,6 +5005,15 @@ static void translate_insn(
 insnop_finalize(opW1)(env, s, modrm, &ret); \
 }
 
+TRANSLATE_INSN_WR(Pq, Ed)
+TRANSLATE_INSN_WR(Pq, Eq)
+TRANSLATE_INSN_WR(Ed, Pq)
+TRANSLATE_INSN_WR(Eq, Pq)
+TRANSLATE_INSN_WR(Pq, Qq)
+TRANSLATE_INSN_WR(Qq, Pq)
+TRANSLATE_INSN_WR(Gd, Nq)
+TRANSLATE_INSN_WR(Gq, Nq)
+
 #define TRANSLATE_INSN_WRR(opW1, opR1, opR2)\
 static void translate_insn_wrr(opW1, opR1, opR2)(   \
 CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
@@ -5029,6 +5038,13 @@ static void translate_insn(
 insnop_finalize(opW1)(env, s, modrm, &ret); \
 }
 
+TRANSLATE_INSN_WRR(Pq, Pq, Qd)
+TRANSLATE_INSN_WRR(Pq, Pq, Qq)
+TRANSLATE_INSN_WRR(Pq, Qq, Ib)
+TRANSLATE_INSN_WRR(Gd, Nq, Ib)
+TRANSLATE_INSN_WRR(Gq, Nq, Ib)
+TRANSLATE_INSN_WRR(Nq, Nq, Ib)
+
 #define TRANSLATE_INSN_WRRR(opW1, opR1, opR2, opR3) \
 static void translate_insn_wrrr(opW1, opR1, opR2, opR3)(\
 CPUX86State *env, DisasContext *s, int modrm, int ck_cpuid_feat, \
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 28/39] target/i386: introduce P*, N*, Q* (MMX) operands

2019-08-09 Thread Jan Bobek
These address the MMX-technology register file; the corresponding
cpu_env offset is passed as the operand value. Notably, the offset of the
entire register is passed at all times, regardless of the operand-size
suffix.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9896f1c99e..19b92d61f6 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4801,6 +4801,43 @@ INSNOP_LDST(RqMq, Rq, Mq, NULL,
 INSNOP_LDST_UNIFY(Ed, Rd, RdMd)
 INSNOP_LDST_UNIFY(Eq, Rq, RqMq)
 
+/*
+ * MMX registers
+ */
+#define INSNOP_INIT_MM(mmid_fp) \
+do {\
+const int mmid = mmid_fp(env, s, modrm);\
+INSNOP_INIT_OK(offsetof(CPUX86State, fpregs[mmid].mmx));\
+} while (0)
+
+#define INSNOP_MM(opT, init_stmt)  \
+INSNOP(opT, uint32_t,  \
+   init_stmt,  \
+   INSNOP_PREPARE_NOOP,\
+   INSNOP_FINALIZE_NOOP)
+
+INSNOP_MM(P, INSNOP_INIT_MM(decode_modrm_reg_norexr))
+INSNOP_ALIAS(Pd, P)
+INSNOP_ALIAS(Pq, P)
+
+INSNOP_MM(N, INSNOP_INIT_DIRECT_ONLY(INSNOP_INIT_MM(decode_modrm_rm_norexb)))
+INSNOP_ALIAS(Nd, N)
+INSNOP_ALIAS(Nq, N)
+
+INSNOP_LDST(NdMd, Nd, Md, offsetof(CPUX86State, mmx_t0),
+(assert(ptr == s->A0),
+ gen_ldd_env_A0(s, reg + offsetof(MMXReg, MMX_L(0)))),
+(assert(ptr == s->A0),
+ gen_std_env_A0(s, reg + offsetof(MMXReg, MMX_L(0)))))
+INSNOP_LDST(NqMq, Nq, Mq, offsetof(CPUX86State, mmx_t0),
+(assert(ptr == s->A0),
+ gen_ldq_env_A0(s, reg + offsetof(MMXReg, MMX_Q(0)))),
+(assert(ptr == s->A0),
+ gen_stq_env_A0(s, reg + offsetof(MMXReg, MMX_Q(0)))))
+
+INSNOP_LDST_UNIFY(Qd, Nd, NdMd)
+INSNOP_LDST_UNIFY(Qq, Nq, NqMq)
+
 /*
  * Code generators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 27/39] target/i386: introduce RdMw operand

2019-08-09 Thread Jan Bobek
The PINSRW family of instructions has a peculiar second operand: the
32-bit general-purpose register file is addressed, but if the operand
is indirect, only 16 bits are loaded from memory. Reflect this with the
RdMw operand.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 0e57d5f049..9896f1c99e 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4777,6 +4777,9 @@ INSNOP(Rq, TCGv_i64, INSNOP_INIT_FAIL,
 #endif /* !TARGET_X86_64 */
 
 #ifdef TARGET_X86_64
+INSNOP_LDST(RdMw, Rd, Mw, s->tmp3_i32,
+tcg_gen_qemu_ld_i32(reg, ptr, s->mem_index, MO_LEUW),
+tcg_gen_qemu_st_i32(reg, ptr, s->mem_index, MO_LEUW))
 INSNOP_LDST(RdMd, Rd, Md, s->tmp3_i32,
 tcg_gen_qemu_ld_i32(reg, ptr, s->mem_index, MO_LEUL),
 tcg_gen_qemu_st_i32(reg, ptr, s->mem_index, MO_LEUL))
@@ -4784,6 +4787,9 @@ INSNOP_LDST(RqMq, Rq, Mq, s->T0,
 tcg_gen_qemu_ld_i64(reg, ptr, s->mem_index, MO_LEQ),
 tcg_gen_qemu_st_i64(reg, ptr, s->mem_index, MO_LEQ))
 #else /* !TARGET_X86_64 */
+INSNOP_LDST(RdMw, Rd, Mw, s->T0,
+            tcg_gen_qemu_ld_i32(reg, ptr, s->mem_index, MO_LEUW),
+            tcg_gen_qemu_st_i32(reg, ptr, s->mem_index, MO_LEUW))
 INSNOP_LDST(RdMd, Rd, Md, s->T0,
 tcg_gen_qemu_ld_i32(reg, ptr, s->mem_index, MO_LEUL),
 tcg_gen_qemu_st_i32(reg, ptr, s->mem_index, MO_LEUL))
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 29/39] target/i386: introduce helper-based code generator macros

2019-08-09 Thread Jan Bobek
Code generators defined using these macros rely on a helper function
(as emitted by gen_helper_*).

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 19b92d61f6..d721bb5142 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4856,6 +4856,32 @@ INSNOP_LDST_UNIFY(Qq, Nq, NqMq)
 #define gen_insn_wrrr(mnem, opW1, opR1, opR2, opR3) \
 gen_ ## mnem ## _ ## opW1 ## opR1 ## opR2 ## opR3
 
+#define GEN_INSN_HELPER(mnem, helper)   \
+static void gen_insn(mnem)( \
+CPUX86State *env, DisasContext *s)  \
+{   \
+gen_helper_ ## helper(cpu_env); \
+}
+#define GEN_INSN_WR_HELPER(mnem, helper, opW1, opR1)\
+static void gen_insn_wr(mnem, opW1, opR1)(  \
+CPUX86State *env, DisasContext *s, insnop_t(opW1) ret,  \
+insnop_t(opR1) arg1)\
+{   \
+tcg_gen_addi_ptr(s->ptr0, cpu_env, ret);\
+tcg_gen_addi_ptr(s->ptr1, cpu_env, arg1);   \
+gen_helper_ ## helper(cpu_env, s->ptr0, s->ptr1);   \
+}
+#define GEN_INSN_WRR_HELPER(mnem, helper, opW1, opR1, opR2) \
+static void gen_insn_wrr(mnem, opW1, opR1, opR2)(   \
+CPUX86State *env, DisasContext *s, insnop_t(opW1) ret,  \
+insnop_t(opR1) arg1, insnop_t(opR2) arg2)   \
+{   \
+assert(ret == arg1);\
+tcg_gen_addi_ptr(s->ptr0, cpu_env, ret);\
+tcg_gen_addi_ptr(s->ptr1, cpu_env, arg2);   \
+gen_helper_ ## helper(cpu_env, s->ptr0, s->ptr1);   \
+}
+
 /*
  * Instruction translators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 35/39] target/i386: introduce UdqMq operand

2019-08-09 Thread Jan Bobek
The MOVHLPS instruction has a special operand: it reads the high
quadword of the source operand (hence it requires the full
double-quadword width), but if the operand is indirect, only 64 bits
are read from memory. Introduce the UdqMq operand to address this case.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 97614e5941..5802b324f0 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4876,6 +4876,11 @@ INSNOP_LDST(UqMq, Uq, Mq, offsetof(CPUX86State, xmm_t0),
  gen_ldq_env_A0(s, reg + offsetof(ZMMReg, ZMM_Q(0,
 (assert(ptr == s->A0),
  gen_stq_env_A0(s, reg + offsetof(ZMMReg, ZMM_Q(0)
+INSNOP_LDST(UdqMq, Udq, Mq, offsetof(CPUX86State, xmm_t0),
+            (assert(ptr == s->A0),
+             gen_ldq_env_A0(s, reg + offsetof(ZMMReg, ZMM_Q(0)))),
+            (assert(ptr == s->A0),
+             gen_stq_env_A0(s, reg + offsetof(ZMMReg, ZMM_Q(0)))))
 INSNOP_LDST(UdqMdq, Udq, Mdq, offsetof(CPUX86State, xmm_t0),
 (assert(ptr == s->A0), gen_ldo_env_A0(s, reg)),
 (assert(ptr == s->A0), gen_sto_env_A0(s, reg)))
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 20/39] target/i386: introduce generic load-store operand

2019-08-09 Thread Jan Bobek
This operand attempts to capture the "indirect" or "memory" operand in
a generic way. It significantly reduces the amount of code that needs to
be written in order to read operands from memory to temporary storage
and write them back.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 78 +
 1 file changed, 78 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index cd2467e6a5..ebb68fef0b 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4619,6 +4619,84 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
insnop_prepare(opT2)(env, s, modrm, op),   \
insnop_finalize(opT2)(env, s, modrm, op))
 
+/*
+ * "Load-store" operand helper
+ */
+#define INSNOP_LDST(opT, opTr, opTm, scratch_op, ld_stmt, st_stmt)      \
+    INSNOP(                                                             \
+        opT,                                                            \
+        struct {                                                        \
+            bool is_mem;                                                \
+            insnop_t(opTr) op_reg;                                      \
+        },                                                              \
+        do {                                                            \
+            insnop_t(opTr) reg;                                         \
+            insnop_t(opTm) ptr;                                         \
+            if (!insnop_init(opTr)(env, s, modrm, &reg)) {              \
+                op->is_mem = 0;                                         \
+                op->op_reg = reg;                                       \
+                INSNOP_INIT_OK(*op);                                    \
+            } else if (!insnop_init(opTm)(env, s, modrm, &ptr)) {       \
+                op->is_mem = 1;                                         \
+                op->op_reg = (scratch_op);                              \
+                INSNOP_INIT_OK(*op);                                    \
+            }                                                           \
+            INSNOP_INIT_FAIL;                                           \
+        } while (0),                                                    \
+        do {                                                            \
+            insnop_t(opTr) reg = op->op_reg;                            \
+            if (op->is_mem) {                                           \
+                insnop_t(opTm) ptr;                                     \
+                const int ret = insnop_init(opTm)(env, s, modrm, &ptr); \
+                assert(!ret);                                           \
+                                                                        \
+                insnop_prepare(opTm)(env, s, modrm, &ptr);              \
+                ld_stmt;                                                \
+            } else {                                                    \
+                insnop_prepare(opTr)(env, s, modrm, &reg);              \
+            }                                                           \
+        } while (0),                                                    \
+        do {                                                            \
+            insnop_t(opTr) reg = op->op_reg;                            \
+            if (op->is_mem) {                                           \
+                insnop_t(opTm) ptr;                                     \
+                const int ret = insnop_init(opTm)(env, s, modrm, &ptr); \
+                assert(!ret);                                           \
+                                                                        \
+                insnop_prepare(opTm)(env, s, modrm, &ptr);              \
+                st_stmt;                                                \
+            } else {                                                    \
+                insnop_finalize(opTr)(env, s, modrm, &reg);             \
+            }                                                           \
+        } while (0))
+
+#define INSNOP_LDST_UNIFY(opT, opTr, opTrm) \
+INSNOP( \
+opT, insnop_t(opTr),\
+do {\
+insnop_t(opTrm) rm; \
+if (!insnop_init(opTrm)(env, s, modrm, )) {  \
+INSNOP_INIT_OK(rm.op_reg);  \
+}   \
+INSNOP_INIT_FAIL;   \
+} while (0),   

[Qemu-devel] [RFC PATCH v2 26/39] target/i386: introduce G*, R*, E* (general register) operands

2019-08-09 Thread Jan Bobek
These address the general-purpose register file. The corresponding
32-bit or 64-bit register is passed as the operand value.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 65 +
 1 file changed, 65 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 301dc4eddf..0e57d5f049 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4730,6 +4730,71 @@ INSNOP_ALIAS(Md, M)
 INSNOP_ALIAS(Mdq, M)
 INSNOP_ALIAS(Mqq, M)
 
+/*
+ * General registers
+ */
+#define INSNOP_R32(opT, regid_fp, init_stmt)\
+INSNOP( \
+opT, TCGv_i32, init_stmt,   \
+do {\
+const int regid = regid_fp(env, s, modrm);  \
+tcg_gen_trunc_tl_i32(*op, cpu_regs[regid]); \
+} while (0),\
+do {\
+const int regid = regid_fp(env, s, modrm);  \
+tcg_gen_extu_i32_tl(cpu_regs[regid], *op);  \
+} while (0))
+
+#define INSNOP_R64(opT, regid_fp, init_stmt)\
+INSNOP( \
+opT, TCGv_i64, init_stmt,   \
+do {\
+const int regid = regid_fp(env, s, modrm);  \
+tcg_gen_mov_i64(*op, cpu_regs[regid]);  \
+} while (0),\
+do {\
+const int regid = regid_fp(env, s, modrm);  \
+tcg_gen_mov_i64(cpu_regs[regid], *op);  \
+} while (0))
+
+#ifdef TARGET_X86_64
+INSNOP_R32(Gd, decode_modrm_reg_rexr, INSNOP_INIT_OK(s->tmp2_i32))
+INSNOP_R64(Gq, decode_modrm_reg_rexr, INSNOP_INIT_OK(s->T1))
+
+INSNOP_R32(Rd, decode_modrm_rm_rexb,
+   INSNOP_INIT_DIRECT_ONLY(INSNOP_INIT_OK(s->tmp3_i32)))
+INSNOP_R64(Rq, decode_modrm_rm_rexb,
+   INSNOP_INIT_DIRECT_ONLY(INSNOP_INIT_OK(s->T0)))
+#else /* !TARGET_X86_64 */
+INSNOP_R32(Gd, decode_modrm_reg_rexr, INSNOP_INIT_OK(s->T1))
+INSNOP(Gq, TCGv_i64, INSNOP_INIT_FAIL,
+   INSNOP_PREPARE_INVALID, INSNOP_FINALIZE_INVALID)
+
+INSNOP_R32(Rd, decode_modrm_rm_rexb,
+   INSNOP_INIT_DIRECT_ONLY(INSNOP_INIT_OK(s->T0)))
+INSNOP(Rq, TCGv_i64, INSNOP_INIT_FAIL,
+   INSNOP_PREPARE_INVALID, INSNOP_FINALIZE_INVALID)
+#endif /* !TARGET_X86_64 */
+
+#ifdef TARGET_X86_64
+INSNOP_LDST(RdMd, Rd, Md, s->tmp3_i32,
+tcg_gen_qemu_ld_i32(reg, ptr, s->mem_index, MO_LEUL),
+tcg_gen_qemu_st_i32(reg, ptr, s->mem_index, MO_LEUL))
+INSNOP_LDST(RqMq, Rq, Mq, s->T0,
+tcg_gen_qemu_ld_i64(reg, ptr, s->mem_index, MO_LEQ),
+tcg_gen_qemu_st_i64(reg, ptr, s->mem_index, MO_LEQ))
+#else /* !TARGET_X86_64 */
+INSNOP_LDST(RdMd, Rd, Md, s->T0,
+tcg_gen_qemu_ld_i32(reg, ptr, s->mem_index, MO_LEUL),
+tcg_gen_qemu_st_i32(reg, ptr, s->mem_index, MO_LEUL))
+INSNOP_LDST(RqMq, Rq, Mq, NULL,
+INSNOP_PREPARE_INVALID,
+INSNOP_FINALIZE_INVALID)
+#endif /* !TARGET_X86_64 */
+
+INSNOP_LDST_UNIFY(Ed, Rd, RdMd)
+INSNOP_LDST_UNIFY(Eq, Rq, RqMq)
+
 /*
  * Code generators
  */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 16/39] target/i386: introduce instruction operand infrastructure

2019-08-09 Thread Jan Bobek
insnop_t and the init, prepare and finalize functions form the basis
of instruction operand decoding. Introduce macros for defining a
generic instruction operand; use cases for operand decoding will be
introduced later with instruction translators.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 508d584584..109e4922eb 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4545,6 +4545,47 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
 }
 }
 
+/*
+ * Core instruction operand infrastructure
+ */
+#define insnop_t(opT)insnop_ ## opT ## _t
+#define insnop_init(opT) insnop_ ## opT ## _init
+#define insnop_prepare(opT)  insnop_ ## opT ## _prepare
+#define insnop_finalize(opT) insnop_ ## opT ## _finalize
+
+#define TYPEDEF_INSNOP_T(opT, type) \
+typedef type insnop_t(opT);
+#define INSNOP_INIT(opT, init_stmt)\
+static int insnop_init(opT)(CPUX86State *env, DisasContext *s, \
+int modrm, insnop_t(opT) *op)  \
+{  \
+init_stmt; \
+}
+#define INSNOP_PREPARE(opT, prepare_stmt)   \
+static void insnop_prepare(opT)(CPUX86State *env, DisasContext *s,  \
+int modrm, insnop_t(opT) *op)   \
+{   \
+prepare_stmt;   \
+}
+#define INSNOP_FINALIZE(opT, finalize_stmt) \
+static void insnop_finalize(opT)(CPUX86State *env, DisasContext *s, \
+ int modrm, insnop_t(opT) *op)  \
+{   \
+finalize_stmt;  \
+}
+#define INSNOP(opT, type, init_stmt, prepare_stmt, finalize_stmt)   \
+TYPEDEF_INSNOP_T(opT, type) \
+INSNOP_INIT(opT, init_stmt) \
+INSNOP_PREPARE(opT, prepare_stmt)   \
+INSNOP_FINALIZE(opT, finalize_stmt)
+
+#define INSNOP_INIT_FAILreturn 1
+#define INSNOP_INIT_OK(x)   return ((*(op) = (x)), 0)
+#define INSNOP_PREPARE_NOOP /* no-op */
+#define INSNOP_PREPARE_INVALID  g_assert_not_reached()
+#define INSNOP_FINALIZE_NOOP/* no-op */
+#define INSNOP_FINALIZE_INVALID g_assert_not_reached()
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 32/39] target/i386: introduce MMX code generators

2019-08-09 Thread Jan Bobek
Define code generators required for MMX instructions.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 114 
 1 file changed, 114 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 3475727380..aa6fb8b013 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4890,6 +4890,9 @@ INSNOP_LDST_UNIFY(Qq, Nq, NqMq)
 {   \
 tcg_gen_gvec_ ## gvec(vece, ret, arg1, oprsz, maxsz);   \
 }
+#define GEN_INSN_WR_GVEC_MM(mnem, gvec, opW1, opR1, vece)   \
+GEN_INSN_WR_GVEC(mnem, gvec, opW1, opR1, vece,  \
+ sizeof(MMXReg), sizeof(MMXReg))
 
 #define GEN_INSN_WRR_GVEC(mnem, gvec, opW1, opR1, opR2, vece, oprsz, maxsz) \
 static void gen_insn_wrr(mnem, opW1, opR1, opR2)(   \
@@ -4898,6 +4901,117 @@ INSNOP_LDST_UNIFY(Qq, Nq, NqMq)
 {   \
 tcg_gen_gvec_ ## gvec(vece, ret, arg1, arg2, oprsz, maxsz); \
 }
+#define GEN_INSN_WRR_GVEC_MM(mnem, gvec, opW1, opR1, opR2, vece)\
+GEN_INSN_WRR_GVEC(mnem, gvec, opW1, opR1, opR2, vece,   \
+  sizeof(MMXReg), sizeof(MMXReg))
+
+static void gen_insn_wr(movq, Eq, Pq)(CPUX86State *env, DisasContext *s,
+  insnop_t(Eq) ret, insnop_t(Pq) arg1)
+{
+const size_t ofs = offsetof(MMXReg, MMX_Q(0));
+tcg_gen_ld_i64(ret, cpu_env, arg1 + ofs);
+}
+
+static void gen_insn_wr(movd, Ed, Pq)(CPUX86State *env, DisasContext *s,
+  insnop_t(Ed) ret, insnop_t(Pq) arg1)
+{
+const size_t ofs = offsetof(MMXReg, MMX_L(0));
+tcg_gen_ld_i32(ret, cpu_env, arg1 + ofs);
+}
+
+static void gen_insn_wr(movq, Pq, Eq)(CPUX86State *env, DisasContext *s,
+  insnop_t(Pq) ret, insnop_t(Eq) arg1)
+{
+const size_t ofs = offsetof(MMXReg, MMX_Q(0));
+tcg_gen_st_i64(arg1, cpu_env, ret + ofs);
+}
+
+static void gen_insn_wr(movd, Pq, Ed)(CPUX86State *env, DisasContext *s,
+  insnop_t(Pq) ret, insnop_t(Ed) arg1)
+{
+const insnop_t(Eq) r64 = s->tmp1_i64;
+tcg_gen_extu_i32_i64(r64, arg1);
+gen_insn_wr(movq, Pq, Eq)(env, s, ret, r64);
+}
+
+GEN_INSN_WR_GVEC_MM(movq, mov, Pq, Qq, MO_64)
+GEN_INSN_WR_GVEC_MM(movq, mov, Qq, Pq, MO_64)
+
+GEN_INSN_WRR_GVEC_MM(paddb, add, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(paddw, add, Pq, Pq, Qq, MO_16)
+GEN_INSN_WRR_GVEC_MM(paddd, add, Pq, Pq, Qq, MO_32)
+GEN_INSN_WRR_GVEC_MM(paddsb, ssadd, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(paddsw, ssadd, Pq, Pq, Qq, MO_16)
+GEN_INSN_WRR_GVEC_MM(paddusb, usadd, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(paddusw, usadd, Pq, Pq, Qq, MO_16)
+
+GEN_INSN_WRR_GVEC_MM(psubb, sub, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(psubw, sub, Pq, Pq, Qq, MO_16)
+GEN_INSN_WRR_GVEC_MM(psubd, sub, Pq, Pq, Qq, MO_32)
+GEN_INSN_WRR_GVEC_MM(psubsb, sssub, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(psubsw, sssub, Pq, Pq, Qq, MO_16)
+GEN_INSN_WRR_GVEC_MM(psubusb, ussub, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(psubusw, ussub, Pq, Pq, Qq, MO_16)
+
+GEN_INSN_WRR_HELPER(pmulhw, pmulhw_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(pmullw, pmullw_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(pmaddwd, pmaddwd_mmx, Pq, Pq, Qq)
+
+GEN_INSN_WRR_GVEC_MM(pcmpeqb, cmpeq, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(pcmpeqw, cmpeq, Pq, Pq, Qq, MO_16)
+GEN_INSN_WRR_GVEC_MM(pcmpeqd, cmpeq, Pq, Pq, Qq, MO_32)
+GEN_INSN_WRR_GVEC_MM(pcmpgtb, cmpgt, Pq, Pq, Qq, MO_8)
+GEN_INSN_WRR_GVEC_MM(pcmpgtw, cmpgt, Pq, Pq, Qq, MO_16)
+GEN_INSN_WRR_GVEC_MM(pcmpgtd, cmpgt, Pq, Pq, Qq, MO_32)
+
+GEN_INSN_WRR_GVEC_MM(pand, and, Pq, Pq, Qq, MO_64)
+GEN_INSN_WRR_GVEC_MM(pandn, andn, Pq, Pq, Qq, MO_64)
+GEN_INSN_WRR_GVEC_MM(por, or, Pq, Pq, Qq, MO_64)
+GEN_INSN_WRR_GVEC_MM(pxor, xor, Pq, Pq, Qq, MO_64)
+
+GEN_INSN_WRR_HELPER(psllw, psllw_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(pslld, pslld_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(psllq, psllq_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(psrlw, psrlw_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(psrld, psrld_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(psrlq, psrlq_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(psraw, psraw_mmx, Pq, Pq, Qq)
+GEN_INSN_WRR_HELPER(psrad, psrad_mmx, Pq, Pq, Qq)
+
+#define GEN_PSHIFT_IMM_MM(mnem, opW1, opR1) \
+static void gen_insn_wrr(mnem, opW1, opR1, Ib)( \
+CPUX86State *env, DisasContext *s,  \
+insnop_t(opW1) ret, insnop_t(opR1) arg1, insnop_t(Ib) arg2) \
+{   \
+const uint64_t arg2_ui64 = (uint8_t)arg2;   \
+const insnop_t(Eq) arg2_r64 = s->tmp1_i64;  \
+const insnop_t(Qq) arg2_mm = offsetof(CPUX86State, mmx_t0.M

[Qemu-devel] [RFC PATCH v2 21/39] target/i386: introduce insn.h

2019-08-09 Thread Jan Bobek
This header is intended to eventually list all supported instructions
along with some useful details (e.g. mnemonics, opcode, operands etc.)
It shall be used (along with some preprocessor magic) anytime we need
to automatically generate code for every instruction.

Signed-off-by: Jan Bobek 
---
 target/i386/insn.h | 87 ++
 1 file changed, 87 insertions(+)
 create mode 100644 target/i386/insn.h

diff --git a/target/i386/insn.h b/target/i386/insn.h
new file mode 100644
index 00..4b48c0c0e1
--- /dev/null
+++ b/target/i386/insn.h
@@ -0,0 +1,87 @@
+#ifndef INSN
+#   define INSN(mnem, prefix, opcode, feat)
+#endif /* INSN */
+
+#ifndef INSN_R
+#   define INSN_R(mnem, prefix, opcode, feat, opR1)
+#endif /* INSN_R */
+
+#ifndef INSN_RR
+#   define INSN_RR(mnem, prefix, opcode, feat, opR1, opR2)
+#endif /* INSN_RR */
+
+#ifndef INSN_W
+#   define INSN_W(mnem, prefix, opcode, feat, opW1)
+#endif /* INSN_W */
+
+#ifndef INSN_WR
+#   define INSN_WR(mnem, prefix, opcode, feat, opW1, opR1)
+#endif /* INSN_WR */
+
+#ifndef INSN_WRR
+#   define INSN_WRR(mnem, prefix, opcode, feat, opW1, opR1, opR2)
+#endif /* INSN_WRR */
+
+#ifndef INSN_WRRR
+#   define INSN_WRRR(mnem, prefix, opcode, feat, opW1, opR1, opR2, opR3)
+#endif /* INSN_WRRR */
+
+#ifndef INSN_GRP
+#   define INSN_GRP(grpname, prefix, opcode)
+#endif /* INSN_GRP */
+
+#ifndef INSN_GRP_BEGIN
+#   define INSN_GRP_BEGIN(grpname)
+#endif /* INSN_GRP_BEGIN */
+
+#ifndef INSN_GRPMEMB
+#   define INSN_GRPMEMB(grpname, mnem, opcode, feat)
+#endif /* INSN_GRPMEMB */
+
+#ifndef INSN_GRPMEMB_R
+#   define INSN_GRPMEMB_R(grpname, mnem, opcode, feat, opR1)
+#endif /* INSN_GRPMEMB_R */
+
+#ifndef INSN_GRPMEMB_RR
+#   define INSN_GRPMEMB_RR(grpname, mnem, opcode, feat, opR1, opR2)
+#endif /* INSN_GRPMEMB_RR */
+
+#ifndef INSN_GRPMEMB_W
+#   define INSN_GRPMEMB_W(grpname, mnem, opcode, feat, opW1)
+#endif /* INSN_GRPMEMB_W */
+
+#ifndef INSN_GRPMEMB_WR
+#   define INSN_GRPMEMB_WR(grpname, mnem, opcode, feat, opW1, opR1)
+#endif /* INSN_GRPMEMB_WR */
+
+#ifndef INSN_GRPMEMB_WRR
+#   define INSN_GRPMEMB_WRR(grpname, mnem, opcode, feat, opW1, opR1, opR2)
+#endif /* INSN_GRPMEMB_WRR */
+
+#ifndef INSN_GRPMEMB_WRRR
+#   define INSN_GRPMEMB_WRRR(grpname, mnem, opcode, feat, opW1, opR1, opR2, 
opR3)
+#endif /* INSN_GRPMEMB_WRRR */
+
+#ifndef INSN_GRP_END
+#   define INSN_GRP_END(grpname)
+#endif /* INSN_GRP_END */
+
+#undef LEG
+#undef VEX
+#undef INSN
+#undef INSN_R
+#undef INSN_RR
+#undef INSN_W
+#undef INSN_WR
+#undef INSN_WRR
+#undef INSN_WRRR
+#undef INSN_GRP
+#undef INSN_GRP_BEGIN
+#undef INSN_GRPMEMB
+#undef INSN_GRPMEMB_R
+#undef INSN_GRPMEMB_RR
+#undef INSN_GRPMEMB_W
+#undef INSN_GRPMEMB_WR
+#undef INSN_GRPMEMB_WRR
+#undef INSN_GRPMEMB_WRRR
+#undef INSN_GRP_END
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 14/39] target/i386: introduce mnemonic aliases for several gvec operations

2019-08-09 Thread Jan Bobek
It is helpful to introduce aliases for some general gvec operations as
it makes a couple of instruction code generators simpler (added
later).

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 23550a21d3..03b49411e5 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4493,6 +4493,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 
+#define tcg_gen_gvec_andn(vece, dofs, aofs, bofs, oprsz, maxsz) \
+tcg_gen_gvec_andc(vece, dofs, bofs, aofs, oprsz, maxsz)
+#define tcg_gen_gvec_cmpeq(vece, dofs, aofs, bofs, oprsz, maxsz)\
+tcg_gen_gvec_cmp(TCG_COND_EQ, vece, dofs, aofs, bofs, oprsz, maxsz)
+#define tcg_gen_gvec_cmpgt(vece, dofs, aofs, bofs, oprsz, maxsz)\
+tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, maxsz)
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 19/39] target/i386: introduce generic operand alias

2019-08-09 Thread Jan Bobek
It turns out it is useful to be able to declare operand name
aliases. Introduce a macro to capture this functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 0bee7288e6..cd2467e6a5 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4610,6 +4610,15 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
 }   \
 } while (0)
 
+/*
+ * "Alias" operand helper
+ */
+#define INSNOP_ALIAS(opT, opT2)   \
+INSNOP(opT, insnop_t(opT2),   \
+   return insnop_init(opT2)(env, s, modrm, op),   \
+   insnop_prepare(opT2)(env, s, modrm, op),   \
+   insnop_finalize(opT2)(env, s, modrm, op))
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 07/39] target/i386: use pc_start from DisasContext

2019-08-09 Thread Jan Bobek
The variable pc_start is already a member of DisasContext. Remove the
superfluous local variable.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 131 
 1 file changed, 65 insertions(+), 66 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7532d65778..b1ba2fc3e5 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4495,13 +4495,12 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
-target_ulong pc_start = s->base.pc_next;
 
 {
 int prefixes;
 TCGMemOp aflag, dflag;
 
-s->pc_start = s->pc = pc_start;
+s->pc_start = s->pc = s->base.pc_next;
 s->override = -1;
 #ifdef TARGET_X86_64
 s->rex_x = 0;
@@ -6357,7 +6356,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa5:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_movs(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_movs(s, ot);
 }
@@ -6367,7 +6366,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xab:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_stos(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_stos(s, ot);
 }
@@ -6376,7 +6375,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xad:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_lods(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_lods(s, ot);
 }
@@ -6385,9 +6384,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xaf:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & PREFIX_REPNZ) {
-gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+gen_repz_scas(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 1);
 } else if (s->prefix & PREFIX_REPZ) {
-gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+gen_repz_scas(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 0);
 } else {
 gen_scas(s, ot);
 }
@@ -6397,9 +6396,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa7:
 ot = mo_b_d(b, s->dflag);
 if (s->prefix & PREFIX_REPNZ) {
-gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+gen_repz_cmps(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 1);
 } else if (s->prefix & PREFIX_REPZ) {
-gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+gen_repz_cmps(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base, 0);
 } else {
 gen_cmps(s, ot);
 }
@@ -6408,10 +6407,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 case 0x6d:
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
-gen_check_io(s, ot, pc_start - s->cs_base, 
+gen_check_io(s, ot, s->pc_start - s->cs_base,
  SVM_IOIO_TYPE_MASK | svm_is_rep(s->prefix) | 4);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_ins(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_ins(s, ot);
 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6423,10 +6422,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 case 0x6f:
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
-gen_check_io(s, ot, pc_start - s->cs_base,
+gen_check_io(s, ot, s->pc_start - s->cs_base,
  svm_is_rep(s->prefix) | 4);
 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
-gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+gen_repz_outs(s, ot, s->pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_outs(s, ot);
 if (tb_cflags(s

[Qemu-devel] [RFC PATCH v2 15/39] target/i386: introduce function ck_cpuid

2019-08-09 Thread Jan Bobek
Introduce a helper function to take care of instruction CPUID checks.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 45 +
 1 file changed, 45 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 03b49411e5..508d584584 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4500,6 +4500,51 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 #define tcg_gen_gvec_cmpgt(vece, dofs, aofs, bofs, oprsz, maxsz)\
 tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, maxsz)
 
+enum {
+CK_CPUID_MMX = 1,
+CK_CPUID_3DNOW,
+CK_CPUID_SSE,
+CK_CPUID_SSE2,
+CK_CPUID_SSE3,
+CK_CPUID_SSSE3,
+CK_CPUID_SSE4_1,
+CK_CPUID_SSE4_2,
+CK_CPUID_SSE4A,
+CK_CPUID_AVX,
+CK_CPUID_AVX2,
+};
+
+static int ck_cpuid(CPUX86State *env, DisasContext *s, int ck_cpuid_feat)
+{
+switch (ck_cpuid_feat) {
+case CK_CPUID_MMX:
+return !(s->cpuid_features & CPUID_MMX)
+|| !(s->cpuid_ext2_features & CPUID_EXT2_MMX);
+case CK_CPUID_3DNOW:
+return !(s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
+case CK_CPUID_SSE:
+return !(s->cpuid_features & CPUID_SSE);
+case CK_CPUID_SSE2:
+return !(s->cpuid_features & CPUID_SSE2);
+case CK_CPUID_SSE3:
+return !(s->cpuid_ext_features & CPUID_EXT_SSE3);
+case CK_CPUID_SSSE3:
+return !(s->cpuid_ext_features & CPUID_EXT_SSSE3);
+case CK_CPUID_SSE4_1:
+return !(s->cpuid_ext_features & CPUID_EXT_SSE41);
+case CK_CPUID_SSE4_2:
+return !(s->cpuid_ext_features & CPUID_EXT_SSE42);
+case CK_CPUID_SSE4A:
+return !(s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
+case CK_CPUID_AVX:
+return !(s->cpuid_ext_features & CPUID_EXT_AVX);
+case CK_CPUID_AVX2:
+return !(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
+default:
+g_assert_not_reached();
+}
+}
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 13/39] target/i386: disable unused function warning temporarily

2019-08-09 Thread Jan Bobek
Some functions added later are generated by preprocessor macros and
end up being unused (e.g. not all operands can serve as a destination
operand). Disable unused function warnings for the new code until I
figure out how I want to solve this particular issue.

Note: This changeset is intended for development only and shall not be
included in the final patch series.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index fbf10b57a2..23550a21d3 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4489,6 +4489,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 }
 }
 
+/* XXX TODO get rid of this eventually */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
@@ -4513,6 +4517,7 @@ static void gen_sse_ng(CPUX86State *env, DisasContext *s, 
int b)
 
 g_assert_not_reached();
 }
+#pragma GCC diagnostic pop
 
 /* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 10/39] target/i386: add vector register file alignment constraints

2019-08-09 Thread Jan Bobek
gvec operations require that all vectors be aligned on a 16-byte
boundary; make sure the MM/XMM/YMM/ZMM register file is aligned as
necessary.

Reviewed-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/cpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8b3dc5533e..cb407b86ba 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1199,9 +1199,9 @@ typedef struct CPUX86State {
 float_status mmx_status; /* for 3DNow! float ops */
 float_status sse_status;
 uint32_t mxcsr;
-ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
-ZMMReg xmm_t0;
-MMXReg mmx_t0;
+ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] QEMU_ALIGNED(16);
+ZMMReg xmm_t0 QEMU_ALIGNED(16);
+MMXReg mmx_t0 QEMU_ALIGNED(8);
 
 XMMReg ymmh_regs[CPU_NB_REGS];
 
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 18/39] target/i386: introduce modifier for direct-only operand decoding

2019-08-09 Thread Jan Bobek
Many operands can only decode successfully if the ModR/M byte has the
direct form (i.e. MOD=3). Capture this common aspect by introducing a
special operand-initialization statement wrapper.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 4a2dae6238..0bee7288e6 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4601,6 +4601,15 @@ static int ck_cpuid(CPUX86State *env, DisasContext *s, 
int ck_cpuid_feat)
 #define INSNOP_FINALIZE_NOOP/* no-op */
 #define INSNOP_FINALIZE_INVALID g_assert_not_reached()
 
+#define INSNOP_INIT_DIRECT_ONLY(init_stmt)  \
+do {\
+if (decode_modrm_mod(env, s, modrm) == 3) { \
+init_stmt;  \
+} else {\
+INSNOP_INIT_FAIL;   \
+}   \
+} while (0)
+
 static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
 {
 enum {
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 12/39] target/i386: introduce gen_sse_ng

2019-08-09 Thread Jan Bobek
This function serves as the point-of-intercept for all newly
implemented instructions. If no new implementation exists, fall back
to gen_sse.

Note: This changeset is intended for development only and shall not be
included in the final patch series.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 258351fce3..fbf10b57a2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4489,6 +4489,31 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 }
 }
 
+static void gen_sse_ng(CPUX86State *env, DisasContext *s, int b)
+{
+enum {
+P_66 = 1 << (0 + 8),
+P_F3 = 1 << (1 + 8),
+P_F2 = 1 << (2 + 8),
+W_0  = 0 << (3 + 8),
+W_1  = 1 << (3 + 8),
+M_0F = 1 << (4 + 8),
+};
+
+switch (b | M_0F
+| (s->prefix & PREFIX_DATA ? P_66 : 0)
+| (s->prefix & PREFIX_REPZ ? P_F3 : 0)
+| (s->prefix & PREFIX_REPNZ ? P_F2 : 0)
+| (REX_W(s) > 0 ? W_1 : W_0)) {
+
+default:
+gen_sse(env, s, b);
+return;
+}
+
+g_assert_not_reached();
+}
+
 /* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
@@ -8379,7 +8404,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x1c2:
 case 0x1c4 ... 0x1c6:
 case 0x1d0 ... 0x1fe:
-gen_sse(env, s, b);
+gen_sse_ng(env, s, b);
 break;
 default:
 goto unknown_op;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 05/39] target/i386: use prefix from DisasContext

2019-08-09 Thread Jan Bobek
Reduce scope of the local variable prefixes to enforce use of prefix
from DisasContext instead.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 113 
 1 file changed, 57 insertions(+), 56 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index bb13877df7..40a4844b64 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4491,7 +4491,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 {
 CPUX86State *env = cpu->env_ptr;
-int b, prefixes;
+int b;
 int shift;
 TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
@@ -4499,6 +4499,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 target_ulong pc_start = s->base.pc_next;
 
 {
+int prefixes;
 TCGMemOp aflag, dflag;
 
 s->pc_start = s->pc = pc_start;
@@ -6356,7 +6357,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa4: /* movsS */
 case 0xa5:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_movs(s, ot);
@@ -6366,7 +6367,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xaa: /* stosS */
 case 0xab:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_stos(s, ot);
@@ -6375,7 +6376,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xac: /* lodsS */
 case 0xad:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_lods(s, ot);
@@ -6384,9 +6385,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xae: /* scasS */
 case 0xaf:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & PREFIX_REPNZ) {
+if (s->prefix & PREFIX_REPNZ) {
 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
-} else if (prefixes & PREFIX_REPZ) {
+} else if (s->prefix & PREFIX_REPZ) {
 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
 } else {
 gen_scas(s, ot);
@@ -6396,9 +6397,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xa6: /* cmpsS */
 case 0xa7:
 ot = mo_b_d(b, s->dflag);
-if (prefixes & PREFIX_REPNZ) {
+if (s->prefix & PREFIX_REPNZ) {
 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
-} else if (prefixes & PREFIX_REPZ) {
+} else if (s->prefix & PREFIX_REPZ) {
 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
 } else {
 gen_cmps(s, ot);
@@ -6409,8 +6410,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
 gen_check_io(s, ot, pc_start - s->cs_base, 
- SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ SVM_IOIO_TYPE_MASK | svm_is_rep(s->prefix) | 4);
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_ins(s, ot);
@@ -6424,8 +6425,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 ot = mo_b_d32(b, s->dflag);
 tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
 gen_check_io(s, ot, pc_start - s->cs_base,
- svm_is_rep(prefixes) | 4);
-if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ svm_is_rep(s->prefix) | 4);
+if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
 gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
 } else {
 gen_outs(s, ot);
@@ -6444,7 +6445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 val = x86_ldub_code(env, s);
 tcg_gen_movi_tl(s->T0, val);
 gen_check_io(s, ot, pc_start - s->cs_base,
- SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+ SVM_IOIO_TYPE_MASK | sv

[Qemu-devel] [RFC PATCH v2 04/39] target/i386: use dflag from DisasContext

2019-08-09 Thread Jan Bobek
There already is a variable dflag in DisasContext, so reduce the scope
of the local variable dflag to enforce use of the one in DisasContext.

Suggested-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 184 
 1 file changed, 92 insertions(+), 92 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index bda96277e4..bb13877df7 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4493,13 +4493,13 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 CPUX86State *env = cpu->env_ptr;
 int b, prefixes;
 int shift;
-TCGMemOp ot, dflag;
+TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
 
 {
-TCGMemOp aflag;
+TCGMemOp aflag, dflag;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
@@ -4686,7 +4686,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 op = (b >> 3) & 7;
 f = (b >> 1) & 3;
 
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 switch(f) {
 case 0: /* OP Ev, Gv */
@@ -4744,7 +4744,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 {
 int val;
 
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -4781,16 +4781,16 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 /**/
 /* inc, dec, and other misc arith */
 case 0x40 ... 0x47: /* inc Gv */
-ot = dflag;
+ot = s->dflag;
 gen_inc(s, ot, OR_EAX + (b & 7), 1);
 break;
 case 0x48 ... 0x4f: /* dec Gv */
-ot = dflag;
+ot = s->dflag;
 gen_inc(s, ot, OR_EAX + (b & 7), -1);
 break;
 case 0xf6: /* GRP3 */
 case 0xf7:
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -5022,7 +5022,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 
 case 0xfe: /* GRP4 */
 case 0xff: /* GRP5 */
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -5036,10 +5036,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 /* operand size for jumps is 64 bit */
 ot = MO_64;
 } else if (op == 3 || op == 5) {
-ot = dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
+ot = s->dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
 } else if (op == 6) {
 /* default push size is 64 bit */
-ot = mo_pushpop(s, dflag);
+ot = mo_pushpop(s, s->dflag);
 }
 }
 if (mod != 3) {
@@ -5067,7 +5067,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 break;
 case 2: /* call Ev */
 /* XXX: optimize if memory (no 'and' is necessary) */
-if (dflag == MO_16) {
+if (s->dflag == MO_16) {
 tcg_gen_ext16u_tl(s->T0, s->T0);
 }
 next_eip = s->pc - s->cs_base;
@@ -5085,19 +5085,19 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 if (s->pe && !s->vm86) {
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
-   tcg_const_i32(dflag - 1),
+   tcg_const_i32(s->dflag - 1),
tcg_const_tl(s->pc - s->cs_base));
 } else {
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
-  tcg_const_i32(dflag - 1),
+  tcg_const_i32(s->dflag - 1),
   tcg_const_i32(s->pc - s->cs_base));
 }
 tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
 gen_jr(s, s->tmp4);
 break;
 case 4: /* jmp Ev */
-if (dflag == MO_16) {
+if (s->dflag == MO_16) {
 tcg_gen_ext16u_tl(s->T0, s->T0);
 }
 gen_op_jmp_v(s->T0);
@@ -5130,7 +5130,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 
 case 0x84: /* test Ev, Gv */
 case 0x85:
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dfla

[Qemu-devel] [RFC PATCH v2 17/39] target/i386: introduce helpers for decoding modrm fields

2019-08-09 Thread Jan Bobek
The old code uses bitshifts and bitwise-and all over the place for
decoding ModR/M fields. Avoid doing that by introducing proper
decoding macros.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 109e4922eb..4a2dae6238 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4500,6 +4500,21 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 #define tcg_gen_gvec_cmpgt(vece, dofs, aofs, bofs, oprsz, maxsz)\
 tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, maxsz)
 
+#define decode_modrm_mod(env, s, modrm) \
+(((modrm) >> 6) & 3)
+
+#define decode_modrm_reg_norexr(env, s, modrm)  \
+(((modrm) >> 3) & 7)
+#define decode_modrm_reg_rexr(env, s, modrm)\
+(decode_modrm_reg_norexr(env, s, modrm) \
+ | REX_R(s))
+
+#define decode_modrm_rm_norexb(env, s, modrm)   \
+((modrm) & 7)
+#define decode_modrm_rm_rexb(env, s, modrm) \
+(decode_modrm_rm_norexb(env, s, modrm)  \
+ | REX_B(s))
+
 enum {
 CK_CPUID_MMX = 1,
 CK_CPUID_3DNOW,
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 11/39] target/i386: introduce gen_(ld, st)d_env_A0

2019-08-09 Thread Jan Bobek
Similar in spirit to the already present gen_(ld,st)(q,o)_env_A0, it
will prove useful in later commits for smaller-sized vector loads.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c5ec309fe2..258351fce3 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2652,6 +2652,18 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
 gen_jmp_tb(s, eip, 0);
 }
 
+static inline void gen_ldd_env_A0(DisasContext *s, int offset)
+{
+tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+tcg_gen_st_i32(s->tmp2_i32, cpu_env, offset);
+}
+
+static inline void gen_std_env_A0(DisasContext *s, int offset)
+{
+tcg_gen_ld_i32(s->tmp2_i32, cpu_env, offset);
+tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+}
+
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 02/39] target/i386: Push rex_w into DisasContext

2019-08-09 Thread Jan Bobek
From: Richard Henderson 

Treat this the same as we already do for other rex bits.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index d74dbfd585..c0866c2797 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -44,11 +44,13 @@
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
 #define REX_R(s) ((s)->rex_r)
+#define REX_W(s) ((s)->rex_w)
 #else
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
 #define REX_R(s) 0
+#define REX_W(s) -1
 #endif
 
 #ifdef TARGET_X86_64
@@ -100,7 +102,7 @@ typedef struct DisasContext {
 #ifdef TARGET_X86_64
 int lma;/* long mode active */
 int code64; /* 64 bit code segment */
-int rex_x, rex_b, rex_r;
+int rex_x, rex_b, rex_r, rex_w;
 #endif
 int vex_l;  /* vex vector length */
 int vex_v;  /* vex vvvv register, without 1's complement.  */
@@ -4495,7 +4497,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
-int rex_w;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
@@ -4503,6 +4504,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rex_x = 0;
 s->rex_b = 0;
 s->rex_r = 0;
+s->rex_w = -1;
 s->x86_64_hregs = false;
 #endif
 s->rip_offset = 0; /* for relative ip address */
@@ -4514,7 +4516,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 }
 
 prefixes = 0;
-rex_w = -1;
 
  next_byte:
 b = x86_ldub_code(env, s);
@@ -4557,7 +4558,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x40 ... 0x4f:
 if (CODE64(s)) {
 /* REX prefix */
-rex_w = (b >> 3) & 1;
+s->rex_w = (b >> 3) & 1;
 s->rex_r = (b & 0x4) << 1;
 s->rex_x = (b & 0x2) << 2;
 s->rex_b = (b & 0x1) << 3;
@@ -4606,7 +4607,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rex_b = (~vex2 >> 2) & 8;
 #endif
 vex3 = x86_ldub_code(env, s);
-rex_w = (vex3 >> 7) & 1;
+#ifdef TARGET_X86_64
+s->rex_w = (vex3 >> 7) & 1;
+#endif
 switch (vex2 & 0x1f) {
 case 0x01: /* Implied 0f leading opcode bytes.  */
 b = x86_ldub_code(env, s) | 0x100;
@@ -4631,9 +4634,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 /* Post-process prefixes.  */
 if (CODE64(s)) {
 /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
-   data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
+   data with REX_W, and 16-bit data with 0x66; REX_W takes precedence
over 0x66 if both are present.  */
-dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
+dflag = (REX_W(s) > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : 
MO_32);
 /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
 aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
 } else {
@@ -5029,7 +5032,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 /* operand size for jumps is 64 bit */
 ot = MO_64;
 } else if (op == 3 || op == 5) {
-ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
+ot = dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
 } else if (op == 6) {
 /* default push size is 64 bit */
 ot = mo_pushpop(s, dflag);
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 09/39] target/i386: make variable is_xmm const

2019-08-09 Thread Jan Bobek
The variable is_xmm does not change value after assignment, so make
this fact explicit by marking it const.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 8bf39b73c4..c5ec309fe2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3042,7 +3042,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
-int op1_offset, op2_offset, is_xmm, val;
+int op1_offset, op2_offset, val;
 int modrm, mod, rm, reg;
 SSEFunc_0_epp sse_fn_epp;
 SSEFunc_0_eppi sse_fn_eppi;
@@ -3056,20 +3056,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 : s->prefix & PREFIX_REPZ ? 2
 : s->prefix & PREFIX_REPNZ ? 3
 : 0;
+const int is_xmm =
+(0x10 <= b && b <= 0x5f)
+|| b == 0xc6
+|| b == 0xc2
+|| !!b1;
 sse_fn_epp = sse_op_table1[b][b1];
 if (!sse_fn_epp) {
 goto unknown_op;
 }
-if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
-is_xmm = 1;
-} else {
-if (b1 == 0) {
-/* MMX case */
-is_xmm = 0;
-} else {
-is_xmm = 1;
-}
-}
 /* simple MMX/SSE operation */
 if (s->flags & HF_TS_MASK) {
 gen_exception(s, EXCP07_PREX);
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 08/39] target/i386: make variable b1 const

2019-08-09 Thread Jan Bobek
The variable b1 does not change value once assigned. Make this fact
explicit by marking it const.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index b1ba2fc3e5..8bf39b73c4 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3042,7 +3042,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
-int b1, op1_offset, op2_offset, is_xmm, val;
+int op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
 SSEFunc_0_epp sse_fn_epp;
 SSEFunc_0_eppi sse_fn_eppi;
@@ -3051,14 +3051,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 TCGMemOp ot;
 
 b &= 0xff;
-if (s->prefix & PREFIX_DATA)
-b1 = 1;
-else if (s->prefix & PREFIX_REPZ)
-b1 = 2;
-else if (s->prefix & PREFIX_REPNZ)
-b1 = 3;
-else
-b1 = 0;
+const int b1 =
+s->prefix & PREFIX_DATA ? 1
+: s->prefix & PREFIX_REPZ ? 2
+: s->prefix & PREFIX_REPNZ ? 3
+: 0;
 sse_fn_epp = sse_op_table1[b][b1];
 if (!sse_fn_epp) {
 goto unknown_op;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 06/39] target/i386: Simplify gen_exception arguments

2019-08-09 Thread Jan Bobek
From: Richard Henderson 

We can compute cur_eip from values present within DisasContext.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 89 -
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 40a4844b64..7532d65778 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -1272,10 +1272,10 @@ static void gen_helper_fp_arith_STN_ST0(int op, int 
opreg)
 }
 }
 
-static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
+static void gen_exception(DisasContext *s, int trapno)
 {
 gen_update_cc_op(s);
-gen_jmp_im(s, cur_eip);
+gen_jmp_im(s, s->pc_start - s->cs_base);
 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
 s->base.is_jmp = DISAS_NORETURN;
 }
@@ -1284,7 +1284,7 @@ static void gen_exception(DisasContext *s, int trapno, 
target_ulong cur_eip)
the instruction is known, but it isn't allowed in the current cpu mode.  */
 static void gen_illegal_opcode(DisasContext *s)
 {
-gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
+gen_exception(s, EXCP06_ILLOP);
 }
 
 /* if d == OR_TMP0, it means memory operand (address in A0) */
@@ -3040,8 +3040,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 [0xdf] = AESNI_OP(aeskeygenassist),
 };
 
-static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-target_ulong pc_start)
+static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
@@ -3076,7 +3075,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 /* simple MMX/SSE operation */
 if (s->flags & HF_TS_MASK) {
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 return;
 }
 if (s->flags & HF_EM_MASK) {
@@ -4515,7 +4514,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->vex_l = 0;
 s->vex_v = 0;
 if (sigsetjmp(s->jmpbuf, 0) != 0) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 return s->pc;
 }
 
@@ -5854,7 +5853,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
 /* XXX: what to do if illegal op ? */
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 break;
 }
 modrm = x86_ldub_code(env, s);
@@ -6572,7 +6571,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 set_cc_op(s, CC_OP_EFLAGS);
 } else if (s->vm86) {
 if (s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag - 1));
 set_cc_op(s, CC_OP_EFLAGS);
@@ -6694,7 +6693,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9c: /* pushf */
 gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
 if (s->vm86 && s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 gen_update_cc_op(s);
 gen_helper_read_eflags(s->T0, cpu_env);
@@ -6704,7 +6703,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9d: /* popf */
 gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
 if (s->vm86 && s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 ot = gen_pop_T0(s);
 if (s->cpl == 0) {
@@ -7021,7 +7020,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 goto illegal_op;
 val = x86_ldub_code(env, s);
 if (val == 0) {
-gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+gen_exception(s, EXCP00_DIVZ);
 } else {
 gen_helper_aam(cpu_env, tcg_const_i32(val));
 set_cc_op(s, CC_OP_LOGICB);
@@ -7055,7 +7054,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9b: /* fwait */
 if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
 (HF_MP_MASK | HF_TS_MASK)) {
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 } else {
 gen_helper_fwait(cpu_env);
 }
@@ -7066,7 +7065,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xcd: /* int N */
 val = x86_ldub_code(env, s);
 if (s->vm86 && s->iopl != 3) {
-

[Qemu-devel] [RFC PATCH v2 03/39] target/i386: reduce scope of variable aflag

2019-08-09 Thread Jan Bobek
The variable aflag is not used in most of disas_insn; make this clear
by explicitly reducing its scope to the block where it is used.

Suggested-by: Richard Henderson 
Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c0866c2797..bda96277e4 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4493,11 +4493,14 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 CPUX86State *env = cpu->env_ptr;
 int b, prefixes;
 int shift;
-TCGMemOp ot, aflag, dflag;
+TCGMemOp ot, dflag;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
 
+{
+TCGMemOp aflag;
+
 s->pc_start = s->pc = pc_start;
 s->override = -1;
 #ifdef TARGET_X86_64
@@ -4657,6 +4660,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->prefix = prefixes;
 s->aflag = aflag;
 s->dflag = dflag;
+}
 
 /* now check op code */
  reswitch:
-- 
2.20.1




[Qemu-devel] [RFC PATCH v2 01/39] target/i386: Push rex_r into DisasContext

2019-08-09 Thread Jan Bobek
From: Richard Henderson 

Treat this value the same as we do for rex_b and rex_x.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 85 +
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 03150a86e2..d74dbfd585 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -43,10 +43,12 @@
 #define CODE64(s) ((s)->code64)
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
+#define REX_R(s) ((s)->rex_r)
 #else
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
+#define REX_R(s) 0
 #endif
 
 #ifdef TARGET_X86_64
@@ -98,7 +100,7 @@ typedef struct DisasContext {
 #ifdef TARGET_X86_64
 int lma;/* long mode active */
 int code64; /* 64 bit code segment */
-int rex_x, rex_b;
+int rex_x, rex_b, rex_r;
 #endif
 int vex_l;  /* vex vector length */
 int vex_v;  /* vex vvvv register, without 1's complement.  */
@@ -3037,7 +3039,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 };
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-target_ulong pc_start, int rex_r)
+target_ulong pc_start)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
@@ -3107,8 +3109,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 
 modrm = x86_ldub_code(env, s);
 reg = ((modrm >> 3) & 7);
-if (is_xmm)
-reg |= rex_r;
+if (is_xmm) {
+reg |= REX_R(s);
+}
 mod = (modrm >> 6) & 3;
 if (sse_fn_epp == SSE_SPECIAL) {
 b |= (b1 << 8);
@@ -3642,7 +3645,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 tcg_gen_ld16u_tl(s->T0, cpu_env,
 
offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
 }
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 gen_op_mov_reg_v(s, ot, reg, s->T0);
 break;
 case 0x1d6: /* movq ea, xmm */
@@ -3686,7 +3689,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
  offsetof(CPUX86State, fpregs[rm].mmx));
 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
 }
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
 break;
 
@@ -3698,7 +3701,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 modrm = x86_ldub_code(env, s);
 rm = modrm & 7;
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 mod = (modrm >> 6) & 3;
 if (b1 >= 2) {
 goto unknown_op;
@@ -3774,7 +3777,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* Various integer extensions at 0f 38 f[0-f].  */
 b = modrm | (b1 << 8);
 modrm = x86_ldub_code(env, s);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 
 switch (b) {
 case 0x3f0: /* crc32 Gd,Eb */
@@ -4128,7 +4131,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 b = modrm;
 modrm = x86_ldub_code(env, s);
 rm = modrm & 7;
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 mod = (modrm >> 6) & 3;
 if (b1 >= 2) {
 goto unknown_op;
@@ -4148,7 +4151,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 rm = (modrm & 7) | REX_B(s);
 if (mod != 3)
 gen_lea_modrm(env, s, modrm);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 val = x86_ldub_code(env, s);
 switch (b) {
 case 0x14: /* pextrb */
@@ -4317,7 +4320,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* Various integer extensions at 0f 3a f[0-f].  */
 b = modrm | (b1 << 8);
 modrm = x86_ldub_code(env, s);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 
 switch (b) {
 case 0x3f0: /* rorx Gy,Ey, Ib */
@@ -4491,14 +4494,15 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 TCGMemOp ot, aflag, dflag;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
-int rex_w, rex_r;
 target_ulong pc_start = s->base.pc_next;
+int rex_w;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
 #ifdef TARGET_X86_64
 s->rex_x = 0;
 s->rex_b = 0;
+s->rex_r = 0;
 s->x86_64_hregs = false;
 #endif
 s->rip_offset = 

[Qemu-devel] [RFC PATCH v2 00/39] rewrite MMX/SSE instruction translation

2019-08-09 Thread Jan Bobek
This is a v2 of the patch series posted in [1]. Patches 1-9 are just
cleanups; patches 10-39 are something actually interesting. Compared
to v1, I started using preprocessor more extensively to generate
repetitive boilerplate code; opinions/alternatives are welcome and
appreciated.

I tried to eliminate as many errors reported by scripts/checkpatch.pl
as I could, but there are still some left; AFAICT they appear to be
non-applicable false positives caused by preprocessor macros.

There is a known flaw of M* operands documented in patches 25 and 39;
it will be addressed in v3. (It has some design implications which
require larger changes, so that's why I'm not including them right
away, but I already have a good idea of how to address this.)

Cheers,
  -Jan

Changes from v1:
  There is in fact little overlap with v1, apart from the minor
  cleanup patches; I tried a different approach this time.

References:
  1. https://lists.nongnu.org/archive/html/qemu-devel/2019-07/msg07041.html

Jan Bobek (36):
  target/i386: reduce scope of variable aflag
  target/i386: use dflag from DisasContext
  target/i386: use prefix from DisasContext
  target/i386: use pc_start from DisasContext
  target/i386: make variable b1 const
  target/i386: make variable is_xmm const
  target/i386: add vector register file alignment constraints
  target/i386: introduce gen_(ld,st)d_env_A0
  target/i386: introduce gen_sse_ng
  target/i386: disable unused function warning temporarily
  target/i386: introduce mnemonic aliases for several gvec operations
  target/i386: introduce function ck_cpuid
  target/i386: introduce instruction operand infrastructure
  target/i386: introduce helpers for decoding modrm fields
  target/i386: introduce modifier for direct-only operand decoding
  target/i386: introduce generic operand alias
  target/i386: introduce generic load-store operand
  target/i386: introduce insn.h
  target/i386: introduce code generators
  target/i386: introduce instruction translator macros
  target/i386: introduce Ib (immediate) operand
  target/i386: introduce M* (memptr) operands
  target/i386: introduce G*, R*, E* (general register) operands
  target/i386: introduce RdMw operand
  target/i386: introduce P*, N*, Q* (MMX) operands
  target/i386: introduce helper-based code generator macros
  target/i386: introduce gvec-based code generator macros
  target/i386: introduce MMX translators
  target/i386: introduce MMX code generators
  target/i386: introduce MMX instructions to insn.h
  target/i386: introduce V*, U*, W* (SSE/AVX) operands
  target/i386: introduce UdqMq operand
  target/i386: introduce SSE translators
  target/i386: introduce SSE code generators
  target/i386: introduce SSE instructions to insn.h
  target/i386: introduce memory-pointer operand read/write workarounds

Richard Henderson (3):
  target/i386: Push rex_r into DisasContext
  target/i386: Push rex_w into DisasContext
  target/i386: Simplify gen_exception arguments

 target/i386/cpu.h   |6 +-
 target/i386/insn.h  |  381 
 target/i386/translate.c | 2032 ---
 3 files changed, 2095 insertions(+), 324 deletions(-)
 create mode 100644 target/i386/insn.h

-- 
2.20.1




Re: [Qemu-devel] [RFC PATCH v1 22/22] target/i386: reimplement (V)P(EQ, CMP)(B, W, D)

2019-08-02 Thread Jan Bobek
On 7/31/19 5:31 PM, Richard Henderson wrote:
> On 7/31/19 1:09 PM, Aleksandar Markovic wrote:
>>
>>
>> On Wed, Jul 31, 2019 at 9:51 PM Richard Henderson 
>> > <mailto:richard.hender...@linaro.org>> wrote:
>>
>> On 7/31/19 10:57 AM, Jan Bobek wrote:
>> > +static inline void gen_gvec_cmpeq(unsigned vece, uint32_t dofs,
>> > +                                  uint32_t aofs, uint32_t bofs,
>> > +                                  uint32_t oprsz, uint32_t maxsz)
>> > +{
>> > +    tcg_gen_gvec_cmp(TCG_COND_EQ, vece, dofs, aofs, bofs, oprsz, 
>> maxsz);
>> > +}
>> ...
>> > +static inline void gen_gvec_cmpgt(unsigned vece, uint32_t dofs,
>> > +                                  uint32_t aofs, uint32_t bofs,
>> > +                                  uint32_t oprsz, uint32_t maxsz)
>> > +{
>> > +    tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, 
>> maxsz);
>> > +}
>>
>> Drop the inlines.
>>
>>
>> Why? The compiler will decide at the end of the day, but at least "inline" 
>> here
>> says that the code author thinks that inlining is desirable, logical, and 
>> expected
>> in these cases, which is in turn a valuable information for the code reader.
> 
> In this case it is in fact a lie that will only confuse the reader, as it did
> you.  Functions whose address are passed as a callback, as these are, are
> always forced out of line.
> 
> But beyond that, clang diagnoses unused static inline within *.c while gcc 
> does
> not (I'm not sure I agree with clang, but it is what it is).  By leaving off
> the inline, but compilers will diagnose when code rearrangement leaves a
> function unused.

Dang, I completely forgot about the function-address vs. inlining rule. I 
thought
of these as macros, really; they are only functions because I needed to pass
them to the gen_gvec_ld_modrm_* helpers.

I'll drop the inline, compilers ignore it anyway.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v1 08/22] target/i386: reimplement (V)PAND, (V)ANDPS, (V)ANDPD

2019-08-02 Thread Jan Bobek
On 7/31/19 3:35 PM, Richard Henderson wrote:
> On 7/31/19 10:56 AM, Jan Bobek wrote:
>> +#define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
>> (modrm), MO_64, tcg_gen_gvec_and, 0112)
>> +#define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
>> (modrm), MO_64, tcg_gen_gvec_and, 0112)
>> +#define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
>> (modrm), MO_64, tcg_gen_gvec_and, 0123)
>> +#define gen_vpand_ymm(env, s, modrm) gen_gvec_ld_modrm_vymm((env), (s), 
>> (modrm), MO_64, tcg_gen_gvec_and, 0123)
>> +#define gen_andps_xmm  gen_pand_xmm
>> +#define gen_vandps_xmm gen_vpand_xmm
>> +#define gen_vandps_ymm gen_vpand_ymm
>> +#define gen_andpd_xmm  gen_pand_xmm
>> +#define gen_vandpd_xmm gen_vpand_xmm
>> +#define gen_vandpd_ymm gen_vpand_ymm
> 
> 
> Why all of these extra defines?
> 
>> +enum {
>> +M_0F= 0x01 << 8,
>> +M_0F38  = 0x02 << 8,
>> +M_0F3A  = 0x04 << 8,
>> +P_66= 0x08 << 8,
>> +P_F3= 0x10 << 8,
>> +P_F2= 0x20 << 8,
>> +VEX_128 = 0x40 << 8,
>> +VEX_256 = 0x80 << 8,
>> +};
>> +
>> +switch(b | M_0F
>> +   | (s->prefix & PREFIX_DATA ? P_66 : 0)
>> +   | (s->prefix & PREFIX_REPZ ? P_F3 : 0)
>> +   | (s->prefix & PREFIX_REPNZ ? P_F2 : 0)
>> +   | (s->prefix & PREFIX_VEX ? (s->vex_l ? VEX_256 : VEX_128) : 0)) 
>> {
> 
> I think you can move this above almost everything in this function, so that 
> all
> of the legacy bits follow this switch.
> 
>> +case 0xdb | M_0F:  gen_pand_mm(env, s, modrm); return;
> 
> You'll want to put these on the next lines -- checkpatch.pl again.
> 
>> +case 0xdb | M_0F | P_66:   gen_pand_xmm(env, s, modrm); return;
>> +case 0xdb | M_0F | P_66 | VEX_128: gen_vpand_xmm(env, s, modrm); return;
>> +case 0xdb | M_0F | P_66 | VEX_256: gen_vpand_ymm(env, s, modrm); return;
>> +case 0x54 | M_0F:  gen_andps_xmm(env, s, modrm); return;
>> +case 0x54 | M_0F | VEX_128:gen_vandps_xmm(env, s, modrm); 
>> return;
>> +case 0x54 | M_0F | VEX_256:gen_vandps_ymm(env, s, modrm); 
>> return;
>> +case 0x54 | M_0F | P_66:   gen_andpd_xmm(env, s, modrm); return;
>> +case 0x54 | M_0F | P_66 | VEX_128: gen_vandpd_xmm(env, s, modrm); 
>> return;
>> +case 0x54 | M_0F | P_66 | VEX_256: gen_vandpd_ymm(env, s, modrm); 
>> return;
>> +default: break;
>> +}
> 
> Perhaps group cases together?
> 
> case 0xdb | M_0F | P_66:  /* PAND */
> case 0x54 | M_0F: /* ANDPS */
> case 0x54 | M_0F | P_66:  /* ANDPD */
>gen_gvec_ld_modrm_xmm(env, s, modrm, MO_64, tcg_gen_gvec_and, 0112);
>return;

As Aleksandar pointed out in his email, the general intuition was to
have self-documenting code. Seeing

case 0x54 | M_0F | VEX_256: gen_vandps_ymm(env, s, modrm); return;

clearly states that this particular case is a VANDPS, and if one wants
to see what we do with it, they can go look gen_vandps_ymm up.

That being said, I have come to the conclusion in the meantime that keeping
all the extra macros is just too much code and not worth it, so I'll
do it like you suggest above.

> How are you planning to handle CPUID checks?  I know the current handling is
> quite spotty, but with a reorg we might as well fix that too.

Good question. CPUID checks are not handled in this patch at all, I
will need to come up with a workable approach.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers

2019-08-02 Thread Jan Bobek
On 7/31/19 6:47 PM, Richard Henderson wrote:
> I suppose there aren't so many different combinations, but did you consider
> separate callbacks per operand?  If you have
> 
> typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int);
> 
> static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm)
> {
> int reg = (modrm >> 3) & 7; /* Ignore REX_R */
> return offsetof(CPUX86State, fpregs[reg].mmx);
> }
> 
> static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm)
> {
> int mod = (modrm >> 6) & 3;
> unsigned ret;
> 
> if (mod == 3) {
> int rm = modrm & 7; /* Ignore REX_B */
> ret = offsetof(CPUX86State, fpregs[rm].mmx);
> } else {
> ret = offsetof(CPUX86State, mmx_t0);
> gen_lea_modrm(env, s, modrm);
> gen_ldq_env_A0(s, ret);
> }
> return ret;
> }
> 
> static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm)
> {
> int reg = ((modrm >> 3) & 7) | REX_R(s);
> return offsetof(CPUX86State, xmm_regs[reg]);
> }
> 
> static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm)
> {
> int mod = (modrm >> 6) & 3;
> unsigned ret;
> 
> if (mod == 3) {
> int rm = (modrm & 7) | REX_B(s);
> ret = offsetof(CPUX86State, xmm_regs[rm]);
> } else {
> ret = offsetof(CPUX86State, xmm_t0);
> gen_lea_modrm(env, s, modrm);
> gen_ldo_env_A0(s, ret);
> }
> return ret;
> }
> 
> static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm)
> {
> return offsetof(CPUX86State, xmm_regs[s->vex_v]);
> }
> 
> Then you can have
> 
> #define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ)
> static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env,  \
> DisasContext *s, int modrm, unsigned vece,  gen_gvec_2_fp_t gen) \
> {   \
> int ofd = offset_##OP0(env, s, modrm);  \
> int of1 = offset_##OP1(env, s, modrm);  \
> int of2 = offset_##OP2(env, s, modrm);  \
> gen(vece, opd, opa, opb, OPRSZ, MAXSZ); \
> }
> 
> GEN_GVEC_3(Pq, Pq, Qq, sizeof(MMXReg), sizeof(MMXReg))
> GEN_GVEC_3(Vx, Vx, Wx, sizeof(XMMReg), max_vec_size(s))
> GEN_GVEC_3(Vx, Hx, Wx, sizeof(XMMReg), max_vec_size(s))
> 
> The PqPqQq and VxVxWx sub-strings aren't quite canonical, but imo a better fit
> to the actual format of the instruction, with 2 inputs and 1 output.

Funny, I had a similar idea and converged to almost identical
solution. This will be part of v2.

> You can also do
> 
> GEN_GVEC_3(Pq, Qq, Pq, sizeof(MMXReg), sizeof(MMXReg))
> 
> for those rare "reversed" operations like PANDN.  Now you don't need to carry
> around the OPCTL argument, which I initially found non-obvious.

Yup, solves the problem nicely and more clearly.

> I initially thought you'd be able to infer maxsz from the set of arguments, 
> but
> since there are vex encoded operations that do not use vex. that is not
> always the case.  Thus I suggest
> 
> static size_t max_vec_size(DisasContext *s)
> {
> if (s->prefixes & PREFIX_VEX) {
> /*
>  * TODO: When avx512 is supported and enabled, sizeof(ZMMReg).
>  * In the meantime don't waste time zeroing data that is not
>  * architecturally present.
>  */
> return sizeof(YMMReg);
> } else {
> /* Without vex encoding, only the low 128 bits are modified. */
> return sizeof(XMMReg);
> }
> }

Looks good.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v1 05/22] target/i386: introduce gen_ld_modrm_* helpers

2019-08-02 Thread Jan Bobek
On 7/31/19 3:08 PM, Richard Henderson wrote:
> On 7/31/19 10:56 AM, Jan Bobek wrote:
>> These help with decoding/loading ModR/M vector operands; the operand's
>> register offset is returned, which is suitable for use with gvec
>> infrastructure.
>>
>> Signed-off-by: Jan Bobek 
>> ---
>>  target/i386/translate.c | 47 +
>>  1 file changed, 47 insertions(+)
>>
>> diff --git a/target/i386/translate.c b/target/i386/translate.c
>> index 9e22eca2dc..7548677e1f 100644
>> --- a/target/i386/translate.c
>> +++ b/target/i386/translate.c
>> @@ -3040,6 +3040,53 @@ static const struct SSEOpHelper_eppi 
>> sse_op_table7[256] = {
>>  [0xdf] = AESNI_OP(aeskeygenassist),
>>  };
>>  
>> +static inline void gen_ld_modrm_PqQq(CPUX86State *env, DisasContext *s, int 
>> modrm,
>> + uint32_t* dofs, uint32_t* aofs)
> 
> s/uint32_t* /uint32_t */
> 
> Drop the inlines; let the compiler choose.
> 
> 
>> +{
>> +const int mod = (modrm >> 6) & 3;
>> +const int reg = (modrm >> 3) & 7; /* no REX_R */
>> +*dofs = offsetof(CPUX86State, fpregs[reg].mmx);
>> +
>> +if(mod == 3) {
> 
> s/if(/if (/
> 
> Both of these errors should be caught by ./scripts/checkpatch.pl.

I have the script set up; I disabled it temporarily (or so I thought)
some time ago when it was preventing me from git stash'ing some
experimental hacks, and never got around to enabling it again.

Anyway, I'll make sure not to forget to run it prior to submission
next time.

>> +gen_ldo_env_A0(s, *aofs); /* FIXME this needs to load 32 bytes for 
>> YMM 
> 
> Better as "TODO", since this isn't broken and in need of fixing, since we do
> not yet support AVX.
> 
> Otherwise,
> Reviewed-by: Richard Henderson 
> 
> 
> r~
> 



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v1 03/22] target/i386: Use prefix, aflag and dflag from DisasContext

2019-08-02 Thread Jan Bobek
Hi Aleksandar,

thanks a lot for your feedback! I have to admit that I paid little
attention to this particular patch, because it was authored by
Richard; I simply included it verbatim. I agree that it would be
clearer if it were split into three patches, and the description could
be made less confusing.  I will make sure to include your suggestions
in v2.

Thanks a lot for looking over my code!

Best,
-Jan

On 7/31/19 4:04 PM, Aleksandar Markovic wrote:
> 
> 
> On Wed, Jul 31, 2019 at 9:41 PM Aleksandar Markovic 
> mailto:aleksandar.m.m...@gmail.com>> wrote:
> 
> 
> 
> On Wed, Jul 31, 2019 at 7:59 PM Jan Bobek  <mailto:jan.bo...@gmail.com>> wrote:
> 
> From: Richard Henderson mailto:r...@twiddle.net>>
> 
> The variables are already there, we just have to hide the ones
> in disas_insn so that we are forced to use them.
> 
> Signed-off-by: Richard Henderson  <mailto:r...@twiddle.net>>
> ---
>  target/i386/translate.c | 299 
> 
>  1 file changed, 152 insertions(+), 147 deletions(-)
> 
> 
> Hi, Jan.
> 
> The series overall looks great, and hopefully you will refine rough
> around the edges parts soon. Thanks for this valuable contribution!
> 
> About this patch, I noticed that it mentions "aflag" in the title, but
> the patch actually does not change any code related to the variable
> "aflag" in the described sense - it looks to me it just reduces the
> scope of the local variable "aflag", which is certainly different than
> "use aflag from DisasContext" as it could be implied from the
> patch title. You definitely should not confuse the readers with
> such inaccuracies.
> 
> 
> Also, Jan, you need to correct the code alignment (indentation), if
> you enclose a part of a function to form a new code block. I guess
> you just left these cosmetic things for v2 or later.
> 
> Sincerely,
> Aleksandar
>  
> 
> 
> Actually, I think the patch would look much better if split into three
> patches (easier for reviewing, and also clearer for future developers),
> wouldn't it?
> 
> Yours,
> Aleksandar
> 



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [RFC PATCH v1 20/22] target/i386: reimplement (V)PMAXSW

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  3 ---
 target/i386/ops_sse_header.h |  2 --
 target/i386/translate.c  | 12 +++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 058ed5cdfc..92d0544474 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -338,7 +338,6 @@ static inline int satsw(int x)
 }
 
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
-#define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
 #define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
 #define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
@@ -355,8 +354,6 @@ static inline int satsw(int x)
 
 SSE_HELPER_B(helper_pmaxub, FMAXUB)
 
-SSE_HELPER_W(helper_pmaxsw, FMAXSW)
-
 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
 SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
 SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 3d0e321230..bf38738783 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -62,8 +62,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 
 SSE_HELPER_B(pmaxub, FMAXUB)
 
-SSE_HELPER_W(pmaxsw, FMAXSW)
-
 SSE_HELPER_B(pcmpgtb, FCMPGTB)
 SSE_HELPER_W(pcmpgtw, FCMPGTW)
 SSE_HELPER_L(pcmpgtl, FCMPGTL)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 893fe1253f..48bfb4e47b 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2844,7 +2844,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xeb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xec] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xed] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
-[0xee] = MMX_OP2(pmaxsw),
+[0xee] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xef] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
 [0xf1] = MMX_OP2(psllw),
@@ -3206,6 +3206,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpminu_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umin, 0123)
 #define gen_vpminu_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umin, 0123)
 
+#define gen_pmaxs_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_smax, 0112)
+#define gen_pmaxs_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_smax, 0112)
+#define gen_vpmaxs_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smax, 0123)
+#define gen_vpmaxs_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smax, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3431,6 +3436,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xea | M_0F | P_66 | VEX_128: gen_vpmins_xmm(env, s, modrm, MO_16); 
return;
 case 0xea | M_0F | P_66 | VEX_256: gen_vpmins_ymm(env, s, modrm, MO_16); 
return;
 
+case 0xee | M_0F:  gen_pmaxs_mm(env, s, modrm, MO_16); 
return;
+case 0xee | M_0F | P_66:   gen_pmaxs_xmm(env, s, modrm, MO_16); 
return;
+case 0xee | M_0F | P_66 | VEX_128: gen_vpmaxs_xmm(env, s, modrm, MO_16); 
return;
+case 0xee | M_0F | P_66 | VEX_256: gen_vpmaxs_ymm(env, s, modrm, MO_16); 
return;
+
 case 0xdb | M_0F:  gen_pand_mm(env, s, modrm); return;
 case 0xdb | M_0F | P_66:   gen_pand_xmm(env, s, modrm); return;
 case 0xdb | M_0F | P_66 | VEX_128: gen_vpand_xmm(env, s, modrm); return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 21/22] target/i386: reimplement (V)PMAXUB

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  4 
 target/i386/ops_sse_header.h |  2 --
 target/i386/translate.c  | 12 +++-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 92d0544474..75ff686bb6 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -337,8 +337,6 @@ static inline int satsw(int x)
 }
 }
 
-#define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
-
 #define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
 #define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
 #define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
@@ -352,8 +350,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_pmaxub, FMAXUB)
-
 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
 SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
 SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index bf38738783..9c7451d28e 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,8 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(pmaxub, FMAXUB)
-
 SSE_HELPER_B(pcmpgtb, FCMPGTB)
 SSE_HELPER_W(pcmpgtw, FCMPGTW)
 SSE_HELPER_L(pcmpgtl, FCMPGTL)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 48bfb4e47b..d08d2cedce 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2828,7 +2828,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xdb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdc] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdd] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
-[0xde] = MMX_OP2(pmaxub),
+[0xde] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdf] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xe0] = MMX_OP2(pavgb),
 [0xe1] = MMX_OP2(psraw),
@@ -3211,6 +3211,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpmaxs_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smax, 0123)
 #define gen_vpmaxs_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smax, 0123)
 
+#define gen_pmaxu_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_umax, 0112)
+#define gen_pmaxu_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_umax, 0112)
+#define gen_vpmaxu_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umax, 0123)
+#define gen_vpmaxu_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umax, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3436,6 +3441,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xea | M_0F | P_66 | VEX_128: gen_vpmins_xmm(env, s, modrm, MO_16); 
return;
 case 0xea | M_0F | P_66 | VEX_256: gen_vpmins_ymm(env, s, modrm, MO_16); 
return;
 
+case 0xde | M_0F:  gen_pmaxu_mm(env, s, modrm, MO_8); 
return;
+case 0xde | M_0F | P_66:   gen_pmaxu_xmm(env, s, modrm, MO_8); 
return;
+case 0xde | M_0F | P_66 | VEX_128: gen_vpmaxu_xmm(env, s, modrm, MO_8); 
return;
+case 0xde | M_0F | P_66 | VEX_256: gen_vpmaxu_ymm(env, s, modrm, MO_8); 
return;
+
 case 0xee | M_0F:  gen_pmaxs_mm(env, s, modrm, MO_16); 
return;
 case 0xee | M_0F | P_66:   gen_pmaxs_xmm(env, s, modrm, MO_16); 
return;
 case 0xee | M_0F | P_66 | VEX_128: gen_vpmaxs_xmm(env, s, modrm, MO_16); 
return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 22/22] target/i386: reimplement (V)P(EQ, CMP)(B, W, D)

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h| 13 ---
 target/i386/ops_sse_header.h |  8 -
 target/i386/translate.c  | 66 
 3 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 75ff686bb6..b6ace9410f 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -337,11 +337,6 @@ static inline int satsw(int x)
 }
 }
 
-#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
-#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
-#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
-#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0)
-
 #define FMULLW(a, b) ((a) * (b))
 #define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
 #define FMULHUW(a, b) ((a) * (b) >> 16)
@@ -350,14 +345,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
-SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
-SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
-
-SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
-SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
-SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
-
 SSE_HELPER_W(helper_pmullw, FMULLW)
 #if SHIFT == 0
 SSE_HELPER_W(helper_pmulhrw, FMULHRW)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 9c7451d28e..d8e33dff6b 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,14 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(pcmpgtb, FCMPGTB)
-SSE_HELPER_W(pcmpgtw, FCMPGTW)
-SSE_HELPER_L(pcmpgtl, FCMPGTL)
-
-SSE_HELPER_B(pcmpeqb, FCMPEQ)
-SSE_HELPER_W(pcmpeqw, FCMPEQ)
-SSE_HELPER_L(pcmpeql, FCMPEQ)
-
 SSE_HELPER_W(pmullw, FMULLW)
 #if SHIFT == 0
 SSE_HELPER_W(pmulhrw, FMULHRW)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index d08d2cedce..729509e1ff 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2783,9 +2783,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0x61] = MMX_OP2(punpcklwd),
 [0x62] = MMX_OP2(punpckldq),
 [0x63] = MMX_OP2(packsswb),
-[0x64] = MMX_OP2(pcmpgtb),
-[0x65] = MMX_OP2(pcmpgtw),
-[0x66] = MMX_OP2(pcmpgtl),
+[0x64] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0x65] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0x66] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0x67] = MMX_OP2(packuswb),
 [0x68] = MMX_OP2(punpckhbw),
 [0x69] = MMX_OP2(punpckhwd),
@@ -2802,9 +2802,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
-[0x74] = MMX_OP2(pcmpeqb),
-[0x75] = MMX_OP2(pcmpeqw),
-[0x76] = MMX_OP2(pcmpeql),
+[0x74] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0x75] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0x76] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0x77] = { SSE_DUMMY }, /* emms */
 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
@@ -3216,6 +3216,30 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpmaxu_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umax, 0123)
 #define gen_vpmaxu_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umax, 0123)
 
+static inline void gen_gvec_cmpeq(unsigned vece, uint32_t dofs,
+  uint32_t aofs, uint32_t bofs,
+  uint32_t oprsz, uint32_t maxsz)
+{
+tcg_gen_gvec_cmp(TCG_COND_EQ, vece, dofs, aofs, bofs, oprsz, maxsz);
+}
+
+#define gen_pcmpeq_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), 
(s), (modrm), (vece), gen_gvec_cmpeq, 0112)
+#define gen_pcmpeq_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), 
(s), (modrm), (vece), gen_gvec_cmpeq, 0112)
+#define gen_vpcmpeq_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), 
(s), (modrm), (vece), gen_gvec_cmpeq, 0123)
+#define gen_vpcmpeq_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), 
(s), (modrm), (vece), gen_gvec_cmpeq, 0123)
+
+static inline void gen_gvec_cmpgt(unsigned vece, uint32_t dofs,
+  uint32_t aofs, uint32_t bofs,
+  uint32_t oprsz, uint32_t maxsz)
+{
+tcg_gen_gvec_cmp(TCG_COND_GT, vece, dofs, aofs, bofs, oprsz, maxsz);
+}
+
+#define gen_pcmpgt_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), 
(s), (modrm), (vece), gen_gvec_cmpgt, 0112)
+#define gen_pcmpgt_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), 
(s), (modrm), (vece), gen_gvec_cmpgt, 0112)
+#define gen_vpcm

[Qemu-devel] [RFC PATCH v1 15/22] target/i386: reimplement (V)PADDUS(B, W)

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  5 -
 target/i386/ops_sse_header.h |  2 --
 target/i386/translate.c  | 19 +--
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 8829dcb781..8c9b47fca4 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -337,9 +337,6 @@ static inline int satsw(int x)
 }
 }
 
-#define FADDUB(a, b) satub((a) + (b))
-#define FADDUW(a, b) satuw((a) + (b))
-
 #define FSUBUB(a, b) satub((a) - (b))
 #define FSUBUW(a, b) satuw((a) - (b))
 #define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
@@ -362,11 +359,9 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_paddusb, FADDUB)
 SSE_HELPER_B(helper_psubusb, FSUBUB)
 SSE_HELPER_B(helper_psubsb, FSUBSB)
 
-SSE_HELPER_W(helper_paddusw, FADDUW)
 SSE_HELPER_W(helper_psubusw, FSUBUW)
 SSE_HELPER_W(helper_psubsw, FSUBSW)
 
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 78203e80a5..8a31ade70c 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,11 +60,9 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(paddusb, FADDUB)
 SSE_HELPER_B(psubusb, FSUBUB)
 SSE_HELPER_B(psubsb, FSUBSB)
 
-SSE_HELPER_W(paddusw, FADDUW)
 SSE_HELPER_W(psubusw, FSUBUW)
 SSE_HELPER_W(psubsw, FSUBSW)
 
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 5ea5014d99..e2ed8c20b3 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2826,8 +2826,8 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xd9] = MMX_OP2(psubusw),
 [0xda] = MMX_OP2(pminub),
 [0xdb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
-[0xdc] = MMX_OP2(paddusb),
-[0xdd] = MMX_OP2(paddusw),
+[0xdc] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xdd] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xde] = MMX_OP2(pmaxub),
 [0xdf] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xe0] = MMX_OP2(pavgb),
@@ -3176,6 +3176,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpadds_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_ssadd, 0123)
 #define gen_vpadds_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_ssadd, 0123)
 
+#define gen_paddus_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), 
(s), (modrm), (vece), tcg_gen_gvec_usadd, 0112)
+#define gen_paddus_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), 
(s), (modrm), (vece), tcg_gen_gvec_usadd, 0112)
+#define gen_vpaddus_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), 
(s), (modrm), (vece), tcg_gen_gvec_usadd, 0123)
+#define gen_vpaddus_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), 
(s), (modrm), (vece), tcg_gen_gvec_usadd, 0123)
+
 #define gen_psub_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0112)
 #define gen_psub_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0112)
 #define gen_vpsub_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0123)
@@ -3336,6 +3341,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xd4 | M_0F | P_66 | VEX_128: gen_vpadd_xmm(env, s, modrm, MO_64); 
return;
 case 0xd4 | M_0F | P_66 | VEX_256: gen_vpadd_ymm(env, s, modrm, MO_64); 
return;
 
+case 0xdc | M_0F:  gen_paddus_mm(env, s, modrm, MO_8); 
return;
+case 0xdc | M_0F | P_66:   gen_paddus_xmm(env, s, modrm, MO_8); 
return;
+case 0xdc | M_0F | P_66 | VEX_128: gen_vpaddus_xmm(env, s, modrm, MO_8); 
return;
+case 0xdc | M_0F | P_66 | VEX_256: gen_vpaddus_ymm(env, s, modrm, MO_8); 
return;
+
+case 0xdd | M_0F:  gen_paddus_mm(env, s, modrm, MO_16); 
return;
+case 0xdd | M_0F | P_66:   gen_paddus_xmm(env, s, modrm, MO_16); 
return;
+case 0xdd | M_0F | P_66 | VEX_128: gen_vpaddus_xmm(env, s, modrm, MO_16); 
return;
+case 0xdd | M_0F | P_66 | VEX_256: gen_vpaddus_ymm(env, s, modrm, MO_16); 
return;
+
 case 0xec | M_0F:  gen_padds_mm(env, s, modrm, MO_8); 
return;
 case 0xec | M_0F | P_66:   gen_padds_xmm(env, s, modrm, MO_8); 
return;
 case 0xec | M_0F | P_66 | VEX_128: gen_vpadds_xmm(env, s, modrm, MO_8); 
return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 16/22] target/i386: reimplement (V)PSUBS(B, W)

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  4 
 target/i386/ops_sse_header.h |  2 --
 target/i386/translate.c  | 19 +--
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 8c9b47fca4..f948adbc68 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -339,8 +339,6 @@ static inline int satsw(int x)
 
 #define FSUBUB(a, b) satub((a) - (b))
 #define FSUBUW(a, b) satuw((a) - (b))
-#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
-#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
 #define FMINUB(a, b) ((a) < (b)) ? (a) : (b)
 #define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b)
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
@@ -360,10 +358,8 @@ static inline int satsw(int x)
 #endif
 
 SSE_HELPER_B(helper_psubusb, FSUBUB)
-SSE_HELPER_B(helper_psubsb, FSUBSB)
 
 SSE_HELPER_W(helper_psubusw, FSUBUW)
-SSE_HELPER_W(helper_psubsw, FSUBSW)
 
 SSE_HELPER_B(helper_pminub, FMINUB)
 SSE_HELPER_B(helper_pmaxub, FMAXUB)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 8a31ade70c..20fb8aeccc 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -61,10 +61,8 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
 SSE_HELPER_B(psubusb, FSUBUB)
-SSE_HELPER_B(psubsb, FSUBSB)
 
 SSE_HELPER_W(psubusw, FSUBUW)
-SSE_HELPER_W(psubsw, FSUBSW)
 
 SSE_HELPER_B(pminub, FMINUB)
 SSE_HELPER_B(pmaxub, FMAXUB)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index e2ed8c20b3..894471861d 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2838,8 +2838,8 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xe5] = MMX_OP2(pmulhw),
 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, 
gen_helper_cvtpd2dq },
 [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
-[0xe8] = MMX_OP2(psubsb),
-[0xe9] = MMX_OP2(psubsw),
+[0xe8] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xe9] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xea] = MMX_OP2(pminsw),
 [0xeb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xec] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
@@ -3186,6 +3186,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpsub_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0123)
 #define gen_vpsub_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0123)
 
+#define gen_psubs_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sssub, 0112)
+#define gen_psubs_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sssub, 0112)
+#define gen_vpsubs_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sssub, 0123)
+#define gen_vpsubs_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sssub, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3381,6 +3386,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xfb | M_0F | P_66 | VEX_128: gen_vpsub_xmm(env, s, modrm, MO_64); 
return;
 case 0xfb | M_0F | P_66 | VEX_256: gen_vpsub_ymm(env, s, modrm, MO_64); 
return;
 
+case 0xe8 | M_0F:  gen_psubs_mm(env, s, modrm, MO_8); 
return;
+case 0xe8 | M_0F | P_66:   gen_psubs_xmm(env, s, modrm, MO_8); 
return;
+case 0xe8 | M_0F | P_66 | VEX_128: gen_vpsubs_xmm(env, s, modrm, MO_8); 
return;
+case 0xe8 | M_0F | P_66 | VEX_256: gen_vpsubs_ymm(env, s, modrm, MO_8); 
return;
+
+case 0xe9 | M_0F:  gen_psubs_mm(env, s, modrm, MO_16); 
return;
+case 0xe9 | M_0F | P_66:   gen_psubs_xmm(env, s, modrm, MO_16); 
return;
+case 0xe9 | M_0F | P_66 | VEX_128: gen_vpsubs_xmm(env, s, modrm, MO_16); 
return;
+case 0xe9 | M_0F | P_66 | VEX_256: gen_vpsubs_ymm(env, s, modrm, MO_16); 
return;
+
 case 0xdb | M_0F:  gen_pand_mm(env, s, modrm); return;
 case 0xdb | M_0F | P_66:   gen_pand_xmm(env, s, modrm); return;
 case 0xdb | M_0F | P_66 | VEX_128: gen_vpand_xmm(env, s, modrm); return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 19/22] target/i386: reimplement (V)PMINUB

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  2 --
 target/i386/ops_sse_header.h |  1 -
 target/i386/translate.c  | 12 +++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index f57eaa2c77..058ed5cdfc 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -337,7 +337,6 @@ static inline int satsw(int x)
 }
 }
 
-#define FMINUB(a, b) ((a) < (b)) ? (a) : (b)
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
@@ -354,7 +353,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_pminub, FMINUB)
 SSE_HELPER_B(helper_pmaxub, FMAXUB)
 
 SSE_HELPER_W(helper_pmaxsw, FMAXSW)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index a7f99e5427..3d0e321230 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,7 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(pminub, FMINUB)
 SSE_HELPER_B(pmaxub, FMAXUB)
 
 SSE_HELPER_W(pmaxsw, FMAXSW)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index d601c6d4c2..893fe1253f 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2824,7 +2824,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
 [0xd8] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xd9] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
-[0xda] = MMX_OP2(pminub),
+[0xda] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdc] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdd] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
@@ -3201,6 +3201,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpmins_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smin, 0123)
 #define gen_vpmins_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smin, 0123)
 
+#define gen_pminu_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_umin, 0112)
+#define gen_pminu_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_umin, 0112)
+#define gen_vpminu_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umin, 0123)
+#define gen_vpminu_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_umin, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3416,6 +3421,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xe9 | M_0F | P_66 | VEX_128: gen_vpsubs_xmm(env, s, modrm, MO_16); 
return;
 case 0xe9 | M_0F | P_66 | VEX_256: gen_vpsubs_ymm(env, s, modrm, MO_16); 
return;
 
+case 0xda | M_0F:  gen_pminu_mm(env, s, modrm, MO_8); 
return;
+case 0xda | M_0F | P_66:   gen_pminu_xmm(env, s, modrm, MO_8); 
return;
+case 0xda | M_0F | P_66 | VEX_128: gen_vpminu_xmm(env, s, modrm, MO_8); 
return;
+case 0xda | M_0F | P_66 | VEX_256: gen_vpminu_ymm(env, s, modrm, MO_8); 
return;
+
 case 0xea | M_0F:  gen_pmins_mm(env, s, modrm, MO_16); 
return;
 case 0xea | M_0F | P_66:   gen_pmins_xmm(env, s, modrm, MO_16); 
return;
 case 0xea | M_0F | P_66 | VEX_128: gen_vpmins_xmm(env, s, modrm, MO_16); 
return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 13/22] target/i386: reimplement (V)PSUB(B, W, D, Q)

2019-07-31 Thread Jan Bobek
Reimplement (V)PSUB(B, W, D, Q) on top of the gvec infrastructure
(tcg_gen_gvec_sub) and remove the psubb/psubw/psubl/psubq helpers
it replaces.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  6 --
 target/i386/ops_sse_header.h |  5 -
 target/i386/translate.c  | 33 +
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 31a761a89a..59935a65be 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -342,7 +342,6 @@ static inline int satsw(int x)
 #define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
 #define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))
 
-#define FSUB(a, b) ((a) - (b))
 #define FSUBUB(a, b) satub((a) - (b))
 #define FSUBUW(a, b) satuw((a) - (b))
 #define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
@@ -365,11 +364,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_psubb, FSUB)
-SSE_HELPER_W(helper_psubw, FSUB)
-SSE_HELPER_L(helper_psubl, FSUB)
-SSE_HELPER_Q(helper_psubq, FSUB)
-
 SSE_HELPER_B(helper_paddusb, FADDUB)
 SSE_HELPER_B(helper_paddsb, FADDSB)
 SSE_HELPER_B(helper_psubusb, FSUBUB)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 5c69ab91d4..bcdbac99a0 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,11 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(psubb, FSUB)
-SSE_HELPER_W(psubw, FSUB)
-SSE_HELPER_L(psubl, FSUB)
-SSE_HELPER_Q(psubq, FSUB)
-
 SSE_HELPER_B(paddusb, FADDUB)
 SSE_HELPER_B(paddsb, FADDSB)
 SSE_HELPER_B(psubusb, FSUBUB)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1dbeb49066..6f4dfd06a1 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2855,10 +2855,10 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xf6] = MMX_OP2(psadbw),
 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
(SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
-[0xf8] = MMX_OP2(psubb),
-[0xf9] = MMX_OP2(psubw),
-[0xfa] = MMX_OP2(psubl),
-[0xfb] = MMX_OP2(psubq),
+[0xf8] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xf9] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xfa] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xfb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xfc] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xfd] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xfe] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
@@ -3171,6 +3171,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpadd_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0123)
 #define gen_vpadd_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0123)
 
+#define gen_psub_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0112)
+#define gen_psub_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0112)
+#define gen_vpsub_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0123)
+#define gen_vpsub_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3326,6 +3331,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xd4 | M_0F | P_66 | VEX_128: gen_vpadd_xmm(env, s, modrm, MO_64); 
return;
 case 0xd4 | M_0F | P_66 | VEX_256: gen_vpadd_ymm(env, s, modrm, MO_64); 
return;
 
+case 0xf8 | M_0F:  gen_psub_mm(env, s, modrm, MO_8); 
return;
+case 0xf8 | M_0F | P_66:   gen_psub_xmm(env, s, modrm, MO_8); 
return;
+case 0xf8 | M_0F | P_66 | VEX_128: gen_vpsub_xmm(env, s, modrm, MO_8); 
return;
+case 0xf8 | M_0F | P_66 | VEX_256: gen_vpsub_ymm(env, s, modrm, MO_8); 
return;
+
+case 0xf9 | M_0F:  gen_psub_mm(env, s, modrm, MO_16); 
return;
+case 0xf9 | M_0F | P_66:   gen_psub_xmm(env, s, modrm, MO_16); 
return;
+case 0xf9 | M_0F | P_66 | VEX_128: gen_vpsub_xmm(env, s, modrm, MO_16); 
return;
+case 0xf9 | M_0F | P_66 | VEX_256: gen_vpsub_ymm(env, s, modrm, MO_16); 
return;
+
+case 0xfa | M_0F:  gen_psub_mm(env, s, modrm, MO_32); 
return;
+case 0xfa | M_0F | P_66:   gen_psub_xmm(env, s, modrm, MO_32); 
return;
+case 0xfa | M_0F | P_66 | VEX_128: gen_vpsub_xmm(env, s, modrm, MO_32); 
return;
+case 0xfa | M_0F | P_66 | VEX_256: gen_vpsub_ymm(env, s, modrm, MO_32); 

[Qemu-devel] [RFC PATCH v1 12/22] target/i386: reimplement (V)PADD(B, W, D, Q)

2019-07-31 Thread Jan Bobek
Reimplement (V)PADD(B, W, D, Q) on top of the gvec infrastructure
(tcg_gen_gvec_add) and remove the paddb/paddw/paddl/paddq helpers
it replaces.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  6 --
 target/i386/ops_sse_header.h |  5 -
 target/i386/translate.c  | 33 +
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 84562a4536..31a761a89a 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -337,7 +337,6 @@ static inline int satsw(int x)
 }
 }
 
-#define FADD(a, b) ((a) + (b))
 #define FADDUB(a, b) satub((a) + (b))
 #define FADDUW(a, b) satuw((a) + (b))
 #define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
@@ -366,11 +365,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_paddb, FADD)
-SSE_HELPER_W(helper_paddw, FADD)
-SSE_HELPER_L(helper_paddl, FADD)
-SSE_HELPER_Q(helper_paddq, FADD)
-
 SSE_HELPER_B(helper_psubb, FSUB)
 SSE_HELPER_W(helper_psubw, FSUB)
 SSE_HELPER_L(helper_psubl, FSUB)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index abd00ca69d..5c69ab91d4 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,11 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(paddb, FADD)
-SSE_HELPER_W(paddw, FADD)
-SSE_HELPER_L(paddl, FADD)
-SSE_HELPER_Q(paddq, FADD)
-
 SSE_HELPER_B(psubb, FSUB)
 SSE_HELPER_W(psubw, FSUB)
 SSE_HELPER_L(psubl, FSUB)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 69e9514679..1dbeb49066 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2818,7 +2818,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xd1] = MMX_OP2(psrlw),
 [0xd2] = MMX_OP2(psrld),
 [0xd3] = MMX_OP2(psrlq),
-[0xd4] = MMX_OP2(paddq),
+[0xd4] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xd5] = MMX_OP2(pmullw),
 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
@@ -2859,9 +2859,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xf9] = MMX_OP2(psubw),
 [0xfa] = MMX_OP2(psubl),
 [0xfb] = MMX_OP2(psubq),
-[0xfc] = MMX_OP2(paddb),
-[0xfd] = MMX_OP2(paddw),
-[0xfe] = MMX_OP2(paddl),
+[0xfc] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xfd] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xfe] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 };
 
 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
@@ -3166,6 +3166,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 gen_ld_modrm_VxHxWx,\
 gen_gvec_2_fp, (opctl))
 
+#define gen_padd_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0112)
+#define gen_padd_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0112)
+#define gen_vpadd_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0123)
+#define gen_vpadd_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3301,6 +3306,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
| (s->prefix & PREFIX_REPZ ? P_F3 : 0)
| (s->prefix & PREFIX_REPNZ ? P_F2 : 0)
| (s->prefix & PREFIX_VEX ? (s->vex_l ? VEX_256 : VEX_128) : 0)) {
+case 0xfc | M_0F:  gen_padd_mm(env, s, modrm, MO_8); 
return;
+case 0xfc | M_0F | P_66:   gen_padd_xmm(env, s, modrm, MO_8); 
return;
+case 0xfc | M_0F | P_66 | VEX_128: gen_vpadd_xmm(env, s, modrm, MO_8); 
return;
+case 0xfc | M_0F | P_66 | VEX_256: gen_vpadd_ymm(env, s, modrm, MO_8); 
return;
+
+case 0xfd | M_0F:  gen_padd_mm(env, s, modrm, MO_16); 
return;
+case 0xfd | M_0F | P_66:   gen_padd_xmm(env, s, modrm, MO_16); 
return;
+case 0xfd | M_0F | P_66 | VEX_128: gen_vpadd_xmm(env, s, modrm, MO_16); 
return;
+case 0xfd | M_0F | P_66 | VEX_256: gen_vpadd_ymm(env, s, modrm, MO_16); 
return;
+
+case 0xfe | M_0F:  gen_padd_mm(env, s, modrm, MO_32); 
return;
+case 0xfe | M_0F | P_66:   gen_padd_xmm(env, s, modrm, MO_32); 
return;
+case 0xfe | M_0F | P_66 | VEX_128: gen_vpadd_xmm(env, s, modrm, MO_32); 
return;
+case 0xfe | M_0F | P_66 | VEX_256: gen_vpadd_ymm(env, s, modrm, MO_32); 
return;
+
+case 0xd4 | M_0F:  ge

[Qemu-devel] [RFC PATCH v1 17/22] target/i386: reimplement (V)PSUBUS(B, W)

2019-07-31 Thread Jan Bobek
Reimplement (V)PSUBUS(B, W) on top of the gvec infrastructure
(tcg_gen_gvec_ussub) and remove the psubusb/psubusw helpers it
replaces.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  6 --
 target/i386/ops_sse_header.h |  4 
 target/i386/translate.c  | 19 +--
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index f948adbc68..4f00f3273d 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -337,8 +337,6 @@ static inline int satsw(int x)
 }
 }
 
-#define FSUBUB(a, b) satub((a) - (b))
-#define FSUBUW(a, b) satuw((a) - (b))
 #define FMINUB(a, b) ((a) < (b)) ? (a) : (b)
 #define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b)
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
@@ -357,10 +355,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_psubusb, FSUBUB)
-
-SSE_HELPER_W(helper_psubusw, FSUBUW)
-
 SSE_HELPER_B(helper_pminub, FMINUB)
 SSE_HELPER_B(helper_pmaxub, FMAXUB)
 
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 20fb8aeccc..829c132ae4 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,10 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(psubusb, FSUBUB)
-
-SSE_HELPER_W(psubusw, FSUBUW)
-
 SSE_HELPER_B(pminub, FMINUB)
 SSE_HELPER_B(pmaxub, FMAXUB)
 
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 894471861d..5b19e9ac4b 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2822,8 +2822,8 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xd5] = MMX_OP2(pmullw),
 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
-[0xd8] = MMX_OP2(psubusb),
-[0xd9] = MMX_OP2(psubusw),
+[0xd8] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xd9] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xda] = MMX_OP2(pminub),
 [0xdb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdc] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
@@ -3191,6 +3191,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpsubs_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sssub, 0123)
 #define gen_vpsubs_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sssub, 0123)
 
+#define gen_psubus_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), 
(s), (modrm), (vece), tcg_gen_gvec_ussub, 0112)
+#define gen_psubus_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), 
(s), (modrm), (vece), tcg_gen_gvec_ussub, 0112)
+#define gen_vpsubus_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), 
(s), (modrm), (vece), tcg_gen_gvec_ussub, 0123)
+#define gen_vpsubus_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), 
(s), (modrm), (vece), tcg_gen_gvec_ussub, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3386,6 +3391,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xfb | M_0F | P_66 | VEX_128: gen_vpsub_xmm(env, s, modrm, MO_64); 
return;
 case 0xfb | M_0F | P_66 | VEX_256: gen_vpsub_ymm(env, s, modrm, MO_64); 
return;
 
+case 0xd8 | M_0F:  gen_psubus_mm(env, s, modrm, MO_8); 
return;
+case 0xd8 | M_0F | P_66:   gen_psubus_xmm(env, s, modrm, MO_8); 
return;
+case 0xd8 | M_0F | P_66 | VEX_128: gen_vpsubus_xmm(env, s, modrm, MO_8); 
return;
+case 0xd8 | M_0F | P_66 | VEX_256: gen_vpsubus_ymm(env, s, modrm, MO_8); 
return;
+
+case 0xd9 | M_0F:  gen_psubus_mm(env, s, modrm, MO_16); 
return;
+case 0xd9 | M_0F | P_66:   gen_psubus_xmm(env, s, modrm, MO_16); 
return;
+case 0xd9 | M_0F | P_66 | VEX_128: gen_vpsubus_xmm(env, s, modrm, MO_16); 
return;
+case 0xd9 | M_0F | P_66 | VEX_256: gen_vpsubus_ymm(env, s, modrm, MO_16); 
return;
+
 case 0xe8 | M_0F:  gen_psubs_mm(env, s, modrm, MO_8); 
return;
 case 0xe8 | M_0F | P_66:   gen_psubs_xmm(env, s, modrm, MO_8); 
return;
 case 0xe8 | M_0F | P_66 | VEX_128: gen_vpsubs_xmm(env, s, modrm, MO_8); 
return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 18/22] target/i386: reimplement (V)PMINSW

2019-07-31 Thread Jan Bobek
Reimplement (V)PMINSW on top of the gvec infrastructure
(tcg_gen_gvec_smin with MO_16 elements) and remove the pminsw
helper it replaces.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  2 --
 target/i386/ops_sse_header.h |  1 -
 target/i386/translate.c  | 12 +++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 4f00f3273d..f57eaa2c77 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -338,7 +338,6 @@ static inline int satsw(int x)
 }
 
 #define FMINUB(a, b) ((a) < (b)) ? (a) : (b)
-#define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b)
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
@@ -358,7 +357,6 @@ static inline int satsw(int x)
 SSE_HELPER_B(helper_pminub, FMINUB)
 SSE_HELPER_B(helper_pmaxub, FMAXUB)
 
-SSE_HELPER_W(helper_pminsw, FMINSW)
 SSE_HELPER_W(helper_pmaxsw, FMAXSW)
 
 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 829c132ae4..a7f99e5427 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -63,7 +63,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 SSE_HELPER_B(pminub, FMINUB)
 SSE_HELPER_B(pmaxub, FMAXUB)
 
-SSE_HELPER_W(pminsw, FMINSW)
 SSE_HELPER_W(pmaxsw, FMAXSW)
 
 SSE_HELPER_B(pcmpgtb, FCMPGTB)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 5b19e9ac4b..d601c6d4c2 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2840,7 +2840,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
 [0xe8] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xe9] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
-[0xea] = MMX_OP2(pminsw),
+[0xea] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xeb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xec] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xed] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
@@ -3196,6 +3196,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpsubus_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), 
(s), (modrm), (vece), tcg_gen_gvec_ussub, 0123)
 #define gen_vpsubus_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), 
(s), (modrm), (vece), tcg_gen_gvec_ussub, 0123)
 
+#define gen_pmins_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_smin, 0112)
+#define gen_pmins_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_smin, 0112)
+#define gen_vpmins_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smin, 0123)
+#define gen_vpmins_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_smin, 0123)
+
 #define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
 #define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
@@ -3411,6 +3416,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xe9 | M_0F | P_66 | VEX_128: gen_vpsubs_xmm(env, s, modrm, MO_16); 
return;
 case 0xe9 | M_0F | P_66 | VEX_256: gen_vpsubs_ymm(env, s, modrm, MO_16); 
return;
 
+case 0xea | M_0F:  gen_pmins_mm(env, s, modrm, MO_16); 
return;
+case 0xea | M_0F | P_66:   gen_pmins_xmm(env, s, modrm, MO_16); 
return;
+case 0xea | M_0F | P_66 | VEX_128: gen_vpmins_xmm(env, s, modrm, MO_16); 
return;
+case 0xea | M_0F | P_66 | VEX_256: gen_vpmins_ymm(env, s, modrm, MO_16); 
return;
+
 case 0xdb | M_0F:  gen_pand_mm(env, s, modrm); return;
 case 0xdb | M_0F | P_66:   gen_pand_xmm(env, s, modrm); return;
 case 0xdb | M_0F | P_66 | VEX_128: gen_vpand_xmm(env, s, modrm); return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 09/22] target/i386: reimplement (V)POR, (V)ORPS, (V)ORPD

2019-07-31 Thread Jan Bobek
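Reimplement (V)POR on top of the gvec infrastructure
(tcg_gen_gvec_or) and remove the por helper it replaces. The
(V)ORPS and (V)ORPD generators are defined as aliases of the
corresponding (V)POR generators.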

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  2 --
 target/i386/ops_sse_header.h |  1 -
 target/i386/translate.c  | 27 +--
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index b3ba23287d..8b4ac9115e 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -354,7 +354,6 @@ static inline int satsw(int x)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
 #define FANDN(a, b) ((~(a)) & (b))
-#define FOR(a, b) ((a) | (b))
 #define FXOR(a, b) ((a) ^ (b))
 
 #define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
@@ -397,7 +396,6 @@ SSE_HELPER_W(helper_pminsw, FMINSW)
 SSE_HELPER_W(helper_pmaxsw, FMAXSW)
 
 SSE_HELPER_Q(helper_pandn, FANDN)
-SSE_HELPER_Q(helper_por, FOR)
 SSE_HELPER_Q(helper_pxor, FXOR)
 
 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 63b4376389..6a732ee489 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -87,7 +87,6 @@ SSE_HELPER_W(pminsw, FMINSW)
 SSE_HELPER_W(pmaxsw, FMAXSW)
 
 SSE_HELPER_Q(pandn, FANDN)
-SSE_HELPER_Q(por, FOR)
 SSE_HELPER_Q(pxor, FXOR)
 
 SSE_HELPER_B(pcmpgtb, FCMPGTB)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 3821733a4e..28cd84432d 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2758,7 +2758,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
 [0x54] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* andps, andpd */
 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd 
*/
-[0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
+[0x56] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* orps, orpd */
 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
 [0x58] = SSE_FOP(add),
 [0x59] = SSE_FOP(mul),
@@ -2841,7 +2841,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xe8] = MMX_OP2(psubsb),
 [0xe9] = MMX_OP2(psubsw),
 [0xea] = MMX_OP2(pminsw),
-[0xeb] = MMX_OP2(por),
+[0xeb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xec] = MMX_OP2(paddsb),
 [0xed] = MMX_OP2(paddsw),
 [0xee] = MMX_OP2(pmaxsw),
@@ -3177,6 +3177,17 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vandpd_xmm gen_vpand_xmm
 #define gen_vandpd_ymm gen_vpand_ymm
 
+#define gen_por_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0112)
+#define gen_por_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0112)
+#define gen_vpor_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0123)
+#define gen_vpor_ymm(env, s, modrm) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0123)
+#define gen_orps_xmm  gen_por_xmm
+#define gen_vorps_xmm gen_vpor_xmm
+#define gen_vorps_ymm gen_vpor_ymm
+#define gen_orpd_xmm  gen_por_xmm
+#define gen_vorpd_xmm gen_vpor_xmm
+#define gen_vorpd_ymm gen_vpor_ymm
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
@@ -3278,6 +3289,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0x54 | M_0F | P_66:   gen_andpd_xmm(env, s, modrm); return;
 case 0x54 | M_0F | P_66 | VEX_128: gen_vandpd_xmm(env, s, modrm); return;
 case 0x54 | M_0F | P_66 | VEX_256: gen_vandpd_ymm(env, s, modrm); return;
+
+case 0xeb | M_0F:  gen_por_mm(env, s, modrm); return;
+case 0xeb | M_0F | P_66:   gen_por_xmm(env, s, modrm); return;
+case 0xeb | M_0F | P_66 | VEX_128: gen_vpor_xmm(env, s, modrm); return;
+case 0xeb | M_0F | P_66 | VEX_256: gen_vpor_ymm(env, s, modrm); return;
+case 0x56 | M_0F:  gen_orps_xmm(env, s, modrm); return;
+case 0x56 | M_0F | VEX_128:gen_vorps_xmm(env, s, modrm); return;
+case 0x56 | M_0F | VEX_256:gen_vorps_ymm(env, s, modrm); return;
+case 0x56 | M_0F | P_66:   gen_orpd_xmm(env, s, modrm); return;
+case 0x56 | M_0F | P_66 | VEX_128: gen_vorpd_xmm(env, s, modrm); return;
+case 0x56 | M_0F | P_66 | VEX_256: gen_vorpd_ymm(env, s, modrm); return;
+
 default: break;
 }
 
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 14/22] target/i386: reimplement (V)PADDS(B, W)

2019-07-31 Thread Jan Bobek
Reimplement (V)PADDS(B, W) on top of the gvec infrastructure
(tcg_gen_gvec_ssadd) and remove the paddsb/paddsw helpers it
replaces.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  4 
 target/i386/ops_sse_header.h |  2 --
 target/i386/translate.c  | 19 +--
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 59935a65be..8829dcb781 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -339,8 +339,6 @@ static inline int satsw(int x)
 
 #define FADDUB(a, b) satub((a) + (b))
 #define FADDUW(a, b) satuw((a) + (b))
-#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
-#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))
 
 #define FSUBUB(a, b) satub((a) - (b))
 #define FSUBUW(a, b) satuw((a) - (b))
@@ -365,12 +363,10 @@ static inline int satsw(int x)
 #endif
 
 SSE_HELPER_B(helper_paddusb, FADDUB)
-SSE_HELPER_B(helper_paddsb, FADDSB)
 SSE_HELPER_B(helper_psubusb, FSUBUB)
 SSE_HELPER_B(helper_psubsb, FSUBSB)
 
 SSE_HELPER_W(helper_paddusw, FADDUW)
-SSE_HELPER_W(helper_paddsw, FADDSW)
 SSE_HELPER_W(helper_psubusw, FSUBUW)
 SSE_HELPER_W(helper_psubsw, FSUBSW)
 
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index bcdbac99a0..78203e80a5 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -61,12 +61,10 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
 SSE_HELPER_B(paddusb, FADDUB)
-SSE_HELPER_B(paddsb, FADDSB)
 SSE_HELPER_B(psubusb, FSUBUB)
 SSE_HELPER_B(psubsb, FSUBSB)
 
 SSE_HELPER_W(paddusw, FADDUW)
-SSE_HELPER_W(paddsw, FADDSW)
 SSE_HELPER_W(psubusw, FSUBUW)
 SSE_HELPER_W(psubsw, FSUBSW)
 
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 6f4dfd06a1..5ea5014d99 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2842,8 +2842,8 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xe9] = MMX_OP2(psubsw),
 [0xea] = MMX_OP2(pminsw),
 [0xeb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
-[0xec] = MMX_OP2(paddsb),
-[0xed] = MMX_OP2(paddsw),
+[0xec] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
+[0xed] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xee] = MMX_OP2(pmaxsw),
 [0xef] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
@@ -3171,6 +3171,11 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vpadd_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0123)
 #define gen_vpadd_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_add, 0123)
 
+#define gen_padds_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_ssadd, 0112)
+#define gen_padds_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_ssadd, 0112)
+#define gen_vpadds_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_ssadd, 0123)
+#define gen_vpadds_ymm(env, s, modrm, vece) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), (vece), tcg_gen_gvec_ssadd, 0123)
+
 #define gen_psub_mm(env, s, modrm, vece)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0112)
 #define gen_psub_xmm(env, s, modrm, vece)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0112)
 #define gen_vpsub_xmm(env, s, modrm, vece) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), (vece), tcg_gen_gvec_sub, 0123)
@@ -3331,6 +3336,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0xd4 | M_0F | P_66 | VEX_128: gen_vpadd_xmm(env, s, modrm, MO_64); 
return;
 case 0xd4 | M_0F | P_66 | VEX_256: gen_vpadd_ymm(env, s, modrm, MO_64); 
return;
 
+case 0xec | M_0F:  gen_padds_mm(env, s, modrm, MO_8); 
return;
+case 0xec | M_0F | P_66:   gen_padds_xmm(env, s, modrm, MO_8); 
return;
+case 0xec | M_0F | P_66 | VEX_128: gen_vpadds_xmm(env, s, modrm, MO_8); 
return;
+case 0xec | M_0F | P_66 | VEX_256: gen_vpadds_ymm(env, s, modrm, MO_8); 
return;
+
+case 0xed | M_0F:  gen_padds_mm(env, s, modrm, MO_16); 
return;
+case 0xed | M_0F | P_66:   gen_padds_xmm(env, s, modrm, MO_16); 
return;
+case 0xed | M_0F | P_66 | VEX_128: gen_vpadds_xmm(env, s, modrm, MO_16); 
return;
+case 0xed | M_0F | P_66 | VEX_256: gen_vpadds_ymm(env, s, modrm, MO_16); 
return;
+
 case 0xf8 | M_0F:  gen_psub_mm(env, s, modrm, MO_8); 
return;
 case 0xf8 | M_0F | P_66:   gen_psub_xmm(env, s, modrm, MO_8); 
return;
 case 0xf8 | M_0F | P_66 | VEX_128: gen_vpsub_xmm(env, s, modrm, MO_8); 
return;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 10/22] target/i386: reimplement (V)PXOR, (V)XORPS, (V)XORPD

2019-07-31 Thread Jan Bobek
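Reimplement (V)PXOR on top of the gvec infrastructure
(tcg_gen_gvec_xor) and remove the pxor helper it replaces. The
(V)XORPS and (V)XORPD generators are defined as aliases of the
corresponding (V)PXOR generators.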

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  2 --
 target/i386/ops_sse_header.h |  1 -
 target/i386/translate.c  | 26 --
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 8b4ac9115e..68dbeda047 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -354,7 +354,6 @@ static inline int satsw(int x)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
 #define FANDN(a, b) ((~(a)) & (b))
-#define FXOR(a, b) ((a) ^ (b))
 
 #define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
 #define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
@@ -396,7 +395,6 @@ SSE_HELPER_W(helper_pminsw, FMINSW)
 SSE_HELPER_W(helper_pmaxsw, FMAXSW)
 
 SSE_HELPER_Q(helper_pandn, FANDN)
-SSE_HELPER_Q(helper_pxor, FXOR)
 
 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
 SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 6a732ee489..a98b9f8f3f 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -87,7 +87,6 @@ SSE_HELPER_W(pminsw, FMINSW)
 SSE_HELPER_W(pmaxsw, FMAXSW)
 
 SSE_HELPER_Q(pandn, FANDN)
-SSE_HELPER_Q(pxor, FXOR)
 
 SSE_HELPER_B(pcmpgtb, FCMPGTB)
 SSE_HELPER_W(pcmpgtw, FCMPGTW)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 28cd84432d..cfe285e3e5 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2759,7 +2759,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0x54] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* andps, andpd */
 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd 
*/
 [0x56] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* orps, orpd */
-[0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
+[0x57] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* xorps, xorpd */
 [0x58] = SSE_FOP(add),
 [0x59] = SSE_FOP(mul),
 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
@@ -2845,7 +2845,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xec] = MMX_OP2(paddsb),
 [0xed] = MMX_OP2(paddsw),
 [0xee] = MMX_OP2(pmaxsw),
-[0xef] = MMX_OP2(pxor),
+[0xef] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
 [0xf1] = MMX_OP2(psllw),
 [0xf2] = MMX_OP2(pslld),
@@ -3188,6 +3188,17 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vorpd_xmm gen_vpor_xmm
 #define gen_vorpd_ymm gen_vpor_ymm
 
+#define gen_pxor_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_xor, 0112)
+#define gen_pxor_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_xor, 0112)
+#define gen_vpxor_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_xor, 0123)
+#define gen_vpxor_ymm(env, s, modrm) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_xor, 0123)
+#define gen_xorps_xmm  gen_pxor_xmm
+#define gen_vxorps_xmm gen_vpxor_xmm
+#define gen_vxorps_ymm gen_vpxor_ymm
+#define gen_xorpd_xmm  gen_pxor_xmm
+#define gen_vxorpd_xmm gen_vpxor_xmm
+#define gen_vxorpd_ymm gen_vpxor_ymm
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
@@ -3301,6 +3312,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0x56 | M_0F | P_66 | VEX_128: gen_vorpd_xmm(env, s, modrm); return;
 case 0x56 | M_0F | P_66 | VEX_256: gen_vorpd_ymm(env, s, modrm); return;
 
+case 0xef | M_0F:  gen_pxor_mm(env, s, modrm); return;
+case 0xef | M_0F | P_66:   gen_pxor_xmm(env, s, modrm); return;
+case 0xef | M_0F | P_66 | VEX_128: gen_vpxor_xmm(env, s, modrm); return;
+case 0xef | M_0F | P_66 | VEX_256: gen_vpxor_ymm(env, s, modrm); return;
+case 0x57 | M_0F:  gen_xorps_xmm(env, s, modrm); return;
+case 0x57 | M_0F | VEX_128:gen_vxorps_xmm(env, s, modrm); return;
+case 0x57 | M_0F | VEX_256:gen_vxorps_ymm(env, s, modrm); return;
+case 0x57 | M_0F | P_66:   gen_xorpd_xmm(env, s, modrm); return;
+case 0x57 | M_0F | P_66 | VEX_128: gen_vxorpd_xmm(env, s, modrm); return;
+case 0x57 | M_0F | P_66 | VEX_256: gen_vxorpd_ymm(env, s, modrm); return;
+
 default: break;
 }
 
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 11/22] target/i386: reimplement (V)PANDN, (V)ANDNPS, (V)ANDNPD

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h        |  4 ----
 target/i386/ops_sse_header.h |  2 --
 target/i386/translate.c      | 26 ++++++++++++++++++++++++--
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 68dbeda047..84562a4536 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -353,8 +353,6 @@ static inline int satsw(int x)
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
-#define FANDN(a, b) ((~(a)) & (b))
-
 #define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
 #define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
 #define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
@@ -394,8 +392,6 @@ SSE_HELPER_B(helper_pmaxub, FMAXUB)
 SSE_HELPER_W(helper_pminsw, FMINSW)
 SSE_HELPER_W(helper_pmaxsw, FMAXSW)
 
-SSE_HELPER_Q(helper_pandn, FANDN)
-
 SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
 SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
 SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index a98b9f8f3f..abd00ca69d 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -86,8 +86,6 @@ SSE_HELPER_B(pmaxub, FMAXUB)
 SSE_HELPER_W(pminsw, FMINSW)
 SSE_HELPER_W(pmaxsw, FMAXSW)
 
-SSE_HELPER_Q(pandn, FANDN)
-
 SSE_HELPER_B(pcmpgtb, FCMPGTB)
 SSE_HELPER_W(pcmpgtw, FCMPGTW)
 SSE_HELPER_L(pcmpgtl, FCMPGTL)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index cfe285e3e5..69e9514679 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2757,7 +2757,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
 [0x54] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* andps, andpd */
-[0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd 
*/
+[0x55] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* andnps, andnpd */
 [0x56] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* orps, orpd */
 [0x57] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* xorps, xorpd */
 [0x58] = SSE_FOP(add),
@@ -2829,7 +2829,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xdc] = MMX_OP2(paddusb),
 [0xdd] = MMX_OP2(paddusw),
 [0xde] = MMX_OP2(pmaxub),
-[0xdf] = MMX_OP2(pandn),
+[0xdf] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xe0] = MMX_OP2(pavgb),
 [0xe1] = MMX_OP2(psraw),
 [0xe2] = MMX_OP2(psrad),
@@ -3177,6 +3177,17 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 #define gen_vandpd_xmm gen_vpand_xmm
 #define gen_vandpd_ymm gen_vpand_ymm
 
+#define gen_pandn_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_andc, 0121)
+#define gen_pandn_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_andc, 0121)
+#define gen_vpandn_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_andc, 0132)
+#define gen_vpandn_ymm(env, s, modrm) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_andc, 0132)
+#define gen_andnps_xmm  gen_pandn_xmm
+#define gen_vandnps_xmm gen_vpandn_xmm
+#define gen_vandnps_ymm gen_vpandn_ymm
+#define gen_andnpd_xmm  gen_pandn_xmm
+#define gen_vandnpd_xmm gen_vpandn_xmm
+#define gen_vandnpd_ymm gen_vpandn_ymm
+
 #define gen_por_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0112)
 #define gen_por_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0112)
 #define gen_vpor_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_or, 0123)
@@ -3301,6 +3312,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 case 0x54 | M_0F | P_66 | VEX_128: gen_vandpd_xmm(env, s, modrm); return;
 case 0x54 | M_0F | P_66 | VEX_256: gen_vandpd_ymm(env, s, modrm); return;
 
+case 0xdf | M_0F:  gen_pandn_mm(env, s, modrm); return;
+case 0xdf | M_0F | P_66:   gen_pandn_xmm(env, s, modrm); return;
+case 0xdf | M_0F | P_66 | VEX_128: gen_vpandn_xmm(env, s, modrm); return;
+case 0xdf | M_0F | P_66 | VEX_256: gen_vpandn_ymm(env, s, modrm); return;
+case 0x55 | M_0F:  gen_andnps_xmm(env, s, modrm); return;
+case 0x55 | M_0F | VEX_128:gen_vandnps_xmm(env, s, modrm); return;
+case 0x55 | M_0F | VEX_256:gen_vandnps_ymm(env, s, modrm); return;
+case 0x55 | M_0F | P_66:   gen_andnpd_xmm(env, s, modrm); return;
+case 0x55 | M_0F | P_66 | VEX_128: gen_vandnpd_xmm(env, s, modrm); return;
+case 0x55 | M_0F | P_66 | VEX_256: gen_vandnpd_ymm(env, s, modrm); return;
+
 case 0xeb | M_0F:  gen_por_mm(env, s, modrm); return;
 case 0xeb | M_0F | P_66:   

[Qemu-devel] [RFC PATCH v1 08/22] target/i386: reimplement (V)PAND, (V)ANDPS, (V)ANDPD

2019-07-31 Thread Jan Bobek
Use the gvec infrastructure to achieve the desired functionality.

Note: This commit adds several bits which will not be part of the
final patch series and which are only present to allow for an incremental
write-and-test development cycle. Notably, the SSE_TOMBSTONE define
will go away entirely with all of the tables, and nothing will follow
the new dispatch switch in gen_sse.

Signed-off-by: Jan Bobek 
---
 target/i386/ops_sse.h|  2 --
 target/i386/ops_sse_header.h |  1 -
 target/i386/translate.c  | 49 ++--
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index ed05989768..b3ba23287d 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -353,7 +353,6 @@ static inline int satsw(int x)
 #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
 #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
 
-#define FAND(a, b) ((a) & (b))
 #define FANDN(a, b) ((~(a)) & (b))
 #define FOR(a, b) ((a) | (b))
 #define FXOR(a, b) ((a) ^ (b))
@@ -397,7 +396,6 @@ SSE_HELPER_B(helper_pmaxub, FMAXUB)
 SSE_HELPER_W(helper_pminsw, FMINSW)
 SSE_HELPER_W(helper_pmaxsw, FMAXSW)
 
-SSE_HELPER_Q(helper_pand, FAND)
 SSE_HELPER_Q(helper_pandn, FANDN)
 SSE_HELPER_Q(helper_por, FOR)
 SSE_HELPER_Q(helper_pxor, FXOR)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 094aafc573..63b4376389 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -86,7 +86,6 @@ SSE_HELPER_B(pmaxub, FMAXUB)
 SSE_HELPER_W(pminsw, FMINSW)
 SSE_HELPER_W(pmaxsw, FMAXSW)
 
-SSE_HELPER_Q(pand, FAND)
 SSE_HELPER_Q(pandn, FANDN)
 SSE_HELPER_Q(por, FOR)
 SSE_HELPER_Q(pxor, FXOR)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index d576b3345c..3821733a4e 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -23,6 +23,7 @@
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg-op.h"
+#include "tcg-op-gvec.h"
 #include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 
@@ -2723,6 +2724,7 @@ typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr 
reg_a, TCGv_ptr reg_b,
 
 #define SSE_SPECIAL ((void *)1)
 #define SSE_DUMMY ((void *)2)
+#define SSE_TOMBSTONE ((void *)3)
 
 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
@@ -2754,7 +2756,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0x51] = SSE_FOP(sqrt),
 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
-[0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
+[0x54] = { SSE_TOMBSTONE, SSE_TOMBSTONE }, /* andps, andpd */
 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd 
*/
 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
@@ -2823,7 +2825,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
 [0xd8] = MMX_OP2(psubusb),
 [0xd9] = MMX_OP2(psubusw),
 [0xda] = MMX_OP2(pminub),
-[0xdb] = MMX_OP2(pand),
+[0xdb] = { SSE_TOMBSTONE, SSE_TOMBSTONE },
 [0xdc] = MMX_OP2(paddusb),
 [0xdd] = MMX_OP2(paddusw),
 [0xde] = MMX_OP2(pmaxub),
@@ -3164,6 +3166,17 @@ static inline void gen_gvec_ld_modrm_3(CPUX86State *env, 
DisasContext *s,
 gen_ld_modrm_VxHxWx,\
 gen_gvec_2_fp, (opctl))
 
+#define gen_pand_mm(env, s, modrm)   gen_gvec_ld_modrm_mm  ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
+#define gen_pand_xmm(env, s, modrm)  gen_gvec_ld_modrm_xmm ((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0112)
+#define gen_vpand_xmm(env, s, modrm) gen_gvec_ld_modrm_vxmm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
+#define gen_vpand_ymm(env, s, modrm) gen_gvec_ld_modrm_vymm((env), (s), 
(modrm), MO_64, tcg_gen_gvec_and, 0123)
+#define gen_andps_xmm  gen_pand_xmm
+#define gen_vandps_xmm gen_vpand_xmm
+#define gen_vandps_ymm gen_vpand_ymm
+#define gen_andpd_xmm  gen_pand_xmm
+#define gen_vandpd_xmm gen_vpand_xmm
+#define gen_vandpd_ymm gen_vpand_ymm
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
@@ -3238,6 +3251,38 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b)
 reg |= REX_R(s);
 }
 mod = (modrm >> 6) & 3;
+
+enum {
+M_0F= 0x01 << 8,
+M_0F38  = 0x02 << 8,
+M_0F3A  = 0x04 << 8,
+P_66= 0x08 << 8,
+P_F3= 0x10 << 8,
+P_F2= 0x20 << 8,
+VEX_128 = 0x40 << 8,
+VEX_256 = 0x80 << 8,
+};
+
+switch(b | M_0F
+   | (s->prefix & PREFIX_DATA 

[Qemu-devel] [RFC PATCH v1 05/22] target/i386: introduce gen_ld_modrm_* helpers

2019-07-31 Thread Jan Bobek
These help with decoding/loading ModR/M vector operands; the operand's
register offset is returned, which is suitable for use with gvec
infrastructure.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 47 +
 1 file changed, 47 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9e22eca2dc..7548677e1f 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3040,6 +3040,53 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] 
= {
 [0xdf] = AESNI_OP(aeskeygenassist),
 };
 
+static inline void gen_ld_modrm_PqQq(CPUX86State *env, DisasContext *s, int 
modrm,
+ uint32_t* dofs, uint32_t* aofs)
+{
+const int mod = (modrm >> 6) & 3;
+const int reg = (modrm >> 3) & 7; /* no REX_R */
+*dofs = offsetof(CPUX86State, fpregs[reg].mmx);
+
+if(mod == 3) {
+const int rm = modrm & 7; /* no REX_B */
+
+*aofs = offsetof(CPUX86State, fpregs[rm].mmx);
+} else {
+*aofs = offsetof(CPUX86State, mmx_t0);
+
+gen_lea_modrm(env, s, modrm);
+gen_ldq_env_A0(s, *aofs);
+}
+}
+
+static inline void gen_ld_modrm_VxWx(CPUX86State *env, DisasContext *s, int 
modrm,
+ uint32_t* dofs, uint32_t* aofs)
+{
+const int mod = (modrm >> 6) & 3;
+const int reg = ((modrm >> 3) & 7) | REX_R(s);
+*dofs = offsetof(CPUX86State, xmm_regs[reg]);
+
+if(mod == 3) {
+const int rm = (modrm & 7) | REX_B(s);
+
+*aofs = offsetof(CPUX86State, xmm_regs[rm]);
+} else {
+*aofs = offsetof(CPUX86State, xmm_t0);
+
+gen_lea_modrm(env, s, modrm);
+gen_ldo_env_A0(s, *aofs); /* FIXME this needs to load 32 bytes for YMM 
*/
+}
+}
+
+static inline void gen_ld_modrm_VxHxWx(CPUX86State *env, DisasContext *s, int 
modrm,
+   uint32_t* dofs, uint32_t* aofs, 
uint32_t* bofs)
+{
+assert(s->prefix & PREFIX_VEX);
+
+gen_ld_modrm_VxWx(env, s, modrm, dofs, bofs);
+*aofs = offsetof(CPUX86State, xmm_regs[s->vex_v]);
+}
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers

2019-07-31 Thread Jan Bobek
gen_gvec_ld_modrm_* helpers tie together a gen_ld_modrm_* helper and a
particular gvec operation, effectively handling a single instruction.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 77 +
 1 file changed, 77 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7548677e1f..d576b3345c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3087,6 +3087,83 @@ static inline void gen_ld_modrm_VxHxWx(CPUX86State *env, 
DisasContext *s, int mo
 *aofs = offsetof(CPUX86State, xmm_regs[s->vex_v]);
 }
 
+typedef void (*gen_ld_modrm_2_fp_t)(CPUX86State *env, DisasContext *s, int 
modrm,
+uint32_t *dofs, uint32_t *aofs);
+typedef void (*gen_ld_modrm_3_fp_t)(CPUX86State *env, DisasContext *s, int 
modrm,
+uint32_t *dofs, uint32_t *aofs, uint32_t 
*bofs);
+typedef void (*gen_gvec_2_fp_t)(unsigned vece, uint32_t dofs, uint32_t aofs,
+uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s,
+   int modrm, unsigned vece,
+   uint32_t oprsz, uint32_t maxsz,
+   gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp,
+   gen_gvec_2_fp_t gen_gvec_2_fp,
+   int opctl)
+{
+uint32_t ofss[2];
+
+const int opd = ((opctl >> 6) & 7) - 1;
+const int opa = ((opctl >> 3) & 7) - 1;
+const int opb = ((opctl >> 0) & 7) - 1;
+
+assert(0 <= opd && opd < 2);
+assert(0 <= opa && opa < 2);
+assert(0 <= opb && opb < 2);
+
+(*gen_ld_modrm_2_fp)(env, s, modrm, &ofss[0], &ofss[1]);
+(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
+}
+
+static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s,
+   int modrm, unsigned vece,
+   uint32_t oprsz, uint32_t maxsz,
+   gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp,
+   gen_gvec_2_fp_t gen_gvec_2_fp,
+   int opctl)
+{
+uint32_t ofss[3];
+
+const int opd = ((opctl >> 6) & 7) - 1;
+const int opa = ((opctl >> 3) & 7) - 1;
+const int opb = ((opctl >> 0) & 7) - 1;
+
+assert(0 <= opd && opd < 3);
+assert(0 <= opa && opa < 3);
+assert(0 <= opb && opb < 3);
+
+(*gen_ld_modrm_3_fp)(env, s, modrm, &ofss[0], &ofss[1], &ofss[2]);
+(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
+}
+
+#define gen_gvec_ld_modrm_mm(env, s, modrm, vece,   \
+ gen_gvec_2_fp, opctl)  \
+gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\
+sizeof(MMXReg), sizeof(MMXReg), \
+gen_ld_modrm_PqQq,  \
+gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_xmm(env, s, modrm, vece,  \
+  gen_gvec_2_fp, opctl) \
+gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\
+sizeof(XMMReg), sizeof(XMMReg), \
+gen_ld_modrm_VxWx,  \
+gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_vxmm(env, s, modrm, vece, \
+   gen_gvec_2_fp, opctl)\
+gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\
+sizeof(XMMReg), sizeof(ZMMReg), \
+gen_ld_modrm_VxHxWx,\
+gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_vymm(env, s, modrm, vece, \
+   gen_gvec_2_fp, opctl)\
+gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\
+sizeof(YMMReg), sizeof(ZMMReg), \
+gen_ld_modrm_VxHxWx,\
+gen_gvec_2_fp, (opctl))
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 01/22] target/i386: Push rex_r into DisasContext

2019-07-31 Thread Jan Bobek
From: Richard Henderson 

Treat this value the same as we do for rex_b and rex_x.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 85 +
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 03150a86e2..d74dbfd585 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -43,10 +43,12 @@
 #define CODE64(s) ((s)->code64)
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
+#define REX_R(s) ((s)->rex_r)
 #else
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
+#define REX_R(s) 0
 #endif
 
 #ifdef TARGET_X86_64
@@ -98,7 +100,7 @@ typedef struct DisasContext {
 #ifdef TARGET_X86_64
 int lma;/* long mode active */
 int code64; /* 64 bit code segment */
-int rex_x, rex_b;
+int rex_x, rex_b, rex_r;
 #endif
 int vex_l;  /* vex vector length */
 int vex_v;  /* vex vvvv register, without 1's complement.  */
@@ -3037,7 +3039,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 };
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-target_ulong pc_start, int rex_r)
+target_ulong pc_start)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
@@ -3107,8 +3109,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 
 modrm = x86_ldub_code(env, s);
 reg = ((modrm >> 3) & 7);
-if (is_xmm)
-reg |= rex_r;
+if (is_xmm) {
+reg |= REX_R(s);
+}
 mod = (modrm >> 6) & 3;
 if (sse_fn_epp == SSE_SPECIAL) {
 b |= (b1 << 8);
@@ -3642,7 +3645,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 tcg_gen_ld16u_tl(s->T0, cpu_env,
 
offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
 }
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 gen_op_mov_reg_v(s, ot, reg, s->T0);
 break;
 case 0x1d6: /* movq ea, xmm */
@@ -3686,7 +3689,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
  offsetof(CPUX86State, fpregs[rm].mmx));
 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
 }
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
 break;
 
@@ -3698,7 +3701,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 modrm = x86_ldub_code(env, s);
 rm = modrm & 7;
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 mod = (modrm >> 6) & 3;
 if (b1 >= 2) {
 goto unknown_op;
@@ -3774,7 +3777,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* Various integer extensions at 0f 38 f[0-f].  */
 b = modrm | (b1 << 8);
 modrm = x86_ldub_code(env, s);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 
 switch (b) {
 case 0x3f0: /* crc32 Gd,Eb */
@@ -4128,7 +4131,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 b = modrm;
 modrm = x86_ldub_code(env, s);
 rm = modrm & 7;
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 mod = (modrm >> 6) & 3;
 if (b1 >= 2) {
 goto unknown_op;
@@ -4148,7 +4151,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 rm = (modrm & 7) | REX_B(s);
 if (mod != 3)
 gen_lea_modrm(env, s, modrm);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 val = x86_ldub_code(env, s);
 switch (b) {
 case 0x14: /* pextrb */
@@ -4317,7 +4320,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* Various integer extensions at 0f 3a f[0-f].  */
 b = modrm | (b1 << 8);
 modrm = x86_ldub_code(env, s);
-reg = ((modrm >> 3) & 7) | rex_r;
+reg = ((modrm >> 3) & 7) | REX_R(s);
 
 switch (b) {
 case 0x3f0: /* rorx Gy,Ey, Ib */
@@ -4491,14 +4494,15 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 TCGMemOp ot, aflag, dflag;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
-int rex_w, rex_r;
 target_ulong pc_start = s->base.pc_next;
+int rex_w;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
 #ifdef TARGET_X86_64
 s->rex_x = 0;
 s->rex_b = 0;
+s->rex_r = 0;
 s->x86_64_hregs = false;
 #endif
 s->rip_offset = 

[Qemu-devel] [RFC PATCH v1 03/22] target/i386: Use prefix, aflag and dflag from DisasContext

2019-07-31 Thread Jan Bobek
From: Richard Henderson 

The variables are already there, we just have to hide the ones
in disas_insn so that we are forced to use them.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 299 
 1 file changed, 152 insertions(+), 147 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c0866c2797..692261f73f 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4491,13 +4491,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 {
 CPUX86State *env = cpu->env_ptr;
-int b, prefixes;
+int b;
 int shift;
-TCGMemOp ot, aflag, dflag;
+TCGMemOp ot;
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
 
+{
+int prefixes;
+TCGMemOp aflag, dflag;
+
 s->pc_start = s->pc = pc_start;
 s->override = -1;
 #ifdef TARGET_X86_64
@@ -4657,6 +4661,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->prefix = prefixes;
 s->aflag = aflag;
 s->dflag = dflag;
+}
 
 /* now check op code */
  reswitch:
@@ -4682,7 +4687,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 op = (b >> 3) & 7;
 f = (b >> 1) & 3;
 
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 switch(f) {
 case 0: /* OP Ev, Gv */
@@ -4740,7 +4745,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 {
 int val;
 
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -4777,16 +4782,16 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 /**/
 /* inc, dec, and other misc arith */
 case 0x40 ... 0x47: /* inc Gv */
-ot = dflag;
+ot = s->dflag;
 gen_inc(s, ot, OR_EAX + (b & 7), 1);
 break;
 case 0x48 ... 0x4f: /* dec Gv */
-ot = dflag;
+ot = s->dflag;
 gen_inc(s, ot, OR_EAX + (b & 7), -1);
 break;
 case 0xf6: /* GRP3 */
 case 0xf7:
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -5018,7 +5023,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 
 case 0xfe: /* GRP4 */
 case 0xff: /* GRP5 */
-ot = mo_b_d(b, dflag);
+ot = mo_b_d(b, s->dflag);
 
 modrm = x86_ldub_code(env, s);
 mod = (modrm >> 6) & 3;
@@ -5032,10 +5037,10 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 /* operand size for jumps is 64 bit */
 ot = MO_64;
 } else if (op == 3 || op == 5) {
-ot = dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
+ot = s->dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
 } else if (op == 6) {
 /* default push size is 64 bit */
-ot = mo_pushpop(s, dflag);
+ot = mo_pushpop(s, s->dflag);
 }
 }
 if (mod != 3) {
@@ -5063,7 +5068,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 break;
 case 2: /* call Ev */
 /* XXX: optimize if memory (no 'and' is necessary) */
-if (dflag == MO_16) {
+if (s->dflag == MO_16) {
 tcg_gen_ext16u_tl(s->T0, s->T0);
 }
 next_eip = s->pc - s->cs_base;
@@ -5081,19 +5086,19 @@ static target_ulong disas_insn(DisasContext *s, 
CPUState *cpu)
 if (s->pe && !s->vm86) {
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
-   tcg_const_i32(dflag - 1),
+   tcg_const_i32(s->dflag - 1),
tcg_const_tl(s->pc - s->cs_base));
 } else {
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
-  tcg_const_i32(dflag - 1),
+  tcg_const_i32(s->dflag - 1),
   tcg_const_i32(s->pc - s->cs_base));
 }
 tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
 gen_jr(s, s->tmp4);
 break;
 case 4: /* jmp Ev */
-if (dflag == MO_16) {
+if (s->dflag == MO_16) {
 tcg_gen_ext16u_tl(s->T0, s->T0);
 }
 gen_op_jmp_v(s->T0);
@@ -5126,7 +5131,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 
 case 0x84: /* 

[Qemu-devel] [RFC PATCH v1 07/22] target/i386: add vector register file alignment constraints

2019-07-31 Thread Jan Bobek
gvec operations require that all vectors be aligned on a 16-byte
boundary; make sure the MM/XMM/YMM/ZMM register file is aligned as
necessary.

Signed-off-by: Jan Bobek 
---
 target/i386/cpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8b3dc5533e..cb407b86ba 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1199,9 +1199,9 @@ typedef struct CPUX86State {
 float_status mmx_status; /* for 3DNow! float ops */
 float_status sse_status;
 uint32_t mxcsr;
-ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
-ZMMReg xmm_t0;
-MMXReg mmx_t0;
+ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] QEMU_ALIGNED(16);
+ZMMReg xmm_t0 QEMU_ALIGNED(16);
+MMXReg mmx_t0 QEMU_ALIGNED(8);
 
 XMMReg ymmh_regs[CPU_NB_REGS];
 
-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 04/22] target/i386: Simplify gen_exception arguments

2019-07-31 Thread Jan Bobek
From: Richard Henderson 

We can compute cur_eip from values present within DisasContext.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 89 -
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 692261f73f..9e22eca2dc 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -1272,10 +1272,10 @@ static void gen_helper_fp_arith_STN_ST0(int op, int 
opreg)
 }
 }
 
-static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
+static void gen_exception(DisasContext *s, int trapno)
 {
 gen_update_cc_op(s);
-gen_jmp_im(s, cur_eip);
+gen_jmp_im(s, s->pc_start - s->cs_base);
 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
 s->base.is_jmp = DISAS_NORETURN;
 }
@@ -1284,7 +1284,7 @@ static void gen_exception(DisasContext *s, int trapno, 
target_ulong cur_eip)
the instruction is known, but it isn't allowed in the current cpu mode.  */
 static void gen_illegal_opcode(DisasContext *s)
 {
-gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
+gen_exception(s, EXCP06_ILLOP);
 }
 
 /* if d == OR_TMP0, it means memory operand (address in A0) */
@@ -3040,8 +3040,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = 
{
 [0xdf] = AESNI_OP(aeskeygenassist),
 };
 
-static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-target_ulong pc_start)
+static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
 int modrm, mod, rm, reg;
@@ -3076,7 +3075,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 /* simple MMX/SSE operation */
 if (s->flags & HF_TS_MASK) {
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 return;
 }
 if (s->flags & HF_EM_MASK) {
@@ -4515,7 +4514,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->vex_l = 0;
 s->vex_v = 0;
 if (sigsetjmp(s->jmpbuf, 0) != 0) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 return s->pc;
 }
 
@@ -5854,7 +5853,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
 /* XXX: what to do if illegal op ? */
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 break;
 }
 modrm = x86_ldub_code(env, s);
@@ -6572,7 +6571,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 set_cc_op(s, CC_OP_EFLAGS);
 } else if (s->vm86) {
 if (s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag - 1));
 set_cc_op(s, CC_OP_EFLAGS);
@@ -6694,7 +6693,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9c: /* pushf */
 gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
 if (s->vm86 && s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 gen_update_cc_op(s);
 gen_helper_read_eflags(s->T0, cpu_env);
@@ -6704,7 +6703,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9d: /* popf */
 gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
 if (s->vm86 && s->iopl != 3) {
-gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+gen_exception(s, EXCP0D_GPF);
 } else {
 ot = gen_pop_T0(s);
 if (s->cpl == 0) {
@@ -7021,7 +7020,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 goto illegal_op;
 val = x86_ldub_code(env, s);
 if (val == 0) {
-gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+gen_exception(s, EXCP00_DIVZ);
 } else {
 gen_helper_aam(cpu_env, tcg_const_i32(val));
 set_cc_op(s, CC_OP_LOGICB);
@@ -7055,7 +7054,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x9b: /* fwait */
 if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
 (HF_MP_MASK | HF_TS_MASK)) {
-gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+gen_exception(s, EXCP07_PREX);
 } else {
 gen_helper_fwait(cpu_env);
 }
@@ -7066,7 +7065,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0xcd: /* int N */
 val = x86_ldub_code(env, s);
 if (s->vm86 && s->iopl != 3) {
-

[Qemu-devel] [RFC PATCH v1 00/22] reimplement (some) x86 vector instructions using tcg-gvec

2019-07-31 Thread Jan Bobek
This patch series is an early work-in-progress snapshot of my efforts
to utilize the TCG gvec infrastructure in the x86 frontend. Only a handful
of instructions have been converted (those which have a direct gvec
equivalent).

The dispatch switch for the converted instructions is sort of hacked
into gen_sse; this is obviously intended for development only.
Eventually, everything that follows this switch will be removed,
along with the SSE tables and all that goes along with it.

Cheers,
  -Jan

Jan Bobek (18):
  target/i386: introduce gen_ld_modrm_* helpers
  target/i386: introduce gen_gvec_ld_modrm_* helpers
  target/i386: add vector register file alignment constraints
  target/i386: reimplement (V)PAND, (V)ANDPS, (V)ANDPD
  target/i386: reimplement (V)POR, (V)ORPS, (V)ORPD
  target/i386: reimplement (V)PXOR, (V)XORPS, (V)XORPD
  target/i386: reimplement (V)PANDN, (V)ANDNPS, (V)ANDNPD
  target/i386: reimplement (V)PADD(B,W,D,Q)
  target/i386: reimplement (V)PSUB(B,W,D,Q)
  target/i386: reimplement (V)PADDS(B,W)
  target/i386: reimplement (V)PADDUS(B,W)
  target/i386: reimplement (V)PSUBS(B,W)
  target/i386: reimplement (V)PSUBUS(B,W)
  target/i386: reimplement (V)PMINSW
  target/i386: reimplement (V)PMINUB
  target/i386: reimplement (V)PMAXSW
  target/i386: reimplement (V)PMAXUB
  target/i386: reimplement (V)P(EQ,CMP)(B,W,D)

Richard Henderson (4):
  target/i386: Push rex_r into DisasContext
  target/i386: Push rex_w into DisasContext
  target/i386: Use prefix, aflag and dflag from DisasContext
  target/i386: Simplify gen_exception arguments

 target/i386/cpu.h|   6 +-
 target/i386/ops_sse.h|  65 ---
 target/i386/ops_sse_header.h |  39 --
 target/i386/translate.c  | 990 +--
 4 files changed, 723 insertions(+), 377 deletions(-)

-- 
2.20.1




[Qemu-devel] [RFC PATCH v1 02/22] target/i386: Push rex_w into DisasContext

2019-07-31 Thread Jan Bobek
From: Richard Henderson 

Treat this the same as we already do for other rex bits.

Signed-off-by: Richard Henderson 
---
 target/i386/translate.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index d74dbfd585..c0866c2797 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -44,11 +44,13 @@
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
 #define REX_R(s) ((s)->rex_r)
+#define REX_W(s) ((s)->rex_w)
 #else
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
 #define REX_R(s) 0
+#define REX_W(s) -1
 #endif
 
 #ifdef TARGET_X86_64
@@ -100,7 +102,7 @@ typedef struct DisasContext {
 #ifdef TARGET_X86_64
 int lma;/* long mode active */
 int code64; /* 64 bit code segment */
-int rex_x, rex_b, rex_r;
+int rex_x, rex_b, rex_r, rex_w;
 #endif
 int vex_l;  /* vex vector length */
 int vex_v;  /* vex vvvv register, without 1's complement.  */
@@ -4495,7 +4497,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
-int rex_w;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
@@ -4503,6 +4504,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rex_x = 0;
 s->rex_b = 0;
 s->rex_r = 0;
+s->rex_w = -1;
 s->x86_64_hregs = false;
 #endif
 s->rip_offset = 0; /* for relative ip address */
@@ -4514,7 +4516,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 }
 
 prefixes = 0;
-rex_w = -1;
 
  next_byte:
 b = x86_ldub_code(env, s);
@@ -4557,7 +4558,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 case 0x40 ... 0x4f:
 if (CODE64(s)) {
 /* REX prefix */
-rex_w = (b >> 3) & 1;
+s->rex_w = (b >> 3) & 1;
 s->rex_r = (b & 0x4) << 1;
 s->rex_x = (b & 0x2) << 2;
 s->rex_b = (b & 0x1) << 3;
@@ -4606,7 +4607,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rex_b = (~vex2 >> 2) & 8;
 #endif
 vex3 = x86_ldub_code(env, s);
-rex_w = (vex3 >> 7) & 1;
+#ifdef TARGET_X86_64
+s->rex_w = (vex3 >> 7) & 1;
+#endif
 switch (vex2 & 0x1f) {
 case 0x01: /* Implied 0f leading opcode bytes.  */
 b = x86_ldub_code(env, s) | 0x100;
@@ -4631,9 +4634,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 /* Post-process prefixes.  */
 if (CODE64(s)) {
 /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
-   data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
+   data with REX_W, and 16-bit data with 0x66; REX_W takes precedence
over 0x66 if both are present.  */
-dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
+dflag = (REX_W(s) > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : 
MO_32);
 /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
 aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
 } else {
@@ -5029,7 +5032,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 /* operand size for jumps is 64 bit */
 ot = MO_64;
 } else if (op == 3 || op == 5) {
-ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
+ot = dflag != MO_16 ? MO_32 + (REX_W(s) == 1) : MO_16;
 } else if (op == 6) {
 /* default push size is 64 bit */
 ot = mo_pushpop(s, dflag);
-- 
2.20.1




Re: [Qemu-devel] [RISU PATCH v3 18/18] x86.risu: add AVX2 instructions

2019-07-22 Thread Jan Bobek
On 7/20/19 8:46 PM, Richard Henderson wrote:
> On 7/11/19 3:33 PM, Jan Bobek wrote:
>> +# VEX.256.0F.WIG 28 /r: VMOVAPS ymm1, ymm2/m256
>> +# VEX.256.0F.WIG 29 /r: VMOVAPS ymm2/m256, ymm1
>> +VMOVAPS AVX2 0010100 d \
>> +  !constraints { vex($_, m => 0x0F, l => 256, v => 0); modrm($_); 1 } \
>> +  !memory { $d ? store(size => 32, align => 32) : load(size => 32, align => 32); }
> 
> I believe all of the floating-point 256-bit operations are actually AVX1.
> Which, I see, would annoyingly require a renaming, since that would put two
> VMOVAPS insns into the same group.

Yeah, and it is not just VMOVAPS, obviously.

> I wonder if it's worth calling the two groups AVX128 and AVX256 and ignoring the
> actual cpuid to which the insn is assigned?  Whichever way, they're still tied
> to the same --xstate value to indicate ymmh.

We could do that, but I think I like your idea below even better.

> Or could we fold the two insns together:
> 
> VMOVAPS AVX 0010100 d \
> !constraints { vex($_, m => 0x0F, v => 0); modrm($_); 1 } \
> !memory { my $len = $_->{vex}{l} / 8; \
>   $d ? store(size => $len, align => $len) \
>  : load(size => $len, align => $len); }

This is a really interesting idea. If inability to differentiate
between the two is acceptable for us, then I think this approach might
be cleaner, more concise, and remove some redundancy.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RISU PATCH v3 17/18] x86.risu: add AVX instructions

2019-07-22 Thread Jan Bobek
On 7/20/19 8:04 PM, Richard Henderson wrote:
> On 7/11/19 3:32 PM, Jan Bobek wrote:
>> +# VEX.LIG.F3.0F.WIG 10 /r: VMOVSS xmm1, xmm2, xmm3
>> +# VEX.LIG.F3.0F.WIG 10 /r: VMOVSS xmm1, m32
>> +# VEX.LIG.F3.0F.WIG 11 /r: VMOVSS xmm1, xmm2, xmm3
>> +# VEX.LIG.F3.0F.WIG 11 /r: VMOVSS m32, xmm1
>> +VMOVSS AVX 0001000 d \
>> +  !constraints { vex($_, m => 0x0F, l => 0, p => 0xF3); modrm($_); $_->{vex}{v} = 0 unless defined $_->{modrm}{reg2}; 1 } \
>> +  !memory { $d ? store(size => 4) : load(size => 4); }
> 
> Why the l => 0?  LIG does mean VEX.L ignored, so why not let it get randomized
> as you do for WIG?
> 
> Not wrong as is... this is the documented value for scalar operands.  But there
> is a different document markup, LZ, for required (E)VEX.L == 0.

I am aware of LIG vs. LZ. Quoting from the MOVSS manual page:

  Software should ensure VMOVSS is encoded with VEX.L=0. Encoding
  VMOVSS with VEX.L=1 may encounter unpredictable behavior across
  different processor generations.

"Unpredictable behavior" sounded a bit menacing to me, so I opted for
the conservative route. AFAICT all the scalar instructions have this
warning attached; I don't know why they differentiate between LIG and
LZ then, though. Do you think it's irrelevant?

-Jan



signature.asc
Description: OpenPGP digital signature


<    1   2   3   4   >