Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers

2019-08-02 Thread Jan Bobek
On 7/31/19 6:47 PM, Richard Henderson wrote:
> I suppose there aren't so many different combinations, but did you consider
> separate callbacks per operand?  If you have
> 
> typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int);
> 
> static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm)
> {
> int reg = (modrm >> 3) & 7; /* Ignore REX_R */
> return offsetof(CPUX86State, fpregs[reg].mmx);
> }
> 
> static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm)
> {
> int mod = (modrm >> 6) & 3;
> unsigned ret;
> 
> if (mod == 3) {
> int rm = modrm & 7; /* Ignore REX_B */
> ret = offsetof(CPUX86State, fpregs[rm].mmx);
> } else {
> ret = offsetof(CPUX86State, mmx_t0);
> gen_lea_modrm(env, s, modrm);
> gen_ldq_env_A0(s, ret);
> }
> return ret;
> }
> 
> static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm)
> {
> int reg = ((modrm >> 3) & 7) | REX_R(s);
> return offsetof(CPUX86State, xmm_regs[reg]);
> }
> 
> static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm)
> {
> int mod = (modrm >> 6) & 3;
> unsigned ret;
> 
> if (mod == 3) {
> int rm = (modrm & 7) | REX_B(s);
> ret = offsetof(CPUX86State, xmm_regs[rm]);
> } else {
> ret = offsetof(CPUX86State, xmm_t0);
> gen_lea_modrm(env, s, modrm);
> gen_ldo_env_A0(s, ret);
> }
> return ret;
> }
> 
> static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm)
> {
> return offsetof(CPUX86State, xmm_regs[s->vex_v]);
> }
> 
> Then you can have
> 
> #define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ)
> static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env,  \
> DisasContext *s, int modrm, unsigned vece,  gen_gvec_2_fp_t gen) \
> {   \
> int ofd = offset_##OP0(env, s, modrm);  \
> int of1 = offset_##OP1(env, s, modrm);  \
> int of2 = offset_##OP2(env, s, modrm);  \
> gen(vece, opd, opa, opb, OPRSZ, MAXSZ); \
> }
> 
> GEN_GVEC_3(Pq, Pq, Qq, sizeof(MMXReg), sizeof(MMXReg))
> GEN_GVEC_3(Vx, Vx, Wx, sizeof(XMMReg), max_vec_size(s))
> GEN_GVEC_3(Vx, Hx, Wx, sizeof(XMMReg), max_vec_size(s))
> 
> The PqPqQq and VxVxWx sub-strings aren't quite canonical, but imo a better fit
> to the actual format of the instruction, with 2 inputs and 1 output.

Funny, I had a similar idea and converged to almost identical
solution. This will be part of v2.

> You can also do
> 
> GEN_GVEC_3(Pq, Qq, Pq, sizeof(MMXReg), sizeof(MMXReg))
> 
> for those rare "reversed" operations like PANDN.  Now you don't need to carry
> around the OPCTL argument, which I initially found non-obvious.

Yup, solves the problem nicely and more clearly.

> I initially thought you'd be able to infer maxsz from the set of arguments, 
> but
> since there are vex encoded operations that do not use vex. that is not
> always the case.  Thus I suggest
> 
> static size_t max_vec_size(DisasContext *s)
> {
> if (s->prefixes & PREFIX_VEX) {
> /*
>  * TODO: When avx512 is supported and enabled, sizeof(ZMMReg).
>  * In the meantime don't waste time zeroing data that is not
>  * architecturally present.
>  */
> return sizeof(YMMReg);
> } else {
> /* Without vex encoding, only the low 128 bits are modified. */
> return sizeof(XMMReg);
> }
> }

Looks good.

-Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers

2019-07-31 Thread Richard Henderson
On 7/31/19 10:56 AM, Jan Bobek wrote:
> +static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s,
> +   int modrm, unsigned vece,
> +   uint32_t oprsz, uint32_t maxsz,
> +   gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp,
> +   gen_gvec_2_fp_t gen_gvec_2_fp,
> +   int opctl)
> +{
> +uint32_t ofss[2];
> +
> +const int opd = ((opctl >> 6) & 7) - 1;
> +const int opa = ((opctl >> 3) & 7) - 1;
> +const int opb = ((opctl >> 0) & 7) - 1;
> +
> +assert(0 <= opd && opd < 2);
> +assert(0 <= opa && opa < 2);
> +assert(0 <= opb && opb < 2);
> +
> +(*gen_ld_modrm_2_fp)(env, s, modrm, [0], [1]);
> +(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
> +}
> +
> +static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s,
> +   int modrm, unsigned vece,
> +   uint32_t oprsz, uint32_t maxsz,
> +   gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp,
> +   gen_gvec_2_fp_t gen_gvec_2_fp,
> +   int opctl)
> +{
> +uint32_t ofss[3];
> +
> +const int opd = ((opctl >> 6) & 7) - 1;
> +const int opa = ((opctl >> 3) & 7) - 1;
> +const int opb = ((opctl >> 0) & 7) - 1;
> +
> +assert(0 <= opd && opd < 3);
> +assert(0 <= opa && opa < 3);
> +assert(0 <= opb && opb < 3);
> +
> +(*gen_ld_modrm_3_fp)(env, s, modrm, [0], [1], [2]);
> +(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
> +}
> +> +#define gen_gvec_ld_modrm_mm(env, s, modrm, vece,   
> \> +
gen_gvec_2_fp, opctl)  \> +
gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\> +
 sizeof(MMXReg), sizeof(MMXReg), \> +
  gen_ld_modrm_PqQq,  \> +
   gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_xmm(env, s,
modrm, vece,  \> +
gen_gvec_2_fp, opctl) \> +gen_gvec_ld_modrm_2((env),
(s), (modrm), (vece),\> +
sizeof(XMMReg), sizeof(XMMReg), \> +
gen_ld_modrm_VxWx,  \> +
gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vxmm(env, s, modrm,
vece, \> +   gen_gvec_2_fp,
opctl)\> +gen_gvec_ld_modrm_3((env), (s), (modrm),
(vece),\> +sizeof(XMMReg),
sizeof(ZMMReg), \> +
gen_ld_modrm_VxHxWx,\> +
gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vymm(env, s, modrm,
vece, \> +   gen_gvec_2_fp,
opctl)\> +gen_gvec_ld_modrm_3((env), (s), (modrm),
(vece),\> +sizeof(YMMReg),
sizeof(ZMMReg), \> +
gen_ld_modrm_VxHxWx,\> +
gen_gvec_2_fp, (opctl))

I suppose there aren't so many different combinations, but did you consider
separate callbacks per operand?  If you have

typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int);

static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm)
{
int reg = (modrm >> 3) & 7; /* Ignore REX_R */
return offsetof(CPUX86State, fpregs[reg].mmx);
}

static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm)
{
int mod = (modrm >> 6) & 3;
unsigned ret;

if (mod == 3) {
int rm = modrm & 7; /* Ignore REX_B */
ret = offsetof(CPUX86State, fpregs[rm].mmx);
} else {
ret = offsetof(CPUX86State, mmx_t0);
gen_lea_modrm(env, s, modrm);
gen_ldq_env_A0(s, ret);
}
return ret;
}

static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm)
{
int reg = ((modrm >> 3) & 7) | REX_R(s);
return offsetof(CPUX86State, xmm_regs[reg]);
}

static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm)
{
int mod = (modrm >> 6) & 3;
unsigned ret;

if (mod == 3) {
int rm = (modrm & 7) | REX_B(s);
ret = offsetof(CPUX86State, xmm_regs[rm]);
} else {
ret = offsetof(CPUX86State, xmm_t0);
gen_lea_modrm(env, s, modrm);
gen_ldo_env_A0(s, ret);
}
return ret;
}

static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm)
{
return offsetof(CPUX86State, xmm_regs[s->vex_v]);
}

Then you can have

#define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ)
static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env,  \
DisasContext *s, int modrm, unsigned vece,  gen_gvec_2_fp_t gen) \
{   \
int 

[Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers

2019-07-31 Thread Jan Bobek
gen_gvec_ld_modrm_* helpers tie together a gen_ld_modrm_* helper and a
particular gvec operation, effectively handling a single instruction.

Signed-off-by: Jan Bobek 
---
 target/i386/translate.c | 77 +
 1 file changed, 77 insertions(+)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7548677e1f..d576b3345c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3087,6 +3087,83 @@ static inline void gen_ld_modrm_VxHxWx(CPUX86State *env, 
DisasContext *s, int mo
 *aofs = offsetof(CPUX86State, xmm_regs[s->vex_v]);
 }
 
+typedef void (*gen_ld_modrm_2_fp_t)(CPUX86State *env, DisasContext *s, int 
modrm,
+uint32_t *dofs, uint32_t *aofs);
+typedef void (*gen_ld_modrm_3_fp_t)(CPUX86State *env, DisasContext *s, int 
modrm,
+uint32_t *dofs, uint32_t *aofs, uint32_t 
*bofs);
+typedef void (*gen_gvec_2_fp_t)(unsigned vece, uint32_t dofs, uint32_t aofs,
+uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s,
+   int modrm, unsigned vece,
+   uint32_t oprsz, uint32_t maxsz,
+   gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp,
+   gen_gvec_2_fp_t gen_gvec_2_fp,
+   int opctl)
+{
+uint32_t ofss[2];
+
+const int opd = ((opctl >> 6) & 7) - 1;
+const int opa = ((opctl >> 3) & 7) - 1;
+const int opb = ((opctl >> 0) & 7) - 1;
+
+assert(0 <= opd && opd < 2);
+assert(0 <= opa && opa < 2);
+assert(0 <= opb && opb < 2);
+
+(*gen_ld_modrm_2_fp)(env, s, modrm, [0], [1]);
+(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
+}
+
+static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s,
+   int modrm, unsigned vece,
+   uint32_t oprsz, uint32_t maxsz,
+   gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp,
+   gen_gvec_2_fp_t gen_gvec_2_fp,
+   int opctl)
+{
+uint32_t ofss[3];
+
+const int opd = ((opctl >> 6) & 7) - 1;
+const int opa = ((opctl >> 3) & 7) - 1;
+const int opb = ((opctl >> 0) & 7) - 1;
+
+assert(0 <= opd && opd < 3);
+assert(0 <= opa && opa < 3);
+assert(0 <= opb && opb < 3);
+
+(*gen_ld_modrm_3_fp)(env, s, modrm, [0], [1], [2]);
+(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
+}
+
+#define gen_gvec_ld_modrm_mm(env, s, modrm, vece,   \
+ gen_gvec_2_fp, opctl)  \
+gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\
+sizeof(MMXReg), sizeof(MMXReg), \
+gen_ld_modrm_PqQq,  \
+gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_xmm(env, s, modrm, vece,  \
+  gen_gvec_2_fp, opctl) \
+gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\
+sizeof(XMMReg), sizeof(XMMReg), \
+gen_ld_modrm_VxWx,  \
+gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_vxmm(env, s, modrm, vece, \
+   gen_gvec_2_fp, opctl)\
+gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\
+sizeof(XMMReg), sizeof(ZMMReg), \
+gen_ld_modrm_VxHxWx,\
+gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_vymm(env, s, modrm, vece, \
+   gen_gvec_2_fp, opctl)\
+gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\
+sizeof(YMMReg), sizeof(ZMMReg), \
+gen_ld_modrm_VxHxWx,\
+gen_gvec_2_fp, (opctl))
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
 int b1, op1_offset, op2_offset, is_xmm, val;
-- 
2.20.1