Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers
On 7/31/19 6:47 PM, Richard Henderson wrote: > I suppose there aren't so many different combinations, but did you consider > separate callbacks per operand? If you have > > typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int); > > static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm) > { > int reg = (modrm >> 3) & 7; /* Ignore REX_R */ > return offsetof(CPUX86State, fpregs[reg].mmx); > } > > static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm) > { > int mod = (modrm >> 6) & 3; > unsigned ret; > > if (mod == 3) { > int rm = modrm & 7; /* Ignore REX_B */ > ret = offsetof(CPUX86State, fpregs[rm].mmx); > } else { > ret = offsetof(CPUX86State, mmx_t0); > gen_lea_modrm(env, s, modrm); > gen_ldq_env_A0(s, ret); > } > return ret; > } > > static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm) > { > int reg = ((modrm >> 3) & 7) | REX_R(s); > return offsetof(CPUX86State, xmm_regs[reg]); > } > > static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm) > { > int mod = (modrm >> 6) & 3; > unsigned ret; > > if (mod == 3) { > int rm = (modrm & 7) | REX_B(s); > ret = offsetof(CPUX86State, xmm_regs[rm]); > } else { > ret = offsetof(CPUX86State, xmm_t0); > gen_lea_modrm(env, s, modrm); > gen_ldo_env_A0(s, ret); > } > return ret; > } > > static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm) > { > return offsetof(CPUX86State, xmm_regs[s->vex_v]); > } > > Then you can have > > #define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ) > static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env, \ > DisasContext *s, int modrm, unsigned vece, gen_gvec_2_fp_t gen) \ > { \ > int ofd = offset_##OP0(env, s, modrm); \ > int of1 = offset_##OP1(env, s, modrm); \ > int of2 = offset_##OP2(env, s, modrm); \ > gen(vece, opd, opa, opb, OPRSZ, MAXSZ); \ > } > > GEN_GVEC_3(Pq, Pq, Qq, sizeof(MMXReg), sizeof(MMXReg)) > GEN_GVEC_3(Vx, Vx, Wx, sizeof(XMMReg), max_vec_size(s)) > GEN_GVEC_3(Vx, Hx, Wx, sizeof(XMMReg), max_vec_size(s)) > > The PqPqQq and VxVxWx sub-strings aren't quite canonical, but imo a better fit > to the actual format of the instruction, with 2 inputs and 1 output. Funny, I had a similar idea and converged to almost identical solution. This will be part of v2. > You can also do > > GEN_GVEC_3(Pq, Qq, Pq, sizeof(MMXReg), sizeof(MMXReg)) > > for those rare "reversed" operations like PANDN. Now you don't need to carry > around the OPCTL argument, which I initially found non-obvious. Yup, solves the problem nicely and more clearly. > I initially thought you'd be able to infer maxsz from the set of arguments, > but > since there are vex encoded operations that do not use vex. that is not > always the case. Thus I suggest > > static size_t max_vec_size(DisasContext *s) > { > if (s->prefixes & PREFIX_VEX) { > /* > * TODO: When avx512 is supported and enabled, sizeof(ZMMReg). > * In the meantime don't waste time zeroing data that is not > * architecturally present. > */ > return sizeof(YMMReg); > } else { > /* Without vex encoding, only the low 128 bits are modified. */ > return sizeof(XMMReg); > } > } Looks good. -Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers
On 7/31/19 10:56 AM, Jan Bobek wrote: > +static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s, > + int modrm, unsigned vece, > + uint32_t oprsz, uint32_t maxsz, > + gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp, > + gen_gvec_2_fp_t gen_gvec_2_fp, > + int opctl) > +{ > +uint32_t ofss[2]; > + > +const int opd = ((opctl >> 6) & 7) - 1; > +const int opa = ((opctl >> 3) & 7) - 1; > +const int opb = ((opctl >> 0) & 7) - 1; > + > +assert(0 <= opd && opd < 2); > +assert(0 <= opa && opa < 2); > +assert(0 <= opb && opb < 2); > + > +(*gen_ld_modrm_2_fp)(env, s, modrm, [0], [1]); > +(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz); > +} > + > +static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s, > + int modrm, unsigned vece, > + uint32_t oprsz, uint32_t maxsz, > + gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp, > + gen_gvec_2_fp_t gen_gvec_2_fp, > + int opctl) > +{ > +uint32_t ofss[3]; > + > +const int opd = ((opctl >> 6) & 7) - 1; > +const int opa = ((opctl >> 3) & 7) - 1; > +const int opb = ((opctl >> 0) & 7) - 1; > + > +assert(0 <= opd && opd < 3); > +assert(0 <= opa && opa < 3); > +assert(0 <= opb && opb < 3); > + > +(*gen_ld_modrm_3_fp)(env, s, modrm, [0], [1], [2]); > +(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz); > +} > +> +#define gen_gvec_ld_modrm_mm(env, s, modrm, vece, > \> + gen_gvec_2_fp, opctl) \> + gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\> + sizeof(MMXReg), sizeof(MMXReg), \> + gen_ld_modrm_PqQq, \> + gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_xmm(env, s, modrm, vece, \> + gen_gvec_2_fp, opctl) \> +gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\> + sizeof(XMMReg), sizeof(XMMReg), \> + gen_ld_modrm_VxWx, \> + gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vxmm(env, s, modrm, vece, \> + gen_gvec_2_fp, opctl)\> +gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\> +sizeof(XMMReg), sizeof(ZMMReg), \> + gen_ld_modrm_VxHxWx,\> + gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vymm(env, s, modrm, vece, \> + gen_gvec_2_fp, opctl)\> +gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\> +sizeof(YMMReg), sizeof(ZMMReg), \> + gen_ld_modrm_VxHxWx,\> + gen_gvec_2_fp, (opctl)) I suppose there aren't so many different combinations, but did you consider separate callbacks per operand? If you have typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int); static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm) { int reg = (modrm >> 3) & 7; /* Ignore REX_R */ return offsetof(CPUX86State, fpregs[reg].mmx); } static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm) { int mod = (modrm >> 6) & 3; unsigned ret; if (mod == 3) { int rm = modrm & 7; /* Ignore REX_B */ ret = offsetof(CPUX86State, fpregs[rm].mmx); } else { ret = offsetof(CPUX86State, mmx_t0); gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, ret); } return ret; } static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm) { int reg = ((modrm >> 3) & 7) | REX_R(s); return offsetof(CPUX86State, xmm_regs[reg]); } static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm) { int mod = (modrm >> 6) & 3; unsigned ret; if (mod == 3) { int rm = (modrm & 7) | REX_B(s); ret = offsetof(CPUX86State, xmm_regs[rm]); } else { ret = offsetof(CPUX86State, xmm_t0); gen_lea_modrm(env, s, modrm); gen_ldo_env_A0(s, ret); } return ret; } static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm) { return offsetof(CPUX86State, xmm_regs[s->vex_v]); } Then you can have #define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ) static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env, \ DisasContext *s, int modrm, unsigned vece, gen_gvec_2_fp_t gen) \ { \ int
[Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers
gen_gvec_ld_modrm_* helpers tie together a gen_ld_modrm_* helper and a particular gvec operation, effectively handling a single instruction. Signed-off-by: Jan Bobek --- target/i386/translate.c | 77 + 1 file changed, 77 insertions(+) diff --git a/target/i386/translate.c b/target/i386/translate.c index 7548677e1f..d576b3345c 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -3087,6 +3087,83 @@ static inline void gen_ld_modrm_VxHxWx(CPUX86State *env, DisasContext *s, int mo *aofs = offsetof(CPUX86State, xmm_regs[s->vex_v]); } +typedef void (*gen_ld_modrm_2_fp_t)(CPUX86State *env, DisasContext *s, int modrm, +uint32_t *dofs, uint32_t *aofs); +typedef void (*gen_ld_modrm_3_fp_t)(CPUX86State *env, DisasContext *s, int modrm, +uint32_t *dofs, uint32_t *aofs, uint32_t *bofs); +typedef void (*gen_gvec_2_fp_t)(unsigned vece, uint32_t dofs, uint32_t aofs, +uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + +static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s, + int modrm, unsigned vece, + uint32_t oprsz, uint32_t maxsz, + gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp, + gen_gvec_2_fp_t gen_gvec_2_fp, + int opctl) +{ +uint32_t ofss[2]; + +const int opd = ((opctl >> 6) & 7) - 1; +const int opa = ((opctl >> 3) & 7) - 1; +const int opb = ((opctl >> 0) & 7) - 1; + +assert(0 <= opd && opd < 2); +assert(0 <= opa && opa < 2); +assert(0 <= opb && opb < 2); + +(*gen_ld_modrm_2_fp)(env, s, modrm, [0], [1]); +(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz); +} + +static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s, + int modrm, unsigned vece, + uint32_t oprsz, uint32_t maxsz, + gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp, + gen_gvec_2_fp_t gen_gvec_2_fp, + int opctl) +{ +uint32_t ofss[3]; + +const int opd = ((opctl >> 6) & 7) - 1; +const int opa = ((opctl >> 3) & 7) - 1; +const int opb = ((opctl >> 0) & 7) - 1; + +assert(0 <= opd && opd < 3); +assert(0 <= opa && opa < 3); +assert(0 <= opb && opb < 3); + +(*gen_ld_modrm_3_fp)(env, s, modrm, [0], [1], [2]); +(*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz); +} + +#define gen_gvec_ld_modrm_mm(env, s, modrm, vece, \ + gen_gvec_2_fp, opctl) \ +gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\ +sizeof(MMXReg), sizeof(MMXReg), \ +gen_ld_modrm_PqQq, \ +gen_gvec_2_fp, (opctl)) + +#define gen_gvec_ld_modrm_xmm(env, s, modrm, vece, \ + gen_gvec_2_fp, opctl) \ +gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),\ +sizeof(XMMReg), sizeof(XMMReg), \ +gen_ld_modrm_VxWx, \ +gen_gvec_2_fp, (opctl)) + +#define gen_gvec_ld_modrm_vxmm(env, s, modrm, vece, \ + gen_gvec_2_fp, opctl)\ +gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\ +sizeof(XMMReg), sizeof(ZMMReg), \ +gen_ld_modrm_VxHxWx,\ +gen_gvec_2_fp, (opctl)) + +#define gen_gvec_ld_modrm_vymm(env, s, modrm, vece, \ + gen_gvec_2_fp, opctl)\ +gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),\ +sizeof(YMMReg), sizeof(ZMMReg), \ +gen_ld_modrm_VxHxWx,\ +gen_gvec_2_fp, (opctl)) + static void gen_sse(CPUX86State *env, DisasContext *s, int b) { int b1, op1_offset, op2_offset, is_xmm, val; -- 2.20.1