On Tue, May 3, 2022 at 9:33 AM ~eopxd <eo...@git.sr.ht> wrote:
>
> From: eopXD <eop.c...@sifive.com>
>
> According to the v-spec, tail agnostic elements can either be left
> undisturbed or have all of their bits set to 1. To distinguish between
> the two tail policies, QEMU should be able to simulate the tail
> agnostic behavior as "set tail elements' bits to all 1s".
>
> There are multiple possibilities for agnostic elements according to
> the v-spec. The main intent of this patch set is to add an option that
> can distinguish between tail policies. Setting agnostic elements to
> all 1s allows QEMU to express this.
>
> This is the first commit regarding the optional tail agnostic
> behavior. Follow-up commits will add this optional behavior for all
> rvv instructions.
>
> Signed-off-by: eop Chen <eop.c...@sifive.com>
> Reviewed-by: Frank Chang <frank.ch...@sifive.com>
> Reviewed-by: Weiwei Li <liwei...@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.fran...@wdc.com>

Alistair

> ---
>  target/riscv/cpu.h                      |   2 +
>  target/riscv/cpu_helper.c               |   2 +
>  target/riscv/insn_trans/trans_rvv.c.inc |  11 +
>  target/riscv/internals.h                |   5 +-
>  target/riscv/translate.c                |   2 +
>  target/riscv/vector_helper.c            | 295 +++++++++++++-----------
>  6 files changed, 186 insertions(+), 131 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index c069fe85fa..8c4a79b5a0 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -369,6 +369,7 @@ struct RISCVCPUConfig {
>      bool ext_zhinxmin;
>      bool ext_zve32f;
>      bool ext_zve64f;
> +    bool rvv_ta_all_1s;
>
>      /* Vendor-specific custom extensions */
>      bool ext_XVentanaCondOps;
> @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
>  /* If PointerMasking should be applied */
>  FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
>  FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
> +FIELD(TB_FLAGS, VTA, 24, 1)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 1c60fb2e80..2941c88c31 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
>          flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
>                             FIELD_EX64(env->vtype, VTYPE, VLMUL));
>          flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
> +        flags = FIELD_DP32(flags, TB_FLAGS, VTA,
> +                           FIELD_EX64(env->vtype, VTYPE, VTA));
>      } else {
>          flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
>      }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 57953923d5..cc80bf00ff 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
>      tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>      if (a->vm && s->vl_eq_vlmax) {
> +        if (s->vta && s->lmul < 0) {
> +            /*
> +             * tail elements may pass vlmax when lmul < 0
> +             * set tail elements to 1s
> +             */
> +            uint32_t vlenb = s->cfg_ptr->vlen >> 3;
> +            tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
> +                             vreg_ofs(s, a->rd), -1,
> +                             vlenb, vlenb);
> +        }
>          gvec_fn(s->sew, vreg_ofs(s, a->rd),
>                  vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
>                  MAXSZ(s), MAXSZ(s));
> @@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
>          data = FIELD_DP32(data, VDATA, VM, a->vm);
>          data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, s->vta);
>          tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
>                             vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
>                             cpu_env, s->cfg_ptr->vlen / 8,
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index dbb322bfa7..512c6c30cf 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -24,8 +24,9 @@
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
> -FIELD(VDATA, NF, 4, 4)
> -FIELD(VDATA, WD, 4, 1)
> +FIELD(VDATA, VTA, 4, 1)
> +FIELD(VDATA, NF, 5, 4)
> +FIELD(VDATA, WD, 5, 1)
>
>  /* float point classify helpers */
>  target_ulong fclass_h(uint64_t frs1);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index fac998a6b5..7775dade26 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -94,6 +94,7 @@ typedef struct DisasContext {
>       */
>      int8_t lmul;
>      uint8_t sew;
> +    uint8_t vta;
>      target_ulong vstart;
>      bool vl_eq_vlmax;
>      uint8_t ntemp;
> @@ -1083,6 +1084,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>      ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
>      ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
>      ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
> +    ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
>      ctx->vstart = env->vstart;
>      ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
>      ctx->misa_mxl_max = env->misa_mxl_max;
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index d0452a7756..79d4fca091 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -122,6 +122,11 @@ static inline int32_t vext_lmul(uint32_t desc)
>      return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
>  }
>
> +static inline uint32_t vext_vta(uint32_t desc)
> +{
> +    return FIELD_EX32(simd_data(desc), VDATA, VTA);
> +}
> +
>  /*
>   * Get the maximum number of elements can be operated.
>   *
> @@ -140,6 +145,20 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
>      return scale < 0 ? vlenb >> -scale : vlenb << scale;
>  }
>
> +/*
> + * Get number of total elements, including prestart, body and tail elements.
> + * Note that when LMUL < 1, the tail includes the elements past VLMAX that
> + * are held in the same vector register.
> + */
> +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
> +                                            uint32_t esz)
> +{
> +    uint32_t vlenb = simd_maxsz(desc);
> +    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
> +    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
> +                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
> +    return (vlenb << emul) / esz;
> +}
> +
>  static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
>  {
>      return (addr & env->cur_pmmask) | env->cur_pmbase;
> @@ -172,6 +191,20 @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
>      }
>  }
>
> +/* set agnostic elements to 1s */
> +static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
> +                              uint32_t tot)
> +{
> +    if (is_agnostic == 0) {
> +        /* policy undisturbed */
> +        return;
> +    }
> +    if (tot - cnt == 0) {
> +        return ;
> +    }
> +    memset(base + cnt, -1, tot - cnt);
> +}
> +
>  static inline void vext_set_elem_mask(void *v0, int index,
>                                        uint8_t value)
>  {
> @@ -710,10 +743,12 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
>
>  static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
>                         CPURISCVState *env, uint32_t desc,
> -                       opivv2_fn *fn)
> +                       opivv2_fn *fn, uint32_t esz)
>  {
>      uint32_t vm = vext_vm(desc);
>      uint32_t vl = env->vl;
> +    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
> +    uint32_t vta = vext_vta(desc);
>      uint32_t i;
>
>      for (i = env->vstart; i < vl; i++) {
> @@ -723,26 +758,28 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
>          fn(vd, vs1, vs2, i);
>      }
>      env->vstart = 0;
> +    /* set tail elements to 1s */
> +    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
>  }
>
>  /* generate the helpers for OPIVV */
> -#define GEN_VEXT_VV(NAME)                               \
> +#define GEN_VEXT_VV(NAME, ESZ)                          \
>  void HELPER(NAME)(void *vd, void *v0, void *vs1,        \
>                    void *vs2, CPURISCVState *env,        \
>                    uint32_t desc)                        \
>  {                                                       \
>      do_vext_vv(vd, v0, vs1, vs2, env, desc,             \
> -               do_##NAME);                              \
> +               do_##NAME, ESZ);                         \
>  }
>
> -GEN_VEXT_VV(vadd_vv_b)
> -GEN_VEXT_VV(vadd_vv_h)
> -GEN_VEXT_VV(vadd_vv_w)
> -GEN_VEXT_VV(vadd_vv_d)
> -GEN_VEXT_VV(vsub_vv_b)
> -GEN_VEXT_VV(vsub_vv_h)
> -GEN_VEXT_VV(vsub_vv_w)
> -GEN_VEXT_VV(vsub_vv_d)
> +GEN_VEXT_VV(vadd_vv_b, 1)
> +GEN_VEXT_VV(vadd_vv_h, 2)
> +GEN_VEXT_VV(vadd_vv_w, 4)
> +GEN_VEXT_VV(vadd_vv_d, 8)
> +GEN_VEXT_VV(vsub_vv_b, 1)
> +GEN_VEXT_VV(vsub_vv_h, 2)
> +GEN_VEXT_VV(vsub_vv_w, 4)
> +GEN_VEXT_VV(vsub_vv_d, 8)
>
>  typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
>
> @@ -887,30 +924,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
>  RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
>  RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
>  RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
> -GEN_VEXT_VV(vwaddu_vv_b)
> -GEN_VEXT_VV(vwaddu_vv_h)
> -GEN_VEXT_VV(vwaddu_vv_w)
> -GEN_VEXT_VV(vwsubu_vv_b)
> -GEN_VEXT_VV(vwsubu_vv_h)
> -GEN_VEXT_VV(vwsubu_vv_w)
> -GEN_VEXT_VV(vwadd_vv_b)
> -GEN_VEXT_VV(vwadd_vv_h)
> -GEN_VEXT_VV(vwadd_vv_w)
> -GEN_VEXT_VV(vwsub_vv_b)
> -GEN_VEXT_VV(vwsub_vv_h)
> -GEN_VEXT_VV(vwsub_vv_w)
> -GEN_VEXT_VV(vwaddu_wv_b)
> -GEN_VEXT_VV(vwaddu_wv_h)
> -GEN_VEXT_VV(vwaddu_wv_w)
> -GEN_VEXT_VV(vwsubu_wv_b)
> -GEN_VEXT_VV(vwsubu_wv_h)
> -GEN_VEXT_VV(vwsubu_wv_w)
> -GEN_VEXT_VV(vwadd_wv_b)
> -GEN_VEXT_VV(vwadd_wv_h)
> -GEN_VEXT_VV(vwadd_wv_w)
> -GEN_VEXT_VV(vwsub_wv_b)
> -GEN_VEXT_VV(vwsub_wv_h)
> -GEN_VEXT_VV(vwsub_wv_w)
> +GEN_VEXT_VV(vwaddu_vv_b, 2)
> +GEN_VEXT_VV(vwaddu_vv_h, 4)
> +GEN_VEXT_VV(vwaddu_vv_w, 8)
> +GEN_VEXT_VV(vwsubu_vv_b, 2)
> +GEN_VEXT_VV(vwsubu_vv_h, 4)
> +GEN_VEXT_VV(vwsubu_vv_w, 8)
> +GEN_VEXT_VV(vwadd_vv_b, 2)
> +GEN_VEXT_VV(vwadd_vv_h, 4)
> +GEN_VEXT_VV(vwadd_vv_w, 8)
> +GEN_VEXT_VV(vwsub_vv_b, 2)
> +GEN_VEXT_VV(vwsub_vv_h, 4)
> +GEN_VEXT_VV(vwsub_vv_w, 8)
> +GEN_VEXT_VV(vwaddu_wv_b, 2)
> +GEN_VEXT_VV(vwaddu_wv_h, 4)
> +GEN_VEXT_VV(vwaddu_wv_w, 8)
> +GEN_VEXT_VV(vwsubu_wv_b, 2)
> +GEN_VEXT_VV(vwsubu_wv_h, 4)
> +GEN_VEXT_VV(vwsubu_wv_w, 8)
> +GEN_VEXT_VV(vwadd_wv_b, 2)
> +GEN_VEXT_VV(vwadd_wv_h, 4)
> +GEN_VEXT_VV(vwadd_wv_w, 8)
> +GEN_VEXT_VV(vwsub_wv_b, 2)
> +GEN_VEXT_VV(vwsub_wv_h, 4)
> +GEN_VEXT_VV(vwsub_wv_w, 8)
>
>  RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
>  RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
> @@ -1089,18 +1126,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
>  RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
>  RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
>  RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
> -GEN_VEXT_VV(vand_vv_b)
> -GEN_VEXT_VV(vand_vv_h)
> -GEN_VEXT_VV(vand_vv_w)
> -GEN_VEXT_VV(vand_vv_d)
> -GEN_VEXT_VV(vor_vv_b)
> -GEN_VEXT_VV(vor_vv_h)
> -GEN_VEXT_VV(vor_vv_w)
> -GEN_VEXT_VV(vor_vv_d)
> -GEN_VEXT_VV(vxor_vv_b)
> -GEN_VEXT_VV(vxor_vv_h)
> -GEN_VEXT_VV(vxor_vv_w)
> -GEN_VEXT_VV(vxor_vv_d)
> +GEN_VEXT_VV(vand_vv_b, 1)
> +GEN_VEXT_VV(vand_vv_h, 2)
> +GEN_VEXT_VV(vand_vv_w, 4)
> +GEN_VEXT_VV(vand_vv_d, 8)
> +GEN_VEXT_VV(vor_vv_b, 1)
> +GEN_VEXT_VV(vor_vv_h, 2)
> +GEN_VEXT_VV(vor_vv_w, 4)
> +GEN_VEXT_VV(vor_vv_d, 8)
> +GEN_VEXT_VV(vxor_vv_b, 1)
> +GEN_VEXT_VV(vxor_vv_h, 2)
> +GEN_VEXT_VV(vxor_vv_w, 4)
> +GEN_VEXT_VV(vxor_vv_d, 8)
>
>  RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
>  RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
> @@ -1346,22 +1383,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
>  RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
>  RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
>  RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
> -GEN_VEXT_VV(vminu_vv_b)
> -GEN_VEXT_VV(vminu_vv_h)
> -GEN_VEXT_VV(vminu_vv_w)
> -GEN_VEXT_VV(vminu_vv_d)
> -GEN_VEXT_VV(vmin_vv_b)
> -GEN_VEXT_VV(vmin_vv_h)
> -GEN_VEXT_VV(vmin_vv_w)
> -GEN_VEXT_VV(vmin_vv_d)
> -GEN_VEXT_VV(vmaxu_vv_b)
> -GEN_VEXT_VV(vmaxu_vv_h)
> -GEN_VEXT_VV(vmaxu_vv_w)
> -GEN_VEXT_VV(vmaxu_vv_d)
> -GEN_VEXT_VV(vmax_vv_b)
> -GEN_VEXT_VV(vmax_vv_h)
> -GEN_VEXT_VV(vmax_vv_w)
> -GEN_VEXT_VV(vmax_vv_d)
> +GEN_VEXT_VV(vminu_vv_b, 1)
> +GEN_VEXT_VV(vminu_vv_h, 2)
> +GEN_VEXT_VV(vminu_vv_w, 4)
> +GEN_VEXT_VV(vminu_vv_d, 8)
> +GEN_VEXT_VV(vmin_vv_b, 1)
> +GEN_VEXT_VV(vmin_vv_h, 2)
> +GEN_VEXT_VV(vmin_vv_w, 4)
> +GEN_VEXT_VV(vmin_vv_d, 8)
> +GEN_VEXT_VV(vmaxu_vv_b, 1)
> +GEN_VEXT_VV(vmaxu_vv_h, 2)
> +GEN_VEXT_VV(vmaxu_vv_w, 4)
> +GEN_VEXT_VV(vmaxu_vv_d, 8)
> +GEN_VEXT_VV(vmax_vv_b, 1)
> +GEN_VEXT_VV(vmax_vv_h, 2)
> +GEN_VEXT_VV(vmax_vv_w, 4)
> +GEN_VEXT_VV(vmax_vv_d, 8)
>
>  RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
>  RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
> @@ -1402,10 +1439,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
>  RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
>  RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
>  RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
> -GEN_VEXT_VV(vmul_vv_b)
> -GEN_VEXT_VV(vmul_vv_h)
> -GEN_VEXT_VV(vmul_vv_w)
> -GEN_VEXT_VV(vmul_vv_d)
> +GEN_VEXT_VV(vmul_vv_b, 1)
> +GEN_VEXT_VV(vmul_vv_h, 2)
> +GEN_VEXT_VV(vmul_vv_w, 4)
> +GEN_VEXT_VV(vmul_vv_d, 8)
>
>  static int8_t do_mulh_b(int8_t s2, int8_t s1)
>  {
> @@ -1509,18 +1546,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
>  RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
>  RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
>  RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
> -GEN_VEXT_VV(vmulh_vv_b)
> -GEN_VEXT_VV(vmulh_vv_h)
> -GEN_VEXT_VV(vmulh_vv_w)
> -GEN_VEXT_VV(vmulh_vv_d)
> -GEN_VEXT_VV(vmulhu_vv_b)
> -GEN_VEXT_VV(vmulhu_vv_h)
> -GEN_VEXT_VV(vmulhu_vv_w)
> -GEN_VEXT_VV(vmulhu_vv_d)
> -GEN_VEXT_VV(vmulhsu_vv_b)
> -GEN_VEXT_VV(vmulhsu_vv_h)
> -GEN_VEXT_VV(vmulhsu_vv_w)
> -GEN_VEXT_VV(vmulhsu_vv_d)
> +GEN_VEXT_VV(vmulh_vv_b, 1)
> +GEN_VEXT_VV(vmulh_vv_h, 2)
> +GEN_VEXT_VV(vmulh_vv_w, 4)
> +GEN_VEXT_VV(vmulh_vv_d, 8)
> +GEN_VEXT_VV(vmulhu_vv_b, 1)
> +GEN_VEXT_VV(vmulhu_vv_h, 2)
> +GEN_VEXT_VV(vmulhu_vv_w, 4)
> +GEN_VEXT_VV(vmulhu_vv_d, 8)
> +GEN_VEXT_VV(vmulhsu_vv_b, 1)
> +GEN_VEXT_VV(vmulhsu_vv_h, 2)
> +GEN_VEXT_VV(vmulhsu_vv_w, 4)
> +GEN_VEXT_VV(vmulhsu_vv_d, 8)
>
>  RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
>  RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
> @@ -1579,22 +1616,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
>  RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
>  RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
>  RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
> -GEN_VEXT_VV(vdivu_vv_b)
> -GEN_VEXT_VV(vdivu_vv_h)
> -GEN_VEXT_VV(vdivu_vv_w)
> -GEN_VEXT_VV(vdivu_vv_d)
> -GEN_VEXT_VV(vdiv_vv_b)
> -GEN_VEXT_VV(vdiv_vv_h)
> -GEN_VEXT_VV(vdiv_vv_w)
> -GEN_VEXT_VV(vdiv_vv_d)
> -GEN_VEXT_VV(vremu_vv_b)
> -GEN_VEXT_VV(vremu_vv_h)
> -GEN_VEXT_VV(vremu_vv_w)
> -GEN_VEXT_VV(vremu_vv_d)
> -GEN_VEXT_VV(vrem_vv_b)
> -GEN_VEXT_VV(vrem_vv_h)
> -GEN_VEXT_VV(vrem_vv_w)
> -GEN_VEXT_VV(vrem_vv_d)
> +GEN_VEXT_VV(vdivu_vv_b, 1)
> +GEN_VEXT_VV(vdivu_vv_h, 2)
> +GEN_VEXT_VV(vdivu_vv_w, 4)
> +GEN_VEXT_VV(vdivu_vv_d, 8)
> +GEN_VEXT_VV(vdiv_vv_b, 1)
> +GEN_VEXT_VV(vdiv_vv_h, 2)
> +GEN_VEXT_VV(vdiv_vv_w, 4)
> +GEN_VEXT_VV(vdiv_vv_d, 8)
> +GEN_VEXT_VV(vremu_vv_b, 1)
> +GEN_VEXT_VV(vremu_vv_h, 2)
> +GEN_VEXT_VV(vremu_vv_w, 4)
> +GEN_VEXT_VV(vremu_vv_d, 8)
> +GEN_VEXT_VV(vrem_vv_b, 1)
> +GEN_VEXT_VV(vrem_vv_h, 2)
> +GEN_VEXT_VV(vrem_vv_w, 4)
> +GEN_VEXT_VV(vrem_vv_d, 8)
>
>  RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
>  RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
> @@ -1639,15 +1676,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
>  RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
>  RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
>  RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
> -GEN_VEXT_VV(vwmul_vv_b)
> -GEN_VEXT_VV(vwmul_vv_h)
> -GEN_VEXT_VV(vwmul_vv_w)
> -GEN_VEXT_VV(vwmulu_vv_b)
> -GEN_VEXT_VV(vwmulu_vv_h)
> -GEN_VEXT_VV(vwmulu_vv_w)
> -GEN_VEXT_VV(vwmulsu_vv_b)
> -GEN_VEXT_VV(vwmulsu_vv_h)
> -GEN_VEXT_VV(vwmulsu_vv_w)
> +GEN_VEXT_VV(vwmul_vv_b, 2)
> +GEN_VEXT_VV(vwmul_vv_h, 4)
> +GEN_VEXT_VV(vwmul_vv_w, 8)
> +GEN_VEXT_VV(vwmulu_vv_b, 2)
> +GEN_VEXT_VV(vwmulu_vv_h, 4)
> +GEN_VEXT_VV(vwmulu_vv_w, 8)
> +GEN_VEXT_VV(vwmulsu_vv_b, 2)
> +GEN_VEXT_VV(vwmulsu_vv_h, 4)
> +GEN_VEXT_VV(vwmulsu_vv_w, 8)
>
>  RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
>  RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
> @@ -1698,22 +1735,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
>  RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
>  RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
>  RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
> -GEN_VEXT_VV(vmacc_vv_b)
> -GEN_VEXT_VV(vmacc_vv_h)
> -GEN_VEXT_VV(vmacc_vv_w)
> -GEN_VEXT_VV(vmacc_vv_d)
> -GEN_VEXT_VV(vnmsac_vv_b)
> -GEN_VEXT_VV(vnmsac_vv_h)
> -GEN_VEXT_VV(vnmsac_vv_w)
> -GEN_VEXT_VV(vnmsac_vv_d)
> -GEN_VEXT_VV(vmadd_vv_b)
> -GEN_VEXT_VV(vmadd_vv_h)
> -GEN_VEXT_VV(vmadd_vv_w)
> -GEN_VEXT_VV(vmadd_vv_d)
> -GEN_VEXT_VV(vnmsub_vv_b)
> -GEN_VEXT_VV(vnmsub_vv_h)
> -GEN_VEXT_VV(vnmsub_vv_w)
> -GEN_VEXT_VV(vnmsub_vv_d)
> +GEN_VEXT_VV(vmacc_vv_b, 1)
> +GEN_VEXT_VV(vmacc_vv_h, 2)
> +GEN_VEXT_VV(vmacc_vv_w, 4)
> +GEN_VEXT_VV(vmacc_vv_d, 8)
> +GEN_VEXT_VV(vnmsac_vv_b, 1)
> +GEN_VEXT_VV(vnmsac_vv_h, 2)
> +GEN_VEXT_VV(vnmsac_vv_w, 4)
> +GEN_VEXT_VV(vnmsac_vv_d, 8)
> +GEN_VEXT_VV(vmadd_vv_b, 1)
> +GEN_VEXT_VV(vmadd_vv_h, 2)
> +GEN_VEXT_VV(vmadd_vv_w, 4)
> +GEN_VEXT_VV(vmadd_vv_d, 8)
> +GEN_VEXT_VV(vnmsub_vv_b, 1)
> +GEN_VEXT_VV(vnmsub_vv_h, 2)
> +GEN_VEXT_VV(vnmsub_vv_w, 4)
> +GEN_VEXT_VV(vnmsub_vv_d, 8)
>
>  #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
>  static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
> @@ -1766,15 +1803,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
>  RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
>  RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
>  RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
> -GEN_VEXT_VV(vwmaccu_vv_b)
> -GEN_VEXT_VV(vwmaccu_vv_h)
> -GEN_VEXT_VV(vwmaccu_vv_w)
> -GEN_VEXT_VV(vwmacc_vv_b)
> -GEN_VEXT_VV(vwmacc_vv_h)
> -GEN_VEXT_VV(vwmacc_vv_w)
> -GEN_VEXT_VV(vwmaccsu_vv_b)
> -GEN_VEXT_VV(vwmaccsu_vv_h)
> -GEN_VEXT_VV(vwmaccsu_vv_w)
> +GEN_VEXT_VV(vwmaccu_vv_b, 2)
> +GEN_VEXT_VV(vwmaccu_vv_h, 4)
> +GEN_VEXT_VV(vwmaccu_vv_w, 8)
> +GEN_VEXT_VV(vwmacc_vv_b, 2)
> +GEN_VEXT_VV(vwmacc_vv_h, 4)
> +GEN_VEXT_VV(vwmacc_vv_w, 8)
> +GEN_VEXT_VV(vwmaccsu_vv_b, 2)
> +GEN_VEXT_VV(vwmaccsu_vv_h, 4)
> +GEN_VEXT_VV(vwmaccsu_vv_w, 8)
>
>  RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
>  RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
> --
> 2.34.2
>
>
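For anyone following the change, here is a minimal stand-alone sketch (not
QEMU code) of the policy the patch implements: after the body elements of an
operation are written, the tail bytes of the destination register are
overwritten with all 1s when the agnostic option is enabled, which is
essentially what vext_set_elems_1s() above does with
memset(base + cnt, -1, tot - cnt). The VLENB value, element size and element
counts below are illustrative assumptions, not values taken from the patch.

/*
 * Stand-alone illustration of "tail agnostic = set tail bits to all 1s".
 * A vector register is modelled as a flat byte array.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VLENB 16                 /* assume VLEN = 128 bits */

/* Fill bytes [cnt, tot) with 1s when the tail-agnostic policy applies. */
static void set_tail_elems_1s(uint8_t *reg, int vta, size_t cnt, size_t tot)
{
    if (!vta || tot <= cnt) {
        return;                  /* undisturbed policy, or no tail */
    }
    memset(reg + cnt, 0xff, tot - cnt);
}

int main(void)
{
    uint8_t vd[VLENB] = {0};
    const size_t esz = 4;                    /* SEW = 32 bits */
    const size_t vl = 2;                     /* body: elements 0..1 */
    const size_t total_elems = VLENB / esz;  /* 4 elements per register */

    /* Pretend the body elements were produced by some vector op. */
    uint32_t body[2] = {0x11111111, 0x22222222};
    memcpy(vd, body, sizeof(body));

    /* Tail-agnostic, all-1s flavour: elements vl..total_elems-1 -> 0xff. */
    set_tail_elems_1s(vd, /*vta=*/1, vl * esz, total_elems * esz);

    for (size_t i = 0; i < VLENB; i++) {
        printf("%02x%s", vd[i], (i + 1) % esz ? "" : " ");
    }
    printf("\n");
    return 0;
}

The byte-count arguments mirror the helper's calling convention in the patch
(vl * esz for the body size, total_elems * esz for the register size), which
is why the tail computed by vext_get_total_elems() also covers the space past
VLMAX when LMUL < 1.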