We're going to make changes that will require each helper to be responsible for 'vstart' management, i.e. we will remove the 'vstart < vl' assumption that helpers have today.
To do that we'll need to deal with how we're updating tail elements first. We can't update them if vstart >= vl, but at this moment we're not guarding for it. We have the vext_set_tail_elems_1s() helper to update tail elements. Change it to accept an 'env' pointer, where we can read both vstart and vl, and make it a no-op if vstart >= vl. Note that callers will need to set env->vstart = 0 *after* the helper from now on. The exceptions are three helpers: vext_ldst_stride(), vext_ldst_us() and vext_ldst_index(). They are incrementing env->vstart during execution and will end up with env->vstart = vl when updating the tail. For these cases only, do an early check and exit if vstart >= vl, and set env->vstart = 0 before updating the tail. For everyone else we'll do vext_set_tail_elems_1s() and then clear env->vstart. This is the case of vext_ldff() that is already using vext_set_tail_elems_1s(), and will be the case for the rest after the next patches. Let's also simplify the API a little by removing the 'nf' argument since it can be derived from 'desc'. Signed-off-by: Daniel Henrique Barboza <dbarb...@ventanamicro.com> --- target/riscv/vector_helper.c | 59 ++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index ca79571ae2..a3b496b6e9 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -174,19 +174,32 @@ GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) -static void vext_set_tail_elems_1s(target_ulong vl, void *vd, - uint32_t desc, uint32_t nf, - uint32_t esz, uint32_t max_elems) +/* + * This function is sensitive to env->vstart changes since + * it'll be a no-op if vstart >= vl. Do not clear env->vstart + * before calling it unless you're certain that vstart < vl. 
+ */ +static void vext_set_tail_elems_1s(CPURISCVState *env, void *vd, + uint32_t desc, uint32_t esz, + uint32_t max_elems) { uint32_t vta = vext_vta(desc); + uint32_t nf = vext_nf(desc); int k; - if (vta == 0) { + /* + * Section 5.4 of the RVV spec mentions: + * "When vstart ≥ vl, there are no body elements, and no + * elements are updated in any destination vector register + * group, including that no tail elements are updated + * with agnostic values." + */ + if (vta == 0 || env->vstart >= env->vl) { return; } for (k = 0; k < nf; ++k) { - vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, + vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, (k * max_elems + max_elems) * esz); } } @@ -207,6 +220,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, uint32_t esz = 1 << log2_esz; uint32_t vma = vext_vma(desc); + if (env->vstart >= env->vl) { + env->vstart = 0; + return; + } + for (i = env->vstart; i < env->vl; i++, env->vstart++) { k = 0; while (k < nf) { @@ -222,9 +240,13 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, k++; } } + /* + * Set vstart before tail update - vstart changed during + * execution and we already checked that vstart < vl. + */ env->vstart = 0; - vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); + vext_set_tail_elems_1s(env, vd, desc, esz, max_elems); } #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ @@ -272,6 +294,11 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t max_elems = vext_max_elems(desc, log2_esz); uint32_t esz = 1 << log2_esz; + if (env->vstart >= env->vl) { + env->vstart = 0; + return; + } + /* load bytes from guest memory */ for (i = env->vstart; i < evl; i++, env->vstart++) { k = 0; @@ -281,9 +308,13 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, k++; } } + /* + * Set vstart before tail update - vstart changed during + * execution and we already checked that vstart < vl. 
+ */ env->vstart = 0; - vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); + vext_set_tail_elems_1s(env, vd, desc, esz, max_elems); } /* @@ -386,6 +417,11 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t esz = 1 << log2_esz; uint32_t vma = vext_vma(desc); + if (env->vstart >= env->vl) { + env->vstart = 0; + return; + } + /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; i++, env->vstart++) { k = 0; @@ -402,9 +438,13 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, k++; } } + /* + * Set vstart before tail update - vstart changed during + * execution and we already checked that vstart < vl. + */ env->vstart = 0; - vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); + vext_set_tail_elems_1s(env, vd, desc, esz, max_elems); } #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ @@ -532,9 +572,8 @@ ProbeSuccess: k++; } } + vext_set_tail_elems_1s(env, vd, desc, esz, max_elems); env->vstart = 0; - - vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); } #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ -- 2.43.2