On 08.03.22 02:53, Richard Henderson wrote: > From: David Miller <dmiller...@gmail.com> > > Signed-off-by: David Miller <dmiller...@gmail.com> > Message-Id: <20220307020327.3003-3-dmiller...@gmail.com> > [rth: Rewrite helpers; fix validation of m6.] > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > > The substring search was incorrect, in that it didn't properly > restart the search when a match failed. Split the helper into > multiple, so that the memory accesses can be optimized. > --- > target/s390x/helper.h | 6 ++ > target/s390x/tcg/translate.c | 3 +- > target/s390x/tcg/vec_string_helper.c | 101 +++++++++++++++++++++++++++ > target/s390x/tcg/translate_vx.c.inc | 26 +++++++ > target/s390x/tcg/insn-data.def | 2 + > 5 files changed, 137 insertions(+), 1 deletion(-) > > diff --git a/target/s390x/helper.h b/target/s390x/helper.h > index 7cbcbd7f0b..7412130883 100644 > --- a/target/s390x/helper.h > +++ b/target/s390x/helper.h > @@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, > cptr, env, i32) > DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32) > DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32) > DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32) > +DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32) > > /* === Vector Floating-Point Instructions */ > DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, > i32) > diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c > index 904b51542f..d9ac29573d 100644 > --- a/target/s390x/tcg/translate.c > +++ b/target/s390x/tcg/translate.c > @@ -6222,7 +6222,8 @@ enum DisasInsnEnum { > #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */ > #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION > #define FAC_V S390_FEAT_VECTOR /* vector facility */ > -#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility > 1 */ > +#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements > facility 1 */ > +#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements > facility 2 */ > #define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* > miscellaneous-instruction-extensions facility 2 */ > #define FAC_MIE3 S390_FEAT_MISC_INSTRUCTION_EXT3 /* > miscellaneous-instruction-extensions facility 3 */ > > diff --git a/target/s390x/tcg/vec_string_helper.c > b/target/s390x/tcg/vec_string_helper.c > index ac315eb095..6c0476ecc1 100644 > --- a/target/s390x/tcg/vec_string_helper.c > +++ b/target/s390x/tcg/vec_string_helper.c > @@ -471,3 +471,104 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const > void *v2, const void *v3, \ > DEF_VSTRC_CC_RT_HELPER(8) > DEF_VSTRC_CC_RT_HELPER(16) > DEF_VSTRC_CC_RT_HELPER(32) > + > +static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, > + const S390Vector *v4, uint8_t es, bool zs) > +{ > + int substr_elen, substr_0, str_elen, i, j, k, cc; > + int nelem = 16 >> es; > + bool eos = false; > + > + substr_elen = s390_vec_read_element8(v4, 7) >> es; > + > + /* If ZS, bound substr length by min(nelem, strlen(v3)). */ > + if (zs) { > + int i;
You can drop this "int i;" > + for (i = 0; i < nelem; i++) { > + if (s390_vec_read_element(v3, i, es) == 0) { > + break; > + } > + } > + if (i < substr_elen) { > + substr_elen = i; > + } Maybe combine both, I guess there is no need to search beyond substr_elen. substr_elen = MIN(substr_elen, nelem); for (i = 0; i < substr_elen; i++) { if (s390_vec_read_element(v3, i, es) == 0) { substr_elen = i; break; } } We should do the MIN(substr_elen, nelem) maybe right when reading it from v4. > + } > + > + if (substr_elen == 0) { > + cc = 2; /* full match for degenerate case of empty substr */ > + k = 0; > + goto done; > + } > + > + /* If ZS, look for eos in the searched string. */ > + if (zs) { > + for (k = 0; k < nelem; k++) { > + if (s390_vec_read_element(v2, k, es) == 0) { > + eos = true; > + break; > + } > + } I guess we could move that into the main search loop and avoid parsing the string twice. Not sure what's better. > + str_elen = k; > + } else { > + str_elen = nelem; > + } > + > + substr_0 = s390_vec_read_element(v3, 0, es); > + > + for (k = 0; ; k++) { > + for (; k < str_elen; k++) { > + if (s390_vec_read_element(v2, k, es) == substr_0) { > + break; > + } > + } > + > + /* If we reached the end of the string, no match. */ > + if (k == str_elen) { > + cc = eos; /* no match (with or without zero char) */ > + goto done; > + } > + > + /* If the substring is only one char, match. */ > + if (substr_elen == 1) { > + cc = 2; /* full match */ > + goto done; > + } > + > + /* If the match begins at the last char, we have a partial match. */ > + if (k == str_elen - 1) { > + cc = 3; /* partial match */ > + goto done; > + } > + > + i = MIN(nelem, k + substr_elen); > + for (j = k + 1; j < i; j++) { > + uint32_t e2 = s390_vec_read_element(v2, j, es); > + uint32_t e3 = s390_vec_read_element(v3, j - k, es); > + if (e2 != e3) { > + break; > + } > + } > + if (j == i) { > + /* Matched up until "end". */ > + cc = i - k == substr_elen ? 2 : 3; /* full or partial match */ > + goto done; > + } > + } > + > + done: > + s390_vec_write_element64(v1, 0, k << es); > + s390_vec_write_element64(v1, 1, 0); > + return cc; > +} > + > +#define DEF_VSTRS_HELPER(BITS) \ > +void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2, \ > + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ > + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); } \ > +void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2, \ > + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ > + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); } > + > +DEF_VSTRS_HELPER(8) > +DEF_VSTRS_HELPER(16) > +DEF_VSTRS_HELPER(32) > diff --git a/target/s390x/tcg/translate_vx.c.inc > b/target/s390x/tcg/translate_vx.c.inc > index ea28e40d4f..d514e8b218 100644 > --- a/target/s390x/tcg/translate_vx.c.inc > +++ b/target/s390x/tcg/translate_vx.c.inc > @@ -2497,6 +2497,32 @@ static DisasJumpType op_vstrc(DisasContext *s, > DisasOps *o) > return DISAS_NEXT; > } > > +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o) > +{ > + typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr, > + TCGv_ptr, TCGv_ptr, TCGv_i32); > + static const helper_vstrs fns[3][2] = { > + { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 }, > + { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 }, > + { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 }, > + }; > + Superfluous empty line. > + const uint8_t m5 = get_field(s, m5); Could so a s/m5/es/ , as we do it in other handlers. > + const uint8_t m6 = get_field(s, m6); > + bool zs = m6 & 2; I remember we wanted to use extract32() for such bit-tests, at least we do it in most of the other handlers :) const bool zs = extract32(m6, 1, 1); ? > + > + if (m5 > ES_32 || m6 & ~2) { > + gen_program_exception(s, PGM_SPECIFICATION); > + return DISAS_NORETURN; > + } > + -- Thanks, David / dhildenb