Signed-off-by: Stephen Long <stepl...@quicinc.com>
---
Submitting this for early review. I'm working with Richard on SVE2 support
for QEMU. Next, I'll be attempting to tackle the instructions in the
'SVE2 integer add/subtract narrow high part' category [1].

[1] ISA manual: https://static.docs.arm.com/ddi0602/d/ISA_A64_xml_futureA-2019-12_OPT.pdf (page 2950)

 target/arm/helper-sve.h    | 10 ++++++++
 target/arm/sve.decode      |  5 ++++
 target/arm/sve_helper.c    | 48 ++++++++++++++++++++++++++++++++++++
 target/arm/translate-sve.c | 57 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 120 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 5dd880cf6d..2077df9a95 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2516,6 +2516,16 @@ DEF_HELPER_FLAGS_3(sve2_uqrshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve2_uqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve2_uqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(sve2_match_zpzz_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_match_zpzz_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_nmatch_zpzz_b, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_nmatch_zpzz_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 374e47fb05..652668df02 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1305,6 +1305,11 @@ UQSHRNT         01000101 .. 1 ..... 00 1101 ..... .....  @rd_rn_tszimm_shr
 UQRSHRNB        01000101 .. 1 ..... 00 1110 ..... .....  @rd_rn_tszimm_shr
 UQRSHRNT        01000101 .. 1 ..... 00 1111 ..... .....  @rd_rn_tszimm_shr
 
+### SVE2 Character Match
+
+MATCH           01000101 .. 1 ..... 100 ... ..... 0 ....  @pd_pg_rn_rm
+NMATCH          01000101 .. 1 ..... 100 ... ..... 1 ....  @pd_pg_rn_rm
+
 ## SVE2 floating-point pairwise operations
 
 FADDP           01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index b68f62cd7f..c75258b56d 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -6890,3 +6890,51 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
 
 #undef DO_ST1_ZPZ_S
 #undef DO_ST1_ZPZ_D
+
+/*
+ * SVE2 character match (MATCH / NMATCH).
+ *
+ * Each 128-bit (16-byte) segment of the vectors is handled on its own: an
+ * active element of Zn is compared against every element of the same
+ * segment of Zm.  MATCH (EQUALS true) sets the result bit when any element
+ * is equal, NMATCH (EQUALS false) when none is.  Inactive elements always
+ * produce 0.
+ *
+ * A predicate has one bit per vector byte, so one 16-byte segment maps to
+ * exactly one uint16_t of predicate; an element starting at byte I of the
+ * segment is governed by, and reports to, bit I of that uint16_t.  PG is
+ * read before PD is written, so the helper is safe when vd == vg.
+ */
+#define DO_ZPZZ_CHAR_MATCH(NAME, TYPE, H, EQUALS)                            \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)     \
+{                                                                            \
+    intptr_t seg, i, j, opr_sz = simd_oprsz(desc);                           \
+                                                                             \
+    for (seg = 0; seg < opr_sz; seg += 16) {                                 \
+        uint16_t pg = *(uint16_t *)(vg + H1_2(seg >> 3));                    \
+        uint16_t out = 0;                                                    \
+                                                                             \
+        for (i = 0; i < 16; i += sizeof(TYPE)) {                             \
+            TYPE nn;                                                         \
+            bool found = false;                                              \
+                                                                             \
+            if (!((pg >> i) & 1)) {                                          \
+                continue;                                                    \
+            }                                                                \
+            nn = *(TYPE *)(vn + H(seg + i));                                 \
+            for (j = 0; j < 16; j += sizeof(TYPE)) {                         \
+                found |= nn == *(TYPE *)(vm + H(seg + j));                   \
+            }                                                                \
+            out |= (uint16_t)(found == (EQUALS)) << i;                       \
+        }                                                                    \
+        *(uint16_t *)(vd + H1_2(seg >> 3)) = out;                            \
+    }                                                                        \
+}
+
+DO_ZPZZ_CHAR_MATCH(sve2_match_zpzz_b, uint8_t, H1, true)
+DO_ZPZZ_CHAR_MATCH(sve2_match_zpzz_h, uint16_t, H1_2, true)
+
+DO_ZPZZ_CHAR_MATCH(sve2_nmatch_zpzz_b, uint8_t, H1, false)
+DO_ZPZZ_CHAR_MATCH(sve2_nmatch_zpzz_h, uint16_t, H1_2, false)
+
+#undef DO_ZPZZ_CHAR_MATCH
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 07a2040208..7175148bfd 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7246,6 +7246,63 @@ static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
     return do_sve2_shr_narrow(s, a, ops);
 }
 
+/*
+ * Expand MATCH/NMATCH.  The out-of-line helper writes the result
+ * predicate; NZCV is then derived by testing that result against the
+ * governing predicate.
+ */
+static bool do_sve2_zpzz_char_match(DisasContext *s, arg_rprr_esz *a,
+                                    gen_helper_gvec_4 *fn)
+{
+    if (!dc_isar_feature(aa64_sve2, s)) {
+        return false;
+    }
+    if (fn == NULL) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        /*
+         * Use the gvec-rounded predicate size: the raw predicate size can
+         * be smaller than 8 bytes, which is neither a valid gvec operand
+         * size nor a whole number of 64-bit words for do_predtest.
+         */
+        unsigned psz = pred_gvec_reg_size(s);
+        int dofs = pred_full_reg_offset(s, a->rd);
+        int nofs = vec_full_reg_offset(s, a->rn);
+        int mofs = vec_full_reg_offset(s, a->rm);
+        int gofs = pred_full_reg_offset(s, a->pg);
+
+        /*
+         * The flags depend on the original governing predicate, so save
+         * a copy if the destination overwrites it.
+         */
+        int tofs = gofs;
+        if (a->rd == a->pg) {
+            tofs = offsetof(CPUARMState, vfp.preg_tmp);
+            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
+        }
+
+        tcg_gen_gvec_4_ool(dofs, nofs, mofs, gofs, vsz, vsz, 0, fn);
+        do_predtest(s, dofs, tofs, psz / 8);
+    }
+    return true;
+}
+
+/* Helpers exist only for byte and halfword; NULL entries are rejected. */
+#define DO_SVE2_ZPZZ_CHAR_MATCH(NAME, name)                                   \
+static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                    \
+{                                                                             \
+    static gen_helper_gvec_4 * const fns[4] = {                               \
+        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h,     \
+        NULL, NULL                                                            \
+    };                                                                        \
+    return do_sve2_zpzz_char_match(s, a, fns[a->esz]);                        \
+}
+
+DO_SVE2_ZPZZ_CHAR_MATCH(MATCH, match)
+DO_SVE2_ZPZZ_CHAR_MATCH(NMATCH, nmatch)
+
 static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                             gen_helper_gvec_4_ptr *fn)
 {
-- 
2.17.1