Signed-off-by: Stephen Long <stepl...@quicinc.com> --- Realized that I was handling the predicate register incorrectly for the 32 bit case for histcnt_s. There might be a cleaner way to write the handler function.
target/arm/helper-sve.h | 7 ++++ target/arm/sve.decode | 6 ++++ target/arm/sve_helper.c | 65 ++++++++++++++++++++++++++++++++++++++ target/arm/translate-sve.c | 29 +++++++++++++++++ 4 files changed, 107 insertions(+) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 4733614614..958ad623f6 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -2526,6 +2526,13 @@ DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_b, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_h, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_histcnt_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve2_histcnt_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(sve2_histseg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG, diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 26690d4208..50ed0726fb 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -147,6 +147,7 @@ &rprrr_esz rn=%reg_movprfx @rdn_pg_rm_ra ........ esz:2 . ra:5 ... pg:3 rm:5 rd:5 \ &rprrr_esz rn=%reg_movprfx +@rd5_pg_rn_rm ........ esz:2 . rm:5 ... pg:3 rn:5 rd:5 &rprr_esz # One register operand, with governing predicate, vector element size @rd_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 &rpr_esz @@ -1325,6 +1326,11 @@ UQRSHRNT 01000101 .. 1 ..... 00 1111 ..... ..... @rd_rn_tszimm_shr MATCH 01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm NMATCH 01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm +### SVE2 Histogram Computation + +HISTCNT 01000101 .. 1 ..... 110 ... ..... ..... @rd5_pg_rn_rm +HISTSEG 01000101 .. 1 ..... 101 000 ..... ..... @rd_rn_rm + ## SVE2 floating-point pairwise operations FADDP 01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 7c65009bb8..79e54b767d 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -7016,3 +7016,68 @@ DO_PPZZ_MATCH(sve2_nmatch_ppzz_b, MO_8, true) DO_PPZZ_MATCH(sve2_nmatch_ppzz_h, MO_16, true) #undef DO_PPZZ_MATCH + +#define DO_HISTCNT(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, j; \ + intptr_t opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; ++i) { \ + TYPE count = 0; \ + uint8_t pred = pg[H1(i)] >> ((i & 1) * 4); \ + if (pred & 1) { \ + TYPE nn = n[H(i)]; \ + for (j = 0; j <= i; ++j) { \ + uint8_t pred = pg[H1(j)] >> ((j & 1) * 4); \ + if (pred & 1 && nn == m[H(j)]) { \ + ++count; \ + } \ + } \ + } \ + d[H(i)] = count; \ + } \ +} + +DO_HISTCNT(sve2_histcnt_s, uint32_t, H1_2) +DO_HISTCNT(sve2_histcnt_d, uint64_t, ) + +#undef DO_HISTCNT + +static inline uint8_t get_count(uint8_t n, uint64_t m) +{ + int i; + uint8_t count = 0; + + for (i = 0; i < 64; i += 8) { + if (n == extract64(m, i, 8)) { + ++count; + } + } + return count; +} + +void HELPER(sve2_histseg)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, j; + intptr_t opr_sz = simd_oprsz(desc); + uint64_t *m = vm; + + for (i = 0; i < opr_sz; i += 8) { + uint64_t n = *(uint64_t *)(vn + i); + uint64_t out = 0; + + for (j = 0; j < 64; j += 8) { + uint64_t m0 = *m; + uint64_t m1 = *(m + 1); + + uint8_t count = get_count(n >> j, m0) + get_count(n >> j, m1); + out |= count << j; + + m += 2; + } + + *(uint64_t *)(vd + i) = out; + } +} diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 5403ceb1d1..a4bfbe72f4 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -7526,6 +7526,35 @@ static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \ DO_SVE2_PPZZ_MATCH(MATCH, match) DO_SVE2_PPZZ_MATCH(NMATCH, nmatch) +static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a) +{ + if (a->esz > 1) { + return false; + } + static gen_helper_gvec_4 * const fns[2] = { + gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d + }; + return do_zpzz_ool(s, a, fns[a->esz]); +} + +static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a) +{ + if (!dc_isar_feature(aa64_sve2, s)) { + return false; + } + if (a->esz != 0) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vsz, vsz, 0, gen_helper_sve2_histseg); + } + return true; +} + static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4_ptr *fn) { -- 2.17.1