Signed-off-by: Richard Henderson <[email protected]>
---
target/arm/tcg/helper-sve-defs.h | 3 ++
target/arm/tcg/sve_helper.c | 30 +++++++++++++++++
target/arm/tcg/translate-sve.c | 56 ++++++++++++++++++++++++++++++++
target/arm/tcg/sve.decode | 3 ++
4 files changed, 92 insertions(+)
diff --git a/target/arm/tcg/helper-sve-defs.h b/target/arm/tcg/helper-sve-defs.h
index 11342e1e59..440a868cea 100644
--- a/target/arm/tcg/helper-sve-defs.h
+++ b/target/arm/tcg/helper-sve-defs.h
@@ -961,6 +961,9 @@ DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr,
ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32)
+DEF_HELPER_FLAGS_3(sve2p2_firstp, TCG_CALL_NO_RWG_SE, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2p2_lastp, TCG_CALL_NO_RWG_SE, i64, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 3f99a362c3..57c7823feb 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -4302,6 +4302,36 @@ uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t
desc)
return count >> p.lg2_stride;
}
+/*
+ * FIRSTP: scan the predicate words from the bottom up and return the
+ * element index of the lowest bit that is set in both VN and VG (after
+ * masking with pred_esz_masks for the element size).  Returns -1, i.e.
+ * all ones, when no such bit exists.
+ */
+uint64_t HELPER(sve2p2_firstp)(void *vn, void *vg, uint32_t pred_desc)
+{
+    const uint64_t *pn = vn;
+    const uint64_t *pg = vg;
+    intptr_t log2_esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+    intptr_t nwords = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+    uint64_t esz_mask = pred_esz_masks[log2_esz];
+    intptr_t w;
+
+    for (w = 0; w < nwords; w++) {
+        uint64_t active = pn[w] & pg[w] & esz_mask;
+
+        if (active != 0) {
+            /* Bit position within the whole predicate, scaled to elements. */
+            return (w * 64 + ctz64(active)) >> log2_esz;
+        }
+    }
+    return -1;
+}
+
+/*
+ * LASTP: scan the predicate words from the top down and return the
+ * element index of the highest bit that is set in both VN and VG (after
+ * masking with pred_esz_masks for the element size).  Returns -1, i.e.
+ * all ones, when no such bit exists.
+ */
+uint64_t HELPER(sve2p2_lastp)(void *vn, void *vg, uint32_t pred_desc)
+{
+    const uint64_t *pn = vn;
+    const uint64_t *pg = vg;
+    intptr_t log2_esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+    intptr_t w = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+    uint64_t esz_mask = pred_esz_masks[log2_esz];
+
+    while (--w >= 0) {
+        uint64_t active = pn[w] & pg[w] & esz_mask;
+
+        if (active != 0) {
+            /* active is non-zero, so 63 - clz is its highest set bit. */
+            return (w * 64 + (63 - clz64(active))) >> log2_esz;
+        }
+    }
+    return -1;
+}
+
/* C.f. Arm pseudocode EncodePredCount */
static uint64_t encode_pred_count(uint32_t elements, uint32_t count,
uint32_t esz, bool invert)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 325fc28fb3..912290f460 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3481,6 +3481,62 @@ static bool trans_SINCDECP_z(DisasContext *s,
arg_incdec2_pred *a)
return true;
}
+/*
+ * Emit code for FIRSTP (firstp == true) or LASTP (firstp == false).
+ *
+ * The general register rd receives the element index of the lowest
+ * (FIRSTP) or highest (LASTP) bit that is set in both predicate rn and
+ * predicate pg, or -1 if there is none.  Predicates of at most 8 bytes
+ * are handled inline; longer ones go through the out-of-line helpers.
+ */
+static bool do_firstp_lastp(DisasContext *s, arg_rpr_esz *a, bool firstp)
+{
+    unsigned pred_bytes;
+    TCGv_i64 dst;
+
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    pred_bytes = pred_full_reg_size(s);
+    dst = cpu_reg(s, a->rd);
+
+    if (pred_bytes > 8) {
+        /* The predicate spans several 64-bit words: call the helper. */
+        TCGv_ptr pn_ptr = tcg_temp_new_ptr();
+        TCGv_ptr pg_ptr = tcg_temp_new_ptr();
+        TCGv_i32 desc_arg;
+        unsigned desc = 0;
+
+        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_bytes);
+        desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+
+        tcg_gen_addi_ptr(pn_ptr, tcg_env, pred_full_reg_offset(s, a->rn));
+        tcg_gen_addi_ptr(pg_ptr, tcg_env, pred_full_reg_offset(s, a->pg));
+        desc_arg = tcg_constant_i32(desc);
+
+        if (firstp) {
+            gen_helper_sve2p2_firstp(dst, pn_ptr, pg_ptr, desc_arg);
+        } else {
+            gen_helper_sve2p2_lastp(dst, pn_ptr, pg_ptr, desc_arg);
+        }
+        return true;
+    }
+
+    /* The whole predicate fits in a single 64-bit load. */
+    tcg_gen_ld_i64(dst, tcg_env, pred_full_reg_offset(s, a->rn));
+    if (a->pg != a->rn) {
+        TCGv_i64 gov = tcg_temp_new_i64();
+
+        tcg_gen_ld_i64(gov, tcg_env, pred_full_reg_offset(s, a->pg));
+        tcg_gen_and_i64(dst, dst, gov);
+    }
+
+    /*
+     * Apply the element-size mask.  Trimming the pred_esz_masks constant
+     * to the predicate length is done purely to shrink the immediate,
+     * and hence the code generated here.
+     */
+    tcg_gen_andi_i64(dst, dst, pred_esz_masks[a->esz] &
+                               MAKE_64BIT_MASK(0, pred_bytes * 8));
+
+    if (firstp) {
+        /* A zero input produces the default value -1. */
+        tcg_gen_ctzi_i64(dst, dst, -1);
+    } else {
+        /* A zero input produces 64, so 63 - clz is -1 when nothing is set. */
+        tcg_gen_clzi_i64(dst, dst, 64);
+        tcg_gen_subfi_i64(dst, 63, dst);
+    }
+    /* Arithmetic shift: bit index to element index, and -1 stays -1. */
+    tcg_gen_sari_i64(dst, dst, a->esz);
+    return true;
+}
+
+/* Both instructions share one translator and the same feature gate. */
+TRANS_FEAT(FIRSTP, aa64_sme2p2_or_sve2p2, do_firstp_lastp, a, true)
+TRANS_FEAT(LASTP, aa64_sme2p2_or_sve2p2, do_firstp_lastp, a, false)
+
/*
*** SVE Integer Compare Scalars Group
*/
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index a3eefd60e5..bdc2b7f0c0 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -852,6 +852,9 @@ BRKN 00100101 0. 01100001 .... 0 .... 0 ....
@pd_pg_pn_s
CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
CNTP_c 00100101 esz:2 100 000 10 000 vl:1 1 rn:4 rd:5
+FIRSTP 00100101 .. 100 001 10 .... 0 .... ..... @rd_pg4_pn
+LASTP 00100101 .. 100 010 10 .... 0 .... ..... @rd_pg4_pn
+
# SVE inc/dec register by predicate count
INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1
--
2.43.0