On Fri, 5 Jun 2026 at 00:51, Richard Henderson
<[email protected]> wrote:
>
> Signed-off-by: Richard Henderson <[email protected]>
> ---
> target/arm/tcg/helper-sve-defs.h | 3 ++
> target/arm/tcg/sve_helper.c | 30 +++++++++++++++++
> target/arm/tcg/translate-sve.c | 56 ++++++++++++++++++++++++++++++++
> target/arm/tcg/sve.decode | 3 ++
> 4 files changed, 92 insertions(+)
>
> diff --git a/target/arm/tcg/helper-sve-defs.h b/target/arm/tcg/helper-sve-defs.h
> index 11342e1e59..440a868cea 100644
> --- a/target/arm/tcg/helper-sve-defs.h
> +++ b/target/arm/tcg/helper-sve-defs.h
> @@ -961,6 +961,9 @@ DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
> DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
> DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32)
>
> +DEF_HELPER_FLAGS_3(sve2p2_firstp, TCG_CALL_NO_RWG_SE, i64, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_3(sve2p2_lastp, TCG_CALL_NO_RWG_SE, i64, ptr, ptr, i32)
> +
> DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
> DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
>
> diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
> index 3f99a362c3..57c7823feb 100644
> --- a/target/arm/tcg/sve_helper.c
> +++ b/target/arm/tcg/sve_helper.c
> @@ -4302,6 +4302,36 @@ uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc)
> return count >> p.lg2_stride;
> }
>
> +uint64_t HELPER(sve2p2_firstp)(void *vn, void *vg, uint32_t pred_desc)
> +{
> + intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
> + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
> + uint64_t *n = vn, *g = vg, mask = pred_esz_masks[esz];
> +
> + for (intptr_t i = 0; i < words; ++i) {
> + uint64_t t = n[i] & g[i] & mask;
> + if (t) {
> + return (ctz64(t) + i * 64) >> esz;
> + }
> + }
> + return -1;
> +}
> +
> +uint64_t HELPER(sve2p2_lastp)(void *vn, void *vg, uint32_t pred_desc)
> +{
> + intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
> + intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
> + uint64_t *n = vn, *g = vg, mask = pred_esz_masks[esz];
> +
> + for (intptr_t i = words - 1; i >= 0; --i) {
> + uint64_t t = n[i] & g[i] & mask;
> + if (t) {
> + return ((clz64(t) ^ 63) + i * 64) >> esz;
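Why XOR? Shouldn't this be the same kind of expression as
we use in last_active_element()?

    return i * 64 + (63 - clz64(this_g));

(Since t is non-zero here, clz64(t) is in the range 0..63, so
"clz64(t) ^ 63" and "63 - clz64(t)" give the same value; the
subtraction form would just be consistent with the existing code.)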
> + }
> + }
> + return -1;
> +}
> + if (firstp) {
> + tcg_gen_ctzi_i64(v, v, -1);
> + } else {
> + tcg_gen_clzi_i64(v, v, 64);
> + tcg_gen_subfi_i64(v, 63, v);
...and we use 63 - clz64(...) here.
thanks
-- PMM