On Fri, 5 Jun 2026 at 00:51, Richard Henderson
<[email protected]> wrote:
>
> Signed-off-by: Richard Henderson <[email protected]>
> ---
>  target/arm/tcg/helper-sve-defs.h |  3 ++
>  target/arm/tcg/sve_helper.c      | 30 +++++++++++++++++
>  target/arm/tcg/translate-sve.c   | 56 ++++++++++++++++++++++++++++++++
>  target/arm/tcg/sve.decode        |  3 ++
>  4 files changed, 92 insertions(+)
>
> diff --git a/target/arm/tcg/helper-sve-defs.h b/target/arm/tcg/helper-sve-defs.h
> index 11342e1e59..440a868cea 100644
> --- a/target/arm/tcg/helper-sve-defs.h
> +++ b/target/arm/tcg/helper-sve-defs.h
> @@ -961,6 +961,9 @@ DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32)
>
> +DEF_HELPER_FLAGS_3(sve2p2_firstp, TCG_CALL_NO_RWG_SE, i64, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_3(sve2p2_lastp, TCG_CALL_NO_RWG_SE, i64, ptr, ptr, i32)
> +
>  DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
>  DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
>
> diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
> index 3f99a362c3..57c7823feb 100644
> --- a/target/arm/tcg/sve_helper.c
> +++ b/target/arm/tcg/sve_helper.c
> @@ -4302,6 +4302,36 @@ uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc)
>      return count >> p.lg2_stride;
>  }
>
> +uint64_t HELPER(sve2p2_firstp)(void *vn, void *vg, uint32_t pred_desc)
> +{
> +    intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
> +    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
> +    uint64_t *n = vn, *g = vg, mask = pred_esz_masks[esz];
> +
> +    for (intptr_t i = 0; i < words; ++i) {
> +        uint64_t t = n[i] & g[i] & mask;
> +        if (t) {
> +            return (ctz64(t) + i * 64) >> esz;
> +        }
> +    }
> +    return -1;
> +}
> +
> +uint64_t HELPER(sve2p2_lastp)(void *vn, void *vg, uint32_t pred_desc)
> +{
> +    intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
> +    intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
> +    uint64_t *n = vn, *g = vg, mask = pred_esz_masks[esz];
> +
> +    for (intptr_t i = words - 1; i >= 0; --i) {
> +        uint64_t t = n[i] & g[i] & mask;
> +        if (t) {
> +            return ((clz64(t) ^ 63) + i * 64) >> esz;

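Why XOR? Since t is nonzero here, clz64(t) is in the range 0..63, so
(clz64(t) ^ 63) gives the same value as (63 - clz64(t)), but the
subtraction is easier to read. Shouldn't this be the same kind of
expression as we use in last_active_element():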

            return i * 64 + (63 - clz64(this_g));

> +        }
> +    }
> +    return -1;
> +}

> +            if (firstp) {
> +                tcg_gen_ctzi_i64(v, v, -1);
> +            } else {
> +                tcg_gen_clzi_i64(v, v, 64);
> +                tcg_gen_subfi_i64(v, 63, v);

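...and the inline expansion here (clzi followed by subfi from 63) also
computes 63 - clz64(...), so the helper could use the same form.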

thanks
-- PMM

Reply via email to