On Fri, 5 Jun 2026 at 00:52, Richard Henderson
<[email protected]> wrote:
>
> Implement with a DO_COMPACT macro and general purpose
> predicate handling.
>
> Signed-off-by: Richard Henderson <[email protected]>
> ---
>  target/arm/tcg/sve_helper.c | 51 +++++++++++++++----------------------
>  1 file changed, 21 insertions(+), 30 deletions(-)
>
> diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
> index a13ccf4b85..40cf567b0d 100644
> --- a/target/arm/tcg/sve_helper.c
> +++ b/target/arm/tcg/sve_helper.c
> @@ -3637,39 +3637,30 @@ DO_TRN(sve2_trn_q, Int128, )
>  #undef DO_UZP
>  #undef DO_TRN
>
> -void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc)
> -{
> -    intptr_t i, j, opr_sz = simd_oprsz(desc) / 4;
> -    uint32_t *d = vd, *n = vn;
> -    uint8_t *pg = vg;
> -
> -    for (i = j = 0; i < opr_sz; i++) {
> -        if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) {
> -            d[H4(j)] = n[H4(i)];
> -            j++;
> -        }
> -    }
> -    for (; j < opr_sz; j++) {
> -        d[H4(j)] = 0;
> -    }
> +#define DO_COMPACT(NAME, TYPE, H)                                     \
> +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc)        \
> +{                                                                     \
> +    intptr_t i = 0, j = 0, oprsz = simd_oprsz(desc);                  \
> +    do {                                                              \
> +        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));               \
> +        do {                                                          \
> +            if (pg & 1) {                                             \
> +                *(TYPE *)(vd + H(j)) = *(TYPE *)(vn + H(i));          \
> +                j += sizeof(TYPE);                                    \
> +            }                                                         \
> +            i += sizeof(TYPE);                                        \
> +            pg >>= sizeof(TYPE);                                      \
> +        } while (i & 15);                                             \
> +    } while (i < oprsz);                                              \
> +    for (; j < oprsz; j += sizeof(TYPE)) {                            \
> +        *(TYPE *)(vd + H(j)) = 0;                                     \
> +    }                                                                 \
>  }

Why do we have special versions of the predicated loop for 64-bit operands
elsewhere (e.g. DO_ZPZ vs DO_ZPZ_D, DO_ZPZZ vs DO_ZPZZ_D, etc.) but don't
need to do that for compact?

I have a similar query about the do-while loops here as on the previous patch.

-- PMM

Reply via email to