On Fri, 5 Jun 2026 at 00:52, Richard Henderson <[email protected]> wrote:
>
> Implement with a DO_COMPACT macro and general purpose
> predicate handling.
>
> Signed-off-by: Richard Henderson <[email protected]>
> ---
>  target/arm/tcg/sve_helper.c | 51 +++++++++++++++----------------------
>  1 file changed, 21 insertions(+), 30 deletions(-)
>
> diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
> index a13ccf4b85..40cf567b0d 100644
> --- a/target/arm/tcg/sve_helper.c
> +++ b/target/arm/tcg/sve_helper.c
> @@ -3637,39 +3637,30 @@ DO_TRN(sve2_trn_q, Int128, )
>  #undef DO_UZP
>  #undef DO_TRN
>
> -void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc)
> -{
> -    intptr_t i, j, opr_sz = simd_oprsz(desc) / 4;
> -    uint32_t *d = vd, *n = vn;
> -    uint8_t *pg = vg;
> -
> -    for (i = j = 0; i < opr_sz; i++) {
> -        if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) {
> -            d[H4(j)] = n[H4(i)];
> -            j++;
> -        }
> -    }
> -    for (; j < opr_sz; j++) {
> -        d[H4(j)] = 0;
> -    }
> +#define DO_COMPACT(NAME, TYPE, H) \
> +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
> +{ \
> +    intptr_t i = 0, j = 0, oprsz = simd_oprsz(desc); \
> +    do { \
> +        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
> +        do { \
> +            if (pg & 1) { \
> +                *(TYPE *)(vd + H(j)) = *(TYPE *)(vn + H(i)); \
> +                j += sizeof(TYPE); \
> +            } \
> +            i += sizeof(TYPE); \
> +            pg >>= sizeof(TYPE); \
> +        } while (i & 15); \
> +    } while (i < oprsz); \
> +    for (; j < oprsz; j += sizeof(TYPE)) { \
> +        *(TYPE *)(vd + H(j)) = 0; \
> +    } \
>  }

Why do we have special versions of the predicated loop for
64-bit operands elsewhere (e.g. DO_ZPZ vs DO_ZPZ_D, DO_ZPZZ
vs DO_ZPZZ_D, etc.) but don't need one for compact?

I have a similar query about the use of do-while here to the
one I raised on the previous patch.

-- PMM
