https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114814
ktkachov at gcc dot gnu.org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |ktkachov at gcc dot gnu.org
     Ever confirmed|0                           |1
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2024-06-20

--- Comment #1 from ktkachov at gcc dot gnu.org ---
Confirmed. The SVE2 codegen isn't much better.
-O2 -mcpu=neoverse-v2 --param aarch64-autovec-preference=2

.L3:
        ld1b    z3.b, p5/z, [x0, x3]
        mov     p5.b, p13.b
        cmpeq   p14.b, p5/z, z30.b, z3.b
        mov     z26.b, p14/z, #1
        uunpklo z1.h, z26.b
        whilelo p5.b, x3, x4
        uunpklo z2.s, z1.h
        uunpkhi z26.h, z26.b
        uunpkhi z1.s, z1.h
        uunpklo z0.s, z26.h
        uunpklo z27.d, z2.s
        uunpklo z24.d, z1.s
        add     z28.d, p7/m, z28.d, z27.d
        uunpkhi z26.s, z26.h
        mov     p7.b, p15.b
        uunpklo z25.d, z0.s
        uunpklo z29.d, z26.s
        whilelo p15.d, x3, x11
        uunpkhi z2.d, z2.s
        uunpkhi z1.d, z1.s
        add     z28.d, p6/m, z28.d, z2.d
        uunpkhi z0.d, z0.s
        add     z28.d, p4/m, z28.d, z24.d
        whilelo p6.d, x3, x5
        add     z28.d, p3/m, z28.d, z1.d
        whilelo p4.d, x3, x6
        add     z28.d, p2/m, z28.d, z25.d
        whilelo p3.d, x3, x7
        add     z28.d, p1/m, z28.d, z0.d
        whilelo p2.d, x3, x8
        add     z28.d, p0/m, z28.d, z29.d
        whilelo p1.d, x3, x9
        whilelo p0.d, x3, x10
        mov     x1, x3
        uunpkhi z26.d, z26.s
        add     x3, x3, x12
        add     z28.d, p7/m, z28.d, z26.d
        whilelo p7.d, x1, x4
        b.any   .L3
        mov     p7.b, p13.b
        uaddv   d31, p7, z28.d
        fmov    x0, d31
        ret