https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114814

ktkachov at gcc dot gnu.org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |ktkachov at gcc dot gnu.org
     Ever confirmed|0                           |1
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2024-06-20

--- Comment #1 from ktkachov at gcc dot gnu.org ---
Confirmed.
The SVE2 codegen isn't much better.
Compiling with -O2 -mcpu=neoverse-v2 --param aarch64-autovec-preference=2 gives:

.L3:
        ld1b    z3.b, p5/z, [x0, x3]
        mov     p5.b, p13.b
        cmpeq   p14.b, p5/z, z30.b, z3.b
        mov     z26.b, p14/z, #1
        uunpklo z1.h, z26.b
        whilelo p5.b, x3, x4
        uunpklo z2.s, z1.h
        uunpkhi z26.h, z26.b
        uunpkhi z1.s, z1.h
        uunpklo z0.s, z26.h
        uunpklo z27.d, z2.s
        uunpklo z24.d, z1.s
        add     z28.d, p7/m, z28.d, z27.d
        uunpkhi z26.s, z26.h
        mov     p7.b, p15.b
        uunpklo z25.d, z0.s
        uunpklo z29.d, z26.s
        whilelo p15.d, x3, x11
        uunpkhi z2.d, z2.s
        uunpkhi z1.d, z1.s
        add     z28.d, p6/m, z28.d, z2.d
        uunpkhi z0.d, z0.s
        add     z28.d, p4/m, z28.d, z24.d
        whilelo p6.d, x3, x5
        add     z28.d, p3/m, z28.d, z1.d
        whilelo p4.d, x3, x6
        add     z28.d, p2/m, z28.d, z25.d
        whilelo p3.d, x3, x7
        add     z28.d, p1/m, z28.d, z0.d
        whilelo p2.d, x3, x8
        add     z28.d, p0/m, z28.d, z29.d
        whilelo p1.d, x3, x9
        whilelo p0.d, x3, x10
        mov     x1, x3
        uunpkhi z26.d, z26.s
        add     x3, x3, x12
        add     z28.d, p7/m, z28.d, z26.d
        whilelo p7.d, x1, x4
        b.any   .L3
        mov     p7.b, p13.b
        uaddv   d31, p7, z28.d
        fmov    x0, d31
        ret

Reply via email to