[llvm-bugs] [Bug 181490] [AARCH64] manual widening adjacent arithmetic not recognized

LLVM Bugs via llvm-bugs Sat, 14 Feb 2026 09:04:29 -0800

Issue	181490
Summary	[AARCH64] manual widening adjacent arithmetic not recognized
Labels	new issue
Assignees
Reporter	folkertdev

    I'd expect all three functions below to optimize to the same single `uaddlp` instruction.

https://godbolt.org/z/ETqGa8Ynh


```llvm
define <8 x i16> @vpaddlq_u8_v1(<16 x i8> %a) unnamed_addr {
start:
  %_0 = tail call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a)
  ret <8 x i16> %_0
}

define <8 x i16> @vpaddlq_u8_v2_widen_shuffle_add(<16 x i8> %a) unnamed_addr {
start:
  %0 = zext <16 x i8> %a to <16 x i16>
  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %3 = add nuw nsw <8 x i16> %1, %2
  ret <8 x i16> %3
}

define range(i16 0, 511) <8 x i16> @vpaddlq_u8_v3_shuffle_widen_add(<16 x i8> %a) unnamed_addr {
start:
  %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %2 = zext <8 x i8> %0 to <8 x i16>
  %3 = zext <8 x i8> %1 to <8 x i16>
  %4 = add nuw nsw <8 x i16> %2, %3
  ret <8 x i16> %4
}
```

Instead, we get:

```asm
vpaddlq_u8_v1:
        uaddlp v0.8h, v0.16b
        ret

vpaddlq_u8_v2_widen_shuffle_add:
        mov v1.16b, v0.16b
        fmov    d0, d1
        mov     d2, v1.d[1]
        ushll   v0.8h, v0.8b, #0
        ushll2  v1.8h, v1.16b, #0
        addp v0.8h, v0.8h, v1.8h
        ret

vpaddlq_u8_v3_shuffle_widen_add:
        mov     v1.16b, v0.16b
        uzp1    v3.16b, v1.16b, v2.16b
        fmov d0, d3
        mov     d3, v3.d[1]
        uzp2    v2.16b, v1.16b, v2.16b
        fmov    d1, d2
        mov     d2, v2.d[1]
        uaddl v0.8h, v0.8b, v1.8b
        ret
```

I believe the same thing happens for many other (arithmetic) operations, so ideally there is some general solution.

_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

[llvm-bugs] [Bug 181490] [AARCH64] manual widening adjacent arithmetic not recognized

Reply via email to