Issue |
148655
|
Summary |
[AArch64] Expected a ZPR2StridedOrContiguous register, but got a ZPR2 register
|
Labels |
new issue
|
Assignees |
|
Reporter |
sjoerdmeijer
|
For a build with expensive checks enabled, we are running into an error.
It is not the smallest IR reproducer, but this is what I got with llvm-reduce:
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"
define <vscale x 2 x i32> @_Z4testyxbsaabtaaiPxPA20_hPaPA20_A20_bS1_PA20_yPA20_A20_jPyS_PA20_A20_aPA20_tS5_PA20_A20_xPA20_SF_SL_(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %step.add, <vscale x 2 x i64> %step.add.2, <vscale x 2 x i64> %step.add.3, <vscale x 2 x i1> %3, <vscale x 2 x i32> %4, <vscale x 2 x i32> %5, <vscale x 2 x ptr> %6, ptr %7, ptr %8, ptr %9, ptr %10, <vscale x 2 x i8> %11, <vscale x 2 x ptr> %12, <vscale x 2 x ptr> %13, <vscale x 2 x i64> %14, <vscale x 2 x i1> %15) #0 {
entry:
%step.add.22 = or <vscale x 2 x i64> %0, %2
%step.add.33 = or <vscale x 2 x i64> %1, %0
%16 = getelementptr [20 x [20 x i8]], ptr null, i64 0, <vscale x 2 x i64> %step.add, <vscale x 2 x i64> %step.add
%17 = getelementptr [20 x [20 x i8]], ptr null, i64 0, <vscale x 2 x i64> %step.add.2, <vscale x 2 x i64> %step.add.2
%18 = getelementptr [20 x [20 x i8]], ptr null, i64 0, <vscale x 2 x i64> %step.add.3, <vscale x 2 x i64> %step.add.33
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> splat (i16 -21429), <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%19 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %0, i64 10, i64 10
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %19, i32 0, <vscale x 2 x i1> %3)
%wide.vec1154 = load <vscale x 4 x i64>, ptr null, align 8
%strided.vec1155 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1154)
%20 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1155, 0
%21 = trunc <vscale x 2 x i64> %20 to <vscale x 2 x i32>
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> splat (i8 -1), <vscale x 2 x ptr> %16, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> %17, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> %18, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%22 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %step.add.22, i64 11, i64 11
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %22, i32 0, <vscale x 2 x i1> %3)
%23 = sub <vscale x 2 x i32> zeroinitializer, %4
%24 = tail call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> %21, <vscale x 2 x i32> %23)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> %6, i32 0, <vscale x 2 x i1> %3)
%wide.vec1204 = load <vscale x 4 x i64>, ptr null, align 8
%strided.vec1205 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1204)
%25 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1205, 0
%26 = trunc <vscale x 2 x i64> %25 to <vscale x 2 x i32>
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> zeroinitializer)
%wide.vec1220 = load <vscale x 4 x i64>, ptr null, align 8
%strided.vec1221 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1220)
%27 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1221, 0
%28 = trunc <vscale x 2 x i64> %27 to <vscale x 2 x i32>
%29 = tail call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> %26, <vscale x 2 x i32> %28)
%wide.vec1226 = load <vscale x 4 x i64>, ptr %10, align 8
%strided.vec1227 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1226)
%30 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1227, 0
%wide.vec1228 = load <vscale x 4 x i64>, ptr %7, align 8
%strided.vec1229 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1228)
%31 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1229, 0
%wide.vec1230 = load <vscale x 4 x i64>, ptr %9, align 8
%strided.vec1231 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1230)
%32 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1231, 0
%wide.vec1232 = load <vscale x 4 x i64>, ptr %8, align 8
%strided.vec1233 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1232)
%33 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1233, 0
%34 = trunc <vscale x 2 x i64> %30 to <vscale x 2 x i8>
%35 = trunc <vscale x 2 x i64> %31 to <vscale x 2 x i8>
%36 = trunc <vscale x 2 x i64> %32 to <vscale x 2 x i8>
%37 = xor <vscale x 2 x i8> %36, %11
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %34, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %35, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %37, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> splat (i8 -1), <vscale x 2 x ptr> %13, i32 0, <vscale x 2 x i1> %3)
%38 = getelementptr [20 x [20 x i16]], ptr null, i64 0, <vscale x 2 x i64> %0, i64 14
%39 = getelementptr [20 x [20 x i16]], ptr null, i64 0, <vscale x 2 x i64> %step.add, i64 14
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %38, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %39, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> splat (i16 -21429), <vscale x 2 x ptr> %6, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%40 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %0, i64 15, i64 15
%41 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %step.add.2, i64 15, i64 15
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %40, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %12, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %41, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %13, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %5, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%rdx.minmax = tail call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> %24, <vscale x 2 x i32> %29)
ret <vscale x 2 x i32> %rdx.minmax
; uselistorder directives
uselistorder <vscale x 2 x i64> %0, { 1, 2, 3, 0, 4 }
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32 immarg, <vscale x 2 x i1>) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32 immarg, <vscale x 2 x i1>) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32 immarg, <vscale x 2 x i1>) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>) #2
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>) #3
; uselistorder directives
uselistorder ptr @llvm.masked.scatter.nxv2i16.nxv2p0, { 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @llvm.masked.scatter.nxv2i8.nxv2p0, { 8, 7, 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @llvm.vector.deinterleave2.nxv4i64, { 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @llvm.smax.nxv2i32, { 2, 1, 0 }
attributes #0 = { "target-cpu"="grace" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(write) }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```
This gives:
```
*** Bad machine code: Illegal virtual register for instruction ***
- function: _Z4testyxbsaabtaaiPxPA20_hPaPA20_A20_bS1_PA20_yPA20_A20_jPyS_PA20_A20_aPA20_tS5_PA20_A20_xPA20_SF_SL_
- basic block: %bb.0 entry (0xaaaaaf6bbd48) [0B;1968B)
- instruction: 808B STR_ZZXI %77:zpr2, %stack.0, 0 :: (store (s256) into %stack.0, align 16)
- operand 0: %77:zpr2
Expected a ZPR2StridedOrContiguous register, but got a ZPR2 register
*** Bad machine code: Illegal virtual register for instruction ***
- function: _Z4testyxbsaabtaaiPxPA20_hPaPA20_A20_bS1_PA20_yPA20_A20_jPyS_PA20_A20_aPA20_tS5_PA20_A20_xPA20_SF_SL_
- basic block: %bb.0 entry (0xaaaaaf6bbd48) [0B;1968B)
- instruction: 1720B %78:zpr2 = LDR_ZZXI %stack.0, 0 :: (load (s256) from %stack.0, align 16)
- operand 0: %78:zpr2
Expected a ZPR2StridedOrContiguous register, but got a ZPR2 register
LLVM ERROR: Found 2 machine code errors.
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs