Issue |
148655
|
Summary |
[AArch64] Expected a ZPR2StridedOrContiguous register, but got a ZPR2 register
|
Labels |
new issue
|
Assignees |
|
Reporter |
sjoerdmeijer
|
For a build with expensive checks enabled, we are running into an error.
It is not the smallest IR reproducer, but this is what I got with llvm-reduce:
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"
define <vscale x 2 x i32> @_Z4testyxbsaabtaaiPxPA20_hPaPA20_A20_bS1_PA20_yPA20_A20_jPyS_PA20_A20_aPA20_tS5_PA20_A20_xPA20_SF_SL_(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %step.add, <vscale x 2 x i64> %step.add.2, <vscale x 2 x i64> %step.add.3, <vscale x 2 x i1> %3, <vscale x 2 x i32> %4, <vscale x 2 x i32> %5, <vscale x 2 x ptr> %6, ptr %7, ptr %8, ptr %9, ptr %10, <vscale x 2 x i8> %11, <vscale x 2 x ptr> %12, <vscale x 2 x ptr> %13, <vscale x 2 x i64> %14, <vscale x 2 x i1> %15) #0 {
entry:
%step.add.22 = or <vscale x 2 x i64> %0, %2
%step.add.33 = or <vscale x 2 x i64> %1, %0
%16 = getelementptr [20 x [20 x i8]], ptr null, i64 0, <vscale x 2 x i64> %step.add, <vscale x 2 x i64> %step.add
%17 = getelementptr [20 x [20 x i8]], ptr null, i64 0, <vscale x 2 x i64> %step.add.2, <vscale x 2 x i64> %step.add.2
%18 = getelementptr [20 x [20 x i8]], ptr null, i64 0, <vscale x 2 x i64> %step.add.3, <vscale x 2 x i64> %step.add.33
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> splat (i16 -21429), <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%19 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %0, i64 10, i64 10
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %19, i32 0, <vscale x 2 x i1> %3)
%wide.vec1154 = load <vscale x 4 x i64>, ptr null, align 8
%strided.vec1155 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1154)
%20 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1155, 0
%21 = trunc <vscale x 2 x i64> %20 to <vscale x 2 x i32>
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> splat (i8 -1), <vscale x 2 x ptr> %16, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> %17, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> %18, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%22 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %step.add.22, i64 11, i64 11
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %22, i32 0, <vscale x 2 x i1> %3)
%23 = sub <vscale x 2 x i32> zeroinitializer, %4
%24 = tail call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> %21, <vscale x 2 x i32> %23)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> %6, i32 0, <vscale x 2 x i1> %3)
%wide.vec1204 = load <vscale x 4 x i64>, ptr null, align 8
%strided.vec1205 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1204)
%25 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1205, 0
%26 = trunc <vscale x 2 x i64> %25 to <vscale x 2 x i32>
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> zeroinitializer)
%wide.vec1220 = load <vscale x 4 x i64>, ptr null, align 8
%strided.vec1221 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1220)
%27 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1221, 0
%28 = trunc <vscale x 2 x i64> %27 to <vscale x 2 x i32>
%29 = tail call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> %26, <vscale x 2 x i32> %28)
%wide.vec1226 = load <vscale x 4 x i64>, ptr %10, align 8
%strided.vec1227 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1226)
%30 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1227, 0
%wide.vec1228 = load <vscale x 4 x i64>, ptr %7, align 8
%strided.vec1229 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1228)
%31 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1229, 0
%wide.vec1230 = load <vscale x 4 x i64>, ptr %9, align 8
%strided.vec1231 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1230)
%32 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1231, 0
%wide.vec1232 = load <vscale x 4 x i64>, ptr %8, align 8
%strided.vec1233 = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %wide.vec1232)
%33 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %strided.vec1233, 0
%34 = trunc <vscale x 2 x i64> %30 to <vscale x 2 x i8>
%35 = trunc <vscale x 2 x i64> %31 to <vscale x 2 x i8>
%36 = trunc <vscale x 2 x i64> %32 to <vscale x 2 x i8>
%37 = xor <vscale x 2 x i8> %36, %11
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %34, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %35, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %37, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> splat (i8 -1), <vscale x 2 x ptr> %13, i32 0, <vscale x 2 x i1> %3)
%38 = getelementptr [20 x [20 x i16]], ptr null, i64 0, <vscale x 2 x i64> %0, i64 14
%39 = getelementptr [20 x [20 x i16]], ptr null, i64 0, <vscale x 2 x i64> %step.add, i64 14
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %38, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %39, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> splat (i16 -21429), <vscale x 2 x ptr> %6, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%40 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %0, i64 15, i64 15
%41 = getelementptr [20 x [20 x [20 x i16]]], ptr null, i64 0, <vscale x 2 x i64> %step.add.2, i64 15, i64 15
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %40, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %12, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %41, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> %13, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
tail call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %5, <vscale x 2 x ptr> zeroinitializer, i32 0, <vscale x 2 x i1> %3)
%rdx.minmax = tail call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> %24, <vscale x 2 x i32> %29)
ret <vscale x 2 x i32> %rdx.minmax
; uselistorder directives
uselistorder <vscale x 2 x i64> %0, { 1, 2, 3, 0, 4 }
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32 immarg, <vscale x 2 x i1>) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32 immarg, <vscale x 2 x i1>) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32 immarg, <vscale x 2 x i1>) #1
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>) #2
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>) #3
; uselistorder directives
uselistorder ptr @llvm.masked.scatter.nxv2i16.nxv2p0, { 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @llvm.masked.scatter.nxv2i8.nxv2p0, { 8, 7, 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @llvm.vector.deinterleave2.nxv4i64, { 6, 5, 4, 3, 2, 1, 0 }
uselistorder ptr @llvm.smax.nxv2i32, { 2, 1, 0 }
attributes #0 = { "target-cpu"="grace" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(write) }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```
This gives:
```
*** Bad machine code: Illegal virtual register for instruction ***
- function: _Z4testyxbsaabtaaiPxPA20_hPaPA20_A20_bS1_PA20_yPA20_A20_jPyS_PA20_A20_aPA20_tS5_PA20_A20_xPA20_SF_SL_
- basic block: %bb.0 entry (0xaaaaaf6bbd48) [0B;1968B)
- instruction: 808B STR_ZZXI %77:zpr2, %stack.0, 0 :: (store (s256) into %stack.0, align 16)
- operand 0: %77:zpr2
Expected a ZPR2StridedOrContiguous register, but got a ZPR2 register
*** Bad machine code: Illegal virtual register for instruction ***
- function: _Z4testyxbsaabtaaiPxPA20_hPaPA20_A20_bS1_PA20_yPA20_A20_jPyS_PA20_A20_aPA20_tS5_PA20_A20_xPA20_SF_SL_
- basic block: %bb.0 entry (0xaaaaaf6bbd48) [0B;1968B)
- instruction: 1720B %78:zpr2 = LDR_ZZXI %stack.0, 0 :: (load (s256) from %stack.0, align 16)
- operand 0: %78:zpr2
Expected a ZPR2StridedOrContiguous register, but got a ZPR2 register
LLVM ERROR: Found 2 machine code errors.
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs