| Issue |
184531
|
| Summary |
[hexagon] pre-allocframe spill - incorrect codegen?
|
| Labels |
backend:Hexagon
|
| Assignees |
|
| Reporter |
androm3da
|
When building the test cases for [libeigen](https://libeigen.gitlab.io/) with LLVM 21.1.8 or 22.1.0, we frequently see test failures in which the generated function prologues look wrong.
The only workaround so far seems to be `-O0`.
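The abnormal codegen seen in the failing test cases (annotated excerpt): a spill is stored through `r16` in the same packet as `allocframe`, but `r16` is only initialized later in the prologue: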
```asm
; store via r16 combined w/allocframe -- r16 uninitialized
{
memw(r16+#-132) = r5 ;; BUG: R16 is uninitialized!
allocframe(r29,#256):raw ;; allocframe in same packet
}
... callee-saved register saves via r30 (correct) ...
; r16 finally set, ~40 instructions later
{
r16 = and(r30,#-128)
}
```
Reduced LLVM IR reproducer, `pre_allocframe_spill_llvm_reduced.ll`:
```llvm
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon-unknown-linux-musl"
declare void @_ZNK5Eigen8internal13gemm_pack_lhsIfiNS0_22const_blas_data_mapperIfiLi0EEELi64ELi32ENS0_9HVXPacketILNS0_13HVXPacketSizeE0ELi0EEELi0ELb0ELb0EEclEPfRKS3_iiii(ptr, ptr, i32, i32)
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.smin.i32(i32, i32) #0
define void @_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE(i32 %rows, i32 %depth, ptr %lhs_, ptr %blocking, i32 %.sroa.speculated312, i32 %mul) {
entry:
%0 = alloca i8, i32 %rows, align 128
br label %invoke.cont60
invoke.cont60: ; preds = %invoke.cont74, %entry
%i2.03416 = phi i32 [ 0, %entry ], [ %mul, %invoke.cont74 ]
%add7 = or i32 %i2.03416, %.sroa.speculated312
br label %invoke.cont74
invoke.cont74: ; preds = %invoke.cont74, %invoke.cont60
%k2.0339 = phi i32 [ 0, %invoke.cont60 ], [ %add67, %invoke.cont74 ]
%add67 = add i32 %k2.0339, %depth
%.sroa.speculated298 = call i32 @llvm.smin.i32(i32 %add67, i32 0)
%sub71 = sub i32 %.sroa.speculated298, %k2.0339
store i32 %rows, ptr %lhs_, align 4
call void @_ZNK5Eigen8internal13gemm_pack_lhsIfiNS0_22const_blas_data_mapperIfiLi0EEELi64ELi32ENS0_9HVXPacketILNS0_13HVXPacketSizeE0ELi0EEELi0ELb0ELb0EEclEPfRKS3_iiii(ptr %0, ptr %blocking, i32 %sub71, i32 %add7)
%cmp63 = icmp slt i32 %add67, 0
br i1 %cmp63, label %invoke.cont74, label %invoke.cont60
}
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```
```bash
$ ./bin/llc -O2 -mtriple=hexagon-linux < ../llvm-project/pre_allocframe_spill_llvm_reduced.ll
```
```asm
.attribute 4, 68 // Tag_arch
.attribute 10, 1 // Tag_cabac
.file "<stdin>"
.text
.globl _ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE // -- Begin function _ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE
.p2align 4
.type _ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE,@function
_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE: // @_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE
.cfi_startproc
// %bb.0: // %entry
{
memw(r16+#-132) = r5
allocframe(r29,#256):raw
} // 4-byte Folded Spill
.cfi_def_cfa r30, 8
.cfi_offset r31, -4
.cfi_offset r30, -8
.cfi_offset r17, -12
.cfi_offset r16, -16
.cfi_offset r19, -20
.cfi_offset r18, -24
.cfi_offset r21, -28
.cfi_offset r20, -32
.cfi_offset r23, -36
.cfi_offset r22, -40
.cfi_offset r25, -44
.cfi_offset r24, -48
.cfi_offset r27, -52
.cfi_offset r26, -56
{
r17 = r0
r0 = add(r0,#7)
r29 = and(r29,#-128)
memd(r30+#-8) = r17:16
} // 8-byte Folded Spill
{
r6 = and(r0,#-8)
r19 = r4
memd(r30+#-32) = r23:22
memd(r30+#-16) = r19:18
} // 8-byte Folded Spill
{
r23 = sub(r29,r6)
r21:20 = combine(r2,r3)
memd(r30+#-24) = r21:20
memd(r30+#-40) = r25:24
} // 8-byte Folded Spill
{
r25 = #0
r23 = and(r23,#-128)
r0 = #0
r22 = r1
}
{
r29 = sub(r29,r6)
r16 = and(r30,#-128)
memd(r30+#-48) = r27:26
} // 8-byte Folded Spill
{
r29 = and(r29,#-128)
}
.p2align 4
.LBB0_1: // %invoke.cont60
// =>This Loop Header: Depth=1
// Child Loop BB0_2 Depth 2
{
r24 = or(r0,r19)
r27:26 = combine(r22,#0)
r18 = #0
}
.p2align 4
.LBB0_2: // %invoke.cont74
// Parent Loop BB0_1 Depth=1
// => This Inner Loop Header: Depth=2
{
r2 = min(r27,r25)
r1:0 = combine(r20,r23)
r3 = r24
r18 = add(r18,r22)
}
{
call _ZNK5Eigen8internal13gemm_pack_lhsIfiNS0_22const_blas_data_mapperIfiLi0EEELi64ELi32ENS0_9HVXPacketILNS0_13HVXPacketSizeE0ELi0EEELi0ELb0ELb0EEclEPfRKS3_iiii
r2 = add(r2,r26)
memw(r21+#0) = r17
}
{
r27 = add(r27,r22)
r26 = sub(r26,r22)
p0 = cmp.gt(r18,#-1); if (!p0.new) jump:t .LBB0_2
}
// %bb.3: // in Loop: Header=BB0_1 Depth=1
{
jump .LBB0_1
r0 = memw(r16+#-132)
} // 4-byte Folded Reload
.Lfunc_end0:
.size _ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE, .Lfunc_end0-_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE
.cfi_endproc
// -- End function
.section ".note.GNU-stack","",@progbits
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs