Issue 184531
Summary [hexagon] pre-allocframe spill - incorrect codegen?
Labels backend:Hexagon
Assignees
Reporter androm3da
    When building the test cases for [libeigen](https://libeigen.gitlab.io/) with LLVM 21.1.8 or 22.1.0, we frequently see test failures in which the generated function prologues look wrong.

The only workaround so far seems to be `-O0`.

Abnormal codegen appearing in test cases:

```asm
; First packet of the function: a store that uses r16 as its base register is
; packetized together with allocframe, but r16 has not been written yet.
{
    memw(r16+#-132) = r5       ;; BUG: r16 is used as a base before the function ever sets it
    allocframe(r29,#256):raw   ;; frame allocation sits in the same packet as that store
}

... callee-saved register saves via r30 (correct) ...

; r16 finally set, ~40 instructions later -- this is its first definition,
; so the store in the first packet could not have used this value.
{
    r16 = and(r30,#-128)
}
```

Reduced LLVM IR reproducer, `pre_allocframe_spill_llvm_reduced.ll`:

```llvm
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon-unknown-linux-musl"

declare void @_ZNK5Eigen8internal13gemm_pack_lhsIfiNS0_22const_blas_data_mapperIfiLi0EEELi64ELi32ENS0_9HVXPacketILNS0_13HVXPacketSizeE0ELi0EEELi0ELb0ELb0EEclEPfRKS3_iiii(ptr, ptr, i32, i32)

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.smin.i32(i32, i32) #0

; Reduced reproducer for the suspect Hexagon prologue: a variable-sized,
; over-aligned alloca followed by a two-level loop around a single call.
; There is no `ret` in this function; every terminator branches to one of the
; two loop blocks.
define void @_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE(i32 %rows, i32 %depth, ptr %lhs_, ptr %blocking, i32 %.sroa.speculated312, i32 %mul) {
entry:
  ; Stack allocation of %rows bytes (size only known at run time) that must be
  ; 128-byte aligned.
  ; NOTE(review): presumably this alloca is what makes llc set up an aligned
  ; base in r16 (`r16 = and(r30,#-128)` in the llc output) -- confirm against
  ; the Hexagon frame lowering.
  %0 = alloca i8, i32 %rows, align 128
  br label %invoke.cont60

invoke.cont60:                                    ; preds = %invoke.cont74, %entry
  ; Outer-loop header: %i2.03416 is 0 on entry and %mul on every later pass.
  ; NOTE(review): %mul looks like the value that is spilled in the first packet
  ; and reloaded on the outer back-edge in the llc output -- confirm.
  %i2.03416 = phi i32 [ 0, %entry ], [ %mul, %invoke.cont74 ]
  %add7 = or i32 %i2.03416, %.sroa.speculated312
  br label %invoke.cont74

invoke.cont74:                                    ; preds = %invoke.cont74, %invoke.cont60
  ; Inner loop: %k2.0339 starts at 0 and advances by %depth each iteration.
  %k2.0339 = phi i32 [ 0, %invoke.cont60 ], [ %add67, %invoke.cont74 ]
  %add67 = add i32 %k2.0339, %depth
  ; %sub71 = smin(%add67, 0) - %k2.0339, passed as the third call argument.
  %.sroa.speculated298 = call i32 @llvm.smin.i32(i32 %add67, i32 0)
 %sub71 = sub i32 %.sroa.speculated298, %k2.0339
  store i32 %rows, ptr %lhs_, align 4
  ; The call receives the aligned dynamic alloca (%0) as its first argument.
  call void @_ZNK5Eigen8internal13gemm_pack_lhsIfiNS0_22const_blas_data_mapperIfiLi0EEELi64ELi32ENS0_9HVXPacketILNS0_13HVXPacketSizeE0ELi0EEELi0ELb0ELb0EEclEPfRKS3_iiii(ptr %0, ptr %blocking, i32 %sub71, i32 %add7)
  ; Stay in the inner loop while %add67 is negative; otherwise go back to the
  ; outer-loop header.
  %cmp63 = icmp slt i32 %add67, 0
  br i1 %cmp63, label %invoke.cont74, label %invoke.cont60
}

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

```

```bash
$ ./bin/llc  -O2 -mtriple=hexagon-linux < ../llvm-project/pre_allocframe_spill_llvm_reduced.ll 
```

```asm
	// Output of `llc -O2 -mtriple=hexagon-linux` for the reduced reproducer.
	// Instructions are verbatim compiler output; the NOTE(review) comments
	// were added to point at the use of r16 before its first definition.
	.attribute	4, 68	// Tag_arch
	.attribute	10, 1	// Tag_cabac
	.file	"<stdin>"
	.text
	.globl	_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE // -- Begin function _ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE
	.p2align	4
	.type	_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE,@function
_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE: // @_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE
	.cfi_startproc
// %bb.0:                               // %entry
	// NOTE(review): BUG -- this very first packet spills r5 through r16, but
	// nothing in this function has written r16 yet. Its first definition is
	// `r16 = and(r30,#-128)` five packets further down, so this store targets
	// an address derived from whatever r16 held on entry.
	{
		memw(r16+#-132) = r5
		allocframe(r29,#256):raw
	} // 4-byte Folded Spill
	.cfi_def_cfa r30, 8
	.cfi_offset r31, -4
	.cfi_offset r30, -8
	.cfi_offset r17, -12
	.cfi_offset r16, -16
	.cfi_offset r19, -20
	.cfi_offset r18, -24
	.cfi_offset r21, -28
	.cfi_offset r20, -32
	.cfi_offset r23, -36
	.cfi_offset r22, -40
	.cfi_offset r25, -44
	.cfi_offset r24, -48
	.cfi_offset r27, -52
	.cfi_offset r26, -56
	// The register pairs r17:16 .. r27:26 are saved relative to r30 in the
	// following packets. r17:16 is only saved here, i.e. after the first
	// packet already used r16 as a store base.
	{
		r17 = r0
		r0 = add(r0,#7)
		r29 = and(r29,#-128)
		memd(r30+#-8) = r17:16
	} // 8-byte Folded Spill
	// r6 = (r0 + 7) & -8, using the r0 updated in the previous packet; it is
	// the amount subtracted from r29 below.
	{
		r6 = and(r0,#-8)
		r19 = r4
		memd(r30+#-32) = r23:22
		memd(r30+#-16) = r19:18
	} // 8-byte Folded Spill
	{
		r23 = sub(r29,r6)
		r21:20 = combine(r2,r3)
		memd(r30+#-24) = r21:20
		memd(r30+#-40) = r25:24
	} // 8-byte Folded Spill
	{
		r25 = #0
		r23 = and(r23,#-128)
		r0 = #0
		r22 = r1
	}
	// NOTE(review): first definition of r16 -- r30 rounded down to a multiple
	// of 128. The spill in the first packet executed before this point.
	{
		r29 = sub(r29,r6)
		r16 = and(r30,#-128)
		memd(r30+#-48) = r27:26
	} // 8-byte Folded Spill
	{
		r29 = and(r29,#-128)
	}
	.p2align	4
.LBB0_1: // %invoke.cont60
                                        // =>This Loop Header: Depth=1
                                        //     Child Loop BB0_2 Depth 2
	{
		r24 = or(r0,r19)
		r27:26 = combine(r22,#0)
		r18 = #0
	}
	.p2align	4
.LBB0_2: // %invoke.cont74
                                        // Parent Loop BB0_1 Depth=1
                                        // => This Inner Loop Header: Depth=2
	{
		r2 = min(r27,r25)
		r1:0 = combine(r20,r23)
		r3 = r24
		r18 = add(r18,r22)
	}
	{
		call _ZNK5Eigen8internal13gemm_pack_lhsIfiNS0_22const_blas_data_mapperIfiLi0EEELi64ELi32ENS0_9HVXPacketILNS0_13HVXPacketSizeE0ELi0EEELi0ELb0ELb0EEclEPfRKS3_iiii
		r2 = add(r2,r26)
		memw(r21+#0) = r17
	}
	{
		r27 = add(r27,r22)
		r26 = sub(r26,r22)
		p0 = cmp.gt(r18,#-1); if (!p0.new) jump:t .LBB0_2
	}
// %bb.3:                               //   in Loop: Header=BB0_1 Depth=1
	// Reload from the same r16-relative slot as the spill in the first packet.
	// The loaded value becomes r0 for the next outer iteration, where it feeds
	// `r24 = or(r0,r19)`. By now r16 is valid, so this reads a different
	// address than the one the first-packet store wrote to, unless r16
	// happened to hold the same value on entry.
	{
		jump .LBB0_1
		r0 = memw(r16+#-132)
	} // 4-byte Folded Reload
.Lfunc_end0:
	.size	_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE, .Lfunc_end0-_ZN5Eigen8internal29general_matrix_matrix_productIifLi0ELb0EfLi0ELb0ELi0ELi1EE3runEiiiPKfiS4_iPfiifRNS0_15level3_blockingIffEEPNS0_16GemmParallelInfoIiEE
	.cfi_endproc
 // -- End function
	.section	".note.GNU-stack","",@progbits
```


_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to