Issue 182095
Summary AMDGPU misses dot patterns using vector reductions
Labels backend:AMDGPU, missed-optimization
Assignees
Reporter arsenm
    These functions are extracted from the fallback path (the one that does not use the dot intrinsics) in [ockl](https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/ockl/src/dots.cl). At the moment, this IR was generated with some additional patches applied to simplify the IR output. The compiler should be able to pattern-match each of these functions into the corresponding dot instruction when it is available. Currently, none of them are matched.

https://godbolt.org/z/r37Kq6xEz

```
; Reproducer driver: feed this file to llc with the gfx950 CPU selected.
; RUN: llc -mcpu=gfx950 < %s

; Compile for the amdgcn architecture with the amdhsa OS component.
target triple = "amdgcn-amd-amdhsa"

; Unsigned 2-element dot product with a saturating accumulate:
;   uadd.sat(a[0]*b[0] + a[1]*b[1], c)
; Both <2 x i16> inputs are zero-extended to i32 lanes before the multiply.
define i32 @test_udot2_sat(<2 x i16> %a, <2 x i16> %b, i32 %c) {
entry:
  %a.wide = zext <2 x i16> %a to <2 x i32>
  %b.wide = zext <2 x i16> %b to <2 x i32>
  %prod = mul nuw <2 x i32> %b.wide, %a.wide
  ; Sum the two lane products by hand (no reduction intrinsic here).
  %prod.lane0 = extractelement <2 x i32> %prod, i64 0
  %prod.lane1 = extractelement <2 x i32> %prod, i64 1
  %dot = add i32 %prod.lane0, %prod.lane1
  ; Fold in the accumulator with unsigned saturation.
  %result = tail call i32 @llvm.uadd.sat.i32(i32 %dot, i32 %c)
  ret i32 %result
}

; Unsigned 2-element dot product with a plain (wrapping) accumulate:
;   (a[1]*b[1] + c) + a[0]*b[0]
; Both <2 x i16> inputs are zero-extended to i32 lanes before the multiply.
define i32 @test_udot2_unsat(<2 x i16> %a, <2 x i16> %b, i32 %c) {
entry:
  %a.wide = zext <2 x i16> %a to <2 x i32>
  %b.wide = zext <2 x i16> %b to <2 x i32>
  %prod = mul nuw <2 x i32> %b.wide, %a.wide
  %prod.lane0 = extractelement <2 x i32> %prod, i64 0
  %prod.lane1 = extractelement <2 x i32> %prod, i64 1
  ; The accumulator is added to lane 1 first, then lane 0 is added on top.
  %partial = add i32 %prod.lane1, %c
  %result = add i32 %partial, %prod.lane0
  ret i32 %result
}

; Signed 2-element dot product with a saturating accumulate:
;   sadd.sat(a[0]*b[0] + a[1]*b[1], c)
; Both <2 x i16> inputs are sign-extended to i32 lanes before the multiply.
define i32 @test_sdot2_sat(<2 x i16> %a, <2 x i16> %b, i32 %c) {
entry:
  %a.wide = sext <2 x i16> %a to <2 x i32>
  %b.wide = sext <2 x i16> %b to <2 x i32>
  %prod = mul nsw <2 x i32> %b.wide, %a.wide
  ; Sum the two lane products by hand (no reduction intrinsic here).
  %prod.lane0 = extractelement <2 x i32> %prod, i64 0
  %prod.lane1 = extractelement <2 x i32> %prod, i64 1
  %dot = add nsw i32 %prod.lane0, %prod.lane1
  ; Fold in the accumulator with signed saturation.
  %result = tail call i32 @llvm.sadd.sat.i32(i32 %dot, i32 %c)
  ret i32 %result
}

; Signed 2-element dot product with a plain (wrapping) accumulate:
;   (a[1]*b[1] + c) + a[0]*b[0]
; Both <2 x i16> inputs are sign-extended to i32 lanes before the multiply.
define i32 @test_sdot2_unsat(<2 x i16> %a, <2 x i16> %b, i32 %c) {
entry:
  %a.wide = sext <2 x i16> %a to <2 x i32>
  %b.wide = sext <2 x i16> %b to <2 x i32>
  %prod = mul nsw <2 x i32> %b.wide, %a.wide
  %prod.lane0 = extractelement <2 x i32> %prod, i64 0
  %prod.lane1 = extractelement <2 x i32> %prod, i64 1
  ; The accumulator is added to lane 1 first, then lane 0 is added on top.
  %partial = add i32 %prod.lane1, %c
  %result = add i32 %partial, %prod.lane0
  ret i32 %result
}

; Unsigned 4-element dot product with a saturating accumulate:
;   uadd.sat(sum_i(a[i]*b[i]), c)
; Both <4 x i8> inputs are zero-extended to i32 lanes before the multiply.
define i32 @test_udot4_sat(<4 x i8> %a, <4 x i8> %b, i32 %c) {
entry:
  %a.wide = zext <4 x i8> %a to <4 x i32>
  %b.wide = zext <4 x i8> %b to <4 x i32>
  %prod = mul nuw nsw <4 x i32> %b.wide, %a.wide
  ; Horizontal sum of the four lane products.
  %dot = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %prod)
  ; Fold in the accumulator with unsigned saturation.
  %result = tail call i32 @llvm.uadd.sat.i32(i32 %dot, i32 %c)
  ret i32 %result
}

; Unsigned 4-element dot product with a plain (wrapping) accumulate:
;   sum_i(a[i]*b[i]) + c
; Both <4 x i8> inputs are zero-extended to i32 lanes before the multiply.
define i32 @test_udot4_unsat(<4 x i8> %a, <4 x i8> %b, i32 %c) {
entry:
  %a.wide = zext <4 x i8> %a to <4 x i32>
  %b.wide = zext <4 x i8> %b to <4 x i32>
  %prod = mul nuw nsw <4 x i32> %b.wide, %a.wide
  ; Horizontal sum of the four lane products.
  %dot = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %prod)
  %result = add i32 %dot, %c
  ret i32 %result
}

; Signed 4-element dot product with a saturating accumulate:
;   sadd.sat(sum_i(a[i]*b[i]), c)
; Both <4 x i8> inputs are sign-extended to i32 lanes before the multiply.
define i32 @test_sdot4_sat(<4 x i8> %a, <4 x i8> %b, i32 %c) {
entry:
  %a.wide = sext <4 x i8> %a to <4 x i32>
  %b.wide = sext <4 x i8> %b to <4 x i32>
  %prod = mul nsw <4 x i32> %b.wide, %a.wide
  ; Horizontal sum of the four lane products.
  %dot = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %prod)
  ; Fold in the accumulator with signed saturation.
  %result = tail call i32 @llvm.sadd.sat.i32(i32 %dot, i32 %c)
  ret i32 %result
}

; Signed 4-element dot product with a plain (wrapping) accumulate:
;   sum_i(a[i]*b[i]) + c
; Both <4 x i8> inputs are sign-extended to i32 lanes before the multiply.
define i32 @test_sdot4_unsat(<4 x i8> %a, <4 x i8> %b, i32 %c) {
entry:
  %a.wide = sext <4 x i8> %a to <4 x i32>
  %b.wide = sext <4 x i8> %b to <4 x i32>
  %prod = mul nsw <4 x i32> %b.wide, %a.wide
  ; Horizontal sum of the four lane products.
  %dot = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %prod)
  %result = add i32 %dot, %c
  ret i32 %result
}

; Intrinsics called by the test functions in this file: the signed and
; unsigned saturating i32 adds (attribute group #0) and the <4 x i32> add
; reduction (attribute group #1).
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1

; Group #0 differs from group #1 only by the extra nocreateundeforpoison
; attribute.
attributes #0 = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

```


_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to