[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
llvm-ci wrote: LLVM Buildbot has detected a new failure on builder `ppc64le-mlir-rhel-clang` running on `ppc64le-mlir-rhel-test` while building `clang,llvm,mlir` at step 3 "clean-build-dir". Full details are available at: https://lab.llvm.org/buildbot/#/builders/129/builds/35561 Here is the relevant piece of the build log for the reference ``` Step 3 (clean-build-dir) failure: Delete failed. (failure) (timed out) Step 4 (cmake-configure) failure: cmake (failure) (timed out) command timed out: 1200 seconds without output running [b'cmake', b'-DLLVM_TARGETS_TO_BUILD=PowerPC', b'-DLLVM_INSTALL_UTILS=ON', b'-DCMAKE_CXX_STANDARD=17', b'-DLLVM_ENABLE_PROJECTS=mlir', b'-DLLVM_LIT_ARGS=-vj 256', b'-DCMAKE_C_COMPILER_LAUNCHER=ccache', b'-DCMAKE_CXX_COMPILER_LAUNCHER=ccache', b'-DCMAKE_BUILD_TYPE=Release', b'-DLLVM_ENABLE_ASSERTIONS=ON', b'-GNinja', b'../llvm-project/llvm'], attempting to kill process killed by signal 9 program finished with exit code -1 elapsedTime=1200.339650 ``` https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/shiltian closed https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
0xzre wrote: Hello @shiltian, I think no more reviews are incoming and this PR has been open for quite a long time. Could you kindly help merge this PR 🙏? Sorry if you're still on holiday or otherwise away. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
0xzre wrote: Updated PR description https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre edited https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre edited https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/krzysz00 approved this pull request. MLIR side is good https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/shiltian approved this pull request. LLVM part LGTM. You might want someone else to look at the MLIR part. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From b7a391c60ef7965ee047f6c54308f53dd5e404cf Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/13] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl| 4 ++--
...tins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td| 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll| 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 11 ++-
mlir/test/Target/LLVMIR/rocdl.mlir | 18 +++---
11 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 88b306462a92c..1851b6a354671 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
0xzre wrote:
No problem, it happens 👍. I was not confident enough to push the
discussion here lol, my bad. Reverting.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32") shiltian wrote: Hmm, at one point I thought your PR was adding some new builtins instead of updating existing ones. That doesn't seem to be the case. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
shiltian wrote:
When I made that comment, I thought your PR was adding new builtins. That is
why I said “new builtins”. If that is not the case, your previous handling was
fine. Could you revert `11b1f67a372a20aafc91f94ce8623db2100a1624`? Sorry for
the confusion.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From b7a391c60ef7965ee047f6c54308f53dd5e404cf Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/15] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl| 4 ++--
...tins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td| 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll| 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 11 ++-
mlir/test/Target/LLVMIR/rocdl.mlir | 18 +++---
11 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 88b306462a92c..1851b6a354671 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From b7a391c60ef7965ee047f6c54308f53dd5e404cf Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/14] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl| 4 ++--
...tins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td| 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll| 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 11 ++-
mlir/test/Target/LLVMIR/rocdl.mlir | 18 +++---
11 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 88b306462a92c..1851b6a354671 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From b7a391c60ef7965ee047f6c54308f53dd5e404cf Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/13] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl| 4 ++--
...tins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td| 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll| 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 11 ++-
mlir/test/Target/LLVMIR/rocdl.mlir | 18 +++---
11 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 88b306462a92c..1851b6a354671 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From c9d94c0002cfd640455d59f720aee2c831f69063 Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/13] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a consta
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From c9d94c0002cfd640455d59f720aee2c831f69063 Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/13] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a consta
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
shiltian wrote:
Builtins are builtins, intrinsics are intrinsics. I don't see what the issue is
here.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/12] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a consta
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
0xzre wrote:
Forcing an 8th arg immediately would break downstream code, just as @arsenm
mentioned here:
https://github.com/llvm/llvm-project/pull/171069#pullrequestreview-3552446843
Maybe what we need is to make clamp required later, as part of a transition plan 🤔
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
shiltian wrote:
I'm not sure it's a good idea to make it an optional argument. They are
new builtins, so I think we just want it to be a regular `i1` argument. In
that case, you don't need any special handling here or in other places.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32") shiltian wrote: Oh interesting. I didn't even realize that. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Verify that the legacy WMMA IU8 intrinsic without the clamp operand is
+; upgraded by appending clamp=false.
+
+define <8 x i32> @wmma_legacy(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: @wmma_legacy(
+; CHECK-NEXT: call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1
false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c, i1 false, i1 false,
i1 false) #1, !annotation !0
+; CHECK-NEXT: ret <8 x i32>
+ %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(
+ i1 false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c,
+ i1 false, i1 false) #1, !annotation !0
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(
+ i1, <8 x i32>, i1, <8 x i32>, <8 x i32>, i1, i1)
+
+attributes #1 = { cold }
+
+!0 = !{!"wmma-upgrade"}
arsenm wrote:
```suggestion
attributes #0 = { "preserve-me" }
!0 = !{!"preserve-me"}
```
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Verify that the legacy WMMA IU8 intrinsic without the clamp operand is
+; upgraded by appending clamp=false.
+
+define <8 x i32> @wmma_legacy(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: @wmma_legacy(
+; CHECK-NEXT: call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1
false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c, i1 false, i1 false,
i1 false) #1, !annotation !0
+; CHECK-NEXT: ret <8 x i32>
+ %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(
+ i1 false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c,
+ i1 false, i1 false) #1, !annotation !0
arsenm wrote:
```suggestion
i1 false, i1 false) #0, !annotation !0
```
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -108,10 +108,16 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, false,
mod);
0xzre wrote:
fixed it, please review the implementation. thanks!
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/11] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a consta
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -108,10 +108,16 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, false,
mod);
jmmartinez wrote:
Since the llvm-intrinsic expects an immediate, yes I'd rather raise an error in
the frontend if the clamp argument is not an immediate.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -108,10 +108,16 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, false,
mod);
0xzre wrote:
My implementation follows the other WMMA/SWMMAC reuse flags,
which are allowed to be non-literal. That said, I can add `BuiltinConstantArg` in
`SemaAMDGPU` if needed 👍
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32") arsenm wrote: No, those correspond directly to an opcode https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -108,10 +108,16 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, false,
mod);
jmmartinez wrote:
Is it expected that the test passes in this case, when the clamp argument is not a
literal?
Should we check for it in `SemaAMDGPU`?
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 01/10] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a consta
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
github-actions[bot] wrote: # :window: Windows x64 Test Results * 128696 tests passed * 2821 tests skipped * 1 test failed ## Failed Tests (click on a test name to see its output) ### Clang Clang.CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl ``` Exit Code: 2 Command Output (stdout): -- # RUN: at line 3 c:\_work\llvm-project\llvm-project\build\bin\clang.exe -cc1 -internal-isystem C:\_work\llvm-project\llvm-project\build\lib\clang\22\include -nostdsysteminc -triple amdgcn-unknown-unknown -target-cpu gfx1250 -target-feature +wavefrontsize32 -emit-llvm -o - C:\_work\llvm-project\llvm-project\clang\test\CodeGenOpenCL\builtins-amdgcn-gfx1250-wmma-w32.cl | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe C:\_work\llvm-project\llvm-project\clang\test\CodeGenOpenCL\builtins-amdgcn-gfx1250-wmma-w32.cl --check-prefix=CHECK-GFX1250 # executed command: 'c:\_work\llvm-project\llvm-project\build\bin\clang.exe' -cc1 -internal-isystem 'C:\_work\llvm-project\llvm-project\build\lib\clang\22\include' -nostdsysteminc -triple amdgcn-unknown-unknown -target-cpu gfx1250 -target-feature +wavefrontsize32 -emit-llvm -o - 'C:\_work\llvm-project\llvm-project\clang\test\CodeGenOpenCL\builtins-amdgcn-gfx1250-wmma-w32.cl' # .---command stderr # | Assertion failed: (i >= FTy->getNumParams() || FTy->getParamType(i) == Args[i]->getType()) && "Calling a function with a bad signature!", file C:\_work\llvm-project\llvm-project\llvm\lib\IR\Instructions.cpp, line 766 # | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script. # | Stack dump: # | 0. 
Program arguments: c:\\_work\\llvm-project\\llvm-project\\build\\bin\\clang.exe -cc1 -internal-isystem C:\\_work\\llvm-project\\llvm-project\\build\\lib\\clang\\22\\include -nostdsysteminc -triple amdgcn-unknown-unknown -target-cpu gfx1250 -target-feature +wavefrontsize32 -emit-llvm -o - C:\\_work\\llvm-project\\llvm-project\\clang\\test\\CodeGenOpenCL\\builtins-amdgcn-gfx1250-wmma-w32.cl # | 1. C:\_work\llvm-project\llvm-project\clang\test\CodeGenOpenCL\builtins-amdgcn-gfx1250-wmma-w32.cl:166:1: current parser token 'void' # | 2. C:\_work\llvm-project\llvm-project\clang\test\CodeGenOpenCL\builtins-amdgcn-gfx1250-wmma-w32.cl:155:6: LLVM IR generation of declaration 'test_amdgcn_wmma_i32_16x16x64_iu8' # | 3. C:\_work\llvm-project\llvm-project\clang\test\CodeGenOpenCL\builtins-amdgcn-gfx1250-wmma-w32.cl:155:6: Generating code for declaration 'test_amdgcn_wmma_i32_16x16x64_iu8' # | Exception Code: 0xC01D # | #0 0x7ff752866296 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x7e6296) # | #1 0x7ff86387bb04 (C:\Windows\System32\ucrtbase.dll+0x7bb04) # | #2 0x7ff86387cad1 (C:\Windows\System32\ucrtbase.dll+0x7cad1) # | #3 0x7ff86387e4a1 (C:\Windows\System32\ucrtbase.dll+0x7e4a1) # | #4 0x7ff86387e6e1 (C:\Windows\System32\ucrtbase.dll+0x7e6e1) # | #5 0x7ff752f32818 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0xeb2818) # | #6 0x7ff75259794c (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x51794c) # | #7 0x7ff7525973f6 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x5173f6) # | #8 0x7ff758a33b55 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x69b3b55) # | #9 0x7ff7581aaef1 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x612aef1) # | #10 0x7ff7581c6de3 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x6146de3) # | #11 0x7ff756a6385e (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x49e385e) # | #12 0x7ff75801b036 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x5f9b036) # | #13 0x7ff75802119c 
(c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x5fa119c) # | #14 0x7ff7580128a1 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x5f928a1) # | #15 0x7ff758003503 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x5f83503) # | #16 0x7ff756a34a0c (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x49b4a0c) # | #17 0x7ff756a34911 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x49b4911) # | #18 0x7ff756ce52c0 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x4c652c0) # | #19 0x7ff756cf3d85 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x4c73d85) # | #20 0x7ff7569fe9d5 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x497e9d5) # | #21 0x7ff75476bc34 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x26ebc34) # | #22 0x7ff754761eff (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x26e1eff) # | #23 0x7ff75476813a (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x26e813a) # | #24 0x7ff754760794 (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x26e0794) # | #25 0x7ff757935a8b (c:\_work\llvm-project\llvm-project\build\bin\clang.exe+0x58b5a8b) # | #26 0x
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 1/9] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 1/8] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32") 0xzre wrote: Btw are these what you're referring to? > BUILTIN(__builtin_amdgcn_rsq_clamp, "dd", "nc") BUILTIN(__builtin_amdgcn_rsq_clampf, "ff", "nc") https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32") 0xzre wrote: Thanks for the suggestion! AFAIK introducing it would duplicate surface area and require extra maintenance, without adding new capability. I think we should use the current intrinsic unless there's strong use case 🤔 https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") -TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32") shiltian wrote: Alternatively, you can add a builtin `__builtin_amdgcn_wmma_i32_16x16x64_iu8_clamp` as well as a corresponding intrinsic `llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32.clamp` dedicated for the case where clamp is enabled. I saw some other code like that. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
+Args.push_back(Builder.getFalse());
+
+ auto ToBool = [&](Value *V) {
+if (V->getType()->isIntegerTy(1))
+ return V;
+return Builder.CreateIntCast(V, Builder.getInt1Ty(), false);
0xzre wrote:
Understood; I was worried that something upstream might have left a wider integer.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -4504,6 +4511,21 @@ static Value *upgradeARMIntrinsicCall(StringRef Name,
CallBase *CI, Function *F,
//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
+ if (Name.starts_with("wmma.i32.16x16x64.iu8")) {
+// Legacy WMMA IU8 intrinsic lacked the optional clamp operand. Append
+// clamp=false for compatibility.
+if (CI->arg_size() != 7)
+ return nullptr;
+
+SmallVector Args(CI->args().begin(), CI->args().end());
+Args.push_back(Builder.getFalse());
+
+Function *NewDecl = Intrinsic::getOrInsertDeclaration(
+ F->getParent(), Intrinsic::amdgcn_wmma_i32_16x16x64_iu8,
+ {CI->getArgOperand(4)->getType(), CI->getArgOperand(1)->getType()});
+return Builder.CreateCall(NewDecl, Args);
0xzre wrote:
The operand list of a `CallBase` can’t be expanded in place, so a new call is
still needed. I will now copy everything else over so behavior is preserved.
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -4504,6 +4511,21 @@ static Value *upgradeARMIntrinsicCall(StringRef Name,
CallBase *CI, Function *F,
//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
+ if (Name.starts_with("wmma.i32.16x16x64.iu8")) {
arsenm wrote:
Can you make this check based on intrinsic ID? The intrinsic name didn't change
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -4504,6 +4511,21 @@ static Value *upgradeARMIntrinsicCall(StringRef Name,
CallBase *CI, Function *F,
//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
+ if (Name.starts_with("wmma.i32.16x16x64.iu8")) {
+// Legacy WMMA IU8 intrinsic lacked the optional clamp operand. Append
+// clamp=false for compatibility.
+if (CI->arg_size() != 7)
arsenm wrote:
Can move the arg size check above the name check
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Verify that the legacy WMMA IU8 intrinsic without the clamp operand is
+; upgraded by appending clamp=false.
+
+define <8 x i32> @wmma_legacy(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: @wmma_legacy(
+; CHECK: call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1
false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c, i1 false, i1 false,
i1 false)
+; CHECK: ret <8 x i32>
arsenm wrote:
CHECK-NEXTs
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
+Args.push_back(Builder.getFalse());
+
+ auto ToBool = [&](Value *V) {
+if (V->getType()->isIntegerTy(1))
+ return V;
+return Builder.CreateIntCast(V, Builder.getInt1Ty(), false);
+ };
+
+ // Ensure predicate-like operands are i1 to match intrinsic signature.
+ Args[0] = ToBool(Args[0]);
+ Args[2] = ToBool(Args[2]);
+ Args[5] = ToBool(Args[5]);
+ Args[6] = ToBool(Args[6]);
+ Args[7] = ToBool(Args[7]);
arsenm wrote:
Shouldn't need to do anything to these arguments
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1219,6 +1220,12 @@ static bool upgradeIntrinsicFunction1(Function *F,
Function *&NewFn,
break; // No other 'amdgcn.atomic.*'
}
+ if (Name.starts_with("wmma.i32.16x16x64.iu8") && F->arg_size() == 7) {
arsenm wrote:
Swap checks (but also since this wasn't a removed intrinsic, I would expect you
can still use intrinsic ID checks)
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -4504,6 +4511,21 @@ static Value *upgradeARMIntrinsicCall(StringRef Name,
CallBase *CI, Function *F,
//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
+ if (Name.starts_with("wmma.i32.16x16x64.iu8")) {
+// Legacy WMMA IU8 intrinsic lacked the optional clamp operand. Append
+// clamp=false for compatibility.
+if (CI->arg_size() != 7)
+ return nullptr;
+
+SmallVector Args(CI->args().begin(), CI->args().end());
+Args.push_back(Builder.getFalse());
+
+Function *NewDecl = Intrinsic::getOrInsertDeclaration(
+ F->getParent(), Intrinsic::amdgcn_wmma_i32_16x16x64_iu8,
+ {CI->getArgOperand(4)->getType(), CI->getArgOperand(1)->getType()});
+return Builder.CreateCall(NewDecl, Args);
arsenm wrote:
Do you really need to create a new call, and not mutate the one in place? As it
is this is losing callsite attributes and metadata
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -1665,6 +1665,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
+if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
+ if (Args.size() == 7)
+Args.push_back(Builder.getFalse());
+
+ auto ToBool = [&](Value *V) {
+if (V->getType()->isIntegerTy(1))
+ return V;
+return Builder.CreateIntCast(V, Builder.getInt1Ty(), false);
arsenm wrote:
This shouldn't require casting here, this should have been an implicit cast in
the AST to start with
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Verify that the legacy WMMA IU8 intrinsic without the clamp operand is
+; upgraded by appending clamp=false.
+
+define <8 x i32> @wmma_legacy(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: @wmma_legacy(
+; CHECK: call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1
false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c, i1 false, i1 false,
i1 false)
+; CHECK: ret <8 x i32>
+ %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(
+ i1 false, <8 x i32> %a, i1 false, <8 x i32> %b, <8 x i32> %c,
+ i1 false, i1 false)
arsenm wrote:
Test with metadata attachment, and a callsite attribute
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 1/7] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
github-actions[bot] wrote:
:warning: C/C++ code formatter, clang-format found issues in your code.
:warning:
You can test this locally with the following command:
```bash
git-clang-format --diff origin/main HEAD --extensions cpp,cl --
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp clang/lib/Sema/SemaAMDGPU.cpp
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
llvm/lib/IR/AutoUpgrade.cpp --diff_from_common_commit
```
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
View the diff from clang-format here.
```diff
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index acb9df20b..7fbfdb722 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -32,8 +32,8 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
@@ -4612,8 +4612,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name,
CallBase *CI,
Args.push_back(Builder.getFalse());
Function *NewDecl = Intrinsic::getOrInsertDeclaration(
- F->getParent(), Intrinsic::amdgcn_wmma_i32_16x16x64_iu8,
- {CI->getArgOperand(4)->getType(), CI->getArgOperand(1)->getType()});
+F->getParent(), Intrinsic::amdgcn_wmma_i32_16x16x64_iu8,
+{CI->getArgOperand(4)->getType(), CI->getArgOperand(1)->getType()});
return Builder.CreateCall(NewDecl, Args);
}
```
https://github.com/llvm/llvm-project/pull/171069
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 1/6] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
krzysz00 wrote: > Not yet, where's the preferred doc location? 🤔 Well, if it's listed in the public bits of the ISA manual, it's probably fine, on further thought. Otherwise ... that's an annoyingly hard question. My intuitions say AMDGPUUsage. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
0xzre wrote: > Documentation somewhere of what `clamp` does? Not yet, where's the preferred doc location? 🤔 https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
0xzre wrote: > This can't just break existing uses. Needs to implement bitcode autoupgrade, > and some compatibility on the builtin signature. > > Also, can we just infer clamping as an optimization fold based on the use > context? Good catch. Will add a bitcode autoupgrade for the old 4 op WMMA IU8 intrinsic to append clamp=0, plus a lit bitcode test. I'll keep source compatibility by keeping the existing builtin spelling mapped to clamp=0 and adding the 5 op form. Clamp inference can be a follow-up combine I think... this patch stays on correctness/compat. Would that be okay @arsenm? https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/krzysz00 commented: Documentation somewhere of what `clamp` does? https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/arsenm commented: This can't just break existing uses. Needs to implement bitcode autoupgrade, and some compatibility on the builtin signature. Also, can we just infer clamping as an optimization fold based on the use context? https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
0xzre wrote: Hi @shiltian, requesting your help again with the review. Thanks! https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 1/4] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
@@ -483,14 +483,19 @@ v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] // GFX1250: v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] ; encoding: [0x10,0x00,0x72,0xcc,0x00,0x11,0x42,0x5c] // WAVESIZE-ERR: :[[@LINE-3]]:1: error: instruction requires wavesize=32 -v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] matrix_a_reuse +v_wmma_i32_16x16x64_iu8 v[16:23], v[0:7], v[8:15], v[16:23] clamp shiltian wrote: add the corresponding dasm test as well https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre updated
https://github.com/llvm/llvm-project/pull/171069
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH 1/2] [AMDGPU] add clamp immediate operand to WMMA iu8
intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); //
expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/shiltian commented: The LLVM changes look good to me. https://github.com/llvm/llvm-project/pull/171069 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
github-actions[bot] wrote: # :penguin: Linux x64 Test Results * 166853 tests passed * 2914 tests skipped * 4 tests failed ## Failed Tests (click on a test name to see its output) ### LLVM LLVM.CodeGen/AMDGPU/wmma-coececution-valu-hazards.mir ``` Exit Code: 2 Command Output (stdout): -- # RUN: at line 2 /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/wmma-coececution-valu-hazards.mir -o - | /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck -check-prefix=GFX1250 /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/wmma-coececution-valu-hazards.mir # executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/wmma-coececution-valu-hazards.mir -o - # .---command stderr # | # | # Machine code for function test_wmma_I32_16x16x64_IU8_D0_overlaps_Use1: IsSSA, NoPHIs, NoVRegs, TiedOpsRewritten # | # | bb.0: # | early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23(tied-def 0), 0, 0, 0, implicit $exec # | $vgpr32 = V_ADD_F32_e32 $vgpr16, $vgpr33, implicit $mode, implicit $exec # | # | # End machine code for function test_wmma_I32_16x16x64_IU8_D0_overlaps_Use1. 
# | # | *** Bad machine code: Expected immediate, but got non-immediate *** # | - function:test_wmma_I32_16x16x64_IU8_D0_overlaps_Use1 # | - basic block: %bb.0 (0x2e6bb238) # | - instruction: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23(tied-def 0), 0, 0, 0, implicit $exec # | # | *** Bad machine code: Explicit operand marked as implicit *** # | - function:test_wmma_I32_16x16x64_IU8_D0_overlaps_Use1 # | - basic block: %bb.0 (0x2e6bb238) # | - instruction: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23(tied-def 0), 0, 0, 0, implicit $exec # | - operand 9: implicit $exec # | # | *** Bad machine code: Expected a non-register operand. *** # | - function:test_wmma_I32_16x16x64_IU8_D0_overlaps_Use1 # | - basic block: %bb.0 (0x2e6bb238) # | - instruction: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23(tied-def 0), 0, 0, 0, implicit $exec # | - operand 9: implicit $exec # | LLVM ERROR: Found 3 machine code errors. # | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug. # | Stack dump: # | 0. 
Program arguments: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/wmma-coececution-valu-hazards.mir -o - # | #0 0x07f40428 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/Unix/Signals.inc:834:13 # | #1 0x07f3db35 llvm::sys::RunSignalHandlers() /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/Signals.cpp:105:18 # | #2 0x07f411f1 SignalHandler(int, siginfo_t*, void*) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/Unix/Signals.inc:426:38 # | #3 0x7cf334a3a330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330) # | #4 0x7cf334a93b2c pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x9eb2c) # | #5 0x7cf334a3a27e raise (/lib/x86_64-linux-gnu/libc.so.6+0x4527e) # | #6 0x7cf334a1d8ff abort (/lib/x86_64-linux-gnu/libc.so.6+0x288ff) # | #7 0x07ea4965 llvm::report_fatal_error(llvm::Twine const&, bool) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/ErrorHandling.cpp:137:5 # | #8 0x0706e34b (/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc+0x706e34b) #
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
llvmbot wrote:
@llvm/pr-subscribers-llvm-ir
Author: Muhammad Abdul (0xzre)
Changes
Fixes #166989
- Adds a clamp immediate operand to the AMDGPU WMMA iu8 intrinsic and threads
it through LLVM IR, MIR lowering, Clang builtins/tests, and MLIR ROCDL dialect
so all layers agree on the new operand
- Updates AMDGPUWmmaIntrinsicModsAB so the clamp attribute is emitted, teaches
VOP3P encoding to accept the immediate, and adjusts Clang codegen/builtin
headers plus MLIR op definitions and tests to match
---
Patch is 21.73 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/171069.diff
11 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1-1)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
(+2-2)
- (modified)
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl (+5-4)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+2-1)
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+3-1)
- (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll
(+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
(+3-3)
- (modified) mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td (+3-2)
- (modified) mlir/test/Target/LLVMIR/rocdl.mlir (+10-6)
``diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32
[clang] [llvm] [mlir] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic (PR #171069)
https://github.com/0xzre created
https://github.com/llvm/llvm-project/pull/171069
Fixes #166989
- Adds a clamp immediate operand to the AMDGPU WMMA iu8 intrinsic and plumbs
it through LLVM IR, MIR lowering, Clang builtins/tests, and the MLIR ROCDL
dialect, so that all layers agree on the new operand.
- Updates AMDGPUWmmaIntrinsicModsAB so that the clamp attribute is emitted,
extends the VOP3P encoding to accept the immediate, and adjusts the Clang
codegen/builtin headers, MLIR op definitions, and tests to match.
>From df40111bf084022085b4facd555c03fd42c2827d Mon Sep 17 00:00:00 2001
From: 0xzre
Date: Mon, 8 Dec 2025 07:32:45 +0700
Subject: [PATCH] [AMDGPU] add clamp immediate operand to WMMA iu8 intrinsic
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
.../builtins-amdgcn-gfx1250-wmma-w32.cl | 4 ++--
...iltins-amdgcn-error-gfx1250-wmma-w32-param.cl | 9 +
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +++-
.../UniformityAnalysis/AMDGPU/intrinsics.ll | 6 +++---
.../AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll | 8
.../AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll | 6 +++---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 5 +++--
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++--
11 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 8af6ce1528a45..ebdac12ce107b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -856,7 +856,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8,
"V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8,
"V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb",
"nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8,
"V8iIbV8iIbV8iV8iIbIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8,
"V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index bdb1a7f0bb32f..41c2eb2155b89 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,13 +148,13 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
+// CHECK-GFX1250-NEXT:[[TMP0:%.*]] = tail call <8 x i32>
@llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]],
i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
// CHECK-GFX1250-NEXT:store <8 x i32> [[TMP0]], ptr addrspace(1)
[[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1250-NEXT:ret void
//
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
+ *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true,
false);
}
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
diff --git
a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 49ef2e571740c..8821524fde2db 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -108,10 +108,11 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h*
out, v8i a, v8i b, v8h c,
void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c,
int mod)
{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false,
false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a
constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false,
false);
