https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/92962
Fixes: SWDEV-459212 >From 5c342cbb389d32468695a925a6db3b42b09b15c4 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Tue, 21 May 2024 16:40:41 -0400 Subject: [PATCH] [AMDGPU] Clang builtin for GLOBAL_LOAD_LDS on MI3XX Fixes: SWDEV-459212 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 35 ++++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 3e21a2fe2ac6b..efa652eee9901 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -240,6 +240,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "at TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3UiiUi", "t", "gfx940-insts") //===----------------------------------------------------------------------===// // Deep learning builtins. diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index be8048ca2459c..c6912196de5d7 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2466,23 +2466,24 @@ def int_amdgcn_perm : // GFX9 Intrinsics //===----------------------------------------------------------------------===// -class AMDGPUGlobalLoadLDS : Intrinsic < - [], - [LLVMQualPointerType<1>, // Base global pointer to load from - LLVMQualPointerType<3>, // LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 - llvm_i32_ty, // imm offset (applied to both global and LDS address) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc/sc0, - // bit 1 = slc/sc1, - // bit 2 = dlc on gfx10/gfx11)) - // bit 4 = scc/nt on gfx90a+)) - // gfx12+: - // cachepolicy (bits [0-2] = th, - // bits [3-4] = scope) - // swizzled buffer (bit 6 = swz), - [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, - ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], - "", [SDNPMemOperand]>; +class AMDGPUGlobalLoadLDS : + ClangBuiltin<"__builtin_amdgcn_global_load_lds">, + Intrinsic < + [], + [LLVMQualPointerType<1>, // Base global pointer to load from + LLVMQualPointerType<3>, // LDS base pointer to store to + llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) + llvm_i32_ty, // imm offset (applied to both global and LDS address) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc/sc0, + // bit 1 = slc/sc1, + // bit 4 = scc/nt on gfx90a+)) + // gfx12+: + // cachepolicy (bits [0-2] = th, + // bits [3-4] = scope) + // swizzled buffer (bit 6 = swz), + [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], + "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; //===----------------------------------------------------------------------===// _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits