https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/93064
>From cf074221241e4d5c83426c58f70438fb592ca7ad Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Wed, 22 May 2024 12:36:33 -0400 Subject: [PATCH] [AMDGPU][Clang] Add check of size for __builtin_amdgcn_global_load_lds --- clang/lib/CodeGen/CGBuiltin.cpp | 101 +++++++++++------- .../builtins-amdgcn-gfx940-err.cl | 9 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 22 ++-- 3 files changed, 83 insertions(+), 49 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ba94bf89e4751..b39b9d4f0ae85 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2537,6 +2537,47 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, return RValue::get(CGF->Builder.CreateCall(UBF, Args)); } +static void buildInstrinsicCallArgs(CodeGenFunction &CGF, const CallExpr *E, + unsigned BuiltinID, + Function *Callee, + SmallVectorImpl<Value *> &Args) { + // Find out if any arguments are required to be integer constant + // expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + CGF.getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + + llvm::FunctionType *FTy = Callee->getFunctionType(); + + for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { + Value *ArgValue = CGF.EmitScalarOrConstFoldImmArg(ICEArguments, i, E); + // If the intrinsic arg type is different from the builtin arg type + // we need to do a bit cast. + llvm::Type *PTy = FTy->getParamType(i); + if (PTy != ArgValue->getType()) { + // XXX - vector of pointers? + if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) { + if (PtrTy->getAddressSpace() != + ArgValue->getType()->getPointerAddressSpace()) { + ArgValue = CGF.Builder.CreateAddrSpaceCast( + ArgValue, llvm::PointerType::get(CGF.getLLVMContext(), + PtrTy->getAddressSpace())); + } + } + // Cast vector type (e.g., v256i32) to x86_amx, this only happen + // in amx intrinsics. + if (PTy->isX86_AMXTy()) + ArgValue = + CGF.Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile, + {ArgValue->getType()}, {ArgValue}); + else + ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy); + } + Args.push_back(ArgValue); + } +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -6024,44 +6065,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (IntrinsicID != Intrinsic::not_intrinsic) { SmallVector<Value*, 16> Args; - - // Find out if any arguments are required to be integer constant - // expressions. - unsigned ICEArguments = 0; - ASTContext::GetBuiltinTypeError Error; - getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); - assert(Error == ASTContext::GE_None && "Should not codegen an error"); - Function *F = CGM.getIntrinsic(IntrinsicID); - llvm::FunctionType *FTy = F->getFunctionType(); - - for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { - Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E); - // If the intrinsic arg type is different from the builtin arg type - // we need to do a bit cast. - llvm::Type *PTy = FTy->getParamType(i); - if (PTy != ArgValue->getType()) { - // XXX - vector of pointers? - if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) { - if (PtrTy->getAddressSpace() != - ArgValue->getType()->getPointerAddressSpace()) { - ArgValue = Builder.CreateAddrSpaceCast( - ArgValue, llvm::PointerType::get(getLLVMContext(), - PtrTy->getAddressSpace())); - } - } - - // Cast vector type (e.g., v256i32) to x86_amx, this only happen - // in amx intrinsics. - if (PTy->isX86_AMXTy()) - ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile, - {ArgValue->getType()}, {ArgValue}); - else - ArgValue = Builder.CreateBitCast(ArgValue, PTy); - } - - Args.push_back(ArgValue); - } + buildInstrinsicCallArgs(*this, E, BuiltinID, F, Args); Value *V = Builder.CreateCall(F, Args); QualType BuiltinRetType = E->getType(); @@ -19040,6 +19045,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); return Builder.CreateCall(F, {Arg}); } + case AMDGPU::BI__builtin_amdgcn_global_load_lds: { + SmallVector<Value *, 5> Args; + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_global_load_lds); + buildInstrinsicCallArgs(*this, E, BuiltinID, F, Args); + constexpr const int SizeIdx = 2; + ConstantInt *SizeVal = dyn_cast<ConstantInt>(Args[SizeIdx]); + if (!SizeVal) { + CGM.Error(E->getExprLoc(), "size must be a constant"); + return nullptr; + } + uint64_t Size = SizeVal->getZExtValue(); + switch (Size) { + default: + CGM.Error(E->getExprLoc(), "size must be a 1/2/4"); + return nullptr; + case 1: + case 2: + case 4: + break; + } + return Builder.CreateCall(F, Args); + } default: return nullptr; } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl new file mode 100644 index 0000000000000..96df07ebf96b6 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef unsigned int u32; + +void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 size) { + __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{size must be a constant}} expected-error{{cannot compile this builtin function yet}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error {{size must be a 1/2/4}} expected-error{{cannot compile this builtin function yet}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 0b774b724d0c0..82e3ecd268190 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2466,20 +2466,18 @@ def int_amdgcn_perm : // GFX9 Intrinsics //===----------------------------------------------------------------------===// -class AMDGPUGlobalLoadLDS : - ClangBuiltin<"__builtin_amdgcn_global_load_lds">, - Intrinsic < - [], - [LLVMQualPointerType<1>, // Base global pointer to load from - LLVMQualPointerType<3>, // LDS base pointer to store to - llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950) - llvm_i32_ty, // imm offset (applied to both global and LDS address) - llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, +class AMDGPUGlobalLoadLDS : Intrinsic < + [], + [LLVMQualPointerType<1>, // Base global pointer to load from + LLVMQualPointerType<3>, // LDS base pointer to store to + llvm_i32_ty, // Data byte size: 1/2/4 + llvm_i32_ty, // imm offset (applied to both global and LDS address) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0, // bit 1 = sc1, // bit 4 = scc)) - [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, - ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], - "", [SDNPMemOperand]>; + [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], + "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; //===----------------------------------------------------------------------===// _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits