https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/93064

From cf074221241e4d5c83426c58f70438fb592ca7ad Mon Sep 17 00:00:00 2001
From: Shilei Tian <i...@tianshilei.me>
Date: Wed, 22 May 2024 12:36:33 -0400
Subject: [PATCH] [AMDGPU][Clang] Add check of size for
 __builtin_amdgcn_global_load_lds

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 101 +++++++++++-------
 .../builtins-amdgcn-gfx940-err.cl             |   9 ++
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  22 ++--
 3 files changed, 83 insertions(+), 49 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ba94bf89e4751..b39b9d4f0ae85 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2537,6 +2537,47 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
   return RValue::get(CGF->Builder.CreateCall(UBF, Args));
 }
 
+static void buildInstrinsicCallArgs(CodeGenFunction &CGF, const CallExpr *E,
+                                    unsigned BuiltinID,
+                                    Function *Callee,
+                                    SmallVectorImpl<Value *> &Args) {
+  // Ask the AST context which arguments of this builtin are required to be
+  // integer constant expressions; the answer is returned in ICEArguments.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  CGF.getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
+  llvm::FunctionType *FTy = Callee->getFunctionType();
+
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
+    Value *ArgValue = CGF.EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
+    // If the intrinsic's parameter type differs from the emitted argument's
+    // type, convert the argument (address space cast and/or bit cast).
+    llvm::Type *PTy = FTy->getParamType(i);
+    if (PTy != ArgValue->getType()) {
+      // XXX - what about a vector of pointers? Only a pointer type is handled.
+      if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
+        if (PtrTy->getAddressSpace() !=
+            ArgValue->getType()->getPointerAddressSpace()) {
+          ArgValue = CGF.Builder.CreateAddrSpaceCast(
+              ArgValue, llvm::PointerType::get(CGF.getLLVMContext(),
+                                               PtrTy->getAddressSpace()));
+        }
+      }
+      // Cast a vector type (e.g., v256i32) to x86_amx; this only happens
+      // in AMX intrinsics. Every other mismatch is resolved with a bit cast.
+      if (PTy->isX86_AMXTy())
+        ArgValue =
+            CGF.Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
+                                        {ArgValue->getType()}, {ArgValue});
+      else
+        ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
+    }
+    Args.push_back(ArgValue);
+  }
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned 
BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -6024,44 +6065,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
   if (IntrinsicID != Intrinsic::not_intrinsic) {
     SmallVector<Value*, 16> Args;
-
-    // Find out if any arguments are required to be integer constant
-    // expressions.
-    unsigned ICEArguments = 0;
-    ASTContext::GetBuiltinTypeError Error;
-    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
-    assert(Error == ASTContext::GE_None && "Should not codegen an error");
-
     Function *F = CGM.getIntrinsic(IntrinsicID);
-    llvm::FunctionType *FTy = F->getFunctionType();
-
-    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
-      Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
-      // If the intrinsic arg type is different from the builtin arg type
-      // we need to do a bit cast.
-      llvm::Type *PTy = FTy->getParamType(i);
-      if (PTy != ArgValue->getType()) {
-        // XXX - vector of pointers?
-        if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
-          if (PtrTy->getAddressSpace() !=
-              ArgValue->getType()->getPointerAddressSpace()) {
-            ArgValue = Builder.CreateAddrSpaceCast(
-                ArgValue, llvm::PointerType::get(getLLVMContext(),
-                                                 PtrTy->getAddressSpace()));
-          }
-        }
-
-        // Cast vector type (e.g., v256i32) to x86_amx, this only happen
-        // in amx intrinsics.
-        if (PTy->isX86_AMXTy())
-          ArgValue = 
Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
-                                             {ArgValue->getType()}, 
{ArgValue});
-        else
-          ArgValue = Builder.CreateBitCast(ArgValue, PTy);
-      }
-
-      Args.push_back(ArgValue);
-    }
+    buildInstrinsicCallArgs(*this, E, BuiltinID, F, Args);
 
     Value *V = Builder.CreateCall(F, Args);
     QualType BuiltinRetType = E->getType();
@@ -19040,6 +19045,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
     return Builder.CreateCall(F, {Arg});
   }
+  case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+    SmallVector<Value *, 5> Args;
+    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_global_load_lds);
+    buildInstrinsicCallArgs(*this, E, BuiltinID, F, Args);
+    constexpr const int SizeIdx = 2;
+    ConstantInt *SizeVal = dyn_cast<ConstantInt>(Args[SizeIdx]);
+    if (!SizeVal) {
+      CGM.Error(E->getExprLoc(), "size must be a constant");
+      return nullptr;
+    }
+    uint64_t Size = SizeVal->getZExtValue();
+    switch (Size) {
+    default:
+      CGM.Error(E->getExprLoc(), "size must be a 1/2/4");
+      return nullptr;
+    case 1:
+    case 2:
+    case 4:
+      break;
+    }
+    return Builder.CreateCall(F, Args);
+  }
   default:
     return nullptr;
   }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl
new file mode 100644
index 0000000000000..96df07ebf96b6
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940-err.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown 
-target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int u32;
+
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, 
u32 size) {
+  __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); 
// expected-error{{size must be a constant}} expected-error{{cannot compile 
this builtin function yet}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, 
/*aux=*/0); // expected-error {{size must be a 1/2/4}} expected-error{{cannot 
compile this builtin function yet}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 0b774b724d0c0..82e3ecd268190 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2466,20 +2466,18 @@ def int_amdgcn_perm :
 // GFX9 Intrinsics
 //===----------------------------------------------------------------------===//
 
-class AMDGPUGlobalLoadLDS :
-  ClangBuiltin<"__builtin_amdgcn_global_load_lds">,
-  Intrinsic <
-    [],
-    [LLVMQualPointerType<1>,            // Base global pointer to load from
-     LLVMQualPointerType<3>,            // LDS base pointer to store to
-     llvm_i32_ty,                       // Data byte size: 1/2/4 (/12/16 for 
gfx950)
-     llvm_i32_ty,                       // imm offset (applied to both global 
and LDS address)
-     llvm_i32_ty],                      // auxiliary data (imm, cachepolicy 
(bit 0 = sc0,
+class AMDGPUGlobalLoadLDS : Intrinsic <
+   [],
+   [LLVMQualPointerType<1>,             // Base global pointer to load from
+    LLVMQualPointerType<3>,             // LDS base pointer to store to
+    llvm_i32_ty,                        // Data byte size: 1/2/4
+    llvm_i32_ty,                        // imm offset (applied to both global 
and LDS address)
+    llvm_i32_ty],                       // auxiliary data (imm, cachepolicy 
(bit 0 = sc0,
                                         //                                   
bit 1 = sc1,
                                         //                                   
bit 4 = scc))
-    [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
-     ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, 
IntrNoCallback, IntrNoFree],
-     "", [SDNPMemOperand]>;
+   [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
+    ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, 
IntrNoCallback, IntrNoFree],
+   "", [SDNPMemOperand]>;
 def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
 
 //===----------------------------------------------------------------------===//

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to