arsenm created this revision.
arsenm added reviewers: sstefan1, jdoerfert, yaxunl, AMDGPU, nikic, 
alexander-shaposhnikov.
Herald added subscribers: kosarev, StephenFan, kerbowa, jvesely.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

Also increases the alignment of llvm.amdgcn.implicitarg.ptr to 8 to
match clang.


https://reviews.llvm.org/D142823

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  clang/test/CodeGenCUDA/builtins-amdgcn.cu
  clang/test/CodeGenOpenCL/builtins-amdgcn.cl
  llvm/include/llvm/IR/Intrinsics.td
  llvm/include/llvm/IR/IntrinsicsAMDGPU.td
  llvm/utils/TableGen/CodeGenIntrinsics.h
  llvm/utils/TableGen/CodeGenTarget.cpp
  llvm/utils/TableGen/IntrinsicEmitter.cpp

Index: llvm/utils/TableGen/IntrinsicEmitter.cpp
===================================================================
--- llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -726,6 +726,10 @@
           OS << "      Attribute::get(C, Attribute::Alignment, "
              << Attr.Value << "),\n";
           break;
+        case CodeGenIntrinsic::Dereferenceable:
+          OS << "      Attribute::get(C, Attribute::Dereferenceable, "
+             << Attr.Value << "),\n";
+          break;
         }
       }
       OS << "    });\n";
Index: llvm/utils/TableGen/CodeGenTarget.cpp
===================================================================
--- llvm/utils/TableGen/CodeGenTarget.cpp
+++ llvm/utils/TableGen/CodeGenTarget.cpp
@@ -923,6 +923,10 @@
     unsigned ArgNo = R->getValueAsInt("ArgNo");
     uint64_t Align = R->getValueAsInt("Align");
     addArgAttribute(ArgNo, Alignment, Align);
+  } else if (R->isSubClassOf("Dereferenceable")) {
+    unsigned ArgNo = R->getValueAsInt("ArgNo");
+    uint64_t Bytes = R->getValueAsInt("Bytes");
+    addArgAttribute(ArgNo, Dereferenceable, Bytes);
   } else
     llvm_unreachable("Unknown property!");
 }
Index: llvm/utils/TableGen/CodeGenIntrinsics.h
===================================================================
--- llvm/utils/TableGen/CodeGenIntrinsics.h
+++ llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -119,7 +119,8 @@
     WriteOnly,
     ReadNone,
     ImmArg,
-    Alignment
+    Alignment,
+    Dereferenceable
   };
 
   struct ArgAttribute {
Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -141,8 +141,10 @@
                                <"__builtin_amdgcn_workgroup_id">;
 
 def int_amdgcn_dispatch_ptr :
+  ClangBuiltin<"__builtin_amdgcn_dispatch_ptr">,
   DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
-  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
+  [Align<RetIndex, 4>, Dereferenceable<RetIndex, 64>, IntrNoMem,
+   IntrSpeculatable]>;
 
 def int_amdgcn_queue_ptr :
   ClangBuiltin<"__builtin_amdgcn_queue_ptr">,
@@ -157,7 +159,8 @@
 def int_amdgcn_implicitarg_ptr :
   ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
   DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
-  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
+  [Align<RetIndex, 8>, Dereferenceable<RetIndex, 256>,
+   IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_groupstaticsize :
   ClangBuiltin<"__builtin_amdgcn_groupstaticsize">,
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -94,6 +94,11 @@
   int Align = align;
 }
 
+class Dereferenceable<AttrIndex idx, int bytes> : IntrinsicProperty {
+  int ArgNo = idx.Value;
+  int Bytes = bytes;
+}
+
 // Returned - The specified argument is always the return value of the
 // intrinsic.
 class Returned<AttrIndex idx> : IntrinsicProperty {
Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -516,12 +516,15 @@
 }
 
 // CHECK-LABEL: @test_dispatch_ptr
-// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 void test_dispatch_ptr(__constant unsigned char ** out)
 {
   *out = __builtin_amdgcn_dispatch_ptr();
 }
 
+// CHECK: declare align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+
+
 // CHECK-LABEL: @test_queue_ptr
 // CHECK: call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
 void test_queue_ptr(__constant unsigned char ** out)
@@ -543,6 +546,9 @@
   *out = __builtin_amdgcn_implicitarg_ptr();
 }
 
+// CHECK: declare align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+
+
 // CHECK-LABEL: @test_get_group_id(
 // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x()
 // CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y()
@@ -583,7 +589,7 @@
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 4
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 6
@@ -601,7 +607,7 @@
 }
 
 // CHECK-LABEL: @test_get_grid_size(
-// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 12
 // CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !invariant.load
 // CHECK: getelementptr i8, ptr addrspace(4) %{{.*}}, i64 16
Index: clang/test/CodeGenCUDA/builtins-amdgcn.cu
===================================================================
--- clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -21,7 +21,7 @@
 // CHECK-NEXT:    store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 // CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
 // CHECK-NEXT:    store ptr [[TMP2]], ptr [[DISPATCH_PTR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8
@@ -154,7 +154,7 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[X:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 // CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
 // CHECK-NEXT:    store ptr [[TMP1]], ptr [[X_ASCAST]], align 8
 // CHECK-NEXT:    ret void
Index: clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,16 +1,16 @@
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
 // RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
-// RUN:     | FileCheck -check-prefix=PRECOV5 %s
+// RUN:     | FileCheck -check-prefixes=PRECOV5,CHECK %s
 
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
 // RUN:     -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
-// RUN:     | FileCheck -check-prefix=COV5 %s
+// RUN:     | FileCheck -check-prefixes=COV5,CHECK %s
 
 #include "Inputs/cuda.h"
 
 // PRECOV5-LABEL: test_get_workgroup_size
-// PRECOV5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// PRECOV5: call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 // PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
 // PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 // PRECOV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
@@ -19,7 +19,7 @@
 // PRECOV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 
 // COV5-LABEL: test_get_workgroup_size
-// COV5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// COV5: call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
 // COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
@@ -36,4 +36,7 @@
   }
 }
 
+// COV4: declare align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// COV5: declare align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+
 // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -16753,25 +16753,14 @@
                              const CallExpr *E = nullptr) {
   auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
   auto *Call = CGF.Builder.CreateCall(F);
-  Call->addRetAttr(
-      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
-  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
   if (!E)
     return Call;
-  QualType BuiltinRetType = E->getType();
-  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
-  if (RetTy == Call->getType())
-    return Call;
-  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
+  return CGF.Builder.CreateAddrSpaceCast(Call, CGF.ConvertType(E->getType()));
 }
 
 Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
   auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
-  auto *Call = CGF.Builder.CreateCall(F);
-  Call->addRetAttr(
-      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
-  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
-  return Call;
+  return CGF.Builder.CreateCall(F);
 }
 
 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to