yaxunl created this revision.
yaxunl added reviewers: arsenm, b-sumner, cfang.
Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, nhaehnle, wdng, 
jvesely, kzhuravl.

The main purpose of introducing these builtins is to add a range metadata [1, 
1025) on the work group size loaded from dispatch ptr, which cannot be done by 
source code.


https://reviews.llvm.org/D76772

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -527,6 +527,24 @@
 	}
 }
 
+// CHECK-LABEL: @test_get_workgroup_size(
+// CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 4
+// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]]
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 6
+// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]]
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 8
+// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]]
+void test_get_workgroup_size(int d, global int *out)
+{
+	switch (d) {
+	case 0: *out = __builtin_amdgcn_workgroup_size_x(); break;
+	case 1: *out = __builtin_amdgcn_workgroup_size_y(); break;
+	case 2: *out = __builtin_amdgcn_workgroup_size_z(); break;
+	default: *out = 0;
+	}
+}
+
 // CHECK-LABEL: @test_fmed3_f32
 // CHECK: call float @llvm.amdgcn.fmed3.f32(
 void test_fmed3_f32(global float* out, float a, float b, float c)
@@ -698,6 +716,7 @@
 }
 
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
+// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
 // CHECK-DAG: ![[$EXEC]] = !{!"exec"}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13407,6 +13407,44 @@
   }
 }
 
+namespace {
+// If \p E is not null pointer, insert address space cast to match return
+// type of \p E if necessary.
+Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
+                             const CallExpr *E = nullptr) {
+  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
+  auto *Call = CGF.Builder.CreateCall(F);
+  Call->addAttribute(
+      AttributeList::ReturnIndex,
+      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
+  Call->addAttribute(AttributeList::ReturnIndex,
+                     Attribute::getWithAlignment(Call->getContext(), Align(4)));
+  if (!E)
+    return Call;
+  QualType BuiltinRetType = E->getType();
+  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
+  if (RetTy == Call->getType())
+    return Call;
+  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
+}
+
+// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
+Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
+  const unsigned XOffset = 4;
+  auto *DP = EmitAMDGPUDispatchPtr(CGF);
+  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2);
+  auto *GEP = CGF.Builder.CreateGEP(DP, Offset);
+  auto *DstTy =
+      CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace());
+  auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy);
+  auto *LD = CGF.Builder.CreateLoad(Address(Cast, CharUnits::fromQuantity(2)));
+  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+  llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), APInt(16, 1025));
+  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
+  return LD;
+}
+} // namespace
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   switch (BuiltinID) {
@@ -13489,21 +13527,8 @@
   case AMDGPU::BI__builtin_amdgcn_cosf:
   case AMDGPU::BI__builtin_amdgcn_cosh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
-  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: {
-    auto *F = CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
-    auto *Call = Builder.CreateCall(F);
-    Call->addAttribute(
-        AttributeList::ReturnIndex,
-        Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
-    Call->addAttribute(
-        AttributeList::ReturnIndex,
-        Attribute::getWithAlignment(Call->getContext(), Align(4)));
-    QualType BuiltinRetType = E->getType();
-    auto *RetTy = cast<llvm::PointerType>(ConvertType(BuiltinRetType));
-    if (RetTy == Call->getType())
-      return Call;
-    return Builder.CreateAddrSpaceCast(Call, RetTy);
-  }
+  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
+    return EmitAMDGPUDispatchPtr(*this, E);
   case AMDGPU::BI__builtin_amdgcn_log_clampf:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
   case AMDGPU::BI__builtin_amdgcn_ldexp:
@@ -13599,6 +13624,14 @@
   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
 
+  // amdgcn workgroup size
+  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
+    return EmitAMDGPUWorkGroupSize(*this, 0);
+  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
+    return EmitAMDGPUWorkGroupSize(*this, 1);
+  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
+    return EmitAMDGPUWorkGroupSize(*this, 2);
+
   // r600 intrinsics
   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -33,6 +33,10 @@
 BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc")
 
+BUILTIN(__builtin_amdgcn_workgroup_size_x, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_workgroup_size_y, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_workgroup_size_z, "Ui", "nc")
+
 BUILTIN(__builtin_amdgcn_mbcnt_hi, "UiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to