arsenm created this revision.
arsenm added reviewers: yaxunl, rampitec.
Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, nhaehnle, jvesely, 
kzhuravl.
arsenm requested review of this revision.
Herald added a subscriber: wdng.

When the default flat work group size is 256, it should also apply to
callable functions.


https://reviews.llvm.org/D89582

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
  clang/test/CodeGenOpenCL/amdgpu-attrs.cl


Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -190,5 +190,5 @@
 // CHECK-DAG: attributes 
[[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} 
"amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" 
"amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes 
[[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = 
{{.*}} "amdgpu-flat-work-group-size"="32,64" 
"amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" 
"amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
 
-// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}}
+// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} 
"amdgpu-flat-work-group-size"="1,256"
 // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} 
"amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -16,6 +16,10 @@
 // CHECK: define amdgpu_kernel void @_Z28flat_work_group_size_defaultv() 
[[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
 }
 
+__device__ void func_flat_work_group_size_default() {
+// CHECK: define void @_Z33func_flat_work_group_size_defaultv() 
[[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC:#[0-9]+]]
+}
+
 __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
 __global__ void flat_work_group_size_32_64() {
 // CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() 
[[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
@@ -40,7 +44,11 @@
 // NAMD-NOT: "amdgpu-num-sgpr"
 
 // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = 
{{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = 
{{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+
 // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = 
{{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = 
{{.*}}"amdgpu-flat-work-group-size"="1,1024"
+
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = 
{{.*}}"amdgpu-flat-work-group-size"="32,64"
 // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9031,7 +9031,7 @@
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
 
-  if (IsHIPKernel)
+  if (M.getLangOpts().HIP)
     F->addFnAttr("uniform-work-group-size", "true");
 
 
@@ -9057,7 +9057,7 @@
       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
     } else
       assert(Max == 0 && "Max must be zero");
-  } else if (IsOpenCLKernel || IsHIPKernel) {
+  } else {
     // By default, restrict the maximum size to a value specified by
     // --gpu-max-threads-per-block=n or its default value.
     std::string AttrVal =


Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -190,5 +190,5 @@
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
 
-// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}}
+// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "amdgpu-flat-work-group-size"="1,256"
 // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -16,6 +16,10 @@
 // CHECK: define amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
 }
 
+__device__ void func_flat_work_group_size_default() {
+// CHECK: define void @_Z33func_flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC:#[0-9]+]]
+}
+
 __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
 __global__ void flat_work_group_size_32_64() {
 // CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
@@ -40,7 +44,11 @@
 // NAMD-NOT: "amdgpu-num-sgpr"
 
 // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+
 // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+
 // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
 // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
 // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9031,7 +9031,7 @@
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
 
-  if (IsHIPKernel)
+  if (M.getLangOpts().HIP)
     F->addFnAttr("uniform-work-group-size", "true");
 
 
@@ -9057,7 +9057,7 @@
       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
     } else
       assert(Max == 0 && "Max must be zero");
-  } else if (IsOpenCLKernel || IsHIPKernel) {
+  } else {
     // By default, restrict the maximum size to a value specified by
     // --gpu-max-threads-per-block=n or its default value.
     std::string AttrVal =
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to