arsenm created this revision. arsenm added reviewers: yaxunl, rampitec. Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, nhaehnle, jvesely, kzhuravl. arsenm requested review of this revision. Herald added a subscriber: wdng.
When the default flat work group size is 256, it should also apply to callable functions. https://reviews.llvm.org/D89582 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu clang/test/CodeGenOpenCL/amdgpu-attrs.cl Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -190,5 +190,5 @@ // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} +// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -16,6 +16,10 @@ // CHECK: define amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]] } +__device__ void func_flat_work_group_size_default() { +// CHECK: define void @_Z33func_flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC:#[0-9]+]] +} + __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics __global__ void flat_work_group_size_32_64() { // CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]] @@ -40,7 +44,11 @@ // NAMD-NOT: "amdgpu-num-sgpr" // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" + // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" + // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32" Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -9031,7 +9031,7 @@ (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); - if (IsHIPKernel) + if (M.getLangOpts().HIP) F->addFnAttr("uniform-work-group-size", "true"); @@ -9057,7 +9057,7 @@ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); - } else if (IsOpenCLKernel || IsHIPKernel) { + } else { // By default, restrict the maximum size to a value specified by // --gpu-max-threads-per-block=n or its default value. std::string AttrVal =
Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -190,5 +190,5 @@ // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} +// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -16,6 +16,10 @@ // CHECK: define amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]] } +__device__ void func_flat_work_group_size_default() { +// CHECK: define void @_Z33func_flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC:#[0-9]+]] +} + __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics __global__ void flat_work_group_size_32_64() { // CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]] @@ -40,7 +44,11 @@ // NAMD-NOT: "amdgpu-num-sgpr" // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" + // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" + // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32" Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -9031,7 +9031,7 @@ (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); - if (IsHIPKernel) + if (M.getLangOpts().HIP) F->addFnAttr("uniform-work-group-size", "true"); @@ -9057,7 +9057,7 @@ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); - } else if (IsOpenCLKernel || IsHIPKernel) { + } else { // By default, restrict the maximum size to a value specified by // --gpu-max-threads-per-block=n or its default value. std::string AttrVal =
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits