https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/183925
>From 52d318f2a9d517fac9e8682563e18b335cb676d6 Mon Sep 17 00:00:00 2001 From: Shilei Tian <[email protected]> Date: Sat, 28 Feb 2026 10:22:56 -0500 Subject: [PATCH] [AMDGPU] Make uniform-work-group-size a valueless attribute The "uniform-work-group-size" function attribute previously took a string value of "true" or "false". Since presence alone can convey the "true" semantics and absence can convey "false", the value is unnecessary. This patch converts it to a valueless string attribute: presence indicates true, absence indicates false. For backward compatibility, auto-upgrade logic is added in both UpgradeAttributes (bitcode) and UpgradeFunctionAttributes: if the old value is "true", the attribute is kept without a value; if "false", the attribute is removed. All setters (Clang CodeGen, OMPIRBuilder, AMDGPUAttributor, ROCDL translation) and readers (AMDGPUAttributor, AMDGPULowerKernelAttributes, AMDGPUHSAMetadataStreamer) are updated accordingly. The attribute is also documented in the AMDGPU LLVM IR Attributes table where it was previously missing. --- clang/lib/CodeGen/CGCall.cpp | 9 ++-- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu | 4 +- clang/test/CodeGenHIP/default-attributes.hip | 2 +- .../test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 2 +- .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 50 +++++++++---------- .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 27 +++++----- .../cl20-device-side-enqueue-attributes.cl | 8 +-- clang/test/OpenMP/amdgcn-attributes.cpp | 6 +-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 +- llvm/lib/IR/AutoUpgrade.cpp | 21 ++++++++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 13 ++--- .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 2 +- .../AMDGPU/AMDGPULowerKernelAttributes.cpp | 2 +- .../test/Analysis/KernelInfo/openmp/amdgpu.ll | 2 +- .../upgrade-uniform-work-group-size.ll | 21 ++++++++ .../amdhsa-kernarg-preload-num-sgprs.ll | 2 +- .../hsa-metadata-uniform-workgroup-size-v5.ll | 5 +- .../CodeGen/AMDGPU/implicit-arg-v5-opt.ll | 2 +- llvm/test/CodeGen/AMDGPU/inline-attr.ll | 10 ++-- llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll | 2 +- ...alid-hidden-kernarg-in-kernel-signature.ll | 2 +- ...ne-sink-temporal-divergence-swdev407790.ll | 2 +- .../CodeGen/AMDGPU/mdt-preserving-crash.ll | 2 +- .../CodeGen/AMDGPU/mul24-pass-ordering.ll | 2 +- llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll | 2 +- .../AMDGPU/preload-implicit-kernargs.ll | 2 +- llvm/test/CodeGen/AMDGPU/preload-kernargs.ll | 2 +- .../AMDGPU/promote-constOffset-to-imm.ll | 2 +- .../CodeGen/AMDGPU/reqd-work-group-size.ll | 6 +-- .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 2 +- .../si-opt-vgpr-liverange-bug-deadlanes.mir | 2 +- .../si-optimize-vgpr-live-range-dbg-instr.mir | 2 +- llvm/test/CodeGen/AMDGPU/swdev-549940.ll | 2 +- .../uniform-work-group-attribute-missing.ll | 4 +- .../AMDGPU/uniform-work-group-multistep.ll | 6 +-- ...niform-work-group-nested-function-calls.ll | 4 +- ...ork-group-prevent-attribute-propagation.ll | 4 +- .../uniform-work-group-propagate-attribute.ll | 11 ++-- .../uniform-work-group-recursion-test.ll | 6 +-- .../CodeGen/AMDGPU/uniform-work-group-test.ll | 10 ++-- ...ine-function-info-long-branch-reg-debug.ll | 2 +- llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 2 +- .../split_work_group_barrier_12.ll | 2 +- .../split_work_group_barrier_20.ll | 2 +- .../split_work_group_barrier_spirv.ll | 2 +- .../cl_intel_sub_groups.ll | 2 +- .../vararr_spec_const.ll | 2 +- .../subgroup-rotate.ll | 2 +- .../constrained-comparison.ll.bak | 2 +- .../ROCDL/ROCDLToLLVMIRTranslation.cpp | 8 +-- mlir/test/Target/LLVMIR/rocdl.mlir | 8 +-- 51 files changed, 167 insertions(+), 134 deletions(-) create mode 100644 llvm/test/Bitcode/upgrade-uniform-work-group-size.ll diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b57802ebfced8..04b27925bab8e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2619,22 +2619,21 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // OpenCL Kernel Stub if (getLangOpts().OpenCLVersion <= 120) { // OpenCL v1.2 Work groups are always uniform - FuncAttrs.addAttribute("uniform-work-group-size", "true"); + FuncAttrs.addAttribute("uniform-work-group-size"); } else { // OpenCL v2.0 Work groups may be whether uniform or not. // '-cl-uniform-work-group-size' compile option gets a hint // to the compiler that the global work-size be a multiple of // the work-group size specified to clEnqueueNDRangeKernel // (i.e. work groups are uniform). - FuncAttrs.addAttribute( - "uniform-work-group-size", - llvm::toStringRef(getLangOpts().OffloadUniformBlock)); + if (getLangOpts().OffloadUniformBlock) + FuncAttrs.addAttribute("uniform-work-group-size"); } } if (TargetDecl->hasAttr<CUDAGlobalAttr>() && getLangOpts().OffloadUniformBlock) - FuncAttrs.addAttribute("uniform-work-group-size", "true"); + FuncAttrs.addAttribute("uniform-work-group-size"); if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>()) FuncAttrs.addAttribute("aarch64_pstate_sm_body"); diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index fa4821e3c597f..bdd4682374e1d 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -103,7 +103,7 @@ template __global__ void template_a_b_c_max_num_work_groups<32, 4, 2>(); // NAMD-NOT: "amdgpu-num-sgpr" // NAMD-NOT: "amdgpu-max-num-work-groups" -// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" // MAX1024-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1} // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" @@ -114,4 +114,4 @@ template __global__ void template_a_b_c_max_num_work_groups<32, 4, 2>(); // CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_4_2]] = {{.*}}"amdgpu-max-num-workgroups"="32,4,2" // CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}}"amdgpu-max-num-workgroups"="32,1,1" -// NOUB-NOT: "uniform-work-group-size"="true" +// NOUB-NOT: "uniform-work-group-size" diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9aa40f18696c8..ef7d28740d270 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -35,7 +35,7 @@ __global__ void kernel() { //. // OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } //. // OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl index ece84d5b75ca7..70bc51bfa2d70 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -26,7 +26,7 @@ kernel void foo(global int *p) { *p = 1; } // CHECK-NEXT: ret void // //. -// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" } // CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" } // CHECK: attributes #[[ATTR2]] = { convergent nounwind } //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 002c19ede0e56..e654ccb890df5 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -107,13 +107,13 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 // NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR10:[0-9]+]] +// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test( -// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) @@ -235,19 +235,19 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel( -// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr // NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR10]] +// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel( -// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -268,7 +268,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7:[0-9]+]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -287,7 +287,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -298,7 +298,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke_2( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -323,7 +323,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -334,7 +334,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke_3( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) @@ -365,7 +365,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] { +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 @@ -376,7 +376,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke_4( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -388,13 +388,13 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 -// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR10]] +// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -405,7 +405,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_target_features_kernel_block_invoke( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -419,7 +419,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel( -// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) // NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -805,19 +805,17 @@ kernel void test_target_features_kernel(global int *i) { //. // NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } // NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR4]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } -// NOCPU: attributes #[[ATTR5]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } -// NOCPU: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// NOCPU: attributes #[[ATTR7]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR8]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -// NOCPU: attributes #[[ATTR10]] = { convergent nounwind } +// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } +// NOCPU: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// NOCPU: attributes #[[ATTR5]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// NOCPU: attributes #[[ATTR6]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } +// NOCPU: attributes #[[ATTR8]] = { convergent nounwind } //. // GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } // GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" } -// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" "uniform-work-group-size"="false" } +// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" } // GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" } // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } // GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl index 98587c694619f..62a4d9b84fe32 100644 --- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl @@ -1,19 +1,20 @@ -// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM -// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM -// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM -// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s --check-prefix=CHECK-UNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s --check-prefix=CHECK-NONUNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s | FileCheck %s --check-prefix=CHECK-UNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s | FileCheck %s --check-prefix=CHECK-UNIFORM -kernel void ker() {}; -// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]] +// CHECK-UNIFORM: define dso_local spir_kernel void @ker(){{.*}}[[ATTR0:#[0-9]+]] +// CHECK-UNIFORM: define dso_local void @__clang_ocl_kern_imp_ker(){{.*}}[[ATTR1:#[0-9]+]] +// CHECK-UNIFORM: define dso_local void @foo{{.*}}[[ATTR1:#[0-9]+]] -// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]] +// CHECK-NONUNIFORM: define dso_local spir_kernel void @ker(){{.*}}[[ATTR0:#[0-9]+]] +// CHECK-NONUNIFORM: define dso_local void @__clang_ocl_kern_imp_ker(){{.*}}[[ATTR0]] +// CHECK-NONUNIFORM: define dso_local void @foo{{.*}}[[ATTR0]] +kernel void ker() {}; void foo() {}; -// CHECK: define{{.*}}@foo() #[[ATTR1:[0-9]+]] -// CHECK: attributes #[[ATTR0]] -// CHECK-UNIFORM: "uniform-work-group-size"="true" -// CHECK-NONUNIFORM: "uniform-work-group-size"="false" +// CHECK-UNIFORM: attributes [[ATTR0]] {{.*}} "uniform-work-group-size" +// CHECK-UNIFORM-NOT: attributes [[ATTR1]] {{.*}} "uniform-work-group-size" -// CHECK: attributes #[[ATTR1]] -// CHECK-NOT: uniform-work-group-size +// CHECK-NONUNIFORM-NOT: attributes [[ATTR0]] {{.*}} "uniform-work-group-size" diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl index 0149d1618e2b0..4c3b72d15c97e 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl @@ -196,19 +196,19 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: ret void // //. -// SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // SPIR32: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } // SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } // SPIR32: attributes #[[ATTR4]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size"="true" } +// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size" } //. -// STRICTFP: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone strictfp "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// STRICTFP: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone strictfp "stack-protector-buffer-size"="8" } // STRICTFP: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } // STRICTFP: attributes #[[ATTR2]] = { convergent noinline nounwind optnone strictfp "stack-protector-buffer-size"="8" } // STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) } // STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" } -// STRICTFP: attributes #[[ATTR5]] = { convergent nounwind strictfp "uniform-work-group-size"="false" } +// STRICTFP: attributes #[[ATTR5]] = { convergent nounwind strictfp } // STRICTFP: attributes #[[ATTR6]] = { strictfp } //. // SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp index ad7d19cbbd55c..6b7a86477cb4a 100644 --- a/clang/test/OpenMP/amdgcn-attributes.cpp +++ b/clang/test/OpenMP/amdgcn-attributes.cpp @@ -31,9 +31,9 @@ int callable(int x) { return x + 1; } -// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "uniform-work-group-size"="true" } -// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size" } +// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "uniform-work-group-size" } +// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 6775674d733fe..9035ae7796a2e 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -11065,7 +11065,7 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, // Add a function attribute for the kernel. Fn->addFnAttr("kernel"); if (T.isAMDGCN()) - Fn->addFnAttr("uniform-work-group-size", "true"); + Fn->addFnAttr("uniform-work-group-size"); Fn->addFnAttr(Attribute::MustProgress); } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 790ecb40dc87b..311a1cac76963 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -6352,6 +6352,16 @@ void llvm::UpgradeFunctionAttributes(Function &F) { AddingAttrs = RemovingAttrs = true; } + if (Attribute A = F.getFnAttribute("uniform-work-group-size"); + A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) { + AttrsToRemove.addAttribute("uniform-work-group-size"); + RemovingAttrs = true; + if (A.getValueAsString() == "true") { + AttrsToAdd.addAttribute("uniform-work-group-size"); + AddingAttrs = true; + } + } + if (!F.empty()) { // For some reason this is called twice, and the first time is before any // instructions are loaded into the body. @@ -6749,6 +6759,17 @@ void llvm::UpgradeAttributes(AttrBuilder &B) { if (NullPointerIsValid) B.addAttribute(Attribute::NullPointerIsValid); } + + A = B.getAttribute("uniform-work-group-size"); + if (A.isValid()) { + StringRef Val = A.getValueAsString(); + if (!Val.empty()) { + bool IsTrue = Val == "true"; + B.removeAttribute("uniform-work-group-size"); + if (IsTrue) + B.addAttribute("uniform-work-group-size"); + } + } } void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2ba986f7bd322..0afe1b4d0bcf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -362,11 +362,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { if (CC != CallingConv::AMDGPU_KERNEL) return; - bool InitialValue = false; - if (F->hasFnAttribute("uniform-work-group-size")) - InitialValue = - F->getFnAttribute("uniform-work-group-size").getValueAsString() == - "true"; + bool InitialValue = F->hasFnAttribute("uniform-work-group-size"); if (InitialValue) indicateOptimisticFixpoint(); @@ -405,10 +401,9 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { return ChangeStatus::UNCHANGED; LLVMContext &Ctx = getAssociatedFunction()->getContext(); - return A.manifestAttrs( - getIRPosition(), - {Attribute::get(Ctx, "uniform-work-group-size", "true")}, - /*ForceReplace=*/true); + return A.manifestAttrs(getIRPosition(), + {Attribute::get(Ctx, "uniform-work-group-size")}, + /*ForceReplace=*/true); } bool isValidState() const override { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 3c88d1b8214f7..35ecf98836678 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -731,7 +731,7 @@ void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM, MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern); const Function &Func = MF.getFunction(); - if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) + if (Func.hasFnAttribute("uniform-work-group-size")) Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index fbfb71059b6b1..d82dfd2771275 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -103,7 +103,7 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3; const bool HasUniformWorkGroupSize = - F->getFnAttribute("uniform-work-group-size").getValueAsBool(); + F->hasFnAttribute("uniform-work-group-size"); SmallVector<unsigned> MaxNumWorkgroups = AMDGPU::getIntegerVecAttribute(*F, "amdgpu-max-num-workgroups", diff --git a/llvm/test/Analysis/KernelInfo/openmp/amdgpu.ll b/llvm/test/Analysis/KernelInfo/openmp/amdgpu.ll index a84e261357de0..6cb54e929f63f 100644 --- a/llvm/test/Analysis/KernelInfo/openmp/amdgpu.ll +++ b/llvm/test/Analysis/KernelInfo/openmp/amdgpu.ll @@ -157,7 +157,7 @@ define hidden void @g() #3 !dbg !43 { } attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="256" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" } +attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="256" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size" } attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } attributes #3 = { convergent noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } attributes #4 = { convergent } diff --git a/llvm/test/Bitcode/upgrade-uniform-work-group-size.ll b/llvm/test/Bitcode/upgrade-uniform-work-group-size.ll new file mode 100644 index 0000000000000..39c3ada3d6c1f --- /dev/null +++ b/llvm/test/Bitcode/upgrade-uniform-work-group-size.ll @@ -0,0 +1,21 @@ +; RUN: llvm-as < %s | llvm-dis - | FileCheck %s + +; "uniform-work-group-size"="true" should be upgraded to a valueless attribute. +; CHECK: define void @true_val() #[[ATTR_TRUE:[0-9]+]] +define void @true_val() "uniform-work-group-size"="true" { + ret void +} + +; "uniform-work-group-size"="false" should be removed entirely. +; CHECK: define void @false_val() { +define void @false_val() "uniform-work-group-size"="false" { + ret void +} + +; Already-upgraded valueless attribute should be left alone. +; CHECK: define void @no_val() #[[ATTR_TRUE]] +define void @no_val() "uniform-work-group-size" { + ret void +} + +; CHECK-DAG: attributes #[[ATTR_TRUE]] = { "uniform-work-group-size" } diff --git a/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll b/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll index a160cdc950eb5..4054d4c99c2b8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll @@ -70,4 +70,4 @@ define amdgpu_kernel void @amdhsa_kernarg_preload_1_implicit_2(i32 inreg) #0 { r define amdgpu_kernel void @amdhsa_kernarg_preload_0_implicit_2(i32) #0 { ret void } -attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll index d1152b8ae7de0..692d7f9ff7c6d 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll @@ -14,7 +14,7 @@ bb: ; CHECK: - .args: ; CHECK-LABEL: .name: kernel_non_uniform_workgroup ; CHECK-NOT: .uniform_work_group_size: -define amdgpu_kernel void @kernel_non_uniform_workgroup() #1 { +define amdgpu_kernel void @kernel_non_uniform_workgroup() { bb: ret void } @@ -26,8 +26,7 @@ define amdgpu_kernel void @kernel_no_attr() { bb: ret void } -attributes #0 = { "uniform-work-group-size"="true" } -attributes #1 = { "uniform-work-group-size"="false" } +attributes #0 = { "uniform-work-group-size" } !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll index 3563e737f5520..be4c93d33fb63 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll @@ -197,7 +197,7 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #1 !llvm.module.flags = !{!1} -attributes #0 = { nounwind "uniform-work-group-size"="true" } +attributes #0 = { nounwind "uniform-work-group-size" } attributes #1 = { nounwind readnone speculatable } !0 = !{i32 8, i32 16, i32 2} !1 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index 73c80fbf94175..ad201ded71bc3 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -35,17 +35,17 @@ entry: ret void } -attributes #0 = { nounwind "uniform-work-group-size"="false"} +attributes #0 = { nounwind } attributes #1 = { nounwind "less-precise-fpmad"="true" "no-nans-fp-math"="true" } -; NOINFS: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" } -; NOINFS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" } +; NOINFS: attributes #[[ATTR0]] = { nounwind } +; NOINFS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="false" } ; NOINFS: [[META0]] = !{} ;. -; UNSAFE: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" } +; UNSAFE: attributes #[[ATTR0]] = { nounwind } ; UNSAFE: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="false" } ;. -; NONANS: attributes #[[ATTR0]] = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" } +; NONANS: attributes #[[ATTR0]] = { nounwind "no-nans-fp-math"="true" } ; NONANS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-nans-fp-math"="true" } ;. ; UNSAFE: [[META0]] = !{} diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll index da692f1a557e7..77320094b1b65 100644 --- a/llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll +++ b/llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll @@ -172,7 +172,7 @@ declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #1 declare void @llvm.amdgcn.sched.group.barrier(i32 immarg, i32 immarg, i32 immarg) #2 -attributes #0 = { convergent mustprogress norecurse nounwind "amdgpu-agpr-alloc"="1" "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" "uniform-work-group-size"="true" } +attributes #0 = { convergent mustprogress norecurse nounwind "amdgpu-agpr-alloc"="1" "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx942" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" "uniform-work-group-size" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #2 = { convergent nocallback nofree nounwind willreturn } attributes #3 = { convergent nounwind memory(none) } diff --git a/llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll b/llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll index 8344cf9265397..bbdf791017247 100644 --- a/llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll +++ b/llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll @@ -18,4 +18,4 @@ define amdgpu_kernel void @preloadremainder_z_no_preload_diag(ptr addrspace(1) i ret void } -attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 9ecd35e7ddd11..66683d323837d 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -1098,7 +1098,7 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) #3 attributes #0 = { convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" } attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" } -attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size"="true" } +attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size" } attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #4 = { convergent nounwind willreturn memory(none) } attributes #5 = { convergent nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll index 4c2967a52fe93..b9d3ac8f12695 100644 --- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll @@ -212,4 +212,4 @@ cleanup.cont: ; preds = %if.end15, %if.end ret void } -attributes #0 = { "uniform-work-group-size"="true" } +attributes #0 = { "uniform-work-group-size" } diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 0c0919de4aedc..c4322e260dece 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -265,6 +265,6 @@ declare float @llvm.fmuladd.f32(float, float, float) #1 attributes #0 = { nounwind willreturn denormal_fpenv(float: preservesign) } attributes #1 = { nounwind readnone speculatable } -attributes #2 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" denormal_fpenv(float: preservesign) } +attributes #2 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" denormal_fpenv(float: preservesign) } !0 = !{float 2.500000e+00} diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index bc2ab7d9776d6..90e9ce1477c1a 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -401,7 +401,7 @@ bb.1: declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone speculatable } -attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll index 6f222aa7d6977..8b37734cfa046 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll @@ -1105,4 +1105,4 @@ define amdgpu_kernel void @preload_block_count_z_workgroup_size_z_remainder_z(pt ret void } -attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll index e8d717db2ce3b..f899450e383a7 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll @@ -1687,4 +1687,4 @@ define amdgpu_kernel void @ptr1_i8_trailing_unused(ptr addrspace(1) inreg %out, ret void } -attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +attributes #0 = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index c029ab3884750..a2b0f4d56ebea 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -2671,4 +2671,4 @@ end: } -attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" } diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll index 6e85a92690e59..b79d6bcc39b3c 100644 --- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll @@ -444,10 +444,10 @@ declare i32 @llvm.umin.i32(i32, i32) #1 declare i32 @llvm.smin.i32(i32, i32) #1 declare i32 @llvm.umax.i32(i32, i32) #1 -attributes #0 = { nounwind "uniform-work-group-size"="true" } +attributes #0 = { nounwind "uniform-work-group-size" } attributes #1 = { nounwind readnone speculatable } -attributes #2 = { nounwind "uniform-work-group-size"="true" } -attributes #3 = { nounwind "uniform-work-group-size"="false" } +attributes #2 = { nounwind "uniform-work-group-size" } +attributes #3 = { nounwind } !0 = !{i32 8, i32 16, i32 2} !1 = !{i32 8, i32 16} diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index 03b88858c7318..9005e26d24abb 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -6,7 +6,7 @@ ret [13 x i32] poison } - attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" denormal_fpenv(float: preservesign) "target-cpu"="gfx1030" "target-features"="+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" } + attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" denormal_fpenv(float: preservesign) "target-cpu"="gfx1030" "target-features"="+wavefrontsize32,+cumode,+enable-flat-scratch" } ... --- diff --git a/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir b/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir index 683a1c062ba65..c10096fd4ca0f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir +++ b/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir @@ -11,7 +11,7 @@ unreachable } - attributes #0 = { "target-cpu"="gfx1100" "target-features"="+wavefrontsize64,+cumode" "uniform-work-group-size"="false" } + attributes #0 = { "target-cpu"="gfx1100" "target-features"="+wavefrontsize64,+cumode" } ... --- name: _amdgpu_ps_main diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir index 7c7b930b6c318..1f77105f32443 100644 --- a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir +++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir @@ -8,7 +8,7 @@ ret void } - attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" } + attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3} diff --git a/llvm/test/CodeGen/AMDGPU/swdev-549940.ll b/llvm/test/CodeGen/AMDGPU/swdev-549940.ll index 5998359353796..e905e8cdc5434 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev-549940.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev-549940.ll @@ -594,7 +594,7 @@ declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, attributes #0 = { cold noreturn nounwind memory(inaccessiblemem: write) } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1201" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size"="true" } +attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1201" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size" } attributes #3 = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) } attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #5 = { nocallback nofree nosync nounwind willreturn memory(read) } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll index 029781e8a625f..56819036c1a5d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -29,8 +29,8 @@ define amdgpu_kernel void @kernel1() #1 { ret void } -attributes #0 = { "uniform-work-group-size"="true" } +attributes #0 = { "uniform-work-group-size" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll index 2651ed8cc46c4..d889c1a70c22a 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -94,9 +94,9 @@ define amdgpu_kernel void @kernel2() #0 { ret void } -attributes #0 = { "uniform-work-group-size"="true" } +attributes #0 = { "uniform-work-group-size" } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index 31949c6003e58..05b1aa7dcfc6e 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -39,8 +39,8 @@ define amdgpu_kernel void @kernel3() #2 { ret void } -attributes #2 = { "uniform-work-group-size"="true" } +attributes #2 = { "uniform-work-group-size" } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index 252c83304756c..8554e3d0b63f4 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -39,8 +39,8 @@ define amdgpu_kernel void @kernel2() #2 { ret void } -attributes #1 = { "uniform-work-group-size"="true" } +attributes #1 = { "uniform-work-group-size" } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index b5a1152a459c5..53a7021c12bd5 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -17,7 +17,7 @@ define void @func() #0 { ret void } -define amdgpu_kernel void @kernel1() #1 { +define amdgpu_kernel void @kernel1() { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: call void @func() @@ -38,7 +38,7 @@ define weak_odr void @weak_func() #0 { ret void } -define amdgpu_kernel void @kernel2() #2 { +define amdgpu_kernel void @kernel2() #1 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: call void @weak_func() @@ -49,11 +49,10 @@ define amdgpu_kernel void @kernel2() #2 { } attributes #0 = { nounwind } -attributes #1 = { "uniform-work-group-size"="false" } -attributes #2 = { "uniform-work-group-size"="true" } +attributes #1 = { "uniform-work-group-size" } ;. ; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } ; CHECK: attributes #[[ATTR2]] = { nounwind } -; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 00c6b7e6e60fd..1472004f0d7c2 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -99,9 +99,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 { ; nounwind and readnone are added to match attributor results. attributes #0 = { nounwind readnone } -attributes #1 = { "uniform-work-group-size"="true" } +attributes #1 = { "uniform-work-group-size" } ;. ; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index 737486567b19a..333a9993a89d6 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -25,9 +25,9 @@ define void @func4() { ret void } -define void @func2() #0 { +define void @func2() { ; CHECK-LABEL: define {{[^@]+}}@func2 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @func4() ; CHECK-NEXT: call void @func1() ; CHECK-NEXT: ret void @@ -47,9 +47,9 @@ define void @func3() { ret void } -define amdgpu_kernel void @kernel3() #0 { +define amdgpu_kernel void @kernel3() { ; CHECK-LABEL: define {{[^@]+}}@kernel3 -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @func2() ; CHECK-NEXT: call void @func3() ; CHECK-NEXT: ret void @@ -59,8 +59,6 @@ define amdgpu_kernel void @kernel3() #0 { ret void } -attributes #0 = { "uniform-work-group-size"="false" } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll index 72387307bb68f..194653dbf1fbc 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll @@ -99,7 +99,7 @@ ; Function Attrs: convergent nocallback nofree nounwind willreturn declare void @llvm.amdgcn.end.cf.i64(i64) #2 - attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } + attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #2 = { convergent nocallback nofree nounwind willreturn } attributes #3 = { convergent nocallback nofree nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index 827097e90e7d3..af89eb3f93af1 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -936,7 +936,7 @@ define void @device_func(ptr byval(i32) align 4 %input) { ret void } -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size"="true" } +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "target-cpu"="sm_60" "target-features"="+ptx78,+sm_60" "uniform-work-group-size" } attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_12.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_12.ll index 11e7d6927c1e8..00f6143d57b1d 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_12.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_12.ll @@ -64,7 +64,7 @@ declare dso_local spir_func void @_Z31intel_work_group_barrier_arrivej(i32 nound ; Function Attrs: convergent declare dso_local spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef) local_unnamed_addr #1 -attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_20.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_20.ll index 35cee1d963d8b..2d94e1f35094b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_20.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_20.ll @@ -126,7 +126,7 @@ declare dso_local spir_func void @_Z31intel_work_group_barrier_arrivej12memory_s ; Function Attrs: convergent declare dso_local spir_func void @_Z29intel_work_group_barrier_waitj12memory_scope(i32 noundef, i32 noundef) local_unnamed_addr #1 -attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_spirv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_spirv.ll index 5f2090cf55f82..620d8ac91e11d 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_spirv.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_spirv.ll @@ -120,7 +120,7 @@ declare dso_local spir_func void @_Z33__spirv_ControlBarrierArriveINTELiii(i32 n ; Function Attrs: convergent declare dso_local spir_func void @_Z31__spirv_ControlBarrierWaitINTELiii(i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #1 -attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll index 457b3a14fd8fc..2bf8ef05c1cdb 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll @@ -171,7 +171,7 @@ declare spir_func <2 x i64> @_Z30intel_sub_group_block_read_ul2PU3AS1Km(ptr addr ; Function Attrs: convergent declare spir_func void @_Z31intel_sub_group_block_write_ul2PU3AS1mDv2_m(ptr addrspace(1), <2 x i64>) local_unnamed_addr #1 -attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_variable_length_array/vararr_spec_const.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_variable_length_array/vararr_spec_const.ll index dea04c9a21a1c..4153ba58cfe60 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_variable_length_array/vararr_spec_const.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_variable_length_array/vararr_spec_const.ll @@ -85,7 +85,7 @@ declare void @llvm.stackrestore.p0(ptr) #4 declare i64 @_Z20__spirv_SpecConstantix(i32, i64) -attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/intel/vla_spec_const.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/intel/vla_spec_const.cpp" "uniform-work-group-size" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind willreturn } attributes #2 = { inlinehint norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll index 1391fddfcdb36..5a6c19e4bd1f4 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll @@ -329,7 +329,7 @@ declare spir_func double @_Z16sub_group_rotatedi(double noundef, i32 noundef) #1 ; Function Attrs: convergent nounwind declare spir_func double @_Z26sub_group_clustered_rotatedij(double noundef, i32 noundef, i32 noundef) #1 -attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #2 = { convergent nounwind } diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-comparison.ll.bak b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-comparison.ll.bak index f0f0957b9b4c4..2328a64404c4e 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-comparison.ll.bak +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-comparison.ll.bak @@ -44,7 +44,7 @@ declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, meta ; Function Attrs: inaccessiblememonly nounwind willreturn declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) #1 -attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { inaccessiblememonly nounwind willreturn } attributes #2 = { strictfp } diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp index 88a9d4c2a7ef2..8142347d80cb8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp @@ -101,7 +101,7 @@ class ROCDLDialectLLVMIRTranslationInterface // assumption. This assumption may be overridden by setting // `rocdl.uniform_work_group_size` on a given function. if (!llvmFunc->hasFnAttribute("uniform-work-group-size")) - llvmFunc->addFnAttr("uniform-work-group-size", "true"); + llvmFunc->addFnAttr("uniform-work-group-size"); } // Override flat-work-group-size // TODO: update clients to rocdl.flat_work_group_size instead, @@ -170,8 +170,10 @@ class ROCDLDialectLLVMIRTranslationInterface " must be a boolean"); llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName()); - llvmFunc->addFnAttr("uniform-work-group-size", - value.getValue() ? "true" : "false"); + if (value.getValue()) + llvmFunc->addFnAttr("uniform-work-group-size"); + else + llvmFunc->removeFnAttr("uniform-work-group-size"); } if (dialect->getUnsafeFpAtomicsAttrHelper().getName() == attribute.getName()) { diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 4eb98a2abe55f..78a78c0bd1bbd 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -2115,10 +2115,10 @@ llvm.func @rocdl.cvt.scalef32.sr.pk16(%v16xf32: vector<16xf32>, llvm.return } -// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" } +// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size" } // CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128" -// CHECK-DAG: attributes #[[$KERNEL_NO_UNIFORM_WORK_GROUPS_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="false" } +// CHECK-DAG: attributes #[[$KERNEL_NO_UNIFORM_WORK_GROUPS_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" } // CHECK-DAG: ![[$REQD_WORK_GROUP_SIZE]] = !{i32 16, i32 4, i32 2} -// CHECK-DAG: attributes #[[$KERNEL_WAVES_PER_EU_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "uniform-work-group-size"="true" } -// CHECK-DAG: attributes #[[$KERNEL_UNSAFE_FP_ATOMICS_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-unsafe-fp-atomics"="true" "uniform-work-group-size"="true" } +// CHECK-DAG: attributes #[[$KERNEL_WAVES_PER_EU_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "uniform-work-group-size" } +// CHECK-DAG: attributes #[[$KERNEL_UNSAFE_FP_ATOMICS_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-unsafe-fp-atomics"="true" "uniform-work-group-size" } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
