llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-analysis Author: Shilei Tian (shiltian) <details> <summary>Changes</summary> - **[NFC][Clang] Auto generate check lines for `clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl`** - **[AMDGPU] Make uniform-work-group-size a valueless attribute** --- Patch is 100.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/183925.diff 51 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+4-5) - (modified) clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu (+2-2) - (modified) clang/test/CodeGenHIP/default-attributes.hip (+1-1) - (modified) clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl (+1-1) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+24-26) - (modified) clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl (+73-15) - (modified) clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl (+4-4) - (modified) clang/test/OpenMP/amdgcn-attributes.cpp (+3-3) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+1-1) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+21) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+4-9) - (modified) llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp (+1-1) - (modified) llvm/test/Analysis/KernelInfo/openmp/amdgpu.ll (+1-1) - (added) llvm/test/Bitcode/upgrade-uniform-work-group-size.ll (+21) - (modified) llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/inline-attr.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/inlineasm-sgmask.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/invalid-hidden-kernarg-in-kernel-signature.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/swdev-549940.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (+5-6) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+4-6) - (modified) llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll (+1-1) - (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_12.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_20.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/split_work_group_barrier_spirv.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_variable_length_array/vararr_spec_const.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_subgroup_rotate/subgroup-rotate.ll (+1-1) - (modified) llvm/test/CodeGen/SPIRV/llvm-intrinsics/constrained-comparison.ll.bak (+1-1) - (modified) mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp (+5-3) - (modified) mlir/test/Target/LLVMIR/rocdl.mlir (+4-4) ``````````diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b57802ebfced8..04b27925bab8e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2619,22 +2619,21 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // OpenCL Kernel Stub if (getLangOpts().OpenCLVersion <= 120) { // OpenCL v1.2 Work groups are always uniform - FuncAttrs.addAttribute("uniform-work-group-size", "true"); + FuncAttrs.addAttribute("uniform-work-group-size"); } else { // OpenCL v2.0 Work groups may be whether uniform or not. // '-cl-uniform-work-group-size' compile option gets a hint // to the compiler that the global work-size be a multiple of // the work-group size specified to clEnqueueNDRangeKernel // (i.e. work groups are uniform). - FuncAttrs.addAttribute( - "uniform-work-group-size", - llvm::toStringRef(getLangOpts().OffloadUniformBlock)); + if (getLangOpts().OffloadUniformBlock) + FuncAttrs.addAttribute("uniform-work-group-size"); } } if (TargetDecl->hasAttr<CUDAGlobalAttr>() && getLangOpts().OffloadUniformBlock) - FuncAttrs.addAttribute("uniform-work-group-size", "true"); + FuncAttrs.addAttribute("uniform-work-group-size"); if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>()) FuncAttrs.addAttribute("aarch64_pstate_sm_body"); diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index fa4821e3c597f..bdd4682374e1d 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -103,7 +103,7 @@ template __global__ void template_a_b_c_max_num_work_groups<32, 4, 2>(); // NAMD-NOT: "amdgpu-num-sgpr" // NAMD-NOT: "amdgpu-max-num-work-groups" -// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" // MAX1024-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1} // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" @@ -114,4 +114,4 @@ template __global__ void template_a_b_c_max_num_work_groups<32, 4, 2>(); // CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_4_2]] = {{.*}}"amdgpu-max-num-workgroups"="32,4,2" // CHECK-DAG: attributes [[MAX_NUM_WORK_GROUPS_32_1_1]] = {{.*}}"amdgpu-max-num-workgroups"="32,1,1" -// NOUB-NOT: "uniform-work-group-size"="true" +// NOUB-NOT: "uniform-work-group-size" diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index 9aa40f18696c8..ef7d28740d270 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -35,7 +35,7 @@ __global__ void kernel() { //. // OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" } // OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } //. // OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl index ece84d5b75ca7..70bc51bfa2d70 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl @@ -26,7 +26,7 @@ kernel void foo(global int *p) { *p = 1; } // CHECK-NEXT: ret void // //. -// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "uniform-work-group-size"="false" } +// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" } // CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" } // CHECK: attributes #[[ATTR2]] = { convergent nounwind } //. diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 002c19ede0e56..e654ccb890df5 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -107,13 +107,13 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 // NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR10:[0-9]+]] +// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test( -// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) @@ -235,19 +235,19 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel( -// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr // NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 // NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR10]] +// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel( -// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -268,7 +268,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7:[0-9]+]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -287,7 +287,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -298,7 +298,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke_2( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -323,7 +323,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -334,7 +334,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke_3( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) @@ -365,7 +365,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] { +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 @@ -376,7 +376,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_block_invoke_4( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -388,13 +388,13 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 -// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR10]] +// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel( -// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -405,7 +405,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent noinline nounwind optnone denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal void @__test_target_features_kernel_block_invoke( -// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -419,7 +419,7 @@ kernel void test_target_features_kernel(global int *i) { // // NOCPU: Function Attrs: convergent nounwind denormal_fpenv(float: preservesign) // NOCPU-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel( -// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { // NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) // NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -805,19 +805,17 @@ kernel void test_target_features_kernel(global int *i) { //. // NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } // NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR4]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } -// NOCPU: attributes #[[ATTR5]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } -// NOCPU: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// NOCPU: attributes #[[ATTR7]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR8]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/183925 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
