llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) <details> <summary>Changes</summary> This concerns offset computations for kernargs and RegBankLegalizeHelper::splitLoad, which should all be within the bounds of a memory object. See #<!-- -->150392 for the motivation for introducing the buildObjectPtrOffset function. For SWDEV-516125. --- Patch is 113.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150899.diff 10 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+10-10) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+2-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+45-45) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+64-64) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (+9-5) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir (+26-26) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index fedfa3f9dd900..3d494374fb33b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2295,8 +2295,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture( LLT::scalar(32), commonAlignment(Align(64), Offset)); // Pointer address - B.buildPtrAdd(LoadAddr, KernargPtrReg, - B.buildConstant(LLT::scalar(64), Offset).getReg(0)); + B.buildObjectPtrOffset(LoadAddr, KernargPtrReg, + B.buildConstant(LLT::scalar(64), Offset).getReg(0)); // Load address return B.buildLoad(S32, LoadAddr, *MMO).getReg(0); } @@ -2317,8 +2317,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture( MachineMemOperand::MOInvariant, LLT::scalar(32), commonAlignment(Align(64), StructOffset)); - B.buildPtrAdd(LoadAddr, QueuePtr, - B.buildConstant(LLT::scalar(64), StructOffset).getReg(0)); + B.buildObjectPtrOffset( + LoadAddr, QueuePtr, + B.buildConstant(LLT::scalar(64), StructOffset).getReg(0)); return B.buildLoad(S32, LoadAddr, *MMO).getReg(0); } @@ -4500,8 +4501,7 @@ Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B, llvm_unreachable("failed to find kernarg segment ptr"); auto COffset = B.buildConstant(LLT::scalar(64), Offset); - // TODO: Should get nuw - return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0); + return B.buildObjectPtrOffset(PtrTy, KernArgReg, COffset).getReg(0); } /// Legalize a value that's loaded from kernel arguments. This is only used by @@ -5676,8 +5676,8 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR)) return false; - // FIXME: This should be nuw - B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0)); + B.buildObjectPtrOffset(DstReg, KernargPtrReg, + B.buildConstant(IdxTy, Offset).getReg(0)); return true; } @@ -7019,8 +7019,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( // Pointer address Register LoadAddr = MRI.createGenericVirtualRegister( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); - B.buildPtrAdd(LoadAddr, KernargPtrReg, - B.buildConstant(LLT::scalar(64), Offset).getReg(0)); + B.buildObjectPtrOffset(LoadAddr, KernargPtrReg, + B.buildConstant(LLT::scalar(64), Offset).getReg(0)); // Load address Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0); B.buildCopy(SGPR01, Temp); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index f471881ee7693..b45627d9c1c5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -294,7 +294,8 @@ void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, BasePlusOffset = Base; } else { auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset); - BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0); + BasePlusOffset = + B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0); } auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 3e7a5671bb5de..33a9c5e258ea2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -24,7 +24,7 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] @@ -65,7 +65,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], [[C]](s64) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) @@ -105,7 +105,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() { ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY4]], [[C]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index 33862de91430c..57ee2c8f88073 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -31,7 +31,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64) ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64) ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -230,7 +230,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64) ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -319,7 +319,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64) ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -668,7 +668,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64) ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] @@ -710,7 +710,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64) ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] @@ -756,7 +756,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64) ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] @@ -802,7 +802,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64) ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] @@ -852,7 +852,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64) ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] @@ -898,7 +898,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], [[C1]](s64) ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] @@ -949,7 +949,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64) ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] @@ -996,7 +996,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64) ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] @@ -1047,7 +1047,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64) ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] @@ -1098,7 +1098,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64) ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] @@ -1153,7 +1153,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64) ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] @@ -1200,7 +1200,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size ! ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], [[C1]](s64) ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 559391709f41d..6f624b2536f1a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -91,7 +91,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C1]](s64) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -174,7 +174,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -252,7 +252,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -314,7 +314,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -376,7 +376,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -456,7 +456,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], [[C]](s64) ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -518,7 +518,7 @@ define amdgp... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/150899 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits