llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

<details>
<summary>Changes</summary>

This concerns offset computations for kernargs and
RegBankLegalizeHelper::splitLoad, which should all be within the bounds of a
memory object. See #<!-- -->150392 for the motivation for introducing the
buildObjectPtrOffset function.

For SWDEV-516125.

---

Patch is 113.72 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/150899.diff


10 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+10-10) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+2-1) 
- (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll 
(+3-3) 
- (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll (+16-16) 
- (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+45-45) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll 
(+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+64-64) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll 
(+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir 
(+9-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir 
(+26-26) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index fedfa3f9dd900..3d494374fb33b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2295,8 +2295,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
         LLT::scalar(32), commonAlignment(Align(64), Offset));
 
     // Pointer address
-    B.buildPtrAdd(LoadAddr, KernargPtrReg,
-                  B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+    B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
+                           B.buildConstant(LLT::scalar(64), Offset).getReg(0));
     // Load address
     return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
   }
@@ -2317,8 +2317,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
           MachineMemOperand::MOInvariant,
       LLT::scalar(32), commonAlignment(Align(64), StructOffset));
 
-  B.buildPtrAdd(LoadAddr, QueuePtr,
-                B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
+  B.buildObjectPtrOffset(
+      LoadAddr, QueuePtr,
+      B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
   return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
 }
 
@@ -4500,8 +4501,7 @@ Register 
AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
     llvm_unreachable("failed to find kernarg segment ptr");
 
   auto COffset = B.buildConstant(LLT::scalar(64), Offset);
-  // TODO: Should get nuw
-  return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0);
+  return B.buildObjectPtrOffset(PtrTy, KernArgReg, COffset).getReg(0);
 }
 
 /// Legalize a value that's loaded from kernel arguments. This is only used by
@@ -5676,8 +5676,8 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register 
DstReg,
                       AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
     return false;
 
-  // FIXME: This should be nuw
-  B.buildPtrAdd(DstReg, KernargPtrReg, B.buildConstant(IdxTy, 
Offset).getReg(0));
+  B.buildObjectPtrOffset(DstReg, KernargPtrReg,
+                         B.buildConstant(IdxTy, Offset).getReg(0));
   return true;
 }
 
@@ -7019,8 +7019,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
     // Pointer address
     Register LoadAddr = MRI.createGenericVirtualRegister(
         LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
-    B.buildPtrAdd(LoadAddr, KernargPtrReg,
-                  B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+    B.buildObjectPtrOffset(LoadAddr, KernargPtrReg,
+                           B.buildConstant(LLT::scalar(64), Offset).getReg(0));
     // Load address
     Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0);
     B.buildCopy(SGPR01, Temp);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f471881ee7693..b45627d9c1c5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -294,7 +294,8 @@ void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
       BasePlusOffset = Base;
     } else {
       auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
-      BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
+      BasePlusOffset =
+          B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
     }
     auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
     auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 3e7a5671bb5de..33a9c5e258ea2 100644
--- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], 
[[C]](s64)
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
@@ -65,7 +65,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY9]], 
[[C]](s64)
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
@@ -105,7 +105,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY4]], 
[[C]](s64)
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 33862de91430c..57ee2c8f88073 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -31,7 +31,7 @@ define amdgpu_kernel void 
@test_call_external_void_func_i32([17 x i8]) #0 {
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -84,7 +84,7 @@ define amdgpu_kernel void 
@test_call_external_void_func_i32([17 x i8]) #0 {
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -230,7 +230,7 @@ define amdgpu_kernel void 
@test_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -319,7 +319,7 @@ define amdgpu_kernel void 
@test_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -668,7 +668,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 
!reqd_work_group_size !0
   ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -710,7 +710,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 
!reqd_work_group_size !0
   ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -756,7 +756,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 
!reqd_work_group_size !1
   ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -802,7 +802,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 
!reqd_work_group_size !1
   ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -852,7 +852,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 
!reqd_work_group_size !2
   ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -898,7 +898,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 
!reqd_work_group_size !2
   ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY10]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
@@ -949,7 +949,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 
!reqd_work_group_size !
   ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
   ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -996,7 +996,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 
!reqd_work_group_size !
   ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
   ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1047,7 +1047,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 
!reqd_work_group_size !
   ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
   ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1098,7 +1098,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 
!reqd_work_group_size !
   ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
   ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1153,7 +1153,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 
!reqd_work_group_size !
   ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], 
[[C1]](s64)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], 
[[C1]](s64)
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
   ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 
!reqd_work_group_size !
   ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], 
[[C1]](s64)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY11]], 
[[C1]](s64)
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
   ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 559391709f41d..6f624b2536f1a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -91,7 +91,7 @@ define amdgpu_kernel void 
@test_call_external_i32_func_i32_imm(ptr addrspace(1)
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C1]](s64)
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -174,7 +174,7 @@ define amdgpu_kernel void 
@test_call_external_i1_func_void() #0 {
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C]](s64)
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -252,7 +252,7 @@ define amdgpu_kernel void 
@test_call_external_i1_zeroext_func_void() #0 {
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C]](s64)
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -314,7 +314,7 @@ define amdgpu_kernel void 
@test_call_external_i1_signext_func_void() #0 {
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C]](s64)
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -376,7 +376,7 @@ define amdgpu_kernel void 
@test_call_external_i8_func_void() #0 {
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C]](s64)
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -456,7 +456,7 @@ define amdgpu_kernel void 
@test_call_external_i8_zeroext_func_void() #0 {
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[COPY12]], 
[[C]](s64)
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -518,7 +518,7 @@ define amdgp...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/150899
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to