[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-26 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

### Merge activity

* **Jun 26, 7:27 AM UTC**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/143672).


https://github.com/llvm/llvm-project/pull/143672
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-24 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From 74777534d4ee80311ee342af11b4d5a87564f137 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/143672
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-23 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From ee92e376ca004da16f3683ede1de06d3e370d963 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-23 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From ee92e376ca004da16f3683ede1de06d3e370d963 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From ac6d5eb285b1f56b5c32133279224feb2b8bd8a9 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From a448f7ed1c303cf5878d975f2bbe6cb1f7b175fc Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From a448f7ed1c303cf5878d975f2bbe6cb1f7b175fc Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From 37747657c81cc49feb345810b792f01e35d28511 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From 37747657c81cc49feb345810b792f01e35d28511 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From c9cbbce907dc77f1580019bb78ae3c175f99af37 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-13 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/143672

>From c9cbbce907dc77f1580019bb78ae3c175f99af37 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-11 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a ready_for_review 
https://github.com/llvm/llvm-project/pull/143672
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-11 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)


Changes

Pre-committing tests to show improvements in a follow-up PR.

---
Full diff: https://github.com/llvm/llvm-project/pull/143672.diff


1 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll (+176) 


```diff
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+; Tests the target-specific (ptradd x, shl(0 - y, k)) -> sub(x, shl(y, k)) fold
+define ptr addrspac

[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-11 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on Graphite:
> https://app.graphite.dev/github/pr/llvm/llvm-project/143672?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#143673** https://app.graphite.dev/github/pr/llvm/llvm-project/143673?utm_source=stack-comment-icon
* **#143672** https://app.graphite.dev/github/pr/llvm/llvm-project/143672?utm_source=stack-comment-icon 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/143672?utm_source=stack-comment-view-in-graphite)
* **#142778** https://app.graphite.dev/github/pr/llvm/llvm-project/142778?utm_source=stack-comment-icon
* **#142777** https://app.graphite.dev/github/pr/llvm/llvm-project/142777?utm_source=stack-comment-icon
* **#142739** https://app.graphite.dev/github/pr/llvm/llvm-project/142739?utm_source=stack-comment-icon
* **#142738** https://app.graphite.dev/github/pr/llvm/llvm-project/142738?utm_source=stack-comment-icon
* **#141725** https://app.graphite.dev/github/pr/llvm/llvm-project/141725?utm_source=stack-comment-icon
* `main`




This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn 
more about stacking: https://stacking.dev/?utm_source=stack-comment


https://github.com/llvm/llvm-project/pull/143672
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)

2025-06-11 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/143672

Pre-committing tests to show improvements in a follow-up PR.

>From b209c96bd0615f3ab6f283ddeabeeed5e2e0d2dc Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 11 Jun 2025 05:14:34 -0400
Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines

Pre-committing tests to show improvements in a follow-up PR.
---
 .../AMDGPU/ptradd-sdag-optimizations.ll   | 176 ++
 1 file changed, 176 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll 
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 2e76033a480f4..1ec94162951a6 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -142,3 +142,179 @@ entry:
   tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 
1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
   ret void
 }
+
+; Test skipping the lower-32-bit addition if it is unnecessary.
+define ptr @huge_offset_low_32_unused(ptr %p) {
+; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
+; GFX942_PTRADD:   ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0
+; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
+; GFX942_LEGACY:   ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+  %gep = getelementptr inbounds i8, ptr %p, i64 u0x100000000
+  ret ptr %gep
+}
+
+; Reassociate address computation if it leads to more scalar operations.
+define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_r:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_r:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %voffset, %soffset
+  %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset
+  store ptr addrspace(1) %gep, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr 
addrspace(1) %p, i64 %soffset) {
+; GFX942_PTRADD-LABEL: reassoc_scalar_l:
+; GFX942_PTRADD:   ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
+; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_PTRADD-NEXT:s_endpgm
+;
+; GFX942_LEGACY-LABEL: reassoc_scalar_l:
+; GFX942_LEGACY:   ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6
+; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
+; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1]
+; GFX942_LEGACY-NEXT:s_endpgm
+entry:
+  %voffset32 = call i32 @llvm.amdgcn.workitem.id.x()
+  %voffset = zext i32 %voffset32 to i64
+  %offset = add nuw nsw i64 %soffset, %voffset
+  %gep = getelementptr i8, ptr addrspace(1)