[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
ritter-x2a wrote: ### Merge activity * **Jul 18, 7:54 AM UTC**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/143880). https://github.com/llvm/llvm-project/pull/143880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From c4bda032514d199feafe799693d53e118874e3d8 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From c4bda032514d199feafe799693d53e118874e3d8 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 604ad3d39fac13c1abec2b7db46d1e671d842399 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 604ad3d39fac13c1abec2b7db46d1e671d842399 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 2d2b48ce3f9749a0b88b71e4f8e33e9a68adf365 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From cab9968daf80e744a1a2e688ef050e519599db18 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 3873d1a4be3dce63b923cd926701f4542842472c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 3873d1a4be3dce63b923cd926701f4542842472c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 3f69917b67760c64fdafcb42b5783b8aaafb1406 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 69fddba7ea92c1dc4542dafaad1ea72ba4ce592c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 69fddba7ea92c1dc4542dafaad1ea72ba4ce592c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 78abf559c52c400cbd651fc8aa9fc39262a3eb32 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 78abf559c52c400cbd651fc8aa9fc39262a3eb32 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 99d65b3e0a8627b581673b55505962665a3ffcb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143880
>From 99d65b3e0a8627b581673b55505962665a3ffcb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Fabian Ritter (ritter-x2a)
Changes
Pre-committing tests to show improvements in a follow-up PR.
---
Full diff: https://github.com/llvm/llvm-project/pull/143880.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll (+45)
``diff
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
``
https://github.com/llvm/llvm-project/pull/143880
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a ready_for_review https://github.com/llvm/llvm-project/pull/143880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
ritter-x2a wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/143880?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#143881** https://app.graphite.dev/github/pr/llvm/llvm-project/143881?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#143880** https://app.graphite.dev/github/pr/llvm/llvm-project/143880?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/143880?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#143673** https://app.graphite.dev/github/pr/llvm/llvm-project/143673?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#143672** https://app.graphite.dev/github/pr/llvm/llvm-project/143672?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#142778** https://app.graphite.dev/github/pr/llvm/llvm-project/142778?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#142777** https://app.graphite.dev/github/pr/llvm/llvm-project/142777?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#142739** https://app.graphite.dev/github/pr/llvm/llvm-project/142739?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#142738** https://app.graphite.dev/github/pr/llvm/llvm-project/142738?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#141725** https://app.graphite.dev/github/pr/llvm/llvm-project/141725?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/143880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a created
https://github.com/llvm/llvm-project/pull/143880
Pre-committing tests to show improvements in a follow-up PR.
>From d4f77f6692b2bf892dc33023219cd1bc402db546 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Thu, 12 Jun 2025 06:13:26 -0400
Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns
Pre-committing tests to show improvements in a follow-up PR.
---
.../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index c00bccdbce6b7..d48bfe0bb7f21 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p)
{
store float 1.0, ptr addrspace(1) %p1
ret void
}
+
+; Use non-zero shift amounts in v_lshl_add_u64.
+define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) {
+; GFX942_PTRADD-LABEL: select_v_lshl_add_u64:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3]
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: select_v_lshl_add_u64:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %gep = getelementptr inbounds i64, ptr %base, i64 %voffset
+ ret ptr %gep
+}
+
+; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the
+; mul into a mul24.
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
+; GFX942_PTRADD-LABEL: fold_mul24_into_mad:
+; GFX942_PTRADD: ; %bb.0:
+; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4
+; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4
+; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4
+; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: fold_mul24_into_mad:
+; GFX942_LEGACY: ; %bb.0:
+; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2
+; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4
+; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1]
+; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31]
+ %a_masked = and i64 %a, u0xf
+ %b_masked = and i64 %b, u0xf
+ %mul = mul i64 %a_masked, %b_masked
+ %gep = getelementptr inbounds i8, ptr %base, i64 %mul
+ ret ptr %gep
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
