[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 91bd709f8f7b0f8a05578bd1291295b5a8d1b6c1 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 139
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 160 insertions(+), 134 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 30ee6a99b9dfc..45edcf9992706 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6710,7 +6710,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b54c6cbb18de4..ec57d231dab5d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14480,7 +14482,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14513,7 +14515,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14625,8 +14627,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -15100,42 +15105,116 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-if (N1.hasOneUse()) {
- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
- if (ZIsConstant != YIsConstant) {
-// If both additions in the original were NUW, the new ones are as well.
-
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 17e45843d6909ce14269f9e3bb796be4f890b120 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 139
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 160 insertions(+), 134 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5d8db8be9731f..0f5a943d663d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6710,7 +6710,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b54c6cbb18de4..ec57d231dab5d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14480,7 +14482,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14513,7 +14515,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14625,8 +14627,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -15100,42 +15105,116 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-if (N1.hasOneUse()) {
- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
- if (ZIsConstant != YIsConstant) {
-// If both additions in the original were NUW, the new ones are as well.
-
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 10494be4478143e69a6116653228170195c00dc2 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 139
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 160 insertions(+), 134 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f645b09db734b..bd123fc4ffd1b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,42 +14954,116 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-if (N1.hasOneUse()) {
- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
- if (ZIsConstant != YIsConstant) {
-// If both additions in the original were NUW, the new ones are as well.
-
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 7eb2283f214e29022a5d580fb1bfa6d2effc9c4c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 28e612640d159..e085008e3de1d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original we
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 7eb2283f214e29022a5d580fb1bfa6d2effc9c4c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 28e612640d159..e085008e3de1d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original we
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 50de6e085242ce975af812088f4ef48896444fb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original we
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From 50de6e085242ce975af812088f4ef48896444fb6 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original we
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From a3d204e9a8aae5de008a83904215d44d8d0c3380 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32,
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original we
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/143673
>From a3d204e9a8aae5de008a83904215d44d8d0c3380 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..1210777428020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6706,7 +6706,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 184984abcdf32..fe002b3daed89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14329,7 +14331,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14362,7 +14364,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14474,8 +14476,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32,
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14949,44 +14954,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original we
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a created
https://github.com/llvm/llvm-project/pull/143673
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
>From 11bd2c5d43cf9b49e1009c09aa11e583b37957e9 Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Wed, 11 Jun 2025 05:48:45 -0400
Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++
.../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++
3 files changed, 167 insertions(+), 139 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 526a395181764..cc24585b4e4ad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6765,7 +6765,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0708c76bcb3fc..6149d7ca84354 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14320,7 +14322,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14353,7 +14355,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14465,8 +14467,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32,
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14940,44 +14945,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a ready_for_review https://github.com/llvm/llvm-project/pull/143673 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Fabian Ritter (ritter-x2a)
Changes
This patch adds several (AMDGPU-)target-specific DAG combines for
ISD::PTRADD nodes that reproduce existing similar transforms for
ISD::ADD nodes. There is no functional change intended for the existing
target-specific PTRADD combine.
For SWDEV-516125.
---
Full diff: https://github.com/llvm/llvm-project/pull/143673.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+116-35)
- (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll (+48-103)
``diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 526a395181764..cc24585b4e4ad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6765,7 +6765,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode,
EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0708c76bcb3fc..6149d7ca84354 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -14320,7 +14322,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG,
const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14353,7 +14355,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14465,8 +14467,11 @@
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+unsigned Opcode = N->getOpcode();
+if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
-DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32,
N->getFlags());
+DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -14940,44 +14945,120 @@ SDValue
SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
-//y is not, and (add y, z) is used only once.
-// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
-//z is not, and (add y, z) is used only once.
-// The goal is to move constant offsets to the outermost ptradd, to create
-// more opportunities to fold offsets into memory instructions.
-// Together with the generic combines in DAGCombiner.cpp, this also
-// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
-//
-// This transform is here instead of in the general DAGCombiner as it can
-// turn in-bounds pointer arithmetic out-of-bounds, which is problematic
for
-// AArch64's CPA.
-SDValue X = N0;
-SDValue Y = N1.getOperand(0);
-SDValue Z = N1.getOperand(1);
-bool N1OneUse = N1.hasOneUse();
-bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
-bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
-if ((ZIsConstant != YIsConstant) && N1OneUse) {
- SDNodeFlags Flags;
- // If both additions in the original were NUW, the new ones are as well.
- if (N->getFlags().hasNoUnsignedWrap() &&
- N1->getFlags().hasNoUnsignedWrap())
-
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
ritter-x2a wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/143673?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#143673** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/143673?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/143673?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#143672** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/143672?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#142778** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142778?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#142777** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142777?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#142739** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142739?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#142738** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/142738?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#141725** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/141725?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.

https://github.com/llvm/llvm-project/pull/143673
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
