[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
petar-avramovic wrote: https://github.com/llvm/llvm-project/pull/145886 https://github.com/llvm/llvm-project/pull/142789 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic closed https://github.com/llvm/llvm-project/pull/142789 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142789
>From fada12c02954dd1c244c944fa37dbae674284923 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 124 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++
3 files changed, 127 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b38dacfe9958d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,111 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
+ return {nullptr, -1};
+
+Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+ return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+ MRI.replaceRegWith(Dst, Src);
+else
+ B.buildCopy(Dst, Src);
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+ RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+ // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // Dst = RALSrc $Dst = Copy RALSrc
+ replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+ // RALDst = READANYLANE RALSrc     RALDst = READANYLANE RALSrc
+ // Src = G_BITCAST RALDst          Src = G_BITCAST RALDst
+ // Dst = Copy Src                  $Dst = Copy Src
+ // ->                              ->
+ // NewVgpr = G_BITCAST RALSrc      NewVgpr = G_BITCAST RALSrc
+ // Dst = NewVgpr                   $Dst = Copy NewVgpr
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+ return;
+
Reg
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142789
>From fada12c02954dd1c244c944fa37dbae674284923 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 124 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++
3 files changed, 127 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b38dacfe9958d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,111 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
+ return {nullptr, -1};
+
+Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+ return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+ MRI.replaceRegWith(Dst, Src);
+else
+ B.buildCopy(Dst, Src);
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+ RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+ // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // Dst = RALSrc $Dst = Copy RALSrc
+ replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+ // RALDst = READANYLANE RALSrc     RALDst = READANYLANE RALSrc
+ // Src = G_BITCAST RALDst          Src = G_BITCAST RALDst
+ // Dst = Copy Src                  $Dst = Copy Src
+ // ->                              ->
+ // NewVgpr = G_BITCAST RALSrc      NewVgpr = G_BITCAST RALSrc
+ // Dst = NewVgpr                   $Dst = Copy NewVgpr
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+ return;
+
Reg
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
Pierre-vh wrote:
use early return here?
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/Pierre-vh approved this pull request. https://github.com/llvm/llvm-project/pull/142789 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142789
>From 28f0f171b27aaf707706db71978d525c12e21491 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 122 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++
3 files changed, 125 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+ MRI.replaceRegWith(Dst, Src);
+else
+ B.buildCopy(Dst, Src);
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+ RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+ // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // Dst = RALSrc $Dst = Copy RALSrc
+ replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+ // RALDst = READANYLANE RALSrc     RALDst = READANYLANE RALSrc
+ // Src = G_BITCAST RALDst          Src = G_BITCAST RALDst
+ // Dst = Copy Src                  $Dst = Copy Src
+ // ->                              ->
+ // NewVgpr = G_BITCAST RALSrc      NewVgpr = G_BITCAST RALSrc
+ // Dst = NewVgpr                   $Dst = Copy NewVgpr
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+ return;
+
Register Dst = MI.get
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142789
>From 28f0f171b27aaf707706db71978d525c12e21491 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 122 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++
3 files changed, 125 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+ MRI.replaceRegWith(Dst, Src);
+else
+ B.buildCopy(Dst, Src);
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+ RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+ // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // Dst = RALSrc $Dst = Copy RALSrc
+ replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+ // RALDst = READANYLANE RALSrc     RALDst = READANYLANE RALSrc
+ // Src = G_BITCAST RALDst          Src = G_BITCAST RALDst
+ // Dst = Copy Src                  $Dst = Copy Src
+ // ->                              ->
+ // NewVgpr = G_BITCAST RALSrc      NewVgpr = G_BITCAST RALSrc
+ // Dst = NewVgpr                   $Dst = Copy NewVgpr
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+ return;
+
Register Dst = MI.get
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
petar-avramovic wrote:
Not sure, did not see any cases yet
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+if (Dst.isVirtual()) {
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALSrc
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
petar-avramovic wrote:
No, have to set it manually before using the builder, it was a bug.
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142789
>From 64d7853a9edefabe8de40748e01348d2d5c017c5 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 122 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++
3 files changed, 125 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+ MRI.replaceRegWith(Dst, Src);
+else
+ B.buildCopy(Dst, Src);
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+ RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+ // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // Dst = RALSrc $Dst = Copy RALSrc
+ replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+ // RALDst = READANYLANE RALSrc     RALDst = READANYLANE RALSrc
+ // Src = G_BITCAST RALDst          Src = G_BITCAST RALDst
+ // Dst = Copy Src                  $Dst = Copy Src
+ // ->                              ->
+ // NewVgpr = G_BITCAST RALSrc      NewVgpr = G_BITCAST RALSrc
+ // Dst = NewVgpr                   $Dst = Copy NewVgpr
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+ return;
+
Register Dst = MI.get
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142789
>From 64d7853a9edefabe8de40748e01348d2d5c017c5 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 122 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++
3 files changed, 125 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::pair tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ if (auto *UnMerge = getOpcodeDef(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src = G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge = G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+ MRI.replaceRegWith(Dst, Src);
+else
+ B.buildCopy(Dst, Src);
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+ RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+ // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // Dst = RALSrc $Dst = Copy RALSrc
+ replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+ // RALDst = READANYLANE RALSrc RALDst = READANYLANE RALSrc
+ // Src = G_BITCAST RALDst Src = G_BITCAST RALDst
+ // Dst = Copy Src $Dst = Copy Src
+ // -> ->
+ // NewVgpr = G_BITCAST RALSrc NewVgpr = G_BITCAST RALSrc
+ // Dst = NewVgpr $Dst = Copy NewVgpr
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+ return;
+
Register Dst = MI.get
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+if (Dst.isVirtual()) {
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
Pierre-vh wrote:
Does this work as intended without the `B.setInstr(Copy)` call?
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+if (Dst.isVirtual()) {
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
Pierre-vh wrote:
Just wondering, can we just emit a COPY instead and let another combine take
care of the folding?
The two branches are very similar, it'd be nice to make this more terse. Maybe
we could use a helper like `copyOrReplace` for `Dst` that does the right thing
depending on whether `Dst` is virtual or not?
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
Pierre-vh wrote:
```suggestion
if (auto *UnMerge = getOpcodeDef(RALSrc, MRI))
```
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
Pierre-vh wrote:
```suggestion
if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
RALDst = SrcMI.getOperand(1).getReg();
```
nit: could opcodes other than bitcast show up here and matter, such as
in-register extensions or assert-exts?
It feels like we should have a helper for this somewhere
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
Pierre-vh wrote:
```suggestion
std::pair tryMatchRALFromUnmerge(Register Src) {
```
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
Pierre-vh wrote:
```suggestion
MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
```
I think we generally use `auto` only if the type is already in the RHS
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Petar Avramovic (petar-avramovic)
Changes
---
Patch is 22.08 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/142789.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+118-18)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
(+2-23)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
(+19-59)
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b5fe0ed499255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+if (Dst.isVirtual()) {
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+MRI.replaceRegWith(Dst, Bitcast.getReg(0));
+ }
+} else {
+ B.setInstr(Copy);
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// $Dst = Copy Src
+// ->
+// $Dst = Copy RALSrc
+B.buildCopy(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// $Dst = Copy NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+B.buildCopy(Dst, Bitcast.getReg(0));
+ }
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(MachineI
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: Petar Avramovic (petar-avramovic)
Changes
---
Patch is 22.08 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/142789.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+118-18)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
(+2-23)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
(+19-59)
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b5fe0ed499255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+if (Dst.isVirtual()) {
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+MRI.replaceRegWith(Dst, Bitcast.getReg(0));
+ }
+} else {
+ B.setInstr(Copy);
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// $Dst = Copy Src
+// ->
+// $Dst = Copy RALSrc
+B.buildCopy(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// $Dst = Copy NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+B.buildCopy(Dst, Bitcast.getReg(0));
+ }
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+ }
+
void tryCombineCopy(Machine
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic ready_for_review https://github.com/llvm/llvm-project/pull/142789 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
petar-avramovic wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#142790** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142790?utm_source=stack-comment-icon)
* **#142789** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-view-in-graphite)
* **#142788** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142788?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)
https://github.com/petar-avramovic created
https://github.com/llvm/llvm-project/pull/142789
None
>From fcd0dc75f4674297ef1f5c591ecf6c16314ce3e2 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Wed, 4 Jun 2025 17:12:16 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
regbanklegalize
---
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 136 +++---
.../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +---
.../GlobalISel/readanylane-combines.mir | 78 +++---
3 files changed, 139 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b5fe0ed499255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
+ std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+ Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+ auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+ if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+ }
+
+ Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+ unsigned NumElts = Merge->getNumSources();
+ auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+ if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+ // check if all elements are from same unmerge and there is no shuffling
+ for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+ return {};
+ }
+ return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+ int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+ auto *Merge = getOpcodeDef(UnMerge->getSourceReg(),
MRI);
+ if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+ return RALSrc;
+ }
+}
+
+return {};
+ }
+
+ bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+ return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+ RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+ return false;
+
+if (Dst.isVirtual()) {
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+MRI.replaceRegWith(Dst, Bitcast.getReg(0));
+ }
+} else {
+ B.setInstr(Copy);
+ if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// $Dst = Copy Src
+// ->
+// $Dst = Copy RALSrc
+B.buildCopy(Dst, RALSrc);
+ } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// $Dst = Copy NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+B.buildCopy(Dst, Bitcast.getReg(0));
+ }
+}
+
+eraseInstr(Copy, MRI, nullptr);
