[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
petar-avramovic wrote: https://github.com/llvm/llvm-project/pull/145887 https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic closed https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -stop-after=amdgpu-regbanklegalize -regbankselect-fast -o - %s | FileCheck %s arsenm wrote: I guess, how much longer until that happens? https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -205,7 +207,14 @@ class AMDGPURegBankLegalizeCombiner {
bool tryEliminateReadAnyLane(MachineInstr &Copy) {
Register Dst = Copy.getOperand(0).getReg();
Register Src = Copy.getOperand(1).getReg();
-if (!Src.isVirtual())
+
+// Skip non-vgpr Dst
+if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
+: !TRI.isVGPR(MRI, Dst))
+ return false;
+
+// Skip physical source registers and source registers with register class
arsenm wrote:
This shouldn't happen?
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
arsenm wrote:
Make the function a template argument?
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142790
>From d5bdc951f61533379fed9a86ed6c0eab18b7893c Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:43:04 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 53 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 17 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 239 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 22 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
19 files changed, 523 insertions(+), 243 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..d8be3aee1f410 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return bu
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142790
>From d5bdc951f61533379fed9a86ed6c0eab18b7893c Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:43:04 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 53 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 17 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 239 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 22 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
19 files changed, 523 insertions(+), 243 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..d8be3aee1f410 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return bu
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
lower(MI, Mapping, WaterfallSgprs);
}
+bool RegBankLegalizeHelper::executeInWaterfallLoop(
+MachineIRBuilder &B, iterator_range Range,
+SmallSet &SGPROperandRegs) {
+ // Track use registers which have already been expanded with a readfirstlane
+ // sequence. This may have multiple uses if moving a sequence.
+ DenseMap WaterfalledRegMap;
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction &MF = B.getMF();
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
+ if (ST.isWave32()) {
petar-avramovic wrote:
Yes, changed to field
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -203,7 +205,14 @@ class AMDGPURegBankLegalizeCombiner {
bool tryEliminateReadAnyLane(MachineInstr &Copy) {
Register Dst = Copy.getOperand(0).getReg();
Register Src = Copy.getOperand(1).getReg();
-if (!Src.isVirtual())
+
+// Skip non-vgpr Dst
+if ((Dst.isVirtual() && MRI.getRegBankOrNull(Dst) != VgprRB) ||
+(Dst.isPhysical() && !TRI.isVGPR(MRI, Dst)))
Pierre-vh wrote:
```suggestion
if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB) :
!TRI.isVGPR(MRI, Dst))
```
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
lower(MI, Mapping, WaterfallSgprs);
}
+bool RegBankLegalizeHelper::executeInWaterfallLoop(
+MachineIRBuilder &B, iterator_range Range,
+SmallSet &SGPROperandRegs) {
+ // Track use registers which have already been expanded with a readfirstlane
+ // sequence. This may have multiple uses if moving a sequence.
+ DenseMap WaterfalledRegMap;
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction &MF = B.getMF();
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
+ if (ST.isWave32()) {
Pierre-vh wrote:
So it can be a field, right?
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
petar-avramovic wrote: ping https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142790
>From ec14c19baccfeb87380bf99f728b213db3db05e2 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:43:04 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 53 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 17 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 239 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 22 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
19 files changed, 523 insertions(+), 243 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..d8be3aee1f410 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return bu
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142790
>From ec14c19baccfeb87380bf99f728b213db3db05e2 Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:43:04 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 53 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 17 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 239 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 22 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
19 files changed, 523 insertions(+), 243 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..d8be3aee1f410 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return bu
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
lower(MI, Mapping, WaterfallSgprs);
}
+bool RegBankLegalizeHelper::executeInWaterfallLoop(
+MachineIRBuilder &B, iterator_range Range,
+SmallSet &SGPROperandRegs) {
+ // Track use registers which have already been expanded with a readfirstlane
+ // sequence. This may have multiple uses if moving a sequence.
+ DenseMap WaterfalledRegMap;
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction &MF = B.getMF();
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
+ if (ST.isWave32()) {
petar-avramovic wrote:
it is instantiated per ST, MRI pair, not per function
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -165,6 +165,8 @@ enum RegBankLLTMappingApplyID {
Sgpr32Trunc,
// Src only modifiers: waterfalls, extends
+ Sgpr32_W,
+ SgprV4S32_W,
petar-avramovic wrote:
Added one above, is it clear now?
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142790
>From ae9621601118004cc6b363be7fad70092e401cad Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:43:04 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 53 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 239 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 28 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
18 files changed, 513 insertions(+), 242 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..d8be3aee1f410 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl<Register> &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return buildReadLane(
+ B, SgprDst, VgprSrc, RBI,
+ [](
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic updated
https://github.com/llvm/llvm-project/pull/142790
>From ae9621601118004cc6b363be7fad70092e401cad Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Thu, 5 Jun 2025 12:43:04 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 53 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 239 +-
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 28 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
18 files changed, 513 insertions(+), 242 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..d8be3aee1f410 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,72 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+using ReadLaneFnTy =
+function_ref<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return buildReadLane(
+ B, SgprDst, VgprSrc, RBI,
+ [](
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
lower(MI, Mapping, WaterfallSgprs);
}
+bool RegBankLegalizeHelper::executeInWaterfallLoop(
+MachineIRBuilder &B, iterator_range Range,
+SmallSet &SGPROperandRegs) {
+ // Track use registers which have already been expanded with a readfirstlane
+ // sequence. This may have multiple uses if moving a sequence.
+ DenseMap WaterfalledRegMap;
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction &MF = B.getMF();
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
+ if (ST.isWave32()) {
+MovExecOpc = AMDGPU::S_MOV_B32;
+MovExecTermOpc = AMDGPU::S_MOV_B32_term;
+XorTermOpc = AMDGPU::S_XOR_B32_term;
+AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
+ExecReg = AMDGPU::EXEC_LO;
+ } else {
+MovExecOpc = AMDGPU::S_MOV_B64;
+MovExecTermOpc = AMDGPU::S_MOV_B64_term;
+XorTermOpc = AMDGPU::S_XOR_B64_term;
+AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
+ExecReg = AMDGPU::EXEC;
+ }
+
+#ifndef NDEBUG
+ const int OrigRangeSize = std::distance(Range.begin(), Range.end());
+#endif
+
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
+ Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
+
+ // Don't bother using generic instructions/registers for the exec mask.
+ B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);
+
+ Register SavedExec = MRI.createVirtualRegister(WaveRC);
+
+ // To insert the loop we need to split the block. Move everything before
+ // this point to a new block, and insert a new empty block before this
+ // instruction.
+ MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RestoreExecBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+ MF.insert(MBBI, LoopBB);
+ MF.insert(MBBI, BodyBB);
+ MF.insert(MBBI, RestoreExecBB);
+ MF.insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(BodyBB);
+ BodyBB->addSuccessor(RestoreExecBB);
+ BodyBB->addSuccessor(LoopBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+ RestoreExecBB->addSuccessor(RemainderBB);
+
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
+  //  +-MBB:------------+
+  //  | ...             |
+  //  | %0 = G_INST_1   |
+  //  | %Dst = MI %Vgpr |
+  //  | %1 = G_INST_2   |
+  //  | ...             |
+  //  +-----------------+
+  // ->
+  //  +-MBB-------------------------------+
+  //  | ...                               |
+  //  | %0 = G_INST_1                     |
+  //  | %SaveExecReg = S_MOV_B32 $exec_lo |
+  //  +----------------|------------------+
+  //                   |                        /-------------------------------|
+  //                   V                        V                               |
+  //  +-LoopBB---------------------------------------------------------------+  |
+  //  | %CurrentLaneReg:sgpr(s32) = READFIRSTLANE %Vgpr                      |  |
+  //  |   instead of executing for each lane, see if other lanes had         |  |
+  //  |   same value for %Vgpr and execute for them also.                    |  |
+  //  | %CondReg:vcc(s1) = G_ICMP eq %CurrentLaneReg, %Vgpr                  |  |
+  //  | %CondRegLM:sreg_32 = ballot %CondReg // copy vcc to sreg32 lane mask |  |
+  //  | %SavedExec = S_AND_SAVEEXEC_B32 %CondRegLM                           |  |
+  //  |   exec is active for lanes with the same "CurrentLane value" in Vgpr |  |
+  //  +----------------|-----------------------------------------------------+  |
+  //                   V                                                        |
+  //  +-BodyBB------------------------------------------------------------+     |
+  //  | %Dst = MI %CurrentLaneReg:sgpr(s32)                               |     |
+  //  |   executed only for active lanes and written to Dst               |     |
+  //  | $exec = S_XOR_B32 $exec, %SavedExec                               |     |
+  //  |   set active lanes to 0 in SavedExec, lanes that did not write to |     |
+  //  |   Dst yet, and set this as new exec (for READFIRSTLANE and ICMP)  |     |
+  //  | SI_WATERFALL_LOOP LoopBB                                          |-----|
+  //  +----------------|--------------------------------------------------+
+  //                   V
+  //  +-RestoreExecBB--------------------------+
+  //  | $exec_lo = S_MOV_B32_term %SaveExecReg |
+  //  +----------------|-----------------------+
+  //                   V
+  //  +-RemainderBB:----------------------+
+ //
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -894,6 +1121,15 @@ void RegBankLegalizeHelper::applyMappingSrc(
}
break;
}
+// sgpr waterfall, scalars and vectors
+case Sgpr32_W:
+case SgprV4S32_W: {
+ assert(Ty == getTyFromID(MethodIDs[i]));
+ if (RB != SgprRB) {
+SgprWaterfallOperandRegs.insert(Reg);
+ }
Pierre-vh wrote:
```suggestion
if (RB != SgprRB)
SgprWaterfallOperandRegs.insert(Reg);
```
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
lower(MI, Mapping, WaterfallSgprs);
}
+bool RegBankLegalizeHelper::executeInWaterfallLoop(
+MachineIRBuilder &B, iterator_range Range,
+SmallSet &SGPROperandRegs) {
+ // Track use registers which have already been expanded with a readfirstlane
+ // sequence. This may have multiple uses if moving a sequence.
+ DenseMap WaterfalledRegMap;
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction &MF = B.getMF();
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
+ if (ST.isWave32()) {
Pierre-vh wrote:
nit: I think those could go in the class directly so this isn't repeated
every time, no?
The class is instantiated per function anyway
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -165,6 +165,8 @@ enum RegBankLLTMappingApplyID {
Sgpr32Trunc,
// Src only modifiers: waterfalls, extends
+ Sgpr32_W,
+ SgprV4S32_W,
Pierre-vh wrote:
Can you add a trailing comment or rename this? The `_W` suffix is not
immediately clear to me.
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -57,6 +57,226 @@ void
RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
lower(MI, Mapping, WaterfallSgprs);
}
+bool RegBankLegalizeHelper::executeInWaterfallLoop(
+MachineIRBuilder &B, iterator_range Range,
+SmallSet &SGPROperandRegs) {
+ // Track use registers which have already been expanded with a readfirstlane
+ // sequence. This may have multiple uses if moving a sequence.
+ DenseMap WaterfalledRegMap;
+
+ MachineBasicBlock &MBB = B.getMBB();
+ MachineFunction &MF = B.getMF();
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
+ unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
+ if (ST.isWave32()) {
+MovExecOpc = AMDGPU::S_MOV_B32;
+MovExecTermOpc = AMDGPU::S_MOV_B32_term;
+XorTermOpc = AMDGPU::S_XOR_B32_term;
+AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
+ExecReg = AMDGPU::EXEC_LO;
+ } else {
+MovExecOpc = AMDGPU::S_MOV_B64;
+MovExecTermOpc = AMDGPU::S_MOV_B64_term;
+XorTermOpc = AMDGPU::S_XOR_B64_term;
+AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
+ExecReg = AMDGPU::EXEC;
+ }
+
+#ifndef NDEBUG
+ const int OrigRangeSize = std::distance(Range.begin(), Range.end());
+#endif
+
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
+ Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
+
+ // Don't bother using generic instructions/registers for the exec mask.
+ B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);
+
+ Register SavedExec = MRI.createVirtualRegister(WaveRC);
+
+ // To insert the loop we need to split the block. Move everything before
+ // this point to a new block, and insert a new empty block before this
+ // instruction.
+ MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RestoreExecBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+ MF.insert(MBBI, LoopBB);
+ MF.insert(MBBI, BodyBB);
+ MF.insert(MBBI, RestoreExecBB);
+ MF.insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(BodyBB);
+ BodyBB->addSuccessor(RestoreExecBB);
+ BodyBB->addSuccessor(LoopBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+ RestoreExecBB->addSuccessor(RemainderBB);
+
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
+  //  +-MBB:------------+
+  //  | ...             |
+  //  | %0 = G_INST_1   |
+  //  | %Dst = MI %Vgpr |
+  //  | %1 = G_INST_2   |
+  //  | ...             |
+  //  +-----------------+
+  // ->
+  //  +-MBB-------------------------------+
+  //  | ...                               |
+  //  | %0 = G_INST_1                     |
+  //  | %SaveExecReg = S_MOV_B32 $exec_lo |
+  //  +----------------|------------------+
+  //                   |                        /-------------------------------|
+  //                   V                        V                               |
+  //  +-LoopBB---------------------------------------------------------------+  |
+  //  | %CurrentLaneReg:sgpr(s32) = READFIRSTLANE %Vgpr                      |  |
+  //  |   instead of executing for each lane, see if other lanes had         |  |
+  //  |   same value for %Vgpr and execute for them also.                    |  |
+  //  | %CondReg:vcc(s1) = G_ICMP eq %CurrentLaneReg, %Vgpr                  |  |
+  //  | %CondRegLM:sreg_32 = ballot %CondReg // copy vcc to sreg32 lane mask |  |
+  //  | %SavedExec = S_AND_SAVEEXEC_B32 %CondRegLM                           |  |
+  //  |   exec is active for lanes with the same "CurrentLane value" in Vgpr |  |
+  //  +----------------|-----------------------------------------------------+  |
+  //                   V                                                        |
+  //  +-BodyBB------------------------------------------------------------+     |
+  //  | %Dst = MI %CurrentLaneReg:sgpr(s32)                               |     |
+  //  |   executed only for active lanes and written to Dst               |     |
+  //  | $exec = S_XOR_B32 $exec, %SavedExec                               |     |
+  //  |   set active lanes to 0 in SavedExec, lanes that did not write to |     |
+  //  |   Dst yet, and set this as new exec (for READFIRSTLANE and ICMP)  |     |
+  //  | SI_WATERFALL_LOOP LoopBB                                          |-----|
+  //  +----------------|--------------------------------------------------+
+  //                   V
+  //  +-RestoreExecBB--------------------------+
+  //  | $exec_lo = S_MOV_B32_term %SaveExecReg |
+  //  +----------------|-----------------------+
+  //                   V
+  //  +-RemainderBB:----------------------+
+ //
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -stop-after=amdgpu-regbanklegalize -regbankselect-fast -o - %s | FileCheck %s Pierre-vh wrote: @arsenm Is it fine to move tests entirely to this new RBSelect, or should we keep coverage for both until the old RB is removed? https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -117,45 +117,73 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+typedef std::functionhttps://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
@@ -117,45 +117,73 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+typedef std::function<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>
+ReadLaneFnTy;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return buildReadLane(
+ B, SgprDst, VgprSrc, RBI,
+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
+return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst},
{VgprSrc});
+ });
+}
+
+void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst,
+Register VgprSrc, const RegisterBankInfo &RBI)
{
+ return buildReadLane(
+ B, SgprDst, VgprSrc, RBI,
+ [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
+return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst)
Pierre-vh wrote:
Not for this PR, but we should really have an opcode for this too instead of
having one being an intrinsic and one being a generic opcode
https://github.com/llvm/llvm-project/pull/142790
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic created
https://github.com/llvm/llvm-project/pull/142790
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
>From 6dd26d44b55420f91a1684e78938ea8b426680cc Mon Sep 17 00:00:00 2001
From: Petar Avramovic
Date: Wed, 4 Jun 2025 17:12:41 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
.../Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 54 +++-
.../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h | 2 +
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp| 240 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 28 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 2 +
.../AMDGPU/GlobalISel/buffer-schedule.ll | 2 +-
.../llvm.amdgcn.make.buffer.rsrc.ll | 2 +-
.../regbankselect-amdgcn.raw.buffer.load.ll | 59 ++---
...egbankselect-amdgcn.raw.ptr.buffer.load.ll | 59 ++---
...regbankselect-amdgcn.struct.buffer.load.ll | 59 ++---
...ankselect-amdgcn.struct.ptr.buffer.load.ll | 59 ++---
.../llvm.amdgcn.buffer.load-last-use.ll | 2 +-
.../llvm.amdgcn.raw.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.raw.ptr.atomic.buffer.load.ll | 42 +--
.../llvm.amdgcn.struct.atomic.buffer.load.ll | 48 ++--
...vm.amdgcn.struct.ptr.atomic.buffer.load.ll | 48 ++--
.../CodeGen/AMDGPU/swizzle.bit.extract.ll | 4 +-
17 files changed, 512 insertions(+), 240 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..b3edb959e14c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,73 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+typedef std::function<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>
+ReadLaneFnTy;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildReadLane);
B.buildMergeLikeInstr(SgprDst, SgprDstParts).getReg(0);
}
+
+void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI) {
+ return bu
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
https://github.com/petar-avramovic ready_for_review https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: Petar Avramovic (petar-avramovic)
Changes
Add rules for G_AMDGPU_BUFFER_LOAD and implement waterfall lowering
for divergent operands that must be sgpr.
---
Patch is 89.10 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/142790.diff
17 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp (+41-13)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+238-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+18-10)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h (+2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll (+1-1)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll (+1-1)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
(+28-31)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
(+28-31)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
(+28-31)
- (modified)
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
(+28-31)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll
(+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
(+22-20)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
(+22-20)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
(+25-23)
- (modified)
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll (+25-23)
- (modified) llvm/test/CodeGen/AMDGPU/swizzle.bit.extract.ll (+2-2)
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 00979f44f9d34..b3edb959e14c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -117,45 +117,73 @@ static LLT getReadAnyLaneSplitTy(LLT Ty) {
return LLT::scalar(32);
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI);
+typedef std::function<MachineInstrBuilder(MachineIRBuilder &, Register, Register)>
+ReadLaneFnTy;
+
+static Register buildReadLane(MachineIRBuilder &, Register,
+ const RegisterBankInfo &, ReadLaneFnTy);
static void unmergeReadAnyLane(MachineIRBuilder &B,
SmallVectorImpl &SgprDstParts,
LLT UnmergeTy, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
-SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
+SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
}
}
-static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
- const RegisterBankInfo &RBI) {
+static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
+ const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildRL) {
LLT Ty = B.getMRI()->getType(VgprSrc);
const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
if (Ty.getSizeInBits() == 32) {
-return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}},
{VgprSrc})
-.getReg(0);
+Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
+return BuildRL(B, SgprDst, VgprSrc).getReg(0);
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
+ BuildRL);
return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}
-void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
- Register VgprSrc, const RegisterBankInfo &RBI) {
+static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
+ Register VgprSrc, const RegisterBankInfo &RBI,
+ ReadLaneFnTy BuildReadLane) {
LLT Ty = B.getMRI()->getType(VgprSrc);
if (Ty.getSizeInBits() == 32) {
-B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
+BuildReadLane(B, SgprDst, VgprSrc);
return;
}
SmallVector SgprDstParts;
- unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);
+ unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSr
[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Add waterfall lowering in regbanklegalize (PR #142790)
petar-avramovic wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/142790?utm_source=stack-comment-downstack-mergeability-warning). > Learn more: https://graphite.dev/docs/merge-pull-requests * **#142790** (https://app.graphite.dev/github/pr/llvm/llvm-project/142790?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/142790?utm_source=stack-comment-view-in-graphite) * **#142789** (https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-icon) * **#142788** (https://app.graphite.dev/github/pr/llvm/llvm-project/142788?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/142790 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
