https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/197579
>From f6e09bb2f2cf4cb367b124f46b18ef7321069648 Mon Sep 17 00:00:00 2001 From: Lucas Ramirez <[email protected]> Date: Sun, 26 Apr 2026 11:52:17 +0000 Subject: [PATCH 1/2] [CodeGen][AMDGPU] Prepare rematerializer for subreg remat support (NFC) This makes some NFCs to the rematerializer before starting to improve support for sub-register rematerialization. The main changes are the replacement of `Rematerializer::Reg::Dependency` type (essentially a pair of a machine operand index and a register index) in favor of a simple register index, dropping the machine operand index. The latter has no current uses and will lose meaning once we allow rematerializable registers to be defined by multiple MIs. Similarly, and for the same rationale, unrematerializable register dependencies are now tracked as a register/lanemask pair instead of a machine operand index. Other minor changes listed below. - Removal of `DefRegion` argument to `Rematerializer::recreteReg`. Registers are always re-created in their original region so there is no need to set their region again. - Removal of `InsertPos` unused argument to `Rematerializer::postRematerialization`. - Refactor of how AMDGPU's scheduler checks whether a given register is rematerializable. This will make the future functional change for subreg support minimal. --- llvm/include/llvm/CodeGen/Rematerializer.h | 59 +++--- llvm/lib/CodeGen/Rematerializer.cpp | 180 ++++++++++-------- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 35 ++-- llvm/unittests/CodeGen/RematerializerTest.cpp | 26 ++- 4 files changed, 171 insertions(+), 129 deletions(-) diff --git a/llvm/include/llvm/CodeGen/Rematerializer.h b/llvm/include/llvm/CodeGen/Rematerializer.h index f441433ac6ceb..366d5e1fd3a47 100644 --- a/llvm/include/llvm/CodeGen/Rematerializer.h +++ b/llvm/include/llvm/CodeGen/Rematerializer.h @@ -98,10 +98,11 @@ class Rematerializer { /// A rematerializable register defined by a single machine instruction. /// /// A rematerializable register has a set of dependencies, which correspond - /// to the unique read register operands of its defining instruction. - /// They are identified by their machine operand index, and can themselves be - /// rematerializable. Operand indices corresponding to unrematerializable - /// dependencies are managed by and queried from the rematerializer. + /// to the unique read register operands of its defining instruction and which + /// can themselves be rematerializable. Operand indices corresponding to + /// unrematerializable dependencies are managed by and queried from the + /// rematerializer, whereas rematerializable ones are part of this struct and + /// identified through their register index. /// /// A rematerializable register also has an arbitrary number of users in an /// arbitrary number of regions, potentially including its own defining @@ -119,21 +120,9 @@ class Rematerializer { using RegionUsers = SmallDenseSet<MachineInstr *, 4>; /// Uses of the register, mapped by region. SmallDenseMap<unsigned, RegionUsers, 2> Uses; - - /// A read register operand of \p DefMI that is rematerializable (according - /// to the rematerializer). - struct Dependency { - /// The register's machine operand index in \p DefMI. - unsigned MOIdx; - /// The corresponding register's index in the rematerializer. - RegisterIdx RegIdx; - - Dependency(unsigned MOIdx, RegisterIdx RegIdx) - : MOIdx(MOIdx), RegIdx(RegIdx) {} - }; /// This register's rematerializable dependencies, one per unique /// rematerializable register operand. - SmallVector<Dependency, 2> Dependencies; + SmallVector<RegisterIdx, 2> Dependencies; /// Returns the rematerializable register from its defining instruction. Register getDefReg() const { @@ -250,12 +239,12 @@ class Rematerializer { /// register. bool isRematerializedRegister(RegisterIdx RegIdx) const { assert(RegIdx < Regs.size() && "out of bounds"); - return RegIdx >= UnrematableOprds.size(); + return RegIdx >= UnrematableDeps.size(); } /// Returns the origin index of rematerializable register \p RegIdx. RegisterIdx getOriginOf(RegisterIdx RematRegIdx) const { assert(isRematerializedRegister(RematRegIdx) && "not a rematerialization"); - return Origins[RematRegIdx - UnrematableOprds.size()]; + return Origins[RematRegIdx - UnrematableDeps.size()]; } /// If \p RegIdx is a rematerialization, returns its origin's index. If it is /// an original register's index, returns the same index. @@ -264,10 +253,11 @@ class Rematerializer { return getOriginOf(RegIdx); return RegIdx; } - /// Returns operand indices corresponding to unrematerializable operands for - /// any register \p RegIdx. - ArrayRef<unsigned> getUnrematableOprds(RegisterIdx RegIdx) const { - return UnrematableOprds[getOriginOrSelf(RegIdx)]; + /// Returns unreamaterializable read lanes of register operands for + /// register \p RegIdx. + ArrayRef<std::pair<Register, LaneBitmask>> + getUnrematableDeps(RegisterIdx RegIdx) const { + return UnrematableDeps[getOriginOrSelf(RegIdx)]; } /// If \p MI's first operand defines a register and that register is a @@ -363,7 +353,7 @@ class Rematerializer { /// be guaranteed to have enough space for an extra element. RegisterIdx rematerializeReg(RegisterIdx RegIdx, unsigned UseRegion, MachineBasicBlock::iterator InsertPos, - SmallVectorImpl<Reg::Dependency> &&Dependencies); + SmallVectorImpl<RegisterIdx> &&Dependencies); /// Re-creates a previously deleted register \p RegIdx before \p InsertPos, /// which must be in the register's original defining region. \p DefReg must @@ -401,6 +391,12 @@ class Rematerializer { /// Uses according to its current live interval. bool isMOIdenticalAtUses(MachineOperand &MO, ArrayRef<SlotIndex> Uses) const; + /// Determines whether lanes \p Mask of register \p Reg habe the same value at + /// all \p Uses as at \p RefSlot. This implies that it is also available at + /// all \p Uses according to its current live interval. + bool isRegIdenticalAtUses(Register Reg, LaneBitmask Mask, SlotIndex RefSlot, + ArrayRef<SlotIndex> Uses) const; + /// Finds the closest rematerialization of register \p RegIdx in region \p /// Region that exists before slot \p Before. If no such rematerialization /// exists, returns \ref Rematerializer::NoReg. @@ -437,11 +433,11 @@ class Rematerializer { /// deleted. Indices inside this vector serve as handles for rematerializable /// registers. SmallVector<Reg> Regs; - /// For each original register, stores indices of its read register operands - /// which are unrematerializable. This doesn't change after the initial - /// collection period, so the size of the vector indicates the number of - /// original registers. - SmallVector<SmallVector<unsigned, 2>> UnrematableOprds; + /// For each original register, stores unrematerializable read lanes of + /// register operands. This doesn't change after the initial collection + /// period, so the size of the vector indicates the number of original + /// registers. + SmallVector<SmallVector<std::pair<Register, LaneBitmask>, 2>> UnrematableDeps; /// Indicates the original register index of each rematerialization, in the /// order in which they are created. The size of the vector indicates the /// total number of rematerializations ever created, including those that were @@ -463,9 +459,8 @@ class Rematerializer { DenseSet<RegisterIdx> LISUpdates; /// Common post-processing step after creating a new register \p RematRegIdx - /// at \p InsertPos based on register \p ModelRegIdx. - void postRematerialization(RegisterIdx ModelRegIdx, RegisterIdx RematRegIdx, - MachineBasicBlock::iterator InsertPos); + /// based on register \p ModelRegIdx. + void postRematerialization(RegisterIdx ModelRegIdx, RegisterIdx RematRegIdx); /// During the analysis phase, creates a \ref Rematerializer::Reg object for /// virtual register \p VirtRegIdx if it is rematerializable. \p MIRegion maps diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp index 26e468cd00475..a77610ca20472 100644 --- a/llvm/lib/CodeGen/Rematerializer.cpp +++ b/llvm/lib/CodeGen/Rematerializer.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/Rematerializer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineOperand.h" @@ -93,21 +92,21 @@ Rematerializer::rematerializeToPos(RegisterIdx RootIdx, unsigned UseRegion, assert(!DRI.DependencyMap.contains(RootIdx)); LLVM_DEBUG(dbgs() << "Rematerializing " << printID(RootIdx) << '\n'); - SmallVector<Reg::Dependency, 2> NewDeps; + SmallVector<RegisterIdx, 2> NewDeps; // Copy all dependencies because recursive rematerialization of dependencies // may invalidate references to the backing vector of registers. - SmallVector<Reg::Dependency, 2> OldDeps(getReg(RootIdx).Dependencies); - for (const Reg::Dependency &Dep : OldDeps) { + SmallVector<RegisterIdx, 2> OldDeps(getReg(RootIdx).Dependencies); + for (RegisterIdx DepRegIdx : OldDeps) { // Recursively rematerialize required dependencies at the same position as // the root. Registers form a DAG so the recursion is guaranteed to // terminate. - auto RematIdx = DRI.DependencyMap.find(Dep.RegIdx); + auto RematIdx = DRI.DependencyMap.find(DepRegIdx); RegisterIdx NewDepRegIdx; if (RematIdx == DRI.DependencyMap.end()) - NewDepRegIdx = rematerializeToPos(Dep.RegIdx, UseRegion, InsertPos, DRI); + NewDepRegIdx = rematerializeToPos(DepRegIdx, UseRegion, InsertPos, DRI); else NewDepRegIdx = RematIdx->second; - NewDeps.emplace_back(Dep.MOIdx, NewDepRegIdx); + NewDeps.push_back(NewDepRegIdx); } RegisterIdx NewIdx = rematerializeReg(RootIdx, UseRegion, InsertPos, std::move(NewDeps)); @@ -170,9 +169,9 @@ void Rematerializer::transferUserImpl(RegisterIdx FromRegIdx, // new register. if (RegisterIdx UserRegIdx = getDefRegIdx(UserMI); UserRegIdx != NoReg) { // Look for the user's dependency that matches the register. - for (Reg::Dependency &Dep : Regs[UserRegIdx].Dependencies) { - if (Dep.RegIdx == FromRegIdx) { - Dep.RegIdx = ToRegIdx; + for (RegisterIdx &DepRegIdx : Regs[UserRegIdx].Dependencies) { + if (DepRegIdx == FromRegIdx) { + DepRegIdx = ToRegIdx; return; } } @@ -201,8 +200,7 @@ void Rematerializer::updateLiveIntervals() { }); // Update intervals for unrematerializable operands. - for (unsigned MOIdx : getUnrematableOprds(RegIdx)) { - Register UnrematReg = UpdateReg.DefMI->getOperand(MOIdx).getReg(); + for (const auto &[UnrematReg, Mask] : getUnrematableDeps(RegIdx)) { if (!SeenUnrematRegs.insert(UnrematReg).second) continue; LIS.removeInterval(UnrematReg); @@ -220,11 +218,9 @@ void Rematerializer::updateLiveIntervals() { LIS.splitSeparateComponents(LI, SplitLIs); } LLVM_DEBUG( - dbgs() << " Re-computed interval for register " - << printReg(UnrematReg, &TRI, - UpdateReg.DefMI->getOperand(MOIdx).getSubReg(), - &MRI) - << '\n'); + dbgs() << " Re-computed interval for unrematerializable register " + << printReg(UnrematReg, &TRI, 0, &MRI) << " with lanemask " + << Mask << '\n'); } } LISUpdates.clear(); @@ -232,15 +228,21 @@ void Rematerializer::updateLiveIntervals() { bool Rematerializer::isMOIdenticalAtUses(MachineOperand &MO, ArrayRef<SlotIndex> Uses) const { - if (Uses.empty()) - return true; - Register Reg = MO.getReg(); unsigned SubIdx = MO.getSubReg(); LaneBitmask Mask = SubIdx ? TRI.getSubRegIndexLaneMask(SubIdx) - : MRI.getMaxLaneMaskForVReg(Reg); + : MRI.getMaxLaneMaskForVReg(MO.getReg()); + return isRegIdenticalAtUses( + MO.getReg(), Mask, + LIS.getInstructionIndex(*MO.getParent()).getRegSlot(true), Uses); +} + +bool Rematerializer::isRegIdenticalAtUses(Register Reg, LaneBitmask Mask, + SlotIndex RefSlot, + ArrayRef<SlotIndex> Uses) const { + if (Uses.empty()) + return true; const LiveInterval &LI = LIS.getInterval(Reg); - const VNInfo *DefVN = - LI.getVNInfoAt(LIS.getInstructionIndex(*MO.getParent()).getRegSlot(true)); + const VNInfo *DefVN = LI.getVNInfoAt(RefSlot); for (SlotIndex Use : Uses) { if (!isIdenticalAtUse(*DefVN, Mask, Use, LI)) return false; @@ -284,13 +286,13 @@ void Rematerializer::deleteRegIfUnused(RegisterIdx RootIdx) { do { // A deleted register's dependencies may be deletable too. const Reg &DeleteReg = getReg(DepDAG.pop_back_val()); - for (const Reg::Dependency &Dep : DeleteReg.Dependencies) { + for (RegisterIdx DepRegIdx : DeleteReg.Dependencies) { // All dependencies lose a user (the deleted register). - Reg &DepReg = Regs[Dep.RegIdx]; + Reg &DepReg = Regs[DepRegIdx]; DepReg.eraseUser(DeleteReg.DefMI, DeleteReg.DefRegion); if (DepReg.Uses.empty()) { - DeleteOrder.push_back(Dep.RegIdx); - DepDAG.push_back(Dep.RegIdx); + DeleteOrder.push_back(DepRegIdx); + DepDAG.push_back(DepRegIdx); } } } while (!DepDAG.empty()); @@ -359,7 +361,7 @@ Rematerializer::Rematerializer(MachineFunction &MF, bool Rematerializer::analyze() { Regs.clear(); - UnrematableOprds.clear(); + UnrematableDeps.clear(); Origins.clear(); Rematerializations.clear(); RegionMBB.clear(); @@ -398,7 +400,7 @@ bool Rematerializer::analyze() { if (!SeenRegs[I]) addRegIfRematerializable(I, MIRegion, SeenRegs); } - assert(Regs.size() == UnrematableOprds.size()); + assert(Regs.size() == UnrematableDeps.size()); LLVM_DEBUG({ for (RegisterIdx I = 0, E = getNumRegs(); I < E; ++I) @@ -443,27 +445,35 @@ void Rematerializer::addRegIfRematerializable( if (RematReg.Uses.empty()) return; - // Collect the candidate's dependencies. If the same register is used - // multiple times we just need to consider it once. - SmallDenseSet<Register, 4> AllDepRegs; - SmallVector<unsigned, 2> UnrematDeps; - for (const auto &[MOIdx, MO] : enumerate(RematReg.DefMI->operands())) { + // Collect the candidate's dependencies, rematerializable or not. If the same + // rematerializable register is used multiple times we just need to consider + // it once. + SmallSetVector<RegisterIdx, 2> RematDeps; + SmallMapVector<Register, LaneBitmask, 2> UnrematDeps; + for (const MachineOperand &MO : DefMI.all_uses()) { Register DepReg = getRegDependency(MO); - if (!DepReg || !AllDepRegs.insert(DepReg).second) + if (!DepReg) continue; unsigned DepRegIdx = DepReg.virtRegIndex(); if (!SeenRegs[DepRegIdx]) addRegIfRematerializable(DepRegIdx, MIRegion, SeenRegs); - if (auto DepIt = RegToIdx.find(DepReg); DepIt != RegToIdx.end()) - RematReg.Dependencies.push_back(Reg::Dependency(MOIdx, DepIt->second)); - else - UnrematDeps.push_back(MOIdx); + if (auto DepIt = RegToIdx.find(DepReg); DepIt != RegToIdx.end()) { + RematDeps.insert(DepIt->second); + } else { + LaneBitmask &CurrentMask = + UnrematDeps.try_emplace(DepReg, LaneBitmask::getNone()).first->second; + LaneBitmask Mask = MO.getSubReg() + ? TRI.getSubRegIndexLaneMask(MO.getSubReg()) + : MRI.getMaxLaneMaskForVReg(DepReg); + CurrentMask |= Mask; + } } // The register is rematerializable. + RematReg.Dependencies = RematDeps.takeVector(); RegToIdx.insert({DefReg, Regs.size()}); Regs.push_back(RematReg); - UnrematableOprds.push_back(UnrematDeps); + UnrematableDeps.push_back(UnrematDeps.takeVector()); } bool Rematerializer::isMIRematerializable(const MachineInstr &MI) const { @@ -497,10 +507,10 @@ RegisterIdx Rematerializer::getDefRegIdx(const MachineInstr &MI) const { return UserRegIt->second; } -RegisterIdx Rematerializer::rematerializeReg( - RegisterIdx RegIdx, unsigned UseRegion, - MachineBasicBlock::iterator InsertPos, - SmallVectorImpl<Reg::Dependency> &&Dependencies) { +RegisterIdx +Rematerializer::rematerializeReg(RegisterIdx RegIdx, unsigned UseRegion, + MachineBasicBlock::iterator InsertPos, + SmallVectorImpl<RegisterIdx> &&Dependencies) { RegisterIdx NewRegIdx = Regs.size(); Reg &NewReg = Regs.emplace_back(); @@ -523,7 +533,7 @@ RegisterIdx Rematerializer::rematerializeReg( *FromReg.DefMI); NewReg.DefMI = &*std::prev(InsertPos); RegToIdx.insert({NewDefReg, NewRegIdx}); - postRematerialization(RegIdx, NewRegIdx, InsertPos); + postRematerialization(RegIdx, NewRegIdx); noteRegCreated(NewRegIdx); LLVM_DEBUG(dbgs() << "** Rematerialized " << printID(RegIdx) << " as " @@ -536,7 +546,7 @@ void Rematerializer::recreateReg(RegisterIdx RegIdx, Register DefReg) { assert(RegToIdx.contains(DefReg) && "unknown defined register"); assert(RegToIdx.at(DefReg) == RegIdx && "incorrect defined register"); - assert(!getReg(RegIdx).DefMI && "register is still alive"); + assert(!getReg(RegIdx).isAlive() && "register is still alive"); Reg &OriginReg = Regs[RegIdx]; @@ -553,7 +563,7 @@ void Rematerializer::recreateReg(RegisterIdx RegIdx, assert(Rematerializations.contains(RegIdx) && "expected remats"); ModelRegIdx = *Rematerializations.at(RegIdx).begin(); } else { - assert(getReg(getOriginOf(RegIdx)).DefMI && "expected alive origin"); + assert(getReg(getOriginOf(RegIdx)).isAlive() && "expected alive origin"); ModelRegIdx = getOriginOf(RegIdx); } const MachineInstr &ModelDefMI = *getReg(ModelRegIdx).DefMI; @@ -561,14 +571,13 @@ void Rematerializer::recreateReg(RegisterIdx RegIdx, TII.reMaterialize(*RegionMBB[OriginReg.DefRegion], InsertPos, DefReg, 0, ModelDefMI); OriginReg.DefMI = &*std::prev(InsertPos); - postRematerialization(ModelRegIdx, RegIdx, InsertPos); + postRematerialization(ModelRegIdx, RegIdx); LLVM_DEBUG(dbgs() << "** Recreated " << printID(RegIdx) << " as " << printRematReg(RegIdx) << '\n'); } -void Rematerializer::postRematerialization( - RegisterIdx ModelRegIdx, RegisterIdx RematRegIdx, - MachineBasicBlock::iterator InsertPos) { +void Rematerializer::postRematerialization(RegisterIdx ModelRegIdx, + RegisterIdx RematRegIdx) { // The start of the new register's region may have changed. Reg &ModelReg = Regs[ModelRegIdx], &RematReg = Regs[RematRegIdx]; @@ -580,21 +589,19 @@ void Rematerializer::postRematerialization( // Replace dependencies as needed in the rematerialized MI. All dependencies // of the latter gain a new user. auto ZipedDeps = zip_equal(ModelReg.Dependencies, RematReg.Dependencies); - for (const auto &[OldDep, NewDep] : ZipedDeps) { - assert(OldDep.MOIdx == NewDep.MOIdx && "operand mismatch"); - LLVM_DEBUG(dbgs() << " Operand #" << OldDep.MOIdx << ": " - << printID(OldDep.RegIdx) << " -> " - << printID(NewDep.RegIdx) << '\n'); - - Reg &NewDepReg = Regs[NewDep.RegIdx]; - if (OldDep.RegIdx != NewDep.RegIdx) { - Register OldDefReg = ModelReg.DefMI->getOperand(OldDep.MOIdx).getReg(); - RematReg.DefMI->substituteRegister(OldDefReg, NewDepReg.getDefReg(), 0, - TRI); - LISUpdates.insert(OldDep.RegIdx); + for (const auto &[OldDepRegIdx, NewDepRegIdx] : ZipedDeps) { + LLVM_DEBUG(dbgs() << " Dependency: " << printID(OldDepRegIdx) << " -> " + << printID(NewDepRegIdx) << '\n'); + + Reg &NewDepReg = Regs[NewDepRegIdx]; + if (OldDepRegIdx != NewDepRegIdx) { + Reg &OldDepReg = Regs[OldDepRegIdx]; + RematReg.DefMI->substituteRegister(OldDepReg.getDefReg(), + NewDepReg.getDefReg(), 0, TRI); + LISUpdates.insert(OldDepRegIdx); } NewDepReg.addUser(RematReg.DefMI, RematReg.DefRegion); - LISUpdates.insert(NewDep.RegIdx); + LISUpdates.insert(NewDepRegIdx); } } @@ -651,8 +658,8 @@ Printable Rematerializer::printDependencyDAG(RegisterIdx RootIdx) const { [&](RegisterIdx RegIdx, unsigned Depth) -> void { unsigned MaxDepth = std::max(RegDepths.lookup_or(RegIdx, Depth), Depth); RegDepths.emplace_or_assign(RegIdx, MaxDepth); - for (const Reg::Dependency &Dep : getReg(RegIdx).Dependencies) - WalkTree(Dep.RegIdx, Depth + 1); + for (RegisterIdx DepRegIdx : getReg(RegIdx).Dependencies) + WalkTree(DepRegIdx, Depth + 1); }; WalkTree(RootIdx, 0); @@ -676,7 +683,7 @@ Printable Rematerializer::printID(RegisterIdx RegIdx) const { return Printable([&, RegIdx](raw_ostream &OS) { const Reg &PrintReg = getReg(RegIdx); OS << '(' << RegIdx << '/'; - if (!PrintReg.DefMI) { + if (!PrintReg.isAlive()) { OS << "<dead>"; } else { OS << printReg(PrintReg.getDefReg(), &TRI, @@ -690,21 +697,33 @@ Printable Rematerializer::printRematReg(RegisterIdx RegIdx, bool SkipRegions) const { return Printable([&, RegIdx, SkipRegions](raw_ostream &OS) { const Reg &PrintReg = getReg(RegIdx); + OS << printID(RegIdx); if (!SkipRegions) { - OS << printID(RegIdx) << " [" << PrintReg.DefRegion; + OS << " [" << PrintReg.DefRegion; if (!PrintReg.Uses.empty()) { - assert(PrintReg.DefMI && "dead register cannot have uses"); + assert(PrintReg.isAlive() && "dead register cannot have uses"); const LiveInterval &LI = LIS.getInterval(PrintReg.getDefReg()); // First display all regions in which the register is live-through and // not used. bool First = true; - for (const auto [I, Bounds] : enumerate(Regions)) { - if (Bounds.first == Bounds.second) + for (const auto &[I, Bounds] : enumerate(Regions)) { + if (PrintReg.Uses.contains(I)) continue; - if (!PrintReg.Uses.contains(I) && - LI.liveAt(LIS.getInstructionIndex(*Bounds.first)) && - LI.liveAt(LIS.getInstructionIndex(*std::prev(Bounds.second)) - .getRegSlot())) { + // The register must be live at the live-ins and live-outs of the + // region. + MachineBasicBlock::iterator LiveIn = + skipDebugInstructionsForward(Bounds.first, Bounds.second); + if (LiveIn == Bounds.second) { + // The region has no non-debug instructions, it's hard to assess + // whether the register is live across it without an index. + continue; + } + // LiveIn is inside the range and a non-debug instruction so we know + // this will also point to a non-debug instruction within the region. + MachineBasicBlock::iterator LiveOut = skipDebugInstructionsBackward( + std::prev(Bounds.second), Bounds.first); + if (LI.liveAt(LIS.getInstructionIndex(*LiveIn)) && + LI.liveAt(LIS.getInstructionIndex(*LiveOut).getDeadSlot())) { OS << (First ? " - " : ",") << I; First = false; } @@ -719,11 +738,12 @@ Printable Rematerializer::printRematReg(RegisterIdx RegIdx, } OS << "] "; } - OS << printID(RegIdx) << ' '; - PrintReg.DefMI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, - /*SkipDebugLoc=*/false, /*AddNewLine=*/false); - OS << " @ "; - LIS.getInstructionIndex(*PrintReg.DefMI).print(OS); + if (PrintReg.isAlive()) { + PrintReg.DefMI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, + /*SkipDebugLoc=*/false, /*AddNewLine=*/false); + OS << " @ "; + LIS.getInstructionIndex(*PrintReg.DefMI).print(OS); + } }); } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 8a6c1d5e21e85..82f013dc566ab 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1511,23 +1511,30 @@ bool PreRARematStage::initGCNSchedStage() { // We further filter the registers that we can rematerialize based on our // current tracking capabilities in the stage. The user cannot itself be // marked rematerializable, and no register operand of the defining MI can - // be marked rematerializable. + // be marked rematerializable. We also do not rematerialize an instruction + // if it uses registers that aren't available at its use. This ensures that + // we are not extending any live range while rematerializing. MachineInstr *UseMI = *CandReg.Uses.begin()->getSecond().begin(); const MachineOperand &UseMO = UseMI->getOperand(0); if (UseMO.isReg() && MarkedRegs.contains(UseMO.getReg())) continue; - if (llvm::any_of(CandReg.DefMI->all_uses(), - [&MarkedRegs](const MachineOperand &MO) { - return MarkedRegs.contains(MO.getReg()); - })) - continue; - - // Do not rematerialize an instruction if it uses registers that aren't - // available at its use. This ensures that we are not extending any live - // range while rematerializing. SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true); - if (!VirtRegAuxInfo::allUsesAvailableAt(CandReg.DefMI, UseIdx, *DAG.LIS, - DAG.MRI, *DAG.TII)) + SlotIndex RefIdx = + DAG.LIS->getInstructionIndex(*CandReg.DefMI).getRegSlot(true); + if (llvm::any_of(CandReg.Dependencies, [&](RegisterIdx DepRegIdx) { + const Rematerializer::Reg &DepReg = Remater.getReg(DepRegIdx); + Register DepDefReg = DepReg.getDefReg(); + return MarkedRegs.contains(DepDefReg) || + !Remater.isRegIdenticalAtUses(DepDefReg, DepReg.Mask, RefIdx, + {UseIdx}); + })) + continue; + if (llvm::any_of(Remater.getUnrematableDeps(RegIdx), + [&](const std::pair<Register, LaneBitmask> &RegAndMask) { + const auto &[Reg, Mask] = RegAndMask; + return !Remater.isRegIdenticalAtUses(Reg, Mask, RefIdx, + {UseIdx}); + })) continue; MarkedRegs.insert(CandReg.getDefReg()); @@ -2957,8 +2964,8 @@ void PreRARematStage::ScoredRemat::rematerialize( Rematerializer &Remater) const { const Rematerializer::Reg &Reg = Remater.getReg(RegIdx); Rematerializer::DependencyReuseInfo DRI; - for (const Rematerializer::Reg::Dependency &Dep : Reg.Dependencies) - DRI.reuse(Dep.RegIdx); + for (RegisterIdx DepRegIdx : Reg.Dependencies) + DRI.reuse(DepRegIdx); unsigned UseRegion = Reg.Uses.begin()->first; Remater.rematerializeToRegion(RegIdx, UseRegion, DRI); } diff --git a/llvm/unittests/CodeGen/RematerializerTest.cpp b/llvm/unittests/CodeGen/RematerializerTest.cpp index 5435e98ac91d5..365045d3f066d 100644 --- a/llvm/unittests/CodeGen/RematerializerTest.cpp +++ b/llvm/unittests/CodeGen/RematerializerTest.cpp @@ -455,7 +455,7 @@ TEST_F(RematerializerTest, EmptyRegion) { /// Checks that only registers with a single definition are rematerializable, /// even when registers are made up of multiple sub-registers each with their /// own definition. -TEST_F(RematerializerTest, SubReg) { +TEST_F(RematerializerTest, SubRegRematSupport) { StringRef MIRBody = R"MIR( bb.0: undef %01.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode @@ -464,10 +464,21 @@ TEST_F(RematerializerTest, SubReg) { undef %2.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode undef %34.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - + + undef %56.sub0:sreg_64 = S_MOV_B32 5 + %56.sub1:sreg_64 = S_MOV_B32 6, implicit-def $m0 + + undef %78.sub0:sreg_64 = S_MOV_B32 7 + S_NOP 0, implicit %78.sub0 + %78.sub1:sreg_64 = S_MOV_B32 8 + + undef %99.sub0:sreg_64 = S_MOV_B32 9 + %99.sub1:sreg_64 = S_MOV_B32 %99.sub0 + bb.1: %34.sub1:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode - S_NOP 0, implicit %01, implicit %2, implicit %34 + + S_NOP 0, implicit %01, implicit %2, implicit %34, implicit %56, implicit %78, implicit %99 S_ENDPGM 0 )MIR"; rematerializerTest(MIRBody, [](RematerializerWrapper &RW) { @@ -476,6 +487,15 @@ TEST_F(RematerializerTest, SubReg) { const unsigned MBB0 = 0, MBB1 = 1; const RegisterIdx Cst2 = 0; + // - %01 is not rematerializable because it has multiplie definitions. + // - %34 is not rematerializable because it is defined over multiple + // regions. + // - %56 is not rematerializable because the second defining MI is + // unrematerializable due to the implicit def. + // - %78 is not rematerializable because it is read by an MI not defining it + // before its last definition. + // - %99 is not rematerializable because it has multiplie definitions. + RegisterIdx RematCst2 = RW->rematerializeToRegion(Cst2, MBB1, DRI); RW.moveMIs(MBB0, MBB1, 1); ASSERT_REGION_SIZES(); >From bf1040451db1145e1cf22d7334acd3dfa3b5b6a6 Mon Sep 17 00:00:00 2001 From: Lucas Ramirez <[email protected]> Date: Tue, 9 Jun 2026 16:04:43 +0000 Subject: [PATCH 2/2] Re-add include --- llvm/lib/CodeGen/Rematerializer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp index a77610ca20472..d8a9c6865d24e 100644 --- a/llvm/lib/CodeGen/Rematerializer.cpp +++ b/llvm/lib/CodeGen/Rematerializer.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Rematerializer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineOperand.h" _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
