llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Dhruva Chakrabarti (dhruvachak) <details> <summary>Changes</summary> Depends on https://github.com/llvm/llvm-project/pull/198472 PR #<!-- -->198472 skips unspilling a slot if a spill reload is reachable from entry along a path that does not contain a spill store. This patch builds on it by finding a basic block where an IMPLICIT_DEF can be inserted to provide a reaching definition on all paths to such reloads, allowing the unspill to proceed. This new def may extend the rewritten vreg's live range, so extra interference checks are performed over the extended region to pick an appropriate physical register. For the joint-dominance tests, an IMPLICIT_DEF insertion block is found, but no physical register is interference-free over the extended range, so the unspill is conservatively skipped. Assisted-by: Cursor/Claude Opus --- Full diff: https://github.com/llvm/llvm-project/pull/200126.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+100-6) - (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom-mir.mir (+9-4) - (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom.ll (+9-4) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index a8cc3757c57d2..da7a7d9a0bd42 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -601,11 +601,79 @@ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const { return false; }; - if (!llvm::all_of(Loads, IsLoadJointlyDominatedByStores)) { - LLVM_DEBUG( - dbgs() << "Skipping " << printReg(Slot, &TRI) - << ": some reachable load not jointly dominated by stores\n"); - continue; + // Collect basic blocks of any loads not jointly dominated by stores. After + // rewriting, we will attempt to insert a single IMPLICIT_DEF for those uses + // of NewVReg. + SmallVector<MachineBasicBlock *, 4> ProblematicLoadMBBs; + for (MachineInstr *L : Loads) { + if (!IsLoadJointlyDominatedByStores(L)) + ProblematicLoadMBBs.push_back(L->getParent()); + } + + // If we have problematic reloads, compute the IMPLICIT_DEF insertion + // block. Safety condition for the insertion block P: + // (a) P dominates every problematic load, and + // (b) On every entry->P path no spill store has executed yet (so the + // IMPLICIT_DEF cannot clobber a real spill-store COPY-def on + // any path that reaches it). + // + // Build MayStored, where MayStored[B] is true iff some entry->B path + // crossed a store block strictly before B. (b) is equivalent to + // "P is not in MayStored". + MachineBasicBlock *ImplicitDefMBB = nullptr; + if (!ProblematicLoadMBBs.empty()) { + SmallPtrSet<MachineBasicBlock *, 16> MayStored; + SmallVector<MachineBasicBlock *, 16> MayStoredWL; + for (MachineBasicBlock *SB : StoreBlocks) { + for (MachineBasicBlock *Succ : SB->successors()) { + if (MayStored.insert(Succ).second) + MayStoredWL.push_back(Succ); + } + } + while (!MayStoredWL.empty()) { + MachineBasicBlock *MBB = MayStoredWL.pop_back_val(); + for (MachineBasicBlock *Succ : MBB->successors()) { + if (MayStored.insert(Succ).second) + MayStoredWL.push_back(Succ); + } + } + + // We start at the nearest common dominator of all problematic load blocks + // and walk up the dominator tree until (b) holds. The + // walk always terminates: the entry block has no predecessors and so + // can never be in MayStored. + ImplicitDefMBB = ProblematicLoadMBBs.front(); + for (MachineBasicBlock *MBB : ArrayRef(ProblematicLoadMBBs).drop_front()) + ImplicitDefMBB = MDT.findNearestCommonDominator(ImplicitDefMBB, MBB); + while (MayStored.contains(ImplicitDefMBB)) { + assert(ImplicitDefMBB != MDT.getRoot() && + "MayStored unexpectedly reached the entry block"); + ImplicitDefMBB = MDT.getNode(ImplicitDefMBB)->getIDom()->getBlock(); + } + LLVM_DEBUG(dbgs() << "Selected IMPLICIT_DEF block for SS#" << Slot + << ": %bb." << ImplicitDefMBB->getNumber() << " (" + << ProblematicLoadMBBs.size() + << " problematic reload block(s))\n"); + } + + // The IMPLICIT_DEF may extend NewVReg's live range beyond the slot's + // stack LI, so the usual interference check against the stack LI alone + // is not sufficient. Conservatively, the extension covers every block + // reachable from ImplicitDefMBB up to (and including) the first store + // block on each path, where a COPY-def will re-define NewVReg. + SmallPtrSet<MachineBasicBlock *, 8> ImpDefExtension; + if (ImplicitDefMBB) { + ImpDefExtension.insert(ImplicitDefMBB); + SmallVector<MachineBasicBlock *, 8> ExtensionWL; + ExtensionWL.push_back(ImplicitDefMBB); + while (!ExtensionWL.empty()) { + MachineBasicBlock *MBB = ExtensionWL.pop_back_val(); + if (StoreBlocks.contains(MBB)) + continue; + for (MachineBasicBlock *Succ : MBB->successors()) + if (ImpDefExtension.insert(Succ).second) + ExtensionWL.push_back(Succ); + } } const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot); @@ -619,15 +687,41 @@ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const { if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free) continue; + // If we are inserting an IMPLICIT_DEF, require conservatively that + // PhysReg is free over every block in the IMPLICIT_DEF's extra coverage. + if (ImplicitDefMBB && + llvm::any_of(ImpDefExtension, [&](MachineBasicBlock *MBB) { + return LRM.checkInterference(LIS.getMBBStartIdx(MBB), + LIS.getMBBEndIdx(MBB), PhysReg); + })) { + LLVM_DEBUG(dbgs() << "Cannot use " << printReg(PhysReg, &TRI) + << " for SS#" << Slot + << ": IMPLICIT_DEF extension interferes\n"); + continue; + } + LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to " << printReg(PhysReg, &TRI) << '\n'); - const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot); Register NewVReg = MRI.createVirtualRegister(RC); for (MachineInstr *SpillMI : SpillReferences->second) replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg); + // Insert an IMPLICIT_DEF for the NewVReg, if required to satisfy reaching + // defs for every use of NewVReg. + if (ImplicitDefMBB) { + MachineBasicBlock::iterator InsertPt = + ImplicitDefMBB->SkipPHIsLabelsAndDebug(ImplicitDefMBB->begin()); + MachineInstr *ImpDef = + BuildMI(*ImplicitDefMBB, InsertPt, DebugLoc(), + TII.get(TargetOpcode::IMPLICIT_DEF), NewVReg); + LIS.InsertMachineInstrInMaps(*ImpDef); + LLVM_DEBUG(dbgs() << "Inserted IMPLICIT_DEF for " + << printReg(NewVReg, &TRI) << " in " + << printMBBReference(*ImplicitDefMBB) << '\n'); + } + // TODO: We should be able to transfer the information from the stack // slot's LiveInterval without recomputing from scratch with the // replacement vreg uses. diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom-mir.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom-mir.mir index 87a3276b10891..7ea072f5aaaa0 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom-mir.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom-mir.mir @@ -5,11 +5,16 @@ # RUN: -debug-only=amdgpu-rewrite-agpr-copy-mfma -filetype=null %s 2>&1 \ # RUN: | FileCheck %s -# It is legal for a spill reload to not be jointly dominated by the slot's -# spill stores. The AGPR rewrite pass must not unspill such a slot into a -# vreg, otherwise the compiler will crash. +# It is legal for a spill reload to not have a dominating spill store. When +# the AGPR rewrite pass unspills such a slot into a vreg, it must insert an +# IMPLICIT_DEF so the vreg has a def on all paths to such reloads. For the +# unspill to occur, the physical register must be interference-free for the +# extended live range produced by the IMPLICIT_DEF. In this case, no physical +# register is found that satisfies the interference property, so the unspill +# does not occur. -# CHECK: Skipping ${{[a-zA-Z0-9_]+}}: some reachable load not jointly dominated by stores +# CHECK: Selected IMPLICIT_DEF block for SS#{{[0-9]+}}: %bb.{{[0-9]+}} ({{[0-9]+}} problematic reload block(s)) +# CHECK: IMPLICIT_DEF extension interferes --- | define amdgpu_kernel void @rewrite_vgpr_mfma_to_agpr_spill_joint_dom(i1 %arg, <16 x float> %.sroa.366.2) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom.ll index 13a230ea5d9a8..cb0ce0dea57c4 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-spill-joint-dom.ll @@ -6,11 +6,16 @@ ; Regression test from https://github.com/llvm/llvm-project/issues/196671 -; It is legal for a spill reload to not be jointly dominated by the slot's -; spill stores. The AGPR rewrite pass must not unspill such a slot into a -; vreg, otherwise the compiler will crash. +; It is legal for a spill reload to not have a dominating spill store. When +; the AGPR rewrite pass unspills such a slot into a vreg, it must insert an +; IMPLICIT_DEF so the vreg has a def on all paths to such reloads. For the +; unspill to occur, the physical register must be interference-free for the +; extended live range produced by the IMPLICIT_DEF. In this case, no physical +; register is found that satisfies the interference property, so the unspill +; does not occur. -; CHECK: Skipping ${{[a-zA-Z0-9_]+}}: some reachable load not jointly dominated by stores +; CHECK: Selected IMPLICIT_DEF block for SS#{{[0-9]+}}: %bb.{{[0-9]+}} ({{[0-9]+}} problematic reload block(s)) +; CHECK: IMPLICIT_DEF extension interferes define amdgpu_kernel void @rewrite_vgpr_mfma_to_agpr_spill_joint_dom(i1 %arg, <16 x float> %.sroa.366.2) #0 { .lr.ph.i: `````````` </details> https://github.com/llvm/llvm-project/pull/200126 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
