================
@@ -2904,6 +2905,68 @@ bool SIInsertWaitcnts::applyDSLoopWaitOpt(MachineInstr &MI,
return true;
}
+// Insert DS_CNT flush in preheaders of loops where DS wait relaxation was
+// applied. This is necessary because the relaxed wait counts inside the loop
+// are computed based on the DS loads issued at the end of the previous
+// iteration (via backedge), but the first iteration enters via the preheader.
+// We must ensure all DS loads from the preheader are complete before entering
+// the loop.
+bool SIInsertWaitcnts::insertDSPreheaderFlushes(MachineFunction &MF) {
+ bool Modified = false;
+
+ for (auto &[LoopHeader, Info] : LoopDSWaitOptCache) {
+ if (!Info.Valid || !Info.RelaxationApplied)
+ continue;
+
+ MachineLoop *ML = MLI->getLoopFor(LoopHeader);
+ if (!ML)
+ continue;
+
+ MachineBasicBlock *Preheader = ML->getLoopPreheader();
+ if (!Preheader)
+ continue;
+
+ // Insert s_wait_dscnt 0 at the end of the preheader (before the
+ // terminator)
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+ if (InsertPos == Preheader->end() && !Preheader->empty())
+ InsertPos = std::prev(Preheader->end());
+
+ // Check if there's already a DS wait at this position
+ bool NeedInsert = true;
+ if (InsertPos != Preheader->end() && InsertPos != Preheader->begin()) {
+ auto CheckPos = std::prev(InsertPos);
+ if (CheckPos->getOpcode() == AMDGPU::S_WAIT_DSCNT_soft ||
+ CheckPos->getOpcode() == AMDGPU::S_WAIT_DSCNT) {
+ if (CheckPos->getOperand(0).getImm() == 0)
+ NeedInsert = false;
+ else {
+ // Change existing wait to 0
+ CheckPos->getOperand(0).setImm(0);
+ NeedInsert = false;
+ Modified = true;
+ LLVM_DEBUG(dbgs() << "DS Loop Opt: Changed existing DS_CNT wait to 0"
+ << " in preheader ";
+ Preheader->printName(dbgs()); dbgs() << "\n");
+ }
+ }
+ }
+
+ if (NeedInsert) {
+ DebugLoc DL;
+ if (InsertPos != Preheader->end())
+ DL = InsertPos->getDebugLoc();
+ BuildMI(*Preheader, InsertPos, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft))
----------------
Pierre-vh wrote:
Don't add _soft waitcnts after this pass is done; this pass is supposed to
eliminate them. Add normal (non-soft) waits instead.
https://github.com/llvm/llvm-project/pull/171948
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits