================
@@ -2789,6 +2806,42 @@ void
SIInsertWaitcnts::analyzeSingleBBLoopDSLoads(MachineLoop *ML) {
if (!AfterLastBarrier)
continue;
+ // Check for instructions that write to LDS through DMA (global_load_lds,
+ // etc). These write to LDS but aren't DS instructions.
+ // Bail out if any appear after the barrier.
+ if (SIInstrInfo::mayWriteLDSThroughDMA(MI)) {
+ LLVM_DEBUG(
+ dbgs() << "Loop DS Wait Opt: LDS DMA write after last barrier, "
+ << "skipping\n");
+ Info.Valid = false;
+ return;
+ }
+
+ // Check for tensor_load_to_lds instructions (MIMG, not caught by above)
+ if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
+ MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2) {
+ LLVM_DEBUG(dbgs() << "Loop DS Wait Opt: tensor_load_to_lds after last "
+ << "barrier, skipping\n");
+ Info.Valid = false;
+ return;
+ }
+
+ // Check if this instruction uses or overwrites any tracked DS load
+ // destination. If so, baseline will have inserted a wait that flushes
+ // all loads up to that position (since DS loads complete in order).
+ // Overwrites also require the load to complete first to avoid races.
+ for (auto &[Reg, Position] : TrackedLoads) {
+ if (Position <= LastFlushedPosition)
+ continue; // Already flushed
+
+ if (MI.readsRegister(Reg, TRI) || MI.modifiesRegister(Reg, TRI)) {
----------------
arsenm wrote:
Can these two queries (`readsRegister` and `modifiesRegister`) be fused into a single check for any reference to the register?
https://github.com/llvm/llvm-project/pull/171952
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits