================
@@ -2789,6 +2806,42 @@ void 
SIInsertWaitcnts::analyzeSingleBBLoopDSLoads(MachineLoop *ML) {
     if (!AfterLastBarrier)
       continue;
 
+    // Check for instructions that write to LDS through DMA (global_load_lds,
+    // etc). These write to LDS but aren't DS instructions.
+    // Bail out if any appear after the barrier.
+    if (SIInstrInfo::mayWriteLDSThroughDMA(MI)) {
+      LLVM_DEBUG(
+          dbgs() << "Loop DS Wait Opt: LDS DMA write after last barrier, "
+                 << "skipping\n");
+      Info.Valid = false;
+      return;
+    }
+
+    // Check for tensor_load_to_lds instructions (MIMG, not caught by above)
+    if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
+        MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2) {
+      LLVM_DEBUG(dbgs() << "Loop DS Wait Opt: tensor_load_to_lds after last "
+                        << "barrier, skipping\n");
+      Info.Valid = false;
+      return;
+    }
+
+    // Check if this instruction uses or overwrites any tracked DS load
+    // destination. If so, baseline will have inserted a wait that flushes
+    // all loads up to that position (since DS loads complete in order).
+    // Overwrites also require the load to complete first to avoid races.
+    for (auto &[Reg, Position] : TrackedLoads) {
----------------
arsenm wrote:

```suggestion
    for (auto [Reg, Position] : TrackedLoads) {
```

https://github.com/llvm/llvm-project/pull/171952
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to