https://github.com/lucas-rami created 
https://github.com/llvm/llvm-project/pull/177205

When the rematerialization stage fails to reach the increased occupancy target in 
at least one region, the current implementation only reverts the effect of 
re-scheduling in the regions where the target could not be achieved. However, 
given that re-scheduling with a higher occupancy target puts more pressure on the 
scheduler to achieve lower maximum RP, potentially at the cost of lower ILP, 
region schedules made with higher occupancy targets are generally less 
desirable if the whole function is not able to meet that target. Therefore, if 
at least one region cannot reach its target, it makes sense to revert 
re-scheduling in all affected regions and go back to a schedule that was made 
with a lower occupancy target.

This implements such logic for the rematerialization stage, and adds a test to 
showcase that re-scheduling is indeed interrupted/reverted as soon as a 
re-scheduled region that does not meet the increased target occupancy is 
encountered.

As a minor improvement, this also sets higher occupancy targets for 
re-scheduling at the end of stage initialization in some cases. When 
rematerializations alone cannot reach the target, this pushes the scheduler to 
be more aggressive in reducing RP so that the target can still be met.

>From 46ca94a1c4da7dd40b36e774ca63310b36493bfe Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <[email protected]>
Date: Tue, 20 Jan 2026 23:15:08 +0000
Subject: [PATCH] [AMDGPU][Scheduler] Revert all regions when remat fails to
 increase occ.

When the rematerialization stage fails to increase occupancy in all
regions, the current implementation only reverts the effect of
re-scheduling in regions in which the increased occupancy target could
not be achieved. However, given that re-scheduling with a higher
occupancy target puts more pressure on the scheduler to achieve lower
maximum RP at the cost of potentially lower ILP as well, region
schedules made with higher occupancy targets are generally less
desirable if the whole function is not able to meet that target.
Therefore, if at least one region cannot reach its target, it makes
sense to revert re-scheduling in all affected regions to go back to
a schedule that was made with a lower occupancy target.

This implements such logic for the rematerialization stage, and adds a
test to showcase that re-scheduling is indeed interrupted/reverted as
soon as a re-scheduled region that does not meet the increased target
occupancy is encountered.

As a minor improvement, this also sets higher occupancy targets
for re-scheduling at the end of stage initialization in some cases. In
cases where rematerializations alone are not able to achieve the target,
this can push the scheduler to be more aggressive in reducing RP and
achieve the target.
---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp   |  75 ++++++++---
 llvm/lib/Target/AMDGPU/GCNSchedStrategy.h     |  29 ++++-
 ...ne-scheduler-sink-trivial-remats-debug.mir | 118 ++++++++++++++++++
 .../machine-scheduler-sink-trivial-remats.mir |  30 ++---
 4 files changed, 219 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp 
b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f656fb56e9e74..8bd47aaadae56 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -30,6 +30,7 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1309,11 +1310,7 @@ bool PreRARematStage::initGCNSchedStage() {
     dbgs() << ")\n";
   });
 
-  if (AchievedOcc > DAG.MinOccupancy) {
-    DAG.MinOccupancy = AchievedOcc;
-    SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
-  }
+  DAG.setTargetOccupancy(getStageTargetOccupancy());
   return true;
 }
 
@@ -1424,10 +1421,8 @@ bool UnclusteredHighRPStage::initGCNRegion() {
   // occupancy changes in the DAG and MFI.
   if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
     IsAnyRegionScheduled = true;
-    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) {
-      DAG.MinOccupancy = TempTargetOccupancy;
-      MFI.increaseOccupancy(MF, TempTargetOccupancy);
-    }
+    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
+      DAG.setTargetOccupancy(TempTargetOccupancy);
   }
   return IsSchedulingThisRegion;
 }
@@ -1476,6 +1471,23 @@ void GCNSchedStage::finalizeGCNRegion() {
     SavedMutations.swap(DAG.Mutations);
 }
 
+void PreRARematStage::finalizeGCNRegion() {
+  GCNSchedStage::finalizeGCNRegion();
+  // When the goal is to increase occupancy, all regions must reach the target
+  // occupancy for rematerializations to be possibly useful, otherwise we will
+  // just hurt latency for no benefit. If minimum occupancy drops below the
+  // target there is no point in trying to re-schedule further regions.
+  if (TargetOcc) {
+    RegionReverts.emplace_back(RegionIdx, Unsched, PressureBefore);
+    if (DAG.MinOccupancy < *TargetOcc) {
+      REMAT_DEBUG(dbgs() << "Region " << RegionIdx
+                         << " cannot meet occupancy target, interrupting "
+                            "re-scheduling in all regions\n");
+      RescheduleRegions.reset();
+    }
+  }
+}
+
 void GCNSchedStage::checkScheduling() {
   // Check the results of scheduling.
   PressureAfter = DAG.getRealRegPressure(RegionIdx);
@@ -1749,8 +1761,7 @@ bool 
ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
 }
 
 bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
-  return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
-         mayCauseSpilling(WavesAfter) || (TargetOcc && WavesAfter < TargetOcc);
+  return mayCauseSpilling(WavesAfter);
 }
 
 bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
@@ -1827,6 +1838,10 @@ void GCNSchedStage::modifyRegionSchedule(unsigned 
RegionIdx,
   DAG.Regions[RegionIdx].first = MIOrder.front();
 }
 
+unsigned PreRARematStage::getStageTargetOccupancy() const {
+  return TargetOcc ? *TargetOcc : MFI.getMinWavesPerEU();
+}
+
 bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
   const Function &F = MF.getFunction();
 
@@ -2168,13 +2183,31 @@ bool PreRARematStage::isReMaterializable(const 
MachineInstr &MI) {
 
 void PreRARematStage::finalizeGCNSchedStage() {
   // We consider that reducing spilling is always beneficial so we never
-  // rollback rematerializations in such cases. It's also possible that
-  // rescheduling lowers occupancy over the one achieved just through remats, 
in
-  // which case we do not want to rollback either (the rescheduling was already
-  // reverted in PreRARematStage::shouldRevertScheduling in such cases).
-  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
-  if (!TargetOcc || MaxOcc >= *TargetOcc)
+  // rollback rematerializations or revert scheduling in such cases.
+  if (!TargetOcc)
+    return;
+
+  // When increasing occupancy, it is possible that re-scheduling is not able 
to
+  // achieve the target occupancy in all regions, in which case re-scheduling 
in
+  // all regions should be reverted.
+  if (DAG.MinOccupancy >= *TargetOcc)
+    return;
+  for (const auto &[RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
+    REMAT_DEBUG(dbgs() << "Reverting re-scheduling in region " << RegionIdx
+                        << '\n');
+    DAG.Pressure[RegionIdx] = MaxPressure;
+    modifyRegionSchedule(RegionIdx, RegionBB[RegionIdx], OrigMIOrder);
+  }
+
+  // It is possible that re-scheduling lowers occupancy over the one achieved
+  // just through rematerializations, in which case we revert re-scheduling in
+  // all regions but do not roll back rematerializations.
+  if (AchievedOcc >= *TargetOcc) {
+    DAG.setTargetOccupancy(AchievedOcc);
     return;
+  }
+  // Reset the target occupancy to what it was pre-rematerialization.
+  DAG.setTargetOccupancy(*TargetOcc - 1);
 
   REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
   const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
@@ -2243,6 +2276,14 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
     RegionBounds.first = NewMI; // Insertion
 }
 
+void GCNScheduleDAGMILive::setTargetOccupancy(unsigned TargetOccupancy) {
+  MinOccupancy = TargetOccupancy;
+  if (MFI.getOccupancy() < TargetOccupancy)
+    MFI.increaseOccupancy(MF, MinOccupancy);
+  else
+    MFI.limitOccupancy(MinOccupancy);
+}
+
 static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
   const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
   return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h 
b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index a4485621f48bf..c06d2fca52831 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -308,6 +308,9 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive 
{
                               MachineBasicBlock::iterator MI,
                               MachineInstr *NewMI);
 
+  /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
+  void setTargetOccupancy(unsigned TargetOccupancy);
+
   void runSchedStages();
 
   std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID 
SchedStageID);
@@ -497,9 +500,31 @@ class PreRARematStage : public GCNSchedStage {
   /// objective is spilling reduction.
   std::optional<unsigned> TargetOcc;
   /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
-  /// Smaller than or equal to the target occupancy.
   unsigned AchievedOcc;
 
+  /// State of a region pre-re-scheduling but post-rematerializations that we
+  /// must keep to be able to revert re-scheduling effects.
+  struct RegionSchedRevert {
+    /// Region number;
+    unsigned RegionIdx;
+    /// Original instruction order (both debug and non-debug MIs).
+    std::vector<MachineInstr *> OrigMIOrder;
+    /// Maximum pressure recorded in the region.
+    GCNRegPressure MaxPressure;
+
+    RegionSchedRevert(unsigned RegionIdx,
+                      const std::vector<MachineInstr *> &OrigMIOrder,
+                      const GCNRegPressure &MaxPressure)
+        : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
+          MaxPressure(MaxPressure) {}
+  };
+  /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
+  /// regions.
+  SmallVector<RegionSchedRevert> RegionReverts;
+
+  /// Returns the occupancy the stage is trying to achieve.
+  unsigned getStageTargetOccupancy() const;
+
   /// Returns whether remat can reduce spilling or increase function occupancy
   /// by 1 through rematerialization. If it can do one, collects instructions 
in
   /// PreRARematStage::Rematerializations and sets the target occupancy in
@@ -524,6 +549,8 @@ class PreRARematStage : public GCNSchedStage {
 
   bool initGCNRegion() override;
 
+  void finalizeGCNRegion() override;
+
   bool shouldRevertScheduling(unsigned WavesAfter) override;
 
   PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
diff --git 
a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir 
b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
index 371753801d1a3..050ff23cd1f27 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
@@ -10,6 +10,10 @@
   define void @sink_and_inc_idx_when_skipping_small_regions_2() 
"amdgpu-flat-work-group-size"="1,64" {
     ret void
   }
+
+  define void @test_occ_inc_revert_all_regions() {
+    ret void
+  }
 ---
 name:            sink_and_inc_idx_when_skipping_small_region_1
 tracksRegLiveness: true
@@ -154,3 +158,117 @@ body:             |
     S_NOP 0, implicit %22
     S_ENDPGM 0
 ...
+# bb.1 cannot meet the occupancy target even by rematerializing %64 into it
+# even though rematerialization heuristics believes it can; scheduling should
+# be interrupted and reverted in all re-scheduled regions.
+---
+name:            test_occ_inc_revert_all_regions
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; DEBUG: Machine code for function test_occ_inc_revert_all_regions: IsSSA, 
NoPHIs, TracksLiveness
+  ; DEBUG: [PreRARemat] Retrying function scheduling with new min. occupancy 
of 7 from rematerializing (original was 7, target was 8)
+  ; DEBUG: Region 1 cannot meet occupancy target, interrupting re-scheduling 
in all regions
+  ; DEBUG: Reverting re-scheduling in region 0
+  ; DEBUG: Reverting re-scheduling in region 1
+  ; DEBUG-NOT: Reverting re-scheduling in region 3
+  ; DEBUG-NOT: Reverting re-scheduling in region 4
+  bb.0:
+    successors: %bb.1
+
+    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit 
$mode, implicit-def $m0
+    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit 
$mode, implicit-def $m0
+    %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit 
$mode, implicit-def $m0
+    %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit 
$mode, implicit-def $m0
+    %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit 
$mode, implicit-def $m0
+    %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit 
$mode, implicit-def $m0
+    %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit 
$mode, implicit-def $m0
+    %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit 
$mode, implicit-def $m0
+    %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit 
$mode, implicit-def $m0
+    %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit 
$mode, implicit-def $m0
+    %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit 
$mode, implicit-def $m0
+    %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit 
$mode, implicit-def $m0
+    %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit 
$mode, implicit-def $m0
+    %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit 
$mode, implicit-def $m0
+    %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit 
$mode, implicit-def $m0
+    %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit 
$mode, implicit-def $m0
+    %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit 
$mode, implicit-def $m0
+    %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit 
$mode, implicit-def $m0
+    %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit 
$mode, implicit-def $m0
+    %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit 
$mode, implicit-def $m0
+    %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit 
$mode, implicit-def $m0
+    %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit 
$mode, implicit-def $m0
+    %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit 
$mode, implicit-def $m0
+    %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit 
$mode, implicit-def $m0
+    %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit 
$mode, implicit-def $m0
+    %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit 
$mode, implicit-def $m0
+    %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit 
$mode, implicit-def $m0
+    %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit 
$mode, implicit-def $m0
+    %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit 
$mode, implicit-def $m0
+    %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit 
$mode, implicit-def $m0
+    %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit 
$mode, implicit-def $m0
+    %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit 
$mode, implicit-def $m0
+
+    %64:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit 
$mode
+
+  bb.1:
+    successors: %bb.2
+
+    S_NOP 0, implicit %64
+
+  bb.2:
+    successors: %bb.3
+
+    S_NOP 0, implicit %0,  implicit %1,  implicit %2,  implicit %3,  implicit 
%4,  implicit %5,  implicit %6,  implicit %7
+    S_NOP 0, implicit %8,  implicit %9,  implicit %10, implicit %11, implicit 
%12, implicit %13, implicit %14, implicit %15
+    S_NOP 0, implicit %16, implicit %17, implicit %18, implicit %19, implicit 
%20, implicit %21, implicit %22, implicit %23
+    S_NOP 0, implicit %24, implicit %25, implicit %26, implicit %27, implicit 
%28, implicit %29, implicit %30, implicit %31
+
+  bb.3:
+    successors: %bb.4
+
+    %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit 
$mode, implicit-def $m0
+    %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit 
$mode, implicit-def $m0
+    %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit 
$mode, implicit-def $m0
+    %35:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit 
$mode, implicit-def $m0
+    %36:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit 
$mode, implicit-def $m0
+    %37:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 37, implicit $exec, implicit 
$mode, implicit-def $m0
+    %38:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 38, implicit $exec, implicit 
$mode, implicit-def $m0
+    %39:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 39, implicit $exec, implicit 
$mode, implicit-def $m0
+    %40:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 40, implicit $exec, implicit 
$mode, implicit-def $m0
+    %41:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 41, implicit $exec, implicit 
$mode, implicit-def $m0
+    %42:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 42, implicit $exec, implicit 
$mode, implicit-def $m0
+    %43:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 43, implicit $exec, implicit 
$mode, implicit-def $m0
+    %44:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 44, implicit $exec, implicit 
$mode, implicit-def $m0
+    %45:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 45, implicit $exec, implicit 
$mode, implicit-def $m0
+    %46:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 46, implicit $exec, implicit 
$mode, implicit-def $m0
+    %47:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 47, implicit $exec, implicit 
$mode, implicit-def $m0
+    %48:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 48, implicit $exec, implicit 
$mode, implicit-def $m0
+    %49:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 49, implicit $exec, implicit 
$mode, implicit-def $m0
+    %50:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 50, implicit $exec, implicit 
$mode, implicit-def $m0
+    %51:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 51, implicit $exec, implicit 
$mode, implicit-def $m0
+    %52:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 52, implicit $exec, implicit 
$mode, implicit-def $m0
+    %53:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 53, implicit $exec, implicit 
$mode, implicit-def $m0
+    %54:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 54, implicit $exec, implicit 
$mode, implicit-def $m0
+    %55:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 55, implicit $exec, implicit 
$mode, implicit-def $m0
+    %56:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 56, implicit $exec, implicit 
$mode, implicit-def $m0
+    %57:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 57, implicit $exec, implicit 
$mode, implicit-def $m0
+    %58:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 58, implicit $exec, implicit 
$mode, implicit-def $m0
+    %59:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 59, implicit $exec, implicit 
$mode, implicit-def $m0
+    %60:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit 
$mode, implicit-def $m0
+    %61:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit 
$mode, implicit-def $m0
+    %62:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit 
$mode, implicit-def $m0
+    %63:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit 
$mode, implicit-def $m0
+
+    %65:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 65, implicit $exec, implicit 
$mode
+
+  bb.4:
+    S_NOP 0, implicit %32, implicit %33, implicit %34, implicit %35, implicit 
%36, implicit %37, implicit %38, implicit %39
+    S_NOP 0, implicit %40, implicit %41, implicit %42, implicit %43, implicit 
%44, implicit %45, implicit %46, implicit %47
+    S_NOP 0, implicit %48, implicit %49, implicit %50, implicit %51, implicit 
%52, implicit %53, implicit %54, implicit %55
+    S_NOP 0, implicit %56, implicit %57, implicit %58, implicit %59, implicit 
%60, implicit %61, implicit %62, implicit %63
+    S_NOP 0, implicit %65
+
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir 
b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index eaad4d6e56183..a6f652e1a7d24 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -352,8 +352,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
@@ -510,8 +510,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit 
[[V_CVT_I32_F64_e32_22]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
@@ -665,8 +665,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
@@ -678,8 +678,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.4(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.4:
@@ -862,8 +862,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit 
[[V_CVT_I32_F64_e32_24]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
@@ -876,8 +876,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.4(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]], implicit 
[[V_CVT_I32_F64_e32_25]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.4:
@@ -3084,9 +3084,13 @@ body:             |
   ; GFX908-NEXT:   [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
   ; GFX908-NEXT:   [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
   ; GFX908-NEXT:   [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
   ; GFX908-NEXT:   [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
   ; GFX908-NEXT:   [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
   ; GFX908-NEXT:   [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
   ; GFX908-NEXT:   [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
   ; GFX908-NEXT:   [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
   ; GFX908-NEXT:   [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
@@ -3096,11 +3100,7 @@ body:             |
   ; GFX908-NEXT:   [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
   ; GFX908-NEXT:   [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
   ; GFX908-NEXT:   [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
-  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
   ; GFX908-NEXT:   [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
-  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
   ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -4426,8 +4426,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.3(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_28]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.3:
@@ -4691,8 +4691,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.3(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.3:
@@ -4974,8 +4974,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.3(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_36:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_36]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.3:
@@ -8607,8 +8607,8 @@ body:             |
   ; GFX908-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
   ; GFX908-NEXT:   [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept 
V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_F64_I32_e32_]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
@@ -8760,8 +8760,8 @@ body:             |
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
   ; GFX908-NEXT:   [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
-  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit 
[[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64 = nofpexcept 
V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
+  ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit 
[[V_CVT_I32_F64_e32_23]]
   ; GFX908-NEXT:   S_NOP 0, implicit [[V_CVT_F64_I32_e32_]]
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to