This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 5e6ca9f9145 HBASE-28158 Decouple RIT list management from TRSP (#7449)
5e6ca9f9145 is described below

commit 5e6ca9f9145b1eb9b11f37cd8200fabb34a672fe
Author: Umesh <[email protected]>
AuthorDate: Mon Dec 1 21:26:33 2025 +0530

    HBASE-28158 Decouple RIT list management from TRSP (#7449)
    
    Co-authored-by: ukumawat <[email protected]>
    
    Signed-off-by: Andrew Purtell <[email protected]>
    Signed-off-by: Duo Zhang <[email protected]>
---
 .../hadoop/hbase/rsgroup/RSGroupAdminServer.java   |  17 +--
 .../hbase/rsgroup/TestRSGroupsOfflineMode.java     |   2 +-
 .../org/apache/hadoop/hbase/master/DeadServer.java |   9 ++
 .../org/apache/hadoop/hbase/master/HMaster.java    |  20 +--
 .../hadoop/hbase/master/RegionServerTracker.java   |  26 ++--
 .../apache/hadoop/hbase/master/ServerManager.java  |   9 +-
 .../hbase/master/assignment/AssignmentManager.java | 105 ++++++++++++--
 .../master/assignment/AssignmentManagerUtil.java   |   8 +-
 .../assignment/RegionInTransitionTracker.java      | 159 +++++++++++++++++++++
 .../hbase/master/assignment/RegionStateNode.java   |  22 +--
 .../hbase/master/assignment/RegionStates.java      |  95 ++----------
 .../master/procedure/FlushRegionProcedure.java     |   2 +-
 .../hbase/master/procedure/ProcedureSyncWait.java  |   2 +-
 .../procedure/ReopenTableRegionsProcedure.java     |   2 +-
 .../master/procedure/ServerCrashProcedure.java     |   4 +
 .../master/assignmentManagerStatus.jsp             |   2 +-
 .../apache/hadoop/hbase/HBaseTestingUtility.java   |  48 ++++++-
 .../hbase/client/TestAsyncRegionAdminApi.java      |   4 +-
 .../hbase/client/TestSplitOrMergeStatus.java       |   4 +-
 .../hbase/master/TestAssignmentManagerMetrics.java |   2 +-
 .../apache/hadoop/hbase/master/TestDeadServer.java |   3 +-
 .../hbase/master/TestMasterBalanceThrottling.java  |   8 +-
 .../hbase/master/TestMasterDryRunBalancer.java     |   2 +-
 .../master/assignment/AssignmentTestingUtil.java   |   4 +-
 .../master/assignment/MockMasterServices.java      |   1 +
 .../assignment/TestAssignmentManagerBase.java      |   2 +-
 .../assignment/TestAssignmentManagerUtil.java      |   2 +-
 .../TestTransitRegionStateProcedure.java           |   6 +-
 .../TestFavoredStochasticLoadBalancer.java         |  15 +-
 .../hbase/master/http/TestMasterStatusUtil.java    |   2 +-
 .../janitor/TestCatalogJanitorInMemoryStates.java  |   9 +-
 .../procedure/MasterProcedureTestingUtility.java   |   1 +
 .../hbase/regionserver/TestHRegionFileSystem.java  |   6 +-
 .../TestRegionMergeTransactionOnCluster.java       |  17 +--
 .../TestSplitTransactionOnCluster.java             |  15 +-
 35 files changed, 439 insertions(+), 196 deletions(-)

diff --git 
a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
 
b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
index 9d1b03e18eb..2a97668e0f9 100644
--- 
a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
+++ 
b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
@@ -26,6 +26,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.SortedSet;
 import java.util.concurrent.Future;
 import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
@@ -47,7 +48,6 @@ import org.apache.hadoop.hbase.master.RegionPlan;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.master.TableStateManager;
-import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
 import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
 import org.apache.hadoop.hbase.net.Address;
@@ -532,8 +532,7 @@ public class RSGroupAdminServer implements RSGroupAdmin {
       Map<String, RegionState> groupRIT = 
rsGroupGetRegionsInTransition(groupName);
       if (groupRIT.size() > 0 && !request.isIgnoreRegionsInTransition()) {
         LOG.debug("Not running balancer because {} region(s) in transition: 
{}", groupRIT.size(),
-          StringUtils.abbreviate(
-            
master.getAssignmentManager().getRegionStates().getRegionsInTransition().toString(),
+          
StringUtils.abbreviate(master.getAssignmentManager().getRegionsInTransition().toString(),
             256));
         return responseBuilder.build();
       }
@@ -654,14 +653,12 @@ public class RSGroupAdminServer implements RSGroupAdmin {
 
   private Map<String, RegionState> rsGroupGetRegionsInTransition(String 
groupName)
     throws IOException {
+    SortedSet<TableName> tablesInGroup = getRSGroupInfo(groupName).getTables();
     Map<String, RegionState> rit = Maps.newTreeMap();
-    AssignmentManager am = master.getAssignmentManager();
-    for (TableName tableName : getRSGroupInfo(groupName).getTables()) {
-      for (RegionInfo regionInfo : 
am.getRegionStates().getRegionsOfTable(tableName)) {
-        RegionState state = 
am.getRegionStates().getRegionTransitionState(regionInfo);
-        if (state != null) {
-          rit.put(regionInfo.getEncodedName(), state);
-        }
+    for (RegionStateNode regionNode : 
master.getAssignmentManager().getRegionsInTransition()) {
+      TableName tn = regionNode.getTable();
+      if (tablesInGroup.contains(tn)) {
+        rit.put(regionNode.getRegionInfo().getEncodedName(), 
regionNode.toRegionState());
       }
     }
     return rit;
diff --git 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
index 452fd63d3c8..0d1810101a3 100644
--- 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
+++ 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
@@ -138,7 +138,7 @@ public class TestRSGroupsOfflineMode {
       @Override
       public boolean evaluate() throws Exception {
         return groupRS.getNumberOfOnlineRegions() < 1
-          && 
master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() < 
1;
+          && master.getAssignmentManager().getRegionsInTransitionCount() < 1;
       }
     });
     // Move table to group and wait.
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 9467512fc66..94158e25b62 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -75,6 +75,10 @@ public class DeadServer {
     this.deadServers.putIfAbsent(sn, EnvironmentEdgeManager.currentTime());
   }
 
+  synchronized void putIfAbsent(ServerName sn, long crashedTime) {
+    this.deadServers.putIfAbsent(sn, crashedTime);
+  }
+
   public synchronized int size() {
     return deadServers.size();
   }
@@ -169,6 +173,11 @@ public class DeadServer {
     return time == null ? null : new Date(time);
   }
 
+  public synchronized long getDeathTimestamp(final ServerName deadServerName) {
+    Long time = deadServers.get(deadServerName);
+    return time == null ? 0 : time;
+  }
+
   /**
    * Called from rpc by operator cleaning up deadserver list.
    * @param deadServerName the dead server name
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 35f212b4459..9e6e929eb10 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1020,10 +1020,11 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     // TODO: Generate the splitting and live Set in one pass instead of two as 
we currently do.
     this.regionServerTracker.upgrade(
       procsByType.getOrDefault(ServerCrashProcedure.class, 
Collections.emptyList()).stream()
-        .map(p -> (ServerCrashProcedure) p).map(p -> 
p.getServerName()).collect(Collectors.toSet()),
+        .map(p -> (ServerCrashProcedure) p).collect(
+          Collectors.toMap(ServerCrashProcedure::getServerName, 
Procedure::getSubmittedTime)),
       Sets.union(rsListStorage.getAll(), 
walManager.getLiveServersFromWALDir()),
       walManager.getSplittingServersFromWALDir());
-    // This manager will be started AFTER hbase:meta is confirmed on line.
+    // This manager will be started AFTER hbase:meta is confirmed on line.
     // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. 
They read table
     // state from zookeeper while hbase2 reads it from hbase:meta. Disable if 
no hbase1 clients.
     this.tableStateManager =
@@ -1121,6 +1122,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     final ColumnFamilyDescriptor replBarrierFamilyDesc =
       metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
 
+    this.assignmentManager.initializationPostMetaOnline();
     this.assignmentManager.joinCluster();
     // The below depends on hbase:meta being online.
     try {
@@ -1944,7 +1946,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     // But if there are zero regions in transition, it can skip sleep to speed 
up.
     while (
       !interrupted && EnvironmentEdgeManager.currentTime() < 
nextBalanceStartTime
-        && this.assignmentManager.getRegionStates().hasRegionsInTransition()
+        && this.assignmentManager.getRegionTransitScheduledCount() > 0
     ) {
       try {
         Thread.sleep(100);
@@ -1956,8 +1958,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     // Throttling by max number regions in transition
     while (
       !interrupted && maxRegionsInTransition > 0
-        && 
this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
-            >= maxRegionsInTransition
+        && this.assignmentManager.getRegionTransitScheduledCount() >= 
maxRegionsInTransition
         && EnvironmentEdgeManager.currentTime() <= cutoffTime
     ) {
       try {
@@ -2036,7 +2037,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
 
     synchronized (this.balancer) {
       // Only allow one balance run at at time.
-      if (this.assignmentManager.hasRegionsInTransition()) {
+      if (this.assignmentManager.getRegionTransitScheduledCount() > 0) {
         List<RegionStateNode> regionsInTransition = 
assignmentManager.getRegionsInTransition();
         // if hbase:meta region is in transition, result of assignment cannot 
be recorded
         // ignore the force flag in that case
@@ -2051,7 +2052,8 @@ public class HMaster extends HRegionServer implements 
MasterServices {
 
         if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
           LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + 
metaInTransition
-            + ") because " + regionsInTransition.size() + " region(s) in 
transition: " + toPrint
+            + ") because " + assignmentManager.getRegionTransitScheduledCount()
+            + " region(s) are scheduled to transit " + toPrint
             + (truncated ? "(truncated list)" : ""));
           return responseBuilder.build();
         }
@@ -2188,7 +2190,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     if (skipRegionManagementAction("region normalizer")) {
       return false;
     }
-    if (assignmentManager.hasRegionsInTransition()) {
+    if (assignmentManager.getRegionTransitScheduledCount() > 0) {
       return false;
     }
 
@@ -3012,7 +3014,7 @@ public class HMaster extends HRegionServer implements 
MasterServices {
         case REGIONS_IN_TRANSITION: {
           if (assignmentManager != null) {
             builder.setRegionsInTransition(
-              
assignmentManager.getRegionStates().getRegionsStateInTransition());
+              new 
ArrayList<>(assignmentManager.getRegionsStateInTransition()));
           }
           break;
         }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
index c1d997797c8..fe4129e5111 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.io.InterruptedIOException;
 import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -116,22 +117,26 @@ public class RegionServerTracker extends ZKListener {
    * In this method, we will also construct the region server sets in {@link 
ServerManager}. If a
    * region server is dead between the crash of the previous master instance 
and the start of the
    * current master instance, we will schedule a SCP for it. This is done in
-   * {@link ServerManager#findDeadServersAndProcess(Set, Set)}, we call it 
here under the lock
+   * {@link ServerManager#findDeadServersAndProcess(Map, Set)}, we call it 
here under the lock
    * protection to prevent concurrency issues with server expiration operation.
-   * @param deadServersFromPE          the region servers which already have 
SCP associated.
-   * @param liveServersBeforeRestart   the live region servers we recorded 
before master restarts.
-   * @param splittingServersFromWALDir Servers whose WALs are being actively 
'split'.
+   * @param deadServersWithDeathTimeFromPE the region servers which already have an SCP associated,
+   *                                       mapped to their time of death.
+   * @param liveServersBeforeRestart       the live region servers we recorded 
before master
+   *                                       restarts.
+   * @param splittingServersFromWALDir     Servers whose WALs are being 
actively 'split'.
    */
-  public void upgrade(Set<ServerName> deadServersFromPE, Set<ServerName> 
liveServersBeforeRestart,
-    Set<ServerName> splittingServersFromWALDir) throws KeeperException, 
IOException {
+  public void upgrade(Map<ServerName, Long> deadServersWithDeathTimeFromPE,
+    Set<ServerName> liveServersBeforeRestart, Set<ServerName> 
splittingServersFromWALDir)
+    throws KeeperException, IOException {
     LOG.info(
       "Upgrading RegionServerTracker to active master mode; {} have existing"
         + "ServerCrashProcedures, {} possibly 'live' servers, and {} 
'splitting'.",
-      deadServersFromPE.size(), liveServersBeforeRestart.size(), 
splittingServersFromWALDir.size());
-    // deadServersFromPE is made from a list of outstanding 
ServerCrashProcedures.
+      deadServersWithDeathTimeFromPE.size(), liveServersBeforeRestart.size(),
+      splittingServersFromWALDir.size());
+    // deadServersWithDeathTimeFromPE is made from a list of outstanding 
ServerCrashProcedures.
     // splittingServersFromWALDir are being actively split -- the directory in 
the FS ends in
     // '-SPLITTING'. Each splitting server should have a corresponding SCP. 
Log if not.
-    splittingServersFromWALDir.stream().filter(s -> 
!deadServersFromPE.contains(s))
+    splittingServersFromWALDir.stream().filter(s -> 
!deadServersWithDeathTimeFromPE.containsKey(s))
       .forEach(s -> LOG.error("{} has no matching ServerCrashProcedure", s));
     // create ServerNode for all possible live servers from wal directory
     liveServersBeforeRestart
@@ -148,7 +153,8 @@ public class RegionServerTracker extends ZKListener {
           : ServerMetricsBuilder.of(serverName);
         serverManager.checkAndRecordNewServer(serverName, serverMetrics);
       }
-      serverManager.findDeadServersAndProcess(deadServersFromPE, 
liveServersBeforeRestart);
+      serverManager.findDeadServersAndProcess(deadServersWithDeathTimeFromPE,
+        liveServersBeforeRestart);
       active = true;
     }
   }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 443ef742f4f..9b07f87d15e 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -399,12 +399,13 @@ public class ServerManager implements 
ConfigurationObserver {
    * <p/>
    * Must be called inside the initialization method of {@code 
RegionServerTracker} to avoid
    * concurrency issue.
-   * @param deadServersFromPE     the region servers which already have a SCP 
associated.
-   * @param liveServersFromWALDir the live region servers from wal directory.
+   * @param deadServersWithDeathTimeFromPE the region servers which already have an SCP associated,
+   *                                       mapped to their time of death.
+   * @param liveServersFromWALDir          the live region servers from wal 
directory.
    */
-  void findDeadServersAndProcess(Set<ServerName> deadServersFromPE,
+  void findDeadServersAndProcess(Map<ServerName, Long> 
deadServersWithDeathTimeFromPE,
     Set<ServerName> liveServersFromWALDir) {
-    deadServersFromPE.forEach(deadservers::putIfAbsent);
+    deadServersWithDeathTimeFromPE.forEach(deadservers::putIfAbsent);
     liveServersFromWALDir.stream().filter(sn -> !onlineServers.containsKey(sn))
       .forEach(this::expireServer);
   }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 0bc651b4954..a1f93545389 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -22,11 +22,14 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -228,6 +231,9 @@ public class AssignmentManager {
 
   private final int forceRegionRetainmentRetries;
 
+  private final RegionInTransitionTracker regionInTransitionTracker =
+    new RegionInTransitionTracker();
+
   public AssignmentManager(MasterServices master, MasterRegion masterRegion) {
     this(master, masterRegion, new RegionStateStore(master, masterRegion));
   }
@@ -327,6 +333,8 @@ public class AssignmentManager {
             regionNode.setLastHost(lastHost);
             regionNode.setRegionLocation(regionLocation);
             regionNode.setOpenSeqNum(openSeqNum);
+            
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
+
             if (regionNode.getProcedure() != null) {
               regionNode.getProcedure().stateLoaded(this, regionNode);
             }
@@ -378,11 +386,17 @@ public class AssignmentManager {
           return;
         }
       }
-      LOG.info("Attach {} to {} to restore RIT", proc, regionNode);
+      LOG.info("Attach {} to {}", proc, regionNode);
       regionNode.setProcedure(proc);
     });
   }
 
+  public void initializationPostMetaOnline() {
+    // Now that we are sure that meta is online, we can set the TableStateManager in
+    // regionInTransitionTracker.
+    
regionInTransitionTracker.setTableStateManager(master.getTableStateManager());
+  }
+
   public void stop() {
     if (!running.compareAndSet(true, false)) {
       return;
@@ -407,6 +421,7 @@ public class AssignmentManager {
 
     // Stop the RegionStateStore
     regionStates.clear();
+    regionInTransitionTracker.stop();
 
     // Update meta events (for testing)
     if (hasProcExecutor) {
@@ -1076,7 +1091,7 @@ public class AssignmentManager {
       regionNode.lock();
       try {
         if (shouldSubmit.apply(regionNode)) {
-          if (regionNode.isInTransition()) {
+          if (regionNode.isTransitionScheduled()) {
             logRIT.accept(regionNode);
             inTransitionCount++;
             continue;
@@ -1685,10 +1700,8 @@ public class AssignmentManager {
     }
 
     protected void update(final AssignmentManager am) {
-      final RegionStates regionStates = am.getRegionStates();
       this.statTimestamp = EnvironmentEdgeManager.currentTime();
-      update(regionStates.getRegionsStateInTransition(), statTimestamp);
-      update(regionStates.getRegionFailedOpen(), statTimestamp);
+      update(am.getRegionsStateInTransition(), statTimestamp);
 
       if (LOG.isDebugEnabled() && ritsOverThreshold != null && 
!ritsOverThreshold.isEmpty()) {
         LOG.debug("RITs over threshold: {}",
@@ -1856,6 +1869,20 @@ public class AssignmentManager {
       if (regionNode.getProcedure() != null) {
         regionNode.getProcedure().stateLoaded(AssignmentManager.this, 
regionNode);
       }
+      // add regions to RIT while visiting the meta
+      regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
+      // If region location of region belongs to a dead server mark the region 
crashed
+      if (
+        regionNode.getRegionLocation() != null
+          && 
master.getServerManager().isServerDead(regionNode.getRegionLocation())
+      ) {
+        long timeOfCrash = master.getServerManager().getDeadServers()
+          .getDeathTimestamp(regionNode.getRegionLocation());
+        if (timeOfCrash != 0) {
+          regionNode.crashed(timeOfCrash);
+        }
+        regionInTransitionTracker.regionCrashed(regionNode);
+      }
     }
   };
 
@@ -2029,15 +2056,52 @@ public class AssignmentManager {
     return new Pair<Integer, Integer>(ritCount, states.size());
   }
 
+  // This comparator sorts the RegionStates by time stamp then Region name.
+  // Comparing by timestamp alone can lead us to discard different 
RegionStates that happen
+  // to share a timestamp.
+  private final static class RegionStateStampComparator implements 
Comparator<RegionState> {
+    @Override
+    public int compare(final RegionState l, final RegionState r) {
+      int stampCmp = Long.compare(l.getStamp(), r.getStamp());
+      return stampCmp != 0 ? stampCmp : 
RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
+    }
+  }
+
+  public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR 
=
+    new RegionStateStampComparator();
+
   // 
============================================================================================
   // TODO: Region State In Transition
   // 
============================================================================================
   public boolean hasRegionsInTransition() {
-    return regionStates.hasRegionsInTransition();
+    return regionInTransitionTracker.hasRegionsInTransition();
   }
 
   public List<RegionStateNode> getRegionsInTransition() {
-    return regionStates.getRegionsInTransition();
+    return regionInTransitionTracker.getRegionsInTransition();
+  }
+
+  public boolean isRegionInTransition(final RegionInfo regionInfo) {
+    return regionInTransitionTracker.isRegionInTransition(regionInfo);
+  }
+
+  public int getRegionTransitScheduledCount() {
+    return regionStates.getRegionTransitScheduledCount();
+  }
+
+  /**
+   * Get the number of regions in transition.
+   */
+  public int getRegionsInTransitionCount() {
+    return regionInTransitionTracker.getRegionsInTransition().size();
+  }
+
+  public SortedSet<RegionState> getRegionsStateInTransition() {
+    final SortedSet<RegionState> rit = new 
TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
+    for (RegionStateNode node : getRegionsInTransition()) {
+      rit.add(node.toRegionState());
+    }
+    return rit;
   }
 
   public List<RegionInfo> getAssignedRegions() {
@@ -2104,6 +2168,8 @@ public class AssignmentManager {
       if (!succ) {
         // revert
         regionNode.setState(state);
+      } else {
+        regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
       }
     }
   }
@@ -2147,6 +2213,8 @@ public class AssignmentManager {
           // revert
           regionNode.setState(state);
           regionNode.setRegionLocation(regionLocation);
+        } else {
+          regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
         }
       }
     }
@@ -2208,6 +2276,8 @@ public class AssignmentManager {
         // revert
         regionNode.setState(state);
         regionNode.setRegionLocation(regionLocation);
+      } else {
+        regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
       }
     }
     if (regionLocation != null) {
@@ -2226,12 +2296,27 @@ public class AssignmentManager {
       // on table that contains state.
       setMetaAssigned(regionInfo, true);
     }
+    regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
   }
 
   // 
============================================================================================
   // The above methods can only be called in TransitRegionStateProcedure(and 
related procedures)
   // 
============================================================================================
 
+  // When a server crashes, the regions it hosted become unavailable; this method tracks them.
+  // It must only be called from ServerCrashProcedure. Locations are compared by value (equals).
+  public void markRegionsAsCrashed(List<RegionInfo> regionsOnCrashedServer,
+    ServerCrashProcedure scp) {
+    ServerName crashedServerName = scp.getServerName();
+    for (RegionInfo regionInfo : regionsOnCrashedServer) {
+      RegionStateNode node = regionStates.getOrCreateRegionStateNode(regionInfo);
+      if (crashedServerName.equals(node.getRegionLocation())) {
+        node.crashed(scp.getSubmittedTime());
+        regionInTransitionTracker.regionCrashed(node);
+      }
+    }
+  }
+
   public void markRegionAsSplit(final RegionInfo parent, final ServerName 
serverName,
     final RegionInfo daughterA, final RegionInfo daughterB) throws IOException 
{
     // Update hbase:meta. Parent will be marked offline and split up in 
hbase:meta.
@@ -2255,6 +2340,9 @@ public class AssignmentManager {
     // it is a split parent. And usually only one of them can match, as after 
restart, the region
     // state will be changed from SPLIT to CLOSED.
     regionStateStore.splitRegion(parent, daughterA, daughterB, serverName);
+    regionInTransitionTracker.handleRegionStateNodeOperation(node);
+    regionInTransitionTracker.handleRegionStateNodeOperation(nodeA);
+    regionInTransitionTracker.handleRegionStateNodeOperation(nodeB);
     if (shouldAssignFavoredNodes(parent)) {
       List<ServerName> onlineServers = 
this.master.getServerManager().getOnlineServersList();
       ((FavoredNodesPromoter) 
getBalancer()).generateFavoredNodesForDaughter(onlineServers, parent,
@@ -2277,9 +2365,10 @@ public class AssignmentManager {
     node.setState(State.MERGED);
     for (RegionInfo ri : mergeParents) {
       regionStates.deleteRegion(ri);
-
+      regionInTransitionTracker.handleRegionDelete(ri);
     }
     regionStateStore.mergeRegions(child, mergeParents, serverName);
+    regionInTransitionTracker.handleRegionStateNodeOperation(node);
     if (shouldAssignFavoredNodes(child)) {
       ((FavoredNodesPromoter) 
getBalancer()).generateFavoredNodesForMergedRegion(child,
         mergeParents);
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
index 82999202d0c..8945778c391 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
@@ -164,7 +164,7 @@ final class AssignmentManagerUtil {
           regionNode.lock();
           try {
             if (ignoreIfInTransition) {
-              if (regionNode.isInTransition()) {
+              if (regionNode.isTransitionScheduled()) {
                 return null;
               }
             } else {
@@ -172,7 +172,7 @@ final class AssignmentManagerUtil {
               // created, or has been successfully closed so should not be on 
any servers, so SCP
               // will
               // not process it either.
-              assert !regionNode.isInTransition();
+              assert !regionNode.isTransitionScheduled();
             }
             regionNode.setProcedure(proc);
           } finally {
@@ -194,7 +194,7 @@ final class AssignmentManagerUtil {
         // apply ignoreRITs to replica regions as well.
         if (
           !ignoreIfInTransition || 
!env.getAssignmentManager().getRegionStates()
-            .getOrCreateRegionStateNode(ri).isInTransition()
+            .getOrCreateRegionStateNode(ri).isTransitionScheduled()
         ) {
           replicaRegionInfos.add(ri);
         }
@@ -242,7 +242,7 @@ final class AssignmentManagerUtil {
       for (RegionInfo region : regionsAndReplicas) {
         if (
           
env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
-            .isInTransition()
+            .isTransitionScheduled()
         ) {
           return null;
         }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java
new file mode 100644
index 00000000000..e3c52e5bdc6
--- /dev/null
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ConcurrentSkipListMap;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.TableStateManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tracks regions that are currently in transition (RIT) - those not yet in 
their terminal state.
+ */
[email protected]
+public class RegionInTransitionTracker {
+  private static final Logger LOG = 
LoggerFactory.getLogger(RegionInTransitionTracker.class);
+
+  private final List<RegionState.State> DISABLE_TABLE_REGION_STATE =
+    Arrays.asList(RegionState.State.OFFLINE, RegionState.State.CLOSED);
+
+  private final List<RegionState.State> ENABLE_TABLE_REGION_STATE =
+    Collections.singletonList(RegionState.State.OPEN);
+
+  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> 
regionInTransition =
+    new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);
+
+  private TableStateManager tableStateManager;
+
+  public boolean isRegionInTransition(final RegionInfo regionInfo) {
+    return regionInTransition.containsKey(regionInfo);
+  }
+
+  /**
+   * Handles a region whose hosting RegionServer has crashed. When a 
RegionServer fails, all regions
+   * it was hosting are automatically added to the RIT list since they need to 
be reassigned to
+   * other servers.
+   */
+  public void regionCrashed(RegionStateNode regionStateNode) {
+    if (regionStateNode.getRegionInfo().getReplicaId() != 
RegionInfo.DEFAULT_REPLICA_ID) {
+      return;
+    }
+
+    if (addRegionInTransition(regionStateNode)) {
+      LOG.debug("{} added to RIT list because hosting region server is crashed 
",
+        regionStateNode.getRegionInfo().getEncodedName());
+    }
+  }
+
+  /**
+   * Processes a region state change and updates the RIT tracking accordingly. 
This is the core
+   * method that determines whether a region should be added to or removed 
from the RIT list based
+   * on its current state and the table's enabled/disabled status. This method 
should be called
+   * whenever a region state change is stored to hbase:meta. Note: only default replicas (replica
+   * ID 0) are tracked. Read replicas are ignored.
+   * @param regionStateNode the region state node with the current state 
information
+   */
+  public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
+    // only consider default replica for availability
+    if (regionStateNode.getRegionInfo().getReplicaId() != 
RegionInfo.DEFAULT_REPLICA_ID) {
+      return;
+    }
+
+    RegionState.State currentState = regionStateNode.getState();
+    boolean tableEnabled = isTableEnabled(regionStateNode.getTable());
+    List<RegionState.State> terminalStates =
+      tableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;
+
+    // if region is merged or split it should not be in RIT list
+    if (
+      currentState == RegionState.State.SPLIT || currentState == 
RegionState.State.MERGED
+        || regionStateNode.getRegionInfo().isSplit()
+    ) {
+      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
+        LOG.debug("Removed {} from RIT list as it is split or merged",
+          regionStateNode.getRegionInfo().getEncodedName());
+      }
+    } else if (!terminalStates.contains(currentState)) {
+      if (addRegionInTransition(regionStateNode)) {
+        LOG.debug("{} added to RIT list because it is in-between state, region 
state : {} ",
+          regionStateNode.getRegionInfo().getEncodedName(), currentState);
+      }
+    } else {
+      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
+        LOG.debug("Removed {} from RIT list as reached to terminal state {}",
+          regionStateNode.getRegionInfo().getEncodedName(), currentState);
+      }
+    }
+  }
+
+  private boolean isTableEnabled(TableName tableName) {
+    if (tableStateManager != null) {
+      return tableStateManager.isTableState(tableName, 
TableState.State.ENABLED,
+        TableState.State.ENABLING);
+    }
+    // AssignmentManager calls setTableStateManager once hbase:meta is confirmed online. If it is
+    // still null, that confirmation is still pending and the TableStateManager must not be
+    // accessed yet.
+    assert TableName.isMetaTableName(tableName);
+    return true;
+  }
+
+  /**
+   * Handles the deletion of a region by removing it from RIT tracking. This 
is called when a region
+   * is permanently removed from the cluster, typically after a successful 
merge operation where the
+   * parent regions are cleaned up. During table deletion, the table should already be disabled
+   * and all of its regions should already be OFFLINE.
+   * @param regionInfo the region being deleted
+   */
+  public void handleRegionDelete(RegionInfo regionInfo) {
+    removeRegionInTransition(regionInfo);
+  }
+
+  private boolean addRegionInTransition(final RegionStateNode regionStateNode) 
{
+    return regionInTransition.putIfAbsent(regionStateNode.getRegionInfo(), 
regionStateNode) == null;
+  }
+
+  private boolean removeRegionInTransition(final RegionInfo regionInfo) {
+    return regionInTransition.remove(regionInfo) != null;
+  }
+
+  public void stop() {
+    regionInTransition.clear();
+  }
+
+  public boolean hasRegionsInTransition() {
+    return !regionInTransition.isEmpty();
+  }
+
+  public List<RegionStateNode> getRegionsInTransition() {
+    return new ArrayList<>(regionInTransition.values());
+  }
+
+  public void setTableStateManager(TableStateManager tableStateManager) {
+    this.tableStateManager = tableStateManager;
+  }
+}
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
index c00f8c367ad..26d16cd9920 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
@@ -18,7 +18,7 @@
 package org.apache.hadoop.hbase.master.assignment;
 
 import java.util.Arrays;
-import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import org.apache.hadoop.hbase.HConstants;
@@ -68,6 +68,9 @@ import org.slf4j.LoggerFactory;
 public class RegionStateNode implements Comparable<RegionStateNode> {
 
   private static final Logger LOG = 
LoggerFactory.getLogger(RegionStateNode.class);
+  // Count of all active TRSPs in the master. The counter is owned by RegionStates and shared
+  // with every RegionStateNode so that setProcedure/unsetProcedure can maintain it.
+  private final AtomicInteger activeTransitProcedureCount;
 
   private static final class AssignmentProcedureEvent extends 
ProcedureEvent<RegionInfo> {
     public AssignmentProcedureEvent(final RegionInfo regionInfo) {
@@ -78,7 +81,6 @@ public class RegionStateNode implements 
Comparable<RegionStateNode> {
   final Lock lock = new ReentrantLock();
   private final RegionInfo regionInfo;
   private final ProcedureEvent<?> event;
-  private final ConcurrentMap<RegionInfo, RegionStateNode> ritMap;
 
   // volatile only for getLastUpdate and test usage, the upper layer should 
sync on the
   // RegionStateNode before accessing usually.
@@ -96,16 +98,16 @@ public class RegionStateNode implements 
Comparable<RegionStateNode> {
 
   /**
    * Updated whenever a call to {@link #setRegionLocation(ServerName)} or
-   * {@link #setState(RegionState.State, RegionState.State...)}.
+   * {@link #setState(RegionState.State, RegionState.State...)} or {@link 
#crashed(long)}.
    */
   private volatile long lastUpdate = 0;
 
   private volatile long openSeqNum = HConstants.NO_SEQNUM;
 
-  RegionStateNode(RegionInfo regionInfo, ConcurrentMap<RegionInfo, 
RegionStateNode> ritMap) {
+  RegionStateNode(RegionInfo regionInfo, AtomicInteger 
activeTransitProcedureCount) {
     this.regionInfo = regionInfo;
     this.event = new AssignmentProcedureEvent(regionInfo);
-    this.ritMap = ritMap;
+    this.activeTransitProcedureCount = activeTransitProcedureCount;
   }
 
   /**
@@ -160,7 +162,7 @@ public class RegionStateNode implements 
Comparable<RegionStateNode> {
     return isInState(State.FAILED_OPEN) && getProcedure() != null;
   }
 
-  public boolean isInTransition() {
+  public boolean isTransitionScheduled() {
     return getProcedure() != null;
   }
 
@@ -189,6 +191,10 @@ public class RegionStateNode implements 
Comparable<RegionStateNode> {
     this.lastHost = serverName;
   }
 
+  public void crashed(long crashTime) {
+    this.lastUpdate = crashTime;
+  }
+
   public void setOpenSeqNum(final long seqId) {
     this.openSeqNum = seqId;
   }
@@ -206,14 +212,14 @@ public class RegionStateNode implements 
Comparable<RegionStateNode> {
   public TransitRegionStateProcedure setProcedure(TransitRegionStateProcedure 
proc) {
     assert this.procedure == null;
     this.procedure = proc;
-    ritMap.put(regionInfo, this);
+    activeTransitProcedureCount.incrementAndGet();
     return proc;
   }
 
   public void unsetProcedure(TransitRegionStateProcedure proc) {
     assert this.procedure == proc;
+    activeTransitProcedureCount.decrementAndGet();
     this.procedure = null;
-    ritMap.remove(regionInfo, this);
   }
 
   public TransitRegionStateProcedure getProcedure() {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 5ae6a253bbc..d34ec7a56d5 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -20,13 +20,10 @@ package org.apache.hadoop.hbase.master.assignment;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.SortedSet;
-import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -54,22 +51,10 @@ import org.slf4j.LoggerFactory;
 public class RegionStates {
   private static final Logger LOG = 
LoggerFactory.getLogger(RegionStates.class);
 
-  // This comparator sorts the RegionStates by time stamp then Region name.
-  // Comparing by timestamp alone can lead us to discard different 
RegionStates that happen
-  // to share a timestamp.
-  private static class RegionStateStampComparator implements 
Comparator<RegionState> {
-    @Override
-    public int compare(final RegionState l, final RegionState r) {
-      int stampCmp = Long.compare(l.getStamp(), r.getStamp());
-      return stampCmp != 0 ? stampCmp : 
RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
-    }
-  }
-
-  public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR 
=
-    new RegionStateStampComparator();
-
   private final Object regionsMapLock = new Object();
 
+  private final AtomicInteger activeTransitProcedureCount = new 
AtomicInteger(0);
+
   // TODO: Replace the ConcurrentSkipListMaps
   /**
    * A Map from {@link RegionInfo#getRegionName()} to {@link RegionStateNode}
@@ -84,9 +69,6 @@ public class RegionStates {
   private final ConcurrentSkipListMap<String, RegionStateNode> 
encodedRegionsMap =
     new ConcurrentSkipListMap<>();
 
-  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> 
regionInTransition =
-    new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);
-
   /**
    * Regions marked as offline on a read of hbase:meta. Unused or at least, 
once offlined, regions
    * have no means of coming on line again. TODO.
@@ -109,23 +91,17 @@ public class RegionStates {
   public void clear() {
     regionsMap.clear();
     encodedRegionsMap.clear();
-    regionInTransition.clear();
     regionOffline.clear();
     serverMap.clear();
   }
 
-  public boolean isRegionInRegionStates(final RegionInfo hri) {
-    return (regionsMap.containsKey(hri.getRegionName()) || 
regionInTransition.containsKey(hri)
-      || regionOffline.containsKey(hri));
-  }
-
   // ==========================================================================
   // RegionStateNode helpers
   // ==========================================================================
   RegionStateNode createRegionStateNode(RegionInfo regionInfo) {
     synchronized (regionsMapLock) {
       RegionStateNode node = 
regionsMap.computeIfAbsent(regionInfo.getRegionName(),
-        key -> new RegionStateNode(regionInfo, regionInTransition));
+        key -> new RegionStateNode(regionInfo, activeTransitProcedureCount));
 
       if (encodedRegionsMap.get(regionInfo.getEncodedName()) != node) {
         encodedRegionsMap.put(regionInfo.getEncodedName(), node);
@@ -157,12 +133,6 @@ public class RegionStates {
       regionsMap.remove(regionInfo.getRegionName());
       encodedRegionsMap.remove(regionInfo.getEncodedName());
     }
-    // See HBASE-20860
-    // After master restarts, merged regions' RIT state may not be cleaned,
-    // making sure they are cleaned here
-    if (regionInTransition.containsKey(regionInfo)) {
-      regionInTransition.remove(regionInfo);
-    }
     // Remove from the offline regions map too if there.
     if (this.regionOffline.containsKey(regionInfo)) {
       if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: " 
+ regionInfo);
@@ -459,7 +429,7 @@ public class RegionStates {
   public List<RegionInfo> getAssignedRegions() {
     final List<RegionInfo> result = new ArrayList<RegionInfo>();
     for (RegionStateNode node : regionsMap.values()) {
-      if (!node.isInTransition()) {
+      if (!node.isTransitionScheduled()) {
         result.add(node.getRegionInfo());
       }
     }
@@ -618,59 +588,6 @@ public class RegionStates {
       TableState.State.DISABLING);
   }
 
-  // ==========================================================================
-  // Region in transition helpers
-  // ==========================================================================
-  public boolean hasRegionsInTransition() {
-    return !regionInTransition.isEmpty();
-  }
-
-  public boolean isRegionInTransition(final RegionInfo regionInfo) {
-    final RegionStateNode node = regionInTransition.get(regionInfo);
-    return node != null ? node.isInTransition() : false;
-  }
-
-  public RegionState getRegionTransitionState(RegionInfo hri) {
-    RegionStateNode node = regionInTransition.get(hri);
-    if (node == null) {
-      return null;
-    }
-
-    node.lock();
-    try {
-      return node.isInTransition() ? node.toRegionState() : null;
-    } finally {
-      node.unlock();
-    }
-  }
-
-  public List<RegionStateNode> getRegionsInTransition() {
-    return new ArrayList<RegionStateNode>(regionInTransition.values());
-  }
-
-  /**
-   * Get the number of regions in transition.
-   */
-  public int getRegionsInTransitionCount() {
-    return regionInTransition.size();
-  }
-
-  public List<RegionState> getRegionsStateInTransition() {
-    final List<RegionState> rit = new 
ArrayList<RegionState>(regionInTransition.size());
-    for (RegionStateNode node : regionInTransition.values()) {
-      rit.add(node.toRegionState());
-    }
-    return rit;
-  }
-
-  public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
-    final SortedSet<RegionState> rit = new 
TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
-    for (RegionStateNode node : regionInTransition.values()) {
-      rit.add(node.toRegionState());
-    }
-    return rit;
-  }
-
   // ==========================================================================
   // Region offline helpers
   // ==========================================================================
@@ -680,6 +597,10 @@ public class RegionStates {
     regionOffline.put(regionNode.getRegionInfo(), regionNode);
   }
 
+  public int getRegionTransitScheduledCount() {
+    return activeTransitProcedureCount.get();
+  }
+
   // ==========================================================================
   // Region FAIL_OPEN helpers
   // ==========================================================================
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/FlushRegionProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/FlushRegionProcedure.java
index 7c67f0e3ee9..d90dbed8953 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/FlushRegionProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/FlushRegionProcedure.java
@@ -95,7 +95,7 @@ public class FlushRegionProcedure extends 
Procedure<MasterProcedureEnv>
     }
     regionNode.lock();
     try {
-      if (!regionNode.isInState(State.OPEN) || regionNode.isInTransition()) {
+      if (!regionNode.isInState(State.OPEN) || 
regionNode.isTransitionScheduled()) {
         LOG.info("State of region {} is not OPEN or in transition. Skip {} 
...", region, this);
         return null;
       }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
index 122c901fd73..892391c57d1 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
@@ -252,7 +252,7 @@ public final class ProcedureSyncWait {
         new ProcedureSyncWait.Predicate<Boolean>() {
           @Override
           public Boolean evaluate() throws IOException {
-            return !states.isRegionInTransition(region);
+            return !states.getRegionStateNode(region).isTransitionScheduled();
           }
         });
     }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index 03f04792af5..6e3491a24a8 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -184,7 +184,7 @@ public class ReopenTableRegionsProcedure
     // If the region node is null, then at least in the next round we can 
remove this region to make
     // progress. And the second condition is a normal one, if there are no 
TRSP with it then we can
     // schedule one to make progress.
-    return regionNode == null || !regionNode.isInTransition();
+    return regionNode == null || !regionNode.isTransitionScheduled();
   }
 
   @Override
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 2a7e96149df..6efa872d16b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -23,6 +23,7 @@ import static 
org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.ServerName;
@@ -161,6 +162,8 @@ public class ServerCrashProcedure extends
           LOG.info("Start " + this);
           // If carrying meta, process it first. Else, get list of regions on 
crashed server.
           if (this.carryingMeta) {
+            env.getAssignmentManager().markRegionsAsCrashed(
+              
Collections.singletonList(RegionInfoBuilder.FIRST_META_REGIONINFO), this);
             setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
           } else {
             setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
@@ -200,6 +203,7 @@ public class ServerCrashProcedure extends
             if (LOG.isTraceEnabled()) {
               this.regionsOnCrashedServer.stream().forEach(ri -> 
LOG.trace(ri.getShortNameToLog()));
             }
+            
env.getAssignmentManager().markRegionsAsCrashed(regionsOnCrashedServer, this);
           }
           if (!this.shouldSplitWal) {
             setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
diff --git 
a/hbase-server/src/main/resources/hbase-webapps/master/assignmentManagerStatus.jsp
 
b/hbase-server/src/main/resources/hbase-webapps/master/assignmentManagerStatus.jsp
index 0966d04316b..4f73a68941e 100644
--- 
a/hbase-server/src/main/resources/hbase-webapps/master/assignmentManagerStatus.jsp
+++ 
b/hbase-server/src/main/resources/hbase-webapps/master/assignmentManagerStatus.jsp
@@ -32,7 +32,7 @@
   AssignmentManager assignmentManager = master.getAssignmentManager();
   int limit = 100;
 
-  SortedSet<RegionState> rit = 
assignmentManager.getRegionStates().getRegionsInTransitionOrderedByTimestamp();
+  SortedSet<RegionState> rit = assignmentManager.getRegionsStateInTransition();
 
 if (!rit.isEmpty()) {
   long currentTime = System.currentTimeMillis();
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 48a7615b2a4..4ebfa3f6cfe 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -4035,9 +4035,8 @@ public class HBaseTestingUtility extends 
HBaseZKTestingUtility {
     return new ExplainingPredicate<IOException>() {
       @Override
       public String explainFailure() throws IOException {
-        final RegionStates regionStates =
-          
getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
-        return "found in transition: " + 
regionStates.getRegionsInTransition().toString();
+        final AssignmentManager am = 
getMiniHBaseCluster().getMaster().getAssignmentManager();
+        return "found in transition: " + 
am.getRegionsInTransition().toString();
       }
 
       @Override
@@ -4051,6 +4050,34 @@ public class HBaseTestingUtility extends 
HBaseZKTestingUtility {
     };
   }
 
+  /**
+   * Returns a {@link Predicate} for checking that no region transition procedure is scheduled in
+   * the master
+   */
+  public ExplainingPredicate<IOException> predicateNoRegionTransitScheduled() {
+    return new ExplainingPredicate<IOException>() {
+      @Override
+      public String explainFailure() throws IOException {
+        final AssignmentManager am = 
getMiniHBaseCluster().getMaster().getAssignmentManager();
+        return "Number of procedure scheduled for region transit: "
+          + am.getRegionTransitScheduledCount();
+      }
+
+      @Override
+      public boolean evaluate() throws IOException {
+        HMaster master = getMiniHBaseCluster().getMaster();
+        if (master == null) {
+          return false;
+        }
+        AssignmentManager am = master.getAssignmentManager();
+        if (am == null) {
+          return false;
+        }
+        return am.getRegionTransitScheduledCount() == 0;
+      }
+    };
+  }
+
   /**
    * Returns a {@link Predicate} for checking that table is enabled
    */
@@ -4135,6 +4162,21 @@ public class HBaseTestingUtility extends 
HBaseZKTestingUtility {
     waitUntilNoRegionsInTransition(15 * 60000);
   }
 
+  /**
+   * Wait until no region transition procedure (TRSP) is scheduled.
+   * @param timeout How long to wait.
+   */
+  public void waitUntilNoRegionTransitScheduled(final long timeout) throws 
IOException {
+    waitFor(timeout, predicateNoRegionTransitScheduled());
+  }
+
+  /**
+   * Wait until no TRSP is present
+   */
+  public void waitUntilNoRegionTransitScheduled() throws IOException {
+    waitUntilNoRegionTransitScheduled(15 * 60000);
+  }
+
   /**
    * Wait until labels is ready in VisibilityLabelsCache.
    */
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
index 2455590ec25..d3b4f51e1d2 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
@@ -94,12 +94,12 @@ public class TestAsyncRegionAdminApi extends 
TestAsyncAdminBase {
       // Expected
       assertThat(e.getCause(), instanceOf(DoNotRetryRegionException.class));
     }
-    assertFalse(am.getRegionStates().getRegionStateNode(hri).isInTransition());
+    
assertFalse(am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
     assertTrue(regionStates.getRegionState(hri).isOpened());
 
     // unassign region
     admin.unassign(hri.getRegionName(), true).get();
-    assertFalse(am.getRegionStates().getRegionStateNode(hri).isInTransition());
+    
assertFalse(am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
     assertTrue(regionStates.getRegionState(hri).isClosed());
   }
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
index fa838f05f39..6600784c401 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
@@ -206,8 +206,8 @@ public class TestSplitOrMergeStatus {
     ProcedureTestingUtility.waitProcedure(procExec, procId2);
     AssignmentTestingUtil.killRs(TEST_UTIL, serverName);
     Threads.sleepWithoutInterrupt(5000);
-    boolean hasRegionsInTransition = 
TEST_UTIL.getMiniHBaseCluster().getMaster()
-      .getAssignmentManager().getRegionStates().hasRegionsInTransition();
+    boolean hasRegionsInTransition =
+      
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().hasRegionsInTransition();
     assertEquals(false, hasRegionsInTransition);
   }
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
index add0c4ed2a5..d1c8e5ddf6b 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
@@ -154,7 +154,7 @@ public class TestAssignmentManagerMetrics {
       // Sleep 5 seconds, wait for doMetrics chore catching up
       // the rit count consists of rit and failed opens. see 
RegionInTransitionStat#update
       // Waiting for the completion of rit makes the assert stable.
-      TEST_UTIL.waitUntilNoRegionsInTransition();
+      TEST_UTIL.waitUntilNoRegionTransitScheduled();
       Thread.sleep(MSG_INTERVAL * 5);
       
METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, 
amSource);
       
METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME,
 1,
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index ecad5a19343..c94c3e70cfb 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.master;
 
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.util.List;
@@ -83,7 +84,7 @@ public class TestDeadServer {
     assertTrue(ds.isDeadServer(deadServer));
     Set<ServerName> deadServerNames = ds.copyServerNames();
     for (ServerName eachDeadServer : deadServerNames) {
-      Assert.assertNotNull(ds.getTimeOfDeath(eachDeadServer));
+      assertNotEquals(0, ds.getDeathTimestamp(eachDeadServer));
     }
     final ServerName deadServerHostComingAlive = 
ServerName.valueOf("127.0.0.1", 9090, 223341L);
     assertTrue(ds.cleanPreviousInstance(deadServerHostComingAlive));
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
index ea527bc6704..f82385e8b37 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
@@ -126,8 +126,8 @@ public class TestMasterBalanceThrottling {
       @Override
       public void run() {
         while (!stop.get()) {
-          maxCount.set(Math.max(maxCount.get(),
-            
master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount()));
+          maxCount.set(
+            Math.max(maxCount.get(), 
master.getAssignmentManager().getRegionsInTransitionCount()));
           try {
             Thread.sleep(10);
           } catch (InterruptedException e) {
@@ -142,7 +142,7 @@ public class TestMasterBalanceThrottling {
   }
 
   private void unbalance(HMaster master, TableName tableName) throws Exception 
{
-    while 
(master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() 
> 0) {
+    while (master.getAssignmentManager().getRegionsInTransitionCount() > 0) {
       Thread.sleep(100);
     }
     HRegionServer biasedServer = 
TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
@@ -150,7 +150,7 @@ public class TestMasterBalanceThrottling {
       master.move(regionInfo.getEncodedNameAsBytes(),
         Bytes.toBytes(biasedServer.getServerName().getServerName()));
     }
-    while 
(master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() 
> 0) {
+    while (master.getAssignmentManager().getRegionsInTransitionCount() > 0) {
       Thread.sleep(100);
     }
   }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
index 411cc0eb6d1..c5a2880b96d 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
@@ -119,6 +119,6 @@ public class TestMasterDryRunBalancer {
 
   private void waitForRegionsToSettle(HMaster master) {
     Waiter.waitFor(TEST_UTIL.getConfiguration(), 60_000,
-      () -> 
master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() 
<= 0);
+      () -> master.getAssignmentManager().getRegionsInTransitionCount() <= 0);
   }
 }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
index 662d39a0c41..a36c5bb2da8 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
@@ -50,7 +50,7 @@ public final class AssignmentTestingUtil {
 
   public static void waitForRegionToBeInTransition(final HBaseTestingUtility 
util,
     final RegionInfo hri) throws Exception {
-    while 
(!getMaster(util).getAssignmentManager().getRegionStates().isRegionInTransition(hri))
 {
+    while (!getMaster(util).getAssignmentManager().isRegionInTransition(hri)) {
       Threads.sleep(10);
     }
   }
@@ -141,7 +141,7 @@ public final class AssignmentTestingUtil {
     RegionStateNode regionNode = 
am.getRegionStates().getRegionStateNode(regionInfo);
     // Wait until the region has already been open, or we have a TRSP along 
with it.
     Waiter.waitFor(am.getConfiguration(), 30000,
-      () -> regionNode.isInState(State.OPEN) || regionNode.isInTransition());
+      () -> regionNode.isInState(State.OPEN) || 
regionNode.isTransitionScheduled());
     TransitRegionStateProcedure proc = regionNode.getProcedure();
     regionNode.lock();
     try {
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
index 538476be306..7ec02b66790 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
@@ -124,6 +124,7 @@ public class MockMasterServices extends 
MockNoopMasterServices {
     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
     this.serverManager = new ServerManager(this, new DummyRegionServerList());
     this.tableStateManager = mock(TableStateManager.class);
+    assignmentManager.initializationPostMetaOnline();
     when(this.tableStateManager.getTableState(any())).thenReturn(new 
TableState(
       TableName.valueOf("AnyTableNameSetInMockMasterServcies"), 
TableState.State.ENABLED));
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
index 9bcbabf1b02..667a1ea9bd5 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
@@ -280,7 +280,7 @@ public abstract class TestAssignmentManagerBase {
     TransitRegionStateProcedure proc;
     regionNode.lock();
     try {
-      assertFalse(regionNode.isInTransition());
+      assertFalse(regionNode.isTransitionScheduled());
       proc = TransitRegionStateProcedure
         .unassign(master.getMasterProcedureExecutor().getEnvironment(), hri);
       regionNode.setProcedure(proc);
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
index 2d87646a62f..6a599f12fcc 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
@@ -129,6 +129,6 @@ public class TestAssignmentManagerUtil {
     IntStream.range(0, REGION_REPLICATION)
       .mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(regionA, i))
       .map(AM.getRegionStates()::getRegionStateNode).forEachOrdered(
-        rn -> assertFalse("Should have unset the proc for " + rn, 
rn.isInTransition()));
+        rn -> assertFalse("Should have unset the proc for " + rn, 
rn.isTransitionScheduled()));
   }
 }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
index dc8f82295a4..6120de50e38 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
@@ -99,15 +99,15 @@ public class TestTransitRegionStateProcedure {
     HMaster master = UTIL.getHBaseCluster().getMaster();
     AssignmentManager am = master.getAssignmentManager();
     RegionStateNode regionNode = 
am.getRegionStates().getRegionStateNode(proc.getRegion());
-    assertFalse(regionNode.isInTransition());
+    assertFalse(regionNode.isTransitionScheduled());
     regionNode.setProcedure(proc);
-    assertTrue(regionNode.isInTransition());
+    assertTrue(regionNode.isTransitionScheduled());
     ProcedureExecutor<MasterProcedureEnv> procExec = 
master.getMasterProcedureExecutor();
     ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
     long procId = procExec.submitProcedure(proc);
     MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, 
procId);
     regionNode = am.getRegionStates().getRegionStateNode(proc.getRegion());
-    assertFalse(regionNode.isInTransition());
+    assertFalse(regionNode.isTransitionScheduled());
   }
 
   @Test
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
index e5be89e8ecb..967217af008 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.LoadBalancer;
 import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
 import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -261,7 +262,9 @@ public class TestFavoredStochasticLoadBalancer extends 
BalancerTestBase {
 
     // Balancer should unassign the region
     assertTrue("Balancer did not run", admin.balancer());
-    TEST_UTIL.waitUntilNoRegionsInTransition();
+    TEST_UTIL.waitUntilNoRegionTransitScheduled();
+    assertEquals("One region should be unassigned", 1,
+      master.getAssignmentManager().getRegionsInTransitionCount());
 
     admin.assign(region.getEncodedNameAsBytes());
     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
@@ -382,7 +385,8 @@ public class TestFavoredStochasticLoadBalancer extends 
BalancerTestBase {
     // Lets kill all the RS that are favored nodes for this region.
     stopServersAndWaitUntilProcessed(currentFN);
 
-    final RegionStates regionStates = 
master.getAssignmentManager().getRegionStates();
+    final AssignmentManager am = master.getAssignmentManager();
+    final RegionStates regionStates = am.getRegionStates();
     TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
       @Override
       public boolean evaluate() throws Exception {
@@ -403,7 +407,7 @@ public class TestFavoredStochasticLoadBalancer extends 
BalancerTestBase {
     FavoredNodeAssignmentHelper helper = new 
FavoredNodeAssignmentHelper(serversForNewFN, conf);
     helper.initialize();
 
-    for (RegionStateNode regionState : regionStates.getRegionsInTransition()) {
+    for (RegionStateNode regionState : am.getRegionsInTransition()) {
       RegionInfo regionInfo = regionState.getRegionInfo();
       List<ServerName> newFavoredNodes = 
helper.generateFavoredNodes(regionInfo);
       assertNotNull(newFavoredNodes);
@@ -445,7 +449,8 @@ public class TestFavoredStochasticLoadBalancer extends 
BalancerTestBase {
     // Lets kill all the RS that are favored nodes for this region.
     stopServersAndWaitUntilProcessed(currentFN);
 
-    final RegionStates regionStatesBeforeMaster = 
master.getAssignmentManager().getRegionStates();
+    final AssignmentManager am = master.getAssignmentManager();
+    final RegionStates regionStatesBeforeMaster = am.getRegionStates();
     TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
       @Override
       public boolean evaluate() throws Exception {
@@ -457,7 +462,7 @@ public class TestFavoredStochasticLoadBalancer extends 
BalancerTestBase {
       regionStatesBeforeMaster.getRegionState(region).isFailedOpen());
 
     List<RegionInfo> rit = Lists.newArrayList();
-    for (RegionStateNode regionState : 
regionStatesBeforeMaster.getRegionsInTransition()) {
+    for (RegionStateNode regionState : am.getRegionsInTransition()) {
       RegionInfo regionInfo = regionState.getRegionInfo();
       LOG.debug("Region in transition after stopping FN's: " + regionInfo);
       rit.add(regionInfo);
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusUtil.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusUtil.java
index 7bcadf37c22..26a7c85c5c0 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusUtil.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusUtil.java
@@ -98,7 +98,7 @@ public class TestMasterStatusUtil {
     regionsInTransition
       .add(new RegionState(FAKE_HRI, RegionState.State.CLOSING, 12345L, 
FAKE_HOST));
     Mockito.doReturn(rs).when(am).getRegionStates();
-    Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransition();
+    Mockito.doReturn(regionsInTransition).when(am).getRegionsInTransition();
     Mockito.doReturn(am).when(master).getAssignmentManager();
     Mockito.doReturn(serverManager).when(master).getServerManager();
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
index 86e4c4a4bf9..8c5e4e944ab 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.janitor;
 
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
@@ -116,8 +117,8 @@ public class TestCatalogJanitorInMemoryStates {
     LOG.info("Daughter regions: " + daughters);
     assertNotNull("Should have found daughter regions for " + parent, 
daughters);
 
-    assertTrue("Parent region should exist in RegionStates",
-      am.getRegionStates().isRegionInRegionStates(parent.getRegion()));
+    assertNotNull("Parent region should exist in RegionStates",
+      
am.getRegionStates().getRegionStateNodeFromName(parent.getRegion().getRegionName()));
     assertTrue("Parent region should exist in ServerManager",
       sm.isRegionInServerManagerStates(parent.getRegion()));
 
@@ -140,8 +141,8 @@ public class TestCatalogJanitorInMemoryStates {
       }
     });
 
-    assertFalse("Parent region should have been removed from RegionStates",
-      am.getRegionStates().isRegionInRegionStates(parent.getRegion()));
+    assertNull("Parent region should have been removed from RegionStates",
+      
am.getRegionStates().getRegionStateNodeFromName(parent.getRegion().getRegionName()));
     assertFalse("Parent region should have been removed from ServerManager",
       sm.isRegionInServerManagerStates(parent.getRegion()));
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
index fc3013e4b8a..95a095ee1ba 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
@@ -111,6 +111,7 @@ public class MasterProcedureTestingUtility {
           // create server state node, to simulate master start up
           env.getMasterServices().getServerManager().getOnlineServersList()
             .forEach(am.getRegionStates()::createServer);
+          am.initializationPostMetaOnline();
           master.setServiceStarted(true);
           return null;
         }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
index 7f8e3e71afb..cdb069dfdd6 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
@@ -116,8 +116,7 @@ public class TestHRegionFileSystem {
       hcdA.setValue(HStore.BLOCK_STORAGE_POLICY_KEY, "ONE_SSD");
       admin.modifyColumnFamily(TABLE_NAME, hcdA);
       while (
-        
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
-          .hasRegionsInTransition()
+        
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().hasRegionsInTransition()
       ) {
         Thread.sleep(200);
         LOG.debug("Waiting on table to finish schema altering");
@@ -127,8 +126,7 @@ public class TestHRegionFileSystem {
       hcdB.setStoragePolicy("ALL_SSD");
       admin.modifyColumnFamily(TABLE_NAME, hcdB);
       while (
-        
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
-          .hasRegionsInTransition()
+        
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().hasRegionsInTransition()
       ) {
         Thread.sleep(200);
         LOG.debug("Waiting on table to finish schema altering");
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
index 549371f6cc3..200386d1c7c 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
@@ -58,7 +58,6 @@ import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.MasterRpcServices;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
 import 
org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
@@ -165,15 +164,17 @@ public class TestRegionMergeTransactionOnCluster {
         : mergedRegions.getSecond();
       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
       AssignmentManager am = cluster.getMaster().getAssignmentManager();
-      RegionStates regionStates = am.getRegionStates();
 
-      // We should not be able to assign it again
+      // We should not be able to assign it again, but the assign currently 
succeeds. The
+      // assertions below are too weak to detect that; tracked in HBASE-29692.
       am.assign(hri);
-      assertFalse("Merged region can't be assigned", 
regionStates.isRegionInTransition(hri));
+      assertFalse("Merged region can't be assigned",
+        am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
 
       // We should not be able to unassign it either
       am.unassign(hri);
-      assertFalse("Merged region can't be unassigned", 
regionStates.isRegionInTransition(hri));
+      assertFalse("Merged region can't be unassigned",
+        am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
 
       table.close();
     } finally {
@@ -570,11 +571,11 @@ public class TestRegionMergeTransactionOnCluster {
         enabled.get() && req.getTransition(0).getTransitionCode() == 
TransitionCode.READY_TO_MERGE
           && !resp.hasErrorMessage()
       ) {
-        RegionStates regionStates = 
myMaster.getAssignmentManager().getRegionStates();
-        for (RegionState regionState : 
regionStates.getRegionsStateInTransition()) {
+        AssignmentManager am = myMaster.getAssignmentManager();
+        for (RegionState regionState : am.getRegionsStateInTransition()) {
           // Find the merging_new region and remove it
           if (regionState.isMergingNew()) {
-            regionStates.deleteRegion(regionState.getRegion());
+            am.getRegionStates().deleteRegion(regionState.getRegion());
           }
         }
       }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index db2a9d68f28..841e8f4b3e9 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -911,7 +911,7 @@ public class TestSplitTransactionOnCluster {
       } catch (DoNotRetryIOException e) {
         // Expected
       }
-      assertFalse("Split region can't be assigned", 
regionStates.isRegionInTransition(hri));
+      assertFalse("Split region can't be assigned", 
am.isRegionInTransition(hri));
       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
 
       // We should not be able to unassign it either
@@ -921,7 +921,7 @@ public class TestSplitTransactionOnCluster {
       } catch (DoNotRetryIOException e) {
         // Expected
       }
-      assertFalse("Split region can't be unassigned", 
regionStates.isRegionInTransition(hri));
+      assertFalse("Split region can't be unassigned", 
am.isRegionInTransition(hri));
       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
     } finally {
       admin.balancerSwitch(true, false);
@@ -1150,12 +1150,11 @@ public class TestSplitTransactionOnCluster {
           && 
req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT)
           && !resp.hasErrorMessage()
       ) {
-        RegionStates regionStates = 
myMaster.getAssignmentManager().getRegionStates();
-        for (RegionStateNode regionState : 
regionStates.getRegionsInTransition()) {
-          /*
-           * TODO!!!! // Find the merging_new region and remove it if 
(regionState.isSplittingNew())
-           * { regionStates.deleteRegion(regionState.getRegion()); }
-           */
+        AssignmentManager am = myMaster.getAssignmentManager();
+        for (RegionStateNode regionState : am.getRegionsInTransition()) {
+          if (regionState.toRegionState().isSplittingNew()) {
+            
am.getRegionStates().deleteRegion(regionState.toRegionState().getRegion());
+          }
         }
       }
       return resp;

Reply via email to