This is an automated email from the ASF dual-hosted git repository.

taklwu pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 1da3ccbfd23 HBASE-29720 Backport "HBASE-25282: Remove 
processingServers in DeadServer as we can get this information by Procedure of 
master" to branch-2 (#7463)
1da3ccbfd23 is described below

commit 1da3ccbfd23169297b667a2e347d3a57c13c14e8
Author: Kevin Geiszler <[email protected]>
AuthorDate: Wed Nov 19 09:33:52 2025 -0800

    HBASE-29720 Backport "HBASE-25282: Remove processingServers in DeadServer 
as we can get this information by Procedure of master" to branch-2 (#7463)
    
    Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
---
 .../org/apache/hadoop/hbase/master/DeadServer.java | 48 ----------------------
 .../hadoop/hbase/master/MasterRpcServices.java     | 15 +++++--
 .../apache/hadoop/hbase/master/ServerManager.java  |  5 ++-
 .../master/procedure/ServerCrashProcedure.java     |  2 -
 .../apache/hadoop/hbase/TestRegionRebalancing.java |  2 +-
 .../apache/hadoop/hbase/master/TestDeadServer.java | 20 ++-------
 .../hadoop/hbase/master/procedure/TestHBCKSCP.java |  1 -
 7 files changed, 18 insertions(+), 75 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 84d660e66ee..9467512fc66 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -33,8 +33,6 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-
 /**
  * Class to hold dead servers list and utility querying dead server list. 
Servers are added when
  * they expire or when we find them in filesystem on startup. When a server 
crash procedure is
@@ -56,12 +54,6 @@ public class DeadServer {
    */
   private final Map<ServerName, Long> deadServers = new HashMap<>();
 
-  /**
-   * Set of dead servers currently being processed by a SCP. Added to this 
list at the start of SCP
-   * and removed after it is done processing the crash.
-   */
-  private final Set<ServerName> processingServers = new HashSet<>();
-
   /**
    * @param serverName server name.
    * @return true if this server is on the dead servers list false otherwise
@@ -70,15 +62,6 @@ public class DeadServer {
     return deadServers.containsKey(serverName);
   }
 
-  /**
-   * Checks if there are currently any dead servers being processed by the 
master. Returns true if
-   * at least one region server is currently being processed as dead.
-   * @return true if any RS are being processed as dead
-   */
-  synchronized boolean areDeadServersInProgress() {
-    return !processingServers.isEmpty();
-  }
-
   public synchronized Set<ServerName> copyServerNames() {
     Set<ServerName> clone = new HashSet<>(deadServers.size());
     clone.addAll(deadServers.keySet());
@@ -90,29 +73,6 @@ public class DeadServer {
    */
   synchronized void putIfAbsent(ServerName sn) {
     this.deadServers.putIfAbsent(sn, EnvironmentEdgeManager.currentTime());
-    processing(sn);
-  }
-
-  /**
-   * Add <code>sn<</code> to set of processing deadservers.
-   * @see #finish(ServerName)
-   */
-  public synchronized void processing(ServerName sn) {
-    if (processingServers.add(sn)) {
-      // Only log on add.
-      LOG.debug("Processing {}; numProcessing={}", sn, 
processingServers.size());
-    }
-  }
-
-  /**
-   * Complete processing for this dead server.
-   * @param sn ServerName for the dead server.
-   * @see #processing(ServerName)
-   */
-  public synchronized void finish(ServerName sn) {
-    if (processingServers.remove(sn)) {
-      LOG.debug("Removed {} from processing; numProcessing={}", sn, 
processingServers.size());
-    }
   }
 
   public synchronized int size() {
@@ -171,17 +131,12 @@ public class DeadServer {
     // Display unified set of servers from both maps
     Set<ServerName> servers = new HashSet<>();
     servers.addAll(deadServers.keySet());
-    servers.addAll(processingServers);
     StringBuilder sb = new StringBuilder();
     for (ServerName sn : servers) {
       if (sb.length() > 0) {
         sb.append(", ");
       }
       sb.append(sn.toString());
-      // Star entries that are being processed
-      if (processingServers.contains(sn)) {
-        sb.append("*");
-      }
     }
     return sb.toString();
   }
@@ -220,9 +175,6 @@ public class DeadServer {
    * @return true if this server was removed
    */
   public synchronized boolean removeDeadServer(final ServerName 
deadServerName) {
-    Preconditions.checkState(!processingServers.contains(deadServerName),
-      "Asked to remove server still in processingServers set " + 
deadServerName + " (numProcessing="
-        + processingServers.size() + ")");
     return this.deadServers.remove(deadServerName) != null;
   }
 }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 194cdcca65b..5b3213d7049 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -2527,11 +2527,18 @@ public class MasterRpcServices extends RSRpcServices
         LOG.debug("Some dead server is still under processing, won't clear the 
dead server list");
         response.addAllServerName(request.getServerNameList());
       } else {
+        DeadServer deadServer = master.getServerManager().getDeadServers();
         for (HBaseProtos.ServerName pbServer : request.getServerNameList()) {
-          if (
-            !master.getServerManager().getDeadServers()
-              .removeDeadServer(ProtobufUtil.toServerName(pbServer))
-          ) {
+          ServerName server = ProtobufUtil.toServerName(pbServer);
+          final boolean deadInProcess =
+            master.getProcedures().stream().anyMatch(p -> (p instanceof 
ServerCrashProcedure)
+              && ((ServerCrashProcedure) p).getServerName().equals(server));
+          if (deadInProcess) {
+            throw new ServiceException(
+              String.format("Dead server '%s' is not 'dead' in fact...", 
server));
+          }
+
+          if (!deadServer.removeDeadServer(server)) {
             response.addServerName(pbServer);
           }
         }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index f7115a5cefb..075fb78198f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -50,6 +50,7 @@ import 
org.apache.hadoop.hbase.ipc.DecommissionedHostRejectedException;
 import org.apache.hadoop.hbase.ipc.HBaseRpcController;
 import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
 import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -547,8 +548,8 @@ public class ServerManager implements ConfigurationObserver 
{
    * Checks if any dead servers are currently in progress.
    * @return true if any RS are being processed as dead, false if not
    */
-  public boolean areDeadServersInProgress() {
-    return this.deadservers.areDeadServersInProgress();
+  public boolean areDeadServersInProgress() throws IOException {
+    return master.getProcedures().stream().anyMatch(p -> p instanceof 
ServerCrashProcedure);
   }
 
   void letRegionServersShutdown() {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 69ebd0567de..2a7e96149df 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -140,7 +140,6 @@ public class ServerCrashProcedure extends
     // This adds server to the DeadServer processing list but not to the 
DeadServers list.
     // Server gets removed from processing list below on procedure successful 
finish.
     if (!notifiedDeadServer) {
-      services.getServerManager().getDeadServers().processing(serverName);
       notifiedDeadServer = true;
     }
 
@@ -255,7 +254,6 @@ public class ServerCrashProcedure extends
         case SERVER_CRASH_FINISH:
           LOG.info("removed crashed server {} after splitting done", 
serverName);
           
services.getAssignmentManager().getRegionStates().removeServer(serverName);
-          services.getServerManager().getDeadServers().finish(serverName);
           updateProgress(true);
           return Flow.NO_MORE_STATE;
         default:
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
index 1cc0dd8379b..d9fbe57785b 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
@@ -183,7 +183,7 @@ public class TestRegionRebalancing {
   /**
    * Wait on crash processing. Balancer won't run if processing a crashed 
server.
    */
-  private void waitOnCrashProcessing() {
+  private void waitOnCrashProcessing() throws IOException {
     while 
(UTIL.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress())
 {
       LOG.info("Waiting on processing of crashed server before proceeding...");
       Threads.sleep(1000);
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index 046c72050ba..94ac1823ac3 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Set;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -69,22 +70,10 @@ public class TestDeadServer {
   public void testIsDead() {
     DeadServer ds = new DeadServer();
     ds.putIfAbsent(hostname123);
-    ds.processing(hostname123);
-    assertTrue(ds.areDeadServersInProgress());
-    ds.finish(hostname123);
-    assertFalse(ds.areDeadServersInProgress());
 
     ds.putIfAbsent(hostname1234);
-    ds.processing(hostname1234);
-    assertTrue(ds.areDeadServersInProgress());
-    ds.finish(hostname1234);
-    assertFalse(ds.areDeadServersInProgress());
 
     ds.putIfAbsent(hostname12345);
-    ds.processing(hostname12345);
-    assertTrue(ds.areDeadServersInProgress());
-    ds.finish(hostname12345);
-    assertFalse(ds.areDeadServersInProgress());
 
     // Already dead = 127.0.0.1,9090,112321
     // Coming back alive = 127.0.0.1,9090,223341
@@ -104,7 +93,7 @@ public class TestDeadServer {
   }
 
   @Test
-  public void testCrashProcedureReplay() {
+  public void testCrashProcedureReplay() throws IOException {
     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
     final ProcedureExecutor<MasterProcedureEnv> pExecutor = 
master.getMasterProcedureExecutor();
     ServerCrashProcedure proc =
@@ -112,7 +101,7 @@ public class TestDeadServer {
 
     ProcedureTestingUtility.submitAndWait(pExecutor, proc);
 
-    
assertFalse(master.getServerManager().getDeadServers().areDeadServersInProgress());
+    assertTrue(master.getServerManager().areDeadServersInProgress());
   }
 
   @Test
@@ -163,17 +152,14 @@ public class TestDeadServer {
     d.putIfAbsent(hostname1234);
     Assert.assertEquals(2, d.size());
 
-    d.finish(hostname123);
     d.removeDeadServer(hostname123);
     Assert.assertEquals(1, d.size());
-    d.finish(hostname1234);
     d.removeDeadServer(hostname1234);
     Assert.assertTrue(d.isEmpty());
 
     d.putIfAbsent(hostname1234);
     Assert.assertFalse(d.removeDeadServer(hostname123_2));
     Assert.assertEquals(1, d.size());
-    d.finish(hostname1234);
     Assert.assertTrue(d.removeDeadServer(hostname1234));
     Assert.assertTrue(d.isEmpty());
   }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
index 2303cb1965e..367c68213a2 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
@@ -145,7 +145,6 @@ public class TestHBCKSCP extends TestSCPBase {
     cluster.killRegionServer(rsServerName);
 
     master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
-    master.getServerManager().getDeadServers().finish(rsServerName);
     master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
     master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
     // Kill the server. Nothing should happen since an 'Unknown Server' as far

Reply via email to