This is an automated email from the ASF dual-hosted git repository.
taklwu pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 1da3ccbfd23 HBASE-29720 Backport "HBASE-25282: Remove
processingServers in DeadServer as we can get this information by Procedure of
master" to branch-2 (#7463)
1da3ccbfd23 is described below
commit 1da3ccbfd23169297b667a2e347d3a57c13c14e8
Author: Kevin Geiszler <[email protected]>
AuthorDate: Wed Nov 19 09:33:52 2025 -0800
HBASE-29720 Backport "HBASE-25282: Remove processingServers in DeadServer
as we can get this information by Procedure of master" to branch-2 (#7463)
Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
---
.../org/apache/hadoop/hbase/master/DeadServer.java | 48 ----------------------
.../hadoop/hbase/master/MasterRpcServices.java | 15 +++++--
.../apache/hadoop/hbase/master/ServerManager.java | 5 ++-
.../master/procedure/ServerCrashProcedure.java | 2 -
.../apache/hadoop/hbase/TestRegionRebalancing.java | 2 +-
.../apache/hadoop/hbase/master/TestDeadServer.java | 20 ++-------
.../hadoop/hbase/master/procedure/TestHBCKSCP.java | 1 -
7 files changed, 18 insertions(+), 75 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 84d660e66ee..9467512fc66 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -33,8 +33,6 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-
/**
* Class to hold dead servers list and utility querying dead server list.
Servers are added when
* they expire or when we find them in filesystem on startup. When a server
crash procedure is
@@ -56,12 +54,6 @@ public class DeadServer {
*/
private final Map<ServerName, Long> deadServers = new HashMap<>();
- /**
- * Set of dead servers currently being processed by a SCP. Added to this
list at the start of SCP
- * and removed after it is done processing the crash.
- */
- private final Set<ServerName> processingServers = new HashSet<>();
-
/**
* @param serverName server name.
* @return true if this server is on the dead servers list false otherwise
@@ -70,15 +62,6 @@ public class DeadServer {
return deadServers.containsKey(serverName);
}
- /**
- * Checks if there are currently any dead servers being processed by the
master. Returns true if
- * at least one region server is currently being processed as dead.
- * @return true if any RS are being processed as dead
- */
- synchronized boolean areDeadServersInProgress() {
- return !processingServers.isEmpty();
- }
-
public synchronized Set<ServerName> copyServerNames() {
Set<ServerName> clone = new HashSet<>(deadServers.size());
clone.addAll(deadServers.keySet());
@@ -90,29 +73,6 @@ public class DeadServer {
*/
synchronized void putIfAbsent(ServerName sn) {
this.deadServers.putIfAbsent(sn, EnvironmentEdgeManager.currentTime());
- processing(sn);
- }
-
- /**
- * Add <code>sn<</code> to set of processing deadservers.
- * @see #finish(ServerName)
- */
- public synchronized void processing(ServerName sn) {
- if (processingServers.add(sn)) {
- // Only log on add.
- LOG.debug("Processing {}; numProcessing={}", sn,
processingServers.size());
- }
- }
-
- /**
- * Complete processing for this dead server.
- * @param sn ServerName for the dead server.
- * @see #processing(ServerName)
- */
- public synchronized void finish(ServerName sn) {
- if (processingServers.remove(sn)) {
- LOG.debug("Removed {} from processing; numProcessing={}", sn,
processingServers.size());
- }
}
public synchronized int size() {
@@ -171,17 +131,12 @@ public class DeadServer {
// Display unified set of servers from both maps
Set<ServerName> servers = new HashSet<>();
servers.addAll(deadServers.keySet());
- servers.addAll(processingServers);
StringBuilder sb = new StringBuilder();
for (ServerName sn : servers) {
if (sb.length() > 0) {
sb.append(", ");
}
sb.append(sn.toString());
- // Star entries that are being processed
- if (processingServers.contains(sn)) {
- sb.append("*");
- }
}
return sb.toString();
}
@@ -220,9 +175,6 @@ public class DeadServer {
* @return true if this server was removed
*/
public synchronized boolean removeDeadServer(final ServerName
deadServerName) {
- Preconditions.checkState(!processingServers.contains(deadServerName),
- "Asked to remove server still in processingServers set " +
deadServerName + " (numProcessing="
- + processingServers.size() + ")");
return this.deadServers.remove(deadServerName) != null;
}
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 194cdcca65b..5b3213d7049 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -2527,11 +2527,18 @@ public class MasterRpcServices extends RSRpcServices
LOG.debug("Some dead server is still under processing, won't clear the
dead server list");
response.addAllServerName(request.getServerNameList());
} else {
+ DeadServer deadServer = master.getServerManager().getDeadServers();
for (HBaseProtos.ServerName pbServer : request.getServerNameList()) {
- if (
- !master.getServerManager().getDeadServers()
- .removeDeadServer(ProtobufUtil.toServerName(pbServer))
- ) {
+ ServerName server = ProtobufUtil.toServerName(pbServer);
+ final boolean deadInProcess =
+ master.getProcedures().stream().anyMatch(p -> (p instanceof
ServerCrashProcedure)
+ && ((ServerCrashProcedure) p).getServerName().equals(server));
+ if (deadInProcess) {
+ throw new ServiceException(
+ String.format("Dead server '%s' is not 'dead' in fact...",
server));
+ }
+
+ if (!deadServer.removeDeadServer(server)) {
response.addServerName(pbServer);
}
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index f7115a5cefb..075fb78198f 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -50,6 +50,7 @@ import
org.apache.hadoop.hbase.ipc.DecommissionedHostRejectedException;
import org.apache.hadoop.hbase.ipc.HBaseRpcController;
import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -547,8 +548,8 @@ public class ServerManager implements ConfigurationObserver
{
* Checks if any dead servers are currently in progress.
* @return true if any RS are being processed as dead, false if not
*/
- public boolean areDeadServersInProgress() {
- return this.deadservers.areDeadServersInProgress();
+ public boolean areDeadServersInProgress() throws IOException {
+ return master.getProcedures().stream().anyMatch(p -> p instanceof
ServerCrashProcedure);
}
void letRegionServersShutdown() {
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 69ebd0567de..2a7e96149df 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -140,7 +140,6 @@ public class ServerCrashProcedure extends
// This adds server to the DeadServer processing list but not to the
DeadServers list.
// Server gets removed from processing list below on procedure successful
finish.
if (!notifiedDeadServer) {
- services.getServerManager().getDeadServers().processing(serverName);
notifiedDeadServer = true;
}
@@ -255,7 +254,6 @@ public class ServerCrashProcedure extends
case SERVER_CRASH_FINISH:
LOG.info("removed crashed server {} after splitting done",
serverName);
services.getAssignmentManager().getRegionStates().removeServer(serverName);
- services.getServerManager().getDeadServers().finish(serverName);
updateProgress(true);
return Flow.NO_MORE_STATE;
default:
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
index 1cc0dd8379b..d9fbe57785b 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
@@ -183,7 +183,7 @@ public class TestRegionRebalancing {
/**
* Wait on crash processing. Balancer won't run if processing a crashed
server.
*/
- private void waitOnCrashProcessing() {
+ private void waitOnCrashProcessing() throws IOException {
while
(UTIL.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress())
{
LOG.info("Waiting on processing of crashed server before proceeding...");
Threads.sleep(1000);
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index 046c72050ba..94ac1823ac3 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -69,22 +70,10 @@ public class TestDeadServer {
public void testIsDead() {
DeadServer ds = new DeadServer();
ds.putIfAbsent(hostname123);
- ds.processing(hostname123);
- assertTrue(ds.areDeadServersInProgress());
- ds.finish(hostname123);
- assertFalse(ds.areDeadServersInProgress());
ds.putIfAbsent(hostname1234);
- ds.processing(hostname1234);
- assertTrue(ds.areDeadServersInProgress());
- ds.finish(hostname1234);
- assertFalse(ds.areDeadServersInProgress());
ds.putIfAbsent(hostname12345);
- ds.processing(hostname12345);
- assertTrue(ds.areDeadServersInProgress());
- ds.finish(hostname12345);
- assertFalse(ds.areDeadServersInProgress());
// Already dead = 127.0.0.1,9090,112321
// Coming back alive = 127.0.0.1,9090,223341
@@ -104,7 +93,7 @@ public class TestDeadServer {
}
@Test
- public void testCrashProcedureReplay() {
+ public void testCrashProcedureReplay() throws IOException {
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
final ProcedureExecutor<MasterProcedureEnv> pExecutor =
master.getMasterProcedureExecutor();
ServerCrashProcedure proc =
@@ -112,7 +101,7 @@ public class TestDeadServer {
ProcedureTestingUtility.submitAndWait(pExecutor, proc);
-
assertFalse(master.getServerManager().getDeadServers().areDeadServersInProgress());
+ assertTrue(master.getServerManager().areDeadServersInProgress());
}
@Test
@@ -163,17 +152,14 @@ public class TestDeadServer {
d.putIfAbsent(hostname1234);
Assert.assertEquals(2, d.size());
- d.finish(hostname123);
d.removeDeadServer(hostname123);
Assert.assertEquals(1, d.size());
- d.finish(hostname1234);
d.removeDeadServer(hostname1234);
Assert.assertTrue(d.isEmpty());
d.putIfAbsent(hostname1234);
Assert.assertFalse(d.removeDeadServer(hostname123_2));
Assert.assertEquals(1, d.size());
- d.finish(hostname1234);
Assert.assertTrue(d.removeDeadServer(hostname1234));
Assert.assertTrue(d.isEmpty());
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
index 2303cb1965e..367c68213a2 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
@@ -145,7 +145,6 @@ public class TestHBCKSCP extends TestSCPBase {
cluster.killRegionServer(rsServerName);
master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
- master.getServerManager().getDeadServers().finish(rsServerName);
master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
// Kill the server. Nothing should happen since an 'Unknown Server' as far