This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.6 by this push:
new f5c466b5060 HBASE-28023 ITBLL's RollingBatchSuspendResumeRsAction uses
waitForRegionServerToSuspend to verify the success of the suspendRs action.
(#6583)
f5c466b5060 is described below
commit f5c466b50602a3717034fa48bd2d31cbc0f92908
Author: hiping-tech <[email protected]>
AuthorDate: Mon Jan 13 21:50:14 2025 +0800
HBASE-28023 ITBLL's RollingBatchSuspendResumeRsAction uses
waitForRegionServerToSuspend to verify the success of the suspendRs action.
(#6583)
Co-authored-by: lvhaiping.lhp <[email protected]>
Signed-off-by: Duo Zhang <[email protected]>
(cherry picked from commit b23d585f92e04b488e837496c048a7ada4c35a63)
---
.../org/apache/hadoop/hbase/ClusterManager.java | 12 +++++++
.../hadoop/hbase/DistributedHBaseCluster.java | 38 ++++++++++++++++++++++
.../apache/hadoop/hbase/HBaseClusterManager.java | 18 ++++++++++
.../apache/hadoop/hbase/RESTApiClusterManager.java | 10 ++++++
.../apache/hadoop/hbase/ZNodeClusterManager.java | 12 +++++++
.../actions/RollingBatchSuspendResumeRsAction.java | 2 +-
.../chaos/monkies/PolicyBasedChaosMonkey.java | 1 +
.../java/org/apache/hadoop/hbase/HBaseCluster.java | 14 ++++++++
.../org/apache/hadoop/hbase/MiniHBaseCluster.java | 10 ++++++
9 files changed, 116 insertions(+), 1 deletion(-)
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java
index 9fe59828062..2fc13592893 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java
@@ -93,6 +93,18 @@ interface ClusterManager extends Configurable {
*/
boolean isRunning(ServiceType service, String hostname, int port) throws IOException;
+ /**
+ * Returns whether the service is suspended on the remote host. This only checks whether the
+ * service status is suspended.
+ */
+ boolean isSuspended(ServiceType service, String hostname, int port) throws IOException;
+
+ /**
+ * Returns whether the service is resumed on the remote host. This only checks whether the service
+ * status is resumed.
+ */
+ boolean isResumed(ServiceType service, String hostname, int port) throws IOException;
+
/*
* TODO: further API ideas: //return services running on host: ServiceType[]
* getRunningServicesOnHost(String hostname); //return which services can be run on host (for
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
index 9477c465321..8fdd9b4a451 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
@@ -142,6 +142,16 @@ public class DistributedHBaseCluster extends HBaseCluster {
waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
}
+ @Override
+ public void waitForRegionServerToSuspend(ServerName serverName, long timeout) throws IOException {
+ waitForServiceToSuspend(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
+ }
+
+ @Override
+ public void waitForRegionServerToResume(ServerName serverName, long timeout) throws IOException {
+ waitForServiceToResume(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
+ }
+
@Override
public void suspendRegionServer(ServerName serverName) throws IOException {
LOG.info("Suspend RS: {}", serverName.getServerName());
@@ -307,6 +317,34 @@ public class DistributedHBaseCluster extends HBaseCluster {
throw new IOException("Timed-out waiting for service to start: " + serverName);
}
+ private void waitForServiceToSuspend(ServiceType service, ServerName serverName, long timeout)
+ throws IOException {
+ LOG.info("Waiting for service: {} to suspend: {}", service, serverName.getServerName());
+ long start = System.currentTimeMillis();
+
+ while ((System.currentTimeMillis() - start) < timeout) {
+ if (clusterManager.isSuspended(service, serverName.getHostname(), serverName.getPort())) {
+ return;
+ }
+ Threads.sleep(100);
+ }
+ throw new IOException("Timed-out waiting for service to suspend: " + serverName);
+ }
+
+ private void waitForServiceToResume(ServiceType service, ServerName serverName, long timeout)
+ throws IOException {
+ LOG.info("Waiting for service: {} to resume: {}", service, serverName.getServerName());
+ long start = System.currentTimeMillis();
+
+ while ((System.currentTimeMillis() - start) < timeout) {
+ if (clusterManager.isResumed(service, serverName.getHostname(), serverName.getPort())) {
+ return;
+ }
+ Threads.sleep(100);
+ }
+ throw new IOException("Timed-out waiting for service to resume: " + serverName);
+ }
+
@Override
public MasterService.BlockingInterface getMasterAdminService() throws IOException {
return ((ClusterConnection) this.connection).getMaster();
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index a73748d5c4f..15ec1f3193a 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -216,6 +216,10 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
service);
}
+ protected String getStateCommand(ServiceType service) {
+ return String.format("%s | xargs ps -o state= -p ", findPidCommand(service));
+ }
+
public String signalCommand(ServiceType service, String signal) {
return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
}
@@ -465,4 +469,18 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
public void resume(ServiceType service, String hostname, int port) throws IOException {
signal(service, Signal.SIGCONT, hostname);
}
+
+ public boolean isSuspended(ServiceType service, String hostname, int port) throws IOException {
+ String ret =
+ execWithRetries(hostname, service, getCommandProvider(service).getStateCommand(service))
+ .getSecond();
+ return ret != null && ret.trim().equals("T");
+ }
+
+ public boolean isResumed(ServiceType service, String hostname, int port) throws IOException {
+ String ret =
+ execWithRetries(hostname, service, getCommandProvider(service).getStateCommand(service))
+ .getSecond();
+ return ret != null && !ret.trim().equals("T");
+ }
}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java
index 8c5339605e2..8b00d1372f4 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java
@@ -264,6 +264,16 @@ public class RESTApiClusterManager extends Configured implements ClusterManager
hBaseClusterManager.resume(service, hostname, port);
}
+ @Override
+ public boolean isSuspended(ServiceType service, String hostname, int port) throws IOException {
+ return hBaseClusterManager.isSuspended(service, hostname, port);
+ }
+
+ @Override
+ public boolean isResumed(ServiceType service, String hostname, int port) throws IOException {
+ return hBaseClusterManager.isResumed(service, hostname, port);
+ }
+
// Convenience method to execute command against role on hostname. Only graceful commands are
// supported since cluster management APIs don't tend to let you SIGKILL things.
private void performClusterManagerCommand(ServiceType role, String hostname,
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/ZNodeClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/ZNodeClusterManager.java
index 2a75f670f5b..fefa1248b73 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/ZNodeClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/ZNodeClusterManager.java
@@ -111,6 +111,18 @@ public class ZNodeClusterManager extends Configured implements ClusterManager {
CmdType.bool.toString() + getCommandProvider(service).isRunningCommand(service)));
}
+ @Override
+ public boolean isSuspended(ServiceType service, String hostname, int port) throws IOException {
+ String ret = createZNode(hostname, getCommandProvider(service).getStateCommand(service));
+ return ret != null && ret.trim().equals("T");
+ }
+
+ @Override
+ public boolean isResumed(ServiceType service, String hostname, int port) throws IOException {
+ String ret = createZNode(hostname, getCommandProvider(service).getStateCommand(service));
+ return ret != null && !ret.trim().equals("T");
+ }
+
enum CmdType {
exec,
bool
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java
index 07261bb58e0..559dec829ee 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java
@@ -65,7 +65,7 @@ public class RollingBatchSuspendResumeRsAction extends Action {
@Override
public void perform() throws Exception {
- getLogger().info("Performing action: Rolling batch restarting {}% of region servers",
+ getLogger().info("Performing action: Rolling batch suspending {}% of region servers",
(int) (ratio * 100));
List<ServerName> selectedServers = selectServers();
Queue<ServerName> serversToBeSuspended = new ArrayDeque<>(selectedServers);
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java
index f5af796b575..fb8ab209c3a 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java
@@ -85,6 +85,7 @@ public class PolicyBasedChaosMonkey extends ChaosMonkey {
private static ExecutorService buildMonkeyThreadPool(final int size) {
return Executors.newFixedThreadPool(size, new ThreadFactoryBuilder().setDaemon(false)
.setNameFormat("ChaosMonkey-%d").setUncaughtExceptionHandler((t, e) -> {
+ LOG.error("Uncaught exception in thread {}", t.getName(), e);
throw new RuntimeException(e);
}).build());
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
index 101eb285ef3..d2950a2bbd1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
@@ -175,6 +175,13 @@ public abstract class HBaseCluster implements Closeable, Configurable {
*/
public abstract void suspendRegionServer(ServerName serverName) throws IOException;
+ /**
+ * Wait for the specified region server to suspend the thread / process.
+ * @throws IOException if something goes wrong or timeout occurs
+ */
+ public abstract void waitForRegionServerToSuspend(ServerName serverName, long timeout)
+ throws IOException;
+
/**
* Resume the region server
* @param serverName the hostname to resume the regionserver on
@@ -182,6 +189,13 @@ public abstract class HBaseCluster implements Closeable, Configurable {
*/
public abstract void resumeRegionServer(ServerName serverName) throws IOException;
+ /**
+ * Wait for the specified region server to resume the thread / process.
+ * @throws IOException if something goes wrong or timeout occurs
+ */
+ public abstract void waitForRegionServerToResume(ServerName serverName, long timeout)
+ throws IOException;
+
/**
* Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, silently
* logs warning message.
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
index 226ee4013ae..d6093d33118 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
@@ -302,6 +302,16 @@ public class MiniHBaseCluster extends HBaseCluster {
waitOnRegionServer(getRegionServerIndex(serverName));
}
+ @Override
+ public void waitForRegionServerToSuspend(ServerName serverName, long timeout) throws IOException {
+ LOG.warn("Waiting for regionserver to suspend on mini cluster is not supported");
+ }
+
+ @Override
+ public void waitForRegionServerToResume(ServerName serverName, long timeout) throws IOException {
+ LOG.warn("Waiting for regionserver to resume on mini cluster is not supported");
+ }
+
@Override
public void startZkNode(String hostname, int port) throws IOException {
LOG.warn("Starting zookeeper nodes on mini cluster is not supported");