This is an automated email from the ASF dual-hosted git repository.
Apache9 pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-3 by this push:
new 605439f4169 HBASE-30119 Stop JmxCacheBuster in HBTU (#8156)
605439f4169 is described below
commit 605439f41694d8cae673c1d1d605e261f3564491
Author: Duo Zhang <[email protected]>
AuthorDate: Tue Apr 28 20:11:59 2026 +0800
HBASE-30119 Stop JmxCacheBuster in HBTU (#8156)
Signed-off-by: Xiao Liu <[email protected]>
(cherry picked from commit 1ba4d70b746c5b998e5e922de83f65dcf64c14a0)
---
.../hadoop/metrics2/impl/JmxCacheBuster.java | 24 ++++++++++++----------
.../org/apache/hadoop/hbase/HBaseTestingUtil.java | 13 ++++++++++++
.../procedure/TestCreateTableNoRegionServer.java | 8 --------
.../apache/hadoop/hbase/HBaseTestingUtility.java | 13 ++++++++++++
4 files changed, 39 insertions(+), 19 deletions(-)
diff --git
a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
index 9b62cd898f6..99f12902ae5 100644
---
a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
+++
b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
@@ -38,9 +38,9 @@ import org.slf4j.LoggerFactory;
@InterfaceAudience.Private
public final class JmxCacheBuster {
private static final Logger LOG =
LoggerFactory.getLogger(JmxCacheBuster.class);
- private static AtomicReference<ScheduledFuture> fut = new
AtomicReference<>(null);
- private static MetricsExecutor executor = new MetricsExecutorImpl();
- private static AtomicBoolean stopped = new AtomicBoolean(false);
+ private static final AtomicReference<ScheduledFuture<?>> FUT = new
AtomicReference<>(null);
+ private static final MetricsExecutor EXECUTOR = new MetricsExecutorImpl();
+ private static final AtomicBoolean STOPPED = new AtomicBoolean(false);
private JmxCacheBuster() {
// Static only cache.
@@ -54,16 +54,16 @@ public final class JmxCacheBuster {
LOG.trace("clearing JMX Cache" + StringUtils.stringifyException(new
Exception()));
}
// If there are more then 100 ms before the executor will run then
everything should be merged.
- ScheduledFuture future = fut.get();
+ ScheduledFuture<?> future = FUT.get();
if ((future != null && (!future.isDone() &&
future.getDelay(TimeUnit.MILLISECONDS) > 100))) {
// BAIL OUT
return;
}
- if (stopped.get()) {
+ if (STOPPED.get()) {
return;
}
- future = executor.getExecutor().schedule(new JmxCacheBusterRunnable(), 5,
TimeUnit.SECONDS);
- fut.set(future);
+ future = EXECUTOR.getExecutor().schedule(new JmxCacheBusterRunnable(), 5,
TimeUnit.SECONDS);
+ FUT.set(future);
}
/**
@@ -71,9 +71,11 @@ public final class JmxCacheBuster {
* some test environments where we manually inject sources or sinks
dynamically.
*/
public static void stop() {
- stopped.set(true);
- ScheduledFuture future = fut.get();
- future.cancel(false);
+ STOPPED.set(true);
+ ScheduledFuture<?> future = FUT.get();
+ if (future != null) {
+ future.cancel(false);
+ }
}
/**
@@ -81,7 +83,7 @@ public final class JmxCacheBuster {
* @see #stop()
*/
public static void restart() {
- stopped.set(false);
+ STOPPED.set(false);
}
final static class JmxCacheBusterRunnable implements Runnable {
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java
index 4ada86e2be1..1b26b2f0270 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java
@@ -153,6 +153,7 @@ import
org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
@@ -214,6 +215,18 @@ public class HBaseTestingUtil extends HBaseZKTestingUtil {
/** This is for unit tests parameterized with a single boolean. */
public static final List<Object[]> MEMSTORETS_TAGS_PARAMETRIZED =
memStoreTSAndTagsCombination();
+ static {
+ // JmxCacheBuster may cause a deadlock in the test environment. On the master
side, table/region
+ // related metrics updates eventually lead to a meta access, so if meta
is not online yet, we
+ // will block when updating while holding the metrics lock. But when we
assign meta, there are
+ // a bunch of places where we need to register new metrics and thus need to
get the metrics lock,
+ // which leads to a deadlock and causes the test to hang forever.
+ // The code is in hadoop so there is no easy way for us to fix it, so here we
just stop
+ // JmxCacheBuster to stabilize our tests first. See HBASE-30118 for more
details and future
+ // plans.
+ JmxCacheBuster.stop();
+ }
+
/**
* Checks to see if a specific port is available.
* @param port the port number to check for availability
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
index 5119133e7b0..e1f8d925b43 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
@@ -112,13 +111,6 @@ public class TestCreateTableNoRegionServer {
public static void setUp() throws Exception {
UTIL.startMiniCluster(
StartTestingClusterOption.builder().masterClass(HMasterForTest.class).build());
- // this may cause dead lock if there is no live region server and want to
start a new server.
- // In JmxCacheBuster we will reinitialize the metrics system so it will
get some metrics which
- // will need to access meta, since there is no region server, the request
will hang there for a
- // long time while holding the lock of MetricsSystemImpl, but when start a
new region server, we
- // also need to update metrics in handleReportForDutyResponse, since we
are all in the same
- // process and uses the same metrics instance, we hit dead lock.
- JmxCacheBuster.stop();
}
@AfterAll
diff --git
a/hbase-testing-util/src/main/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
b/hbase-testing-util/src/main/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 6e4759a6e53..652a4cbfda7 100644
---
a/hbase-testing-util/src/main/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++
b/hbase-testing-util/src/main/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -142,6 +142,7 @@ import
org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.TaskLog;
+import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.WatchedEvent;
@@ -202,6 +203,18 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
/** This is for unit tests parameterized with a single boolean. */
public static final List<Object[]> MEMSTORETS_TAGS_PARAMETRIZED =
memStoreTSAndTagsCombination();
+ static {
+ // JmxCacheBuster may cause a deadlock in the test environment. On the master
side, table/region
+ // related metrics updates eventually lead to a meta access, so if meta
is not online yet, we
+ // will block when updating while holding the metrics lock. But when we
assign meta, there are
+ // a bunch of places where we need to register new metrics and thus need to
get the metrics lock,
+ // which leads to a deadlock and causes the test to hang forever.
+ // The code is in hadoop so there is no easy way for us to fix it, so here we
just stop
+ // JmxCacheBuster to stabilize our tests first. See HBASE-30118 for more
details and future
+ // plans.
+ JmxCacheBuster.stop();
+ }
+
/**
* Checks to see if a specific port is available.
* @param port the port number to check for availability