This is an automated email from the ASF dual-hosted git repository.
Apache9 pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.6 by this push:
new 8e4f1074522 HBASE-30119 Stop JmxCacheBuster in HBTU (#8156)
8e4f1074522 is described below
commit 8e4f1074522eea56287cbd07bf643bb2e116b257
Author: Duo Zhang <[email protected]>
AuthorDate: Tue Apr 28 20:11:59 2026 +0800
HBASE-30119 Stop JmxCacheBuster in HBTU (#8156)
Signed-off-by: Xiao Liu <[email protected]>
(cherry picked from commit 1ba4d70b746c5b998e5e922de83f65dcf64c14a0)
---
.../hadoop/metrics2/impl/JmxCacheBuster.java | 24 ++++++++++++----------
.../apache/hadoop/hbase/HBaseTestingUtility.java | 15 ++++++++++++++
.../procedure/TestCreateTableNoRegionServer.java | 8 --------
3 files changed, 28 insertions(+), 19 deletions(-)
diff --git
a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
index 9b62cd898f6..99f12902ae5 100644
---
a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
+++
b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
@@ -38,9 +38,9 @@ import org.slf4j.LoggerFactory;
@InterfaceAudience.Private
public final class JmxCacheBuster {
private static final Logger LOG =
LoggerFactory.getLogger(JmxCacheBuster.class);
- private static AtomicReference<ScheduledFuture> fut = new
AtomicReference<>(null);
- private static MetricsExecutor executor = new MetricsExecutorImpl();
- private static AtomicBoolean stopped = new AtomicBoolean(false);
+ private static final AtomicReference<ScheduledFuture<?>> FUT = new
AtomicReference<>(null);
+ private static final MetricsExecutor EXECUTOR = new MetricsExecutorImpl();
+ private static final AtomicBoolean STOPPED = new AtomicBoolean(false);
private JmxCacheBuster() {
// Static only cache.
@@ -54,16 +54,16 @@ public final class JmxCacheBuster {
LOG.trace("clearing JMX Cache" + StringUtils.stringifyException(new
Exception()));
}
// If there are more then 100 ms before the executor will run then
everything should be merged.
- ScheduledFuture future = fut.get();
+ ScheduledFuture<?> future = FUT.get();
if ((future != null && (!future.isDone() &&
future.getDelay(TimeUnit.MILLISECONDS) > 100))) {
// BAIL OUT
return;
}
- if (stopped.get()) {
+ if (STOPPED.get()) {
return;
}
- future = executor.getExecutor().schedule(new JmxCacheBusterRunnable(), 5,
TimeUnit.SECONDS);
- fut.set(future);
+ future = EXECUTOR.getExecutor().schedule(new JmxCacheBusterRunnable(), 5,
TimeUnit.SECONDS);
+ FUT.set(future);
}
/**
@@ -71,9 +71,11 @@ public final class JmxCacheBuster {
* some test environments where we manually inject sources or sinks
dynamically.
*/
public static void stop() {
- stopped.set(true);
- ScheduledFuture future = fut.get();
- future.cancel(false);
+ STOPPED.set(true);
+ ScheduledFuture<?> future = FUT.get();
+ if (future != null) {
+ future.cancel(false);
+ }
}
/**
@@ -81,7 +83,7 @@ public final class JmxCacheBuster {
* @see #stop()
*/
public static void restart() {
- stopped.set(false);
+ STOPPED.set(false);
}
final static class JmxCacheBusterRunnable implements Runnable {
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 68456b7d341..62084bd5408 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -158,6 +158,7 @@ import
org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.hadoop.security.authentication.util.KerberosName;
import org.apache.yetus.audience.InterfaceAudience;
@@ -230,6 +231,18 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
/** This is for unit tests parameterized with a single boolean. */
public static final List<Object[]> MEMSTORETS_TAGS_PARAMETRIZED =
memStoreTSAndTagsCombination();
+ static {
+ // JmxCacheBuster may cause a deadlock in the test environment. On the master side, updating
+ // table/region related metrics eventually leads to a meta access, so if meta is not online yet,
+ // the update blocks while holding the metrics lock. But assigning meta needs to register new
+ // metrics in a bunch of places, which also requires the metrics lock, so we hit a deadlock
+ // and the test hangs forever.
+ // The code is in hadoop so there is no easy way for us to fix it, so here we just stop
+ // JmxCacheBuster to stabilize our tests first. See HBASE-30118 for more details and future
+ // plans.
+ JmxCacheBuster.stop();
+ }
+
/**
* Checks to see if a specific port is available.
* @param port the port number to check for availability
@@ -3625,6 +3638,7 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
HMaster master = getHBaseCluster().getMaster();
final RegionStates states =
master.getAssignmentManager().getRegionStates();
waitFor(timeout, 200, new ExplainingPredicate<IOException>() {
+
@Override
public String explainFailure() throws IOException {
return explainTableAvailability(tableName);
@@ -3635,6 +3649,7 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
List<RegionInfo> hris = states.getRegionsOfTable(tableName);
return hris != null && !hris.isEmpty();
}
+
});
}
LOG.info("All regions for table " + tableName + " assigned.");
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
index 469296aeb1e..9fd137c80f4 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
@@ -112,13 +111,6 @@ public class TestCreateTableNoRegionServer {
public static void setUp() throws Exception {
UTIL
.startMiniCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class).build());
- // this may cause dead lock if there is no live region server and want to
start a new server.
- // In JmxCacheBuster we will reinitialize the metrics system so it will
get some metrics which
- // will need to access meta, since there is no region server, the request
will hang there for a
- // long time while holding the lock of MetricsSystemImpl, but when start a
new region server, we
- // also need to update metrics in handleReportForDutyResponse, since we
are all in the same
- // process and uses the same metrics instance, we hit dead lock.
- JmxCacheBuster.stop();
}
@AfterAll