This is an automated email from the ASF dual-hosted git repository.

Apache9 pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new e602cbe08f5 HBASE-30119 Stop JmxCacheBuster in HBTU (#8156)
e602cbe08f5 is described below

commit e602cbe08f559d6734c05a44d0f0f29b75450b5c
Author: Duo Zhang <[email protected]>
AuthorDate: Tue Apr 28 20:11:59 2026 +0800

    HBASE-30119 Stop JmxCacheBuster in HBTU (#8156)
    
    Signed-off-by: Xiao Liu <[email protected]>
    (cherry picked from commit 1ba4d70b746c5b998e5e922de83f65dcf64c14a0)
---
 .../hadoop/metrics2/impl/JmxCacheBuster.java       | 24 ++++++++++++----------
 .../apache/hadoop/hbase/HBaseTestingUtility.java   | 15 ++++++++++++++
 .../procedure/TestCreateTableNoRegionServer.java   |  8 --------
 3 files changed, 28 insertions(+), 19 deletions(-)

diff --git 
a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
 
b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
index 9b62cd898f6..99f12902ae5 100644
--- 
a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
+++ 
b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/metrics2/impl/JmxCacheBuster.java
@@ -38,9 +38,9 @@ import org.slf4j.LoggerFactory;
 @InterfaceAudience.Private
 public final class JmxCacheBuster {
   private static final Logger LOG = 
LoggerFactory.getLogger(JmxCacheBuster.class);
-  private static AtomicReference<ScheduledFuture> fut = new 
AtomicReference<>(null);
-  private static MetricsExecutor executor = new MetricsExecutorImpl();
-  private static AtomicBoolean stopped = new AtomicBoolean(false);
+  private static final AtomicReference<ScheduledFuture<?>> FUT = new 
AtomicReference<>(null);
+  private static final MetricsExecutor EXECUTOR = new MetricsExecutorImpl();
+  private static final AtomicBoolean STOPPED = new AtomicBoolean(false);
 
   private JmxCacheBuster() {
     // Static only cache.
@@ -54,16 +54,16 @@ public final class JmxCacheBuster {
       LOG.trace("clearing JMX Cache" + StringUtils.stringifyException(new 
Exception()));
     }
     // If there are more than 100 ms before the executor will run then 
everything should be merged.
-    ScheduledFuture future = fut.get();
+    ScheduledFuture<?> future = FUT.get();
     if ((future != null && (!future.isDone() && 
future.getDelay(TimeUnit.MILLISECONDS) > 100))) {
       // BAIL OUT
       return;
     }
-    if (stopped.get()) {
+    if (STOPPED.get()) {
       return;
     }
-    future = executor.getExecutor().schedule(new JmxCacheBusterRunnable(), 5, 
TimeUnit.SECONDS);
-    fut.set(future);
+    future = EXECUTOR.getExecutor().schedule(new JmxCacheBusterRunnable(), 5, 
TimeUnit.SECONDS);
+    FUT.set(future);
   }
 
   /**
@@ -71,9 +71,11 @@ public final class JmxCacheBuster {
    * some test environments where we manually inject sources or sinks 
dynamically.
    */
   public static void stop() {
-    stopped.set(true);
-    ScheduledFuture future = fut.get();
-    future.cancel(false);
+    STOPPED.set(true);
+    ScheduledFuture<?> future = FUT.get();
+    if (future != null) {
+      future.cancel(false);
+    }
   }
 
   /**
@@ -81,7 +83,7 @@ public final class JmxCacheBuster {
    * @see #stop()
    */
   public static void restart() {
-    stopped.set(false);
+    STOPPED.set(false);
   }
 
   final static class JmxCacheBusterRunnable implements Runnable {
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 68456b7d341..62084bd5408 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -158,6 +158,7 @@ import 
org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
 import org.apache.hadoop.minikdc.MiniKdc;
 import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -230,6 +231,18 @@ public class HBaseTestingUtility extends 
HBaseZKTestingUtility {
   /** This is for unit tests parameterized with a single boolean. */
   public static final List<Object[]> MEMSTORETS_TAGS_PARAMETRIZED = 
memStoreTSAndTagsCombination();
 
+  static {
+    // JmxCacheBuster may cause a deadlock in the test environment. On the master 
side, the table/region
+    // related metrics updating will finally lead to a meta access, so if meta 
is not online yet, we
+    // will block when updating while holding the metrics lock. But when we 
assign meta, there are
+    // a bunch of places where we need to register new metrics and thus need to 
get the metrics lock,
+    // which then leads to a deadlock and causes the test to hang forever.
+    // The code is in hadoop so there is no easy way for us to fix it, so here we 
just stop
+    // JmxCacheBuster to stabilize our tests first. See HBASE-30118 for more 
details and future
+    // plans.
+    JmxCacheBuster.stop();
+  }
+
   /**
    * Checks to see if a specific port is available.
    * @param port the port number to check for availability
@@ -3625,6 +3638,7 @@ public class HBaseTestingUtility extends 
HBaseZKTestingUtility {
       HMaster master = getHBaseCluster().getMaster();
       final RegionStates states = 
master.getAssignmentManager().getRegionStates();
       waitFor(timeout, 200, new ExplainingPredicate<IOException>() {
+
         @Override
         public String explainFailure() throws IOException {
           return explainTableAvailability(tableName);
@@ -3635,6 +3649,7 @@ public class HBaseTestingUtility extends 
HBaseZKTestingUtility {
           List<RegionInfo> hris = states.getRegionsOfTable(tableName);
           return hris != null && !hris.isEmpty();
         }
+
       });
     }
     LOG.info("All regions for table " + tableName + " assigned.");
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
index 469296aeb1e..9fd137c80f4 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableNoRegionServer.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Tag;
@@ -112,13 +111,6 @@ public class TestCreateTableNoRegionServer {
   public static void setUp() throws Exception {
     UTIL
       
.startMiniCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class).build());
-    // this may cause dead lock if there is no live region server and want to 
start a new server.
-    // In JmxCacheBuster we will reinitialize the metrics system so it will 
get some metrics which
-    // will need to access meta, since there is no region server, the request 
will hang there for a
-    // long time while holding the lock of MetricsSystemImpl, but when start a 
new region server, we
-    // also need to update metrics in handleReportForDutyResponse, since we 
are all in the same
-    // process and uses the same metrics instance, we hit dead lock.
-    JmxCacheBuster.stop();
   }
 
   @AfterAll

Reply via email to