This is an automated email from the ASF dual-hosted git repository.
cconnell pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.5 by this push:
new ade1196ceec HBASE-30073 Test fixes for some flappers and a
reproducible error (#8057) (#8082) (#8092)
ade1196ceec is described below
commit ade1196ceec9f32b0cbef48da464a5804e244523
Author: Hari Krishna Dara <[email protected]>
AuthorDate: Thu Apr 16 17:24:22 2026 +0530
HBASE-30073 Test fixes for some flappers and a reproducible error (#8057)
(#8082) (#8092)
Signed-off by: Charles Connell <[email protected]>
---
.../apache/hadoop/hbase/HBaseJupiterExtension.java | 2 +-
hbase-compression/pom.xml | 5 +
.../apache/hadoop/hbase/HBaseTestingUtility.java | 10 +-
.../org/apache/hadoop/hbase/TestZooKeeper.java | 1 -
.../hbase/client/AbstractTestAsyncTableScan.java | 18 +++-
.../hbase/master/assignment/TestRollbackSCP.java | 5 +
.../hadoop/hbase/regionserver/TestHRegion.java | 119 +++++++++++++++++----
.../hbase/replication/TestReplicationBase.java | 4 +
8 files changed, 137 insertions(+), 27 deletions(-)
diff --git
a/hbase-common/src/test/java/org/apache/hadoop/hbase/HBaseJupiterExtension.java
b/hbase-common/src/test/java/org/apache/hadoop/hbase/HBaseJupiterExtension.java
index 9d4ea87e0ec..057c4642ffa 100644
---
a/hbase-common/src/test/java/org/apache/hadoop/hbase/HBaseJupiterExtension.java
+++
b/hbase-common/src/test/java/org/apache/hadoop/hbase/HBaseJupiterExtension.java
@@ -84,7 +84,7 @@ public class HBaseJupiterExtension implements
InvocationInterceptor, BeforeAllCa
private static final Map<String, Duration> TAG_TO_TIMEOUT =
ImmutableMap.of(SmallTests.TAG, Duration.ofMinutes(3), MediumTests.TAG,
Duration.ofMinutes(6),
- LargeTests.TAG, Duration.ofMinutes(13), IntegrationTests.TAG,
Duration.ZERO);
+ LargeTests.TAG, Duration.ofMinutes(20), IntegrationTests.TAG,
Duration.ZERO);
private static final String EXECUTOR = "executor";
diff --git a/hbase-compression/pom.xml b/hbase-compression/pom.xml
index 4b99b3dd34b..680af73dea0 100644
--- a/hbase-compression/pom.xml
+++ b/hbase-compression/pom.xml
@@ -46,6 +46,11 @@
<artifactId>hbase-resource-bundle</artifactId>
<optional>true</optional>
</dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-api</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 68456b7d341..17eb3160452 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -489,7 +489,13 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
String sysValue = System.getProperty(propertyName);
- if (sysValue != null) {
+ // Check if directory sharing should be disabled for this test.
+ // Tests that run with high parallelism and don't need shared directories
can set this
+ // to avoid race conditions where one test's tearDown() deletes
directories another test
+ // is still using.
+ boolean disableSharing =
conf.getBoolean("hbase.test.disable-directory-sharing", false);
+
+ if (sysValue != null && !disableSharing) {
// There is already a value set. So we do nothing but hope
// that there will be no conflicts
LOG.info("System.getProperty(\"" + propertyName + "\") already set to: "
+ sysValue
@@ -502,7 +508,7 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
}
conf.set(propertyName, sysValue);
} else {
- // Ok, it's not set, so we create it as a subdirectory
+ // Ok, it's not set (or sharing is disabled), so we create it as a
subdirectory
createSubDir(propertyName, parent, subDirName);
System.setProperty(propertyName, conf.get(propertyName));
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
index 0b0454e42eb..39eca489515 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java
@@ -77,7 +77,6 @@ public class TestZooKeeper {
conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
MockLoadBalancer.class,
LoadBalancer.class);
- TEST_UTIL.startMiniDFSCluster(2);
}
@AfterAll
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/AbstractTestAsyncTableScan.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/AbstractTestAsyncTableScan.java
index 6f36be61c65..158db897587 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/AbstractTestAsyncTableScan.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/AbstractTestAsyncTableScan.java
@@ -46,6 +46,7 @@ import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ConnectionRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MatcherPredicate;
import org.apache.hadoop.hbase.MiniClusterRule;
@@ -73,8 +74,21 @@ import org.junit.rules.TestRule;
public abstract class AbstractTestAsyncTableScan {
protected static final OpenTelemetryClassRule OTEL_CLASS_RULE =
OpenTelemetryClassRule.create();
- protected static final MiniClusterRule MINI_CLUSTER_RULE =
MiniClusterRule.newBuilder()
-
.setMiniClusterOption(StartMiniClusterOption.builder().numWorkers(3).build()).build();
+
+ private static Configuration createConfiguration() {
+ // Use HBaseConfiguration.create() instead of new Configuration() to
properly load
+ // hbase-default.xml which contains required default values (e.g. for log
cleaner plugins)
+ Configuration conf = HBaseConfiguration.create();
+ // Disable directory sharing to prevent race conditions when tests run in
parallel.
+ // Each test instance gets its own isolated directories to avoid one
test's tearDown()
+ // deleting directories another parallel test is still using.
+ conf.setBoolean("hbase.test.disable-directory-sharing", true);
+ return conf;
+ }
+
+ protected static final MiniClusterRule MINI_CLUSTER_RULE =
+ MiniClusterRule.newBuilder().setConfiguration(createConfiguration())
+
.setMiniClusterOption(StartMiniClusterOption.builder().numWorkers(3).build()).build();
protected static final ConnectionRule CONN_RULE =
ConnectionRule.createAsyncConnectionRule(MINI_CLUSTER_RULE::createAsyncConnection);
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRollbackSCP.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRollbackSCP.java
index 25f2e582068..364057c06b1 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRollbackSCP.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRollbackSCP.java
@@ -136,6 +136,11 @@ public class TestRollbackSCP {
@Before
public void setUp() throws IOException {
UTIL.ensureSomeNonStoppedRegionServersAvailable(2);
+ // Surefire reruns failed tests in the same JVM without re-running
@BeforeClass. Reset injection
+ // state so compareAndSet in persistToMeta can succeed again and
kill-before-store flags clear.
+ INJECTED.set(false);
+ ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdateInRollback(
+ UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(),
false);
}
private ServerCrashProcedure getSCPForServer(ServerName serverName) throws
IOException {
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index aa6c718bc78..53b566c5297 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -259,6 +259,10 @@ public class TestHRegion {
TEST_UTIL = HBaseTestingUtility.createLocalHTU();
FILESYSTEM = TEST_UTIL.getTestFileSystem();
CONF = TEST_UTIL.getConfiguration();
+ // Disable directory sharing to prevent race conditions when tests run in
parallel.
+ // Each test instance gets its own isolated directories to avoid one
test's tearDown()
+ // deleting directories another parallel test is still using.
+ CONF.setBoolean("hbase.test.disable-directory-sharing", true);
NettyAsyncFSWALConfigHelper.setEventLoopConfig(CONF, GROUP,
NioSocketChannel.class);
dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
method = name.getMethodName();
@@ -1352,18 +1356,23 @@ public class TestHRegion {
threads[i].start();
}
} finally {
+ done.set(true);
+ for (GetTillDoneOrException t : threads) {
+ if (t != null) {
+ try {
+ t.join(5000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ }
if (this.region != null) {
HBaseTestingUtility.closeRegionAndWAL(this.region);
this.region = null;
}
}
- done.set(true);
+ // Check for errors after threads have been stopped
for (GetTillDoneOrException t : threads) {
- try {
- t.join();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
if (t.e != null) {
LOG.info("Exception=" + t.e);
assertFalse("Found a NPE in " + t.getName(), t.e instanceof
NullPointerException);
@@ -1391,7 +1400,7 @@ public class TestHRegion {
@Override
public void run() {
- while (!this.done.get()) {
+ while (!this.done.get() && !Thread.currentThread().isInterrupted()) {
try {
assertTrue(region.get(g).size() > 0);
this.count.incrementAndGet();
@@ -4517,7 +4526,7 @@ public class TestHRegion {
@Override
public void run() {
done = false;
- while (!done) {
+ while (!done && !Thread.currentThread().isInterrupted()) {
synchronized (this) {
try {
wait();
@@ -4730,7 +4739,7 @@ public class TestHRegion {
@Override
public void run() {
done = false;
- while (!done) {
+ while (!done && !Thread.currentThread().isInterrupted()) {
try {
for (int r = 0; r < numRows; r++) {
byte[] row = Bytes.toBytes("row" + r);
@@ -5264,7 +5273,7 @@ public class TestHRegion {
Runnable flusher = new Runnable() {
@Override
public void run() {
- while (!incrementDone.get()) {
+ while (!incrementDone.get() &&
!Thread.currentThread().isInterrupted()) {
try {
region.flush(true);
} catch (Exception e) {
@@ -5280,13 +5289,38 @@ public class TestHRegion {
long expected = (long) threadNum * incCounter;
Thread[] incrementers = new Thread[threadNum];
Thread flushThread = new Thread(flusher);
+ flushThread.setName("FlushThread-" + method);
for (int i = 0; i < threadNum; i++) {
incrementers[i] = new Thread(new Incrementer(this.region, incCounter));
incrementers[i].start();
}
flushThread.start();
- for (int i = 0; i < threadNum; i++) {
- incrementers[i].join();
+ try {
+ for (int i = 0; i < threadNum; i++) {
+ incrementers[i].join();
+ }
+
+ incrementDone.set(true);
+ flushThread.join();
+
+ Get get = new Get(Incrementer.incRow);
+ get.addColumn(Incrementer.family, Incrementer.qualifier);
+ get.readVersions(1);
+ Result res = this.region.get(get);
+ List<Cell> kvs = res.getColumnCells(Incrementer.family,
Incrementer.qualifier);
+
+ // we just got the latest version
+ assertEquals(1, kvs.size());
+ Cell kv = kvs.get(0);
+ assertEquals(expected, Bytes.toLong(kv.getValueArray(),
kv.getValueOffset()));
+ } finally {
+ // Ensure flush thread is stopped even if test fails or times out
+ incrementDone.set(true);
+ flushThread.interrupt();
+ flushThread.join(5000); // Wait up to 5 seconds for thread to stop
+ if (flushThread.isAlive()) {
+ LOG.warn("Flush thread did not stop within timeout for test " +
method);
+ }
}
incrementDone.set(true);
@@ -5349,7 +5383,7 @@ public class TestHRegion {
Runnable flusher = new Runnable() {
@Override
public void run() {
- while (!appendDone.get()) {
+ while (!appendDone.get() && !Thread.currentThread().isInterrupted()) {
try {
region.flush(true);
} catch (Exception e) {
@@ -5369,13 +5403,41 @@ public class TestHRegion {
}
Thread[] appenders = new Thread[threadNum];
Thread flushThread = new Thread(flusher);
+ flushThread.setName("FlushThread-" + method);
for (int i = 0; i < threadNum; i++) {
appenders[i] = new Thread(new Appender(this.region, appendCounter));
appenders[i].start();
}
flushThread.start();
- for (int i = 0; i < threadNum; i++) {
- appenders[i].join();
+ try {
+ for (int i = 0; i < threadNum; i++) {
+ appenders[i].join();
+ }
+
+ appendDone.set(true);
+ flushThread.join();
+
+ Get get = new Get(Appender.appendRow);
+ get.addColumn(Appender.family, Appender.qualifier);
+ get.readVersions(1);
+ Result res = this.region.get(get);
+ List<Cell> kvs = res.getColumnCells(Appender.family, Appender.qualifier);
+
+ // we just got the latest version
+ assertEquals(1, kvs.size());
+ Cell kv = kvs.get(0);
+ byte[] appendResult = new byte[kv.getValueLength()];
+ System.arraycopy(kv.getValueArray(), kv.getValueOffset(), appendResult,
0,
+ kv.getValueLength());
+ assertArrayEquals(expected, appendResult);
+ } finally {
+ // Ensure flush thread is stopped even if test fails or times out
+ appendDone.set(true);
+ flushThread.interrupt();
+ flushThread.join(5000); // Wait up to 5 seconds for thread to stop
+ if (flushThread.isAlive()) {
+ LOG.warn("Flush thread did not stop within timeout for test " +
method);
+ }
}
appendDone.set(true);
@@ -7337,7 +7399,7 @@ public class TestHRegion {
// Writer thread
Thread writerThread = new Thread(() -> {
try {
- while (true) {
+ while (!Thread.currentThread().isInterrupted()) {
// If all the reader threads finish, then stop the writer thread
if (latch.await(0, TimeUnit.MILLISECONDS)) {
return;
@@ -7362,15 +7424,19 @@ public class TestHRegion {
.addColumn(fam1, q3, tsIncrement + 1, Bytes.toBytes(1L))
.addColumn(fam1, q4, tsAppend + 1, Bytes.toBytes("a")) });
}
+ } catch (InterruptedException e) {
+ // Test interrupted, exit gracefully
+ Thread.currentThread().interrupt();
} catch (Exception e) {
assertionError.set(new AssertionError(e));
}
});
+ writerThread.setName("WriterThread-" + method);
writerThread.start();
// Reader threads
for (int i = 0; i < numReaderThreads; i++) {
- new Thread(() -> {
+ Thread readerThread = new Thread(() -> {
try {
for (int j = 0; j < 10000; j++) {
// Verify the values
@@ -7399,13 +7465,24 @@ public class TestHRegion {
}
latch.countDown();
- }).start();
+ });
+ readerThread.setName("ReaderThread-" + i + "-" + method);
+ readerThread.start();
}
- writerThread.join();
+ try {
+ writerThread.join();
- if (assertionError.get() != null) {
- throw assertionError.get();
+ if (assertionError.get() != null) {
+ throw assertionError.get();
+ }
+ } finally {
+ // Ensure writer thread is stopped on test timeout
+ writerThread.interrupt();
+ writerThread.join(5000);
+ if (writerThread.isAlive()) {
+ LOG.warn("Writer thread did not stop within timeout for test " +
method);
+ }
}
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
index e87e13d151b..9f74acb58b8 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
@@ -183,6 +183,10 @@ public class TestReplicationBase {
protected static void setupConfig(HBaseTestingUtility util, String
znodeParent) {
Configuration conf = util.getConfiguration();
conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, znodeParent);
+ // Disable directory sharing to prevent race conditions when tests run in
parallel.
+ // Each test instance gets its own isolated directories to avoid one
test's tearDown()
+ // deleting directories another parallel test is still using.
+ conf.setBoolean("hbase.test.disable-directory-sharing", true);
// We don't want too many edits per batch sent to the ReplicationEndpoint
to trigger
// sufficient number of events. But we don't want to go too low because
// HBaseInterClusterReplicationEndpoint partitions entries into batches
and we want