This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new f0388a19541 HDDS-10306. Speed up TestSnapshotBackgroundServices (#9721)
f0388a19541 is described below
commit f0388a195411e68aac360de8ab95735b2eb295de
Author: Han-Wen Hsu <[email protected]>
AuthorDate: Tue Feb 17 17:48:22 2026 +0800
HDDS-10306. Speed up TestSnapshotBackgroundServices (#9721)
---
.../snapshot/TestSnapshotBackgroundServices.java | 160 +++++++++++++--------
.../hadoop/ozone/MiniOzoneHAClusterImpl.java | 35 +++++
2 files changed, 133 insertions(+), 62 deletions(-)
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
index e5da202421a..5e3f49e4f39 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
@@ -36,6 +36,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
@@ -62,7 +63,6 @@
import org.apache.hadoop.ozone.conf.OMClientConfig;
import org.apache.hadoop.ozone.om.OMConfigKeys;
import org.apache.hadoop.ozone.om.OmSnapshot;
-import org.apache.hadoop.ozone.om.OmTestUtil;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.SstFilteringService;
import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
@@ -80,15 +80,21 @@
import org.apache.ozone.test.tag.Flaky;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier;
-import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.MethodOrderer;
+import org.junit.jupiter.api.Order;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.TestMethodOrder;
/**
* Tests snapshot background services.
*/
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class TestSnapshotBackgroundServices {
private MiniOzoneHAClusterImpl cluster;
private ObjectStore objectStore;
@@ -105,8 +111,8 @@ public class TestSnapshotBackgroundServices {
private OzoneClient client;
private final AtomicInteger counter = new AtomicInteger();
- @BeforeEach
- public void init(TestInfo testInfo) throws Exception {
+ @BeforeAll
+ public void init() throws Exception {
OzoneConfiguration conf = new OzoneConfiguration();
String omServiceId = "om-service-test1";
OzoneManagerRatisServerConfig omRatisConf =
conf.getObject(OzoneManagerRatisServerConfig.class);
@@ -120,66 +126,48 @@ public void init(TestInfo testInfo) throws Exception {
conf.setInt(OMConfigKeys.OZONE_OM_RATIS_LOG_PURGE_GAP, LOG_PURGE_GAP);
conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_SIZE_KEY, 16,
StorageUnit.KB);
conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY,
16, StorageUnit.KB);
- if ("testSSTFilteringBackgroundService".equals(testInfo.getDisplayName())) {
- conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 1,
- TimeUnit.SECONDS);
- }
- if ("testCompactionLogBackgroundService"
- .equals(testInfo.getDisplayName())) {
- conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 1,
- TimeUnit.MILLISECONDS);
- }
- if ("testBackupCompactionFilesPruningBackgroundService"
- .equals(testInfo.getDisplayName())) {
- conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 1,
- TimeUnit.MILLISECONDS);
- conf.setTimeDuration(
- OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL, 1,
- TimeUnit.SECONDS);
- }
- if ("testSnapshotAndKeyDeletionBackgroundServices"
- .equals(testInfo.getDisplayName())) {
- conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1,
- TimeUnit.SECONDS);
- conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 1,
- TimeUnit.SECONDS);
- conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 1,
- TimeUnit.MILLISECONDS);
- conf.setTimeDuration(
- OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL, 3,
- TimeUnit.SECONDS);
- conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 3,
- TimeUnit.SECONDS);
- }
+
+ // Used by: testSSTFilteringBackgroundService
+ conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS);
+
+ // Used by: testCompactionLogBackgroundService, testBackupCompactionFilesPruningBackgroundService,
+ // testSnapshotAndKeyDeletionBackgroundServices
+ conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 1, TimeUnit.MILLISECONDS);
+
+ // Used by: testCompactionLogBackgroundService, testBackupCompactionFilesPruningBackgroundService
+ conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL, 3, TimeUnit.SECONDS);
+
+ // Used by: testSnapshotAndKeyDeletionBackgroundServices
+ conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS);
+ // Used by: testSnapshotAndKeyDeletionBackgroundServices
+ conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS);
+
conf.setLong(
OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_AUTO_TRIGGER_THRESHOLD_KEY,
SNAPSHOT_THRESHOLD);
int numOfOMs = 3;
cluster = MiniOzoneCluster.newHABuilder(conf)
- .setOMServiceId("om-service-test1")
+ .setOMServiceId(omServiceId)
.setNumOfOzoneManagers(numOfOMs)
- .setNumOfActiveOMs(2)
+ .setNumOfActiveOMs(numOfOMs)
.build();
- if ("testBackupCompactionFilesPruningBackgroundService"
- .equals(testInfo.getDisplayName())) {
- cluster.
- getOzoneManagersList()
- .forEach(
- TestSnapshotBackgroundServices
- ::suspendBackupCompactionFilesPruning);
- }
+
cluster.waitForClusterToBeReady();
client = OzoneClientFactory.getRpcClient(omServiceId, conf);
objectStore = client.getObjectStore();
+ }
+
+ @BeforeEach
+ public void setupTest() throws IOException, InterruptedException, TimeoutException {
+ recoverCluster();
+ stopFollowerOM(cluster.getOMLeader());
volumeName = "volume" + counter.incrementAndGet();
bucketName = "bucket" + counter.incrementAndGet();
-
VolumeArgs createVolumeArgs = VolumeArgs.newBuilder()
.setOwner("user" + counter.incrementAndGet())
.setAdmin("admin" + counter.incrementAndGet())
.build();
-
objectStore.createVolume(volumeName, createVolumeArgs);
OzoneVolume retVolumeinfo = objectStore.getVolume(volumeName);
@@ -188,7 +176,32 @@ public void init(TestInfo testInfo) throws Exception {
ozoneBucket = retVolumeinfo.getBucket(bucketName);
}
- @AfterEach
+ private void recoverCluster() throws InterruptedException, TimeoutException, IOException {
+ for (OzoneManager ozoneManager : cluster.getOzoneManagersList()) {
+ if (!ozoneManager.isRunning()) {
+ cluster.restartOzoneManager(ozoneManager, false);
+ }
+
+ if (!ozoneManager.getMetadataManager().getStore().getRocksDBCheckpointDiffer().shouldRun()) {
+ resumeBackupCompactionFilesPruning(ozoneManager);
+ }
+ }
+ cluster.waitForClusterToBeReady();
+ cluster.waitForLeaderOM();
+ }
+
+ private void stopFollowerOM(OzoneManager leaderOM) throws TimeoutException, InterruptedException {
+ for (OzoneManager om : cluster.getOzoneManagersList()) {
+ if (om != leaderOM && om.isRunning()) {
+ String omNodeId = om.getOMNodeId();
+ cluster.stopOzoneManager(omNodeId);
+ GenericTestUtils.waitFor(() -> !om.isRunning() && !om.isOmRpcServerRunning(), 100, 15000);
+ break;
+ }
+ }
+ }
+
+ @AfterAll
public void shutdown() {
IOUtils.closeQuietly(client);
if (cluster != null) {
@@ -202,7 +215,7 @@ public void shutdown() {
public void testSnapshotAndKeyDeletionBackgroundServices()
throws Exception {
OzoneManager leaderOM = getLeaderOM();
- OzoneManager followerOM = getInactiveFollowerOM(leaderOM);
+ OzoneManager followerOM = getInactiveFollowerOM();
createSnapshotsEachWithNewKeys(leaderOM);
@@ -332,7 +345,7 @@ private void startInactiveFollower(OzoneManager leaderOM,
long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex();
// Start the inactive OM. Checkpoint installation will happen spontaneously.
- cluster.startInactiveOM(followerOM.getOMNodeId());
+ cluster.restartOzoneManager(followerOM, true);
actionAfterStarting.run();
// The recently started OM should be lagging behind the leader OM.
@@ -357,26 +370,41 @@ private void createSnapshotsEachWithNewKeys(OzoneManager ozoneManager)
}
}
- private OzoneManager getInactiveFollowerOM(OzoneManager leaderOM) {
- String followerNodeId = leaderOM.getPeerNodes().get(0).getNodeId();
- if (cluster.isOMActive(followerNodeId)) {
- followerNodeId = leaderOM.getPeerNodes().get(1).getNodeId();
- }
- return cluster.getOzoneManager(followerNodeId);
+ private OzoneManager getInactiveFollowerOM()
+ throws TimeoutException, InterruptedException {
+ // Wait for an inactive OM to be available
+ AtomicReference<OzoneManager> inactiveOM = new AtomicReference<>();
+ GenericTestUtils.waitFor(() -> {
+ Iterator<OzoneManager> iterator = cluster.getInactiveOM();
+ if (iterator.hasNext()) {
+ inactiveOM.set(iterator.next());
+ return true;
+ }
+ return false;
+ }, 100, 10000);
+ OzoneManager result = inactiveOM.get();
+ assertNotNull(result, "No inactive OM available");
+ return result;
}
private OzoneManager getLeaderOM() {
- final String leaderOMNodeId = OmTestUtil.getCurrentOmProxyNodeId(objectStore);
- return cluster.getOzoneManager(leaderOMNodeId);
+ return cluster.getOMLeader();
}
@Test
@DisplayName("testCompactionLogBackgroundService")
@Flaky("HDDS-11672")
+ @Order(Integer.MAX_VALUE)
public void testCompactionLogBackgroundService()
throws IOException, InterruptedException, TimeoutException {
+
+ // reset to the default value to avoid side effects
+ cluster.restartOzoneManagersWithConfigCustomizer(config -> {
+ config.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL, 10, TimeUnit.MINUTES);
+ });
+
OzoneManager leaderOM = getLeaderOM();
- OzoneManager followerOM = getInactiveFollowerOM(leaderOM);
+ OzoneManager followerOM = getInactiveFollowerOM();
createSnapshotsEachWithNewKeys(leaderOM);
@@ -407,6 +435,7 @@ public void testCompactionLogBackgroundService()
newLeaderOM.getMetadataManager().getStore()
.getRocksDBCheckpointDiffer().getForwardCompactionDAG().nodes()
.stream().map(CompactionNode::getFileName).collect(toSet()));
+
assertEquals(leaderOM.getMetadataManager().getStore()
.getRocksDBCheckpointDiffer().getForwardCompactionDAG().edges()
.stream().map(edge ->
@@ -440,8 +469,14 @@ private List<CompactionLogEntry> getCompactionLogEntries(OzoneManager om)
@DisplayName("testBackupCompactionFilesPruningBackgroundService")
public void testBackupCompactionFilesPruningBackgroundService()
throws IOException, InterruptedException, TimeoutException {
+
+ cluster.getOzoneManagersList().stream()
+ .filter(OzoneManager::isRunning)
+ .forEach(TestSnapshotBackgroundServices::suspendBackupCompactionFilesPruning);
+
OzoneManager leaderOM = getLeaderOM();
- OzoneManager followerOM = getInactiveFollowerOM(leaderOM);
+ OzoneManager followerOM = getInactiveFollowerOM();
+
startInactiveFollower(leaderOM, followerOM,
() -> suspendBackupCompactionFilesPruning(followerOM));
@@ -463,6 +498,7 @@ public void testBackupCompactionFilesPruningBackgroundService()
assertNotNull(files);
int numberOfSstFiles = files.length;
+ assertEquals(cluster.getOMLeader(), newLeaderOM);
resumeBackupCompactionFilesPruning(newLeaderOM);
checkIfCompactionBackupFilesWerePruned(sstBackupDir,
@@ -494,7 +530,7 @@ private static void suspendBackupCompactionFilesPruning(
public void testSSTFilteringBackgroundService()
throws IOException, InterruptedException, TimeoutException {
OzoneManager leaderOM = getLeaderOM();
- OzoneManager followerOM = getInactiveFollowerOM(leaderOM);
+ OzoneManager followerOM = getInactiveFollowerOM();
createSnapshotsEachWithNewKeys(leaderOM);
diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
index d3369534617..06a9c9f4ee2 100644
--- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
+++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
@@ -33,6 +33,7 @@
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hdds.ExitManager;
@@ -125,6 +126,10 @@ public Iterator<StorageContainerManager> getInactiveSCM() {
return scmhaService.inactiveServices();
}
+ public Iterator<OzoneManager> getInactiveOM() {
+ return omhaService.inactiveServices();
+ }
+
public StorageContainerManager getSCM(String scmNodeId) {
return this.scmhaService.getServiceById(scmNodeId);
}
@@ -141,6 +146,30 @@ public List<OzoneManager> getOzoneManagersList() {
return omhaService.getServices();
}
+ public void restartOzoneManagersWithConfigCustomizer(Consumer<OzoneConfiguration> configCustomizer)
+ throws IOException, TimeoutException, InterruptedException {
+ List<OzoneManager> toRestart = new ArrayList<>();
+ for (OzoneManager om : getOzoneManagersList()) {
+ OzoneConfiguration configuration = new OzoneConfiguration(om.getConfiguration());
+ if (configCustomizer != null) {
+ configCustomizer.accept(configuration);
+ }
+ om.setConfiguration(configuration);
+ if (om.isRunning()) {
+ toRestart.add(om);
+ }
+ }
+ for (OzoneManager om : toRestart) {
+ if (!om.stop()) {
+ continue;
+ }
+ om.join();
+ om.restart();
+ GenericTestUtils.waitFor(om::isRunning, 1000, 30000);
+ }
+ waitForLeaderOM();
+ }
+
public List<StorageContainerManager> getStorageContainerManagersList() {
return scmhaService.getServices();
}
@@ -244,6 +273,12 @@ public void restartOzoneManager(OzoneManager ozoneManager, boolean waitForOM)
GenericTestUtils.waitFor(ozoneManager::isRunning,
1000, waitForClusterToBeReadyTimeout);
}
+
+ omhaService.inactiveServices().forEachRemaining(om -> {
+ if (om.equals(ozoneManager)) {
+ this.omhaService.activate(om);
+ }
+ });
}
public void shutdownStorageContainerManager(StorageContainerManager scm) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]