Copilot commented on code in PR #9721:
URL: https://github.com/apache/ozone/pull/9721#discussion_r2791832422


##########
hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java:
##########
@@ -141,6 +146,30 @@ public List<OzoneManager> getOzoneManagersList() {
     return omhaService.getServices();
   }
 
+  public void 
restartOzoneManagersWithConfigCustomizer(Consumer<OzoneConfiguration> 
configCustomizer)
+      throws IOException, TimeoutException, InterruptedException {
+    List<OzoneManager> toRestart = new ArrayList<>();
+    for (OzoneManager om : getOzoneManagersList()) {
+      OzoneConfiguration configuration = new 
OzoneConfiguration(om.getConfiguration());
+      if (configCustomizer != null) {
+        configCustomizer.accept(configuration);
+      }
+      om.setConfiguration(configuration);
+      if (om.isRunning()) {
+        toRestart.add(om);
+      }
+    }
+    for (OzoneManager om : toRestart) {
+      if (!om.stop()) {
+        continue;
+      }
+      om.join();
+      om.restart();
+      GenericTestUtils.waitFor(om::isRunning, 1000, 30000);
+    }

Review Comment:
   `restartOzoneManagersWithConfigCustomizer` silently skips restarting an OM 
when `om.stop()` returns false (even though the OM was in the `toRestart` 
list). This can mask a failed stop and leave the cluster partially restarted 
with a mix of old/new config. Consider failing fast (throw) or at least logging 
and still attempting a restart / waiting for a consistent state.



##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java:
##########
@@ -463,6 +498,7 @@ public void 
testBackupCompactionFilesPruningBackgroundService()
     assertNotNull(files);
     int numberOfSstFiles = files.length;
 
+    assertEquals(cluster.getOMLeader(), newLeaderOM);

Review Comment:
   `assertEquals(cluster.getOMLeader(), newLeaderOM)` can be flaky because 
`getOMLeader()` explicitly returns null when no leader is ready or when 
multiple leaders appear ready. If the intent is to verify leadership, use 
`cluster.waitForLeaderOM()` (and compare), or assert 
`newLeaderOM.isLeaderReady()`.
   ```suggestion
       assertEquals(cluster.waitForLeaderOM(), newLeaderOM);
   ```



##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java:
##########
@@ -120,66 +126,48 @@ public void init(TestInfo testInfo) throws Exception {
     conf.setInt(OMConfigKeys.OZONE_OM_RATIS_LOG_PURGE_GAP, LOG_PURGE_GAP);
     conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_SIZE_KEY, 16, 
StorageUnit.KB);
     
conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY, 
16, StorageUnit.KB);
-    if ("testSSTFilteringBackgroundService".equals(testInfo.getDisplayName())) 
{
-      conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 1,
-          TimeUnit.SECONDS);
-    }
-    if ("testCompactionLogBackgroundService"
-        .equals(testInfo.getDisplayName())) {
-      conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 
1,
-          TimeUnit.MILLISECONDS);
-    }
-    if ("testBackupCompactionFilesPruningBackgroundService"
-        .equals(testInfo.getDisplayName())) {
-      conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 
1,
-          TimeUnit.MILLISECONDS);
-      conf.setTimeDuration(
-          OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL, 1,
-          TimeUnit.SECONDS);
-    }
-    if ("testSnapshotAndKeyDeletionBackgroundServices"
-        .equals(testInfo.getDisplayName())) {
-      conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1,
-          TimeUnit.SECONDS);
-      conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 1,
-          TimeUnit.SECONDS);
-      conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 
1,
-          TimeUnit.MILLISECONDS);
-      conf.setTimeDuration(
-          OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL, 3,
-          TimeUnit.SECONDS);
-      conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 3,
-          TimeUnit.SECONDS);
-    }
+
+    // Used by: testSSTFilteringBackgroundService
+    conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 1, 
TimeUnit.SECONDS);
+
+    // Used by: testCompactionLogBackgroundService, 
testBackupCompactionFilesPruningBackgroundService,
+    //                                         
testSnapshotAndKeyDeletionBackgroundServices
+    conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_MAX_TIME_ALLOWED, 1, 
TimeUnit.MILLISECONDS);
+
+    // Used by: testCompactionLogBackgroundService, 
testBackupCompactionFilesPruningBackgroundService
+    
conf.setTimeDuration(OZONE_OM_SNAPSHOT_COMPACTION_DAG_PRUNE_DAEMON_RUN_INTERVAL,
 3, TimeUnit.SECONDS);
+
+    // Used by: testSnapshotAndKeyDeletionBackgroundServices
+    conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1, 
TimeUnit.SECONDS);
+    // Used by: testSnapshotAndKeyDeletionBackgroundServices
+    conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 1, 
TimeUnit.SECONDS);
+
     conf.setLong(
         OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_AUTO_TRIGGER_THRESHOLD_KEY,
         SNAPSHOT_THRESHOLD);
     int numOfOMs = 3;
     cluster = MiniOzoneCluster.newHABuilder(conf)
-        .setOMServiceId("om-service-test1")
+        .setOMServiceId(omServiceId)
         .setNumOfOzoneManagers(numOfOMs)
-        .setNumOfActiveOMs(2)
+        .setNumOfActiveOMs(numOfOMs)
         .build();
-    if ("testBackupCompactionFilesPruningBackgroundService"
-        .equals(testInfo.getDisplayName())) {
-      cluster.
-          getOzoneManagersList()
-          .forEach(
-              TestSnapshotBackgroundServices
-                  ::suspendBackupCompactionFilesPruning);
-    }
+
     cluster.waitForClusterToBeReady();
     client = OzoneClientFactory.getRpcClient(omServiceId, conf);
     objectStore = client.getObjectStore();
+  }
+
+  @BeforeEach
+  public void setupTest() throws IOException, InterruptedException, 
TimeoutException {
+    recoverCluster();
+    stopFollowerOM(cluster.getOMLeader());

Review Comment:
   `setupTest` passes `cluster.getOMLeader()` into `stopFollowerOM(...)`, but 
`getOMLeader()` can legally return null transiently (eg during leader 
transitions) and would cause the helper to stop an arbitrary OM. Prefer using 
the result of `cluster.waitForLeaderOM()` (or assert non-null) to avoid test 
flakiness.
   ```suggestion
       OzoneManager leader = cluster.waitForLeaderOM();
       stopFollowerOM(leader);
   ```



##########
hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java:
##########
@@ -141,6 +146,30 @@ public List<OzoneManager> getOzoneManagersList() {
     return omhaService.getServices();
   }
 
+  public void 
restartOzoneManagersWithConfigCustomizer(Consumer<OzoneConfiguration> 
configCustomizer)
+      throws IOException, TimeoutException, InterruptedException {
+    List<OzoneManager> toRestart = new ArrayList<>();
+    for (OzoneManager om : getOzoneManagersList()) {
+      OzoneConfiguration configuration = new 
OzoneConfiguration(om.getConfiguration());
+      if (configCustomizer != null) {
+        configCustomizer.accept(configuration);
+      }
+      om.setConfiguration(configuration);
+      if (om.isRunning()) {

Review Comment:
   Access of [element](1) annotated with VisibleForTesting found in production 
code.



##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java:
##########
@@ -357,24 +370,39 @@ private void createSnapshotsEachWithNewKeys(OzoneManager 
ozoneManager)
     }
   }
 
-  private OzoneManager getInactiveFollowerOM(OzoneManager leaderOM) {
-    String followerNodeId = leaderOM.getPeerNodes().get(0).getNodeId();
-    if (cluster.isOMActive(followerNodeId)) {
-      followerNodeId = leaderOM.getPeerNodes().get(1).getNodeId();
-    }
-    return cluster.getOzoneManager(followerNodeId);
+  private OzoneManager getInactiveFollowerOM(OzoneManager leaderOM)
+      throws TimeoutException, InterruptedException {
+    // Wait for an inactive OM to be available
+    AtomicReference<OzoneManager> inactiveOM = new AtomicReference<>();
+    GenericTestUtils.waitFor(() -> {
+      Iterator<OzoneManager> iterator = cluster.getInactiveOM();
+      if (iterator.hasNext()) {
+        inactiveOM.set(iterator.next());
+        return true;

Review Comment:
   `getInactiveFollowerOM` no longer uses the `leaderOM` parameter. This makes 
the signature misleading and may trigger unused-parameter warnings in some 
builds/checkstyle configs. Consider removing the parameter (and updating 
callers) or using it to ensure you don't accidentally pick the leader as the 
"inactive" OM.
   ```suggestion
       // Wait for an inactive OM (that is not the leader) to be available
       AtomicReference<OzoneManager> inactiveOM = new AtomicReference<>();
       GenericTestUtils.waitFor(() -> {
         Iterator<OzoneManager> iterator = cluster.getInactiveOM();
         while (iterator.hasNext()) {
           OzoneManager candidate = iterator.next();
           // Ensure we don't accidentally pick the leader as the "inactive" OM.
           if (!Objects.equals(candidate, leaderOM)) {
             inactiveOM.set(candidate);
             return true;
           }
   ```



##########
hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java:
##########
@@ -141,6 +146,30 @@ public List<OzoneManager> getOzoneManagersList() {
     return omhaService.getServices();
   }
 
+  public void 
restartOzoneManagersWithConfigCustomizer(Consumer<OzoneConfiguration> 
configCustomizer)
+      throws IOException, TimeoutException, InterruptedException {
+    List<OzoneManager> toRestart = new ArrayList<>();
+    for (OzoneManager om : getOzoneManagersList()) {
+      OzoneConfiguration configuration = new 
OzoneConfiguration(om.getConfiguration());
+      if (configCustomizer != null) {
+        configCustomizer.accept(configuration);
+      }
+      om.setConfiguration(configuration);

Review Comment:
   Access of [element](1) annotated with VisibleForTesting found in production 
code.



##########
hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java:
##########
@@ -141,6 +146,30 @@ public List<OzoneManager> getOzoneManagersList() {
     return omhaService.getServices();
   }
 
+  public void 
restartOzoneManagersWithConfigCustomizer(Consumer<OzoneConfiguration> 
configCustomizer)
+      throws IOException, TimeoutException, InterruptedException {
+    List<OzoneManager> toRestart = new ArrayList<>();
+    for (OzoneManager om : getOzoneManagersList()) {
+      OzoneConfiguration configuration = new 
OzoneConfiguration(om.getConfiguration());
+      if (configCustomizer != null) {
+        configCustomizer.accept(configuration);
+      }
+      om.setConfiguration(configuration);
+      if (om.isRunning()) {
+        toRestart.add(om);
+      }
+    }
+    for (OzoneManager om : toRestart) {
+      if (!om.stop()) {
+        continue;
+      }
+      om.join();
+      om.restart();
+      GenericTestUtils.waitFor(om::isRunning, 1000, 30000);

Review Comment:
   Access of [element](1) annotated with VisibleForTesting found in production 
code.
   ```suggestion
         long startTime = System.currentTimeMillis();
         while (!om.isRunning()) {
           if (System.currentTimeMillis() - startTime > 30000) {
             throw new TimeoutException("Timed out waiting for OzoneManager to 
start.");
           }
           Thread.sleep(1000);
         }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to