This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b8ac5ded3 HDDS-8871. Intermittent failure in 
TestOpenKeyCleanupService (#5493)
2b8ac5ded3 is described below

commit 2b8ac5ded3017bc51f5b41f8db9872addde14c9a
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Thu Oct 26 18:35:07 2023 +0200

    HDDS-8871. Intermittent failure in TestOpenKeyCleanupService (#5493)
---
 .../om/service/TestOpenKeyCleanupService.java      | 143 +++++++++++----------
 1 file changed, 74 insertions(+), 69 deletions(-)

diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
index c421e10f94..fad99837e2 100644
--- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
+++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestOpenKeyCleanupService.java
@@ -20,7 +20,7 @@
 package org.apache.hadoop.ozone.om.service;
 
 import org.apache.commons.lang3.RandomUtils;
-import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
@@ -54,6 +54,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.nio.file.Path;
 import java.time.Duration;
 import java.util.ArrayList;
@@ -66,23 +67,17 @@ import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_EXPIRE_T
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
-/**
- * Test Key Deleting Service.
- * <p>
- * This test does the following things.
- * <p>
- * 1. Creates a bunch of keys. 2. Then executes delete key directly using
- * Metadata Manager. 3. Waits for a while for the KeyDeleting Service to pick 
up
- * and call into SCM. 4. Confirms that calls have been successful.
- */
-public class TestOpenKeyCleanupService {
+class TestOpenKeyCleanupService {
   private OzoneManagerProtocol writeClient;
   private OzoneManager om;
   private static final Logger LOG =
       LoggerFactory.getLogger(TestOpenKeyCleanupService.class);
 
-  private static final Duration SERVICE_INTERVAL = Duration.ofMillis(100);
-  private static final Duration EXPIRE_THRESHOLD = Duration.ofMillis(200);
+  private static final int SERVICE_INTERVAL = 20;
+  private static final int EXPIRE_THRESHOLD_MS = 140;
+  private static final Duration EXPIRE_THRESHOLD =
+      Duration.ofMillis(EXPIRE_THRESHOLD_MS);
+  private static final int WAIT_TIME = (int) Duration.ofSeconds(10).toMillis();
   private static final int NUM_MPU_PARTS = 5;
   private KeyManager keyManager;
   private OMMetadataManager omMetadataManager;
@@ -98,9 +93,9 @@ public class TestOpenKeyCleanupService {
     System.setProperty(DBConfigFromFile.CONFIG_DIR, "/");
     ServerUtils.setOzoneMetaDirPath(conf, tempDir.toString());
     conf.setTimeDuration(OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL,
-        SERVICE_INTERVAL.toMillis(), TimeUnit.MILLISECONDS);
+        SERVICE_INTERVAL, TimeUnit.MILLISECONDS);
     conf.setTimeDuration(OZONE_OM_OPEN_KEY_EXPIRE_THRESHOLD,
-        EXPIRE_THRESHOLD.toMillis(), TimeUnit.MILLISECONDS);
+        EXPIRE_THRESHOLD_MS, TimeUnit.MILLISECONDS);
     conf.setBoolean(OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true);
     conf.setQuietMode(false);
     OmTestManagers omTestManagers = new OmTestManagers(conf);
@@ -112,7 +107,9 @@ public class TestOpenKeyCleanupService {
 
   @AfterEach
   public void cleanup() throws Exception {
-    om.stop();
+    if (om.stop()) {
+      om.join();
+    }
   }
 
   /**
@@ -143,7 +140,7 @@ public class TestOpenKeyCleanupService {
 
     openKeyCleanupService.suspend();
     // wait for submitted tasks to complete
-    Thread.sleep(SERVICE_INTERVAL.toMillis());
+    Thread.sleep(SERVICE_INTERVAL);
     final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount();
     final long oldrunCount = openKeyCleanupService.getRunCount();
     LOG.info("oldkeyCount={}, oldrunCount={}", oldkeyCount, oldrunCount);
@@ -158,7 +155,7 @@ public class TestOpenKeyCleanupService {
     createOpenKeys(numFSOKeys, hsync, BucketLayout.FILE_SYSTEM_OPTIMIZED);
 
     // wait for open keys to expire
-    Thread.sleep(EXPIRE_THRESHOLD.toMillis());
+    Thread.sleep(EXPIRE_THRESHOLD_MS);
 
     assertExpiredOpenKeys(numDEFKeys == 0, false, BucketLayout.DEFAULT);
     assertExpiredOpenKeys(numFSOKeys == 0, hsync,
@@ -166,26 +163,22 @@ public class TestOpenKeyCleanupService {
 
     openKeyCleanupService.resume();
 
-    GenericTestUtils.waitFor(() -> openKeyCleanupService
-            .getRunCount() > oldrunCount,
-        (int) SERVICE_INTERVAL.toMillis(),
-        5 * (int) SERVICE_INTERVAL.toMillis());
+    GenericTestUtils.waitFor(
+        () -> openKeyCleanupService.getSubmittedOpenKeyCount() >= keyCount,
+        SERVICE_INTERVAL, WAIT_TIME);
+    GenericTestUtils.waitFor(
+        () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2,
+        SERVICE_INTERVAL, WAIT_TIME);
 
-    // wait for requests to complete
-    final int n = hsync ? numDEFKeys + numFSOKeys : 1;
-    Thread.sleep(n * SERVICE_INTERVAL.toMillis());
+    waitForOpenKeyCleanup(false, BucketLayout.DEFAULT);
+    waitForOpenKeyCleanup(hsync, BucketLayout.FILE_SYSTEM_OPTIMIZED);
 
-    assertTrue(openKeyCleanupService.getSubmittedOpenKeyCount() >=
-        oldkeyCount + keyCount);
-    assertExpiredOpenKeys(true, false, BucketLayout.DEFAULT);
-    assertExpiredOpenKeys(true, hsync,
-        BucketLayout.FILE_SYSTEM_OPTIMIZED);
     if (hsync) {
-      assertEquals(numDEFKeys, metrics.getNumOpenKeysCleaned());
-      assertTrue(metrics.getNumOpenKeysHSyncCleaned() >= numFSOKeys);
+      assertAtLeast(numDEFKeys, metrics.getNumOpenKeysCleaned());
+      assertAtLeast(numFSOKeys, metrics.getNumOpenKeysHSyncCleaned());
       assertEquals(numFSOKeys, metrics.getNumKeyHSyncs());
     } else {
-      assertEquals(keyCount, metrics.getNumOpenKeysCleaned());
+      assertAtLeast(keyCount, metrics.getNumOpenKeysCleaned());
       assertEquals(0, metrics.getNumOpenKeysHSyncCleaned());
       assertEquals(0, metrics.getNumKeyHSyncs());
     }
@@ -214,7 +207,7 @@ public class TestOpenKeyCleanupService {
 
     openKeyCleanupService.suspend();
     // wait for submitted tasks to complete
-    Thread.sleep(SERVICE_INTERVAL.toMillis());
+    Thread.sleep(SERVICE_INTERVAL);
     final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount();
     final long oldrunCount = openKeyCleanupService.getRunCount();
     LOG.info("oldMpuKeyCount={}, oldMpuRunCount={}", oldkeyCount, oldrunCount);
@@ -230,7 +223,7 @@ public class TestOpenKeyCleanupService {
         NUM_MPU_PARTS, true);
 
     // wait for open keys to expire
-    Thread.sleep(EXPIRE_THRESHOLD.toMillis());
+    Thread.sleep(EXPIRE_THRESHOLD_MS);
 
     // All MPU open keys should be skipped
     assertExpiredOpenKeys(true, false, BucketLayout.DEFAULT);
@@ -239,13 +232,12 @@ public class TestOpenKeyCleanupService {
 
     openKeyCleanupService.resume();
 
-    GenericTestUtils.waitFor(() -> openKeyCleanupService
-            .getRunCount() > oldrunCount,
-        (int) SERVICE_INTERVAL.toMillis(),
-        5 * (int) SERVICE_INTERVAL.toMillis());
+    GenericTestUtils.waitFor(
+        () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2,
+        SERVICE_INTERVAL, WAIT_TIME);
 
     // wait for requests to complete
-    Thread.sleep(SERVICE_INTERVAL.toMillis());
+    Thread.sleep(SERVICE_INTERVAL);
 
     // No expired open keys fetched
     assertEquals(openKeyCleanupService.getSubmittedOpenKeyCount(), 
oldkeyCount);
@@ -279,7 +271,7 @@ public class TestOpenKeyCleanupService {
 
     openKeyCleanupService.suspend();
     // wait for submitted tasks to complete
-    Thread.sleep(SERVICE_INTERVAL.toMillis());
+    Thread.sleep(SERVICE_INTERVAL);
     final long oldkeyCount = openKeyCleanupService.getSubmittedOpenKeyCount();
     final long oldrunCount = openKeyCleanupService.getRunCount();
     LOG.info("oldMpuKeyCount={}, oldMpuRunCount={}", oldkeyCount, oldrunCount);
@@ -290,52 +282,65 @@ public class TestOpenKeyCleanupService {
     assertEquals(0, metrics.getNumOpenKeysCleaned());
     assertEquals(0, metrics.getNumOpenKeysHSyncCleaned());
     final int keyCount = numDEFKeys + numFSOKeys;
+    final int partCount = NUM_MPU_PARTS * keyCount;
     createIncompleteMPUKeys(numDEFKeys, BucketLayout.DEFAULT, NUM_MPU_PARTS,
         false);
     createIncompleteMPUKeys(numFSOKeys, BucketLayout.FILE_SYSTEM_OPTIMIZED,
         NUM_MPU_PARTS, false);
 
-    Thread.sleep(EXPIRE_THRESHOLD.toMillis());
+    Thread.sleep(EXPIRE_THRESHOLD_MS);
 
     // Each MPU keys create 1 MPU open key and some MPU open part keys
     // only the MPU open part keys will be deleted
-    assertExpiredOpenKeys((numDEFKeys * NUM_MPU_PARTS) == 0, false,
+    assertExpiredOpenKeys(numDEFKeys == 0, false,
         BucketLayout.DEFAULT);
-    assertExpiredOpenKeys((numFSOKeys * NUM_MPU_PARTS) == 0, false,
+    assertExpiredOpenKeys(numFSOKeys == 0, false,
         BucketLayout.FILE_SYSTEM_OPTIMIZED);
 
     openKeyCleanupService.resume();
 
-    GenericTestUtils.waitFor(() -> openKeyCleanupService
-            .getRunCount() > oldrunCount,
-        (int) SERVICE_INTERVAL.toMillis(),
-        5 * (int) SERVICE_INTERVAL.toMillis());
-
-    // wait for requests to complete
-    Thread.sleep(SERVICE_INTERVAL.toMillis());
+    GenericTestUtils.waitFor(
+        () -> openKeyCleanupService.getSubmittedOpenKeyCount() >= partCount,
+        SERVICE_INTERVAL, WAIT_TIME);
+    GenericTestUtils.waitFor(
+        () -> openKeyCleanupService.getRunCount() >= oldrunCount + 2,
+        SERVICE_INTERVAL, WAIT_TIME);
 
     // No expired MPU parts fetched
-    int numExpiredParts = NUM_MPU_PARTS * keyCount;
-    assertTrue(openKeyCleanupService.getSubmittedOpenKeyCount() >=
-        (oldkeyCount + numExpiredParts));
-    assertExpiredOpenKeys(true, false, BucketLayout.DEFAULT);
-    assertExpiredOpenKeys(true, false,
-        BucketLayout.FILE_SYSTEM_OPTIMIZED);
+    waitForOpenKeyCleanup(false, BucketLayout.DEFAULT);
+    waitForOpenKeyCleanup(false, BucketLayout.FILE_SYSTEM_OPTIMIZED);
+    assertAtLeast(partCount, metrics.getNumOpenKeysCleaned());
+  }
 
-    assertEquals(numExpiredParts,
-        metrics.getNumOpenKeysCleaned());
+  private static void assertAtLeast(long expectedMinimum, long actual) {
+    assertTrue(actual >= expectedMinimum,
+        () -> actual + " < " + expectedMinimum);
   }
 
-  void assertExpiredOpenKeys(boolean expectedToEmpty, boolean hsync,
-      BucketLayout layout) throws IOException {
-    final ExpiredOpenKeys expired = keyManager.getExpiredOpenKeys(
-        EXPIRE_THRESHOLD, 100, layout);
-    final int size = (hsync ? expired.getHsyncKeys()
-        : expired.getOpenKeyBuckets()).size();
+  private void assertExpiredOpenKeys(boolean expectedToEmpty, boolean hsync,
+      BucketLayout layout) {
+    final int size = getExpiredOpenKeys(hsync, layout);
     assertEquals(expectedToEmpty, size == 0,
         () -> "size=" + size + ", layout=" + layout);
   }
 
+  private int getExpiredOpenKeys(boolean hsync, BucketLayout layout) {
+    try {
+      final ExpiredOpenKeys expired = keyManager.getExpiredOpenKeys(
+          EXPIRE_THRESHOLD, 100, layout);
+      return (hsync ? expired.getHsyncKeys() : expired.getOpenKeyBuckets())
+          .size();
+    } catch (IOException e) {
+      throw new UncheckedIOException(e);
+    }
+  }
+
+  void waitForOpenKeyCleanup(boolean hsync, BucketLayout layout)
+      throws Exception {
+    GenericTestUtils.waitFor(() -> 0 == getExpiredOpenKeys(hsync, layout),
+        SERVICE_INTERVAL, WAIT_TIME);
+  }
+
   private void createOpenKeys(int keyCount, boolean hsync,
       BucketLayout bucketLayout) throws IOException {
     String volume = UUID.randomUUID().toString();
@@ -383,7 +388,7 @@ public class TestOpenKeyCleanupService {
             .setBucketName(bucketName)
             .setKeyName(keyName)
             .setAcls(Collections.emptyList())
-            .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+            .setReplicationConfig(RatisReplicationConfig.getInstance(
                 HddsProtos.ReplicationFactor.ONE))
             .setLocationInfoList(new ArrayList<>())
             .build();
@@ -436,7 +441,7 @@ public class TestOpenKeyCleanupService {
             .setBucketName(bucketName)
             .setKeyName(keyName)
             .setAcls(Collections.emptyList())
-            .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+            .setReplicationConfig(RatisReplicationConfig.getInstance(
                 HddsProtos.ReplicationFactor.ONE))
             .setLocationInfoList(new ArrayList<>())
             .build();
@@ -455,7 +460,7 @@ public class TestOpenKeyCleanupService {
               .setMultipartUploadID(omMultipartInfo.getUploadID())
               .setMultipartUploadPartNumber(i)
               .setAcls(Collections.emptyList())
-              .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+              .setReplicationConfig(RatisReplicationConfig.getInstance(
                   HddsProtos.ReplicationFactor.ONE))
               .build();
 
@@ -471,7 +476,7 @@ public class TestOpenKeyCleanupService {
                 .setMultipartUploadID(omMultipartInfo.getUploadID())
                 .setMultipartUploadPartNumber(i)
                 .setAcls(Collections.emptyList())
-                .setReplicationConfig(StandaloneReplicationConfig.getInstance(
+                .setReplicationConfig(RatisReplicationConfig.getInstance(
                     HddsProtos.ReplicationFactor.ONE))
                 .setLocationInfoList(Collections.emptyList())
                 .build();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to