This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 65e1c9b15a HDDS-3009. Create Container should retry other volumes if writes fail (#5300)
65e1c9b15a is described below

commit 65e1c9b15ab328d8e9be3a80511e40991e326f8f
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Tue Sep 19 19:45:32 2023 +0100

    HDDS-3009. Create Container should retry other volumes if writes fail (#5300)
---
 .../container/keyvalue/KeyValueContainer.java      | 145 +++++++++++++--------
 .../container/keyvalue/TestKeyValueContainer.java  |  63 ++++++++-
 2 files changed, 148 insertions(+), 60 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
index d6d0e74ce0..62ea88e5ef 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
@@ -29,6 +29,7 @@ import java.nio.file.StandardCopyOption;
 import java.time.Instant;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -150,74 +151,110 @@ public class KeyValueContainer implements Container<KeyValueContainerData> {
     long maxSize = containerData.getMaxSize();
     volumeSet.readLock();
     try {
-      HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume(
-          StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()),
-          maxSize);
-      String hddsVolumeDir = containerVolume.getHddsRootDir().toString();
-      // Set volume before getContainerDBFile(), because we may need the
-      // volume to deduce the db file.
-      containerData.setVolume(containerVolume);
-
-      long containerID = containerData.getContainerID();
-      String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID(
-              containerVolume, clusterId);
-      // Set schemaVersion before the dbFile since we have to
-      // choose the dbFile location based on schema version.
-      String schemaVersion = VersionedDatanodeFeatures.SchemaV3
-          .chooseSchemaVersion(config);
-      containerData.setSchemaVersion(schemaVersion);
+      List<HddsVolume> volumes
+          = StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList());
+      while (true) {
+        HddsVolume containerVolume;
+        try {
+          containerVolume = volumeChoosingPolicy.chooseVolume(volumes, maxSize);
+        } catch (DiskOutOfSpaceException ex) {
+          throw new StorageContainerException("Container creation failed, " +
+              "due to disk out of space", ex, DISK_OUT_OF_SPACE);
+        } catch (IOException ex) {
+          throw new StorageContainerException(
+              "Container creation failed. " + ex.getMessage(), ex,
+              CONTAINER_INTERNAL_ERROR);
+        }
 
-      containerMetaDataPath = KeyValueContainerLocationUtil
-          .getContainerMetaDataPath(hddsVolumeDir, idDir, containerID);
-      containerData.setMetadataPath(containerMetaDataPath.getPath());
+        try {
+          String hddsVolumeDir = containerVolume.getHddsRootDir().toString();
+          // Set volume before getContainerDBFile(), because we may need the
+          // volume to deduce the db file.
+          containerData.setVolume(containerVolume);
 
-      File chunksPath = KeyValueContainerLocationUtil.getChunksLocationPath(
-          hddsVolumeDir, idDir, containerID);
+          long containerID = containerData.getContainerID();
+          String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID(
+              containerVolume, clusterId);
+          // Set schemaVersion before the dbFile since we have to
+          // choose the dbFile location based on schema version.
+          String schemaVersion = VersionedDatanodeFeatures.SchemaV3
+              .chooseSchemaVersion(config);
+          containerData.setSchemaVersion(schemaVersion);
 
-      // Check if it is new Container.
-      ContainerUtils.verifyIsNewContainer(containerMetaDataPath);
+          containerMetaDataPath = KeyValueContainerLocationUtil
+              .getContainerMetaDataPath(hddsVolumeDir, idDir, containerID);
+          containerData.setMetadataPath(containerMetaDataPath.getPath());
 
-      //Create Metadata path chunks path and metadata db
-      File dbFile = getContainerDBFile();
+          File chunksPath = KeyValueContainerLocationUtil.getChunksLocationPath(
+              hddsVolumeDir, idDir, containerID);
 
-      KeyValueContainerUtil.createContainerMetaData(
-              containerMetaDataPath, chunksPath, dbFile,
-              containerData.getSchemaVersion(), config);
+          // Check if it is new Container.
+          ContainerUtils.verifyIsNewContainer(containerMetaDataPath);
 
-      //Set containerData for the KeyValueContainer.
-      containerData.setChunksPath(chunksPath.getPath());
-      containerData.setDbFile(dbFile);
+          //Create Metadata path chunks path and metadata db
+          File dbFile = getContainerDBFile();
 
-      // Create .container file
-      File containerFile = getContainerFile();
-      createContainerFile(containerFile);
+          createContainerMetaData(containerMetaDataPath, chunksPath, dbFile,
+              containerData.getSchemaVersion(), config);
 
-    } catch (StorageContainerException ex) {
-      if (containerMetaDataPath != null && containerMetaDataPath.getParentFile()
-          .exists()) {
-        FileUtil.fullyDelete(containerMetaDataPath.getParentFile());
-      }
-      throw ex;
-    } catch (DiskOutOfSpaceException ex) {
-      throw new StorageContainerException("Container creation failed, due to " +
-          "disk out of space", ex, DISK_OUT_OF_SPACE);
-    } catch (FileAlreadyExistsException ex) {
-      throw new StorageContainerException("Container creation failed because " +
-          "ContainerFile already exists", ex, CONTAINER_ALREADY_EXISTS);
-    } catch (IOException ex) {
-      if (containerMetaDataPath != null && containerMetaDataPath.getParentFile()
-          .exists()) {
-        FileUtil.fullyDelete(containerMetaDataPath.getParentFile());
+          //Set containerData for the KeyValueContainer.
+          containerData.setChunksPath(chunksPath.getPath());
+          containerData.setDbFile(dbFile);
+
+          // Create .container file
+          File containerFile = getContainerFile();
+          createContainerFile(containerFile);
+
+          return;
+        } catch (StorageContainerException ex) {
+          if (containerMetaDataPath != null
+              && containerMetaDataPath.getParentFile().exists()) {
+            FileUtil.fullyDelete(containerMetaDataPath.getParentFile());
+          }
+          throw ex;
+        } catch (FileAlreadyExistsException ex) {
+          throw new StorageContainerException("Container creation failed " +
+              "because ContainerFile already exists", ex,
+              CONTAINER_ALREADY_EXISTS);
+        } catch (IOException ex) {
+          // This is a general catch all - no space left of device, which should
+          // not happen as the volume Choosing policy should filter out full
+          // disks, but it may still be possible if the disk quickly fills,
+          // or some IO error on the disk etc. In this case we try again with a
+          // different volume if there are any left to try.
+          if (containerMetaDataPath != null &&
+              containerMetaDataPath.getParentFile().exists()) {
+            FileUtil.fullyDelete(containerMetaDataPath.getParentFile());
+          }
+          volumes.remove(containerVolume);
+          LOG.error("Exception attempting to create container {} on volume {}" +
+              " remaining volumes to try {}", containerData.getContainerID(),
+              containerVolume.getHddsRootDir(), volumes.size(), ex);
+          if (volumes.size() == 0) {
+            throw new StorageContainerException(
+                "Container creation failed. " + ex.getMessage(), ex,
+                CONTAINER_INTERNAL_ERROR);
+          }
+        }
       }
-
-      throw new StorageContainerException(
-          "Container creation failed. " + ex.getMessage(), ex,
-          CONTAINER_INTERNAL_ERROR);
     } finally {
       volumeSet.readUnlock();
     }
   }
 
+
+  /**
+   * The Static method call is wrapped in a protected instance method so it can
+   * be overridden in tests.
+   */
+  @VisibleForTesting
+  protected void createContainerMetaData(File containerMetaDataPath,
+      File chunksPath, File dbFile, String schemaVersion,
+      ConfigurationSource configuration) throws IOException {
+    KeyValueContainerUtil.createContainerMetaData(containerMetaDataPath,
+        chunksPath, dbFile, schemaVersion, configuration);
+  }
+
   /**
    * Set all of the path realted container data fields based on the name
    * conventions.
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
index 7efbe8a1b3..3c1a57c9d2 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.ozone.container.common.utils.db.DatanodeDBProfile;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume
     .RoundRobinVolumeChoosingPolicy;
+import org.apache.hadoop.ozone.container.common.volume.StorageVolume;
 import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
@@ -82,6 +83,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.List;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
@@ -118,7 +120,7 @@ public class TestKeyValueContainer {
 
   private final ContainerLayoutVersion layout;
   private String schemaVersion;
-  private HddsVolume hddsVolume;
+  private List<HddsVolume> hddsVolumes;
 
   // Use one configuration object across parameterized runs of tests.
   // This preserves the column family options in the container options
@@ -144,15 +146,27 @@ public class TestKeyValueContainer {
     CONF.setFromObject(dc);
 
     datanodeId = UUID.randomUUID();
-    hddsVolume = new HddsVolume.Builder(folder.getRoot()
+
+    hddsVolumes = new ArrayList<>();
+
+    hddsVolumes.add(new HddsVolume.Builder(folder.getRoot()
         .getAbsolutePath()).conf(CONF).datanodeUuid(datanodeId
-        .toString()).build();
-    StorageVolumeUtil.checkVolume(hddsVolume, scmId, scmId, CONF, null, null);
+        .toString()).build());
+    StorageVolumeUtil.checkVolume(hddsVolumes.get(0), scmId, scmId, CONF,
+        null, null);
 
     volumeSet = mock(MutableVolumeSet.class);
     volumeChoosingPolicy = mock(RoundRobinVolumeChoosingPolicy.class);
-    Mockito.when(volumeChoosingPolicy.chooseVolume(anyList(), anyLong()))
-        .thenReturn(hddsVolume);
+    Mockito.when(volumeSet.getVolumesList())
+        .thenAnswer(i -> hddsVolumes.stream()
+            .map(v -> (StorageVolume) v)
+            .collect(Collectors.toList()));
+    Mockito.when(volumeChoosingPolicy
+        .chooseVolume(anyList(), anyLong())).thenAnswer(
+            invocation -> {
+              List<HddsVolume> volumes = invocation.getArgument(0);
+              return volumes.get(0);
+            });
 
     keyValueContainerData = new KeyValueContainerData(1L,
         layout,
@@ -204,6 +218,40 @@ public class TestKeyValueContainer {
     Assert.assertTrue(chunksDir.exists());
   }
 
+  @Test
+  public void testNextVolumeTriedOnWriteFailure() throws Exception {
+    HddsVolume newVolume = new HddsVolume.Builder(
+        folder.newFolder().getAbsolutePath())
+        .conf(CONF).datanodeUuid(datanodeId.toString()).build();
+    StorageVolumeUtil.checkVolume(newVolume, scmId, scmId, CONF, null, null);
+    hddsVolumes.add(newVolume);
+
+    // Override the class, so that the first time we call it, it throws
+    // simulating a disk or write failure. The second time it should be ok
+
+    final AtomicInteger callCount = new AtomicInteger(0);
+    keyValueContainer = new KeyValueContainer(keyValueContainerData, CONF)  {
+
+      @Override
+      protected void createContainerMetaData(File containerMetaDataPath,
+          File chunksPath, File dbFile, String schemaVers,
+          ConfigurationSource configuration) throws IOException {
+        if (callCount.get() == 0) {
+          callCount.incrementAndGet();
+          throw new IOException("Injected failure");
+        } else {
+          callCount.incrementAndGet();
+          super.createContainerMetaData(containerMetaDataPath, chunksPath,
+              dbFile, schemaVers, configuration);
+        }
+      }
+    };
+    testCreateContainer();
+    // We should have called the mocked class twice. Once to get an error and
+    // then retry without a failure.
+    assertEquals(2, callCount.get());
+  }
+
   @Test
   public void testEmptyContainerImportExport() throws Exception {
     createContainer();
@@ -454,6 +502,7 @@ public class TestKeyValueContainer {
   @Test
   public void testDiskFullExceptionCreateContainer() throws Exception {
 
+    Mockito.reset(volumeChoosingPolicy);
     Mockito.when(volumeChoosingPolicy.chooseVolume(anyList(), anyLong()))
         .thenThrow(DiskChecker.DiskOutOfSpaceException.class);
     try {
@@ -705,6 +754,7 @@ public class TestKeyValueContainer {
         .conf(CONF).datanodeUuid(datanodeId.toString()).build();
     StorageVolumeUtil.checkVolume(newVolume, scmId, scmId, CONF, null, null);
     List<HddsVolume> volumeList = new ArrayList<>();
+    HddsVolume hddsVolume = hddsVolumes.get(0);
     volumeList.add(hddsVolume);
     volumeList.add(newVolume);
 
@@ -719,6 +769,7 @@ public class TestKeyValueContainer {
     KeyValueContainer container;
     List<File> exportFiles = new ArrayList<>();
     for (HddsVolume volume: volumeList) {
+      Mockito.reset(volumeChoosingPolicy);
       Mockito.when(volumeChoosingPolicy.chooseVolume(anyList(), anyLong()))
           .thenReturn(volume);
       for (int index = 0; index < count; index++, containerId++) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to