This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 7fa7d7650d HDDS-9657. Mark recovering containers unhealthy after DN 
restart (#5686)
7fa7d7650d is described below

commit 7fa7d7650d0db9361d3f080d445e0ca8e1804077
Author: SaketaChalamchala <[email protected]>
AuthorDate: Wed Dec 6 01:54:32 2023 -0800

    HDDS-9657. Mark recovering containers unhealthy after DN restart (#5686)
---
 .../ozone/container/ozoneimpl/ContainerReader.java |  8 +--
 .../container/ozoneimpl/TestContainerReader.java   |  7 +-
 .../hdds/scm/storage/TestContainerCommandsEC.java  | 76 ++++++++++++++++++++++
 3 files changed, 85 insertions(+), 6 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
index 1ffc9e00cb..5f300a446d 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
@@ -214,10 +214,10 @@ public class ContainerReader implements Runnable {
             config);
         if (kvContainer.getContainerState() == RECOVERING) {
           if (shouldDeleteRecovering) {
-            cleanupContainer(hddsVolume, kvContainer);
-            kvContainer.delete();
-            LOG.info("Delete recovering container {}.",
-                kvContainer.getContainerData().getContainerID());
+            kvContainer.markContainerUnhealthy();
+            LOG.info("Stale recovering container {} marked UNHEALTHY",
+                kvContainerData.getContainerID());
+            containerSet.addContainer(kvContainer);
           }
           return;
         }
diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
index e9f60b5c6c..b22f3a6851 100644
--- 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
+++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
@@ -61,6 +61,7 @@ import java.util.UUID;
 
 import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.DELETED;
 import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.RECOVERING;
+import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
 import static 
org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded;
 import static org.mockito.ArgumentMatchers.anyList;
 import static org.mockito.ArgumentMatchers.anyLong;
@@ -240,8 +241,10 @@ public class TestContainerReader {
     thread.start();
     thread.join();
 
-    //recovering container should be deleted, so the count should be 2
-    Assert.assertEquals(2, containerSet.containerCount());
+    //recovering container should be marked unhealthy, so the count should be 3
+    Assert.assertEquals(UNHEALTHY, containerSet.getContainer(
+        recoveringContainerData.getContainerID()).getContainerState());
+    Assert.assertEquals(3, containerSet.containerCount());
 
     for (int i = 0; i < 2; i++) {
       Container keyValueContainer = containerSet.getContainer(i);
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
index 88222c56a6..f2fe3fa31a 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
@@ -110,6 +110,7 @@ import java.util.stream.Stream;
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED;
 import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED;
+import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.BlockTokenSecretProto.AccessModeProto.READ;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.BlockTokenSecretProto.AccessModeProto.WRITE;
 import static 
org.apache.hadoop.ozone.container.ContainerTestHelper.newWriteChunkRequestBuilder;
@@ -523,6 +524,81 @@ public class TestContainerCommandsEC {
     }
   }
 
+  @Test
+  public void testCreateRecoveryContainerAfterDNRestart() throws Exception {
+    try (XceiverClientManager xceiverClientManager =
+             new XceiverClientManager(config)) {
+      ECReplicationConfig replicationConfig = new ECReplicationConfig(3, 2);
+      Pipeline newPipeline =
+          scm.getPipelineManager().createPipeline(replicationConfig);
+      scm.getPipelineManager().activatePipeline(newPipeline.getId());
+      final ContainerInfo container = scm.getContainerManager()
+          .allocateContainer(replicationConfig, "test");
+      Token<ContainerTokenIdentifier> cToken = containerTokenGenerator
+          .generateToken(ANY_USER, container.containerID());
+      scm.getContainerManager().getContainerStateManager()
+          .addContainer(container.getProtobuf());
+
+      DatanodeDetails targetDN = newPipeline.getNodes().get(0);
+      XceiverClientSpi dnClient = xceiverClientManager.acquireClient(
+          createSingleNodePipeline(newPipeline, targetDN,
+              2));
+      try {
+        // To create the actual situation, container would have been in closed
+        // state at SCM.
+        scm.getContainerManager().getContainerStateManager()
+            .updateContainerState(container.containerID().getProtobuf(),
+                HddsProtos.LifeCycleEvent.FINALIZE);
+        scm.getContainerManager().getContainerStateManager()
+            .updateContainerState(container.containerID().getProtobuf(),
+                HddsProtos.LifeCycleEvent.CLOSE);
+
+        //Create the recovering container in target DN.
+        String encodedToken = cToken.encodeToUrlString();
+        ContainerProtocolCalls.createRecoveringContainer(dnClient,
+            container.containerID().getProtobuf().getId(),
+            encodedToken, 4);
+
+        // Restart the DN.
+        cluster.restartHddsDatanode(targetDN, true);
+
+        // Recovering container state after DN restart should be UNHEALTHY.
+        Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY,
+            cluster.getHddsDatanode(targetDN)
+                .getDatanodeStateMachine()
+                .getContainer()
+                .getContainerSet()
+                .getContainer(container.getContainerID())
+                .getContainerState());
+
+        // Writes to recovering container after DN restart should fail
+        // because the container is marked UNHEALTHY
+        // and hence does not accept any writes.
+        BlockID blockID = ContainerTestHelper
+            .getTestBlockID(container.containerID().getProtobuf().getId());
+        Token<? extends TokenIdentifier> blockToken =
+            blockTokenGenerator.generateToken(ANY_USER, blockID,
+                EnumSet.of(READ, WRITE), Long.MAX_VALUE);
+        byte[] data = "TestData".getBytes(UTF_8);
+        ContainerProtos.ContainerCommandRequestProto writeChunkRequest =
+            newWriteChunkRequestBuilder(newPipeline, blockID,
+                ChunkBuffer.wrap(ByteBuffer.wrap(data)), 0)
+                .setEncodedToken(blockToken.encodeToUrlString())
+                .build();
+        scm.getPipelineManager().activatePipeline(newPipeline.getId());
+
+        try {
+          dnClient.sendCommand(writeChunkRequest);
+        } catch (StorageContainerException e) {
+          Assert.assertEquals(CONTAINER_UNHEALTHY, e.getResult());
+        }
+
+      } finally {
+        xceiverClientManager.releaseClient(dnClient, false);
+      }
+    }
+  }
+
   private static byte[] getBytesWith(int singleDigitNumber, int total) {
     StringBuilder builder = new StringBuilder(singleDigitNumber);
     for (int i = 1; i <= total; i++) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to