This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 7fa7d7650d HDDS-9657. Mark recovering containers unhealthy after DN
restart (#5686)
7fa7d7650d is described below
commit 7fa7d7650d0db9361d3f080d445e0ca8e1804077
Author: SaketaChalamchala <[email protected]>
AuthorDate: Wed Dec 6 01:54:32 2023 -0800
HDDS-9657. Mark recovering containers unhealthy after DN restart (#5686)
---
.../ozone/container/ozoneimpl/ContainerReader.java | 8 +--
.../container/ozoneimpl/TestContainerReader.java | 7 +-
.../hdds/scm/storage/TestContainerCommandsEC.java | 76 ++++++++++++++++++++++
3 files changed, 85 insertions(+), 6 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
index 1ffc9e00cb..5f300a446d 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
@@ -214,10 +214,10 @@ public class ContainerReader implements Runnable {
config);
if (kvContainer.getContainerState() == RECOVERING) {
if (shouldDeleteRecovering) {
- cleanupContainer(hddsVolume, kvContainer);
- kvContainer.delete();
- LOG.info("Delete recovering container {}.",
- kvContainer.getContainerData().getContainerID());
+ kvContainer.markContainerUnhealthy();
+ LOG.info("Stale recovering container {} marked UNHEALTHY",
+ kvContainerData.getContainerID());
+ containerSet.addContainer(kvContainer);
}
return;
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
index e9f60b5c6c..b22f3a6851 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
@@ -61,6 +61,7 @@ import java.util.UUID;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.DELETED;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.RECOVERING;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
import static
org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.anyLong;
@@ -240,8 +241,10 @@ public class TestContainerReader {
thread.start();
thread.join();
- //recovering container should be deleted, so the count should be 2
- Assert.assertEquals(2, containerSet.containerCount());
+ //recovering container should be marked unhealthy, so the count should be 3
+ Assert.assertEquals(UNHEALTHY, containerSet.getContainer(
+ recoveringContainerData.getContainerID()).getContainerState());
+ Assert.assertEquals(3, containerSet.containerCount());
for (int i = 0; i < 2; i++) {
Container keyValueContainer = containerSet.getContainer(i);
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
index 88222c56a6..f2fe3fa31a 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/storage/TestContainerCommandsEC.java
@@ -110,6 +110,7 @@ import java.util.stream.Stream;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED;
import static
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED;
+import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.BlockTokenSecretProto.AccessModeProto.READ;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.BlockTokenSecretProto.AccessModeProto.WRITE;
import static
org.apache.hadoop.ozone.container.ContainerTestHelper.newWriteChunkRequestBuilder;
@@ -523,6 +524,81 @@ public class TestContainerCommandsEC {
}
}
+ @Test
+ public void testCreateRecoveryContainerAfterDNRestart() throws Exception {
+ try (XceiverClientManager xceiverClientManager =
+ new XceiverClientManager(config)) {
+ ECReplicationConfig replicationConfig = new ECReplicationConfig(3, 2);
+ Pipeline newPipeline =
+ scm.getPipelineManager().createPipeline(replicationConfig);
+ scm.getPipelineManager().activatePipeline(newPipeline.getId());
+ final ContainerInfo container = scm.getContainerManager()
+ .allocateContainer(replicationConfig, "test");
+ Token<ContainerTokenIdentifier> cToken = containerTokenGenerator
+ .generateToken(ANY_USER, container.containerID());
+ scm.getContainerManager().getContainerStateManager()
+ .addContainer(container.getProtobuf());
+
+ DatanodeDetails targetDN = newPipeline.getNodes().get(0);
+ XceiverClientSpi dnClient = xceiverClientManager.acquireClient(
+ createSingleNodePipeline(newPipeline, targetDN,
+ 2));
+ try {
+ // Mirror the real scenario: SCM already sees the container as
+ // closed, so drive its SCM state through FINALIZE and then CLOSE.
+ scm.getContainerManager().getContainerStateManager()
+ .updateContainerState(container.containerID().getProtobuf(),
+ HddsProtos.LifeCycleEvent.FINALIZE);
+ scm.getContainerManager().getContainerStateManager()
+ .updateContainerState(container.containerID().getProtobuf(),
+ HddsProtos.LifeCycleEvent.CLOSE);
+
+ // Create the recovering container on the target DN.
+ String encodedToken = cToken.encodeToUrlString();
+ ContainerProtocolCalls.createRecoveringContainer(dnClient,
+ container.containerID().getProtobuf().getId(),
+ encodedToken, 4);
+
+ // Restart the target DN.
+ cluster.restartHddsDatanode(targetDN, true);
+
+ // After the restart the recovering container must be UNHEALTHY.
+ Assert.assertEquals(ContainerProtos.ContainerDataProto.State.UNHEALTHY,
+ cluster.getHddsDatanode(targetDN)
+ .getDatanodeStateMachine()
+ .getContainer()
+ .getContainerSet()
+ .getContainer(container.getContainerID())
+ .getContainerState());
+
+ // A write to the now-UNHEALTHY container is expected to be rejected
+ // with CONTAINER_UNHEALTHY. NOTE(review): the try/catch below also
+ // passes when sendCommand throws nothing -- confirm this is intended.
+ BlockID blockID = ContainerTestHelper
+ .getTestBlockID(container.containerID().getProtobuf().getId());
+ Token<? extends TokenIdentifier> blockToken =
+ blockTokenGenerator.generateToken(ANY_USER, blockID,
+ EnumSet.of(READ, WRITE), Long.MAX_VALUE);
+ byte[] data = "TestData".getBytes(UTF_8);
+ ContainerProtos.ContainerCommandRequestProto writeChunkRequest =
+ newWriteChunkRequestBuilder(newPipeline, blockID,
+ ChunkBuffer.wrap(ByteBuffer.wrap(data)), 0)
+ .setEncodedToken(blockToken.encodeToUrlString())
+ .build();
+ scm.getPipelineManager().activatePipeline(newPipeline.getId());
+
+ try {
+ dnClient.sendCommand(writeChunkRequest);
+ } catch (StorageContainerException e) {
+ Assert.assertEquals(CONTAINER_UNHEALTHY, e.getResult());
+ }
+
+ } finally {
+ xceiverClientManager.releaseClient(dnClient, false);
+ }
+ }
+ }
+
private static byte[] getBytesWith(int singleDigitNumber, int total) {
StringBuilder builder = new StringBuilder(singleDigitNumber);
for (int i = 1; i <= total; i++) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]