This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new b2c542dfdd HDDS-9327. LegacyReplicationManager: Handle all UNHEALTHY
replicas of a CLOSING container (#5348)
b2c542dfdd is described below
commit b2c542dfdd13db94ce1c5f14ac03e9edcf48e207
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Wed Sep 27 22:40:34 2023 +0530
HDDS-9327. LegacyReplicationManager: Handle all UNHEALTHY replicas of a
CLOSING container (#5348)
---
.../protocol/commands/CloseContainerCommand.java | 4 +
.../replication/LegacyReplicationManager.java | 16 ++++
.../replication/TestLegacyReplicationManager.java | 85 +++++++++++++++++++++-
3 files changed, 104 insertions(+), 1 deletion(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java
index 21efd20668..f07539abdc 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java
@@ -82,6 +82,10 @@ public class CloseContainerCommand
return pipelineID;
}
+ public boolean isForce() { // read-only accessor for the force field
+ return force;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java
index 568a013dbc..3154b2e52f 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java
@@ -381,8 +381,10 @@ public class LegacyReplicationManager {
*/
if (state == LifeCycleState.CLOSING) {
setHealthStateForClosing(replicas, container, report);
+ boolean foundHealthy = false;
for (ContainerReplica replica: replicas) {
if (replica.getState() != State.UNHEALTHY) {
+ foundHealthy = true;
sendCloseCommand(
container, replica.getDatanodeDetails(), false);
}
@@ -398,7 +400,21 @@ public class LegacyReplicationManager {
*/
if (replicas.isEmpty() && (container.getNumberOfKeys() == 0)) {
closeEmptyContainer(container);
+ return;
+ }
+
+ if (!foundHealthy) {
+ /* If we get here, then this container has replicas and all are
+ UNHEALTHY. Move it from CLOSING to QUASI_CLOSED so RM can then try
+ to maintain replication factor number of replicas.
+ */
+ containerManager.updateContainerState(container.containerID(),
+ HddsProtos.LifeCycleEvent.QUASI_CLOSE);
+ LOG.debug("Moved container {} from CLOSING to QUASI_CLOSED " +
+ "because it has only UNHEALTHY replicas: {}.", container,
+ replicas);
}
+
return;
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
index bceb430668..4919adb4bb 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java
@@ -65,6 +65,7 @@ import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
import org.apache.hadoop.hdds.utils.db.LongCodec;
import
org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException;
import org.apache.hadoop.ozone.container.common.SCMTestUtils;
+import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand;
import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
@@ -120,6 +121,7 @@ import static
org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicaBuilder;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas;
import static
org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails;
+import static org.mockito.Mockito.times;
import static org.mockito.Mockito.when;
/**
@@ -389,7 +391,7 @@ public class TestLegacyReplicationManager {
*/
@Test
public void testClosingMissingContainer()
- throws IOException, TimeoutException {
+ throws IOException, InvalidStateTransitionException {
final ContainerInfo container = getContainer(LifeCycleState.CLOSING);
final ContainerID id = container.containerID();
@@ -435,6 +437,10 @@ public class TestLegacyReplicationManager {
ReplicationManagerReport.HealthState.UNDER_REPLICATED));
Assertions.assertEquals(1, report.getStat(
ReplicationManagerReport.HealthState.MIS_REPLICATED));
+
+ Mockito.verify(containerManager, times(0))
+ .updateContainerState(container.containerID(),
+ LifeCycleEvent.QUASI_CLOSE);
}
@Test
@@ -582,6 +588,83 @@ public class TestLegacyReplicationManager {
*/
@Nested
class UnstableReplicas {
+
+ /**
+ * A CLOSING container whose replicas are all UNHEALTHY should get a
+ * QUASI_CLOSE event, so that RM can then maintain replication factor
+ * number of replicas; afterwards it is reported as QUASI_CLOSED_STUCK.
+ */
+ @Test
+ public void testClosingContainerWithOnlyUnhealthyReplicas()
+ throws IOException, InvalidStateTransitionException {
+ final ContainerInfo container = getContainer(LifeCycleState.CLOSING);
+ final ContainerID id = container.containerID();
+ containerStateManager.addContainer(container.getProtobuf());
+
+ // three UNHEALTHY replicas, one per randomDatanodeDetails() call
+ final Set<ContainerReplica> replicas = getReplicas(id, UNHEALTHY,
+ randomDatanodeDetails(), randomDatanodeDetails(),
+ randomDatanodeDetails());
+ for (ContainerReplica replica : replicas) {
+ containerStateManager.updateContainerReplica(id, replica);
+ }
+ // First pass: RM is expected to request QUASI_CLOSE exactly once.
+ replicationManager.processAll();
+ Mockito.verify(containerManager, times(1))
+ .updateContainerState(container.containerID(),
+ LifeCycleEvent.QUASI_CLOSE);
+ // Apply the transition by hand (assumes containerManager is a mock).
+ containerStateManager.updateContainerState(
+ container.containerID().getProtobuf(), LifeCycleEvent.QUASI_CLOSE);
+ // Second pass: the container is now expected to be counted as stuck.
+ replicationManager.processAll();
+ Assertions.assertEquals(1,
+ replicationManager.getContainerReport().getStat(
+ ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
+ }
+
+ /**
+ * A non-force close command should be sent to the replica that is not
+ * UNHEALTHY. The container must not get a QUASI_CLOSE event in this pass.
+ */
+ @Test
+ public void testClosingContainerWithSomeUnhealthyReplicas()
+ throws IOException, InvalidStateTransitionException {
+ final ContainerInfo container = getContainer(LifeCycleState.CLOSING);
+ final ContainerID id = container.containerID();
+ containerStateManager.addContainer(container.getProtobuf());
+
+ // 2 UNHEALTHY, 1 OPEN (the OPEN replica lives on 'datanode')
+ final Set<ContainerReplica> replicas = getReplicas(id, UNHEALTHY,
+ randomDatanodeDetails(), randomDatanodeDetails());
+ final DatanodeDetails datanode = randomDatanodeDetails();
+ replicas.addAll(getReplicas(id, State.OPEN, datanode));
+ for (ContainerReplica replica : replicas) {
+ containerStateManager.updateContainerReplica(id, replica);
+ }
+ // Baseline, so only close commands caused by this pass are counted.
+ final int currentCloseCommandCount = datanodeCommandHandler
+ .getInvocationCount(SCMCommandProto.Type.closeContainerCommand);
+ replicationManager.processAll();
+ eventQueue.processAll(1000);
+ // Expect no QUASI_CLOSE, and exactly one close command with force=false.
+ Mockito.verify(containerManager, times(0))
+ .updateContainerState(container.containerID(),
+ LifeCycleEvent.QUASI_CLOSE);
+ Assertions.assertEquals(currentCloseCommandCount + 1,
+ datanodeCommandHandler.getInvocationCount(
+ SCMCommandProto.Type.closeContainerCommand));
+ Assertions.assertEquals(1,
+ datanodeCommandHandler.getReceivedCommands().size());
+ SCMCommand command =
+ datanodeCommandHandler.getReceivedCommands().iterator().next()
+ .getCommand();
+ Assertions.assertSame(SCMCommandProto.Type.closeContainerCommand,
+ command.getType());
+ CloseContainerCommand closeCommand = (CloseContainerCommand) command;
+ Assertions.assertFalse(closeCommand.isForce());
+ }
+
/**
* 2 open replicas
* 1 quasi-closed replica
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]