This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 2747bf2fbf HDDS-9591. Replication Manager could incorrectly use
QUASI_CLOSED replicas as replication sources for CLOSED containers (#5537)
2747bf2fbf is described below
commit 2747bf2fbfc6cb83b5a915fda8994246a874ede8
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Tue Nov 7 21:47:36 2023 +0530
HDDS-9591. Replication Manager could incorrectly use QUASI_CLOSED replicas
as replication sources for CLOSED containers (#5537)
---
.../replication/RatisUnderReplicationHandler.java | 25 ++++++++++--
.../TestRatisUnderReplicationHandler.java | 47 ++++++++++++++++++++++
2 files changed, 68 insertions(+), 4 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
index 931da903ea..5af7a16df2 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hdds.scm.container.replication;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -244,8 +245,8 @@ public class RatisUnderReplicationHandler
* @param pendingOps List of pending ContainerReplicaOp
* @return List of healthy datanodes that have closed/quasi-closed replicas
* (or UNHEALTHY replicas if they're the only ones available) and are not
- * pending replica deletion. Sorted in descending order of
- * sequence id.
+ * pending replica deletion. If there is a maximum sequence ID, then only
+ * replicas with that sequence ID are returned.
*/
private List<DatanodeDetails> getSources(
RatisContainerReplicaCount replicaCount,
@@ -259,8 +260,24 @@ public class RatisUnderReplicationHandler
}
Predicate<ContainerReplica> predicate =
- replica -> replica.getState() == State.CLOSED ||
- replica.getState() == State.QUASI_CLOSED;
+ replica -> replica.getState() == State.CLOSED;
+
+ /*
+ If no CLOSED replicas are available, or if the container itself is
+ QUASI_CLOSED, then QUASI_CLOSED replicas are allowed to be sources.
+ */
+ boolean hasClosedReplica = false;
+ for (ContainerReplica replica : replicaCount.getReplicas()) {
+ if (replica.getState() == State.CLOSED) {
+ hasClosedReplica = true;
+ break;
+ }
+ }
+ if (!hasClosedReplica ||
+ replicaCount.getContainer().getState() == LifeCycleState.QUASI_CLOSED)
{
+ predicate =
+ predicate.or(replica -> replica.getState() == State.QUASI_CLOSED);
+ }
if (replicaCount.getHealthyReplicaCount() == 0) {
predicate = predicate.or(
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
index 3155f149a5..8e37aade06 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
@@ -59,6 +59,7 @@ import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalSt
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
+import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainer;
import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
@@ -561,6 +562,52 @@ public class TestRatisUnderReplicationHandler {
command.getKey()));
}
+ @Test
+ public void testOnlyClosedReplicasOfClosedContainersAreSources()
+ throws IOException {
+ container = createContainerInfo(RATIS_REPLICATION_CONFIG, 1,
+ HddsProtos.LifeCycleState.CLOSED, 1);
+
+ final Set<ContainerReplica> replicas = new HashSet<>(2);
+ final ContainerReplica closedReplica =
+ createContainerReplica(container.containerID(), 0, IN_SERVICE,
+ State.CLOSED, 1);
+ replicas.add(closedReplica);
+ replicas.add(createContainerReplica(container.containerID(), 0,
+ IN_SERVICE, State.QUASI_CLOSED, 1));
+
+ final Set<Pair<DatanodeDetails, SCMCommand<?>>> commands =
+ testProcessing(replicas, Collections.emptyList(),
+ getUnderReplicatedHealthResult(), 2, 1);
+ commands.forEach(
+ command -> Assert.assertEquals(closedReplica.getDatanodeDetails(),
+ command.getKey()));
+ }
+
+ @Test
+ public void testQuasiClosedReplicasAreSourcesWhenOnlyTheyAreAvailable()
+ throws IOException {
+ container = createContainerInfo(RATIS_REPLICATION_CONFIG, 1,
+ HddsProtos.LifeCycleState.CLOSED, 1);
+
+ Set<ContainerReplica> replicas = new HashSet<>(1);
+ replicas.add(createContainerReplica(container.containerID(), 0,
+ IN_SERVICE, State.QUASI_CLOSED, 1));
+
+ testProcessing(replicas, Collections.emptyList(),
+ getUnderReplicatedHealthResult(), 2, 2);
+
+ // test the same, but for a QUASI_CLOSED container
+ container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED,
+ RATIS_REPLICATION_CONFIG);
+ replicas = new HashSet<>(1);
+ replicas.add(createContainerReplica(container.containerID(), 0,
+ IN_SERVICE, State.QUASI_CLOSED, container.getSequenceId()));
+
+ commandsSent.clear();
+ testProcessing(replicas, Collections.emptyList(),
+ getUnderReplicatedHealthResult(), 2, 2);
+ }
/**
* Tests whether the specified expectNumCommands number of commands are
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]