This is an automated email from the ASF dual-hosted git repository.
siddhant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 38b67b445f HDDS-9354. LegacyReplicationManager: Unhealthy replicas of
a sufficiently replicated container can block decommissioning (#5385)
38b67b445f is described below
commit 38b67b445fc0c7ab3946150d851171918e00ed37
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Fri Oct 6 09:56:31 2023 +0530
HDDS-9354. LegacyReplicationManager: Unhealthy replicas of a sufficiently
replicated container can block decommissioning (#5385)
---
.../replication/ContainerReplicaCount.java | 14 +++-
.../replication/ECContainerReplicaCount.java | 10 ++-
.../LegacyRatisContainerReplicaCount.java | 65 ++++++++++++++++
.../replication/RatisContainerReplicaCount.java | 9 ++-
.../hdds/scm/node/DatanodeAdminMonitorImpl.java | 18 ++++-
.../replication/TestECContainerReplicaCount.java | 8 +-
.../hdds/scm/node/TestDatanodeAdminMonitor.java | 86 ++++++++++++++++++++++
7 files changed, 201 insertions(+), 9 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
index 9c47ac244e..80ef8ab4c0 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
@@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
import java.util.List;
@@ -37,7 +38,16 @@ public interface ContainerReplicaCount {
boolean isSufficientlyReplicated();
- boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode);
+ /**
+ * Checks if a container has enough replicas to allow the specified
+ * datanode to be taken offline. This method is the interface between the
+ * decommissioning flow and Replication Manager.
+ * @param datanode the datanode being taken offline
+ * @param nodeManager an instance of {@link NodeManager}
+ * @return true if the datanode can be taken offline, otherwise false
+ */
+ boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+ NodeManager nodeManager);
boolean isOverReplicated();
@@ -63,6 +73,8 @@ public interface ContainerReplicaCount {
}
+ boolean isHealthyEnoughForOffline();
+
/**
* Return true if there are insufficient replicas to recover this container.
*
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
index 35abccd577..cf6d2c75c7 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
import java.util.ArrayList;
import java.util.Collections;
@@ -497,11 +498,13 @@ public class ECContainerReplicaCount implements
ContainerReplicaCount {
* replica on the node going offline has a copy elsewhere on another
* IN_SERVICE node, and if so that replica is sufficiently replicated.
* @param datanode The datanode being checked to go offline.
+ * @param nodeManager not used in this implementation
* @return True if the container is sufficiently replicated or if this
replica
* on the passed node is present elsewhere on an IN_SERVICE node.
*/
@Override
- public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode) {
+ public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+ NodeManager nodeManager) {
boolean sufficientlyReplicated = isSufficientlyReplicated(false);
if (sufficientlyReplicated) {
return true;
@@ -533,6 +536,11 @@ public class ECContainerReplicaCount implements
ContainerReplicaCount {
return healthyIndexes.containsKey(thisReplica.getReplicaIndex());
}
+ @Override
+ public boolean isHealthyEnoughForOffline() {
+ return isHealthy();
+ }
+
@Override
public boolean isSufficientlyReplicated() {
return isSufficientlyReplicated(false);
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
index 80c05afbd4..90c7afebcc 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
@@ -16,11 +16,17 @@
*/
package org.apache.hadoop.hdds.scm.container.replication;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
import java.util.Set;
+import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+
/**
* When HDDS-6447 was done to improve the LegacyReplicationManager, work on
* the new replication manager had already started. When this class was added,
@@ -59,4 +65,63 @@ public class LegacyRatisContainerReplicaCount extends
public int getUnhealthyReplicaCountAdapter() {
return getMisMatchedReplicaCount();
}
+
+ /**
+ * Checks if all replicas (except UNHEALTHY) on in-service nodes are in the
+ * same health state as the container. This is similar to what
+ * {@link ContainerReplicaCount#isHealthy()} does. The difference is in how
+ * both methods treat UNHEALTHY replicas.
+ * <p>
+ * This method is the interface between the decommissioning flow and
+ * Replication Manager. Callers can use it to check whether replicas of a
+ * container are in the same state as the container before a datanode is
+ * taken offline.
+ * <p>
+ * Note that this method's purpose is to only compare the replica state with
+ * the container state. It does not check if the container has sufficient
+ * number of replicas - that is the job of {@link ContainerReplicaCount
+ * #isSufficientlyReplicatedForOffline(DatanodeDetails, NodeManager)}.
+ * @return true if the container is healthy enough, which is determined by
+ * various checks
+ */
+ @Override
+ public boolean isHealthyEnoughForOffline() {
+ long countInService = getReplicas().stream()
+ .filter(r -> r.getDatanodeDetails().getPersistedOpState() ==
IN_SERVICE)
+ .count();
+ if (countInService == 0) {
+ /*
+ Having no in-service nodes is unexpected and SCM shouldn't allow this
+ to happen in the first place. Return false here just to be safe.
+ */
+ return false;
+ }
+
+ LifeCycleState containerState = getContainer().getState();
+ return (containerState == LifeCycleState.CLOSED
+ || containerState == LifeCycleState.QUASI_CLOSED)
+ && getReplicas().stream()
+ .filter(r -> r.getDatanodeDetails().getPersistedOpState() ==
IN_SERVICE)
+ .filter(r -> r.getState() !=
+ ContainerReplicaProto.State.UNHEALTHY)
+ .allMatch(r -> ReplicationManager.compareState(
+ containerState, r.getState()));
+ }
+
+ /**
+ * For Legacy Replication Manager and Ratis Containers, this method checks
+ * if the container is sufficiently replicated. It also checks whether
+ * there are any UNHEALTHY replicas that need to be replicated.
+ * @param datanode Not used in this implementation
+ * @param nodeManager An instance of NodeManager, used to check the health
+ * status of a node
+ * @return true if the container is sufficiently replicated and there are
+ * no UNHEALTHY replicas that need to be replicated, false otherwise
+ */
+ @Override
+ public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+ NodeManager nodeManager) {
+ return super.isSufficientlyReplicated() &&
+ super.getVulnerableUnhealthyReplicas(nodeManager).isEmpty();
+ }
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
index 0c6052ef35..c516bc73a0 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
@@ -413,14 +413,21 @@ public class RatisContainerReplicaCount implements
ContainerReplicaCount {
/**
* For Ratis, this method is the same as isSufficientlyReplicated.
* @param datanode Not used in this implementation
+ * @param nodeManager not used in this implementation
* @return True if the container is sufficiently replicated and False
* otherwise.
*/
@Override
- public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode) {
+ public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+ NodeManager nodeManager) {
return isSufficientlyReplicated();
}
+ @Override
+ public boolean isHealthyEnoughForOffline() {
+ return isHealthy();
+ }
+
/**
* QUASI_CLOSED containers that have a mix of healthy and UNHEALTHY
* replicas require special treatment. If the healthy replicas don't have
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
index 0263078ec8..34a69047c8 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
@@ -346,7 +346,7 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
try {
ContainerReplicaCount replicaSet =
replicationManager.getContainerReplicaCount(cid);
- if (replicaSet.isSufficientlyReplicatedForOffline(dn)) {
+ if (replicaSet.isSufficientlyReplicatedForOffline(dn, nodeManager)) {
sufficientlyReplicated++;
} else {
if (LOG.isDebugEnabled()) {
@@ -359,7 +359,21 @@ public class DatanodeAdminMonitorImpl implements
DatanodeAdminMonitor {
}
underReplicated++;
}
- if (!replicaSet.isHealthy()) {
+
+ boolean isHealthy;
+ /*
+ If LegacyReplicationManager is enabled, then use the
+ isHealthyEnoughForOffline API. ReplicationManager doesn't support this
+ API yet.
+ */
+ boolean legacyEnabled = conf.getBoolean("hdds.scm.replication.enable" +
+ ".legacy", false);
+ if (legacyEnabled) {
+ isHealthy = replicaSet.isHealthyEnoughForOffline();
+ } else {
+ isHealthy = replicaSet.isHealthy();
+ }
+ if (!isHealthy) {
if (LOG.isDebugEnabled()) {
unhealthyIDs.add(cid);
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
index af0206140f..88ae2e79be 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
@@ -688,18 +688,18 @@ public class TestECContainerReplicaCount {
1);
Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
Assertions.assertTrue(rcnt.isSufficientlyReplicatedForOffline(
- offlineReplica.getDatanodeDetails()));
+ offlineReplica.getDatanodeDetails(), null));
Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
- offlineNotReplicated.getDatanodeDetails()));
+ offlineNotReplicated.getDatanodeDetails(), null));
// A random DN not hosting a replica for this container should return
false.
Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
- MockDatanodeDetails.randomDatanodeDetails()));
+ MockDatanodeDetails.randomDatanodeDetails(), null));
// Passing the IN_SERVICE node should return false even though the
// replica is on a healthy node
Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
- inServiceReplica.getDatanodeDetails()));
+ inServiceReplica.getDatanodeDetails(), null));
}
@Test
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
index 32e2c2a5f1..ac9755e972 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
@@ -17,16 +17,23 @@
*/
package org.apache.hadoop.hdds.scm.node;
+import com.google.common.collect.ImmutableSet;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import
org.apache.hadoop.hdds.scm.container.replication.LegacyRatisContainerReplicaCount;
import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
import org.apache.hadoop.hdds.scm.container.SimpleMockNodeManager;
+import org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.server.events.EventQueue;
@@ -209,6 +216,85 @@ public class TestDatanodeAdminMonitor {
nodeManager.getNodeStatus(dn1).getOperationalState());
}
+ /**
+ * Situation: A QUASI_CLOSED container has an UNHEALTHY replica with the
+ * greatest BCSID, and three QUASI_CLOSED replicas with a smaller BCSID. The
+ * UNHEALTHY container is on a decommissioning node, and there are no other
+ * copies of this replica, that is, replicas with the same Origin ID as
+ * this replica.
+ *
+ * Expectation: Decommissioning should not complete until the UNHEALTHY
+ * replica has been replicated to another node.
+ *
+ * Note: This test currently uses the LegacyReplicationManager, as the new
+ * one doesn't support this behaviour yet.
+ * @throws NodeNotFoundException
+ * @throws ContainerNotFoundException
+ */
+ @Test
+ public void testDecommissionWaitsForUnhealthyReplicaToReplicate()
+ throws NodeNotFoundException, ContainerNotFoundException {
+ conf.setBoolean("hdds.scm.replication.enable.legacy", true);
+
+ DatanodeDetails dn1 = MockDatanodeDetails.randomDatanodeDetails();
+ nodeManager.register(dn1,
+ new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+ HddsProtos.NodeState.HEALTHY));
+
+ // create 3 QUASI_CLOSED replicas with containerID 1 and same origin ID
+ ContainerID containerID = ContainerID.valueOf(1);
+ Set<ContainerReplica> replicas =
+ ReplicationTestUtil.createReplicasWithSameOrigin(containerID,
+ State.QUASI_CLOSED, 0, 0, 0);
+
+ // the container's sequence id is greater than the healthy replicas'
+ ContainerInfo container = ReplicationTestUtil.createContainerInfo(
+ RatisReplicationConfig.getInstance(
+ HddsProtos.ReplicationFactor.THREE), containerID.getId(),
+ HddsProtos.LifeCycleState.QUASI_CLOSED,
+ replicas.iterator().next().getSequenceId() + 1);
+ // UNHEALTHY replica is on a unique origin and has same sequence id as
+ // the container
+ ContainerReplica unhealthy =
+ ReplicationTestUtil.createContainerReplica(containerID, 0,
+ dn1.getPersistedOpState(), State.UNHEALTHY,
+ container.getNumberOfKeys(), container.getUsedBytes(), dn1,
+ dn1.getUuid(), container.getSequenceId());
+ replicas.add(unhealthy);
+ nodeManager.setContainers(dn1, ImmutableSet.of(containerID));
+
+ Mockito.when(repManager.getContainerReplicaCount(Mockito.eq(containerID)))
+ .thenReturn(new LegacyRatisContainerReplicaCount(container, replicas,
+ 0, 0, 3, 2));
+
+ // start monitoring dn1
+ monitor.startMonitoring(dn1);
+ monitor.run();
+ assertEquals(1, monitor.getTrackedNodeCount());
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+ nodeManager.getNodeStatus(dn1).getOperationalState());
+
+ // Running the monitor again causes it to remain DECOMMISSIONING
+ // as nothing has changed.
+ monitor.run();
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+ nodeManager.getNodeStatus(dn1).getOperationalState());
+
+ // add a copy of the UNHEALTHY replica on a new node, dn1 should get
+ // decommissioned now
+ ContainerReplica copyOfUnhealthyOnNewNode = unhealthy.toBuilder()
+ .setDatanodeDetails(MockDatanodeDetails.randomDatanodeDetails())
+ .build();
+ replicas.add(copyOfUnhealthyOnNewNode);
+ Mockito.when(repManager.getContainerReplicaCount(Mockito.eq(containerID)))
+ .thenReturn(new LegacyRatisContainerReplicaCount(container, replicas,
+ 0, 0, 3, 2));
+ monitor.run();
+ assertEquals(0, monitor.getTrackedNodeCount());
+ assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
+ nodeManager.getNodeStatus(dn1).getOperationalState());
+ }
+
@Test
public void testDecommissionNodeWithUnrecoverableECContainer()
throws NodeNotFoundException, ContainerNotFoundException {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]