[ozone] branch master updated: HDDS-9354. LegacyReplicationManager: Unhealthy replicas of a sufficiently replicated container can block decommissioning (#5385)

siddhant Thu, 05 Oct 2023 21:28:07 -0700

This is an automated email from the ASF dual-hosted git repository.

siddhant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git



The following commit(s) were added to refs/heads/master by this push:
     new 38b67b445f HDDS-9354. LegacyReplicationManager: Unhealthy replicas of 
a sufficiently replicated container can block decommissioning (#5385)
38b67b445f is described below

commit 38b67b445fc0c7ab3946150d851171918e00ed37
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Fri Oct 6 09:56:31 2023 +0530

    HDDS-9354. LegacyReplicationManager: Unhealthy replicas of a sufficiently 
replicated container can block decommissioning (#5385)
---
 .../replication/ContainerReplicaCount.java         | 14 +++-
 .../replication/ECContainerReplicaCount.java       | 10 ++-
 .../LegacyRatisContainerReplicaCount.java          | 65 ++++++++++++++++
 .../replication/RatisContainerReplicaCount.java    |  9 ++-
 .../hdds/scm/node/DatanodeAdminMonitorImpl.java    | 18 ++++-
 .../replication/TestECContainerReplicaCount.java   |  8 +-
 .../hdds/scm/node/TestDatanodeAdminMonitor.java    | 86 ++++++++++++++++++++++
 7 files changed, 201 insertions(+), 9 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
index 9c47ac244e..80ef8ab4c0 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaCount.java
@@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
 
 import java.util.List;
 
@@ -37,7 +38,16 @@ public interface ContainerReplicaCount {
 
   boolean isSufficientlyReplicated();
 
-  boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode);
+  /**
+   * Checks if a container has enough replicas to allow the specified
+   * datanode to be taken offline. This method is the interface between the
+   * decommissioning flow and Replication Manager.
+   * @param datanode the datanode being taken offline
+   * @param nodeManager an instance of {@link NodeManager}
+   * @return true if the datanode can be taken offline, otherwise false
+   */
+  boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+      NodeManager nodeManager);
 
   boolean isOverReplicated();
 
@@ -63,6 +73,8 @@ public interface ContainerReplicaCount {
 
   }
 
+  boolean isHealthyEnoughForOffline();
+
   /**
    * Return true if there are insufficient replicas to recover this container.
    *
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
index 35abccd577..cf6d2c75c7 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerReplicaCount.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -497,11 +498,13 @@ public class ECContainerReplicaCount implements 
ContainerReplicaCount {
    * replica on the node going offline has a copy elsewhere on another
    * IN_SERVICE node, and if so that replica is sufficiently replicated.
    * @param datanode The datanode being checked to go offline.
+   * @param nodeManager not used in this implementation
    * @return True if the container is sufficiently replicated or if this 
replica
    *         on the passed node is present elsewhere on an IN_SERVICE node.
    */
   @Override
-  public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode) {
+  public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+      NodeManager nodeManager) {
     boolean sufficientlyReplicated = isSufficientlyReplicated(false);
     if (sufficientlyReplicated) {
       return true;
@@ -533,6 +536,11 @@ public class ECContainerReplicaCount implements 
ContainerReplicaCount {
     return healthyIndexes.containsKey(thisReplica.getReplicaIndex());
   }
 
+  @Override
+  public boolean isHealthyEnoughForOffline() {
+    return isHealthy();
+  }
+
   @Override
   public boolean isSufficientlyReplicated() {
     return isSufficientlyReplicated(false);
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
index 80c05afbd4..90c7afebcc 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
@@ -16,11 +16,17 @@
  */
 package org.apache.hadoop.hdds.scm.container.replication;
 
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
+import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
 
 import java.util.Set;
 
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+
 /**
  * When HDDS-6447 was done to improve the LegacyReplicationManager, work on
  * the new replication manager had already started. When this class was added,
@@ -59,4 +65,63 @@ public class LegacyRatisContainerReplicaCount extends
   public int getUnhealthyReplicaCountAdapter() {
     return getMisMatchedReplicaCount();
   }
+
+  /**
+   * Checks if all replicas (except UNHEALTHY) on in-service nodes are in the
+   * same health state as the container. This is similar to what
+   * {@link ContainerReplicaCount#isHealthy()} does. The difference is in how
+   * both methods treat UNHEALTHY replicas.
+   * <p>
+   * This method is the interface between the decommissioning flow and
+   * Replication Manager. Callers can use it to check whether replicas of a
+   * container are in the same state as the container before a datanode is
+   * taken offline.
+   * <p>
+   * Note that this method's purpose is to only compare the replica state with
+   * the container state. It does not check if the container has sufficient
+   * number of replicas - that is the job of {@link ContainerReplicaCount
+   * #isSufficientlyReplicatedForOffline(DatanodeDetails, NodeManager)}.
+   * @return true if the container is healthy enough, which is determined by
+   * various checks
+   */
+  @Override
+  public boolean isHealthyEnoughForOffline() {
+    long countInService = getReplicas().stream()
+        .filter(r -> r.getDatanodeDetails().getPersistedOpState() == 
IN_SERVICE)
+        .count();
+    if (countInService == 0) {
+      /*
+      Having no in-service nodes is unexpected and SCM shouldn't allow this
+      to happen in the first place. Return false here just to be safe.
+      */
+      return false;
+    }
+
+    LifeCycleState containerState = getContainer().getState();
+    return (containerState == LifeCycleState.CLOSED
+        || containerState == LifeCycleState.QUASI_CLOSED)
+        && getReplicas().stream()
+        .filter(r -> r.getDatanodeDetails().getPersistedOpState() == 
IN_SERVICE)
+        .filter(r -> r.getState() !=
+            ContainerReplicaProto.State.UNHEALTHY)
+        .allMatch(r -> ReplicationManager.compareState(
+            containerState, r.getState()));
+  }
+
+  /**
+   * For Legacy Replication Manager and Ratis Containers, this method checks
+   * if the container is sufficiently replicated. It also checks whether
+   * there are any UNHEALTHY replicas that need to be replicated.
+   * @param datanode Not used in this implementation
+   * @param nodeManager An instance of NodeManager, used to check the health
+   * status of a node
+   * @return true if the container is sufficiently replicated and there are
+   * no UNHEALTHY replicas that need to be replicated, false otherwise
+   */
+  @Override
+  public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+      NodeManager nodeManager) {
+    return super.isSufficientlyReplicated() &&
+        super.getVulnerableUnhealthyReplicas(nodeManager).isEmpty();
+  }
 }
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
index 0c6052ef35..c516bc73a0 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
@@ -413,14 +413,21 @@ public class RatisContainerReplicaCount implements 
ContainerReplicaCount {
   /**
    * For Ratis, this method is the same as isSufficientlyReplicated.
    * @param datanode Not used in this implementation
+   * @param nodeManager not used in this implementation
    * @return True if the container is sufficiently replicated and False
    *         otherwise.
    */
   @Override
-  public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode) {
+  public boolean isSufficientlyReplicatedForOffline(DatanodeDetails datanode,
+      NodeManager nodeManager) {
     return isSufficientlyReplicated();
   }
 
+  @Override
+  public boolean isHealthyEnoughForOffline() {
+    return isHealthy();
+  }
+
   /**
    * QUASI_CLOSED containers that have a mix of healthy and UNHEALTHY
    * replicas require special treatment. If the healthy replicas don't have
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
index 0263078ec8..34a69047c8 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
@@ -346,7 +346,7 @@ public class DatanodeAdminMonitorImpl implements 
DatanodeAdminMonitor {
       try {
         ContainerReplicaCount replicaSet =
             replicationManager.getContainerReplicaCount(cid);
-        if (replicaSet.isSufficientlyReplicatedForOffline(dn)) {
+        if (replicaSet.isSufficientlyReplicatedForOffline(dn, nodeManager)) {
           sufficientlyReplicated++;
         } else {
           if (LOG.isDebugEnabled()) {
@@ -359,7 +359,21 @@ public class DatanodeAdminMonitorImpl implements 
DatanodeAdminMonitor {
           }
           underReplicated++;
         }
-        if (!replicaSet.isHealthy()) {
+
+        boolean isHealthy;
+        /*
+        If LegacyReplicationManager is enabled, then use the
+        isHealthyEnoughForOffline API. ReplicationManager doesn't support this
+        API yet.
+         */
+        boolean legacyEnabled = conf.getBoolean("hdds.scm.replication.enable" +
+            ".legacy", false);
+        if (legacyEnabled) {
+          isHealthy = replicaSet.isHealthyEnoughForOffline();
+        } else {
+          isHealthy = replicaSet.isHealthy();
+        }
+        if (!isHealthy) {
           if (LOG.isDebugEnabled()) {
             unhealthyIDs.add(cid);
           }
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
index af0206140f..88ae2e79be 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECContainerReplicaCount.java
@@ -688,18 +688,18 @@ public class TestECContainerReplicaCount {
             1);
     Assertions.assertFalse(rcnt.isSufficientlyReplicated(false));
     Assertions.assertTrue(rcnt.isSufficientlyReplicatedForOffline(
-        offlineReplica.getDatanodeDetails()));
+        offlineReplica.getDatanodeDetails(), null));
     Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
-        offlineNotReplicated.getDatanodeDetails()));
+        offlineNotReplicated.getDatanodeDetails(), null));
 
     // A random DN not hosting a replica for this container should return 
false.
     Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
-        MockDatanodeDetails.randomDatanodeDetails()));
+        MockDatanodeDetails.randomDatanodeDetails(), null));
 
     // Passing the IN_SERVICE node should return false even though the
     // replica is on a healthy node
     Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
-        inServiceReplica.getDatanodeDetails()));
+        inServiceReplica.getDatanodeDetails(), null));
   }
 
   @Test
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
index 32e2c2a5f1..ac9755e972 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
@@ -17,16 +17,23 @@
  */
 package org.apache.hadoop.hdds.scm.node;
 
+import com.google.common.collect.ImmutableSet;
 import org.apache.commons.lang3.tuple.Triple;
 import org.apache.hadoop.hdds.client.ECReplicationConfig;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import 
org.apache.hadoop.hdds.scm.container.replication.LegacyRatisContainerReplicaCount;
 import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
 import org.apache.hadoop.hdds.scm.container.SimpleMockNodeManager;
+import org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.server.events.EventQueue;
@@ -209,6 +216,85 @@ public class TestDatanodeAdminMonitor {
         nodeManager.getNodeStatus(dn1).getOperationalState());
   }
 
+  /**
+   * Situation: A QUASI_CLOSED container has an UNHEALTHY replica with the
+   * greatest BCSID, and three QUASI_CLOSED replicas with a smaller BCSID. The
+   * UNHEALTHY container is on a decommissioning node, and there are no other
+   * copies of this replica, that is, replicas with the same Origin ID as
+   * this replica.
+   *
+   * Expectation: Decommissioning should not complete until the UNHEALTHY
+   * replica has been replicated to another node.
+   *
+   * Note: This test currently uses the LegacyReplicationManager, as the new
+   * one doesn't support this behaviour yet.
+   * @throws NodeNotFoundException
+   * @throws ContainerNotFoundException
+   */
+  @Test
+  public void testDecommissionWaitsForUnhealthyReplicaToReplicate()
+      throws NodeNotFoundException, ContainerNotFoundException {
+    conf.setBoolean("hdds.scm.replication.enable.legacy", true);
+
+    DatanodeDetails dn1 = MockDatanodeDetails.randomDatanodeDetails();
+    nodeManager.register(dn1,
+        new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+            HddsProtos.NodeState.HEALTHY));
+
+    // create 3 QUASI_CLOSED replicas with containerID 1 and same origin ID
+    ContainerID containerID = ContainerID.valueOf(1);
+    Set<ContainerReplica> replicas =
+        ReplicationTestUtil.createReplicasWithSameOrigin(containerID,
+            State.QUASI_CLOSED, 0, 0, 0);
+
+    // the container's sequence id is greater than the healthy replicas'
+    ContainerInfo container = ReplicationTestUtil.createContainerInfo(
+        RatisReplicationConfig.getInstance(
+            HddsProtos.ReplicationFactor.THREE), containerID.getId(),
+        HddsProtos.LifeCycleState.QUASI_CLOSED,
+        replicas.iterator().next().getSequenceId() + 1);
+    // UNHEALTHY replica is on a unique origin and has same sequence id as
+    // the container
+    ContainerReplica unhealthy =
+        ReplicationTestUtil.createContainerReplica(containerID, 0,
+            dn1.getPersistedOpState(), State.UNHEALTHY,
+            container.getNumberOfKeys(), container.getUsedBytes(), dn1,
+            dn1.getUuid(), container.getSequenceId());
+    replicas.add(unhealthy);
+    nodeManager.setContainers(dn1, ImmutableSet.of(containerID));
+
+    Mockito.when(repManager.getContainerReplicaCount(Mockito.eq(containerID)))
+        .thenReturn(new LegacyRatisContainerReplicaCount(container, replicas,
+            0, 0, 3, 2));
+
+    // start monitoring dn1
+    monitor.startMonitoring(dn1);
+    monitor.run();
+    assertEquals(1, monitor.getTrackedNodeCount());
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+        nodeManager.getNodeStatus(dn1).getOperationalState());
+
+    // Running the monitor again causes it to remain DECOMMISSIONING
+    // as nothing has changed.
+    monitor.run();
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+        nodeManager.getNodeStatus(dn1).getOperationalState());
+
+    // add a copy of the UNHEALTHY replica on a new node, dn1 should get
+    // decommissioned now
+    ContainerReplica copyOfUnhealthyOnNewNode = unhealthy.toBuilder()
+        .setDatanodeDetails(MockDatanodeDetails.randomDatanodeDetails())
+        .build();
+    replicas.add(copyOfUnhealthyOnNewNode);
+    Mockito.when(repManager.getContainerReplicaCount(Mockito.eq(containerID)))
+        .thenReturn(new LegacyRatisContainerReplicaCount(container, replicas,
+            0, 0, 3, 2));
+    monitor.run();
+    assertEquals(0, monitor.getTrackedNodeCount());
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED,
+        nodeManager.getNodeStatus(dn1).getOperationalState());
+  }
+
   @Test
   public void testDecommissionNodeWithUnrecoverableECContainer()
       throws NodeNotFoundException, ContainerNotFoundException {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[ozone] branch master updated: HDDS-9354. LegacyReplicationManager: Unhealthy replicas of a sufficiently replicated container can block decommissioning (#5385)

Reply via email to