This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new f44d535428 HDDS-9125. Decommissioning blocked because of under 
replicated EC containers (#5246)
f44d535428 is described below

commit f44d5354288ef95a99747ac8929df89d5807ba77
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Thu Sep 7 12:27:39 2023 +0100

    HDDS-9125. Decommissioning blocked because of under replicated EC 
containers (#5246)
---
 .../hadoop/hdds/scm/SCMCommonPlacementPolicy.java  |  9 ++-
 .../algorithms/SCMContainerPlacementRackAware.java |  3 +-
 .../SCMContainerPlacementRackScatter.java          | 66 ++++++++++++++++-
 .../hdds/scm/pipeline/PipelinePlacementPolicy.java |  2 +-
 .../hdds/scm/TestSCMCommonPlacementPolicy.java     |  2 +-
 .../TestSCMContainerPlacementRackScatter.java      | 82 +++++++++++++++-------
 6 files changed, 128 insertions(+), 36 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
index c575f740e7..a7bd64645d 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
@@ -392,9 +392,12 @@ public abstract class SCMCommonPlacementPolicy implements
    * should have
    *
    * @param numReplicas - The desired replica counts
+   * @param excludedRackCount - The number of racks excluded due to containing
+   *                          only excluded nodes. The total racks on the
+   *                          cluster will be reduced by this number.
    * @return The number of racks containers should span to meet the policy
    */
-  protected int getRequiredRackCount(int numReplicas) {
+  protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
     return 1;
   }
 
@@ -432,7 +435,7 @@ public abstract class SCMCommonPlacementPolicy implements
       List<DatanodeDetails> dns, int replicas) {
     NetworkTopology topology = nodeManager.getClusterNetworkTopologyMap();
     // We have a network topology so calculate if it is satisfied or not.
-    int requiredRacks = getRequiredRackCount(replicas);
+    int requiredRacks = getRequiredRackCount(replicas, 0);
     if (topology == null || replicas == 1 || requiredRacks == 1) {
       if (dns.size() > 0) {
         // placement is always satisfied if there is at least one DN.
@@ -520,7 +523,7 @@ public abstract class SCMCommonPlacementPolicy implements
 
     int totalNumberOfReplicas = replicas.size();
     int requiredNumberOfPlacementGroups =
-            getRequiredRackCount(totalNumberOfReplicas);
+            getRequiredRackCount(totalNumberOfReplicas, 0);
     Set<ContainerReplica> copyReplicaSet = Sets.newHashSet();
     List<List<ContainerReplica>> replicaSet = placementGroupReplicaIdMap
             .values().stream()
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
index ed56a9c2bf..314d6e8139 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
@@ -591,9 +591,10 @@ public final class SCMContainerPlacementRackAware
   }
 
   @Override
-  protected int getRequiredRackCount(int numReplicas) {
+  protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
     int racks = networkTopology != null
         ? networkTopology.getNumOfNodes(networkTopology.getMaxLevel() - 1)
+            - excludedRackCount
         : 1;
     return Math.min(REQUIRED_RACKS, racks);
   }
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
index cc6bf72362..141d85fd5e 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
@@ -265,8 +265,16 @@ public final class SCMContainerPlacementRackScatter
         usedRacksCntMap.merge(rack, 1, Math::addExact);
       }
     }
+
+    List<Node> unavailableRacks = findRacksWithOnlyExcludedNodes(excludedNodes,
+        usedRacksCntMap);
+    for (Node rack : unavailableRacks) {
+      racks.remove(rack);
+    }
+
     int requiredReplicationFactor = usedNodes.size() + nodesRequired;
-    int numberOfRacksRequired = 
getRequiredRackCount(requiredReplicationFactor);
+    int numberOfRacksRequired = getRequiredRackCount(requiredReplicationFactor,
+        unavailableRacks.size());
     int additionalRacksRequired =
         Math.min(nodesRequired, numberOfRacksRequired - 
usedRacksCntMap.size());
     LOG.debug("Additional nodes required: {}. Additional racks required: {}.",
@@ -364,6 +372,54 @@ public final class SCMContainerPlacementRackScatter
     return result;
   }
 
+  /**
+   * Given a list of excluded nodes, check if the rack for each excluded node 
is
+   * empty after removing the excluded nodes. If it is empty, then the rack
+   * contains only excluded nodes, and we return a list of these racks.
+   * @param excludedNodes List of excluded nodes
+   * @param usedRacksCntMap Map of used racks and their used node count
+   * @return List of racks that contain only excluded nodes or an empty list
+   */
+  private List<Node> findRacksWithOnlyExcludedNodes(
+      List<DatanodeDetails> excludedNodes, Map<Node, Integer> usedRacksCntMap) 
{
+    if (excludedNodes == null || excludedNodes.isEmpty()) {
+      return Collections.emptyList();
+    }
+
+    List<Node> unavailableRacks = new ArrayList<>();
+    Set<Node> excludedNodeRacks = new HashSet<>();
+    for (Node node : excludedNodes) {
+      Node rack = networkTopology.getAncestor(node, RACK_LEVEL);
+      if (rack != null && !usedRacksCntMap.containsKey(rack)) {
+        // Dead nodes are removed from the topology, so the node may have a 
null
+        // rack, hence the not null check.
+        // Anything that reaches here is the rack for an excluded node and no
+        // used nodes are on the rack.
+        excludedNodeRacks.add(rack);
+      }
+    }
+    Set<Node> exc = new HashSet<>(excludedNodes);
+    for (Node rack : excludedNodeRacks) {
+      // If a node is removed from the cluster (e.g. goes dead), but the client
+      // already added it to the exclude list, then the rack may not be in the
+      // topology any longer. See test
+      // testExcludedNodesOverlapsOutOfServiceNodes
+      String rackPath = rack.getNetworkFullPath();
+      if (networkTopology.getNode(rackPath) == null) {
+        continue;
+      }
+
+      Node node = networkTopology.chooseRandom(rack.getNetworkFullPath(), exc);
+      if (node == null) {
+        // This implies we have a rack with all nodes excluded, so it is as if
+        // that rack does not exist. We also know there are no used nodes on
+        // this rack, so it means we need to reduce the rack count by 1
+        unavailableRacks.add(rack);
+      }
+    }
+    return unavailableRacks;
+  }
+
   @Override
   public DatanodeDetails chooseNode(List<DatanodeDetails> healthyNodes) {
     return null;
@@ -430,15 +486,19 @@ public final class SCMContainerPlacementRackScatter
    * For EC placement policy, desired rack count would be equal to the num of
    * Replicas.
    * @param numReplicas - num of Replicas.
+   * @param excludedRackCount - The number of racks excluded due to containing
+   *                          only excluded nodes. The total racks on the
+   *                          cluster will be reduced by this number.
    * @return required rack count.
    */
   @Override
-  protected int getRequiredRackCount(int numReplicas) {
+  protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
     if (networkTopology == null) {
       return 1;
     }
     int maxLevel = networkTopology.getMaxLevel();
-    int numRacks = networkTopology.getNumOfNodes(maxLevel - 1);
+    int numRacks = networkTopology.getNumOfNodes(maxLevel - 1)
+        - excludedRackCount;
     // Return the num of Rack if numRack less than numReplicas
     return Math.min(numRacks, numReplicas);
   }
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
index 0ec5abb419..6c13960e17 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
@@ -573,7 +573,7 @@ public final class PipelinePlacementPolicy extends 
SCMCommonPlacementPolicy {
   }
 
   @Override
-  protected int getRequiredRackCount(int numReplicas) {
+  protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
     return REQUIRED_RACKS;
   }
 
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
index 22b3519f10..ffefc7c5f5 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
@@ -506,7 +506,7 @@ public class TestSCMCommonPlacementPolicy {
     }
 
     @Override
-    protected int getRequiredRackCount(int numReplicas) {
+    protected int getRequiredRackCount(int numReplicas, int excludedRackCount) 
{
       return Math.min(numReplicas, rackCnt);
     }
   }
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
index 36b5e8ece7..8e267f4988 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
@@ -403,33 +403,11 @@ public class TestSCMContainerPlacementRackScatter {
     excludedNodes.clear();
     excludedNodes.add(datanodes.get(0));
     excludedNodes.add(datanodes.get(5));
-    if (datanodeCount == 6) {
-      /*
-      * when datanodeCount is 6, the clusterMap will be
-      * /rack0/node0
-      * /rack0/node1
-      * /rack0/node2
-      * /rack0/node3
-      * /rack0/node4
-      * /rack1/node5
-      * if we select node0 and node5 as the excluded datanode,
-      * only datanode in rack0 will be chosen when calling
-      * `policy.chooseDatanodes` and the placement will not be
-      *  met since there are two racks exist, but only one
-      * of them is chosen
-      * */
-      SCMException e = assertThrows(SCMException.class,
-          () -> policy.chooseDatanodes(excludedNodes, null, 3, 0, 15));
-      String message = e.getMessage();
-      assertTrue(message.contains("Chosen nodes size: 2, but required nodes " +
-          "to choose: 3 do not match."));
-    } else {
-      datanodeDetails = policy.chooseDatanodes(
-          excludedNodes, null, nodeNum, 0, 15);
-      Assertions.assertEquals(nodeNum, datanodeDetails.size());
-      Assertions.assertEquals(getRackSize(datanodeDetails, excludedNodes),
-          Math.min(totalNum, rackNum));
-    }
+    datanodeDetails = policy.chooseDatanodes(
+        excludedNodes, null, nodeNum, 0, 15);
+    Assertions.assertEquals(nodeNum, datanodeDetails.size());
+    Assertions.assertEquals(getRackSize(datanodeDetails, excludedNodes),
+        Math.min(totalNum, rackNum));
   }
 
   @ParameterizedTest
@@ -804,6 +782,56 @@ public class TestSCMContainerPlacementRackScatter {
     Assertions.assertEquals(nodeNum, datanodeDetails.size());
   }
 
+  @Test
+  public void testAllNodesOnRackExcludedReducesRackCount()
+      throws SCMException {
+    setup(10, 2);
+    // Here we have the following nodes / racks. Note that rack 4 is all
+    // excluded, so this test ensures we still get a node returned on the
+    // reduced set of racks.
+    // /rack0/node0 - used
+    // /rack0/node1 - free
+    // /rack1/node2 - used
+    // /rack1/node3 - free
+    // /rack2/node4 - used
+    // /rack2/node5 - free
+    // /rack3/node6 - used
+    // /rack3/node7 - free
+    // /rack4/node8 - excluded
+    // /rack4/node9 - excluded
+
+    List<DatanodeDetails> usedDns =
+        getDatanodes(Lists.newArrayList(0, 2, 4, 6));
+    List<DatanodeDetails> excludedDns = getDatanodes(Lists.newArrayList(8, 9));
+
+    List<DatanodeDetails> chosenNodes =
+        policy.chooseDatanodes(usedDns, excludedDns,
+            null, 1, 0, 5);
+    Assertions.assertEquals(1, chosenNodes.size());
+  }
+
+  @Test
+  public void testAllNodesOnRackExcludedReducesRackCount2()
+      throws SCMException {
+    setup(5, 2);
+    // Here we have a setup like this, which is like a Ratis container with a
+    // decommissioning node on rack 2. This makes rack 2 excluded, so we should
+    // get a node returned on rack 0 or 1 instead.
+    // /rack0/node0 - used
+    // /rack0/node1 - free
+    // /rack1/node2 - used
+    // /rack1/node3 - free
+    // /rack2/node4 - excluded (eg decommissioning)
+    List<DatanodeDetails> usedDns = getDatanodes(Lists.newArrayList(0, 2));
+    List<DatanodeDetails> excludedDns = getDatanodes(Lists.newArrayList(4));
+
+    List<DatanodeDetails> chosenNodes =
+        policy.chooseDatanodes(usedDns, excludedDns,
+            null, 1, 0, 5);
+    Assertions.assertEquals(1, chosenNodes.size());
+  }
+
+
   private int getRackSize(List<DatanodeDetails>... datanodeDetails) {
     Set<Node> racks = new HashSet<>();
     for (List<DatanodeDetails> list : datanodeDetails) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to