This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new f44d535428 HDDS-9125. Decommissioning blocked because of under
replicated EC containers (#5246)
f44d535428 is described below
commit f44d5354288ef95a99747ac8929df89d5807ba77
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Thu Sep 7 12:27:39 2023 +0100
HDDS-9125. Decommissioning blocked because of under replicated EC
containers (#5246)
---
.../hadoop/hdds/scm/SCMCommonPlacementPolicy.java | 9 ++-
.../algorithms/SCMContainerPlacementRackAware.java | 3 +-
.../SCMContainerPlacementRackScatter.java | 66 ++++++++++++++++-
.../hdds/scm/pipeline/PipelinePlacementPolicy.java | 2 +-
.../hdds/scm/TestSCMCommonPlacementPolicy.java | 2 +-
.../TestSCMContainerPlacementRackScatter.java | 82 +++++++++++++++-------
6 files changed, 128 insertions(+), 36 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
index c575f740e7..a7bd64645d 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
@@ -392,9 +392,12 @@ public abstract class SCMCommonPlacementPolicy implements
* should have
*
* @param numReplicas - The desired replica counts
+ * @param excludedRackCount - The number of racks excluded due to containing
+ * only excluded nodes. The total racks on the
+ * cluster will be reduced by this number.
* @return The number of racks containers should span to meet the policy
*/
- protected int getRequiredRackCount(int numReplicas) {
+ protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
return 1;
}
@@ -432,7 +435,7 @@ public abstract class SCMCommonPlacementPolicy implements
List<DatanodeDetails> dns, int replicas) {
NetworkTopology topology = nodeManager.getClusterNetworkTopologyMap();
// We have a network topology so calculate if it is satisfied or not.
- int requiredRacks = getRequiredRackCount(replicas);
+ int requiredRacks = getRequiredRackCount(replicas, 0);
if (topology == null || replicas == 1 || requiredRacks == 1) {
if (dns.size() > 0) {
// placement is always satisfied if there is at least one DN.
@@ -520,7 +523,7 @@ public abstract class SCMCommonPlacementPolicy implements
int totalNumberOfReplicas = replicas.size();
int requiredNumberOfPlacementGroups =
- getRequiredRackCount(totalNumberOfReplicas);
+ getRequiredRackCount(totalNumberOfReplicas, 0);
Set<ContainerReplica> copyReplicaSet = Sets.newHashSet();
List<List<ContainerReplica>> replicaSet = placementGroupReplicaIdMap
.values().stream()
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
index ed56a9c2bf..314d6e8139 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java
@@ -591,9 +591,10 @@ public final class SCMContainerPlacementRackAware
}
@Override
- protected int getRequiredRackCount(int numReplicas) {
+ protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
int racks = networkTopology != null
? networkTopology.getNumOfNodes(networkTopology.getMaxLevel() - 1)
+ - excludedRackCount
: 1;
return Math.min(REQUIRED_RACKS, racks);
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
index cc6bf72362..141d85fd5e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackScatter.java
@@ -265,8 +265,16 @@ public final class SCMContainerPlacementRackScatter
usedRacksCntMap.merge(rack, 1, Math::addExact);
}
}
+
+ List<Node> unavailableRacks = findRacksWithOnlyExcludedNodes(excludedNodes,
+ usedRacksCntMap);
+ for (Node rack : unavailableRacks) {
+ racks.remove(rack);
+ }
+
int requiredReplicationFactor = usedNodes.size() + nodesRequired;
- int numberOfRacksRequired = getRequiredRackCount(requiredReplicationFactor);
+ int numberOfRacksRequired = getRequiredRackCount(requiredReplicationFactor,
+ unavailableRacks.size());
int additionalRacksRequired =
Math.min(nodesRequired, numberOfRacksRequired -
usedRacksCntMap.size());
LOG.debug("Additional nodes required: {}. Additional racks required: {}.",
@@ -364,6 +372,54 @@ public final class SCMContainerPlacementRackScatter
return result;
}
+ /**
+ * Given a list of excluded nodes, check if the rack for each excluded node is
+ * empty after removing the excluded nodes. If it is empty, then the rack
+ * contains only excluded nodes, and we return a list of these racks.
+ * @param excludedNodes List of excluded nodes
+ * @param usedRacksCntMap Map of used racks and their used node count
+ * @return List of racks that contain only excluded nodes or an empty list
+ */
+ private List<Node> findRacksWithOnlyExcludedNodes(
+ List<DatanodeDetails> excludedNodes, Map<Node, Integer> usedRacksCntMap) {
+ if (excludedNodes == null || excludedNodes.isEmpty()) {
+ return Collections.emptyList();
+ }
+
+ List<Node> unavailableRacks = new ArrayList<>();
+ Set<Node> excludedNodeRacks = new HashSet<>();
+ for (Node node : excludedNodes) {
+ Node rack = networkTopology.getAncestor(node, RACK_LEVEL);
+ if (rack != null && !usedRacksCntMap.containsKey(rack)) {
+ // Dead nodes are removed from the topology, so the node may have a null
+ // rack, hence the not null check.
+ // Anything that reaches here is the rack for an excluded node and no
+ // used nodes are on the rack.
+ excludedNodeRacks.add(rack);
+ }
+ }
+ Set<Node> exc = new HashSet<>(excludedNodes);
+ for (Node rack : excludedNodeRacks) {
+ // If a node is removed from the cluster (eg goes dead), but the client
+ // already added it to the exclude list, then the rack may not be in the
+ // topology any longer. See test
+ // testExcludedNodesOverlapsOutOfServiceNodes
+ String rackPath = rack.getNetworkFullPath();
+ if (networkTopology.getNode(rackPath) == null) {
+ continue;
+ }
+
+ Node node = networkTopology.chooseRandom(rack.getNetworkFullPath(), exc);
+ if (node == null) {
+ // This implies we have a rack with all nodes excluded, so it is as if
+ // that rack does not exist. We also know there are no used nodes on
+ // this rack, so it means we need to reduce the rack count by 1
+ unavailableRacks.add(rack);
+ }
+ }
+ return unavailableRacks;
+ }
+
@Override
public DatanodeDetails chooseNode(List<DatanodeDetails> healthyNodes) {
return null;
@@ -430,15 +486,19 @@ public final class SCMContainerPlacementRackScatter
* For EC placement policy, desired rack count would be equal to the num of
* Replicas.
* @param numReplicas - num of Replicas.
+ * @param excludedRackCount - The number of racks excluded due to containing
+ * only excluded nodes. The total racks on the
+ * cluster will be reduced by this number.
* @return required rack count.
*/
@Override
- protected int getRequiredRackCount(int numReplicas) {
+ protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
if (networkTopology == null) {
return 1;
}
int maxLevel = networkTopology.getMaxLevel();
- int numRacks = networkTopology.getNumOfNodes(maxLevel - 1);
+ int numRacks = networkTopology.getNumOfNodes(maxLevel - 1)
+ - excludedRackCount;
// Return the num of Rack if numRack less than numReplicas
return Math.min(numRacks, numReplicas);
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
index 0ec5abb419..6c13960e17 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelinePlacementPolicy.java
@@ -573,7 +573,7 @@ public final class PipelinePlacementPolicy extends
SCMCommonPlacementPolicy {
}
@Override
- protected int getRequiredRackCount(int numReplicas) {
+ protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
return REQUIRED_RACKS;
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
index 22b3519f10..ffefc7c5f5 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestSCMCommonPlacementPolicy.java
@@ -506,7 +506,7 @@ public class TestSCMCommonPlacementPolicy {
}
@Override
- protected int getRequiredRackCount(int numReplicas) {
+ protected int getRequiredRackCount(int numReplicas, int excludedRackCount) {
return Math.min(numReplicas, rackCnt);
}
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
index 36b5e8ece7..8e267f4988 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java
@@ -403,33 +403,11 @@ public class TestSCMContainerPlacementRackScatter {
excludedNodes.clear();
excludedNodes.add(datanodes.get(0));
excludedNodes.add(datanodes.get(5));
- if (datanodeCount == 6) {
- /*
- * when datanodeCount is 6, the clusterMap will be
- * /rack0/node0
- * /rack0/node1
- * /rack0/node2
- * /rack0/node3
- * /rack0/node4
- * /rack1/node5
- * if we select node0 and node5 as the excluded datanode,
- * only datanode in rack0 will be chosen when calling
- * `policy.chooseDatanodes` and the placement will not be
- * met since there are two racks exist, but only one
- * of them is chosen
- * */
- SCMException e = assertThrows(SCMException.class,
- () -> policy.chooseDatanodes(excludedNodes, null, 3, 0, 15));
- String message = e.getMessage();
- assertTrue(message.contains("Chosen nodes size: 2, but required nodes " +
- "to choose: 3 do not match."));
- } else {
- datanodeDetails = policy.chooseDatanodes(
- excludedNodes, null, nodeNum, 0, 15);
- Assertions.assertEquals(nodeNum, datanodeDetails.size());
- Assertions.assertEquals(getRackSize(datanodeDetails, excludedNodes),
- Math.min(totalNum, rackNum));
- }
+ datanodeDetails = policy.chooseDatanodes(
+ excludedNodes, null, nodeNum, 0, 15);
+ Assertions.assertEquals(nodeNum, datanodeDetails.size());
+ Assertions.assertEquals(getRackSize(datanodeDetails, excludedNodes),
+ Math.min(totalNum, rackNum));
}
@ParameterizedTest
@@ -804,6 +782,56 @@ public class TestSCMContainerPlacementRackScatter {
Assertions.assertEquals(nodeNum, datanodeDetails.size());
}
+ @Test
+ public void testAllNodesOnRackExcludedReducesRackCount()
+ throws SCMException {
+ setup(10, 2);
+ // Here we have the following nodes / racks. Note that rack 4 is all
+ // excluded, so this test ensures we still get a node returned on the
+ // reduced set of racks.
+ // /rack0/node0 - used
+ // /rack0/node1 - free
+ // /rack1/node2 - used
+ // /rack1/node3 - free
+ // /rack2/node4 - used
+ // /rack2/node5 - free
+ // /rack3/node6 - used
+ // /rack3/node7 - free
+ // /rack4/node8 - excluded
+ // /rack4/node9 - excluded
+
+ List<DatanodeDetails> usedDns =
+ getDatanodes(Lists.newArrayList(0, 2, 4, 6));
+ List<DatanodeDetails> excludedDns = getDatanodes(Lists.newArrayList(8, 9));
+
+ List<DatanodeDetails> chosenNodes =
+ policy.chooseDatanodes(usedDns, excludedDns,
+ null, 1, 0, 5);
+ Assertions.assertEquals(1, chosenNodes.size());
+ }
+
+ @Test
+ public void testAllNodesOnRackExcludedReducesRackCount2()
+ throws SCMException {
+ setup(5, 2);
+ // Here we have a setup like this, which is like a Ratis container with a
+ // decommissioning node on rack 2. This makes rack 2 excluded, so we should
+ // get a node returned on rack 0 or 1 instead.
+ // /rack0/node0 - used
+ // /rack0/node1 - free
+ // /rack1/node2 - used
+ // /rack1/node3 - free
+ // /rack2/node4 - excluded (eg decommissioning)
+ List<DatanodeDetails> usedDns = getDatanodes(Lists.newArrayList(0, 2));
+ List<DatanodeDetails> excludedDns = getDatanodes(Lists.newArrayList(4));
+
+ List<DatanodeDetails> chosenNodes =
+ policy.chooseDatanodes(usedDns, excludedDns,
+ null, 1, 0, 5);
+ Assertions.assertEquals(1, chosenNodes.size());
+ }
+
+
private int getRackSize(List<DatanodeDetails>... datanodeDetails) {
Set<Node> racks = new HashSet<>();
for (List<DatanodeDetails> list : datanodeDetails) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]