sodonnel commented on code in PR #4614:
URL: https://github.com/apache/ozone/pull/4614#discussion_r1175178606


##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java:
##########
@@ -99,7 +99,177 @@ public SCMContainerPlacementRackAware(final NodeManager 
nodeManager,
    */
   @Override
   protected List<DatanodeDetails> chooseDatanodesInternal(
-          List<DatanodeDetails> usedNodes,
+      List<DatanodeDetails> usedNodes,
+      List<DatanodeDetails> excludedNodes,
+      List<DatanodeDetails> favoredNodes, int nodesRequired,
+      long metadataSizeRequired, long dataSizeRequired)
+      throws SCMException {
+    if (!usedNodesPassed(usedNodes)) {
+      // If interface is called without used nodes
+      // In this case consider only exclude nodes to determine racks
+      return chooseDatanodesInternalLegacy(excludedNodes,
+          favoredNodes, nodesRequired, metadataSizeRequired, dataSizeRequired);
+    }
+    Preconditions.checkArgument(nodesRequired > 0);
+    metrics.incrDatanodeRequestCount(nodesRequired);
+    int datanodeCount = networkTopology.getNumOfLeafNode(NetConstants.ROOT);
+    int excludedNodesCount = excludedNodes == null ? 0 : excludedNodes.size();
+    int usedNodesCount = usedNodes == null ? 0 : usedNodes.size();
+    if (datanodeCount < nodesRequired + excludedNodesCount + usedNodesCount) {
+      throw new SCMException("No enough datanodes to choose. " +
+          "TotalNode = " + datanodeCount +
+          " RequiredNode = " + nodesRequired +
+          " ExcludedNode = " + excludedNodesCount +
+          " UsedNode = " + usedNodesCount, null);
+    }
+    List<DatanodeDetails> mutableFavoredNodes = favoredNodes;
+    // sanity check of favoredNodes
+    if (mutableFavoredNodes != null && excludedNodes != null) {
+      mutableFavoredNodes = new ArrayList<>();
+      mutableFavoredNodes.addAll(favoredNodes);
+      mutableFavoredNodes.removeAll(excludedNodes);
+    }
+    int favoredNodeNum = mutableFavoredNodes == null ? 0 :
+        mutableFavoredNodes.size();
+
+    List<DatanodeDetails> chosenNodes = new ArrayList<>();
+    List<DatanodeDetails> mutableUsedNodes = new ArrayList<>();
+    mutableUsedNodes.addAll(usedNodes);
+    List<DatanodeDetails> mutableExcludedNodes = new ArrayList<>();
+    if (excludedNodes != null) {
+      mutableExcludedNodes.addAll(excludedNodes);
+    }
+
+    DatanodeDetails favoredNode;
+    int favorIndex = 0;
+    if (mutableUsedNodes.size() == 0) {
+      // choose all nodes for a new pipeline case
+      // choose first datanode from scope ROOT or from favoredNodes if not null
+      favoredNode = favoredNodeNum > favorIndex ?
+          mutableFavoredNodes.get(favorIndex) : null;
+      DatanodeDetails firstNode;
+      if (favoredNode != null) {
+        firstNode = favoredNode;
+        favorIndex++;
+      } else {
+        firstNode = chooseNode(mutableExcludedNodes, null, 
metadataSizeRequired,
+            dataSizeRequired);
+      }
+      chosenNodes.add(firstNode);
+      nodesRequired--;
+      if (nodesRequired == 0) {
+        return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+      }
+
+      // choose second datanode on the same rack as first one
+      favoredNode = favoredNodeNum > favorIndex ?
+          mutableFavoredNodes.get(favorIndex) : null;
+      DatanodeDetails secondNode;
+      if (favoredNode != null &&
+          networkTopology.isSameParent(firstNode, favoredNode)) {
+        secondNode = favoredNode;
+        favorIndex++;
+      } else {
+        mutableExcludedNodes.add(firstNode);
+        secondNode = chooseNode(mutableExcludedNodes, Arrays.asList(firstNode),
+            metadataSizeRequired, dataSizeRequired);
+      }
+      chosenNodes.add(secondNode);
+      nodesRequired--;
+      if (nodesRequired == 0) {
+        return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+      }
+      // choose remaining datanodes on different rack as first and second
+      mutableExcludedNodes.add(secondNode);
+    } else {
+
+      // choose node to meet replication requirement
+      // case 1: one used node, choose one on the same rack as the used
+      // node, choose others on different racks.
+      if (mutableUsedNodes.size() == 1) {
+        favoredNode = favoredNodeNum > favorIndex ?
+            mutableFavoredNodes.get(favorIndex) : null;
+        DatanodeDetails firstNode;
+        if (favoredNode != null &&
+            networkTopology.isSameParent(mutableUsedNodes.get(0),
+            favoredNode)) {
+          firstNode = favoredNode;
+          favorIndex++;
+        } else {
+          firstNode = chooseNode(mutableExcludedNodes, mutableUsedNodes,
+              metadataSizeRequired, dataSizeRequired);
+        }
+        chosenNodes.add(firstNode);
+        nodesRequired--;
+        if (nodesRequired == 0) {
+          return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+        }
+
+        // choose remaining nodes on different racks
+        mutableExcludedNodes.add(firstNode);
+        mutableExcludedNodes.addAll(mutableUsedNodes);
+      } else {
+        // case 2: two or more used nodes, if these two nodes are
+        // in the same rack, then choose nodes on different racks, otherwise,
+        // choose one on the same rack as one of used
+        // nodes, remaining chosen
+        // are on different racks.
+        for (int i = 0; i < usedNodesCount; i++) {
+          for (int j = i + 1; j < usedNodesCount; j++) {
+            if (networkTopology.isSameParent(
+                usedNodes.get(i), usedNodes.get(j))) {
+              // choose remaining nodes on different racks
+              mutableExcludedNodes.addAll(mutableUsedNodes);
+              return chooseNodes(mutableExcludedNodes, chosenNodes,
+                  mutableFavoredNodes, favorIndex, nodesRequired,
+                  metadataSizeRequired, dataSizeRequired);
+            }
+          }
+        }
+        // choose one data on the same rack with one used node
+        favoredNode = favoredNodeNum > favorIndex ?
+            mutableFavoredNodes.get(favorIndex) : null;
+        DatanodeDetails secondNode;
+        if (favoredNode != null && networkTopology.isSameParent(
+            chosenNodes.get(0), favoredNode)) {
+          secondNode = favoredNode;
+          favorIndex++;
+        } else {
+          secondNode =
+              chooseNode(mutableExcludedNodes, mutableUsedNodes,
+                  metadataSizeRequired, dataSizeRequired);
+        }
+        chosenNodes.add(secondNode);
+        mutableExcludedNodes.add(secondNode);
+        mutableExcludedNodes.addAll(mutableUsedNodes);
+        nodesRequired--;
+        if (nodesRequired == 0) {
+          return Arrays.asList(chosenNodes.toArray(new DatanodeDetails[0]));
+        }
+      }
+    }
+    // choose remaining nodes on different racks
+    return chooseNodes(mutableExcludedNodes, chosenNodes, mutableFavoredNodes,

Review Comment:
   This java doc gives a bit more of a clear indication about what is going on 
I think:
   
   ```
     /**
      * Randomly choose a leaf node from <i>scope</i>.
      *
      * If scope starts with ~, choose one from the all nodes except for the
      * ones in <i>scope</i>; otherwise, choose nodes from <i>scope</i>.
      * If excludedNodes is given, choose a node that's not in excludedNodes.
      *
      * @param scope range of nodes from which a node will be chosen
      * @param excludedNodes nodes to be excluded from.
      * @param ancestorGen matters when excludeNodes is not null. It means the
      * ancestor generation that's not allowed to share between chosen node and 
the
      * excludedNodes. For example, if ancestorGen is 1, means chosen node
      * cannot share the same parent with excludeNodes. If value is 2, cannot
      * share the same grand parent, and so on. If ancestorGen is 0, then no
      * effect.
      *
      * @return the chosen node
      */
     Node chooseRandom(String scope, Collection<Node> excludedNodes,
         int ancestorGen);
   ```
   
   It sounds like if you have affinityNodes passed and ancestorGen=1 (its 
always 1 in the code we are looking at) - then the selected node must share a 
rack with the affinityNodes.
   
   However, if affinityNodes are null, and there are excluded nodes, then with 
ancestorGen=1 the selected nodes cannot share a rack with any nodes listed in 
excludedNodes.
   
   I think that we will somehow have to pass the chosenNodes / usedNodes as 
excluded, and then use excludedScopes to exclude the "non-used" excluded nodes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to