HDFS-4937. ReplicationMonitor can infinite-loop in 
BlockPlacementPolicyDefault#chooseRandom(). Contributed by Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/43539b5f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/43539b5f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/43539b5f

Branch: refs/heads/HDFS-8966
Commit: 43539b5ff4ac0874a8a454dc93a2a782b0e0ea8f
Parents: ce31b22
Author: Kihwal Lee <kih...@apache.org>
Authored: Fri Oct 30 09:27:21 2015 -0500
Committer: Kihwal Lee <kih...@apache.org>
Committed: Fri Oct 30 09:29:13 2015 -0500

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt             |  3 +++
 .../blockmanagement/BlockPlacementPolicyDefault.java    | 12 ++++++++++++
 2 files changed, 15 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/43539b5f/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 38b9e55..f6a22a1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -2201,6 +2201,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9332. Fix Precondition failures from NameNodeEditLogRoller while
     saving namespace. (wang)
 
+    HDFS-4937. ReplicationMonitor can infinite-loop in
+    BlockPlacementPolicyDefault#chooseRandom() (kihwal)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/43539b5f/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index d9b8d60..f610574 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -659,6 +659,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
 
     int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(
         scope, excludedNodes);
+    int refreshCounter = numOfAvailableNodes;
     StringBuilder builder = null;
     if (LOG.isDebugEnabled()) {
       builder = debugLoggingBuilder.get();
@@ -708,6 +709,17 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
         // If no candidate storage was found on this DN then set badTarget.
         badTarget = (storage == null);
       }
+      // Refresh the node count. If the live node count became smaller,
+      // but it is not reflected in this loop, it may loop forever in case
+      // the replicas/rack cannot be satisfied.
+      if (--refreshCounter == 0) {
+        refreshCounter = clusterMap.countNumOfAvailableNodes(scope,
+            excludedNodes);
+        // It has already gone through enough number of nodes.
+        if (refreshCounter <= excludedNodes.size()) {
+          break;
+        }
+      }
     }
       
     if (numOfReplicas>0) {

Reply via email to