HDFS-4937. ReplicationMonitor can infinite-loop in BlockPlacementPolicyDefault#chooseRandom(). Contributed by Kihwal Lee.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ff47f35d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ff47f35d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ff47f35d

Branch: refs/heads/HDFS-8707
Commit: ff47f35deed14ba6463cba76f0e6a6c15abb3eca
Parents: c563401
Author: Kihwal Lee <kih...@apache.org>
Authored: Thu Nov 5 09:25:20 2015 -0600
Committer: Kihwal Lee <kih...@apache.org>
Committed: Thu Nov 5 09:25:20 2015 -0600

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt                 | 3 +++
 .../server/blockmanagement/BlockPlacementPolicyDefault.java | 9 +++++++++
 2 files changed, 12 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ff47f35d/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 910753a..f5c6f0f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -2256,6 +2256,9 @@ Release 2.7.3 - UNRELEASED
     HDFS-9289. Make DataStreamer#block thread safe and verify genStamp in
     commitBlock. (Chang Li via zhz)
 
+    HDFS-4937. ReplicationMonitor can infinite-loop in
+    BlockPlacementPolicyDefault#chooseRandom(). (kihwal)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ff47f35d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index 56ebc35..d94179b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -659,6 +659,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
 
     int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(
         scope, excludedNodes);
+    int refreshCounter = numOfAvailableNodes;
     StringBuilder builder = null;
     if (LOG.isDebugEnabled()) {
       builder = debugLoggingBuilder.get();
@@ -708,6 +709,14 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
         // If no candidate storage was found on this DN then set badTarget.
         badTarget = (storage == null);
       }
+      // Refresh the node count. If the live node count became smaller,
+      // but it is not reflected in this loop, it may loop forever in case
+      // the replicas/rack cannot be satisfied.
+      if (--refreshCounter == 0) {
+        numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(scope,
+            excludedNodes);
+        refreshCounter = numOfAvailableNodes;
+      }
     }
 
     if (numOfReplicas>0) {