This is an automated email from the ASF dual-hosted git repository. sunilg pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new c1ec516 YARN-8373. RM Received RMFatalEvent of type CRITICAL_THREAD_CRASH. Contributed by Wilfred Spiegelenburg. c1ec516 is described below commit c1ec51696cc7d9ba2e5d9a010f1381e3c71115e8 Author: Sunil G <sun...@apache.org> AuthorDate: Tue Nov 19 14:10:41 2019 +0530 YARN-8373. RM Received RMFatalEvent of type CRITICAL_THREAD_CRASH. Contributed by Wilfred Spiegelenburg. (cherry picked from commit ea68756c0cd3e6e20b8e7045a8b7edd180ea4708) --- .../resourcemanager/scheduler/ClusterNodeTracker.java | 18 +++++++++--------- .../resourcemanager/scheduler/fair/FairScheduler.java | 15 +++++++++------ .../scheduler/fair/TestContinuousScheduling.java | 2 -- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java index 19f9c68..16a5e0a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java @@ -33,12 +33,12 @@ import org.apache.hadoop.yarn.util.resource.ResourceUtils; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -386,21 +386,21 @@ public class ClusterNodeTracker<N extends SchedulerNode> { /** * Convenience method to sort nodes. + * Nodes can change while being sorted. Using a standard sort will fail + * without locking each node, the TreeSet handles this without locks. * - * Note that the sort is performed without holding a lock. We are sorting - * here instead of on the caller to allow for future optimizations (e.g. - * sort once every x milliseconds). + * @param comparator the comparator to sort the nodes with + * @return sorted set of nodes in the form of a TreeSet */ - public List<N> sortedNodeList(Comparator<N> comparator) { - List<N> sortedList = null; + public TreeSet<N> sortedNodeSet(Comparator<N> comparator) { + TreeSet<N> sortedSet = new TreeSet<>(comparator); readLock.lock(); try { - sortedList = new ArrayList(nodes.values()); + sortedSet.addAll(nodes.values()); } finally { readLock.unlock(); } - Collections.sort(sortedList, comparator); - return sortedList; + return sortedSet; } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 8ffdc5b..a60e4e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -111,6 +111,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; @@ -1036,15 +1037,17 @@ public class FairScheduler extends @Deprecated void continuousSchedulingAttempt() throws InterruptedException { long start = getClock().getTime(); - List<FSSchedulerNode> nodeIdList; - // Hold a lock to prevent comparator order changes due to changes of node - // unallocated resources - synchronized (this) { - nodeIdList = nodeTracker.sortedNodeList(nodeAvailableResourceComparator); + TreeSet<FSSchedulerNode> nodeIdSet; + // Hold a lock to prevent node changes as much as possible. + readLock.lock(); + try { + nodeIdSet = nodeTracker.sortedNodeSet(nodeAvailableResourceComparator); + } finally { + readLock.unlock(); } // iterate all nodes - for (FSSchedulerNode node : nodeIdList) { + for (FSSchedulerNode node : nodeIdSet) { try { if (Resources.fitsIn(minimumAllocation, node.getUnallocatedResource())) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java index e6a841a..6760d9c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java @@ -323,9 +323,7 @@ public class TestContinuousScheduling extends FairSchedulerTestBase { for (int j = 0; j < 100; j++) { for (FSSchedulerNode node : clusterNodeTracker.getAllNodes()) { int i = ThreadLocalRandom.current().nextInt(-30, 30); - synchronized (scheduler) { node.deductUnallocatedResource(Resource.newInstance(i * 1024, i)); - } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org