YARN-4477. FairScheduler: Handle condition which can result in an infinite loop in attemptScheduling. (Tao Jie via asuresh)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e88422df Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e88422df Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e88422df Branch: refs/heads/yarn-2877 Commit: e88422df45550f788ae8dd73aec84bde28012aeb Parents: 0087734 Author: Arun Suresh <asur...@apache.org> Authored: Mon Dec 21 22:41:09 2015 -0800 Committer: Arun Suresh <asur...@apache.org> Committed: Mon Dec 21 22:41:09 2015 -0800 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 ++ .../scheduler/fair/FSAppAttempt.java | 12 ++++--- .../scheduler/fair/TestFairScheduler.java | 38 ++++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e88422df/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c306c04..ab4634a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1173,6 +1173,9 @@ Release 2.8.0 - UNRELEASED YARN-4454. NM to nodelabel mapping going wrong after RM restart. (Bibin A Chundatt via wangda) + YARN-4477. FairScheduler: Handle condition which can result in an + infinite loop in attemptScheduling. (Tao Jie via asuresh) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/e88422df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index 3778cba..5f753dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -458,8 +458,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt * the container is {@code alreadyReserved} on the node, simply * update relevant bookeeping. This dispatches ro relevant handlers * in {@link FSSchedulerNode}.. + * return whether reservation was possible with the current threshold limits */ - private void reserve(Priority priority, FSSchedulerNode node, + private boolean reserve(Priority priority, FSSchedulerNode node, Container container, NodeType type, boolean alreadyReserved) { if (!reservationExceedsThreshold(node, type)) { @@ -477,7 +478,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt node.reserveResource(this, priority, rmContainer); setReservation(node); } + return true; } + return false; } private boolean reservationExceedsThreshold(FSSchedulerNode node, @@ -627,10 +630,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt return container.getResource(); } - if (isReservable(container)) { - // The desired container won't fit here, so reserve - reserve(request.getPriority(), node, container, type, reserved); - + // The desired container won't fit here, so reserve + if (isReservable(container) && + reserve(request.getPriority(), node, container, type, reserved)) { return FairScheduler.CONTAINER_RESERVED; } else { if (LOG.isDebugEnabled()) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/e88422df/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 2f48380..430eba7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -981,6 +981,43 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.getSchedulerApp(attId).getNumReservations(null, true)); } + @Test (timeout = 5000) + public void testReservationThresholdWithAssignMultiple() throws Exception { + // set reservable-nodes to 0 which make reservation exceed + conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0f); + conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true); + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add two node + RMNode node1 = + MockNodes + .newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + RMNode node2 = + MockNodes + .newNodeInfo(2, Resources.createResource(4096, 4), 1, "127.0.0.2"); + NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); + scheduler.handle(nodeEvent2); + + //create one request and assign containers + ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 10); + scheduler.update(); + scheduler.handle(new NodeUpdateSchedulerEvent(node1)); + scheduler.update(); + scheduler.handle(new NodeUpdateSchedulerEvent(node2)); + + // Verify capacity allocation + assertEquals(8192, scheduler.getQueueManager().getQueue("queue1"). + getResourceUsage().getMemory()); + + // Verify number of reservations have decremented + assertEquals(0, + scheduler.getSchedulerApp(attId).getNumReservations(null, true)); + } + @Test (timeout = 500000) public void testContainerReservationAttemptExceedingQueueMax() throws Exception { @@ -4152,6 +4189,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { @Test public void testQueueMaxAMShareWithContainerReservation() throws Exception { conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f); PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); out.println("<?xml version=\"1.0\"?>"); out.println("<allocations>");