YARN-3697. FairScheduler: ContinuousSchedulingThread can fail to shutdown. (Zhihai Xu via kasha)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/332b520a Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/332b520a Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/332b520a Branch: refs/heads/YARN-1197 Commit: 332b520a480994b7bd56c135f7941aad30b05e9c Parents: 81df7b5 Author: Karthik Kambatla <ka...@apache.org> Authored: Sun Sep 13 18:07:43 2015 -0700 Committer: Karthik Kambatla <ka...@apache.org> Committed: Sun Sep 13 18:07:43 2015 -0700 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 ++ .../hadoop/yarn/event/TestAsyncDispatcher.java | 2 ++ .../scheduler/fair/FairScheduler.java | 7 +++++ .../scheduler/fair/TestFairScheduler.java | 31 ++++++++++++++++++++ 4 files changed, 43 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/332b520a/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 4a3a666..e4255c0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -900,6 +900,9 @@ Release 2.7.2 - UNRELEASED YARN-4136. LinuxContainerExecutor loses info when forwarding ResourceHandlerException. (Bibin A Chundatt via vvasudev) + YARN-3697. FairScheduler: ContinuousSchedulingThread can fail to shutdown. + (Zhihai Xu via kasha) + Release 2.7.1 - 2015-07-06 http://git-wip-us.apache.org/repos/asf/hadoop/blob/332b520a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java index ba0deff..018096b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java @@ -50,7 +50,9 @@ public class TestAsyncDispatcher { disp.waitForEventThreadToWait(); try { disp.getEventHandler().handle(event); + Assert.fail("Expected YarnRuntimeException"); } catch (YarnRuntimeException e) { + Assert.assertTrue(e.getCause() instanceof InterruptedException); } // Queue should be empty and dispatcher should not hang on close Assert.assertTrue("Event Queue should have been empty", http://git-wip-us.apache.org/repos/asf/hadoop/blob/332b520a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 5243fb3..3a39799 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1043,6 +1043,13 @@ public class FairScheduler extends } catch (Throwable ex) { LOG.error("Error while attempting scheduling for node " + node + ": " + ex.toString(), ex); + if ((ex instanceof YarnRuntimeException) && + (ex.getCause() instanceof InterruptedException)) { + // AsyncDispatcher translates InterruptedException to + // YarnRuntimeException with cause InterruptedException. + // Need to throw InterruptedException to stop schedulingThread. + throw (InterruptedException)ex.getCause(); + } } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/332b520a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index c352cc9..a02cf18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -27,7 +27,10 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Matchers.isA; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; import java.io.File; @@ -4320,6 +4323,34 @@ public class TestFairScheduler extends FairSchedulerTestBase { } @Test + public void testContinuousSchedulingInterruptedException() + throws Exception { + scheduler.init(conf); + scheduler.start(); + FairScheduler spyScheduler = spy(scheduler); + Assert.assertTrue("Continuous scheduling should be disabled.", + !spyScheduler.isContinuousSchedulingEnabled()); + // Add one nodes + RMNode node1 = + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + spyScheduler.handle(nodeEvent1); + Assert.assertEquals("We should have one alive node.", + 1, spyScheduler.getNumClusterNodes()); + InterruptedException ie = new InterruptedException(); + doThrow(new YarnRuntimeException(ie)).when(spyScheduler). + attemptScheduling(isA(FSSchedulerNode.class)); + // Invoke the continuous scheduling once + try { + spyScheduler.continuousSchedulingAttempt(); + fail("Expected InterruptedException to stop schedulingThread"); + } catch (InterruptedException e) { + Assert.assertEquals(ie, e); + } + } + + @Test public void testSchedulingOnRemovedNode() throws Exception { // Disable continuous scheduling, will invoke continuous scheduling manually scheduler.init(conf);