Author: jlowe
Date: Tue Nov 13 15:59:13 2012
New Revision: 1408812

URL: http://svn.apache.org/viewvc?rev=1408812&view=rev
Log:
YARN-212. NM state machine ignores an APPLICATION_CONTAINER_FINISHED event when it shouldn't. Contributed by Nathan Roberts

Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1408812&r1=1408811&r2=1408812&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Tue Nov 13 15:59:13 2012 @@ -216,6 +216,9 @@ Release 0.23.5 - UNRELEASED YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails. (jlowe via jeagles) + YARN-212. 
NM state machine ignores an APPLICATION_CONTAINER_FINISHED event + when it shouldn't (Nathan Roberts via jlowe) + Release 0.23.4 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java?rev=1408812&r1=1408811&r2=1408812&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java Tue Nov 13 15:59:13 2012 @@ -144,6 +144,9 @@ public class ApplicationImpl implements ApplicationEventType.FINISH_APPLICATION, new AppFinishTriggeredTransition()) .addTransition(ApplicationState.INITING, ApplicationState.INITING, + ApplicationEventType.APPLICATION_CONTAINER_FINISHED, + CONTAINER_DONE_TRANSITION) + .addTransition(ApplicationState.INITING, ApplicationState.INITING, ApplicationEventType.APPLICATION_LOG_HANDLING_INITED, new AppLogInitDoneTransition()) .addTransition(ApplicationState.INITING, ApplicationState.RUNNING, Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java?rev=1408812&r1=1408811&r2=1408812&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java Tue Nov 13 15:59:13 2012 @@ -278,6 +278,8 @@ public class ContainerImpl implements Co .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.KILL_CONTAINER) .addTransition(ContainerState.DONE, ContainerState.DONE, + ContainerEventType.INIT_CONTAINER) + .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java?rev=1408812&r1=1408811&r2=1408812&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java (original) +++ 
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java Tue Nov 13 15:59:13 2012 @@ -155,6 +155,60 @@ public class TestApplication { } } + /** + * Finished containers properly tracked when only container finishes in APP_INITING + */ + @Test + public void testContainersCompleteDuringAppInit1() { + WrappedApplication wa = null; + try { + wa = new WrappedApplication(3, 314159265358979L, "yak", 1); + wa.initApplication(); + wa.initContainer(-1); + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.containerFinished(0); + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.applicationInited(); + assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState()); + assertEquals(0, wa.app.getContainers().size()); + } finally { + if (wa != null) + wa.finished(); + } + } + + /** + * Finished containers properly tracked when 1 of several containers finishes in APP_INITING + */ + @Test + public void testContainersCompleteDuringAppInit2() { + WrappedApplication wa = null; + try { + wa = new WrappedApplication(3, 314159265358979L, "yak", 3); + wa.initApplication(); + wa.initContainer(-1); + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.containerFinished(0); + + assertEquals(ApplicationState.INITING, wa.app.getApplicationState()); + + wa.applicationInited(); + assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState()); + assertEquals(2, wa.app.getContainers().size()); + + wa.containerFinished(1); + wa.containerFinished(2); + assertEquals(ApplicationState.RUNNING, wa.app.getApplicationState()); + assertEquals(0, wa.app.getContainers().size()); + } finally { + if (wa != null) + wa.finished(); + } + } + @Test @SuppressWarnings("unchecked") public void testAppFinishedOnRunningContainers() { Modified: 
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java?rev=1408812&r1=1408811&r2=1408812&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java Tue Nov 13 15:59:13 2012 @@ -56,6 +56,8 @@ import org.apache.hadoop.yarn.event.Disp import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; @@ -65,6 +67,8 @@ import org.apache.hadoop.yarn.server.nod import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationRequestEvent; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; @@ -208,6 +212,32 @@ public class TestContainer { } } } + + @Test + @SuppressWarnings("unchecked") // mocked generic + public void testInitWhileDone() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(6, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerSuccessful(); + wc.containerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + // Now in DONE, issue INIT + wc.initContainer(); + // Verify still in DONE + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + verifyCleanupCall(wc); + } + finally { + if (wc != null) { + wc.finished(); + } + } + } @Test @SuppressWarnings("unchecked") // mocked generic @@ -506,6 +536,8 @@ public class TestContainer { final EventHandler<ContainersLauncherEvent> launcherBus; final EventHandler<ContainersMonitorEvent> monitorBus; final EventHandler<AuxServicesEvent> auxBus; + final EventHandler<ApplicationEvent> appBus; + final EventHandler<LogHandlerEvent> LogBus; final ContainerLaunchContext ctxt; final ContainerId cId; @@ -527,10 +559,14 @@ public class TestContainer { launcherBus = mock(EventHandler.class); monitorBus = mock(EventHandler.class); auxBus = mock(EventHandler.class); + appBus = 
mock(EventHandler.class); + LogBus = mock(EventHandler.class); dispatcher.register(LocalizationEventType.class, localizerBus); dispatcher.register(ContainersLauncherEventType.class, launcherBus); dispatcher.register(ContainersMonitorEventType.class, monitorBus); dispatcher.register(AuxServicesEventType.class, auxBus); + dispatcher.register(ApplicationEventType.class, appBus); + dispatcher.register(LogHandlerEventType.class, LogBus); this.user = user; ctxt = mock(ContainerLaunchContext.class); @@ -654,6 +690,11 @@ public class TestContainer { ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS)); drainDispatcherEvents(); } + public void containerResourcesCleanup() { + c.handle(new ContainerEvent(cId, + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + drainDispatcherEvents(); + } public void containerFailed(int exitCode) { c.handle(new ContainerExitEvent(cId, Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java?rev=1408812&r1=1408811&r2=1408812&view=diff ============================================================================== --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java Tue Nov 13 15:59:13 2012 @@ -319,6 +319,7 @@ public class 
TestLogAggregationService e this.user, null, ContainerLogsRetentionPolicy.AM_AND_FAILED_CONTAINERS_ONLY, this.acls)); + dispatcher.await(); ApplicationEvent expectedInitEvents[] = new ApplicationEvent[]{ new ApplicationEvent( application1,