Author: jeagles
Date: Tue Mar 5 23:23:18 2013
New Revision: 1453087
URL: http://svn.apache.org/r1453087
Log:
YARN-227. Application expiration difficult to debug for end-users (Jason Lowe via jeagles)
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1453087&r1=1453086&r2=1453087&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Tue Mar 5 23:23:18 2013 @@ -323,6 +323,9 @@ Release 0.23.7 - UNRELEASED YARN-269. Resource Manager not logging the health_check_script result when taking it out (Jason Lowe via kihwal) + YARN-227. Application expiration difficult to debug for end-users + (Jason Lowe via jeagles) + OPTIMIZATIONS YARN-357. 
App submission should not be synchronized (daryn) Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java?rev=1453087&r1=1453086&r2=1453087&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java Tue Mar 5 23:23:18 2013 @@ -147,6 +147,9 @@ public class RMAppAttemptImpl implements private Configuration conf; + private static final ExpiredTransition EXPIRED_TRANSITION = + new ExpiredTransition(); + private static final StateMachineFactory<RMAppAttemptImpl, RMAppAttemptState, RMAppAttemptEventType, @@ -243,7 +246,7 @@ public class RMAppAttemptImpl implements .addTransition( RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED, RMAppAttemptEventType.EXPIRE, - new FinalTransition(RMAppAttemptState.FAILED)) + EXPIRED_TRANSITION) .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED, RMAppAttemptEventType.KILL, new FinalTransition(RMAppAttemptState.KILLED)) @@ -268,7 +271,7 @@ public class RMAppAttemptImpl implements .addTransition( RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED, RMAppAttemptEventType.EXPIRE, - new FinalTransition(RMAppAttemptState.FAILED)) + 
EXPIRED_TRANSITION) .addTransition( RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED, RMAppAttemptEventType.KILL, @@ -491,6 +494,13 @@ public class RMAppAttemptImpl implements } } + private void setTrackingUrlToRMAppPage() { + origTrackingUrl = pjoin( + YarnConfiguration.getRMWebAppHostAndPort(conf), + "cluster", "app", getAppAttemptId().getApplicationId()); + proxiedTrackingUrl = origTrackingUrl; + } + @Override public ClientToken getClientToken() { return this.clientToken; @@ -992,7 +1002,23 @@ public class RMAppAttemptImpl implements } } } - + + private static class ExpiredTransition extends FinalTransition { + + public ExpiredTransition() { + super(RMAppAttemptState.FAILED); + } + + @Override + public void transition(RMAppAttemptImpl appAttempt, + RMAppAttemptEvent event) { + appAttempt.diagnostics.append("ApplicationMaster for attempt " + + appAttempt.getAppAttemptId() + " timed out"); + appAttempt.setTrackingUrlToRMAppPage(); + super.transition(appAttempt, event); + } + } + private static class UnexpectedAMRegisteredTransition extends BaseFinalTransition { @@ -1110,10 +1136,7 @@ public class RMAppAttemptImpl implements // When the AM dies, the trackingUrl is left pointing to the AM's URL, // which shows up in the scheduler UI as a broken link. Direct the // user to the app page on the RM so they can see the status and logs. 
- appAttempt.origTrackingUrl = pjoin( - YarnConfiguration.getRMWebAppHostAndPort(appAttempt.conf), - "cluster", "app", appAttempt.getAppAttemptId().getApplicationId()); - appAttempt.proxiedTrackingUrl = appAttempt.origTrackingUrl; + appAttempt.setTrackingUrlToRMAppPage(); new FinalTransition(RMAppAttemptState.FAILED).transition( appAttempt, containerFinishedEvent); Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java?rev=1453087&r1=1453086&r2=1453087&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java Tue Mar 5 23:23:18 2013 @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEqu import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -665,6 +666,39 @@ public class TestRMAppAttemptTransitions assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl()); } + @Test(timeout=10000) + public 
void testLaunchedExpire() { + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + applicationAttempt.handle(new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE)); + assertEquals(RMAppAttemptState.FAILED, + applicationAttempt.getAppAttemptState()); + assertTrue("expire diagnostics missing", + applicationAttempt.getDiagnostics().contains("timed out")); + String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app", + applicationAttempt.getAppAttemptId().getApplicationId()); + assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl()); + assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl()); + } + + @Test(timeout=20000) + public void testRunningExpire() { + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl"); + applicationAttempt.handle(new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE)); + assertEquals(RMAppAttemptState.FAILED, + applicationAttempt.getAppAttemptState()); + assertTrue("expire diagnostics missing", + applicationAttempt.getDiagnostics().contains("timed out")); + String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app", + applicationAttempt.getAppAttemptId().getApplicationId()); + assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl()); + assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl()); + } + @Test public void testUnregisterToKilledFinishing() { Container amContainer = allocateApplicationAttempt();