[ https://issues.apache.org/jira/browse/YARN-4000?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14802698#comment-14802698 ]
Jian He edited comment on YARN-4000 at 9/17/15 10:04 AM: --------------------------------------------------------- - is this if condition a typo? {code} if (event.getDiagnosticMsg().isEmpty()) app.appDiagnosticsBeforeKilling = event.getDiagnosticMsg().isEmpty() ? getAppKilledDiagnostics() : event.getDiagnosticMsg(); {code} Instead of introducing the appDiagnosticsBeforeKilling field in RMAppImpl, I suggest making the changes below in RMAppImpl and RMAppAttemptImpl; the idea is to send the diagnostics from the app to the attempt and let the attempt send it back. {code} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index ea9aa70..dc46326 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -1112,7 +1112,7 @@ private void rememberTargetTransitionsAndStoreState(RMAppEvent event, diags = getAppAttemptFailedDiagnostics(failedEvent); break; case ATTEMPT_KILLED: - diags = getAppKilledDiagnostics(); + diags = event.getDiagnostics(); break; default: break; @@ -1209,21 +1209,17 @@ public AppKilledTransition() { @Override public void transition(RMAppImpl app, RMAppEvent event) { - app.diagnostics.append(getAppKilledDiagnostics()); + app.diagnostics.append(event.getDiagnostics()); super.transition(app, event); }; } - private static String getAppKilledDiagnostics() { - return "Application killed by user."; - } - private static class KillAttemptTransition extends RMAppTransition { @Override public 
void transition(RMAppImpl app, RMAppEvent event) { app.stateBeforeKilling = app.getState(); app.handler.handle(new RMAppAttemptEvent(app.currentAttempt - .getAppAttemptId(), RMAppAttemptEventType.KILL)); + .getAppAttemptId(), RMAppAttemptEventType.KILL, event.getDiagnostics())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 629b2a3..d4f254e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -1270,8 +1270,7 @@ public void transition(RMAppAttemptImpl appAttempt, appAttempt.invalidateAMHostAndPort(); appEvent = new RMAppFailedAttemptEvent(applicationId, - RMAppEventType.ATTEMPT_KILLED, - "Application killed by user.", false); + RMAppEventType.ATTEMPT_KILLED, event.getDiagnostics(), false); } break; case FAILED: {code} - an arbitrary sleep may be flaky; use {{MockRM#waitForState(ApplicationId appId, RMAppState finalState)}} instead {code} // Wait for app and attempt to be killed. Thread.sleep(1000); {code} was (Author: jianhe): - is this if condition a typo? {code} if (event.getDiagnosticMsg().isEmpty()) app.appDiagnosticsBeforeKilling = event.getDiagnosticMsg().isEmpty() ? 
getAppKilledDiagnostics() : event.getDiagnosticMsg(); {code} Instead of introducing the appDiagnosticsBeforeKilling field in RMAppImpl, I suggest making the changes below in RMAppImpl and RMAppAttemptImpl {code} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index ea9aa70..dc46326 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -1112,7 +1112,7 @@ private void rememberTargetTransitionsAndStoreState(RMAppEvent event, diags = getAppAttemptFailedDiagnostics(failedEvent); break; case ATTEMPT_KILLED: - diags = getAppKilledDiagnostics(); + diags = event.getDiagnostics(); break; default: break; @@ -1209,21 +1209,17 @@ public AppKilledTransition() { @Override public void transition(RMAppImpl app, RMAppEvent event) { - app.diagnostics.append(getAppKilledDiagnostics()); + app.diagnostics.append(event.getDiagnostics()); super.transition(app, event); }; } - private static String getAppKilledDiagnostics() { - return "Application killed by user."; - } - private static class KillAttemptTransition extends RMAppTransition { @Override public void transition(RMAppImpl app, RMAppEvent event) { app.stateBeforeKilling = app.getState(); app.handler.handle(new RMAppAttemptEvent(app.currentAttempt - .getAppAttemptId(), RMAppAttemptEventType.KILL)); + .getAppAttemptId(), RMAppAttemptEventType.KILL, event.getDiagnostics())); } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 629b2a3..d4f254e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -1270,8 +1270,7 @@ public void transition(RMAppAttemptImpl appAttempt, appAttempt.invalidateAMHostAndPort(); appEvent = new RMAppFailedAttemptEvent(applicationId, - RMAppEventType.ATTEMPT_KILLED, - "Application killed by user.", false); + RMAppEventType.ATTEMPT_KILLED, event.getDiagnostics(), false); } break; case FAILED: {code} - an arbitrary sleep may be flaky; use {{MockRM#waitForState(ApplicationId appId, RMAppState finalState)}} instead {code} // Wait for app and attempt to be killed. Thread.sleep(1000); {code} > RM crashes with NPE if leaf queue becomes parent queue during restart > --------------------------------------------------------------------- > > Key: YARN-4000 > URL: https://issues.apache.org/jira/browse/YARN-4000 > Project: Hadoop YARN > Issue Type: Bug > Components: capacityscheduler, resourcemanager > Affects Versions: 2.6.0 > Reporter: Jason Lowe > Assignee: Varun Saxena > Attachments: YARN-4000.01.patch, YARN-4000.02.patch, > YARN-4000.03.patch > > > This is a similar situation to YARN-2308. 
If an application is active in > queue A and then the RM restarts with a changed capacity scheduler > configuration where queue A becomes a parent queue to other subqueues then > the RM will crash with a NullPointerException. -- This message was sent by Atlassian JIRA (v6.3.4#6332)