[ 
https://issues.apache.org/jira/browse/YARN-4000?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14802698#comment-14802698
 ] 

Jian He commented on YARN-4000:
-------------------------------

-  is this if condition a typo ? 
{code}
if (event.getDiagnosticMsg().isEmpty())

app.appDiagnosticsBeforeKilling =
    event.getDiagnosticMsg().isEmpty() ? 
getAppKilledDiagnostics() : event.getDiagnosticMsg();
{code}
Instead of introducing the appDiagnosticsBeforeKilling filed in RMAppImpl, I 
suggest doing below changes in RMAppImpl and RMAppAttemptImpl

{code}
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index ea9aa70..dc46326 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -1112,7 +1112,7 @@ private void 
rememberTargetTransitionsAndStoreState(RMAppEvent event,
       diags = getAppAttemptFailedDiagnostics(failedEvent);
       break;
     case ATTEMPT_KILLED:
-      diags = getAppKilledDiagnostics();
+      diags = event.getDiagnostics();
       break;
     default:
       break;
@@ -1209,21 +1209,17 @@ public AppKilledTransition() {
 
     @Override
     public void transition(RMAppImpl app, RMAppEvent event) {
-      app.diagnostics.append(getAppKilledDiagnostics());
+      app.diagnostics.append(event.getDiagnostics());
       super.transition(app, event);
     };
   }
 
-  private static String getAppKilledDiagnostics() {
-    return "Application killed by user.";
-  }
-
   private static class KillAttemptTransition extends RMAppTransition {
     @Override
     public void transition(RMAppImpl app, RMAppEvent event) {
       app.stateBeforeKilling = app.getState();
       app.handler.handle(new RMAppAttemptEvent(app.currentAttempt
-        .getAppAttemptId(), RMAppAttemptEventType.KILL));
+        .getAppAttemptId(), RMAppAttemptEventType.KILL, 
event.getDiagnostics()));
     }
   }
 
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 629b2a3..d4f254e 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -1270,8 +1270,7 @@ public void transition(RMAppAttemptImpl appAttempt,
           appAttempt.invalidateAMHostAndPort();
           appEvent =
               new RMAppFailedAttemptEvent(applicationId,
-                  RMAppEventType.ATTEMPT_KILLED,
-                  "Application killed by user.", false);
+                  RMAppEventType.ATTEMPT_KILLED, event.getDiagnostics(), 
false);
         }
         break;
         case FAILED:

{code}
- random sleep may be flicky, use {{MockRM#waitForState(ApplicationId appId, 
RMAppState finalState)}} instead
{code}
// Wait for app and attempt to be killed.
Thread.sleep(1000);
{code}

> RM crashes with NPE if leaf queue becomes parent queue during restart
> ---------------------------------------------------------------------
>
>                 Key: YARN-4000
>                 URL: https://issues.apache.org/jira/browse/YARN-4000
>             Project: Hadoop YARN
>          Issue Type: Bug
>          Components: capacityscheduler, resourcemanager
>    Affects Versions: 2.6.0
>            Reporter: Jason Lowe
>            Assignee: Varun Saxena
>         Attachments: YARN-4000.01.patch, YARN-4000.02.patch, 
> YARN-4000.03.patch
>
>
> This is a similar situation to YARN-2308.  If an application is active in 
> queue A and then the RM restarts with a changed capacity scheduler 
> configuration where queue A becomes a parent queue to other subqueues then 
> the RM will crash with a NullPointerException.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to