Repository: hadoop Updated Branches: refs/heads/trunk 8f195387a -> a0bca2b5a
YARN-261. Ability to fail AM attempts. Contributed by Andrey Klochkov and Rohith Sharma K S Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a0bca2b5 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a0bca2b5 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a0bca2b5 Branch: refs/heads/trunk Commit: a0bca2b5ad2344fda5411d910a3730c85f12a0df Parents: 8f19538 Author: Jason Lowe <jl...@apache.org> Authored: Fri Oct 9 14:17:38 2015 +0000 Committer: Jason Lowe <jl...@apache.org> Committed: Fri Oct 9 14:17:38 2015 +0000 ---------------------------------------------------------------------- .../hadoop/mapred/ResourceMgrDelegate.java | 6 + .../hadoop/mapred/TestClientRedirect.java | 8 ++ hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/api/ApplicationClientProtocol.java | 28 ++++ .../FailApplicationAttemptRequest.java | 61 +++++++++ .../FailApplicationAttemptResponse.java | 45 +++++++ .../main/proto/applicationclient_protocol.proto | 1 + .../src/main/proto/yarn_service_protos.proto | 7 + .../hadoop/yarn/client/api/YarnClient.java | 17 +++ .../yarn/client/api/impl/YarnClientImpl.java | 11 ++ .../hadoop/yarn/client/cli/ApplicationCLI.java | 28 ++++ .../apache/hadoop/yarn/client/cli/YarnCLI.java | 7 +- .../hadoop/yarn/client/cli/TestYarnCLI.java | 14 ++ .../ApplicationClientProtocolPBClientImpl.java | 19 +++ .../ApplicationClientProtocolPBServiceImpl.java | 19 +++ .../pb/FailApplicationAttemptRequestPBImpl.java | 135 +++++++++++++++++++ .../FailApplicationAttemptResponsePBImpl.java | 70 ++++++++++ .../amrmproxy/MockResourceManagerFacade.java | 8 ++ .../server/resourcemanager/ClientRMService.java | 76 +++++++++++ .../server/resourcemanager/RMAuditLogger.java | 1 + .../rmapp/attempt/RMAppAttemptEventType.java | 1 + .../rmapp/attempt/RMAppAttemptImpl.java | 69 +++++++++- .../attempt/event/RMAppAttemptFailedEvent.java | 39 ++++++ .../event/RMAppAttemptRegistrationEvent.java | 2 - .../yarn/server/resourcemanager/MockRM.java | 10 ++ .../server/resourcemanager/TestRMRestart.java | 62 +++++++++ .../attempt/TestRMAppAttemptTransitions.java | 74 ++++++++++ 27 files changed, 815 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 29266d5..f5fd0cb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -294,6 +294,12 @@ public class ResourceMgrDelegate extends YarnClient { } @Override + public void failApplicationAttempt(ApplicationAttemptId attemptId) + throws YarnException, IOException { + client.failApplicationAttempt(attemptId); + } + + @Override public void killApplication(ApplicationId applicationId) throws YarnException, IOException { client.killApplication(applicationId); http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index 8febec6..8ab3304 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -70,6 +70,8 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -328,6 +330,12 @@ public class TestClientRedirect { } @Override + public FailApplicationAttemptResponse failApplicationAttempt( + FailApplicationAttemptRequest request) throws IOException { + return recordFactory.newRecordInstance(FailApplicationAttemptResponse.class); + } + + @Override public KillApplicationResponse forceKillApplication( KillApplicationRequest request) throws IOException { return KillApplicationResponse.newInstance(true); http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 189ae8f..1adedd5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -226,6 +226,9 @@ Release 2.8.0 - UNRELEASED YARN-1897. CLI and core support for signal container functionality. (Ming Ma via xgong) + YARN-261. Ability to fail AM attempts (Andrey Klochkov and + Rohith Sharma K S via jlowe) + IMPROVEMENTS YARN-644. Basic null check is not performed on passed in arguments before http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java index bcd3ef6..78dc196 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java @@ -24,6 +24,8 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.io.retry.Idempotent; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; @@ -150,6 +152,32 @@ public interface ApplicationClientProtocol extends ApplicationBaseProtocol { /** * <p>The interface used by clients to request the + * <code>ResourceManager</code> to fail an application attempt.</p> + * + * <p>The client, via {@link FailApplicationAttemptRequest} provides the + * {@link ApplicationAttemptId} of the attempt to be failed.</p> + * + * <p> In secure mode,the <code>ResourceManager</code> verifies access to the + * application, queue etc. before failing the attempt.</p> + * + * <p>Currently, the <code>ResourceManager</code> returns an empty response + * on success and throws an exception on rejecting the request.</p> + * + * @param request request to fail an attempt + * @return <code>ResourceManager</code> returns an empty response + * on success and throws an exception on rejecting the request + * @throws YarnException + * @throws IOException + * @see #getQueueUserAcls(GetQueueUserAclsInfoRequest) + */ + @Public + @Unstable + public FailApplicationAttemptResponse failApplicationAttempt( + FailApplicationAttemptRequest request) + throws YarnException, IOException; + + /** + * <p>The interface used by clients to request the * <code>ResourceManager</code> to abort submitted application.</p> * * <p>The client, via {@link KillApplicationRequest} provides the http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptRequest.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptRequest.java new file mode 100644 index 0000000..2d3c7a4 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptRequest.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Stable; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.util.Records; + +/** + * <p>The request sent by the client to the <code>ResourceManager</code> + * to fail an application attempt.</p> + * + * <p>The request includes the {@link ApplicationAttemptId} of the attempt to + * be failed.</p> + * + * @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest) + */ +@Public +@Stable +public abstract class FailApplicationAttemptRequest { + + @Public + @Stable + public static FailApplicationAttemptRequest newInstance( + ApplicationAttemptId attemptId) { + FailApplicationAttemptRequest request = + Records.newRecord(FailApplicationAttemptRequest.class); + request.setApplicationAttemptId(attemptId); + return request; + } + + /** + * Get the <code>ApplicationAttemptId</code> of the attempt to be failed. + * @return <code>ApplicationAttemptId</code> of the attempt. + */ + @Public + @Stable + public abstract ApplicationAttemptId getApplicationAttemptId(); + + @Public + @Stable + public abstract void setApplicationAttemptId( + ApplicationAttemptId applicationAttemptId); +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptResponse.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptResponse.java new file mode 100644 index 0000000..1a31e27 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FailApplicationAttemptResponse.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Stable; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + * <p>The response sent by the <code>ResourceManager</code> to the client + * failing an application attempt.</p> + * + * <p>Currently it's empty.</p> + * + * @see ApplicationClientProtocol#failApplicationAttempt(FailApplicationAttemptRequest) + */ +@Public +@Stable +public abstract class FailApplicationAttemptResponse { + @Private + @Unstable + public static FailApplicationAttemptResponse newInstance() { + FailApplicationAttemptResponse response = + Records.newRecord(FailApplicationAttemptResponse.class); + return response; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto index a2ab9c2..e98726b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto @@ -35,6 +35,7 @@ service ApplicationClientProtocolService { rpc getNewApplication (GetNewApplicationRequestProto) returns (GetNewApplicationResponseProto); rpc getApplicationReport (GetApplicationReportRequestProto) returns (GetApplicationReportResponseProto); rpc submitApplication (SubmitApplicationRequestProto) returns (SubmitApplicationResponseProto); + rpc failApplicationAttempt (FailApplicationAttemptRequestProto) returns (FailApplicationAttemptResponseProto); rpc forceKillApplication (KillApplicationRequestProto) returns (KillApplicationResponseProto); rpc getClusterMetrics (GetClusterMetricsRequestProto) returns (GetClusterMetricsResponseProto); rpc getApplications (GetApplicationsRequestProto) returns (GetApplicationsResponseProto); http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 15e99f9..a4b9c37 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -122,6 +122,13 @@ message SubmitApplicationRequestProto { message SubmitApplicationResponseProto { } +message FailApplicationAttemptRequestProto { + optional ApplicationAttemptIdProto application_attempt_id = 1; +} + +message FailApplicationAttemptResponseProto { +} + message KillApplicationRequestProto { optional ApplicationIdProto application_id = 1; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java index 7f6a9fc..523698f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java @@ -138,6 +138,23 @@ public abstract class YarnClient extends AbstractService { /** * <p> + * Fail an application attempt identified by given ID. + * </p> + * + * @param applicationAttemptId + * {@link ApplicationAttemptId} of the attempt to fail. + * @throws YarnException + * in case of errors or if YARN rejects the request due to + * access-control restrictions. + * @throws IOException + * @see #getQueueAclsInfo() + */ + public abstract void failApplicationAttempt( + ApplicationAttemptId applicationAttemptId) throws YarnException, + IOException; + + /** + * <p> * Kill an application identified by given ID. * </p> * http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 2bc6143..29fd417 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -42,6 +42,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -383,6 +384,16 @@ public class YarnClientImpl extends YarnClient { } @Override + public void failApplicationAttempt(ApplicationAttemptId attemptId) + throws YarnException, IOException { + LOG.info("Failing application attempt " + attemptId); + FailApplicationAttemptRequest request = + Records.newRecord(FailApplicationAttemptRequest.class); + request.setApplicationAttemptId(attemptId); + rmClient.failApplicationAttempt(request); + } + + @Override public void killApplication(ApplicationId applicationId) throws YarnException, IOException { KillApplicationRequest request = http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index be89ce2..e34675a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -38,6 +38,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -138,9 +139,11 @@ public class ApplicationCLI extends YarnCLI { "Prints the status of the application attempt."); opts.addOption(LIST_CMD, true, "List application attempts for aplication."); + opts.addOption(FAIL_CMD, true, "Fails application attempt."); opts.addOption(HELP_CMD, false, "Displays help for all commands."); opts.getOption(STATUS_CMD).setArgName("Application Attempt ID"); opts.getOption(LIST_CMD).setArgName("Application ID"); + opts.getOption(FAIL_CMD).setArgName("Application Attempt ID"); } else if (args.length > 0 && args[0].equalsIgnoreCase(CONTAINER)) { title = CONTAINER; opts.addOption(STATUS_CMD, true, @@ -252,6 +255,12 @@ public class ApplicationCLI extends YarnCLI { } moveApplicationAcrossQueues(cliParser.getOptionValue(MOVE_TO_QUEUE_CMD), cliParser.getOptionValue(QUEUE_CMD)); + } else if (cliParser.hasOption(FAIL_CMD)) { + if (!args[0].equalsIgnoreCase(APPLICATION_ATTEMPT)) { + printUsage(title, opts); + return exitCode; + } + failApplicationAttempt(cliParser.getOptionValue(FAIL_CMD)); } else if (cliParser.hasOption(HELP_CMD)) { printUsage(title, opts); return 0; @@ -519,6 +528,25 @@ public class ApplicationCLI extends YarnCLI { } /** + * Fails an application attempt. + * + * @param attemptId ID of the attempt to fail. If provided, applicationId + * parameter is not used. + * @throws YarnException + * @throws IOException + */ + private void failApplicationAttempt(String attemptId) throws YarnException, + IOException { + ApplicationId appId; + ApplicationAttemptId attId; + attId = ConverterUtils.toApplicationAttemptId(attemptId); + appId = attId.getApplicationId(); + + sysout.println("Failing attempt " + attId + " of application " + appId); + client.failApplicationAttempt(attId); + } + + /** * Prints the application report for an application id. * * @param applicationId http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/YarnCLI.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/YarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/YarnCLI.java index a0c0148..ac33dea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/YarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/YarnCLI.java @@ -33,6 +33,7 @@ public abstract class YarnCLI extends Configured implements Tool { public static final String STATUS_CMD = "status"; public static final String LIST_CMD = "list"; public static final String KILL_CMD = "kill"; + public static final String FAIL_CMD = "fail"; public static final String MOVE_TO_QUEUE_CMD = "movetoqueue"; public static final String HELP_CMD = "help"; public static final String SIGNAL_CMD = "signal"; @@ -42,11 +43,15 @@ public abstract class YarnCLI extends Configured implements Tool { public YarnCLI() { super(new YarnConfiguration()); - client = YarnClient.createYarnClient(); + client = createYarnClient(); client.init(getConf()); client.start(); } + protected YarnClient createYarnClient() { + return YarnClient.createYarnClient(); + } + public void setSysOutPrintStream(PrintStream sysout) { this.sysout = sysout; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java index b72fd2d..069ff7d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; import java.io.ByteArrayOutputStream; @@ -1443,6 +1444,18 @@ public class TestYarnCLI { } + @Test + public void testFailApplicationAttempt() throws Exception { + ApplicationCLI cli = createAndGetAppCLI(); + int exitCode = + cli.run(new String[] { "applicationattempt", "-fail", + "appattempt_1444199730803_0003_000001" }); + Assert.assertEquals(0, exitCode); + + verify(client).failApplicationAttempt(any(ApplicationAttemptId.class)); + verifyNoMoreInteractions(client); + } + private void verifyUsageInfo(YarnCLI cli) throws Exception { cli.setSysErrPrintStream(sysErr); cli.run(new String[] { "application" }); @@ -1527,6 +1540,7 @@ public class TestYarnCLI { ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintWriter pw = new PrintWriter(baos); pw.println("usage: applicationattempt"); + pw.println(" -fail <Application Attempt ID> Fails application attempt."); pw.println(" -help Displays help for all commands."); pw.println(" -list <Application ID> List application attempts for"); pw.println(" aplication."); http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java index 8d7351d..c1e6e9a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java @@ -33,6 +33,8 @@ import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationClientProtocolPB; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -83,6 +85,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptsRequestPBImpl; @@ -134,6 +138,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRespo import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.proto.YarnServiceProtos; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationAttemptReportRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationAttemptsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationReportRequestProto; @@ -178,6 +183,20 @@ public class ApplicationClientProtocolPBClientImpl implements ApplicationClientP } @Override + public FailApplicationAttemptResponse failApplicationAttempt( + FailApplicationAttemptRequest request) throws YarnException, IOException { + FailApplicationAttemptRequestProto requestProto = + ((FailApplicationAttemptRequestPBImpl) request).getProto(); + try { + return new FailApplicationAttemptResponsePBImpl(proxy.failApplicationAttempt( + null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override public KillApplicationResponse forceKillApplication( KillApplicationRequest request) throws YarnException, IOException { KillApplicationRequestProto requestProto = http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java index b9485a8..2ee88c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java @@ -30,6 +30,7 @@ import org.apache.hadoop.security.proto.SecurityProtos.RenewDelegationTokenRespo import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationClientProtocolPB; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; @@ -56,6 +57,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptsRequestPBImpl; @@ -104,6 +107,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationReque import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.proto.YarnServiceProtos; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationAttemptReportRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationAttemptReportResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationAttemptsRequestProto; @@ -163,6 +168,20 @@ public class ApplicationClientProtocolPBServiceImpl implements ApplicationClient } @Override + public FailApplicationAttemptResponseProto failApplicationAttempt(RpcController arg0, + FailApplicationAttemptRequestProto proto) throws ServiceException { + FailApplicationAttemptRequestPBImpl request = new FailApplicationAttemptRequestPBImpl(proto); + try { + FailApplicationAttemptResponse response = real.failApplicationAttempt(request); + return ((FailApplicationAttemptResponsePBImpl)response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override public KillApplicationResponseProto forceKillApplication(RpcController arg0, KillApplicationRequestProto proto) throws ServiceException { KillApplicationRequestPBImpl request = new KillApplicationRequestPBImpl(proto); http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptRequestPBImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptRequestPBImpl.java new file mode 100644 index 0000000..d3ce424 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptRequestPBImpl.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationAttemptIdPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptIdProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptRequestProtoOrBuilder; + +import com.google.protobuf.TextFormat; + +@Private +@Unstable +public class FailApplicationAttemptRequestPBImpl extends FailApplicationAttemptRequest { + FailApplicationAttemptRequestProto proto = FailApplicationAttemptRequestProto.getDefaultInstance(); + FailApplicationAttemptRequestProto.Builder builder = null; + boolean viaProto = false; + + private ApplicationAttemptId applicationAttemptId = null; + + + public FailApplicationAttemptRequestPBImpl() { + builder = FailApplicationAttemptRequestProto.newBuilder(); + } + + public FailApplicationAttemptRequestPBImpl(FailApplicationAttemptRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public FailApplicationAttemptRequestProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) + return false; + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } + + private void mergeLocalToBuilder() { + if (this.applicationAttemptId != null) { + builder.setApplicationAttemptId( + convertToProtoFormat(this.applicationAttemptId)); + } + } + + private void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = FailApplicationAttemptRequestProto.newBuilder(proto); + } + viaProto = false; + } + + + @Override + public ApplicationAttemptId getApplicationAttemptId() { + FailApplicationAttemptRequestProtoOrBuilder p = viaProto ? proto : builder; + if (this.applicationAttemptId != null) { + return this.applicationAttemptId; + } + if (!p.hasApplicationAttemptId()) { + return null; + } + this.applicationAttemptId = convertFromProtoFormat( + p.getApplicationAttemptId()); + return this.applicationAttemptId; + } + + @Override + public void setApplicationAttemptId( + ApplicationAttemptId applicationAttemptId) { + maybeInitBuilder(); + if (applicationAttemptId == null) + builder.clearApplicationAttemptId(); + this.applicationAttemptId = applicationAttemptId; + } + + private ApplicationAttemptIdPBImpl convertFromProtoFormat( + ApplicationAttemptIdProto p) { + return new ApplicationAttemptIdPBImpl(p); + } + + private ApplicationAttemptIdProto convertToProtoFormat( + ApplicationAttemptId t) { + return ((ApplicationAttemptIdPBImpl)t).getProto(); + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptResponsePBImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptResponsePBImpl.java new file mode 100644 index 0000000..21471d7 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FailApplicationAttemptResponsePBImpl.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptResponseProto; + +import com.google.protobuf.TextFormat; + +@Private +@Unstable +public class FailApplicationAttemptResponsePBImpl extends FailApplicationAttemptResponse { + FailApplicationAttemptResponseProto proto = FailApplicationAttemptResponseProto.getDefaultInstance(); + FailApplicationAttemptResponseProto.Builder builder = null; + boolean viaProto = false; + + public FailApplicationAttemptResponsePBImpl() { + builder = FailApplicationAttemptResponseProto.newBuilder(); + } + + public FailApplicationAttemptResponsePBImpl(FailApplicationAttemptResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public FailApplicationAttemptResponseProto getProto() { + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) + return false; + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java index 0069aaa..c83adee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java @@ -36,6 +36,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; @@ -472,4 +474,10 @@ public class MockResourceManagerFacade implements SignalContainerRequest request) throws IOException { return null; } + + @Override + public FailApplicationAttemptResponse failApplicationAttempt( + FailApplicationAttemptRequest request) throws YarnException, IOException { + throw new NotImplementedException(); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index a69cc68..863c384 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -51,6 +51,8 @@ import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.protocolrecords.ApplicationsRequestScope; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -139,6 +141,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMoveEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeSignalContainerEvent; @@ -616,6 +619,79 @@ public class ClientRMService extends AbstractService implements @SuppressWarnings("unchecked") @Override + public FailApplicationAttemptResponse failApplicationAttempt( + FailApplicationAttemptRequest request) throws YarnException { + + ApplicationAttemptId attemptId = request.getApplicationAttemptId(); + ApplicationId applicationId = attemptId.getApplicationId(); + + UserGroupInformation callerUGI; + try { + callerUGI = UserGroupInformation.getCurrentUser(); + } catch (IOException ie) { + LOG.info("Error getting UGI ", ie); + RMAuditLogger.logFailure("UNKNOWN", AuditConstants.FAIL_ATTEMPT_REQUEST, + "UNKNOWN", "ClientRMService" , "Error getting UGI", + applicationId, attemptId); + throw RPCUtil.getRemoteException(ie); + } + + RMApp application = this.rmContext.getRMApps().get(applicationId); + if (application == null) { + RMAuditLogger.logFailure(callerUGI.getUserName(), + AuditConstants.FAIL_ATTEMPT_REQUEST, "UNKNOWN", "ClientRMService", + "Trying to fail an attempt of an absent application", applicationId, + attemptId); + throw new ApplicationNotFoundException("Trying to fail an attempt " + + attemptId + " of an absent application " + applicationId); + } + + RMAppAttempt appAttempt = application.getAppAttempts().get(attemptId); + if (appAttempt == null) { + throw new ApplicationAttemptNotFoundException( + "ApplicationAttempt with id '" + attemptId + "' doesn't exist in RM."); + } + + if (!checkAccess(callerUGI, application.getUser(), + ApplicationAccessType.MODIFY_APP, application)) { + RMAuditLogger.logFailure(callerUGI.getShortUserName(), + AuditConstants.FAIL_ATTEMPT_REQUEST, + "User doesn't have permissions to " + + ApplicationAccessType.MODIFY_APP.toString(), "ClientRMService", + AuditConstants.UNAUTHORIZED_USER, applicationId); + throw RPCUtil.getRemoteException(new AccessControlException("User " + + callerUGI.getShortUserName() + " cannot perform operation " + + ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId)); + } + + FailApplicationAttemptResponse response = + recordFactory.newRecordInstance(FailApplicationAttemptResponse.class); + + if (!ACTIVE_APP_STATES.contains(application.getState())) { + if (COMPLETED_APP_STATES.contains(application.getState())) { + RMAuditLogger.logSuccess(callerUGI.getShortUserName(), + AuditConstants.FAIL_ATTEMPT_REQUEST, "ClientRMService", + applicationId); + return response; + } + } + + this.rmContext + .getDispatcher() + .getEventHandler() + .handle( + new RMAppAttemptFailedEvent(attemptId, + "Attempt failed by user.")); + + RMAuditLogger.logSuccess(callerUGI.getShortUserName(), + AuditConstants.FAIL_ATTEMPT_REQUEST, "ClientRMService", applicationId, + attemptId); + + return response; + } + + @SuppressWarnings("unchecked") + @Override public KillApplicationResponse forceKillApplication( KillApplicationRequest request) throws YarnException { http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java index 92745b7..8cb7c80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java @@ -43,6 +43,7 @@ public class RMAuditLogger { static final String KEY_VAL_SEPARATOR = "="; static final char PAIR_SEPARATOR = '\t'; + public static final String FAIL_ATTEMPT_REQUEST = "Fail Attempt Request"; public static final String KILL_APP_REQUEST = "Kill Application Request"; public static final String SUBMIT_APP_REQUEST = "Submit Application Request"; public static final String MOVE_APP_REQUEST = "Move Application Request"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java index ddf782e..534b6b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java @@ -22,6 +22,7 @@ public enum RMAppAttemptEventType { // Source: RMApp START, KILL, + FAIL, // Source: AMLauncher LAUNCHED, http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 43de3ac..962a810 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -86,6 +86,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptE import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent; @@ -175,6 +176,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private final boolean maybeLastAttempt; private static final ExpiredTransition EXPIRED_TRANSITION = new ExpiredTransition(); + private static final AttemptFailedTransition FAILED_TRANSITION = + new AttemptFailedTransition(); private RMAppAttemptEvent eventCausingFinalSaving; private RMAppAttemptState targetedFinalState; @@ -203,6 +206,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { new FinalSavingTransition(new BaseFinalTransition( RMAppAttemptState.KILLED), RMAppAttemptState.KILLED)) .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) + .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.REGISTERED, new FinalSavingTransition( new UnexpectedAMRegisteredTransition(), RMAppAttemptState.FAILED)) @@ -222,6 +229,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { new FinalSavingTransition(new BaseFinalTransition( RMAppAttemptState.KILLED), RMAppAttemptState.KILLED)) .addTransition(RMAppAttemptState.SUBMITTED, RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) + .addTransition(RMAppAttemptState.SUBMITTED, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.REGISTERED, new FinalSavingTransition( new UnexpectedAMRegisteredTransition(), RMAppAttemptState.FAILED)) @@ -236,6 +247,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.KILL, new FinalSavingTransition(new BaseFinalTransition( RMAppAttemptState.KILLED), RMAppAttemptState.KILLED)) + .addTransition(RMAppAttemptState.SCHEDULED, RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) .addTransition(RMAppAttemptState.SCHEDULED, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.CONTAINER_FINISHED, @@ -260,6 +275,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { new FinalSavingTransition( new AMContainerCrashedBeforeRunningTransition(), RMAppAttemptState.FAILED)) + .addTransition(RMAppAttemptState.ALLOCATED_SAVING, + RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) // Transitions from LAUNCHED_UNMANAGED_SAVING State .addTransition(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, @@ -278,6 +298,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.KILL, new FinalSavingTransition(new BaseFinalTransition( RMAppAttemptState.KILLED), RMAppAttemptState.KILLED)) + .addTransition(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, + RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) // Transitions from ALLOCATED State .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED, @@ -292,6 +317,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { new KillAllocatedAMTransition(), RMAppAttemptState.KILLED)) .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) + .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.CONTAINER_FINISHED, new FinalSavingTransition( new AMContainerCrashedBeforeRunningTransition(), RMAppAttemptState.FAILED)) @@ -314,6 +343,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.KILL, new FinalSavingTransition(new FinalTransition( RMAppAttemptState.KILLED), RMAppAttemptState.KILLED)) + .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) // Transitions from RUNNING State .addTransition(RMAppAttemptState.RUNNING, @@ -340,6 +373,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.KILL, new FinalSavingTransition(new FinalTransition( RMAppAttemptState.KILLED), RMAppAttemptState.KILLED)) + .addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.FINAL_SAVING, + RMAppAttemptEventType.FAIL, + new FinalSavingTransition(FAILED_TRANSITION, + RMAppAttemptState.FAILED)) // Transitions from FINAL_SAVING State .addTransition(RMAppAttemptState.FINAL_SAVING, @@ -363,7 +400,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Saving in scheduler RMAppAttemptEventType.CONTAINER_ALLOCATED, RMAppAttemptEventType.ATTEMPT_NEW_SAVED, - RMAppAttemptEventType.KILL)) + RMAppAttemptEventType.KILL, + RMAppAttemptEventType.FAIL)) // Transitions from FAILED State // For work-preserving AM restart, failed attempt are still capturing @@ -378,6 +416,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { EnumSet.of( RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.KILL, + RMAppAttemptEventType.FAIL, RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.STATUS_UPDATE, RMAppAttemptEventType.CONTAINER_ALLOCATED)) @@ -397,7 +436,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.CONTAINER_ALLOCATED, // ignore Kill as we have already saved the final Finished state in // state store. - RMAppAttemptEventType.KILL)) + RMAppAttemptEventType.KILL, + RMAppAttemptEventType.FAIL)) // Transitions from FINISHED State .addTransition( @@ -407,7 +447,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.CONTAINER_ALLOCATED, - RMAppAttemptEventType.KILL)) + RMAppAttemptEventType.KILL, + RMAppAttemptEventType.FAIL)) .addTransition(RMAppAttemptState.FINISHED, RMAppAttemptState.FINISHED, RMAppAttemptEventType.CONTAINER_FINISHED, @@ -425,6 +466,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppAttemptEventType.CONTAINER_ALLOCATED, RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.KILL, + RMAppAttemptEventType.FAIL, RMAppAttemptEventType.STATUS_UPDATE)) .addTransition(RMAppAttemptState.KILLED, RMAppAttemptState.KILLED, @@ -1175,6 +1217,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { break; case KILL: break; + case FAIL: + RMAppAttemptFailedEvent failEvent = + (RMAppAttemptFailedEvent) event; + diags = failEvent.getDiagnostics(); + break; case EXPIRE: diags = getAMExpiredDiagnostics(event); break; @@ -1321,6 +1368,22 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { } } + private static class AttemptFailedTransition extends BaseFinalTransition { + + public AttemptFailedTransition() { + super(RMAppAttemptState.FAILED); + } + + @Override + public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { + RMAppAttemptFailedEvent failedEvent = (RMAppAttemptFailedEvent) event; + if (failedEvent.getDiagnostics() != null) { + appAttempt.diagnostics.append(failedEvent.getDiagnostics()); + } + super.transition(appAttempt, event); + } + } + private static class AMLaunchedTransition extends BaseTransition { @Override public void transition(RMAppAttemptImpl appAttempt, http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptFailedEvent.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptFailedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptFailedEvent.java new file mode 100644 index 0000000..c698e7d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptFailedEvent.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event; + +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; + +public class RMAppAttemptFailedEvent extends RMAppAttemptEvent { + + private final String diagnostics; + + public RMAppAttemptFailedEvent(ApplicationAttemptId appAttemptId, + String diagnostics) { + super(appAttemptId, RMAppAttemptEventType.FAIL); + this.diagnostics = diagnostics; + } + + public String getDiagnostics() { + return this.diagnostics; + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRegistrationEvent.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRegistrationEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRegistrationEvent.java index 3321c37..addc5c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRegistrationEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRegistrationEvent.java @@ -24,7 +24,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptE public class RMAppAttemptRegistrationEvent extends RMAppAttemptEvent { - private final ApplicationAttemptId appAttemptId; private final String host; private int rpcport; private String trackingurl; @@ -32,7 +31,6 @@ public class RMAppAttemptRegistrationEvent extends RMAppAttemptEvent { public RMAppAttemptRegistrationEvent(ApplicationAttemptId appAttemptId, String host, int rpcPort, String trackingUrl) { super(appAttemptId, RMAppAttemptEventType.REGISTERED); - this.appAttemptId = appAttemptId; this.host = host; this.rpcport = rpcPort; this.trackingurl = trackingUrl; http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 674529e..a2b4438 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -32,6 +32,8 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; @@ -587,6 +589,14 @@ public class MockRM extends ResourceManager { return client.forceKillApplication(req); } + public FailApplicationAttemptResponse failApplicationAttempt( + ApplicationAttemptId attemptId) throws Exception { + ApplicationClientProtocol client = getClientRMService(); + FailApplicationAttemptRequest req = + FailApplicationAttemptRequest.newInstance(attemptId); + return client.failApplicationAttempt(req); + } + // from AMLauncher public MockAM sendAMLaunched(ApplicationAttemptId appAttemptId) throws Exception { http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index de17acd..531a4a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -2170,6 +2170,68 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { rm2.stop(); } + @Test(timeout = 60000) + public void testRMRestartFailAppAttempt() throws Exception { + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); + int maxAttempt = + conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + RMState rmState = memStore.getState(); + Map<ApplicationId, ApplicationStateData> rmAppState = + rmState.getApplicationState(); + + // start RM + MockRM rm1 = createMockRM(conf, memStore); + rm1.start(); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService()); + nm1.registerNode(); + + // create app and launch the AM + RMApp app0 = rm1.submitApp(200); + MockAM am0 = launchAM(app0, rm1, nm1); + + ApplicationId applicationId = app0.getApplicationId(); + ApplicationAttemptId appAttemptId1 = + app0.getCurrentAppAttempt().getAppAttemptId(); + Assert.assertEquals(1, appAttemptId1.getAttemptId()); + + // fail the 1st app attempt. + rm1.failApplicationAttempt(appAttemptId1); + + rm1.waitForState(appAttemptId1, RMAppAttemptState.FAILED); + rm1.waitForState(applicationId, RMAppState.ACCEPTED); + + ApplicationAttemptId appAttemptId2 = + app0.getCurrentAppAttempt().getAppAttemptId(); + Assert.assertEquals(2, appAttemptId2.getAttemptId()); + rm1.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED); + + // restart rm + MockRM rm2 = createMockRM(conf, memStore); + rm2.start(); + RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(applicationId); + rm2.waitForState(applicationId, RMAppState.ACCEPTED); + rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED); + + + Assert.assertEquals(2, loadedApp0.getAppAttempts().size()); + rm2.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED); + + appAttemptId2 = loadedApp0.getCurrentAppAttempt().getAppAttemptId(); + Assert.assertEquals(2, appAttemptId2.getAttemptId()); + + // fail 2nd attempt + rm2.failApplicationAttempt(appAttemptId2); + + rm2.waitForState(appAttemptId2, RMAppAttemptState.FAILED); + rm2.waitForState(applicationId, RMAppState.FAILED); + Assert.assertEquals(maxAttempt, loadedApp0.getAppAttempts().size()); + } + private <E> Set<E> toSet(E... elements) { Set<E> set = Sets.newHashSet(elements); return set; http://git-wip-us.apache.org/repos/asf/hadoop/blob/a0bca2b5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 828e149..9cc6a23 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -90,6 +90,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; @@ -135,6 +136,7 @@ public class TestRMAppAttemptTransitions { LogFactory.getLog(TestRMAppAttemptTransitions.class); private static final String EMPTY_DIAGNOSTICS = ""; + private static final String FAILED_DIAGNOSTICS = "Attempt failed by user."; private static final String RM_WEBAPP_ADDR = WebAppUtils.getResolvedRMWebAppURLWithScheme(new Configuration()); @@ -1542,6 +1544,78 @@ public class TestRMAppAttemptTransitions { (RMAppAttemptImpl) applicationAttempt, null); } + @Test(timeout = 30000) + public void testNewToFailed() { + applicationAttempt.handle(new RMAppAttemptFailedEvent(applicationAttempt + .getAppAttemptId(), FAILED_DIAGNOSTICS)); + assertEquals(YarnApplicationAttemptState.NEW, + applicationAttempt.createApplicationAttemptState()); + testAppAttemptFailedState(null, FAILED_DIAGNOSTICS); + verifyTokenCount(applicationAttempt.getAppAttemptId(), 1); + } + + @Test(timeout = 30000) + public void testSubmittedToFailed() { + submitApplicationAttempt(); + applicationAttempt.handle(new RMAppAttemptFailedEvent(applicationAttempt + .getAppAttemptId(), FAILED_DIAGNOSTICS)); + assertEquals(YarnApplicationAttemptState.SUBMITTED, + applicationAttempt.createApplicationAttemptState()); + testAppAttemptFailedState(null, FAILED_DIAGNOSTICS); + } + + @Test(timeout = 30000) + public void testScheduledToFailed() { + scheduleApplicationAttempt(); + applicationAttempt.handle(new RMAppAttemptFailedEvent(applicationAttempt + .getAppAttemptId(), FAILED_DIAGNOSTICS)); + assertEquals(YarnApplicationAttemptState.SCHEDULED, + applicationAttempt.createApplicationAttemptState()); + testAppAttemptFailedState(null, FAILED_DIAGNOSTICS); + } + + @Test(timeout = 30000) + public void testAllocatedToFailedUserTriggeredFailEvent() { + Container amContainer = allocateApplicationAttempt(); + assertEquals(YarnApplicationAttemptState.ALLOCATED, + applicationAttempt.createApplicationAttemptState()); + applicationAttempt.handle(new RMAppAttemptFailedEvent(applicationAttempt + .getAppAttemptId(), FAILED_DIAGNOSTICS)); + testAppAttemptFailedState(amContainer, FAILED_DIAGNOSTICS); + } + + @Test(timeout = 30000) + public void testRunningToFailedUserTriggeredFailEvent() { + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false); + applicationAttempt.handle(new RMAppAttemptFailedEvent(applicationAttempt + .getAppAttemptId(), FAILED_DIAGNOSTICS)); + assertEquals(RMAppAttemptState.FINAL_SAVING, + applicationAttempt.getAppAttemptState()); + + sendAttemptUpdateSavedEvent(applicationAttempt); + assertEquals(RMAppAttemptState.FAILED, + applicationAttempt.getAppAttemptState()); + + NodeId anyNodeId = NodeId.newInstance("host", 1234); + applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( + applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( + amContainer.getId(), ContainerState.COMPLETE, "", 0, + amContainer.getResource()), anyNodeId)); + + assertEquals(1, applicationAttempt.getJustFinishedContainers().size()); + assertEquals(amContainer, applicationAttempt.getMasterContainer()); + assertEquals(0, application.getRanNodes().size()); + String rmAppPageUrl = + pjoin(RM_WEBAPP_ADDR, "cluster", "app", applicationAttempt + .getAppAttemptId().getApplicationId()); + assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl()); + assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl()); + verifyAMHostAndPortInvalidated(); + verifyApplicationAttemptFinished(RMAppAttemptState.FAILED); + } + private void verifyAMCrashAtAllocatedDiagnosticInfo(String diagnostics, int exitCode, boolean shouldCheckURL) { assertTrue("Diagnostic information does not point the logs to the users",