SLIDER-570 handling of launch failures
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/73462659 Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/73462659 Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/73462659 Branch: refs/heads/develop Commit: 734626596c0af041c53637ac8053eb50d0a8d169 Parents: 517042f Author: Steve Loughran <ste...@apache.org> Authored: Wed Oct 29 20:49:39 2014 +0000 Committer: Steve Loughran <ste...@apache.org> Committed: Fri Oct 31 11:07:49 2014 +0000 ---------------------------------------------------------------------- .../funtest/framework/CommandTestBase.groovy | 95 ++++++++++++++++++-- .../slider/funtest/framework/SliderShell.groovy | 4 +- .../funtest/lifecycle/AgentFailuresIT.groovy | 6 +- .../lifecycle/AgentLaunchFailureIT.groovy | 95 ++++++++++++++++++++ .../funtest/lifecycle/AgentRegistryIT.groovy | 6 +- 5 files changed, 189 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/73462659/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy index 7b50c60..7928642 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy @@ -30,12 +30,15 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.slider.api.StatusKeys import org.apache.slider.common.tools.ConfigHelper +import org.apache.slider.core.exceptions.SliderException +import org.apache.slider.core.launch.SerializedApplicationReport import org.apache.slider.core.main.ServiceLauncher import org.apache.slider.common.SliderKeys import org.apache.slider.common.SliderXmlConfKeys import org.apache.slider.api.ClusterDescription import org.apache.slider.common.tools.SliderUtils import org.apache.slider.client.SliderClient +import org.apache.slider.core.persist.ApplicationReportSerDeser import org.apache.slider.test.SliderTestUtils import org.junit.Before import org.junit.BeforeClass @@ -369,6 +372,20 @@ abstract class CommandTestBase extends SliderTestUtils { slider(cmd) } + static SliderShell lookup(int result, String id, File out) { + assert id + def commands = [ACTION_LOOKUP, ARG_ID, id] + if (out) commands += [ARG_OUTPUT, out.absolutePath] + slider(result, commands) + } + + static SliderShell lookup(String id, File out) { + assert id + def commands = [ACTION_LOOKUP, ARG_ID, id] + if (out) commands += [ARG_OUTPUT, out.absolutePath] + slider(commands) + } + static SliderShell list(int result, Collection<String> commands =[]) { slider(result, [ACTION_LIST] + commands ) } @@ -608,11 +625,18 @@ abstract class CommandTestBase extends SliderTestUtils { String name, String appTemplate, String resourceTemplate, - List<String> extraArgs=[]) { + List<String> extraArgs = [], + File launchReport = null) { + + if (!launchReport) { + launchReport = createAppReportFile() + } + List<String> commands = [ ACTION_CREATE, name, ARG_TEMPLATE, appTemplate, ARG_RESOURCES, resourceTemplate, + ARG_OUTPUT, launchReport.absolutePath, ARG_WAIT, Integer.toString(THAW_WAIT_TIME) ] @@ -633,20 +657,35 @@ abstract class CommandTestBase extends SliderTestUtils { shell.execute() if (!shell.execute()) { // app has failed. - + // grab the app report of the last known instance of this app // which may not be there if it was a config failure; may be out of date // from a previous run - log.error("Launch failed with exit code ${shell.ret}.\nLast instance of $name:") - slider([ACTION_LIST, name, ARG_VERBOSE]).dumpOutput() - - // trigger the assertion failure - shell.assertExitCode(EXIT_SUCCESS) + log.error( + "Launch failed with exit code ${shell.ret}") + shell.dumpOutput() + + // now grab that app report if it is there + def appReport = maybeLookupFromLaunchReport(launchReport) + String extraText = "" + if (appReport) { + log.error("Application report:\n$appReport") + extraText = appReport.diagnostics + } + + fail("Application Launch Failure, exit code ${shell.ret}\n${extraText}") } - return shell } + public File createAppReportFile() { + File reportFile = File.createTempFile( + "launch", + ".json", + new File("target")) + return reportFile + } + /** * If the option is not null/empty, add the command and the option * @param args arg list being built up @@ -662,7 +701,47 @@ abstract class CommandTestBase extends SliderTestUtils { } return args } + + public SerializedApplicationReport maybeLoadAppReport(File reportFile) { + if (reportFile.exists() && reportFile.length()> 0) { + ApplicationReportSerDeser serDeser = new ApplicationReportSerDeser() + def report = serDeser.fromFile(reportFile) + return report + } + return null; + } + + public SerializedApplicationReport maybeLookupFromLaunchReport(File launchReport) { + def report = maybeLoadAppReport(launchReport) + if (report) { + return lookupApplication(report.applicationId) + } else { + return null + } + } + + /** + * Lookup an application, return null if loading failed + * @param id application ID + * @return an application report or null + */ + public SerializedApplicationReport lookupApplication(String id) { + File reportFile = createAppReportFile(); + try { + def shell = lookup(id, reportFile) + if (shell.ret) { + return maybeLoadAppReport(reportFile) + } else { + log.warn("Lookup operation failed:\n" + shell.dumpOutput()) + return null + } + } finally { + reportFile.delete() + + } + } + public Path buildClusterPath(String clustername) { return new Path( clusterFS.homeDirectory, http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/73462659/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy index 43ac477..31830d9 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/SliderShell.groovy @@ -223,11 +223,11 @@ class SliderShell extends Shell { * if not the output is printed and an assertion is raised * @param errorCode expected error code */ - public void assertExitCode(int errorCode) { + public void assertExitCode(int errorCode, String extra="") { if (this.ret != errorCode) { dumpOutput() throw new SliderException(ret, - "Expected exit code of command ${command} : ${errorCode} - actual=${ret}") + "Expected exit code of command ${command} : ${errorCode} - actual=${ret} $extra") } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/73462659/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy index a4eb1a2..3847e3f 100644 --- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy @@ -50,9 +50,9 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { } cleanup(APPLICATION_NAME) - def shell = createTemplatedSliderApplication( APPLICATION_NAME, - APP_TEMPLATE2, - APP_RESOURCE) + def shell = createTemplatedSliderApplication(APPLICATION_NAME, + APP_TEMPLATE2, + APP_RESOURCE) logShell(shell) http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/73462659/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy new file mode 100644 index 0000000..ce1e0f1 --- /dev/null +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.slider.funtest.lifecycle + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import org.apache.hadoop.registry.client.binding.RegistryUtils +import org.apache.hadoop.registry.client.types.Endpoint +import org.apache.hadoop.registry.client.types.ServiceRecord +import org.apache.slider.api.InternalKeys +import org.apache.slider.common.SliderExitCodes +import org.apache.slider.common.SliderKeys +import org.apache.slider.common.params.Arguments +import org.apache.slider.common.params.SliderActions +import org.apache.slider.funtest.framework.AgentCommandTestBase +import org.apache.slider.funtest.framework.FuntestProperties +import org.apache.slider.funtest.framework.SliderShell +import org.junit.After +import org.junit.Before +import org.junit.Test + +import static org.apache.slider.core.registry.info.CustomRegistryConstants.* + +@CompileStatic +@Slf4j +public class AgentLaunchFailureIT extends AgentCommandTestBase + implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { + + + static String CLUSTER = "test-agent-launchfail" + + static String APP_RESOURCE2 = "../slider-core/src/test/app_packages/test_command_log/resources_no_role.json" + + + @Before + public void prepareCluster() { + setupCluster(CLUSTER) + } + + @After + public void destroyCluster() { + cleanup(CLUSTER) + } + + @Test + public void testAgentLaunchFailure() throws Throwable { + describe("Create a failing cluster and validate failure logic") + + // create an AM which fails to launch within a second + File launchReportFile = createAppReportFile(); + SliderShell shell = createTemplatedSliderApplication(CLUSTER, + APP_TEMPLATE, + APP_RESOURCE2, + [ + ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_ENABLED, "true", + ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "1", + ARG_INTERNAL, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "100", + ], + launchReportFile) + + maybeLookupFromLaunchReport(launchReportFile) + ensureApplicationIsUp(CLUSTER) + + + //stop + freeze(0, CLUSTER, + [ + ARG_FORCE, + ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME), + ARG_MESSAGE, "final-shutdown" + ]) + + destroy(0, CLUSTER) + + //cluster now missing + exists(EXIT_UNKNOWN_INSTANCE, CLUSTER) + + } +} http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/73462659/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy index 50da8ae..16e65fa 100644 --- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy @@ -49,9 +49,7 @@ public class AgentRegistryIT extends AgentCommandTestBase @Before public void prepareCluster() { setupCluster(CLUSTER) - - - } + } @After public void destroyCluster() { @@ -59,7 +57,7 @@ public class AgentRegistryIT extends AgentCommandTestBase } @Test - public void testAgentClusterLifecycle() throws Throwable { + public void testAgentRegistry() throws Throwable { describe("Create a 0-role cluster and make registry queries against it") // sanity check to verify the config is correct