SLIDER-460 probes working more reliably
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/9c5ac0ff Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/9c5ac0ff Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/9c5ac0ff Branch: refs/heads/develop Commit: 9c5ac0ffa0c4ea90d50ea5cbce71278b3a6d4098 Parents: 75030d2 Author: Steve Loughran <ste...@apache.org> Authored: Thu Oct 23 10:48:42 2014 +0100 Committer: Steve Loughran <ste...@apache.org> Committed: Thu Oct 23 11:35:55 2014 +0100 ---------------------------------------------------------------------- .../framework/AgentCommandTestBase.groovy | 79 +------------ .../funtest/framework/CommandTestBase.groovy | 117 +++++++++++++++++-- .../lifecycle/AgentClusterLifecycleIT.groovy | 4 +- .../funtest/lifecycle/AgentFailuresIT.groovy | 1 - .../src/test/resources/log4j.properties | 2 +- 5 files changed, 113 insertions(+), 90 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy index 4a46f8b..8af51b4 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy @@ -21,7 +21,6 @@ package org.apache.slider.funtest.framework import groovy.util.logging.Slf4j import org.apache.hadoop.fs.Path import org.apache.hadoop.security.UserGroupInformation -import org.apache.slider.api.ClusterDescription import org.apache.slider.common.SliderExitCodes 
import org.apache.slider.common.params.Arguments import org.apache.slider.common.params.SliderActions @@ -125,77 +124,6 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { } } - public static void logShell(SliderShell shell) { - shell.dumpOutput(); - } - - - public ClusterDescription execStatus(String application) { - ClusterDescription cd - File statusFile = File.createTempFile("status", ".json") - try { - SliderShell shell = slider(EXIT_SUCCESS, - [ - ACTION_STATUS, - application, - ARG_OUTPUT, statusFile.absolutePath - ]) - - assert statusFile.exists() - cd = new ClusterDescription(); - cd.fromFile(statusFile) - return cd - } finally { - statusFile.delete() - } - } - - public int queryRequestedCount(String application, String role) { - ClusterDescription cd = execStatus(application) - int requestedCount = cd.statistics[role]["containers.requested"] - return requestedCount - } - - boolean hasRequestedContainerCountExceeded(Map<String, String> args) { - String application = args['application'] - String role = args['role'] - int expectedCount = args['limit'].toInteger(); - return queryRequestedCount(application, role) >= expectedCount - } - - void expectContainerCountExceeded(String application, String role, int limit) { - - repeatUntilTrue( - this.&hasRequestedContainerCountExceeded, - 50, - 1000 * 10, - [limit : Integer.toString(limit), - role : role, - application: application], - true, - "countainer count not reached") { - status(application).dumpOutput() - }; - - } - public ClusterDescription expectContainersLive(String clustername, - String component, - int count) { - ClusterDescription cd = execStatus(clustername) - assertContainersLive(cd, component, count) - return cd; - } - - public static void assertContainersLive(ClusterDescription clusterDescription, - String component, int count) { - log.info("Asserting component count.") - int instanceCount = clusterDescription.instances[component].size() - if (count != instanceCount) { - 
log.warn(clusterDescription.toString()) - } - assert count == instanceCount - } - public static String findLineEntry(SliderShell shell, String[] locaters) { int index = 0; def output = shell.out @@ -279,12 +207,9 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions { return } - log.info "Cleaning app instance, if exists, by name " + applicationName + describe "Teardown app instance " + applicationName + // forced freeze with wait teardown(applicationName) - - // sleep till the instance is frozen - sleep(1000 * 3) - SliderShell shell = slider([ ACTION_DESTROY, applicationName]) http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy index 44d07d8..4b75c56 100644 --- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy +++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy @@ -37,7 +37,6 @@ import org.apache.slider.api.ClusterDescription import org.apache.slider.common.tools.SliderUtils import org.apache.slider.client.SliderClient import org.apache.slider.test.SliderTestUtils -import org.junit.Assert import org.junit.Before import org.junit.BeforeClass import org.junit.Rule @@ -45,6 +44,7 @@ import org.junit.rules.Timeout import org.slf4j.Logger import org.slf4j.LoggerFactory import static org.apache.slider.common.SliderExitCodes.* +import static org.apache.slider.core.main.LauncherExitCodes.* import static org.apache.slider.funtest.framework.FuntestProperties.* import static org.apache.slider.common.params.Arguments.* import static org.apache.slider.common.params.SliderActions.* @@ -169,6 +169,20 @@ 
abstract class CommandTestBase extends SliderTestUtils { "and YARN RM @ ${SLIDER_CONFIG.get(YarnConfiguration.RM_ADDRESS)}") } + public static void assertContainersLive(ClusterDescription clusterDescription, + String component, int count) { + log.info("Asserting component count.") + int instanceCount = clusterDescription.instances[component].size() + if (count != instanceCount) { + log.warn(clusterDescription.toString()) + } + assert count == instanceCount + } + + public static void logShell(SliderShell shell) { + shell.dumpOutput(); + } + /** * give the test thread a name */ @@ -333,7 +347,7 @@ abstract class CommandTestBase extends SliderTestUtils { } static SliderShell freezeForce(String name) { - freeze(name, [ARG_FORCE]) + freeze(name, [ARG_FORCE, ARG_WAIT, "10000"]) } static SliderShell killContainer(String name, String containerID) { @@ -643,9 +657,12 @@ abstract class CommandTestBase extends SliderTestUtils { sleep(5000) ensureApplicationIsUp(cluster) + +/* def sleeptime = SLIDER_CONFIG.getInt(KEY_AM_RESTART_SLEEP_TIME, DEFAULT_AM_RESTART_SLEEP_TIME) sleep(sleeptime) +*/ ClusterDescription status status = sliderClient.clusterDescription @@ -653,18 +670,19 @@ abstract class CommandTestBase extends SliderTestUtils { } protected void ensureApplicationIsUp(String application) { - repeatUntilTrue(this.&isApplicationUp, + repeatUntilTrue(this.&isApplicationRunning, SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME, DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS), 1000, [application: application], true, 'Application did not start, failing test.') { + describe "final state of app that tests say is not up" exists(application,true).dumpOutput() } } - protected boolean isApplicationUp(Map<String, String> args) { + protected boolean isApplicationRunning(Map<String, String> args) { String applicationName = args['application']; return isApplicationInState(YarnApplicationState.RUNNING, applicationName); } @@ -686,14 +704,30 @@ abstract class CommandTestBase extends SliderTestUtils 
{ return shell.ret == 0 } - protected void repeatUntilTrue(Closure closure, + /** + * Repeat a probe until it succeeds, if it does not execute a failure + * closure then raise an exception with the supplied message + * @param probe probe + * @param maxAttempts max number of attempts + * @param sleepDur sleep between failing attempts + * @param args map of arguments to the probe + * @param failIfUnsuccessful if the probe fails after all the attempts + * should it raise an exception + * @param failureMessage message to include in exception raised + * @param failureHandler closure to invoke prior to the failure being raised + */ + protected void repeatUntilTrue(Closure probe, int maxAttempts, int sleepDur, Map args, - boolean failIfUnsuccessful = false, String message, + boolean failIfUnsuccessful = false, + String failureMessage, Closure failureHandler) { int attemptCount = 0 + boolean succeeded = false; while (attemptCount < maxAttempts) { - if (closure(args)) { + if (probe(args)) { // finished + log.debug("Success after $attemptCount attempt(s)") + succeeded = true; break }; attemptCount++; @@ -701,12 +735,77 @@ abstract class CommandTestBase extends SliderTestUtils { sleep(sleepDur) } - if (failIfUnsuccessful & attemptCount != maxAttempts) { + if (failIfUnsuccessful & !succeeded) { if (failureHandler) { failureHandler() } - fail(message) + fail(failureMessage) + } + } + + public ClusterDescription execStatus(String application) { + ClusterDescription cd + File statusFile = File.createTempFile("status", ".json") + try { + slider(EXIT_SUCCESS, + [ + ACTION_STATUS, + application, + ARG_OUTPUT, statusFile.absolutePath + ]) + + assert statusFile.exists() + cd = new ClusterDescription(); + cd.fromFile(statusFile) + return cd + } finally { + statusFile.delete() } } + public int queryRequestedCount(String application, String role) { + ClusterDescription cd = execStatus(application) + + if (!cd.statistics[role]) { + return 0; + } + def statsForRole = cd.statistics[role] 
+ def requested = statsForRole["containers.requested"] + assert null != statsForRole["containers.requested"] + int requestedCount = requested + return requestedCount + } + + boolean hasRequestedContainerCountExceeded(Map<String, String> args) { + String application = args['application'] + String role = args['role'] + int expectedCount = args['limit'].toInteger(); + return queryRequestedCount(application, role) >= expectedCount + } + + void expectContainerCountExceeded(String application, String role, int limit) { + + repeatUntilTrue( + this.&hasRequestedContainerCountExceeded, + 50, + 1000 * 10, + [limit : Integer.toString(limit), + role : role, + application: application], + true, + "countainer count not reached") { + describe "container count not reached" + status(application).dumpOutput() + }; + + } + + public ClusterDescription expectContainersLive(String clustername, + String component, + int count) { + ClusterDescription cd = execStatus(clustername) + assertContainersLive(cd, component, count) + return cd; + } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy index 0a5163a..dfdbf06 100644 --- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy @@ -147,7 +147,7 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase ARG_MESSAGE, "forced-freeze-in-test" ]) - describe " >>> Cluster is now frozen - 2nd time." + describe " >>> Cluster is now force frozen - 2nd time." 
//cluster is no longer live exists(0, CLUSTER, false) @@ -165,9 +165,9 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase describe " >>> Cluster is now thawed - 2nd time." - ClusterDescription status = killAmAndWaitForRestart(sliderClient, CLUSTER) describe " >>> Kill AM and wait for restart." + ClusterDescription status = killAmAndWaitForRestart(sliderClient, CLUSTER) def restarted = status.getInfo( StatusKeys.INFO_CONTAINERS_AM_RESTART) http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy index be8614c..ab6a811 100644 --- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy +++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy @@ -25,7 +25,6 @@ import org.apache.slider.common.params.Arguments import org.apache.slider.common.params.SliderActions import org.apache.slider.funtest.framework.AgentCommandTestBase import org.apache.slider.funtest.framework.FuntestProperties -import org.apache.slider.funtest.framework.SliderShell import org.junit.After import org.junit.Test http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/slider-funtest/src/test/resources/log4j.properties b/slider-funtest/src/test/resources/log4j.properties index a552a55..65135ca 100644 --- a/slider-funtest/src/test/resources/log4j.properties +++ b/slider-funtest/src/test/resources/log4j.properties @@ -42,7 +42,7 @@ 
log4j.logger.org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner=WARN log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN log4j.logger.org.apache.hadoop.hdfs=WARN - +log4j.logger.org.apache.hadoop.hdfs.shortcircuit=FATAL log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl=WARN