SLIDER-202: the chaos monkey has mock tests and is set to run in the AM if enabled
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/52bd11b5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/52bd11b5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/52bd11b5 Branch: refs/heads/feature/SLIDER-149_Support_a_YARN_service_registry Commit: 52bd11b5d4d3a17f231e5343c0bfa6c39492fbb3 Parents: 8ed52a9 Author: Steve Loughran <ste...@apache.org> Authored: Wed Aug 13 15:06:11 2014 +0100 Committer: Steve Loughran <ste...@apache.org> Committed: Wed Aug 13 15:06:11 2014 +0100 ---------------------------------------------------------------------- .../org/apache/slider/api/ResourceKeys.java | 26 +-- .../apache/slider/common/tools/SliderUtils.java | 6 +- .../apache/slider/core/conf/MapOperations.java | 54 ++++++- .../providers/agent/AgentClientProvider.java | 8 +- .../providers/agent/AgentProviderService.java | 7 +- .../server/appmaster/SliderAppMaster.java | 88 ++++++---- .../appmaster/actions/ActionKillContainer.java | 24 ++- .../server/appmaster/actions/QueueService.java | 8 +- .../server/appmaster/monkey/ChaosEntry.java | 12 +- .../server/appmaster/monkey/ChaosKillAM.java | 12 +- .../appmaster/monkey/ChaosKillContainer.java | 25 +-- .../appmaster/monkey/ChaosMonkeyService.java | 23 ++- .../model/mock/MockRMOperationHandler.groovy | 7 +- .../model/monkey/TestMockMonkey.groovy | 160 +++++++++++++++++++ .../apache/slider/test/SliderTestBase.groovy | 7 +- 15 files changed, 360 insertions(+), 107 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java index 
c2b78cc..3d54140 100644 --- a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java +++ b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java @@ -112,34 +112,14 @@ public interface ResourceKeys { "yarn.container.failure.threshold"; /** - * Minutes range of the container failure reset window. - * This is combined with the hour and day values to - * produce the full window. + * prefix for the time of the container failure reset window. * {@value} */ - String CONTAINER_FAILURE_WINDOW_MINUTES = - "yarn.container.failure.window.minutes"; - - - /** - * Hours range of the container failure reset window - * This is combined with the minute and day values to - * produce the full window. - * {@value} - */ - String CONTAINER_FAILURE_WINDOW_HOURS = - "yarn.container.failure.window.hours"; + String CONTAINER_FAILURE_WINDOW = + "yarn.container.failure.window"; - /** - * Hours range of the container failure reset window - * This is combined with the hour and minute values to - * produce the full window. 
- * {@value} - */ - String CONTAINER_FAILURE_WINDOW_DAYS = - "yarn.container.failure.window.days"; int DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS = 0; int DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS = 6; http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java index edbf2b2..d7f159c 100644 --- a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java +++ b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java @@ -43,7 +43,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.slider.api.OptionKeys; +import org.apache.slider.api.InternalKeys; import org.apache.slider.api.RoleKeys; import org.apache.slider.common.SliderKeys; import org.apache.slider.common.SliderXmlConfKeys; @@ -1303,8 +1303,8 @@ public final class SliderUtils { SliderException, IOException { Path imagePath; String imagePathOption = - internalOptions.get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH); - String appHomeOption = internalOptions.get(OptionKeys.INTERNAL_APPLICATION_HOME); + internalOptions.get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH); + String appHomeOption = internalOptions.get(InternalKeys.INTERNAL_APPLICATION_HOME); if (!isUnset(imagePathOption)) { imagePath = fs.createPathThatMustExist(imagePathOption); } else { http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java ---------------------------------------------------------------------- diff --git 
a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java index bb57b94..98a51bc 100644 --- a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java +++ b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java @@ -18,6 +18,7 @@ package org.apache.slider.core.conf; +import com.google.common.base.Preconditions; import org.apache.slider.common.tools.SliderUtils; import org.apache.slider.core.exceptions.BadConfigException; import org.slf4j.Logger; @@ -61,12 +62,11 @@ public class MapOperations implements Map<String, String> { this.name = name; } - /** - * Get a cluster option or value + * Get an option value * - * @param key - * @param defVal + * @param key key + * @param defVal default value * @return option in map or the default */ public String getOption(String key, String defVal) { @@ -74,14 +74,27 @@ public class MapOperations implements Map<String, String> { return val != null ? val : defVal; } + /** + * Get a boolean option + * + * @param key option key + * @param defVal default value + * @return option true if the option equals "true", or the default value + * if the option was not defined at all. 
+ */ + public Boolean getOptionBool(String key, boolean defVal) { + String val = getOption(key, Boolean.toString(defVal)); + return Boolean.valueOf(val); + } /** * Get a cluster option or value * - * @param key + * @param key option key * @return the value * @throws BadConfigException if the option is missing */ + public String getMandatoryOption(String key) throws BadConfigException { String val = options.get(key); if (val == null) { @@ -247,4 +260,35 @@ public class MapOperations implements Map<String, String> { } return builder.toString(); } + + /** + * Get the time range of a set of keys + * @param basekey + * @param defDays + * @param defHours + * @param defMins + * @param defSecs + * @return + */ + public long getTimeRange(String basekey, + int defDays, + int defHours, + int defMins, + int defSecs) { + Preconditions.checkArgument(basekey != null); + int days = getOptionInt(basekey + ".days", defDays); + int hours = getOptionInt(basekey + ".hours", defHours); + + int minutes = getOptionInt(basekey + ".minutes", defMins); + int seconds = getOptionInt(basekey + ".seconds", defSecs); + // range check + Preconditions.checkState(days >= 0 && hours >= 0 && minutes >= 0 + && seconds >= 0, + "Time range for %s has negative time component %s:%s:%s:%s", + basekey, days, hours, minutes, seconds); + + // calculate total time, schedule the reset if expected + long totalMinutes = days * 24 * 60 + hours * 24 + minutes; + return totalMinutes * 60 + seconds; + } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java index 2ce16c7..72b0a42 100644 --- 
a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java +++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentClientProvider.java @@ -21,7 +21,7 @@ package org.apache.slider.providers.agent; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.slider.api.OptionKeys; +import org.apache.slider.api.InternalKeys; import org.apache.slider.api.ResourceKeys; import org.apache.slider.common.SliderKeys; import org.apache.slider.common.tools.SliderFileSystem; @@ -109,7 +109,7 @@ public class AgentClientProvider extends AbstractClientProvider getGlobalOptions().get(AgentKeys.PACKAGE_PATH); if (SliderUtils.isUnset(appHome)) { String agentImage = instanceDefinition.getInternalOperations(). - get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH); + get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH); sliderFileSystem.verifyFileExists(new Path(agentImage)); } } @@ -174,12 +174,12 @@ public class AgentClientProvider extends AbstractClientProvider String appHome = instanceDefinition.getAppConfOperations(). getGlobalOptions().get(AgentKeys.PACKAGE_PATH); String agentImage = instanceDefinition.getInternalOperations(). 
- get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH); + get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH); if (SliderUtils.isUnset(appHome) && SliderUtils.isUnset(agentImage)) { throw new BadConfigException("Either agent package path " + AgentKeys.PACKAGE_PATH + " or image root " + - OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH + InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH + " must be provided."); } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java index 59a6e29..0c57b79 100644 --- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java +++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.slider.api.ClusterDescription; import org.apache.slider.api.ClusterDescriptionKeys; import org.apache.slider.api.ClusterNode; +import org.apache.slider.api.InternalKeys; import org.apache.slider.api.OptionKeys; import org.apache.slider.api.StatusKeys; import org.apache.slider.common.SliderKeys; @@ -62,8 +63,6 @@ import org.apache.slider.providers.agent.application.metadata.Metainfo; import org.apache.slider.providers.agent.application.metadata.OSPackage; import org.apache.slider.providers.agent.application.metadata.OSSpecific; import org.apache.slider.server.appmaster.actions.ProviderReportedContainerLoss; -import org.apache.slider.server.appmaster.state.ContainerPriority; -import org.apache.slider.server.appmaster.state.RoleStatus; import org.apache.slider.server.appmaster.state.StateAccessForProviders; import 
org.apache.slider.server.appmaster.web.rest.agent.AgentCommandType; import org.apache.slider.server.appmaster.web.rest.agent.AgentRestOperations; @@ -265,7 +264,7 @@ public class AgentProviderService extends AbstractProviderService implements log.info("PYTHONPATH set to {}", pythonPath); String agentImage = instanceDefinition.getInternalOperations(). - get(OptionKeys.INTERNAL_APPLICATION_IMAGE_PATH); + get(InternalKeys.INTERNAL_APPLICATION_IMAGE_PATH); if (agentImage != null) { LocalResource agentImageRes = fileSystem.createAmResource(new Path(agentImage), LocalResourceType.ARCHIVE); launcher.addLocalResource(AgentKeys.AGENT_INSTALL_DIR, agentImageRes); @@ -1169,7 +1168,7 @@ public class AgentProviderService extends AbstractProviderService implements tokens.put("${DEFAULT_DATA_DIR}", getAmState() .getInternalsSnapshot() .getGlobalOptions() - .getMandatoryOption(OptionKeys.INTERNAL_DATA_DIR_PATH)); + .getMandatoryOption(InternalKeys.INTERNAL_DATA_DIR_PATH)); return tokens; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java index e6c7aba..1642cc5 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java @@ -19,6 +19,7 @@ package org.apache.slider.server.appmaster; import com.codahale.metrics.MetricRegistry; +import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.BlockingService; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -1195,31 +1196,19 @@ public class SliderAppMaster extends AbstractSliderLaunchedService 
ResetFailureWindow reset = new ResetFailureWindow(); ConfTreeOperations ops = new ConfTreeOperations(resources); MapOperations globals = ops.getGlobalOptions(); - int days = globals.getOptionInt(ResourceKeys.CONTAINER_FAILURE_WINDOW_DAYS, - ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS); - int hours = globals.getOptionInt( - ResourceKeys.CONTAINER_FAILURE_WINDOW_HOURS, - ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS); - int minutes = globals.getOptionInt( - ResourceKeys.CONTAINER_FAILURE_WINDOW_MINUTES, - ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_MINUTES); - // range check - if (days < 0 || hours < 0 || minutes < 0) { - throw new BadConfigException("Failure window contains negative values:" - + "days=%d hours=%d, minutes=%d", - days, hours, minutes); - } - // calculate total time, schedule the reset if expected - int totalMinutes = days * 24 * 60 + hours * 24 + minutes; - if (totalMinutes > 0) { + long seconds = globals.getTimeRange(ResourceKeys.CONTAINER_FAILURE_WINDOW, + ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_DAYS, + ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_HOURS, + ResourceKeys.DEFAULT_CONTAINER_FAILURE_WINDOW_MINUTES, 0); + if (seconds > 0) { log.info( - "Scheduling the failure window reset interval to every {} minutes", - totalMinutes); + "Scheduling the failure window reset interval to every {} seconds", + seconds); RenewingAction<ResetFailureWindow> renew = new RenewingAction<>( - reset, totalMinutes, totalMinutes, TimeUnit.MINUTES, 0); + reset, seconds, seconds, TimeUnit.SECONDS, 0); actionQueues.renewing("failures", renew); } else { - log.warn("Failure window reset interval is not set"); + log.info("Failure window reset interval is not set"); } } @@ -1458,7 +1447,8 @@ public class SliderAppMaster extends AbstractSliderLaunchedService //throws NoSuchNodeException if it is missing RoleInstance instance = appState.getLiveInstanceByContainerID(containerID); - queue(new ActionKillContainer(instance.getId(), 0, TimeUnit.MILLISECONDS)); 
+ queue(new ActionKillContainer(instance.getId(), 0, TimeUnit.MILLISECONDS, + rmOperationHandler)); Messages.KillContainerResponseProto.Builder builder = Messages.KillContainerResponseProto.newBuilder(); builder.setSuccess(true); @@ -1469,6 +1459,15 @@ public class SliderAppMaster extends AbstractSliderLaunchedService rmOperationHandler.execute(operations); } + + /** + * Get the RM operations handler for direct scheduling of work. + */ + @VisibleForTesting + public RMOperationHandler getRmOperationHandler() { + return rmOperationHandler; + } + @Override public Messages.AMSuicideResponseProto amSuicide( Messages.AMSuicideRequestProto request) @@ -1742,18 +1741,45 @@ public class SliderAppMaster extends AbstractSliderLaunchedService } public boolean maybeStartMonkey() { + MapOperations internals = getGlobalInternalOptions(); + + Boolean enabled = + internals.getOptionBool(InternalKeys.INTERNAL_CHAOS_MONKEY_ENABLED, + InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_ENABLED); + if (!enabled) { + log.info("Chaos monkey disabled"); + } + + long monkeyInterval = internals.getTimeRange( + InternalKeys.INTERNAL_CHAOS_MONKEY_RATE, + InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_DAYS, + InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_HOURS, + InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_MINUTES, + 0); + log.info("Adding Chaos Monkey scheduled every {} seconds ({} hours)", + monkeyInterval, monkeyInterval/(60*60)); monkey = new ChaosMonkeyService(metrics, actionQueues); - int amKillProbability = 100; - int containerKillProbability = 200; - monkey.addTarget("AM killer", - new ChaosKillAM(this, actionQueues, -1), - amKillProbability); - monkey.addTarget("Container killer", - new ChaosKillContainer(this, actionQueues), - amKillProbability); + int amKillProbability = internals.getOptionInt( + InternalKeys.INTERNAL_CHAOS_MONKEY_PROBABILITY_AM_FAILURE, + InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE); + if (amKillProbability > 0) { + log.info("Adding AM killer with 
probability %f", amKillProbability/100.0); + monkey.addTarget("AM killer", + new ChaosKillAM(actionQueues, -1), amKillProbability + ); + } + int containerKillProbability = internals.getOptionInt( + InternalKeys.INTERNAL_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE, + InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE); + if (containerKillProbability > 0) { + monkey.addTarget("Container killer", + new ChaosKillContainer(appState, actionQueues, rmOperationHandler), + amKillProbability + ); + } initAndAddService(monkey); // and schedule it - schedule(monkey.getChaosAction(60, TimeUnit.SECONDS)); + schedule(monkey.getChaosAction(monkeyInterval, TimeUnit.SECONDS)); return true; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java index 7668bd5..95edca4 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionKillContainer.java @@ -18,10 +18,12 @@ package org.apache.slider.server.appmaster.actions; +import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.slider.server.appmaster.SliderAppMaster; import org.apache.slider.server.appmaster.operations.AbstractRMOperation; import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation; +import org.apache.slider.server.appmaster.operations.RMOperationHandler; import org.apache.slider.server.appmaster.state.AppState; import java.util.LinkedList; @@ -31,13 +33,27 @@ import java.util.concurrent.TimeUnit; public class 
ActionKillContainer extends AsyncAction { private final ContainerId containerId; - - public ActionKillContainer(ContainerId containerId, long delay, - TimeUnit timeUnit) { + private final RMOperationHandler operationHandler; + public ActionKillContainer( + ContainerId containerId, + long delay, + TimeUnit timeUnit, + RMOperationHandler operationHandler) { super("kill container", delay, timeUnit); + this.operationHandler = operationHandler; + Preconditions.checkArgument(containerId != null); + this.containerId = containerId; } + /** + * Get the container ID to kill + * @return + */ + public ContainerId getContainerId() { + return containerId; + } + @Override public void execute(SliderAppMaster appMaster, QueueAccess queueService, @@ -46,6 +62,6 @@ public class ActionKillContainer extends AsyncAction { ContainerReleaseOperation release = new ContainerReleaseOperation(containerId); opsList.add(release); //now apply the operations - appMaster.executeRMOperations(opsList); + operationHandler.execute(opsList); } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java index 1154a44..ac59096 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/QueueService.java @@ -55,18 +55,14 @@ implements Runnable, QueueAccess { /** * Immediate actions. - * Package scoped for testing. 
*/ - @VisibleForTesting - final BlockingDeque<AsyncAction> actionQueue = + public final BlockingDeque<AsyncAction> actionQueue = new LinkedBlockingDeque<>(); /** * Actions to be scheduled in the future - * Package scoped for testing. */ - @VisibleForTesting - final DelayQueue<AsyncAction> scheduledActions = new DelayQueue<>(); + public final DelayQueue<AsyncAction> scheduledActions = new DelayQueue<>(); /** * Map of renewing actions by name ... this is to allow them to http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java index 13bda57..5905d2f 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosEntry.java @@ -41,7 +41,7 @@ public class ChaosEntry { /** - * Constructor -includes validaton of all arguments + * Constructor -includes validation of all arguments * @param name * @param target * @param probability @@ -51,7 +51,8 @@ public class ChaosEntry { Preconditions.checkArgument(!StringUtils.isEmpty(name), "missing name"); Preconditions.checkArgument(target != null, "null target"); Preconditions.checkArgument(probability > 0, "negative probability"); - Preconditions.checkArgument(probability < 10000, "probability over 100%"); + Preconditions.checkArgument(probability <= ChaosMonkeyService.PERCENT_100, + "probability over 100%"); this.name = name; this.target = target; this.probability = probability; @@ -71,11 +72,12 @@ public class ChaosEntry { /** * Invoke Chaos if the trigger value is in range of the probability - * @param trigger trigger value, 0-10K + * @param value trigger value, 0-10K * 
@return true if the chaos method was invoked */ - public boolean maybeInvokeChaos(long trigger) { - if (probability < 0) { + public boolean maybeInvokeChaos(long value) { + log.debug("Probability {} trigger={}", probability, value); + if (value < probability) { invokeChaos(); return true; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java index c464ac3..3c1a914 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillAM.java @@ -18,21 +18,21 @@ package org.apache.slider.server.appmaster.monkey; -import org.apache.slider.server.appmaster.SliderAppMaster; import org.apache.slider.server.appmaster.actions.ActionHalt; import org.apache.slider.server.appmaster.actions.QueueAccess; import java.util.concurrent.TimeUnit; +/** + * Kill the AM + */ public class ChaosKillAM implements ChaosTarget { - private final SliderAppMaster appMaster; + public static final int DELAY = 1000; private final QueueAccess queues; private final int exitCode; - public ChaosKillAM(SliderAppMaster appMaster, - QueueAccess queues, int exitCode) { - this.appMaster = appMaster; + public ChaosKillAM(QueueAccess queues, int exitCode) { this.queues = queues; this.exitCode = exitCode; } @@ -42,7 +42,7 @@ public class ChaosKillAM implements ChaosTarget { */ @Override public void chaosAction() { - queues.schedule(new ActionHalt(exitCode, "Chaos invoked halt", 1000, + queues.schedule(new ActionHalt(exitCode, "Chaos invoked halt", DELAY, TimeUnit.MILLISECONDS)); } } 
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java index 0ffcc8a..daf2590 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java @@ -18,9 +18,11 @@ package org.apache.slider.server.appmaster.monkey; -import org.apache.slider.server.appmaster.SliderAppMaster; +import com.google.common.base.Preconditions; import org.apache.slider.server.appmaster.actions.ActionKillContainer; import org.apache.slider.server.appmaster.actions.QueueAccess; +import org.apache.slider.server.appmaster.operations.RMOperationHandler; +import org.apache.slider.server.appmaster.state.AppState; import org.apache.slider.server.appmaster.state.RoleInstance; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,24 +37,29 @@ import java.util.concurrent.TimeUnit; public class ChaosKillContainer implements ChaosTarget { protected static final Logger log = LoggerFactory.getLogger(ChaosKillContainer.class); - private final SliderAppMaster appMaster; + public static final int DELAY = 100; + private final AppState appState; private final QueueAccess queues; private final Random random = new Random(); + private final RMOperationHandler operationHandler; - public ChaosKillContainer(SliderAppMaster appMaster, - QueueAccess queues) { - this.appMaster = appMaster; + public ChaosKillContainer(AppState appState, + QueueAccess queues, + RMOperationHandler operationHandler) { + Preconditions.checkNotNull(appState); + Preconditions.checkNotNull(queues); + this.appState = appState; 
this.queues = queues; + this.operationHandler = operationHandler; } /** - * Trigger a container kill halt + * Trigger a container kill */ - @Override public void chaosAction() { List<RoleInstance> liveContainers = - appMaster.getAppState().cloneLiveContainerInfoList(); + appState.cloneLiveContainerInfoList(); int size = liveContainers.size(); if (size == 0) { log.info("No containers to kill"); @@ -63,6 +70,6 @@ public class ChaosKillContainer implements ChaosTarget { log.info("Killing {}", roleInstance); queues.schedule(new ActionKillContainer(roleInstance.getId(), - 100, TimeUnit.MILLISECONDS)); + DELAY, TimeUnit.MILLISECONDS, operationHandler)); } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java index c525d02..cd4b46b 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosMonkeyService.java @@ -20,7 +20,6 @@ package org.apache.slider.server.appmaster.monkey; import com.codahale.metrics.MetricRegistry; import org.apache.hadoop.service.AbstractService; -import org.apache.slider.server.appmaster.actions.AsyncAction; import org.apache.slider.server.appmaster.actions.QueueAccess; import org.apache.slider.server.appmaster.actions.RenewingAction; import org.slf4j.Logger; @@ -28,6 +27,7 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; +import java.util.Random; import java.util.concurrent.TimeUnit; /** @@ -36,8 +36,16 @@ import java.util.concurrent.TimeUnit; public class ChaosMonkeyService extends AbstractService { protected static 
final Logger log = LoggerFactory.getLogger(ChaosMonkeyService.class); + public static final int PERCENT_1 = 100; + public static final double PERCENT_1D = 100.0; + + /** + * the percentage value as multiplied up + */ + public static final int PERCENT_100 = 100 * PERCENT_1; private final MetricRegistry metrics; private final QueueAccess queues; + private final Random random = new Random(); private static final List<ChaosEntry> chaosEntries = new ArrayList<ChaosEntry>(); @@ -50,14 +58,19 @@ public class ChaosMonkeyService extends AbstractService { public synchronized void addTarget(String name, - ChaosTarget target, - long probability) { - + ChaosTarget target, long probability) { + log.info("Adding {} with probability {}", name, probability / PERCENT_1); chaosEntries.add(new ChaosEntry(name, target, probability, metrics)); } + /** + * Iterate through all the entries and invoke chaos on those wanted + */ public void play() { - + for (ChaosEntry chaosEntry : chaosEntries) { + long p = random.nextInt(PERCENT_100); + chaosEntry.maybeInvokeChaos(p); + } } public RenewingAction<MonkeyPlayAction> getChaosAction(long time, TimeUnit timeUnit) { http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy index 8ccd463..0fdba6b 100644 --- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy @@ -29,17 +29,20 @@ import org.apache.slider.server.appmaster.operations.RMOperationHandler @Slf4j class MockRMOperationHandler 
extends RMOperationHandler { public List<AbstractRMOperation> operations = []; - + int requests, releases; + @Override public void releaseAssignedContainer(ContainerId containerId) { operations.add(new ContainerReleaseOperation(containerId)) log.info("Releasing container ID " + containerId.getId()) + releases++; } @Override public void addContainerRequest(AMRMClient.ContainerRequest req) { operations.add(new ContainerRequestOperation(req)) log.info("Requesting container role #" + req.priority); + requests++; } /** @@ -47,5 +50,7 @@ class MockRMOperationHandler extends RMOperationHandler { */ public void clear() { operations.clear() + releases = 0; + requests = 0; } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy new file mode 100644 index 0000000..c789011 --- /dev/null +++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.slider.server.appmaster.model.monkey + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.apache.slider.server.appmaster.actions.ActionHalt +import org.apache.slider.server.appmaster.actions.ActionKillContainer +import org.apache.slider.server.appmaster.actions.QueueService +import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest +import org.apache.slider.server.appmaster.model.mock.MockRMOperationHandler +import org.apache.slider.server.appmaster.monkey.ChaosKillAM +import org.apache.slider.server.appmaster.monkey.ChaosKillContainer +import org.apache.slider.server.appmaster.monkey.ChaosMonkeyService +import org.apache.slider.server.appmaster.monkey.ChaosTarget +import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation +import org.apache.slider.server.appmaster.state.RoleInstance +import org.junit.Before +import org.junit.Test +/** + * Tests of the chaos monkey classes (ChaosMonkeyService, ChaosKillAM and + * ChaosKillContainer), using the mock application state inherited from + * BaseMockAppStateTest and a queue service that is never started. + */ +@CompileStatic +@Slf4j +class TestMockMonkey extends BaseMockAppStateTest { + + /** + * This queue service is initialized but NOT started; tests need to poll + * the queue for scheduled actions rather than expect them to be executed + */ + QueueService queues = new QueueService(); + ChaosMonkeyService monkey = new ChaosMonkeyService(metricRegistry, + queues) + /** + * Initializes (but does not start) the queue and monkey services with a + * fresh YarnConfiguration before each test + */ + @Before + public void init() { + def configuration = new YarnConfiguration() + queues.init(configuration) + monkey.init(configuration) + } + /** + * Starts the monkey service, then stops it straight away + */ + @Test + public void testMonkeyStart() throws Throwable { + monkey.start() + monkey.stop() + } + + /** + * A target added at PERCENT_100 is expected to be hit exactly once by a + * single call to play() + */ + @Test + public
void testMonkeyPlay() throws Throwable { + ChaosCounter counter = new ChaosCounter() + monkey.addTarget("target", counter, ChaosMonkeyService.PERCENT_100) + + monkey.play() + assert counter.count == 1 + } + /** + * Plays the monkey 100 times against two targets added at PERCENT_1 * 50 + * and PERCENT_1 * 25, logs both hit counts and checks them against loose + * bounds + */ + @Test + public void testMonkeyPlaySometimes() throws Throwable { + ChaosCounter counter = new ChaosCounter() + ChaosCounter counter2 = new ChaosCounter() + monkey.addTarget("target1", counter, ChaosMonkeyService.PERCENT_1 * 50) + monkey.addTarget("target2", counter2, ChaosMonkeyService.PERCENT_1 * 25) + + for (int i = 0; i < 100; i++) { + monkey.play() + } + log.info("Counter1 = ${counter.count} counter2 = ${counter2.count}") + /* + * Relying on probability here to give approximate answers, so the + * bounds are deliberately loose. NOTE(review): presumably play() is + * randomized, in which case a rare chance failure cannot be ruled out + */ + assert counter.count > 25 + assert counter.count < 75 + assert counter2.count < counter.count + } + /** + * ChaosKillAM.chaosAction() is expected to leave exactly one scheduled + * action on the queue, and that action must be an ActionHalt. + * NOTE(review): the meaning of the -1 constructor argument is not visible + * here; confirm against ChaosKillAM + */ + @Test + public void testAMKiller() throws Throwable { + + def chaos = new ChaosKillAM(queues, -1) + chaos.chaosAction(); + assert queues.scheduledActions.size() == 1 + def action = queues.scheduledActions.take() + assert action instanceof ActionHalt + } + + /** + * When the test has not created any containers, the container killer is + * expected to schedule nothing + */ + @Test + public void testContainerKillerEmptyApp() throws Throwable { + + + def chaos = new ChaosKillContainer(appState, + queues, + new MockRMOperationHandler()) + chaos.chaosAction(); + assert queues.scheduledActions.size() == 0 + } + + + /** + * With a single started instance, the killer must schedule one + * ActionKillContainer naming that instance's container; executing the + * action must then record one release on the mock RM handler, whose + * first operation is a ContainerReleaseOperation for the same container + */ + @Test + public void testContainerKiller() throws Throwable { + MockRMOperationHandler ops = new MockRMOperationHandler(); + role0Status.desired = 1 + List<RoleInstance> instances = createAndStartNodes() + assert instances.size() == 1 + def instance = instances[0] + + def chaos = new ChaosKillContainer(appState, queues, ops) + chaos.chaosAction(); + assert queues.scheduledActions.size() == 1 + def action = queues.scheduledActions.take() + ActionKillContainer killer = (ActionKillContainer) action + assert killer.containerId == instance.containerId; + killer.execute(null, queues, appState) + assert ops.releases == 1; + + ContainerReleaseOperation operation =
(ContainerReleaseOperation) ops.operations[0] + assert operation.containerId == instance.containerId + } + + + + /** + * Chaos target that just implements a counter: every chaosAction() + * call increments count + */ + private static class ChaosCounter implements ChaosTarget { + int count; + + @Override + void chaosAction() { + count++; + } + + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "ChaosCounter{"); + sb.append("count=").append(count); + sb.append('}'); + return sb.toString(); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/52bd11b5/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy index d7b328f..28b484f 100644 --- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy @@ -18,6 +18,7 @@ package org.apache.slider.test +import com.codahale.metrics.MetricRegistry import groovy.transform.CompileStatic import org.apache.hadoop.fs.FileUtil import org.apache.slider.common.SliderXMLConfKeysForTesting @@ -38,7 +39,11 @@ import java.nio.file.Files @CompileStatic public abstract class SliderTestBase extends SliderTestUtils { - + /** + * Singleton metric registry + */ + public static final MetricRegistry metricRegistry = new MetricRegistry() + @Rule public TestName methodName = new TestName();