SLIDER-202: Chaos Monkey adds a (slow) minicluster test.

Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/53028eaa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/53028eaa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/53028eaa

Branch: refs/heads/feature/SLIDER-149_Support_a_YARN_service_registry
Commit: 53028eaae347899cef9e695e9ee8b41594c6377b
Parents: 52bd11b
Author: Steve Loughran <ste...@apache.org>
Authored: Wed Aug 13 17:30:49 2014 +0100
Committer: Steve Loughran <ste...@apache.org>
Committed: Wed Aug 13 17:30:49 2014 +0100

----------------------------------------------------------------------
 .../org/apache/slider/api/InternalKeys.java     | 49 +++++++---
 .../apache/slider/common/tools/SliderUtils.java |  2 +-
 .../server/appmaster/SliderAppMaster.java       | 39 ++++----
 .../TestStandaloneAMMonkeyRestart.groovy        | 94 ++++++++++++++++++++
 .../TestRegionServerFailureThreshold.groovy     |  5 +-
 5 files changed, 154 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/53028eaa/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
index 4045c0e..ad384e2 100644
--- a/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
@@ -84,31 +84,54 @@ public interface InternalKeys {
    * Flag to indicate whether or not the chaos monkey is enabled:
    * {@value}
    */
-  String INTERNAL_CHAOS_MONKEY_ENABLED = "internal.chaos.monkey.enabled";
-  boolean DEFAULT_INTERNAL_CHAOS_MONKEY_ENABLED = false;
+  String CHAOS_MONKEY_ENABLED = "internal.chaos.monkey.enabled";
+  boolean DEFAULT_CHAOS_MONKEY_ENABLED = false;
 
 
   /**
    * Rate
    */
 
-  String INTERNAL_CHAOS_MONKEY_RATE = "internal.chaos.monkey.rate";
-
-  int DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_DAYS = 0;
-  int DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_HOURS = 1;
-  int DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_MINUTES = 0;
+  String CHAOS_MONKEY_INTERVAL = "internal.chaos.monkey.interval";
+  String CHAOS_MONKEY_INTERVAL_DAYS = CHAOS_MONKEY_INTERVAL + ".days";
+  String CHAOS_MONKEY_INTERVAL_HOURS = CHAOS_MONKEY_INTERVAL + ".hours";
+  String CHAOS_MONKEY_INTERVAL_MINUTES = CHAOS_MONKEY_INTERVAL + ".minutes";
+  String CHAOS_MONKEY_INTERVAL_SECONDS = CHAOS_MONKEY_INTERVAL + ".seconds";
   
-  String INTERNAL_CHAOS_MONKEY_PROBABILITY =
+  int DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS = 0;
+  int DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS = 0;
+  int DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES = 0;
+
+  /**
+   * Prefix for all chaos monkey probabilities
+   */
+  String CHAOS_MONKEY_PROBABILITY =
       "internal.chaos.monkey.probability";
   /**
    * Probabilies are out of 10000 ; 100==1%
    */
 
-  String INTERNAL_CHAOS_MONKEY_PROBABILITY_AM_FAILURE = INTERNAL_CHAOS_MONKEY_PROBABILITY +".amfailure";
-  int DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE = 10;
-  String INTERNAL_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE =
-      INTERNAL_CHAOS_MONKEY_PROBABILITY + ".containerfailure";
-  int DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE = 100;
+  /**
+   * Probability of a monkey check killing the AM:  {@value}
+   */
+  String CHAOS_MONKEY_PROBABILITY_AM_FAILURE = CHAOS_MONKEY_PROBABILITY +".amfailure";
+
+  /**
+   * Default probability of a monkey check killing the AM:  {@value}
+   */
+  int DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE = 0;
+
+  /**
+   * Probability of a monkey check killing a container:  {@value}
+   */
+
+  String CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE =
+      CHAOS_MONKEY_PROBABILITY + ".containerfailure";
+
+  /**
+   * Default probability of a monkey check killing a container:  {@value}
+   */
+  int DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE = 0;
 
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/53028eaa/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
index d7f159c..7a80f24 100644
--- a/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/common/tools/SliderUtils.java
@@ -1476,7 +1476,7 @@ public final class SliderUtils {
           }
           is = new ByteArrayInputStream(content);
         } else {
-          log.info("Size unknown. Reading {}", zipEntry.getName());
+          log.debug("Size unknown. Reading {}", zipEntry.getName());
           ByteArrayOutputStream baos = new ByteArrayOutputStream();
           while (true) {
             int byteRead = zis.read();

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/53028eaa/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index 1642cc5..7825e24 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -779,17 +779,16 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     // now do the registration
     registerServiceInstance(clustername, appid);
 
-    sliderAMProvider.start();
+    // chaos monkey
+    maybeStartMonkey();
 
+    // Start the Slider AM provider
+    sliderAMProvider.start();
 
-    
-    // launch the provider; this is expected to trigger a callback that
+    // launch the real provider; this is expected to trigger a callback that
     // starts the node review process
     launchProviderService(instanceDefinition, confDir);
 
-    // chaos monkey
-    maybeStartMonkey();
-
     try {
       //now block waiting to be told to exit the process
       waitForAMCompletionSignal();
@@ -1459,7 +1458,6 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     rmOperationHandler.execute(operations);
   }
 
-
   /**
    * Get the RM operations handler for direct scheduling of work.
    */
@@ -1739,42 +1737,45 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     }
     signalAMComplete(exitCode, exception.toString());
   }
-  
-  public boolean maybeStartMonkey() {
+
+  /**
+   * Start the chaos monkey
+   * @return true if it started
+   */
+  private boolean maybeStartMonkey() {
     MapOperations internals = getGlobalInternalOptions();
 
     Boolean enabled =
-        internals.getOptionBool(InternalKeys.INTERNAL_CHAOS_MONKEY_ENABLED,
-            InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_ENABLED);
+        internals.getOptionBool(InternalKeys.CHAOS_MONKEY_ENABLED,
+            InternalKeys.DEFAULT_CHAOS_MONKEY_ENABLED);
     if (!enabled) {
       log.info("Chaos monkey disabled");
     }
     
     long monkeyInterval = internals.getTimeRange(
-        InternalKeys.INTERNAL_CHAOS_MONKEY_RATE,
-        InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_DAYS,
-        InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_HOURS,
-        InternalKeys.DEFAULT_INTERNAL_CHAOS_MONKEY_RATE_MINUTES,
+        InternalKeys.CHAOS_MONKEY_INTERVAL,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS,
+        InternalKeys.DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES,
         0);
     log.info("Adding Chaos Monkey scheduled every {} seconds ({} hours)",
         monkeyInterval, monkeyInterval/(60*60));
     monkey = new ChaosMonkeyService(metrics, actionQueues);
     int amKillProbability = internals.getOptionInt(
-        InternalKeys.INTERNAL_CHAOS_MONKEY_PROBABILITY_AM_FAILURE,
+        InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE,
         InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_AM_FAILURE);
     if (amKillProbability > 0) {
-      log.info("Adding AM killer with probability %f", amKillProbability/100.0);
       monkey.addTarget("AM killer",
           new ChaosKillAM(actionQueues, -1), amKillProbability
       );
     }
     int containerKillProbability = internals.getOptionInt(
-        InternalKeys.INTERNAL_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE,
+        InternalKeys.CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE,
         InternalKeys.DEFAULT_CHAOS_MONKEY_PROBABILITY_CONTAINER_FAILURE);
     if (containerKillProbability > 0) {
       monkey.addTarget("Container killer",
           new ChaosKillContainer(appState, actionQueues, rmOperationHandler),
-          amKillProbability
+          containerKillProbability
       );
     }
     initAndAddService(monkey);

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/53028eaa/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
new file mode 100644
index 0000000..162bab0
--- /dev/null
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMMonkeyRestart.groovy
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.slider.agent.standalone
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.SleepJob
+import org.apache.hadoop.yarn.api.records.ApplicationReport
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
+import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.slider.agent.AgentMiniClusterTestBase
+import org.apache.slider.api.InternalKeys
+import org.apache.slider.api.ResourceKeys
+import org.apache.slider.client.SliderClient
+import org.apache.slider.common.SliderXmlConfKeys
+import org.apache.slider.common.params.ActionAMSuicideArgs
+import org.apache.slider.common.params.Arguments
+import org.apache.slider.core.exceptions.ErrorStrings
+import org.apache.slider.core.main.ServiceLauncher
+import org.junit.Test
+
+/**
+ * kill a masterless AM and verify it shuts down. This test
+ * also sets the retry count to 1 to stop recreation attempts
+ */
+@CompileStatic
+@Slf4j
+
+class TestStandaloneAMMonkeyRestart extends AgentMiniClusterTestBase {
+
+
+  @Test
+  public void testStandaloneAMMonkeyRestart() throws Throwable {
+    describe "Run a Standalone AM with the Chaos monkey set to kill it"
+    // patch the configuration for AM restart
+    int threshold = 2;
+    YarnConfiguration conf = getRestartableConfiguration(threshold)
+
+    String clustername = createMiniCluster("", conf, 1, true)
+    ServiceLauncher<SliderClient> launcher =
+        createStandaloneAMWithArgs(clustername,
+            [
+                Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_ENABLED, "true",
+                Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_INTERVAL_SECONDS, "8",
+                Arguments.ARG_OPTION, InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_FAILURE, "75000",
+            ],
+            true,
+            false)
+    SliderClient sliderClient = launcher.service
+    addToTeardown(sliderClient);
+
+    ApplicationReport report
+    report = waitForClusterLive(sliderClient, 30000)
+    describe "Waiting for the cluster to fail"
+    sleep(40000)
+    // end of process
+    report = sliderClient.applicationReport
+    log.info(report.diagnostics)
+    assert report.currentApplicationAttemptId.attemptId == threshold
+    assert YarnApplicationState.FAILED == report.yarnApplicationState  
+    assert FinalApplicationStatus.FAILED == report.finalApplicationStatus
+  }
+
+  /**
+   * Get a restartable configuration
+   * @param restarts
+   * @return
+   */
+  public YarnConfiguration getRestartableConfiguration(int restarts) {
+    def conf = new YarnConfiguration(configuration)
+    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, restarts)
+    conf.setInt(SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, restarts)
+    conf
+  }
+
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/53028eaa/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
----------------------------------------------------------------------
diff --git a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
index 1322fd3..b3aaa48 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
+++ b/slider-providers/hbase/slider-hbase-provider/src/test/groovy/org/apache/slider/providers/hbase/minicluster/failures/TestRegionServerFailureThreshold.groovy
@@ -149,8 +149,9 @@ class TestRegionServerFailureThreshold extends HBaseMiniClusterTestBase {
         }
       }
     } catch (BadClusterStateException e) {
-      assert e.toString().contains(ErrorStrings.E_APPLICATION_NOT_RUNNING)
-      assert e.exitCode == SliderExitCodes.EXIT_BAD_STATE
+      assertExceptionDetails(e,
+          SliderExitCodes.EXIT_BAD_STATE, 
+          ErrorStrings.E_APPLICATION_NOT_RUNNING)
     }
     ApplicationReport report = client.applicationReport
     log.info(report.diagnostics)

Reply via email to