YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into
DistributedShell. Contributed by Xuan Gong


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8e5d6713
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8e5d6713
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8e5d6713

Branch: refs/heads/HDFS-6584
Commit: 8e5d6713cf16473d791c028cecc274fd2c7fd10b
Parents: 9f6891d
Author: XuanGong <xg...@apache.org>
Authored: Tue Sep 16 10:58:18 2014 -0700
Committer: XuanGong <xg...@apache.org>
Committed: Tue Sep 16 10:58:18 2014 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +
 .../applications/distributedshell/Client.java   | 18 ++++-
 .../TestDSSleepingAppMaster.java                | 58 +++++++++++++++
 .../distributedshell/TestDistributedShell.java  | 76 ++++++++++++++++++++
 4 files changed, 154 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e5d6713/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 66623ec..ec59cba 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -229,6 +229,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2547. Cross Origin Filter throws UnsupportedOperationException upon
     destroy (Mit Desai via jeagles)
 
+    YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into
+    DistributedShell (xgong)
+
   OPTIMIZATIONS
 
   BUG FIXES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e5d6713/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
index a86b521..f3ce64c 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
@@ -75,7 +75,6 @@ import 
org.apache.hadoop.yarn.client.api.YarnClientApplication;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.apache.hadoop.yarn.util.Records;
 
 /**
  * Client for Distributed Shell application submission to YARN.
@@ -163,6 +162,8 @@ public class Client {
   // flag to indicate whether to keep containers across application attempts.
   private boolean keepContainers = false;
 
+  private long attemptFailuresValidityInterval = -1;
+
   // Debug flag
   boolean debugFlag = false;   
 
@@ -248,6 +249,12 @@ public class Client {
       " If the flag is true, running containers will not be killed when" +
       " application attempt fails and these containers will be retrieved by" +
       " the new application attempt ");
+    opts.addOption("attempt_failures_validity_interval", true,
+      "when attempt_failures_validity_interval in milliseconds is set to > 0," 
+
+      "the failure number will not take failures which happen out of " +
+      "the validityInterval into failure count. " +
+      "If failure count reaches to maxAppAttempts, " +
+      "the application will be failed.");
     opts.addOption("debug", false, "Dump out debug information");
     opts.addOption("help", false, "Print usage");
 
@@ -372,6 +379,10 @@ public class Client {
 
     clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", 
"600000"));
 
+    attemptFailuresValidityInterval =
+        Long.parseLong(cliParser.getOptionValue(
+          "attempt_failures_validity_interval", "-1"));
+
     log4jPropFile = cliParser.getOptionValue("log_properties", "");
 
     return true;
@@ -456,6 +467,11 @@ public class Client {
     appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
     appContext.setApplicationName(appName);
 
+    if (attemptFailuresValidityInterval >= 0) {
+      appContext
+        .setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
+    }
+
     // set local resources for the application master
     // local files or archives as needed
     // In this scenario, the jar file for the application master is part of 
the local resources                        

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e5d6713/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
new file mode 100644
index 0000000..3004b69
--- /dev/null
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.applications.distributedshell;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public class TestDSSleepingAppMaster extends ApplicationMaster{
+
+  private static final Log LOG = 
LogFactory.getLog(TestDSSleepingAppMaster.class);
+  private static final long SLEEP_TIME = 5000;
+
+  public static void main(String[] args) {
+    boolean result = false;
+    try {
+      TestDSSleepingAppMaster appMaster = new TestDSSleepingAppMaster();
+      boolean doRun = appMaster.init(args);
+      if (!doRun) {
+        System.exit(0);
+      }
+      appMaster.run();
+      if (appMaster.appAttemptID.getAttemptId() <= 2) {
+        try {
+          // sleep some time
+          Thread.sleep(SLEEP_TIME);
+        } catch (InterruptedException e) {}
+        // fail the first am.
+        System.exit(100);
+      }
+      result = appMaster.finish();
+    } catch (Throwable t) {
+      System.exit(1);
+    }
+    if (result) {
+      LOG.info("Application Master completed successfully. exiting");
+      System.exit(0);
+    } else {
+      LOG.info("Application Master failed. exiting");
+      System.exit(2);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e5d6713/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
index d7a1745..6dff94c 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
@@ -308,6 +308,82 @@ public class TestDistributedShell {
       Assert.assertTrue(result);
     }
 
+  /*
+   * The sleeping period in TestDSSleepingAppMaster is set as 5 seconds.
+   * Set attempt_failures_validity_interval as 2.5 seconds. It will check
+   * how many attempt failures for previous 2.5 seconds.
+   * The application is expected to be successful.
+   */
+  @Test(timeout=90000)
+  public void testDSAttemptFailuresValidityIntervalSucess() throws Exception {
+    String[] args = {
+        "--jar",
+        APPMASTER_JAR,
+        "--num_containers",
+        "1",
+        "--shell_command",
+        "sleep 8",
+        "--master_memory",
+        "512",
+        "--container_memory",
+        "128",
+        "--attempt_failures_validity_interval",
+        "2500"
+      };
+
+      LOG.info("Initializing DS Client");
+      Configuration conf = yarnCluster.getConfig();
+      conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+      Client client = new Client(TestDSSleepingAppMaster.class.getName(),
+        new Configuration(conf));
+
+      client.init(args);
+      LOG.info("Running DS Client");
+      boolean result = client.run();
+
+      LOG.info("Client run completed. Result=" + result);
+      // application should succeed
+      Assert.assertTrue(result);
+    }
+
+  /*
+   * The sleeping period in TestDSSleepingAppMaster is set as 5 seconds.
+   * Set attempt_failures_validity_interval as 15 seconds. It will check
+   * how many attempt failure for previous 15 seconds.
+   * The application is expected to be fail.
+   */
+  @Test(timeout=90000)
+  public void testDSAttemptFailuresValidityIntervalFailed() throws Exception {
+    String[] args = {
+        "--jar",
+        APPMASTER_JAR,
+        "--num_containers",
+        "1",
+        "--shell_command",
+        "sleep 8",
+        "--master_memory",
+        "512",
+        "--container_memory",
+        "128",
+        "--attempt_failures_validity_interval",
+        "15000"
+      };
+
+      LOG.info("Initializing DS Client");
+      Configuration conf = yarnCluster.getConfig();
+      conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+      Client client = new Client(TestDSSleepingAppMaster.class.getName(),
+        new Configuration(conf));
+
+      client.init(args);
+      LOG.info("Running DS Client");
+      boolean result = client.run();
+
+      LOG.info("Client run completed. Result=" + result);
+      // application should be failed
+      Assert.assertFalse(result);
+    }
+
   @Test(timeout=90000)
   public void testDSShellWithCustomLogPropertyFile() throws Exception {
     final File basedir =

Reply via email to