Repository: hive
Updated Branches:
  refs/heads/master 9249e9984 -> 719125cf1


HIVE-15651: LLAP: llap status tool enhancements (Prasanth Jayachandran reviewed by Siddharth Seth)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/719125cf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/719125cf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/719125cf

Branch: refs/heads/master
Commit: 719125cf183381f94590b619f69723eed348f856
Parents: 9249e99
Author: Prasanth Jayachandran <prasan...@apache.org>
Authored: Mon Jan 23 17:42:18 2017 -0800
Committer: Prasanth Jayachandran <prasan...@apache.org>
Committed: Mon Jan 23 17:42:18 2017 -0800

----------------------------------------------------------------------
 .../llap/cli/LlapStatusOptionsProcessor.java    | 53 ++++++++++------
 .../hive/llap/cli/LlapStatusServiceDriver.java  | 65 +++++++++++++++-----
 2 files changed, 84 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/719125cf/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
index a501b6c..b4aa430 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
@@ -37,6 +37,7 @@ public class LlapStatusOptionsProcessor {
 
   private static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000l; // 
1 seconds wait until subsequent status
   private static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000l; // 
5 minutes timeout for watch mode
+  private static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f;
   enum OptionConstants {
 
     NAME("name", 'n', "LLAP cluster name", true),
@@ -44,9 +45,10 @@ public class LlapStatusOptionsProcessor {
         "Amount of time(s) that the tool will sleep to wait for the YARN 
application to start. negative values=wait forever, 0=Do not wait. default=" +
             TimeUnit.SECONDS.convert(FIND_YARN_APP_TIMEOUT_MS, 
TimeUnit.MILLISECONDS) + "s", true),
     OUTPUT_FILE("outputFile", 'o', "File to which output should be written 
(Default stdout)", true),
-    WATCH_UNTIL_STATUS_CHANGE("watchUntil", 'w', "Watch until LLAP application 
status changes to the specified " +
-      "desired state before printing to console. Accepted values are " + 
Arrays.toString(LlapStatusServiceDriver.State
-      .values()), true),
+    WATCH_MODE("watch", 'w', "Watch mode waits until all LLAP daemons are 
running or subset of the nodes are " +
+      "running (threshold can be specified via -r option) (Default wait until 
all nodes are running)", false),
+    RUNNING_NODES_THRESHOLD("runningNodesThreshold", 'r', "When watch mode is 
enabled (-w), wait until the " +
+      "specified threshold of nodes are running (Default 1.0 which means 100% 
nodes are running)", true),
     STATUS_REFRESH_INTERVAL("refreshInterval", 'i', "Amount of time in seconds 
to wait until subsequent status checks" +
       " in watch mode. Valid only for watch mode. (Default " +
       TimeUnit.SECONDS.convert(DEFAULT_STATUS_REFRESH_INTERVAL_MS, 
TimeUnit.MILLISECONDS) + "s)", true),
@@ -102,25 +104,27 @@ public class LlapStatusOptionsProcessor {
     private final long findAppTimeoutMs;
     private final String outputFile;
     private final long refreshIntervalMs;
-    private final LlapStatusServiceDriver.State watchUntil;
+    private final boolean watchMode;
     private final long watchTimeout;
+    private final float runningNodesThreshold;
 
-    public LlapStatusOptions(String name) {
-      this(name, new Properties(), FIND_YARN_APP_TIMEOUT_MS, null, 
DEFAULT_STATUS_REFRESH_INTERVAL_MS, null,
-        DEFAULT_WATCH_MODE_TIMEOUT_MS);
+    public LlapStatusOptions(final String name) {
+      this(name, new Properties(), FIND_YARN_APP_TIMEOUT_MS, null, 
DEFAULT_STATUS_REFRESH_INTERVAL_MS, false,
+        DEFAULT_WATCH_MODE_TIMEOUT_MS, DEFAULT_RUNNING_NODES_THRESHOLD);
     }
 
     public LlapStatusOptions(String name, Properties hiveProperties, long 
findAppTimeoutMs,
-                             String outputFile, long refreshIntervalMs,
-                             final LlapStatusServiceDriver.State watchUntil,
-                             final long watchTimeoutMs) {
+                              String outputFile, long refreshIntervalMs,
+                              final boolean watchMode, final long 
watchTimeoutMs,
+                              final float runningNodesThreshold) {
       this.name = name;
       this.conf = hiveProperties;
       this.findAppTimeoutMs = findAppTimeoutMs;
       this.outputFile = outputFile;
       this.refreshIntervalMs = refreshIntervalMs;
-      this.watchUntil = watchUntil;
+      this.watchMode = watchMode;
       this.watchTimeout = watchTimeoutMs;
+      this.runningNodesThreshold = runningNodesThreshold;
     }
 
     public String getName() {
@@ -143,13 +147,17 @@ public class LlapStatusOptionsProcessor {
       return refreshIntervalMs;
     }
 
-    public LlapStatusServiceDriver.State getWatchUntilState() {
-      return watchUntil;
+    public boolean isWatchMode() {
+      return watchMode;
     }
 
     public long getWatchTimeoutMs() {
       return watchTimeout;
     }
+
+    public float getRunningNodesThreshold() {
+      return runningNodesThreshold;
+    }
   }
 
   private final Options options = new Options();
@@ -208,12 +216,7 @@ public class LlapStatusOptionsProcessor {
       refreshIntervalMs = TimeUnit.MILLISECONDS.convert(refreshIntervalSec, 
TimeUnit.SECONDS);
     }
 
-    LlapStatusServiceDriver.State watchUntil = null;
-    if 
(commandLine.hasOption(OptionConstants.WATCH_UNTIL_STATUS_CHANGE.getLongOpt())) 
{
-      String watchUntilStr = 
commandLine.getOptionValue(OptionConstants.WATCH_UNTIL_STATUS_CHANGE.getLongOpt());
-      watchUntil = LlapStatusServiceDriver.State.valueOf(watchUntilStr);
-    }
-
+    boolean watchMode = 
commandLine.hasOption(OptionConstants.WATCH_MODE.getLongOpt()) ? true : false;
     long watchTimeoutMs = DEFAULT_WATCH_MODE_TIMEOUT_MS;
     if 
(commandLine.hasOption(OptionConstants.WATCH_MODE_TIMEOUT.getLongOpt())) {
       long watchTimeoutSec = 
Long.parseLong(commandLine.getOptionValue(OptionConstants.WATCH_MODE_TIMEOUT.getLongOpt()));
@@ -222,7 +225,17 @@ public class LlapStatusOptionsProcessor {
       }
       watchTimeoutMs = TimeUnit.MILLISECONDS.convert(watchTimeoutSec, 
TimeUnit.SECONDS);
     }
-    return new LlapStatusOptions(name, hiveConf, findAppTimeoutMs, outputFile, 
refreshIntervalMs, watchUntil, watchTimeoutMs);
+
+    float runningNodesThreshold = DEFAULT_RUNNING_NODES_THRESHOLD;
+    if 
(commandLine.hasOption(OptionConstants.RUNNING_NODES_THRESHOLD.getLongOpt())) {
+      runningNodesThreshold = 
Float.parseFloat(commandLine.getOptionValue(OptionConstants.RUNNING_NODES_THRESHOLD
+        .getLongOpt()));
+      if (runningNodesThreshold < 0.0f || runningNodesThreshold > 1.0f) {
+        throw new IllegalArgumentException("Running nodes threshold value 
should be between 0.0 and 1.0 (inclusive)");
+      }
+    }
+    return new LlapStatusOptions(name, hiveConf, findAppTimeoutMs, outputFile, 
refreshIntervalMs, watchMode,
+      watchTimeoutMs, runningNodesThreshold);
   }
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/719125cf/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
index 97a131e..39d542b 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
@@ -24,6 +24,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintWriter;
+import java.text.DecimalFormat;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
@@ -945,40 +946,72 @@ public class LlapStatusServiceDriver {
     }
 
     final long refreshInterval = options.getRefreshIntervalMs();
-    final State watchUntilState = options.getWatchUntilState();
+    final boolean watchMode = options.isWatchMode();
     final long watchTimeout = options.getWatchTimeoutMs();
     long numAttempts = watchTimeout / refreshInterval;
+    State launchingState = null;
     State currentState = null;
+    boolean desiredStateAttained = false;
+    final float runningNodesThreshold = options.getRunningNodesThreshold();
     try (OutputStream os = options.getOutputFile() == null ? System.out :
       new BufferedOutputStream(new FileOutputStream(options.getOutputFile()));
          PrintWriter pw = new PrintWriter(os)) {
 
-      LOG.info("Configured refresh interval: {}s. Watch timeout: {}s. Attempts 
remaining: {}",
+      LOG.info("Configured refresh interval: {}s. Watch timeout: {}s. Attempts 
remaining: {}." +
+          " Watch mode: {}. Running nodes threshold: {}.",
         TimeUnit.SECONDS.convert(refreshInterval, TimeUnit.MILLISECONDS),
         TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS),
-        numAttempts);
+        numAttempts, watchMode, new 
DecimalFormat("#.###").format(runningNodesThreshold));
       while (numAttempts > 0) {
         try {
           ret = statusServiceDriver.run(options);
           if (ret == ExitCode.SUCCESS.getInt()) {
-            if (watchUntilState != null) {
+            if (watchMode) {
               currentState = statusServiceDriver.appStatusBuilder.state;
-              if (!currentState.equals(watchUntilState)) {
-                LOG.warn("Current state: {}. Desired state: {}. {}/{} 
instances.", currentState, watchUntilState,
+
+              // slider has started llap application, now if for some reason 
state changes to COMPLETE then fail fast
+              if (launchingState == null &&
+                (currentState.equals(State.LAUNCHING) || 
currentState.equals(State.RUNNING_PARTIAL))) {
+                launchingState = currentState;
+              }
+
+              if (launchingState != null && 
currentState.equals(State.COMPLETE)) {
+                LOG.warn("Application stopped while launching. COMPLETE state 
reached while waiting for RUNNING state."
+                  + " Failing " + "fast..");
+                break;
+              }
+
+              if (!(currentState.equals(State.RUNNING_PARTIAL) || 
currentState.equals(State.RUNNING_ALL))) {
+                LOG.warn("Current state: {}. Desired state: {}. {}/{} 
instances.", currentState,
+                  runningNodesThreshold == 1.0f ? State.RUNNING_ALL : 
State.RUNNING_PARTIAL,
                   statusServiceDriver.appStatusBuilder.getLiveInstances(),
                   statusServiceDriver.appStatusBuilder.getDesiredInstances());
                 numAttempts--;
                 continue;
               }
+
+              // we have reached RUNNING state, now check if running nodes 
threshold is met
+              final int liveInstances = 
statusServiceDriver.appStatusBuilder.getLiveInstances();
+              final int desiredInstances = 
statusServiceDriver.appStatusBuilder.getDesiredInstances();
+              if (liveInstances > 0 && desiredInstances > 0) {
+                final float ratio = (float) liveInstances / (float) 
desiredInstances;
+                if (ratio < runningNodesThreshold) {
+                  LOG.warn("Waiting until running nodes threshold is reached. 
Current: {} Desired: {}." +
+                      " {}/{} instances.", new 
DecimalFormat("#.###").format(ratio),
+                    new DecimalFormat("#.###").format(runningNodesThreshold),
+                    statusServiceDriver.appStatusBuilder.getLiveInstances(),
+                    
statusServiceDriver.appStatusBuilder.getDesiredInstances());
+                  numAttempts--;
+                  continue;
+                } else {
+                  desiredStateAttained = true;
+                }
+              }
             }
-            // desired state attained. print and break out of loop
-            statusServiceDriver.outputJson(pw);
-            os.flush();
-            pw.flush();
           }
           break;
         } finally {
-          if (watchUntilState != null) {
+          if (watchMode) {
             try {
               Thread.sleep(refreshInterval);
             } catch (InterruptedException e) {
@@ -990,9 +1023,13 @@ public class LlapStatusServiceDriver {
           }
         }
       }
-      if (numAttempts == 0 && watchUntilState != null && currentState!= null 
&& !currentState.equals(watchUntilState)) {
-        LOG.info("Watch timeout {}s exhausted before desired state {} is 
attained.",
-          TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS), 
watchUntilState);
+      // print current state before exiting
+      statusServiceDriver.outputJson(pw);
+      os.flush();
+      pw.flush();
+      if (numAttempts == 0 && watchMode && !desiredStateAttained) {
+        LOG.warn("Watch timeout {}s exhausted before desired state RUNNING is 
attained.",
+          TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS));
       }
     } catch (Throwable t) {
       logError(t);

Reply via email to