HIVE-15827: LLAP: status tool breaks out of watch mode when the number of live instances is 0 (Prasanth Jayachandran, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/70bd0ce8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/70bd0ce8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/70bd0ce8 Branch: refs/heads/hive-14535 Commit: 70bd0ce86e64a5ec02e824650a4cd1fe5c68ccba Parents: 39a0d39 Author: Prasanth Jayachandran <[email protected]> Authored: Mon Feb 6 17:44:49 2017 -0800 Committer: Prasanth Jayachandran <[email protected]> Committed: Mon Feb 6 17:44:49 2017 -0800 ---------------------------------------------------------------------- .../hive/llap/cli/LlapStatusServiceDriver.java | 37 ++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/70bd0ce8/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java index 39d542b..ab4173c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java @@ -251,6 +251,9 @@ public class LlapStatusServiceDriver { if (ret != ExitCode.SUCCESS) { return ret.getInt(); + } else if (EnumSet.of(State.APP_NOT_FOUND, State.COMPLETE, State.LAUNCHING) + .contains(appStatusBuilder.getState())) { + return ExitCode.SUCCESS.getInt(); } else { try { ret = populateAppStatusFromLlapRegistry(appStatusBuilder); @@ -435,6 +438,15 @@ public class LlapStatusServiceDriver { int liveContainers = llapStats.get(StatusKeys.STATISTICS_CONTAINERS_LIVE); appStatusBuilder.setDesiredInstances(desiredContainers); appStatusBuilder.setLiveInstances(liveContainers); + if 
(liveContainers == 0) { + appStatusBuilder.setState(State.LAUNCHING); + } else { + if (desiredContainers >= liveContainers) { + appStatusBuilder.setState(State.RUNNING_ALL); + } else { + appStatusBuilder.setState(State.RUNNING_PARTIAL); + } + } } else { throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER, "Failed to get statistics for LLAP"); // Error since LLAP should always exist. @@ -496,7 +508,7 @@ public class LlapStatusServiceDriver { Collection<ServiceInstance> serviceInstances; try { serviceInstances = llapRegistry.getInstances().getAll(); - } catch (IOException e) { + } catch (Exception e) { throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e); } @@ -540,7 +552,11 @@ public class LlapStatusServiceDriver { LOG.warn("Found more entries in LLAP registry, as compared to desired entries"); } } else { - appStatusBuilder.setState(State.RUNNING_PARTIAL); + if (validatedInstances.size() > 0) { + appStatusBuilder.setState(State.RUNNING_PARTIAL); + } else { + appStatusBuilder.setState(State.LAUNCHING); + } } // At this point, everything that can be consumed from AppStatusBuilder has been consumed. 
@@ -574,6 +590,8 @@ public class LlapStatusServiceDriver { private Long appStartTime; private Long appFinishTime; + private boolean runningThresholdAchieved = false; + private final List<LlapInstance> llapInstances = new LinkedList<>(); private transient Map<String, LlapInstance> containerToInstanceMap = new HashMap<>(); @@ -624,6 +642,11 @@ public class LlapStatusServiceDriver { return this; } + public AppStatusBuilder setRunningThresholdAchieved(boolean thresholdAchieved) { + this.runningThresholdAchieved = thresholdAchieved; + return this; + } + public LlapInstance removeAndgetLlapInstanceForContainer(String containerIdString) { return containerToInstanceMap.remove(containerIdString); } @@ -682,6 +705,10 @@ public class LlapStatusServiceDriver { return llapInstances; } + public boolean isRunningThresholdAchieved() { + return runningThresholdAchieved; + } + @JsonIgnore public AmInfo maybeCreateAndGetAmInfo() { if (amInfo == null) { @@ -993,7 +1020,7 @@ public class LlapStatusServiceDriver { // we have reached RUNNING state, now check if running nodes threshold is met final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances(); final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances(); - if (liveInstances > 0 && desiredInstances > 0) { + if (desiredInstances > 0) { final float ratio = (float) liveInstances / (float) desiredInstances; if (ratio < runningNodesThreshold) { LOG.warn("Waiting until running nodes threshold is reached. Current: {} Desired: {}." + @@ -1005,7 +1032,11 @@ public class LlapStatusServiceDriver { continue; } else { desiredStateAttained = true; + statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true); } + } else { + numAttempts--; + continue; } } }
