Repository: ambari Updated Branches: refs/heads/trunk 4a42b6d5b -> 3ba1889f4
AMBARI-8587. Ambari UI hangs for more than 20 minutes during installation.(vbrodetskyi) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3ba1889f Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3ba1889f Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3ba1889f Branch: refs/heads/trunk Commit: 3ba1889f454ffcd14352954ef30ad0393b02dc4a Parents: 4a42b6d Author: Vitaly Brodetskyi <vbrodets...@hortonworks.com> Authored: Mon Dec 8 20:21:02 2014 +0200 Committer: Vitaly Brodetskyi <vbrodets...@hortonworks.com> Committed: Mon Dec 8 20:23:03 2014 +0200 ---------------------------------------------------------------------- .../server/actionmanager/ActionScheduler.java | 38 +++++++++++++++++--- .../ambari/server/agent/HeartBeatHandler.java | 4 +++ .../actionmanager/TestActionScheduler.java | 24 +++++++++---- 3 files changed, 55 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java index c23440e..b3dcca4 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java @@ -465,10 +465,6 @@ class ActionScheduler implements Runnable { // Map to track role status Map<String, RoleStats> roleStats = initRoleStats(s); long now = System.currentTimeMillis(); - long taskTimeout = actionTimeout; - if (taskTimeoutAdjustment) { - taskTimeout = actionTimeout + s.getStageTimeout(); - } Cluster cluster = null; if (null != s.getClusterName()) { @@ -522,6 +518,20 @@ class ActionScheduler implements Runnable { } } + //basic timeout for stage + long commandTimeout = actionTimeout; + if (taskTimeoutAdjustment) { + Map<String, String> commandParams = c.getCommandParams(); + String timeoutKey = ExecutionCommand.KeyNames.COMMAND_TIMEOUT; + if (commandParams != null && commandParams.containsKey(timeoutKey)) { + String timeoutStr = commandParams.get(timeoutKey); + commandTimeout += Long.parseLong(timeoutStr) * 1000; // Converting to milliseconds + } else { + LOG.error("Execution command has no timeout parameter" + + c.toString()); + } + } + // Check that service host component is not deleted if (hostDeleted) { @@ -537,7 +547,7 @@ class ActionScheduler implements Runnable { // We don't need to send CANCEL_COMMANDs in this case db.abortHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), message); status = HostRoleStatus.ABORTED; - } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, taskTimeout)) { + } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, commandTimeout)) { // Process command timeouts LOG.info("Host:" + host + ", role:" + roleStr + ", actionId:" + s.getActionId() + " timed out"); if (s.getAttemptCount(host, roleStr) >= maxAttempts) { @@ -677,6 +687,11 @@ class ActionScheduler implements Runnable { LOG.debug("Timing out action since agent is not heartbeating."); return true; } + // If we have other command in progress for this stage do not timeout this one + if (hasCommandInProgress(stage, host.getHostName()) + && !status.equals(HostRoleStatus.IN_PROGRESS)) { + return false; + } if (currentTime > stage.getLastAttemptTime(host.getHostName(), role) + taskTimeout) { return true; @@ -684,6 +699,19 @@ class ActionScheduler implements Runnable { return false; } + private boolean hasCommandInProgress(Stage stage, String host) { + List<ExecutionCommandWrapper> commandWrappers = stage.getExecutionCommands(host); + for (ExecutionCommandWrapper wrapper : commandWrappers) { + ExecutionCommand c = wrapper.getExecutionCommand(); + String roleStr = c.getRole(); + HostRoleStatus status = stage.getHostRoleStatus(host, roleStr); + if (status == HostRoleStatus.IN_PROGRESS) { + return true; + } + } + return false; + } + private ListMultimap<String, ServiceComponentHostEvent> formEventMap(Stage s, List<ExecutionCommand> commands) { ListMultimap<String, ServiceComponentHostEvent> serviceEventMap = ArrayListMultimap.create(); for (ExecutionCommand cmd : commands) { http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java index b32a252..d482109 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java @@ -387,6 +387,10 @@ public class HeartBeatHandler { if (hostRoleCommand.getStatus() == HostRoleStatus.ABORTED) { continue; } + if (hostRoleCommand.getStatus() == HostRoleStatus.QUEUED && + report.getStatus().equals("IN_PROGRESS")) { + hostRoleCommand.setStartTime(now); + } //pass custom STAR, STOP and RESTART if (RoleCommand.ACTIONEXECUTE.toString().equals(report.getRoleCommand()) || (RoleCommand.CUSTOM_COMMAND.toString().equals(report.getRoleCommand()) && http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java index 0640c34..e61c1a9 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java @@ -17,9 +17,7 @@ */ package org.apache.ambari.server.actionmanager; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyLong; import static org.mockito.Matchers.anyString; @@ -211,6 +209,10 @@ public class TestActionScheduler { List<Stage> stages = new ArrayList<Stage>(); final Stage s = StageUtils.getATestStage(1, 977, hostname, CLUSTER_HOST_INFO, "{\"host_param\":\"param_value\"}", "{\"stage_param\":\"param_value\"}"); + s.addHostRoleExecutionCommand(hostname, Role.SECONDARY_NAMENODE, RoleCommand.INSTALL, + new ServiceComponentHostInstallEvent("SECONDARY_NAMENODE", hostname, System.currentTimeMillis(), "HDP-1.2.0"), + "cluster1", "HDFS"); + s.setHostRoleStatus(hostname, "SECONDARY_NAMENODE", HostRoleStatus.IN_PROGRESS); stages.add(s); ActionDBAccessor db = mock(ActionDBAccessor.class); @@ -238,12 +240,22 @@ public class TestActionScheduler { // Start the thread int cycleCount = 0; - while (!stages.get(0).getHostRoleStatus(hostname, "NAMENODE") + scheduler.doWork(); + //Check that in_progress command is rescheduled + assertEquals(HostRoleStatus.QUEUED, stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE")); + + //Switch command back to IN_PROGRESS status and check that other command is not rescheduled + stages.get(0).setHostRoleStatus(hostname, "SECONDARY_NAMENODE", HostRoleStatus.IN_PROGRESS); + scheduler.doWork(); + assertEquals(1, stages.get(0).getAttemptCount(hostname, "NAMENODE")); + assertEquals(2, stages.get(0).getAttemptCount(hostname, "SECONDARY_NAMENODE")); + + while (!stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE") .equals(HostRoleStatus.TIMEDOUT) && cycleCount++ <= MAX_CYCLE_ITERATIONS) { scheduler.doWork(); } - assertEquals(stages.get(0).getHostRoleStatus(hostname, "NAMENODE"), - HostRoleStatus.TIMEDOUT); + assertEquals(HostRoleStatus.TIMEDOUT, + stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE")); verify(db, times(1)).startRequest(eq(1L)); verify(db, times(1)).abortOperation(1L);