[14/50] [abbrv] ambari git commit: AMBARI-21142. Log more info about heartbeat message/response when server - agent communication gets out of sync. (stoader)

2017-06-08 Thread rlevas
AMBARI-21142. Log more info about heartbeat message/response when server - 
agent communication gets out of sync. (stoader)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b7101f78
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b7101f78
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b7101f78

Branch: refs/heads/branch-feature-AMBARI-20859
Commit: b7101f782be9a1291de589262f01083c70dfc935
Parents: c3c06ea
Author: Toader, Sebastian 
Authored: Fri Jun 2 23:09:56 2017 +0200
Committer: Toader, Sebastian 
Committed: Fri Jun 2 23:12:46 2017 +0200

--
 .../src/main/python/ambari_agent/Controller.py |  6 +++++-
 .../ambari/server/agent/HeartBeatHandler.java  | 18 ++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-agent/src/main/python/ambari_agent/Controller.py
--
diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py 
b/ambari-agent/src/main/python/ambari_agent/Controller.py
index 0297f74..bc923c3 100644
--- a/ambari-agent/src/main/python/ambari_agent/Controller.py
+++ b/ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -321,6 +321,7 @@ class Controller(threading.Thread):
   logger.log(logging_level, "Sending Heartbeat (id = %s)", 
self.responseId)
 
 response = self.sendRequest(self.heartbeatUrl, data)
+
 exitStatus = 0
 if 'exitstatus' in response.keys():
   exitStatus = int(response['exitstatus'])
@@ -366,7 +367,9 @@ class Controller(threading.Thread):
   self.restartAgent()
 
 if serverId != self.responseId + 1:
-  logger.error("Error in responseId sequence - restarting")
+  logger.error("Error in responseId sequence - received responseId={0} 
from server while expecting {1} - restarting..."
+  .format(serverId, self.responseId + 1))
+
   self.restartAgent()
 else:
   self.responseId = serverId
@@ -465,6 +468,7 @@ class Controller(threading.Thread):
 
 #randomize the heartbeat
 delay = randint(0, self.max_reconnect_retry_delay)
+logger.info("Waiting {0} seconds before reconnecting to 
{1}".format(delay, self.heartbeatUrl))
 time.sleep(delay)
 
   # Sleep for some time

http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
--
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index d800bc5..fc6e7a7 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -161,10 +161,20 @@ public class HeartBeatHandler {
 + ", receivedResponseId=" + heartbeat.getResponseId());
 
 if (heartbeat.getResponseId() == currentResponseId - 1) {
-  LOG.warn("Old responseId received - response was lost - returning cached 
response");
-  return hostResponses.get(hostname);
+  HeartBeatResponse heartBeatResponse = hostResponses.get(hostname);
+
+  LOG.warn("Old responseId={} received form host {} - response was lost - 
returning cached response with responseId={}",
+heartbeat.getResponseId(),
+hostname,
+heartBeatResponse.getResponseId());
+
+  return heartBeatResponse;
 } else if (heartbeat.getResponseId() != currentResponseId) {
-  LOG.error("Error in responseId sequence - sending agent restart 
command");
+  LOG.error("Error in responseId sequence - received responseId={} from 
host {} - sending agent restart command with responseId={}",
+heartbeat.getResponseId(),
+hostname,
+currentResponseId);
+
   return createRestartCommand(currentResponseId);
 }
 
@@ -186,7 +196,7 @@ public class HeartBeatHandler {
 
 if (hostObject.getState().equals(HostState.HEARTBEAT_LOST)) {
   // After loosing heartbeat agent should reregister
-  LOG.warn("Host is in HEARTBEAT_LOST state - sending register command");
+  LOG.warn("Host {} is in HEARTBEAT_LOST state - sending register 
command", hostname);
   return createRegisterCommand();
 }
 



[12/17] ambari git commit: AMBARI-21142. Log more info about heartbeat message/response when server - agent communication gets out of sync. (stoader)

2017-06-05 Thread jonathanhurley
AMBARI-21142. Log more info about heartbeat message/response when server - 
agent communication gets out of sync. (stoader)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b7101f78
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b7101f78
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b7101f78

Branch: refs/heads/branch-feature-AMBARI-12556
Commit: b7101f782be9a1291de589262f01083c70dfc935
Parents: c3c06ea
Author: Toader, Sebastian 
Authored: Fri Jun 2 23:09:56 2017 +0200
Committer: Toader, Sebastian 
Committed: Fri Jun 2 23:12:46 2017 +0200

--
 .../src/main/python/ambari_agent/Controller.py |  6 +++++-
 .../ambari/server/agent/HeartBeatHandler.java  | 18 ++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-agent/src/main/python/ambari_agent/Controller.py
--
diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py 
b/ambari-agent/src/main/python/ambari_agent/Controller.py
index 0297f74..bc923c3 100644
--- a/ambari-agent/src/main/python/ambari_agent/Controller.py
+++ b/ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -321,6 +321,7 @@ class Controller(threading.Thread):
   logger.log(logging_level, "Sending Heartbeat (id = %s)", 
self.responseId)
 
 response = self.sendRequest(self.heartbeatUrl, data)
+
 exitStatus = 0
 if 'exitstatus' in response.keys():
   exitStatus = int(response['exitstatus'])
@@ -366,7 +367,9 @@ class Controller(threading.Thread):
   self.restartAgent()
 
 if serverId != self.responseId + 1:
-  logger.error("Error in responseId sequence - restarting")
+  logger.error("Error in responseId sequence - received responseId={0} 
from server while expecting {1} - restarting..."
+  .format(serverId, self.responseId + 1))
+
   self.restartAgent()
 else:
   self.responseId = serverId
@@ -465,6 +468,7 @@ class Controller(threading.Thread):
 
 #randomize the heartbeat
 delay = randint(0, self.max_reconnect_retry_delay)
+logger.info("Waiting {0} seconds before reconnecting to 
{1}".format(delay, self.heartbeatUrl))
 time.sleep(delay)
 
   # Sleep for some time

http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
--
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index d800bc5..fc6e7a7 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -161,10 +161,20 @@ public class HeartBeatHandler {
 + ", receivedResponseId=" + heartbeat.getResponseId());
 
 if (heartbeat.getResponseId() == currentResponseId - 1) {
-  LOG.warn("Old responseId received - response was lost - returning cached 
response");
-  return hostResponses.get(hostname);
+  HeartBeatResponse heartBeatResponse = hostResponses.get(hostname);
+
+  LOG.warn("Old responseId={} received form host {} - response was lost - 
returning cached response with responseId={}",
+heartbeat.getResponseId(),
+hostname,
+heartBeatResponse.getResponseId());
+
+  return heartBeatResponse;
 } else if (heartbeat.getResponseId() != currentResponseId) {
-  LOG.error("Error in responseId sequence - sending agent restart 
command");
+  LOG.error("Error in responseId sequence - received responseId={} from 
host {} - sending agent restart command with responseId={}",
+heartbeat.getResponseId(),
+hostname,
+currentResponseId);
+
   return createRestartCommand(currentResponseId);
 }
 
@@ -186,7 +196,7 @@ public class HeartBeatHandler {
 
 if (hostObject.getState().equals(HostState.HEARTBEAT_LOST)) {
   // After loosing heartbeat agent should reregister
-  LOG.warn("Host is in HEARTBEAT_LOST state - sending register command");
+  LOG.warn("Host {} is in HEARTBEAT_LOST state - sending register 
command", hostname);
   return createRegisterCommand();
 }
 



ambari git commit: AMBARI-21142. Log more info about heartbeat message/response when server - agent communication gets out of sync. (stoader)

2017-06-02 Thread stoader
Repository: ambari
Updated Branches:
  refs/heads/trunk c3c06ea98 -> b7101f782


AMBARI-21142. Log more info about heartbeat message/response when server - 
agent communication gets out of sync. (stoader)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b7101f78
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b7101f78
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b7101f78

Branch: refs/heads/trunk
Commit: b7101f782be9a1291de589262f01083c70dfc935
Parents: c3c06ea
Author: Toader, Sebastian 
Authored: Fri Jun 2 23:09:56 2017 +0200
Committer: Toader, Sebastian 
Committed: Fri Jun 2 23:12:46 2017 +0200

--
 .../src/main/python/ambari_agent/Controller.py |  6 +++++-
 .../ambari/server/agent/HeartBeatHandler.java  | 18 ++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-agent/src/main/python/ambari_agent/Controller.py
--
diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py 
b/ambari-agent/src/main/python/ambari_agent/Controller.py
index 0297f74..bc923c3 100644
--- a/ambari-agent/src/main/python/ambari_agent/Controller.py
+++ b/ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -321,6 +321,7 @@ class Controller(threading.Thread):
   logger.log(logging_level, "Sending Heartbeat (id = %s)", 
self.responseId)
 
 response = self.sendRequest(self.heartbeatUrl, data)
+
 exitStatus = 0
 if 'exitstatus' in response.keys():
   exitStatus = int(response['exitstatus'])
@@ -366,7 +367,9 @@ class Controller(threading.Thread):
   self.restartAgent()
 
 if serverId != self.responseId + 1:
-  logger.error("Error in responseId sequence - restarting")
+  logger.error("Error in responseId sequence - received responseId={0} 
from server while expecting {1} - restarting..."
+  .format(serverId, self.responseId + 1))
+
   self.restartAgent()
 else:
   self.responseId = serverId
@@ -465,6 +468,7 @@ class Controller(threading.Thread):
 
 #randomize the heartbeat
 delay = randint(0, self.max_reconnect_retry_delay)
+logger.info("Waiting {0} seconds before reconnecting to 
{1}".format(delay, self.heartbeatUrl))
 time.sleep(delay)
 
   # Sleep for some time

http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
--
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index d800bc5..fc6e7a7 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -161,10 +161,20 @@ public class HeartBeatHandler {
 + ", receivedResponseId=" + heartbeat.getResponseId());
 
 if (heartbeat.getResponseId() == currentResponseId - 1) {
-  LOG.warn("Old responseId received - response was lost - returning cached 
response");
-  return hostResponses.get(hostname);
+  HeartBeatResponse heartBeatResponse = hostResponses.get(hostname);
+
+  LOG.warn("Old responseId={} received form host {} - response was lost - 
returning cached response with responseId={}",
+heartbeat.getResponseId(),
+hostname,
+heartBeatResponse.getResponseId());
+
+  return heartBeatResponse;
 } else if (heartbeat.getResponseId() != currentResponseId) {
-  LOG.error("Error in responseId sequence - sending agent restart 
command");
+  LOG.error("Error in responseId sequence - received responseId={} from 
host {} - sending agent restart command with responseId={}",
+heartbeat.getResponseId(),
+hostname,
+currentResponseId);
+
   return createRestartCommand(currentResponseId);
 }
 
@@ -186,7 +196,7 @@ public class HeartBeatHandler {
 
 if (hostObject.getState().equals(HostState.HEARTBEAT_LOST)) {
   // After loosing heartbeat agent should reregister
-  LOG.warn("Host is in HEARTBEAT_LOST state - sending register command");
+  LOG.warn("Host {} is in HEARTBEAT_LOST state - sending register 
command", hostname);
   return createRegisterCommand();
 }