AMBARI-18825. Make agent to server connect delay configurable (magyari_sandor)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/fcd0d2ad Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/fcd0d2ad Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/fcd0d2ad Branch: refs/heads/branch-dev-patch-upgrade Commit: fcd0d2adbfdce42fe63c4a1ad648595fd2f90bdb Parents: 3809ebc Author: Sandor Magyari <[email protected]> Authored: Thu Nov 10 12:28:12 2016 +0100 Committer: Sandor Magyari <[email protected]> Committed: Tue Nov 15 13:48:56 2016 +0100 ---------------------------------------------------------------------- ambari-agent/conf/unix/ambari-agent.ini | 3 +++ ambari-agent/conf/windows/ambari-agent.ini | 2 ++ ambari-agent/src/main/python/ambari_agent/Controller.py | 8 ++++---- ambari-agent/src/main/python/ambari_agent/NetUtil.py | 10 ++++++---- 4 files changed, 15 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/fcd0d2ad/ambari-agent/conf/unix/ambari-agent.ini ---------------------------------------------------------------------- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index b0b64c9..61948d4 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -16,6 +16,9 @@ hostname=localhost url_port=8440 secured_url_port=8441 +connect_retry_delay=10 +max_reconnect_retry_delay=30 + [agent] logdir=/var/log/ambari-agent http://git-wip-us.apache.org/repos/asf/ambari/blob/fcd0d2ad/ambari-agent/conf/windows/ambari-agent.ini ---------------------------------------------------------------------- diff --git a/ambari-agent/conf/windows/ambari-agent.ini b/ambari-agent/conf/windows/ambari-agent.ini index 3612414..42d65c1 100644 --- a/ambari-agent/conf/windows/ambari-agent.ini +++ b/ambari-agent/conf/windows/ambari-agent.ini @@ -16,6 +16,8 @@ hostname=localhost url_port=8440 secured_url_port=8441 +connect_retry_delay=10 +max_reconnect_retry_delay=30 [agent] prefix=data http://git-wip-us.apache.org/repos/asf/ambari/blob/fcd0d2ad/ambari-agent/src/main/python/ambari_agent/Controller.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py b/ambari-agent/src/main/python/ambari_agent/Controller.py index cece30c..234795b 100644 --- a/ambari-agent/src/main/python/ambari_agent/Controller.py +++ b/ambari-agent/src/main/python/ambari_agent/Controller.py @@ -59,7 +59,7 @@ AGENT_RAM_OVERUSE_MESSAGE = "Ambari-agent RAM usage {used_ram} MB went above {co class Controller(threading.Thread): - def __init__(self, config, server_hostname, heartbeat_stop_callback = None, range=30): + def __init__(self, config, server_hostname, heartbeat_stop_callback = None): threading.Thread.__init__(self) logger.debug('Initializing Controller RPC thread.') if heartbeat_stop_callback is None: @@ -82,7 +82,7 @@ class Controller(threading.Thread): self.repeatRegistration = False self.isRegistered = False self.cachedconnect = None - self.range = range + self.max_reconnect_retry_delay = int(config.get('server','max_reconnect_retry_delay', default=30)) self.hasMappedComponents = True self.statusCommandsExecutor = None # Event is used for synchronizing heartbeat iterations (to make possible @@ -210,7 +210,7 @@ class Controller(threading.Thread): return except Exception, ex: # try a reconnect only after a certain amount of random time - delay = randint(0, self.range) + delay = randint(0, self.max_reconnect_retry_delay) logger.error("Unable to connect to: " + self.registerUrl, exc_info=True) logger.error("Error:" + str(ex)) logger.warn(""" Sleeping for {0} seconds and then trying again """.format(delay,)) @@ -432,7 +432,7 @@ class Controller(threading.Thread): retry = True #randomize the heartbeat - delay = randint(0, self.range) + delay = randint(0, self.max_reconnect_retry_delay) time.sleep(delay) # Sleep for some time http://git-wip-us.apache.org/repos/asf/ambari/blob/fcd0d2ad/ambari-agent/src/main/python/ambari_agent/NetUtil.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/NetUtil.py b/ambari-agent/src/main/python/ambari_agent/NetUtil.py index 2e9381b..c3cd621 100644 --- a/ambari-agent/src/main/python/ambari_agent/NetUtil.py +++ b/ambari-agent/src/main/python/ambari_agent/NetUtil.py @@ -30,7 +30,7 @@ logger = logging.getLogger(__name__) class NetUtil: - CONNECT_SERVER_RETRY_INTERVAL_SEC = 10 + DEFAULT_CONNECT_RETRY_DELAY_SEC = 10 HEARTBEAT_IDLE_INTERVAL_DEFAULT_MIN_SEC = 1 HEARTBEAT_IDLE_INTERVAL_DEFAULT_MAX_SEC = 10 MINIMUM_INTERVAL_BETWEEN_HEARTBEATS = 0.1 @@ -52,6 +52,8 @@ class NetUtil: stop_callback = HeartbeatStopHandlers() self.stopCallback = stop_callback self.config = config + self.connect_retry_delay = int(config.get('server','connect_retry_delay', + default=self.DEFAULT_CONNECT_RETRY_DELAY_SEC)) def checkURL(self, url): """Try to connect to a given url. Result is True if url returns HTTP code 200, in any other case @@ -94,7 +96,7 @@ class NetUtil: return False, responseBody def try_to_connect(self, server_url, max_retries, logger=None): - """Try to connect to a given url, sleeping for CONNECT_SERVER_RETRY_INTERVAL_SEC seconds + """Try to connect to a given url, sleeping for connect_retry_delay seconds between retries. No more than max_retries is performed. If max_retries is -1, connection attempts will be repeated forever until server is not reachable @@ -113,10 +115,10 @@ class NetUtil: else: if logger is not None: logger.warn('Server at {0} is not reachable, sleeping for {1} seconds...'.format(server_url, - self.CONNECT_SERVER_RETRY_INTERVAL_SEC)) + self.connect_retry_delay)) retries += 1 - if 0 == self.stopCallback.wait(self.CONNECT_SERVER_RETRY_INTERVAL_SEC): + if 0 == self.stopCallback.wait(self.connect_retry_delay): #stop waiting if logger is not None: logger.info("Stop event received")
