Jiří Moskovčák has uploaded a new change for review.

Change subject: don't die when broker disconnects
......................................................................
don't die when broker disconnects Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1093646 Signed-off-by: Jiri Moskovcak <[email protected]> --- M ovirt_hosted_engine_ha/agent/agent.py M ovirt_hosted_engine_ha/agent/constants.py.in M ovirt_hosted_engine_ha/agent/hosted_engine.py M ovirt_hosted_engine_ha/lib/brokerlink.py 4 files changed, 30 insertions(+), 3 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/14/30114/1 diff --git a/ovirt_hosted_engine_ha/agent/agent.py b/ovirt_hosted_engine_ha/agent/agent.py index 0693814..142fe0f 100644 --- a/ovirt_hosted_engine_ha/agent/agent.py +++ b/ovirt_hosted_engine_ha/agent/agent.py @@ -29,8 +29,10 @@ import pwd import signal import sys +import time from ..lib import util +from ..lib import exceptions as ex from . import constants from . import hosted_engine @@ -151,4 +153,26 @@ def _run_agent(self): # Only one service type for now, run it in the main thread - hosted_engine.HostedEngine(self.shutdown_requested).start_monitoring() + + for attempt in range(constants.AGENT_START_RETRIES): + try: + hosted_engine.HostedEngine(self.shutdown_requested)\ + .start_monitoring() + # if we're here, the agent stopped gracefully, + # so we don't want to restart it + break + except ex.DisconnectionError as e: + self._log.error("Disconnected from broker '{0}'" + " - reinitializing".format(str(e))) + except ex.BrokerInitializationError as e: + self._log.error("Can't initialize brokerlink '{0}'" + " - reinitializing".format(str(e))) + except Exception as e: + self._log.error("") + + attempt += 1 + time.sleep(constants.AGENT_START_RETRY_WAIT) + self._log.warn("Restarting agent, attempt '{0}'".format(attempt)) + else: + self._log.error("Too many errors occurred, giving up. 
" + "Please review the log and consider filing a bug.") diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in index 083e046..20902f2 100644 --- a/ovirt_hosted_engine_ha/agent/constants.py.in +++ b/ovirt_hosted_engine_ha/agent/constants.py.in @@ -56,6 +56,8 @@ MAX_DOMAIN_MONITOR_WAIT_SECS = 240 METADATA_LOG_PERIOD_SECS = 600 ENGINE_STARTING_TIMEOUT = 600 +AGENT_START_RETRIES = 10 +AGENT_START_RETRY_WAIT = 5 BASE_SCORE = 2400 GATEWAY_SCORE_PENALTY = 1600 diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py index f5e5d4b..5dcf0cb 100644 --- a/ovirt_hosted_engine_ha/agent/hosted_engine.py +++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py @@ -368,7 +368,8 @@ if not self._broker: self._broker = brokerlink.BrokerLink() try: - self._broker.connect(constants.BROKER_CONNECTION_RETRIES) + self._broker.connect(constants.BROKER_CONNECTION_RETRIES, + constants.BROKER_CONNECTION_WAIT) except Exception as e: self._log.error("Failed to connect to ha-broker: %s", str(e)) raise diff --git a/ovirt_hosted_engine_ha/lib/brokerlink.py b/ovirt_hosted_engine_ha/lib/brokerlink.py index b46f9cd..2324ecd 100644 --- a/ovirt_hosted_engine_ha/lib/brokerlink.py +++ b/ovirt_hosted_engine_ha/lib/brokerlink.py @@ -39,7 +39,7 @@ self._log = logging.getLogger("%s.BrokerLink" % __name__) self._socket = None - def connect(self, retries=0, wait=1): + def connect(self, retries=0, wait=5): """ Connect to the HA Broker. 
Upon failure, reconnection attempts will be made approximately once per second until the specified number of

-- 
To view, visit http://gerrit.ovirt.org/30114
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibd8627346c03894c1654af5d41c3caaf9f3a5ffa
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: master
Gerrit-Owner: Jiří Moskovčák <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches
