Signed-off-by: Tom Limoncelli <[email protected]>
---
daemons/ganeti-watcher | 25 ++++++++++++++++++++++++-
lib/utils.py | 13 +++++++++++++
2 files changed, 37 insertions(+), 1 deletions(-)
diff --git a/daemons/ganeti-watcher b/daemons/ganeti-watcher
index 1f82db8..82bd24b 100755
--- a/daemons/ganeti-watcher
+++ b/daemons/ganeti-watcher
@@ -48,6 +48,7 @@ from ganeti import ssconf
from ganeti import bdev
from ganeti import hypervisor
from ganeti.confd import client as confd_client
+from ganeti.rapi import client as rapi_client
MAXTRIES = 5
@@ -666,7 +667,29 @@ def main():
client = cli.GetClient()
# we are on master now
- utils.EnsureDaemon(constants.RAPI)
+
+ # Make sure RAPI is running and answering queries. If it does not answer,
+ # kill/restart it exactly once; a second failure means we give up.
+ rapi_restarted = False
+ while True:
+   utils.EnsureDaemon(constants.RAPI)
+   logging.debug("Attempting to talk with RAPI")
+   master_rapi = rapi_client.GanetiRapiClient("localhost",
+       ssl_cert_file=constants.RAPI_CERT_FILE)
+   try:
+     master_version = master_rapi.GetVersion()
+   except Exception:
+     logging.error("Could not open connection to RAPI")
+     if rapi_restarted:
+       break
+     logging.debug("RAPI is running but did not speak. Killing RAPI")
+     utils.StopDaemon(constants.RAPI)
+     rapi_restarted = True  # never restart twice
+     continue
+   if master_version != 2:
+     # A restart cannot change the reported version, so log and stop retrying.
+     logging.fatal("RAPI version said %s, expecting 2", master_version)
+   break
try:
watcher = Watcher(options, notepad)
diff --git a/lib/utils.py b/lib/utils.py
index 2b3e785..8594779 100644
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -2148,6 +2148,19 @@ def EnsureDaemon(name):
return True
+def StopDaemon(name):
+  """Stop a daemon by running the DAEMON_UTIL "stop" command.
+
+  Returns True on success; on failure logs the error and returns False.
+  """
+  outcome = RunCmd([constants.DAEMON_UTIL, "stop", name])
+  if not outcome.failed:
+    return True
+  logging.error("Can't stop daemon '%s', failure %s, output: %s",
+                name, outcome.fail_reason, outcome.output)
+  return False
+
+
def WritePidFile(name):
"""Write the current process pidfile.
--
1.7.0.1