Filippo Giunchedi has submitted this change and it was merged. Change subject: Add es-tool upgrade-fast and stopping paranoia ......................................................................
Add es-tool upgrade-fast and stopping paranoia 1. Add a new command `es-tool upgrade-fast` which will install the newest elasticsearch from apt. 2. Be much more paranoid during restart-fast (and upgrade-fast) and make really really sure that elasticsearch has stopped before moving on. We achieve this paranoia by running ```ps aux``` and looking for processes that use the Elasticsearch jar. This isn't perfect but it's unlikely to give many false positives or false negatives. Change-Id: Ic5022130f01b4522bdfa8313b48c68a2cb1e827c --- M modules/elasticsearch/files/es-tool 1 file changed, 98 insertions(+), 6 deletions(-) Approvals: Filippo Giunchedi: Verified; Looks good to me, approved diff --git a/modules/elasticsearch/files/es-tool b/modules/elasticsearch/files/es-tool index 23c3d8a..256473f 100755 --- a/modules/elasticsearch/files/es-tool +++ b/modules/elasticsearch/files/es-tool @@ -3,6 +3,8 @@ import argparse import ipaddr import os +import logging +import re import subprocess import sys import time @@ -14,6 +16,16 @@ # How many times to try re-enabling allocation REPLICATION_ENABLE_ATTEMPTS = 10 + + +# We pipe things here.... +DEV_NULL = open(os.devnull, 'w') + +# Lets use a basic logging configuration so the Elasticsearch client doesn't +# complain. We go with ERROR here so curl doesn't log warnings when it can't +# connect to Elasticsearch. We alreaady catch the exceptions for that and +# handle them. 
+logging.basicConfig(level=logging.ERROR) # Helper functions go here @@ -121,7 +133,7 @@ sys.stdout.flush() -def es_restart_fast(): +def es_restart_fast(while_down): # Sanity checks if os.getuid() != 0: print "Must be run as root" @@ -137,13 +149,60 @@ return os.EX_UNAVAILABLE printu("ok\n") - # Actually restart the service + printu("Stopping elasticsearch...") try: - subprocess.check_call(["service", "elasticsearch", "restart"]) + process_args = ["service", "elasticsearch", "stop"] + subprocess.check_call(process_args, stdout=DEV_NULL) except CalledProcessError: - print "failed! -- You will still need to enable replication again", - print "with `es-tool start-replication`" + print "failed! Elasticserch is probably not stopped but you will ", + print "need to enable replication again with", + print "`es-tool start-replication`" return os.EX_UNAVAILABLE + printu("ok\n") + + printu("Double checking elasticsearch is stopped...") + end = time.time() + contains_re = re.compile("java.*elasticsearch-\\d+\\.\\d+\\.\\d\\.jar") + while True: + try: + ps = subprocess.Popen(["ps", "auxww"], stdout=subprocess.PIPE) + ps_out, _ = ps.communicate() + if contains_re.search(ps_out): + if time.time() > end + 240: + print "betrayal! Elasticserch never stopped! You will", + print "need to enable replication again with", + print "`es-tool start-replication`" + return os.EX_UNAVAILABLE + else: + printu(".") + time.sleep(1) + continue + break + except CalledProcessError: + print "failed to complete the check! 
Elasticsearch might be", + print "stopped or stopping so so you", + print "will have to start it again with `sudo service", + print "elasticsearch start and then reenable replication", + print "with `es-tool start-replication`" + + return os.EX_UNAVAILABLE + printu("ok\n") + + error = while_down() + if error: + return error + + printu("Starting elasticsearch...") + try: + process_args = ["service", "elasticsearch", "start"] + subprocess.check_call(process_args, stdout=DEV_NULL) + except CalledProcessError: + print "failed! Elasticsearch is probably still stopped so you", + print "will have to start it again with `sudo service", + print "elasticsearch start and then reenable replication", + print "with `es-tool start-replication`" + return os.EX_UNAVAILABLE + printu("ok\n") # Wait for it to come back alive printu("Waiting for Elasticsearch...") @@ -194,6 +253,38 @@ return os.EX_OK +def es_upgrade_fast(): + def upgrade_commands(): + printu("Updating apt...") + try: + subprocess.check_call(["apt-get", "update"], stdout=DEV_NULL) + except CalledProcessError: + print "failed! Elasticsearch is still stopped so you", + print "will have to start it again with `sudo service", + print "elasticsearch start and then reenable replication", + print "with `es-tool start-replication`" + return os.EX_UNAVAILABLE + printu("ok\n") + + printu("Installing Elasticsearch...") + try: + process_args = [ + "apt-get", + "-o", 'Dpkg::Options::="--force-confdef"', + "-o", 'Dpkg::Options::="--force-confold"', + "install", "elasticsearch"] + subprocess.check_call(process_args, stdout=DEV_NULL) + except CalledProcessError: + print "failed! 
Elasticsearch is still stopped so you", + print "will have to start it again with `sudo service", + print "elasticsearch start and then reenable replication", + print "with `es-tool start-replication`" + return os.EX_UNAVAILABLE + printu("ok\n") + + es_restart_fast(upgrade_commands) + + def es_start_replication(): if set_allocation_state("all"): print "All replication enabled" @@ -236,7 +327,8 @@ commands = { "ban-node": es_ban_node, "health": es_health, - "restart-fast": es_restart_fast, + "restart-fast": (lambda: es_restart_fast(lambda: 0)), + "upgrade-fast": es_upgrade_fast, "start-replication": es_start_replication, "stop-replication": es_stop_replication, "unban-node": es_unban_node, -- To view, visit https://gerrit.wikimedia.org/r/224548 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ic5022130f01b4522bdfa8313b48c68a2cb1e827c Gerrit-PatchSet: 6 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits