Filippo Giunchedi has submitted this change and it was merged.

Change subject: Add es-tool upgrade-fast and stopping paranoia
......................................................................


Add es-tool upgrade-fast and stopping paranoia

1. Add a new command `es-tool upgrade-fast` which will install the newest
elasticsearch from apt.
2. Be much more paranoid during restart-fast (and upgrade-fast) and make
really really sure that elasticsearch has stopped before moving on.

We achieve this paranoia by running ```ps aux``` and looking for processes
that use the Elasticsearch jar. This isn't perfect but it's unlikely to give
many false positives or false negatives.

Change-Id: Ic5022130f01b4522bdfa8313b48c68a2cb1e827c
---
M modules/elasticsearch/files/es-tool
1 file changed, 98 insertions(+), 6 deletions(-)

Approvals:
  Filippo Giunchedi: Verified; Looks good to me, approved



diff --git a/modules/elasticsearch/files/es-tool 
b/modules/elasticsearch/files/es-tool
index 23c3d8a..256473f 100755
--- a/modules/elasticsearch/files/es-tool
+++ b/modules/elasticsearch/files/es-tool
@@ -3,6 +3,8 @@
 import argparse
 import ipaddr
 import os
+import logging
+import re
 import subprocess
 import sys
 import time
@@ -14,6 +16,16 @@
 
 # How many times to try re-enabling allocation
 REPLICATION_ENABLE_ATTEMPTS = 10
+
+
+# We pipe things here....
+DEV_NULL = open(os.devnull, 'w')
+
+# Let's use a basic logging configuration so the Elasticsearch client doesn't
+# complain. We go with ERROR here so curl doesn't log warnings when it can't
+# connect to Elasticsearch. We already catch the exceptions for that and
+# handle them.
+logging.basicConfig(level=logging.ERROR)
 
 
 # Helper functions go here
@@ -121,7 +133,7 @@
     sys.stdout.flush()
 
 
-def es_restart_fast():
+def es_restart_fast(while_down):
     # Sanity checks
     if os.getuid() != 0:
         print "Must be run as root"
@@ -137,13 +149,60 @@
         return os.EX_UNAVAILABLE
     printu("ok\n")
 
-    # Actually restart the service
+    printu("Stopping elasticsearch...")
     try:
-        subprocess.check_call(["service", "elasticsearch", "restart"])
+        process_args = ["service", "elasticsearch", "stop"]
+        subprocess.check_call(process_args, stdout=DEV_NULL)
     except CalledProcessError:
-        print "failed! -- You will still need to enable replication again",
-        print "with `es-tool start-replication`"
+        print "failed! Elasticserch is probably not stopped but you will ",
+        print "need to enable replication again with",
+        print "`es-tool start-replication`"
         return os.EX_UNAVAILABLE
+    printu("ok\n")
+
+    printu("Double checking elasticsearch is stopped...")
+    end = time.time()
+    contains_re = re.compile("java.*elasticsearch-\\d+\\.\\d+\\.\\d\\.jar")
+    while True:
+        try:
+            ps = subprocess.Popen(["ps", "auxww"], stdout=subprocess.PIPE)
+            ps_out, _ = ps.communicate()
+            if contains_re.search(ps_out):
+                if time.time() > end + 240:
+                    print "betrayal! Elasticserch never stopped! You will",
+                    print "need to enable replication again with",
+                    print "`es-tool start-replication`"
+                    return os.EX_UNAVAILABLE
+                else:
+                    printu(".")
+                    time.sleep(1)
+                continue
+            break
+        except CalledProcessError:
+            print "failed to complete the check! Elasticsearch might be",
+            print "stopped or stopping so so you",
+            print "will have to start it again with `sudo service",
+            print "elasticsearch start and then reenable replication",
+            print "with `es-tool start-replication`"
+
+            return os.EX_UNAVAILABLE
+    printu("ok\n")
+
+    error = while_down()
+    if error:
+        return error
+
+    printu("Starting elasticsearch...")
+    try:
+        process_args = ["service", "elasticsearch", "start"]
+        subprocess.check_call(process_args, stdout=DEV_NULL)
+    except CalledProcessError:
+            print "failed! Elasticsearch is probably still stopped so you",
+            print "will have to start it again with `sudo service",
+            print "elasticsearch start and then reenable replication",
+            print "with `es-tool start-replication`"
+        return os.EX_UNAVAILABLE
+    printu("ok\n")
 
     # Wait for it to come back alive
     printu("Waiting for Elasticsearch...")
@@ -194,6 +253,38 @@
     return os.EX_OK
 
 
+def es_upgrade_fast():
+    def upgrade_commands():
+        printu("Updating apt...")
+        try:
+            subprocess.check_call(["apt-get", "update"], stdout=DEV_NULL)
+        except CalledProcessError:
+            print "failed! Elasticsearch is still stopped so you",
+            print "will have to start it again with `sudo service",
+            print "elasticsearch start and then reenable replication",
+            print "with `es-tool start-replication`"
+            return os.EX_UNAVAILABLE
+        printu("ok\n")
+
+        printu("Installing Elasticsearch...")
+        try:
+            process_args = [
+                "apt-get",
+                "-o", 'Dpkg::Options::="--force-confdef"',
+                "-o", 'Dpkg::Options::="--force-confold"',
+                "install", "elasticsearch"]
+            subprocess.check_call(process_args, stdout=DEV_NULL)
+        except CalledProcessError:
+            print "failed! Elasticsearch is still stopped so you",
+            print "will have to start it again with `sudo service",
+            print "elasticsearch start and then reenable replication",
+            print "with `es-tool start-replication`"
+            return os.EX_UNAVAILABLE
+        printu("ok\n")
+
+    es_restart_fast(upgrade_commands)
+
+
 def es_start_replication():
     if set_allocation_state("all"):
         print "All replication enabled"
@@ -236,7 +327,8 @@
 commands = {
     "ban-node": es_ban_node,
     "health": es_health,
-    "restart-fast": es_restart_fast,
+    "restart-fast": (lambda: es_restart_fast(lambda: 0)),
+    "upgrade-fast": es_upgrade_fast,
     "start-replication": es_start_replication,
     "stop-replication": es_stop_replication,
     "unban-node": es_unban_node,

-- 
To view, visit https://gerrit.wikimedia.org/r/224548
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic5022130f01b4522bdfa8313b48c68a2cb1e827c
Gerrit-PatchSet: 6
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: Chad <ch...@wikimedia.org>
Gerrit-Reviewer: DCausse <dcau...@wikimedia.org>
Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to