Filippo Giunchedi has uploaded a new change for review. https://gerrit.wikimedia.org/r/157805
Change subject: elasticsearch: handle request timeout and increase timeout ...................................................................... elasticsearch: handle request timeout and increase timeout Apparently requests deliberately does not offer an explicit max_retries option; see https://github.com/kennethreitz/requests/pull/1219 and https://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request. Also increase the timeout to something slightly larger than a multiple of 3 (the default goes from 2 to 4 seconds), as suggested here: http://docs.python-requests.org/en/latest/user/advanced/#timeouts Change-Id: I57f25489a46f46832b09a6e7dab6b1e72e82a87d --- M modules/elasticsearch/files/nagios/check_elasticsearch.py 1 file changed, 22 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/05/157805/1 diff --git a/modules/elasticsearch/files/nagios/check_elasticsearch.py b/modules/elasticsearch/files/nagios/check_elasticsearch.py index f84e985..eb5b260 100644 --- a/modules/elasticsearch/files/nagios/check_elasticsearch.py +++ b/modules/elasticsearch/files/nagios/check_elasticsearch.py @@ -106,12 +106,28 @@ return EX_CRITICAL +def fetch_url(url, timeout, retries): + exception = None + + for i in range(retries): + try: + cluster_health_url = options.url + '/_cluster/health' + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response + except requests.exceptions.Timeout, e: + exception = e + continue + else: + if exception: + raise exception + + def check_elasticsearch(options): try: cluster_health_url = options.url + '/_cluster/health' - response = requests.get(cluster_health_url, - timeout=options.timeout) - response.raise_for_status() + response = fetch_url(cluster_health_url, options.timeout, + options.retries) except requests.exceptions.RequestException, e: log_critical('%s error while fetching: %s' % (cluster_health_url, e)) return EX_CRITICAL @@ -141,8 +157,10 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter) 
parser.add_argument('--url', default='http://localhost:9200', help='Elasticsearch endpoint') - parser.add_argument('--timeout', default=2, type=int, metavar='SECONDS', + parser.add_argument('--timeout', default=4, type=int, metavar='SECONDS', help='Timeout for the request to complete') + parser.add_argument('--retries', default=2, type=int, metavar='INTEGER', + help='How many times to retry a request on timeout') parser.add_argument('--shards-inactive', default='>=0.1%', dest='shards_inactive', metavar='THRESHOLD', help='Threshold to check for inactive shards ' -- To view, visit https://gerrit.wikimedia.org/r/157805 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I57f25489a46f46832b09a6e7dab6b1e72e82a87d Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits