Dzahn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/392763 )
Change subject: Revert "Revert "apache: remove ganglia monitoring"" ...................................................................... Revert "Revert "apache: remove ganglia monitoring"" This reverts commit 63686242937af81b9c67124998c032dfec5842ca. Change-Id: I3001acbde8b318ffc75944af7a48381d52d6d64a --- M hieradata/role/common/mediawiki/appserver.yaml M hieradata/role/common/mediawiki/appserver/api.yaml M hieradata/role/common/mediawiki/appserver/canary_api.yaml M hieradata/role/common/mediawiki/canary_appserver.yaml M hieradata/role/common/mediawiki/imagescaler.yaml M hieradata/role/common/mediawiki/jobrunner.yaml M hieradata/role/common/mediawiki/memcached.yaml M hieradata/role/common/mediawiki/videoscaler.yaml D modules/apache/files/apache_status.py D modules/apache/files/apache_status.pyconf M modules/apache/manifests/monitoring.pp 11 files changed, 8 insertions(+), 573 deletions(-) Approvals: jenkins-bot: Verified Dzahn: Looks good to me, approved diff --git a/hieradata/role/common/mediawiki/appserver.yaml b/hieradata/role/common/mediawiki/appserver.yaml index 28e1f2f..7d14664 100644 --- a/hieradata/role/common/mediawiki/appserver.yaml +++ b/hieradata/role/common/mediawiki/appserver.yaml @@ -18,3 +18,4 @@ apache::logrotate::rotate: 12 nutcracker::verbosity: "4" role::mediawiki::webserver::tls: true +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/appserver/api.yaml b/hieradata/role/common/mediawiki/appserver/api.yaml index 1195204..7f01396 100644 --- a/hieradata/role/common/mediawiki/appserver/api.yaml +++ b/hieradata/role/common/mediawiki/appserver/api.yaml @@ -18,3 +18,4 @@ apache::logrotate::rotate: 12 nutcracker::verbosity: "4" role::mediawiki::webserver::tls: true +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/appserver/canary_api.yaml b/hieradata/role/common/mediawiki/appserver/canary_api.yaml index 99cde6f..575a080 100644 --- a/hieradata/role/common/mediawiki/appserver/canary_api.yaml +++ b/hieradata/role/common/mediawiki/appserver/canary_api.yaml @@ -23,3 +23,4 @@ apache::logrotate::rotate: 12 nutcracker::verbosity: "4" role::mediawiki::webserver::tls: true +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/canary_appserver.yaml b/hieradata/role/common/mediawiki/canary_appserver.yaml index a33df65..10e7a9e 100644 --- a/hieradata/role/common/mediawiki/canary_appserver.yaml +++ b/hieradata/role/common/mediawiki/canary_appserver.yaml @@ -23,3 +23,4 @@ apache::logrotate::rotate: 12 nutcracker::verbosity: "4" role::mediawiki::webserver::tls: true +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/imagescaler.yaml b/hieradata/role/common/mediawiki/imagescaler.yaml index 201ab70..7a6a277 100644 --- a/hieradata/role/common/mediawiki/imagescaler.yaml +++ b/hieradata/role/common/mediawiki/imagescaler.yaml @@ -13,3 +13,4 @@ light_process_count: "10" apache::mpm::mpm: worker role::mediawiki::webserver::tls: true +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/jobrunner.yaml b/hieradata/role/common/mediawiki/jobrunner.yaml index c638464..5f35732 100644 --- a/hieradata/role/common/mediawiki/jobrunner.yaml +++ b/hieradata/role/common/mediawiki/jobrunner.yaml @@ -21,3 +21,4 @@ role::lvs::realserver::pools: hhvm: lvs_name: jobrunner +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/memcached.yaml b/hieradata/role/common/mediawiki/memcached.yaml index 70221d7..c695d12 100644 --- a/hieradata/role/common/mediawiki/memcached.yaml +++ b/hieradata/role/common/mediawiki/memcached.yaml @@ -22,3 +22,4 @@ profile::memcached::extended_options: - 'slab_reassign' profile::memcached::port: '11211' +standard::has_ganglia: false diff --git a/hieradata/role/common/mediawiki/videoscaler.yaml b/hieradata/role/common/mediawiki/videoscaler.yaml index 3a04f72..40eaeb1 100644 --- a/hieradata/role/common/mediawiki/videoscaler.yaml +++ b/hieradata/role/common/mediawiki/videoscaler.yaml @@ -13,3 +13,4 @@ thread_count: 15 max_execution_time: 86400 profile::mediawiki::jobrunner::load_factor: 0.7 +standard::has_ganglia: false diff --git a/modules/apache/files/apache_status.py b/modules/apache/files/apache_status.py deleted file mode 100755 index 15c0f2b..0000000 --- a/modules/apache/files/apache_status.py +++ /dev/null @@ -1,439 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import time -import urllib2 -import traceback -import re -import copy - -# global to store state for "total accesses" -METRICS = { - 'time': 0, - 'data': {} -} - -LAST_METRICS = copy.deepcopy(METRICS) -METRICS_CACHE_MAX = 5 - -# Metric prefix -NAME_PREFIX = "ap_" -SSL_NAME_PREFIX = "apssl_" - -SERVER_STATUS_URL = "" - -descriptors = list() -Desc_Skel = {} -Scoreboard = { - NAME_PREFIX + 'waiting': { - 'key': '_', - 'desc': 'Waiting for Connection', - }, - NAME_PREFIX + 'starting': { - 'key': 'S', - 'desc': 'Starting up', - }, - NAME_PREFIX + 'reading_request': { - 'key': 'R', - 'desc': 'Reading Request', - }, - NAME_PREFIX + 'sending_reply': { - 'key': 'W', - 'desc': 'Sending Reply', - }, - NAME_PREFIX + 'keepalive': { - 'key': 'K', - 'desc': 'Keepalive (read)', - }, - NAME_PREFIX + 'dns_lookup': { - 'key': 'D', - 'desc': 'DNS Lookup', - }, - NAME_PREFIX + 'closing': { - 'key': 'C', - 'desc': 'Closing connection', - }, - NAME_PREFIX + 'logging': { - 'key': 'L', - 'desc': 'Logging', - }, - NAME_PREFIX + 'gracefully_fin': { - 'key': 'G', - 'desc': 'Gracefully finishing', - }, - NAME_PREFIX + 'idle': { - 'key': 'I', - 'desc': 'Idle cleanup of worker', - }, - NAME_PREFIX + 'open_slot': { - 'key': '.', - 'desc': 'Open slot with no current process', - }, -} -Scoreboard_bykey = dict([(v["key"], k) for (k, v) in Scoreboard.iteritems()]) - -SSL_REGEX = re.compile( - '^(cache type:) (.*)(<b>)(?P<shared_mem>[0-9]+)(</b> bytes, current sessio' - 'ns: <b>)(?P<current_sessions>[0-9]+)(</b><br>subcaches: <b>)(?P<num_subca' - 'ches>[0-9]+)(</b>, indexes per subcache: <b>)(?P<indexes_per_subcache>[0-' - '9]+)(</b><br>)(.*)(<br>index usage: <b>)(?P<index_usage>[0-9]+)(%</b>, ca' - 'che usage: <b>)(?P<cache_usage>[0-9]+)(%</b><br>total sessions stored sin' - 'ce starting: <b>)(?P<sessions_stored>[0-9]+)(</b><br>total sessions expir' - 'ed since starting: <b>)(?P<sessions_expired>[0-9]+)(</b><br>total \(pre-e' - 'xpiry\) sessions scrolled out of the cache: <b>)(?P<sessions_scrolled_out' - 'of_cache>[0-9]+)(</b><br>total retrieves since starting: <b>)(?P<retrieve' - 's_hit>[0-9]+)(</b> hit, <b>)(?P<retrieves_miss>[0-9]+)(</b> miss<br>total' - ' removes since starting: <b>)(?P<removes_hit>[0-9]+)(</b> hit, <b>)(?P<re' - 'moves_miss>[0-9]+)' -) - -Metric_Map = { - 'Uptime': NAME_PREFIX + "uptime", - 'IdleWorkers': NAME_PREFIX + "idle_workers", - 'BusyWorkers': NAME_PREFIX + "busy_workers", - 'Total kBytes': NAME_PREFIX + "bytes", - 'CPULoad': NAME_PREFIX + "cpuload", - "Total Accesses": NAME_PREFIX + "rps" -} - - -def get_metrics(): - - global METRICS, LAST_METRICS, SERVER_STATUS_URL, COLLECT_SSL - - if (time.time() - METRICS['time']) > METRICS_CACHE_MAX: - - metrics = dict([(k, 0) for k in Scoreboard.keys()]) - - # This is the short server-status. Lacks SSL metrics - try: - req = urllib2.Request(SERVER_STATUS_URL + "?auto") - - # Download the status file - res = urllib2.urlopen(req) - - for line in res: - split_line = line.rstrip().split(": ") - long_metric_name = split_line[0] - if long_metric_name == "Scoreboard": - for sck in split_line[1]: - metrics[Scoreboard_bykey[sck]] += 1 - else: - if long_metric_name in Metric_Map: - metric_name = Metric_Map[long_metric_name] - else: - metric_name = long_metric_name - metrics[metric_name] = split_line[1] - - except urllib2.URLError: - traceback.print_exc() - - # If we are collecting SSL metrics we'll do - if COLLECT_SSL: - - try: - req2 = urllib2.Request(SERVER_STATUS_URL) - - # Download the status file - res = urllib2.urlopen(req2) - - for line in res: - regMatch = SSL_REGEX.match(line) - if regMatch: - linebits = regMatch.groupdict() - for key in linebits: - # print SSL_NAME_PREFIX + key + "=" + linebits[key] - metrics[SSL_NAME_PREFIX + key] = linebits[key] - - except urllib2.URLError: - traceback.print_exc() - - LAST_METRICS = copy.deepcopy(METRICS) - METRICS = { - 'time': time.time(), - 'data': metrics - } - - return [METRICS, LAST_METRICS] - - -def get_value(name): - """Return a value for the requested metric""" - - metrics = get_metrics()[0] - - try: - result = metrics['data'][name] - except StandardError: - result = 0 - - return result - - -def get_delta(name): - """Return change over time for the requested metric""" - - # get metrics - [curr_metrics, last_metrics] = get_metrics() - - # If it's ap_bytes metric multiply result by 1024 - if name == NAME_PREFIX + "bytes": - multiplier = 1024 - else: - multiplier = 1 - - try: - delta = ( - multiplier * ( - float(curr_metrics['data'][name]) - - float(last_metrics['data'][name]) - ) / (curr_metrics['time'] - last_metrics['time']) - ) - if delta < 0: - print name + " is less 0" - delta = 0 - except KeyError: - delta = 0.0 - - return delta - - -def create_desc(prop): - d = Desc_Skel.copy() - for k, v in prop.iteritems(): - d[k] = v - return d - - -def metric_init(params): - global descriptors, Desc_Skel, SERVER_STATUS_URL, COLLECT_SSL - - print '[apache_status] Received the following parameters' - print params - - if "metric_group" not in params: - params["metric_group"] = "apache" - - Desc_Skel = { - 'name': 'XXX', - 'call_back': get_value, - 'time_max': 60, - 'value_type': 'uint', - 'units': 'proc', - 'slope': 'both', - 'format': '%d', - 'description': 'XXX', - 'groups': params["metric_group"], - } - - if "refresh_rate" not in params: - params["refresh_rate"] = 15 - - if "url" not in params: - params["url"] = "http://localhost:7070/server-status" - - if "collect_ssl" not in params: - params["collect_ssl"] = False - - SERVER_STATUS_URL = params["url"] - COLLECT_SSL = params["collect_ssl"] - - # IP:HOSTNAME - if "spoof_host" in params: - Desc_Skel["spoof_host"] = params["spoof_host"] - - descriptors.append(create_desc({ - "name": NAME_PREFIX + "rps", - "value_type": "float", - "units": "req/sec", - "call_back": get_delta, - "format": "%.3f", - "description": "request per second", - })) - - descriptors.append(create_desc({ - "name": NAME_PREFIX + "bytes", - "value_type": "float", - "units": "bytes/sec", - "call_back": get_delta, - "format": "%.3f", - "description": "bytes transferred per second", - })) - - descriptors.append(create_desc({ - "name": NAME_PREFIX + "cpuload", - "value_type": "float", - "units": "pct", - "format": "%.6f", - "call_back": get_value, - "description": "Pct of time CPU utilized", - })) - - descriptors.append(create_desc({ - "name": NAME_PREFIX + "busy_workers", - "value_type": "uint", - "units": "threads", - "format": "%u", - "call_back": get_value, - "description": "Busy threads", - })) - - descriptors.append(create_desc({ - "name": NAME_PREFIX + "idle_workers", - "value_type": "uint", - "units": "threads", - "format": "%u", - "call_back": get_value, - "description": "Idle threads", - })) - - descriptors.append(create_desc({ - "name": NAME_PREFIX + "uptime", - "value_type": "uint", - "units": "seconds", - "format": "%u", - "call_back": get_value, - "description": "Uptime", - })) - - for k, v in Scoreboard.iteritems(): - descriptors.append(create_desc({ - "name": k, - "call_back": get_value, - "description": v["desc"], - })) - - ########################################################################## - # SSL metrics - ########################################################################## - if params['collect_ssl']: - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "shared_mem", - "value_type": "float", - "units": "bytes", - "format": "%.3f", - "call_back": get_value, - "description": "Shared memory", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "current_sessions", - "value_type": "uint", - "units": "sessions", - "format": "%u", - "call_back": get_value, - "description": "Current sessions", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "num_subcaches", - "value_type": "uint", - "units": "subcaches", - "format": "%u", - "call_back": get_value, - "description": "Number of subcaches", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "indexes_per_subcache", - "value_type": "float", - "units": "indexes", - "format": "%.3f", - "call_back": get_value, - "description": "Subcaches", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "index_usage", - "value_type": "float", - "units": "pct", - "format": "%.3f", - "call_back": get_value, - "description": "Index usage", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "cache_usage", - "value_type": "float", - "units": "pct", - "format": "%.3f", - "call_back": get_value, - "description": "Cache usage", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "sessions_stored", - "value_type": "float", - "units": "sessions/sec", - "format": "%.3f", - "call_back": get_delta, - "description": "Sessions stored", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "sessions_expired", - "value_type": "float", - "units": "sessions/sec", - "format": "%.3f", - "call_back": get_delta, - "description": "Sessions expired", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "retrieves_hit", - "value_type": "float", - "units": "retrieves/sec", - "format": "%.3f", - "call_back": get_delta, - "description": "Retrieves Hit", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "retrieves_miss", - "value_type": "float", - "units": "retrieves/sec", - "format": "%.3f", - "call_back": get_delta, - "description": "Retrieves Miss", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "removes_hit", - "value_type": "float", - "units": "removes/sec", - "format": "%.3f", - "call_back": get_delta, - "description": "Removes Hit", - })) - - descriptors.append(create_desc({ - "name": SSL_NAME_PREFIX + "removes_miss", - "value_type": "float", - "units": "removes/sec", - "format": "%.3f", - "call_back": get_delta, - "description": "Removes Miss", - })) - - return descriptors - - -if __name__ == '__main__': - try: - params = { - 'url': 'http://localhost:7070/server-status', - 'collect_ssl': False - } - metric_init(params) - while True: - for d in descriptors: - v = d['call_back'](d['name']) - if d['name'] == NAME_PREFIX + "rps": - print 'value for %s is %.4f' % (d['name'], v) - else: - print 'value for %s is %s' % (d['name'], v) - time.sleep(15) - except KeyboardInterrupt: - os._exit(1) diff --git a/modules/apache/files/apache_status.pyconf b/modules/apache/files/apache_status.pyconf deleted file mode 100644 index ca5fecd..0000000 --- a/modules/apache/files/apache_status.pyconf +++ /dev/null @@ -1,113 +0,0 @@ -modules { - module { - name = "apache_status" - language = "python" - param url { - value = "http://127.0.0.1:80/server-status" - } - - # Which metric group should these metrics be put into - param metric_group { - value = "apache" - } - - # Collecting SSL metrics under Apache 2.2 appears to cause a memory leak - # in mod_status. Watch Apache memory utilization if you enable them - param collect_ssl { - value = False - } - - - } -} - -collection_group { - collect_every = 30 - time_threshold = 90 - - metric { - name = "ap_busy_workers" - title = "Busy Threads" - value_threshold = 0 - } - metric { - name = "ap_idle_workers" - title = "Idle Threads" - value_threshold = 0 - } - metric { - name = "ap_logging" - title = "Logging" - value_threshold = 0 - } - metric { - name = "ap_open_slot" - title = "Open slot with no current process" - value_threshold = 0 - } - metric { - name = "ap_reading_request" - title = "Reading Request" - value_threshold = 0 - } - metric { - name = "ap_waiting" - title = "Waiting for Connection" - value_threshold = 0 - } - metric { - name = "ap_sending_reply" - title = "Sending Reply" - value_threshold = 0 - } - metric { - name = "ap_idle" - title = "Idle cleanup of worker" - value_threshold = 0 - } - metric { - name = "ap_dns_lookup" - title = "DNS Lookup" - value_threshold = 0 - } - metric { - name = "ap_closing" - title = "Closing connection" - value_threshold = 0 - } - metric { - name = "ap_starting" - title = "Starting up" - value_threshold = 0 - } - metric { - name = "ap_gracefully_fin" - title = "Gracefully finishing" - value_threshold = 0 - } - metric { - name = "ap_keepalive" - title = "Keepalive (read)" - value_threshold = 0 - } - - metric { - name = "ap_rps" - title = "Requests per second" - value_threshold = 0.0 - } - - metric { - name = "ap_cpuload" - title = "Pct of time CPU utilized" - value_threshold = 0.0 - } - -# Uncomment if you are collecting SSL metrics -# metric { -# name_match = "apssl_(.+)" -# value_threshold = 0.0 -# } - - -} diff --git a/modules/apache/manifests/monitoring.pp b/modules/apache/manifests/monitoring.pp index aebe2f3..cfdb447 100644 --- a/modules/apache/manifests/monitoring.pp +++ b/modules/apache/manifests/monitoring.pp @@ -9,27 +9,6 @@ include ::apache::mod::status include ::standard - if $::standard::has_ganglia { - include ::ganglia - - file { '/usr/lib/ganglia/python_modules/apache_status.py': - source => 'puppet:///modules/apache/apache_status.py', - owner => 'root', - group => 'root', - mode => '0444', - require => Package['ganglia-monitor'], - } - - file { '/etc/ganglia/conf.d/apache_status.pyconf': - source => 'puppet:///modules/apache/apache_status.pyconf', - owner => 'root', - group => 'root', - mode => '0444', - require => File['/usr/lib/ganglia/python_modules/apache_status.py'], - notify => Service['ganglia-monitor'], - } - } - # Use `links -dump http://127.0.0.1/server-status` to generate # an Apache status report. require_package('links') -- To view, visit https://gerrit.wikimedia.org/r/392763 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I3001acbde8b318ffc75944af7a48381d52d6d64a Gerrit-PatchSet: 3 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Dzahn <dz...@wikimedia.org> Gerrit-Reviewer: Dzahn <dz...@wikimedia.org> Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org> Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org> Gerrit-Reviewer: Volans <rcocci...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits