Mark Bergsma has submitted this change and it was merged. Change subject: jenkins: add in ganglia monitoring ......................................................................
jenkins: add in ganglia monitoring Ganglia has a plugin to monitor Jenkins job queues. Might want to have it on the Jenkins master. Source: https://github.com/ganglia/gmond_python_modules I have edited the .pyconf to points to http://127.0.0.1:8080/ci and tested the entry point on gallium using curl: $ curl 'http://127.0.0.1:8080/ci/api/json' // bunch of json bug: 47884 Change-Id: Ie347ec6ef23224b01740f03ea6360fcc3b3f0ccd --- A files/ganglia/plugins/jenkins.py A files/ganglia/plugins/jenkins.pyconf M manifests/role/jenkins.pp 3 files changed, 353 insertions(+), 0 deletions(-) Approvals: Mark Bergsma: Looks good to me, approved jenkins-bot: Verified diff --git a/files/ganglia/plugins/jenkins.py b/files/ganglia/plugins/jenkins.py new file mode 100644 index 0000000..ac4fffb --- /dev/null +++ b/files/ganglia/plugins/jenkins.py @@ -0,0 +1,254 @@ +### This script reports jenkins metrics to ganglia. + +### License to use, modify, and distribute under the GPL +### http://www.gnu.org/licenses/gpl.txt +import logging +import os +import subprocess +import sys +import threading +import time +import traceback +import urllib2 +import json + +logging.basicConfig(level=logging.ERROR) + +_Worker_Thread = None + +class UpdateJenkinsThread(threading.Thread): + + def __init__(self, params): + threading.Thread.__init__(self) + self.running = False + self.shuttingdown = False + self.metrics = {} + self.settings = {} + self.refresh_rate = 60 + self.base_url = params['base_url'] + self._metrics_lock = threading.Lock() + self._settings_lock = threading.Lock() + + def shutdown(self): + self.shuttingdown = True + if not self.running: + return + self.join() + + def run(self): + global _Lock + + self.running = True + + while not self.shuttingdown: + time.sleep(self.refresh_rate) + self.refresh_metrics() + + self.running = False + + @staticmethod + def _get_jenkins_statistics(url): + + url += '/api/json' + url += '?tree=jobs[color],overallLoad[busyExecutors[min[latest]],queueLength[min[latest]],totalExecutors[min[latest]]]' + + c = urllib2.urlopen(url, None, 2) + json_data = c.read() + c.close() + + data = json.loads(json_data) + + result = {} + result['jenkins_overallload_busy_executors'] = data['overallLoad']['busyExecutors']['min']['latest'] + result['jenkins_overallload_queue_length'] = data['overallLoad']['queueLength']['min']['latest'] + result['jenkins_overallload_total_executors'] = data['overallLoad']['totalExecutors']['min']['latest'] + result['jenkins_jobs_total'] = len(data['jobs']) + result['jenkins_jobs_red'] = result['jenkins_jobs_yellow'] = result['jenkins_jobs_grey'] = result['jenkins_jobs_disabled'] = result['jenkins_jobs_aborted'] = result['jenkins_jobs_notbuilt'] = result['jenkins_jobs_blue'] = 0 + + # Possible values: http://javadoc.jenkins-ci.org/hudson/model/BallColor.html + colors = ['red', 'yellow', 'grey', 'disabled', 'aborted', 'notbuilt', 'blue'] + for color in colors: + result['jenkins_jobs_' + color] = 0 + for job in data['jobs']: + color = job['color'] + for c in colors: + if color == c or color == c + '_anime': + result['jenkins_jobs_' + c] += 1 + return result + + def refresh_metrics(self): + logging.debug('refresh metrics') + + try: + logging.debug(' opening URL: ' + str(self.base_url)) + data = UpdateJenkinsThread._get_jenkins_statistics(self.base_url) + except: + logging.warning('error refreshing metrics') + logging.warning(traceback.print_exc(file=sys.stdout)) + + try: + self._metrics_lock.acquire() + self.metrics = {} + for k, v in data.items(): + self.metrics[k] = v + except: + logging.warning('error refreshing metrics') + logging.warning(traceback.print_exc(file=sys.stdout)) + return False + + finally: + self._metrics_lock.release() + + if not self.metrics: + logging.warning('error refreshing metrics') + return False + + logging.debug('success refreshing metrics') + logging.debug('metrics: ' + str(self.metrics)) + + return True + + def metric_of(self, name): + logging.debug('getting metric: ' + name) + + try: + if name in self.metrics: + try: + self._metrics_lock.acquire() + logging.debug('metric: %s = %s' % (name, self.metrics[name])) + return self.metrics[name] + finally: + self._metrics_lock.release() + except: + logging.warning('failed to fetch ' + name) + return 0 + + def setting_of(self, name): + logging.debug('getting setting: ' + name) + + try: + if name in self.settings: + try: + self._settings_lock.acquire() + logging.debug('setting: %s = %s' % (name, self.settings[name])) + return self.settings[name] + finally: + self._settings_lock.release() + except: + logging.warning('failed to fetch ' + name) + return 0 + +def metric_init(params): + logging.debug('init: ' + str(params)) + global _Worker_Thread + + METRIC_DEFAULTS = { + 'units': 'jobs', + 'groups': 'jenkins', + 'slope': 'both', + 'value_type': 'uint', + 'format': '%d', + 'description': '', + 'call_back': metric_of + } + + descriptions = dict( + jenkins_overallload_busy_executors = { + 'value_type': 'float', + 'format': '%.3f', + 'units': 'executors', + 'description': 'Number of busy executors (master and slaves)'}, + jenkins_overallload_queue_length = { + 'value_type': 'float', + 'format': '%.3f', + 'units': 'queued items', + 'description': 'Length of the queue (master and slaves)'}, + jenkins_overallload_total_executors = { + 'value_type': 'float', + 'format': '%.3f', + 'units': 'executors', + 'description': 'Number of executors (master and slaves)'}, + jenkins_jobs_total = { + 'description': 'Total number of jobs'}, + jenkins_jobs_blue = { + 'description': 'Blue jobs'}, + jenkins_jobs_red = { + 'description': 'Red jobs'}, + jenkins_jobs_yellow = { + 'description': 'Yellow jobs'}, + jenkins_jobs_grey = { + 'description': 'Grey jobs'}, + jenkins_jobs_disabled = { + 'description': 'Disabled jobs'}, + jenkins_jobs_aborted = { + 'description': 'Aborted jobs'}, + jenkins_jobs_notbuilt = { + 'description': 'Not-built jobs'}) + + if _Worker_Thread is not None: + raise Exception('Worker thread already exists') + + _Worker_Thread = UpdateJenkinsThread(params) + _Worker_Thread.refresh_metrics() + _Worker_Thread.start() + + descriptors = [] + + for name, desc in descriptions.iteritems(): + d = desc.copy() + d['name'] = str(name) + [ d.setdefault(key, METRIC_DEFAULTS[key]) for key in METRIC_DEFAULTS.iterkeys() ] + descriptors.append(d) + return descriptors + +def metric_of(name): + global _Worker_Thread + return _Worker_Thread.metric_of(name) + +def setting_of(name): + global _Worker_Thread + return _Worker_Thread.setting_of(name) + +def metric_cleanup(): + global _Worker_Thread + if _Worker_Thread is not None: + _Worker_Thread.shutdown() + logging.shutdown() + pass + +if __name__ == '__main__': + from optparse import OptionParser + + try: + logging.debug('running from the cmd line') + parser = OptionParser() + parser.add_option('-u', '--URL', dest='base_url', default='http://127.0.0.1:8080', help='Base-URL for jenkins api (default: http://127.0.0.1:8080)') + parser.add_option('-q', '--quiet', dest='quiet', action='store_true', default=False) + parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False) + + (options, args) = parser.parse_args() + + descriptors = metric_init({ + 'base_url': options.base_url, + }) + + if options.debug: + from pprint import pprint + pprint(descriptors) + + for d in descriptors: + v = d['call_back'](d['name']) + + if not options.quiet: + print ' {0}: {1} {2} [{3}]' . format(d['name'], v, d['units'], d['description']) + + os._exit(1) + + except KeyboardInterrupt: + time.sleep(0.2) + os._exit(1) + except StandardError: + traceback.print_exc() + os._exit(1) + finally: + metric_cleanup() diff --git a/files/ganglia/plugins/jenkins.pyconf b/files/ganglia/plugins/jenkins.pyconf new file mode 100644 index 0000000..086186a --- /dev/null +++ b/files/ganglia/plugins/jenkins.pyconf @@ -0,0 +1,83 @@ +# + +modules { + module { + name = 'jenkins' + language = 'python' + + param base_url { + value = 'http://127.0.0.1:8080/ci' + } + } +} + +collection_group { + collect_every = 10 + time_threshold = 20 + + metric { + name = 'jenkins_overallload_busy_executors' + title = 'Number of busy executors on master and slaves' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_overallload_queue_length' + title = 'Length of the queue on master and slaves' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_overallload_total_executors' + title = 'Number of executors on master and slaves' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_total' + title = 'Total number of jobs' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_blue' + title = 'Number of jobs with status blue' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_red' + title = 'Number of jobs with status red' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_yellow' + title = 'Number of jobs with status yellow' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_grey' + title = 'Number of jobs with status grey' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_aborted' + title = 'Number of jobs with status aborted' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_notbuilt' + title = 'Number of jobs with status notbuilt' + value_threshold = 1.0 + } + + metric { + name = 'jenkins_jobs_disabled' + title = 'Number of jobs with status disabled' + value_threshold = 1.0 + } +} diff --git a/manifests/role/jenkins.pp b/manifests/role/jenkins.pp index 15e14e3..305c8c4 100644 --- a/manifests/role/jenkins.pp +++ b/manifests/role/jenkins.pp @@ -21,6 +21,22 @@ ], } + # Ganglia monitoring for Jenkins + file { '/usr/lib/ganglia/python_modules/jenkins.py': + source => 'puppet:///files/ganglia/plugins/jenkins.py', + owner => 'root', + group => 'root', + mode => '0444', + notify => Service[gmond], + } + file { '/etc/ganglia/conf.d/jenkins.pyconf': + source => 'puppet:///files/ganglia/plugins/jenkins.pyconf', + owner => 'root', + group => 'root', + mode => '0444', + notify => Service[gmond], + } + } class role::jenkins::slave::production { -- To view, visit https://gerrit.wikimedia.org/r/66960 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ie347ec6ef23224b01740f03ea6360fcc3b3f0ccd Gerrit-PatchSet: 4 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Hashar <has...@free.fr> Gerrit-Reviewer: Asher <afeld...@wikimedia.org> Gerrit-Reviewer: Hashar <has...@free.fr> Gerrit-Reviewer: Mark Bergsma <m...@wikimedia.org> Gerrit-Reviewer: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits