Mark Bergsma has submitted this change and it was merged.

Change subject: jenkins: add in ganglia monitoring
......................................................................


jenkins: add in ganglia monitoring

Ganglia has a plugin to monitor Jenkins job queues. We might want to have
it on the Jenkins master.

Source:
https://github.com/ganglia/gmond_python_modules

I have edited the .pyconf to point to http://127.0.0.1:8080/ci and
tested the entry point on gallium using curl:

 $ curl 'http://127.0.0.1:8080/ci/api/json'
 // bunch of json

bug: 47884
Change-Id: Ie347ec6ef23224b01740f03ea6360fcc3b3f0ccd
---
A files/ganglia/plugins/jenkins.py
A files/ganglia/plugins/jenkins.pyconf
M manifests/role/jenkins.pp
3 files changed, 353 insertions(+), 0 deletions(-)

Approvals:
  Mark Bergsma: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/files/ganglia/plugins/jenkins.py b/files/ganglia/plugins/jenkins.py
new file mode 100644
index 0000000..ac4fffb
--- /dev/null
+++ b/files/ganglia/plugins/jenkins.py
@@ -0,0 +1,254 @@
+###  This script reports jenkins metrics to ganglia.
+
+###  License to use, modify, and distribute under the GPL
+###  http://www.gnu.org/licenses/gpl.txt
+import logging
+import os
+import subprocess
+import sys
+import threading
+import time
+import traceback
+import urllib2
+import json
+
+# Configure the root logger at ERROR level. The logging.debug() and
+# logging.warning() calls in this module are below that level.
+logging.basicConfig(level=logging.ERROR)
+
+# The single UpdateJenkinsThread instance. It is created by metric_init()
+# and shut down by metric_cleanup(); None until metric_init() has run.
+_Worker_Thread = None
+
+class UpdateJenkinsThread(threading.Thread):
+
+  def __init__(self, params):
+    threading.Thread.__init__(self)
+    self.running = False
+    self.shuttingdown = False
+    self.metrics = {}
+    self.settings = {}
+    self.refresh_rate = 60
+    self.base_url = params['base_url']
+    self._metrics_lock = threading.Lock()
+    self._settings_lock = threading.Lock()
+
+  def shutdown(self):
+    self.shuttingdown = True
+    if not self.running:
+        return
+    self.join()
+
+  def run(self):
+    global _Lock
+
+    self.running = True
+
+    while not self.shuttingdown:
+        time.sleep(self.refresh_rate)
+        self.refresh_metrics()
+
+    self.running = False
+
+  @staticmethod
+  def _get_jenkins_statistics(url):
+
+    url += '/api/json'
+    url += 
'?tree=jobs[color],overallLoad[busyExecutors[min[latest]],queueLength[min[latest]],totalExecutors[min[latest]]]'
+
+    c = urllib2.urlopen(url, None, 2)
+    json_data = c.read()
+    c.close()
+
+    data = json.loads(json_data)
+
+    result = {}
+    result['jenkins_overallload_busy_executors'] = 
data['overallLoad']['busyExecutors']['min']['latest']
+    result['jenkins_overallload_queue_length'] = 
data['overallLoad']['queueLength']['min']['latest']
+    result['jenkins_overallload_total_executors'] = 
data['overallLoad']['totalExecutors']['min']['latest']
+    result['jenkins_jobs_total'] = len(data['jobs'])
+    result['jenkins_jobs_red'] = result['jenkins_jobs_yellow'] = 
result['jenkins_jobs_grey'] = result['jenkins_jobs_disabled'] = 
result['jenkins_jobs_aborted'] = result['jenkins_jobs_notbuilt'] = 
result['jenkins_jobs_blue'] = 0
+
+    # Possible values: 
http://javadoc.jenkins-ci.org/hudson/model/BallColor.html
+    colors = ['red', 'yellow', 'grey', 'disabled', 'aborted', 'notbuilt', 
'blue']
+    for color in colors:
+      result['jenkins_jobs_' + color] = 0
+    for job in data['jobs']:
+      color = job['color']
+      for c in colors:
+        if color == c or color == c + '_anime':
+          result['jenkins_jobs_' + c] += 1
+    return result
+
+  def refresh_metrics(self):
+    logging.debug('refresh metrics')
+
+    try:
+      logging.debug(' opening URL: ' + str(self.base_url))
+      data = UpdateJenkinsThread._get_jenkins_statistics(self.base_url)
+    except:
+      logging.warning('error refreshing metrics')
+      logging.warning(traceback.print_exc(file=sys.stdout))
+
+    try:
+      self._metrics_lock.acquire()
+      self.metrics = {}
+      for k, v in data.items():
+          self.metrics[k] = v
+    except:
+      logging.warning('error refreshing metrics')
+      logging.warning(traceback.print_exc(file=sys.stdout))
+      return False
+
+    finally:
+      self._metrics_lock.release()
+
+    if not self.metrics:
+      logging.warning('error refreshing metrics')
+      return False
+
+    logging.debug('success refreshing metrics')
+    logging.debug('metrics: ' + str(self.metrics))
+
+    return True
+
+  def metric_of(self, name):
+    logging.debug('getting metric: ' + name)
+
+    try:
+      if name in self.metrics:
+        try:
+          self._metrics_lock.acquire()
+          logging.debug('metric: %s = %s' % (name, self.metrics[name]))
+          return self.metrics[name]
+        finally:
+          self._metrics_lock.release()
+    except:
+      logging.warning('failed to fetch ' + name)
+      return 0
+
+  def setting_of(self, name):
+    logging.debug('getting setting: ' + name)
+
+    try:
+      if name in self.settings:
+        try:
+          self._settings_lock.acquire()
+          logging.debug('setting: %s = %s' % (name, self.settings[name]))
+          return self.settings[name]
+        finally:
+          self._settings_lock.release()
+    except:
+      logging.warning('failed to fetch ' + name)
+      return 0
+
+def metric_init(params):
+  logging.debug('init: ' + str(params))
+  global _Worker_Thread
+
+  METRIC_DEFAULTS = {
+    'units': 'jobs',
+    'groups': 'jenkins',
+    'slope': 'both',
+    'value_type': 'uint',
+    'format': '%d',
+    'description': '',
+    'call_back': metric_of
+  }
+
+  descriptions = dict(
+    jenkins_overallload_busy_executors = {
+      'value_type': 'float',
+      'format': '%.3f',
+      'units': 'executors',
+      'description': 'Number of busy executors (master and slaves)'},
+    jenkins_overallload_queue_length = {
+      'value_type': 'float',
+      'format': '%.3f',
+      'units': 'queued items',
+      'description': 'Length of the queue (master and slaves)'},
+    jenkins_overallload_total_executors = {
+      'value_type': 'float',
+      'format': '%.3f',
+      'units': 'executors',
+      'description': 'Number of executors (master and slaves)'},
+    jenkins_jobs_total = {
+      'description': 'Total number of jobs'},
+    jenkins_jobs_blue = {
+      'description': 'Blue jobs'},
+    jenkins_jobs_red = {
+      'description': 'Red jobs'},
+    jenkins_jobs_yellow = {
+      'description': 'Yellow jobs'},
+    jenkins_jobs_grey = {
+      'description': 'Grey jobs'},
+    jenkins_jobs_disabled = {
+      'description': 'Disabled jobs'},
+    jenkins_jobs_aborted = {
+      'description': 'Aborted jobs'},
+    jenkins_jobs_notbuilt = {
+      'description': 'Not-built jobs'})
+
+  if _Worker_Thread is not None:
+    raise Exception('Worker thread already exists')
+
+  _Worker_Thread = UpdateJenkinsThread(params)
+  _Worker_Thread.refresh_metrics()
+  _Worker_Thread.start()
+
+  descriptors = []
+
+  for name, desc in descriptions.iteritems():
+    d = desc.copy()
+    d['name'] = str(name)
+    [ d.setdefault(key, METRIC_DEFAULTS[key]) for key in 
METRIC_DEFAULTS.iterkeys() ]
+    descriptors.append(d)
+  return descriptors
+
+def metric_of(name):
+  global _Worker_Thread
+  return _Worker_Thread.metric_of(name)
+
+def setting_of(name):
+  global _Worker_Thread
+  return _Worker_Thread.setting_of(name)
+
+def metric_cleanup():
+  global _Worker_Thread
+  if _Worker_Thread is not None:
+      _Worker_Thread.shutdown()
+  logging.shutdown()
+  pass
+
+if __name__ == '__main__':
+  from optparse import OptionParser
+
+  try:
+    logging.debug('running from the cmd line')
+    parser = OptionParser()
+    parser.add_option('-u', '--URL', dest='base_url', 
default='http://127.0.0.1:8080', help='Base-URL for jenkins api (default: 
http://127.0.0.1:8080)')
+    parser.add_option('-q', '--quiet', dest='quiet', action='store_true', 
default=False)
+    parser.add_option('-d', '--debug', dest='debug', action='store_true', 
default=False)
+
+    (options, args) = parser.parse_args()
+
+    descriptors = metric_init({
+      'base_url': options.base_url,
+    })
+
+    if options.debug:
+      from pprint import pprint
+      pprint(descriptors)
+
+    for d in descriptors:
+      v = d['call_back'](d['name'])
+
+      if not options.quiet:
+        print ' {0}: {1} {2} [{3}]' . format(d['name'], v, d['units'], 
d['description'])
+
+    os._exit(1)
+
+  except KeyboardInterrupt:
+    time.sleep(0.2)
+    os._exit(1)
+  except StandardError:
+    traceback.print_exc()
+    os._exit(1)
+  finally:
+    metric_cleanup()
diff --git a/files/ganglia/plugins/jenkins.pyconf 
b/files/ganglia/plugins/jenkins.pyconf
new file mode 100644
index 0000000..086186a
--- /dev/null
+++ b/files/ganglia/plugins/jenkins.pyconf
@@ -0,0 +1,83 @@
+#
+
+modules {
+  module {
+    name = 'jenkins'
+    language = 'python'
+
+    param base_url {
+      value = 'http://127.0.0.1:8080/ci'
+    }
+  }
+}
+
+collection_group {
+  collect_every = 10
+  time_threshold = 20
+
+  metric {
+    name = 'jenkins_overallload_busy_executors'
+    title = 'Number of busy executors on master and slaves'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_overallload_queue_length'
+    title = 'Length of the queue on master and slaves'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_overallload_total_executors'
+    title = 'Number of executors on master and slaves'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_total'
+    title = 'Total number of jobs'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_blue'
+    title = 'Number of jobs with status blue'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_red'
+    title = 'Number of jobs with status red'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_yellow'
+    title = 'Number of jobs with status yellow'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_grey'
+    title = 'Number of jobs with status grey'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_aborted'
+    title = 'Number of jobs with status aborted'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_notbuilt'
+    title = 'Number of jobs with status notbuilt'
+    value_threshold = 1.0
+  }
+
+  metric {
+    name = 'jenkins_jobs_disabled'
+    title = 'Number of jobs with status disabled'
+    value_threshold = 1.0
+  }
+}
diff --git a/manifests/role/jenkins.pp b/manifests/role/jenkins.pp
index 15e14e3..305c8c4 100644
--- a/manifests/role/jenkins.pp
+++ b/manifests/role/jenkins.pp
@@ -21,6 +21,22 @@
                ],
        }
 
+       # Ganglia monitoring for Jenkins
+       file { '/usr/lib/ganglia/python_modules/jenkins.py':
+                       source => 'puppet:///files/ganglia/plugins/jenkins.py',
+                       owner  => 'root',
+                       group  => 'root',
+                       mode   => '0444',
+                       notify => Service[gmond],
+       }
+       file { '/etc/ganglia/conf.d/jenkins.pyconf':
+                       source => 
'puppet:///files/ganglia/plugins/jenkins.pyconf',
+                       owner  => 'root',
+                       group  => 'root',
+                       mode   => '0444',
+                       notify => Service[gmond],
+       }
+
 }
 
 class role::jenkins::slave::production {

-- 
To view, visit https://gerrit.wikimedia.org/r/66960
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie347ec6ef23224b01740f03ea6360fcc3b3f0ccd
Gerrit-PatchSet: 4
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Hashar <has...@free.fr>
Gerrit-Reviewer: Asher <afeld...@wikimedia.org>
Gerrit-Reviewer: Hashar <has...@free.fr>
Gerrit-Reviewer: Mark Bergsma <m...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to