Ottomata has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/65780


Change subject: Adding ganglia monitoring of webrequest data loss in Kraken HDFS
......................................................................

Adding ganglia monitoring of webrequest data loss in Kraken HDFS

Change-Id: Iefc4b809434e357b5cd8aec416434fd45cf18a4c
---
A files/ganglia/plugins/kraken_webrequest_loss.py
A files/ganglia/plugins/kraken_webrequest_loss.pyconf
M manifests/misc/monitoring.pp
M manifests/site.pp
4 files changed, 131 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/80/65780/1

diff --git a/files/ganglia/plugins/kraken_webrequest_loss.py b/files/ganglia/plugins/kraken_webrequest_loss.py
new file mode 100644
index 0000000..7f3698b
--- /dev/null
+++ b/files/ganglia/plugins/kraken_webrequest_loss.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+    Python Gmond Module for Kraken Webrequest Loss Percentage.
+    Per-source-host loss percentage data is generated by the packetloss
+    Oozie job in Kraken.
+
+    :copyright: (c) 2012 Wikimedia Foundation
+    :author: Andrew Otto <o...@wikimedia.org>
+    :license: GPL
+    
+"""
+from __future__ import print_function
+
+import logging
+import commands
+
+UPDATE_INTERVAL = 3600 # seconds
+
+# Config for multiple metrics.
+# Currently we only compute a single webrequest loss
+# percentage, but this allows us to add more later.
+metrics = {
+    'webrequest_loss_average': {
+        'description': 'Average Webrequest Loss Percentage',
+        'path':        '/wmf/data/webrequest/loss',
+    }
+}
+
+def latest_loss_path(metric_name):
+    """Returns HDFS path to the most recently generated webrequest loss 
data."""
+    logging.debug("latest_loss_path(%s)" % metrics[metric_name]['path'])
+    return commands.getoutput("/usr/bin/hadoop fs -ls %s | /usr/bin/tail -n 1 | /usr/bin/awk '{print $NF}'" % (metrics[metric_name]['path']))
+
+def loss_data(loss_path):
+    """Returns the output data inside the HDFS loss_path."""
+    logging.debug("loss_data(%s)" % loss_path)
+    return commands.getoutput("/usr/bin/hadoop fs -cat %s/part*" % (loss_path))
+
+def loss_average(loss_data):
+    """Parses loss_data for loss percentages and averages them all."""
+    logging.debug("loss_average(%s)" % loss_data)
+    percent_sum = 0.0
+    loss_lines = [line for line in loss_data.split("\n") if line.strip()]
+    for line in loss_lines:
+        fields = line.split("\t")
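+        # The loss percentage is expected to be the last tab-separated field on each line.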
+        percent = fields[-1]
+        percent_sum += float(percent)
+
+    average_percent = (percent_sum / float(len(loss_lines)))
+    return average_percent
+
+def metric_handler(name):
+    """Get value of particular metric; part of Gmond interface"""
+    logging.debug('metric_handler(): %s', name)
+    return loss_average(loss_data(latest_loss_path(name)))
+
+def metric_init(params):
+    global descriptors
+
+    descriptors = []
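+    # Build one Gmond metric descriptor per configured metric; gmond calls 'call_back' to fetch each value.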
+    for metric_name, metric_config in metrics.items():
+        descriptors.append({
+            'name': metric_name,
+            'call_back': metric_handler,
+            'time_max': 3660,
+            'value_type': 'float',
+            'units': '%',
+            'slope': 'both',
+            'format': '%f',
+            'description': metric_config['description'],
+            'groups': 'analytics'
+        })
+
+    return descriptors
+
+
+def metric_cleanup():
+    """Teardown; part of Gmond interface"""
+    pass
+
+
+if __name__ == '__main__':
+    # When invoked as standalone script, run a self-test by querying each
+    # metric descriptor and printing it out.
+    logging.basicConfig(level=logging.DEBUG)
+    for metric in metric_init({}):
+        value = metric['call_back'](metric['name'])
+        print(( "%s => " + metric['format'] ) % ( metric['name'], value ))
diff --git a/files/ganglia/plugins/kraken_webrequest_loss.pyconf b/files/ganglia/plugins/kraken_webrequest_loss.pyconf
new file mode 100644
index 0000000..c4db97b
--- /dev/null
+++ b/files/ganglia/plugins/kraken_webrequest_loss.pyconf
@@ -0,0 +1,20 @@
+# Gmond configuration for calculating
+# webrequest data loss stored in HDFS in Kraken.
+
+modules {
+  module {
+    name = "kraken_webrequest_loss"
+    language = "python"
+  }
+}
+
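+# Collect once an hour; time_threshold gives a 60 second grace period
+# before the reported value is considered stale.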
+collection_group {
+  collect_every = 3600
+  time_threshold = 3660
+  
+  metric {
+    name = "webrequest_loss_average"
+    title = "Average Loss Percentage"
+    value_threshold = 0
+  }
+}
diff --git a/manifests/misc/monitoring.pp b/manifests/misc/monitoring.pp
index 16e3953..5f6c6c1 100644
--- a/manifests/misc/monitoring.pp
+++ b/manifests/misc/monitoring.pp
@@ -29,6 +29,7 @@
 
 # == Class misc::monitoring::net::udp
 # Sends UDP statistics to ganglia.
+#
 class misc::monitoring::net::udp {
        file {
                '/usr/lib/ganglia/python_modules/udp_stats.py':
@@ -42,6 +43,23 @@
        }
 }
 
+# == Class misc::monitoring::kraken::loss
+# Checks recently generated webrequest loss statistics in
+# Kraken HDFS and sends the average loss percentage to ganglia.
+#
+class misc::monitoring::kraken::loss {
+       file {
+               '/usr/lib/ganglia/python_modules/kraken_webrequest_loss.py':
+                       require => File['/usr/lib/ganglia/python_modules'],
+                       source => 'puppet:///files/ganglia/plugins/kraken_webrequest_loss.py',
+                       notify => Service[gmond];
+               '/etc/ganglia/conf.d/kraken_webrequest_loss.pyconf':
+                       require => File["/usr/lib/ganglia/python_modules/kraken_webrequest_loss.py"],
+                       source => "puppet:///files/ganglia/plugins/kraken_webrequest_loss.pyconf",
+                       notify => Service[gmond];
+       }
+}
+
 # Ganglia views that should be
 # avaliable on ganglia.wikimedia.org
 class misc::monitoring::views {
diff --git a/manifests/site.pp b/manifests/site.pp
index 291e197..5391189 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -197,6 +197,10 @@
                hdfs_source       => "/wmf/public",
                rsync_destination => "stat1001.wikimedia.org::a/srv/stats.wikimedia.org/htdocs/kraken-public",
        }
+
+       # check webrequest loss in Kraken HDFS.
+       # (This can run on any analytics node.)
+       include misc::monitoring::kraken::loss
 }
 
 # git.wikimedia.org
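
Since the module's __main__ block runs a self-test that prints each metric,
it can be exercised by hand on an analytics node before gmond picks it up
(the path below assumes the Puppet deployment in this change):

  python /usr/lib/ganglia/python_modules/kraken_webrequest_loss.py

With the Hadoop CLI available and loss data present in HDFS, this should
print something like "webrequest_loss_average => 1.234567".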

-- 
To view, visit https://gerrit.wikimedia.org/r/65780
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iefc4b809434e357b5cd8aec416434fd45cf18a4c
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <o...@wikimedia.org>
