Giuseppe Lavagetto has uploaded a new change for review.
https://gerrit.wikimedia.org/r/226063
Change subject: misc: remove misc::monitoring::htcp-loss
......................................................................
misc: remove misc::monitoring::htcp-loss
It was unused and broken
Change-Id: I41e69dcb72b3baa4386a8e521538f7d3b46f7770
---
M manifests/misc/monitoring.pp
D modules/ganglia/files/plugins/htcpseqcheck.py
D modules/ganglia/files/plugins/htcpseqcheck.pyconf
D modules/ganglia/files/plugins/htcpseqcheck_ganglia.py
M modules/role/manifests/cache/text.pp
M modules/role/manifests/cache/upload.pp
6 files changed, 0 insertions(+), 492 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/63/226063/1
diff --git a/manifests/misc/monitoring.pp b/manifests/misc/monitoring.pp
index d06530a..1b977bf 100644
--- a/manifests/misc/monitoring.pp
+++ b/manifests/misc/monitoring.pp
@@ -1,47 +1,5 @@
# misc/monitoring.pp
-class misc::monitoring::htcp-loss {
- system::role { 'misc::monitoring::htcp-loss': description => 'HTCP packet
loss monitor' }
-
- File {
- require => File['/usr/lib/ganglia/python_modules'],
- notify => Service['ganglia-monitor']
- }
-
- # Ganglia
- file {
- '/usr/lib/ganglia/python_modules/htcpseqcheck.py':
- owner => 'root',
- group => 'root',
- mode => '0444',
- source => 'puppet:///files/ganglia/plugins/htcpseqcheck.py';
- '/usr/lib/ganglia/python_modules/htcpseqcheck_ganglia.py':
- owner => 'root',
- group => 'root',
- mode => '0444',
- source =>
'puppet:///files/ganglia/plugins/htcpseqcheck_ganglia.py';
- '/usr/lib/ganglia/python_modules/util.py':
- owner => 'root',
- group => 'root',
- mode => '0444',
- source => 'puppet:///files/ganglia/plugins/util.py';
- '/usr/lib/ganglia/python_modules/compat.py':
- owner => 'root',
- group => 'root',
- mode => '0444',
- source => 'puppet:///files/ganglia/plugins/compat.py';
- '/etc/ganglia/conf.d/htcpseqcheck.pyconf':
- # Disabled due to excessive memory and CPU usage -- TS
- # owner => 'root',
- # group => 'root',
- # mode => '0444',
- notify => Service['ganglia-monitor'],
- ensure => absent;
- # require => File["/etc/ganglia/conf.d"],
- # source => "puppet:///files/ganglia/plugins/htcpseqcheck.pyconf";
- }
-}
-
# Copied from nagios::ganglia::monitor::enwiki
# Will run on terbium to use the local MediaWiki install so that we can use
# maintenance scripts recycling DB connections and taking a few secs, not mins
diff --git a/modules/ganglia/files/plugins/htcpseqcheck.py b/modules/ganglia/files/plugins/htcpseqcheck.py
deleted file mode 100644
index 7d02a60..0000000
--- a/modules/ganglia/files/plugins/htcpseqcheck.py
+++ /dev/null
@@ -1,230 +0,0 @@
-#!/usr/bin/env python
-#
-# htcpseqcheck.py
-# measure HTCP multicast packet loss
-# Written on 2011/08/05 by Mark Bergsma <[email protected]>
-#
-# $Id$
-
-import util
-import socket, getopt, sys, pwd, grp, struct, threading
-
-from util import debug
-
-from datetime import datetime, timedelta
-from collections import deque
-
-try:
- from collections import Counter
-except ImportError:
- from compat import Counter
-
-# Globals
-
-sourcebuf = {}
-totalcounts, slidingcounts = Counter(), Counter()
-slidingdeque = deque()
-stats_lock = threading.Lock()
-
-class RingBuffer(deque):
- """
- Implements TCP window like behavior
- """
-
- def __init__(self, iterable=[], maxlen=None,
buffersize=timedelta(seconds=5)):
- self.counts = Counter()
- self.buffersize = buffersize
-
- try:
- deque.__init__(self, iterable, maxlen)
- except TypeError:
- deque.__init__(self, iterable)
-
- def add(self, seqnr):
- """
- Expects a sequence nr and adds it to the ringbuffer
- """
-
- ts = datetime.utcnow()
- counts = Counter()
- try:
- headseq, tailseq = self[0][0], self[-1][0]
- except IndexError:
- headseq, tailseq = seqnr-1, seqnr-1
-
- try:
- if seqnr == tailseq + 1:
- # Normal case, in-order arrival
- self.append((seqnr, ts, True))
- debug("Appended seqnr %d, timestamp %s" % (seqnr, ts))
- elif seqnr > tailseq + 1:
- # Packet(s) missing, fill the gap
- for seq in range(tailseq+1, seqnr):
- self.append((seq, ts, False))
- self.append((seqnr, ts, True))
- debug("Filled gap of %d packets before new packet seqnr %d,
timestamp %s" % (seqnr-tailseq-1, seqnr, ts))
- elif seqnr < headseq:
- counts['ancient'] += 1
- elif seqnr <= tailseq:
- # Late packet
- assert self[seqnr-headseq][0] == seqnr # Incorrect
seqnr?
-
- if self[seqnr-headseq][2]:
- counts['dups'] += 1 # Already exists
- debug("Duplicate packet %d" % seqnr)
- else:
- # Store with original timestamp
- self[seqnr-headseq] = (seqnr, self[seqnr-headseq][1], True)
- counts['outoforder'] += 1
- debug("Inserted late packet %d, timestamp %s" % (seqnr,
ts))
- except:
- raise
- else:
- counts['received'] += 1
- # Purge old packets
- self.deque(ts, counts)
- return counts
-
- def deque(self, now=datetime.utcnow(), counts=Counter()):
- while self and self[0][1] < now - self.buffersize:
- packet = self.popleft()
- counts['dequeued'] += 1
- debug("Dequeued packet id %d, timestamp %s, received %s" % packet)
- if not packet[2]:
- counts['lost'] += 1
-
- self.counts.update(counts)
-
-def receive_htcp(sock):
- portnr = sock.getsockname()[1];
-
- while 1:
- diagram, srcaddr = sock.recvfrom(2**14)
- if not diagram: break
-
- checkhtcpseq(diagram, srcaddr[0])
-
-def update_sliding_counts(counts, maxlen=10000):
- "Implements a sliding window of counts"
- global slidingdeque, slidingcounts
-
- slidingcounts += counts
- slidingdeque.append(counts)
-
- if len(slidingdeque) > maxlen:
- slidingcounts -= slidingdeque.popleft()
-
-def checkhtcpseq(diagram, srcaddr):
- global sourcebuf, totalcounts, slidingcounts, stats_lock
-
- transid = struct.unpack('!I', diagram[8:12])[0]
-
- with stats_lock: # Critical section
- sb = sourcebuf.setdefault(srcaddr, RingBuffer())
- try:
- counts = sb.add(transid)
- except IndexError:
- pass
- else:
- totalcounts.update(counts)
- update_sliding_counts(counts)
-
- # Don't bother printing stats if sys.stdout is set to None
- if not sys.stdout: return
-
- if counts['lost']:
- # Lost packets
- print "%d lost packet(s) from %s, last id %d" %
(counts['lost'], srcaddr, transid)
- elif counts['ancient']:
- print "Ancient packet from %s, id %d" % (srcaddr, transid)
-
- if counts['lost'] and sb.counts['dequeued']:
- print "%d/%d losses (%.2f%%), %d out-of-order, %d dups, %d
ancient, %d received from %s" % (
- sb.counts['lost'],
- sb.counts['dequeued'],
- float(sb.counts['lost'])*100/sb.counts['dequeued'],
- sb.counts['outoforder'],
- sb.counts['dups'],
- sb.counts['ancient'],
- sb.counts['received'],
- srcaddr)
- print "Totals: %d/%d losses (%.2f%%), %d out-of-order, %d
dups, %d ancient, %d received from %d sources" % (
- slidingcounts['lost'],
- slidingcounts['dequeued'],
- float(slidingcounts['lost'])*100/slidingcounts['dequeued'],
- totalcounts['outoforder'],
- totalcounts['dups'],
- totalcounts['ancient'],
- totalcounts['received'],
- len(sourcebuf.keys()))
-
-def print_help():
- print 'Usage:\n\thtcpseqcheck [ options ]\n'
- print 'Options:'
- print '\t-d\t\tFork into the background (become a daemon)'
- print '\t-p {portnr}\tUDP port number to listen on (default is 4827)'
- print '\t-j {mcast addr}\tMulticast group to join on startup'
- print '\t-u {username}\tChange uid'
- print '\t-g {group}\tChange group'
- print '\t-v\t\tBe more verbose'
-
-if __name__ == '__main__':
- host = '0.0.0.0'
- portnr = 4827
- multicast_group = None
- daemon = False
- user = group = None
- opts = 'dhj:p:vu:g:'
-
- # Parse options
- options, arguments = getopt.getopt(sys.argv[1:], opts)
- for option, value in options:
- if option == '-j':
- multicast_group = value
- elif option == '-p':
- portnr = int(value)
- elif option == '-h':
- print_help()
- sys.exit()
- elif option == '-d':
- daemon = True
- elif option == '-u':
- user = value
- elif option == '-g':
- group = value
- elif option == '-v':
- util.debugging = True
-
- try:
- # Change uid and gid
- try:
- if group: os.setgid(grp.getgrnam(group).gr_gid)
- if user: os.setuid(pwd.getpwnam(user).pw_uid)
- except:
- print "Error: Could not change uid or gid."
- sys.exit(-1)
-
- # Become a daemon
- if daemon:
- util.createDaemon()
-
- sock = util.open_htcp_socket(host, portnr)
-
- # Join a multicast group if requested
- if multicast_group is not None:
- debug('Joining multicast group ' + multicast_group)
- util.join_multicast_group(sock, multicast_group)
-
- # Start receiving HTCP packets
- receive_htcp(sock)
- except socket.error, msg:
- print msg[1];
- except KeyboardInterrupt:
- pass
-
-
-# Ganglia gmond module support
-try:
- from htcpseqcheck_ganglia import metric_init, metric_cleanup
-except ImportError:
- pass
\ No newline at end of file
diff --git a/modules/ganglia/files/plugins/htcpseqcheck.pyconf b/modules/ganglia/files/plugins/htcpseqcheck.pyconf
deleted file mode 100644
index 28d5ff0..0000000
--- a/modules/ganglia/files/plugins/htcpseqcheck.pyconf
+++ /dev/null
@@ -1,61 +0,0 @@
-# Ganglia Python gmond module configuration file
-
-modules {
- module {
- name = "htcpseqcheck"
- language = "python"
-
- param multicast_group {
- value = "239.128.0.112"
- }
-
- param port {
- value = 4827
- }
- }
-}
-
-collection_group {
- collect_every = 15
- time_threshold = 15
-
- metric {
- name = "htcp_losspct"
- title = "HTCP packet loss percentage"
- }
-
- metric {
- name = "htcp_dequeued"
- title = "Dequeued HTCP packets"
- }
-
- metric {
- name = "htcp_dups"
- title = "Duplicate HTCP packets"
- }
-
- metric {
- name = "htcp_ancient"
- title = "Ancient HTCP packets"
- }
-
- metric {
- name = "htcp_received"
- title = "Received HTCP packets"
- }
-
- metric {
- name = "htcp_sources"
- title = "Unique HTCP senders"
- }
-
- metric {
- name = "htcp_lost"
- title = "Lost HTCP packets"
- }
-
- metric {
- name = "htcp_outoforder"
- title = "HTCP packets received out-of-order"
- }
-}
\ No newline at end of file
diff --git a/modules/ganglia/files/plugins/htcpseqcheck_ganglia.py b/modules/ganglia/files/plugins/htcpseqcheck_ganglia.py
deleted file mode 100644
index 38f5449..0000000
--- a/modules/ganglia/files/plugins/htcpseqcheck_ganglia.py
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-
-# htcpseqcheck_ganglia.py
-# Ganglia gmond module integration
-
-import htcpseqcheck, util
-import threading, sys, socket, datetime
-
-from util import debug
-
-# Globals
-metrics = {}
-
-class HTCPSeqCheckThread(threading.Thread):
-
- name = "HTCPSeqCheck"
- daemon = True
-
- def run(self, kwargs={}):
- try:
- sock = util.open_htcp_socket(kwargs.get('host', ""),
kwargs.get('port', 4827))
-
- # Join a multicast group if requested
- if 'multicast_group' in kwargs:
- debug('Joining multicast group ' + kwargs['multicast_group'])
- util.join_multicast_group(sock, kwargs['multicast_group'])
-
- # Set sys.stdout to None; ganglia will do so anyway, and we
- # can detect this in htcpseqcheck.
-
- # Start receiving HTCP packets
- htcpseqcheck.receive_htcp(sock)
- except socket.error, msg:
- print >> sys.stderr, msg[1]
- sys.exit(1)
-
-def build_metrics_dict():
- "Builds a dict of metric parameter dicts"
-
- metrics = {
- 'htcp_losspct': {
- 'value_type': "float",
- 'units': "%",
- 'format': "%.2f",
- 'slope': "both",
- 'description': "HTCP packet loss percentage",
- 'int_name': None,
- },
- 'htcp_lost': {
- 'value_type': "uint",
- 'units': "packets/s",
- 'format': "%u",
- 'slope': "positive",
- 'description': "Lost HTCP packets",
- 'int_name': "lost",
- },
- 'htcp_dequeued': {
- 'value_type': "uint",
- 'units': "packets/s",
- 'format': "%u",
- 'slope': "positive",
- 'description': "Dequeued HTCP packets",
- 'int_name': "dequeued",
- },
- 'htcp_outoforder': {
- 'value_type': "uint",
- 'units': "packets/s",
- 'format': "%u",
- 'slope': "positive",
- 'description': "HTCP packets received out-of-order",
- 'int_name': "outoforder",
- },
- 'htcp_dups': {
- 'value_type': "uint",
- 'units': "dups/s",
- 'format': "%u",
- 'slope': "positive",
- 'description': "Duplicate HTCP packets",
- 'int_name': "dups",
- },
- 'htcp_ancient': {
- 'value_type': "uint",
- 'units': "packets/s",
- 'format': "%u",
- 'slope': "positive",
- 'description': "Ancient HTCP packets",
- 'int_name': "ancient",
- },
- 'htcp_received': {
- 'value_type': "uint",
- 'units': "packets/s",
- 'format': "%u",
- 'slope': "positive",
- 'description': "Received HTCP packets",
- 'int_name': "received",
- },
- 'htcp_sources': {
- 'value_type': "uint",
- 'units': "sources",
- 'format': "%u",
- 'slope': "both",
- 'description': "Unique HTCP senders",
- 'int_name': None,
- }
- }
-
- # Add common values
- for metricname, metric in metrics.iteritems():
- metric.update({
- 'name': metricname,
- 'call_back': metric_handler,
- 'time_max': 15,
- 'groups': "htcp"
- })
-
- return metrics
-
-def metric_init(params):
- # gmond module initialization
- global metrics
-
- # Start HTCP metrics collection in a separate thread
- HTCPSeqCheckThread().start()
-
- metrics = build_metrics_dict()
- return list(metrics.values())
-
-def metric_cleanup(params):
- pass
-
-def metric_handler(name):
- global metrics, silenceTime
-
- metric = metrics[name]
-
- try:
- with htcpseqcheck.stats_lock: # Critical section
- if name == "htcp_losspct":
- return float(htcpseqcheck.slidingcounts['lost']) /
htcpseqcheck.slidingcounts['dequeued'] * 100
- elif name == "htcp_sources":
- return len(htcpseqcheck.sourcebuf)
- else:
- return htcpseqcheck.totalcounts[metric['int_name']]
- except:
- return None
-
-if __name__ == '__main__':
- for metric in build_metrics_dict().itervalues():
- print " metric {\n name = \"%(name)s\"\n title =
\"%(description)s\"\n }\n" % metric
\ No newline at end of file
diff --git a/modules/role/manifests/cache/text.pp b/modules/role/manifests/cache/text.pp
index 2fbca93..ecf1831 100644
--- a/modules/role/manifests/cache/text.pp
+++ b/modules/role/manifests/cache/text.pp
@@ -173,11 +173,6 @@
instance_name => 'frontend',
}
- # HTCP packet loss monitoring on the ganglia aggregators
- if $ganglia_aggregator and $::site != 'esams' {
- include misc::monitoring::htcp-loss
- }
-
# ToDo: Remove production conditional once this works
# is verified to work in labs.
if $::realm == 'production' {
diff --git a/modules/role/manifests/cache/upload.pp b/modules/role/manifests/cache/upload.pp
index d85e8d8..5eac00d 100644
--- a/modules/role/manifests/cache/upload.pp
+++ b/modules/role/manifests/cache/upload.pp
@@ -140,11 +140,6 @@
include role::cache::logging
- # HTCP packet loss monitoring on the ganglia aggregators
- if $ganglia_aggregator and $::site != 'esams' {
- include misc::monitoring::htcp-loss
- }
-
# ToDo: Remove production conditional once this works
# is verified to work in labs.
if $::realm == 'production' {
--
To view, visit https://gerrit.wikimedia.org/r/226063
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I41e69dcb72b3baa4386a8e521538f7d3b46f7770
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Giuseppe Lavagetto <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits