Ottomata has uploaded a new change for review. https://gerrit.wikimedia.org/r/107887
Change subject: Adding define monitor_ganglia ...................................................................... Adding define monitor_ganglia This define is a wrapper to monitor service that allows for icinga/nagios monitoring of arbitrary metric values in Ganglia. This will allow monitoring of Ganglia values without having to add new entries into checkcommands.cfg.erb Change-Id: If8da7c36e5f16819b3c974940f9ce3ba800fa6fb --- M manifests/nagios.pp M templates/icinga/checkcommands.cfg.erb 2 files changed, 115 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/87/107887/1 diff --git a/manifests/nagios.pp b/manifests/nagios.pp index bd23ac8..f22e3a7 100644 --- a/manifests/nagios.pp +++ b/manifests/nagios.pp @@ -265,3 +265,111 @@ check_command => 'nrpe_check_zfs', } } + +# == Define monitor_ganglia +# Wrapper for monitor_service using check_ganglia command. +# This allows you to monitor arbitrary values in ganglia +# with icinga without having to add entries to checkcommands.cfg.erb +# +# Specifying threshold values +# =========================== +# +# (This is extracted from ``check_gmond.checkval``; see the embedded +# documentation for the most current version). +# +# The arguments to the ``-w`` and ``-c`` options use the following syntax: +# +# For numeric values +# ------------------ +# - 5 -- match if v >= 5 +# - 3:5 -- match if 3 <= v <= 5 +# - :5 -- match if v <=5 +# - 1,2,3 -- match if v in (1,2,3) +# +# For string values +# ------------------ +# - foo -- match if v == foo +# - foo,bar -- match if v in (foo, bar) +# +# Negation +# -------- +# You can negate a threshold expression by preceding it with '!'. 
For +# example: +# +# - !5 -- match if v < 5 +# - !3:5 -- match if v<3 || v>5 +# - !1,2,3 -- match if v not in (1,2,3) +# +# ( Pasted from https://github.com/wikimedia/operations-debs-check_ganglia#specifying-threshold-values ) +# +# == Usage +# # Alert if free space in HDFS is less than 1TB +# monitor_ganglia { 'hdfs-capacity-remaining': +# description => 'GB free in HDFS', +# metric => 'Hadoop.NameNode.FSNamesystem.CapacityRemainingGB', +# warning_threshold => ':1024', +# critical_threshold => ':512', +# } +# +# == Parameters +# $description - Description of icinga alert +# $metric - ganglia metric name +# $warning_threshold - alert warning threshold +# $critical_threshold - alert critical threshold +# $gmetad_host - Default: 'nickel.wikimedia.org' +# $gmetad_query_port - gmetad XML query interface port. Default: 8654 +# $host +# $retries +# $group +# $ensure +# $critical +# $passive +# $freshness +# $normal_check_interval +# $retry_check_interval +# $contact_group +# +define monitor_ganglia( + $description, + $metric, + $warning_threshold, + $critical_threshold, + $gmetad_host = 'nickel.wikimedia.org', + $gmetad_query_port = 8654, + $host = $::hostname, + $retries = 3, + $group = $nagios_group, + $ensure = present, + $critical = 'false', + $passive = 'false', + $freshness = 36000, + $normal_check_interval = 1, + $retry_check_interval = 1, + $contact_group = 'admins' +) +{ + Class['icinga::ganglia::check'] -> Monitor_ganglia[$title] + + # checkcommands.cfg's check_ganglia command has + # many positional arguments that + # are passed to check_ganglia script: + # $ARG1$ -g gmetad host + # $ARG2$ -p gmetad xml query port + # $ARG3$ -m ganglia metric name + # $ARG4$ -w warning threshold + # $ARG5$ -c critical threshold + + monitor_service { $title: + ensure => $ensure, + description => $description, + check_command => "check_ganglia!${gmetad_host}!${gmetad_query_port}!${metric}!${warning_threshold}!${critical_threshold}", + retries => $retries, + group => $group, + critical => 
$critical, + passive => $passive, + freshness => $freshness, + normal_check_interval => $normal_check_interval, + retry_check_interval => $retry_check_interval, + contact_group => $contact_group, + } +} diff --git a/templates/icinga/checkcommands.cfg.erb b/templates/icinga/checkcommands.cfg.erb index 9fe2110..45b6aa1 100644 --- a/templates/icinga/checkcommands.cfg.erb +++ b/templates/icinga/checkcommands.cfg.erb @@ -501,11 +501,17 @@ command_line $USER1$/check_ganglios_generic_value -H $HOSTADDRESS$ -m packet_loss_average -w $ARG1$ -c $ARG2$ -o gt } - define command{ command_name check_memory_used command_line $USER3$/check_ganglios_memory_v2 -H $HOSTADDRESS -w $ARG1$ -c $ARG2$ } + +# check arbitrary ganglia metric values +define command{ + command_name check_ganglia + command_line $USER1$/check_ganglia -q -g $ARG1$ -p $ARG2$ -H $HOSTADDRESS$ -m '$ARG3$' -w '$ARG4$' -c '$ARG5$' +} + # percona mysql checks define command{ command_name nrpe_check_lvs -- To view, visit https://gerrit.wikimedia.org/r/107887 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If8da7c36e5f16819b3c974940f9ce3ba800fa6fb Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ottomata <o...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits