Faidon Liambotis has submitted this change and it was merged. Change subject: Add check_jnx_alarms to check Juniper chassis alarms ......................................................................
Add check_jnx_alarms to check Juniper chassis alarms ...and hook it up to all core/mgmt routers. This is the equivalent of "show chassis status" for all machines, raising a WARNING on yellow alarms and CRITICAL on red alarms and reporting the number of yellow/red alarms in any case. Bug: T83992 Change-Id: I97d19bfaee083f8c9e9b70faf19985057a46914e --- A modules/nagios_common/files/check_commands/check_jnx_alarms A modules/nagios_common/files/check_commands/check_jnx_alarms.cfg M modules/nagios_common/manifests/commands.pp M modules/netops/manifests/check.pp M modules/netops/manifests/monitoring.pp 5 files changed, 153 insertions(+), 1 deletion(-) Approvals: Faidon Liambotis: Looks good to me, approved jenkins-bot: Verified diff --git a/modules/nagios_common/files/check_commands/check_jnx_alarms b/modules/nagios_common/files/check_commands/check_jnx_alarms new file mode 100755 index 0000000..1e20542 --- /dev/null +++ b/modules/nagios_common/files/check_commands/check_jnx_alarms @@ -0,0 +1,133 @@ +#!/usr/bin/perl + +# Copyright 2016 Faidon Liambotis +# Copyright 2016 Wikimedia Foundation, Inc. +# +# This nagios plugin is free software, and comes with ABSOLUTELY NO WARRANTY. +# It may be used, redistributed and/or modified under the terms of the GNU +# General Public Licence (see http://www.fsf.org/licensing/licenses/gpl.txt). 
+# +# Example usage: +# check_jnx_alarms -H cr1-eqdfw.wikimedia.org -c s3cr3t + +package Local::CheckJnxAlarms; + +use strict; +use warnings; +use Nagios::Plugin::Getopt; +use Nagios::Plugin::Functions; +use Net::SNMP; + +my %name2oid = ( + + # jnxAlarms + 'jnxAlarms' => '1.3.6.1.4.1.2636.3.4', + 'jnxCraftAlarms' => '1.3.6.1.4.1.2636.3.4.2', + 'jnxAlarmRelayMode' => '1.3.6.1.4.1.2636.3.4.2.1', + 'jnxYellowAlarms' => '1.3.6.1.4.1.2636.3.4.2.2', + 'jnxYellowAlarmState' => '1.3.6.1.4.1.2636.3.4.2.2.1', + 'jnxYellowAlarmCount' => '1.3.6.1.4.1.2636.3.4.2.2.2', + 'jnxYellowAlarmLastChange' => '1.3.6.1.4.1.2636.3.4.2.2.3', + 'jnxRedAlarms' => '1.3.6.1.4.1.2636.3.4.2.3', + 'jnxRedAlarmState' => '1.3.6.1.4.1.2636.3.4.2.3.1', + 'jnxRedAlarmCount' => '1.3.6.1.4.1.2636.3.4.2.3.2', + 'jnxRedAlarmLastChange' => '1.3.6.1.4.1.2636.3.4.2.3.3', +); + +# this is not exactly great; this isn't a very OO-package, but ePN restricts the +# use of global variables, so package variables should do, for now. +our $ng; +our ( @crit, @warn, @ok ); + +sub init { + ( @crit, @warn, @ok ) = (); + + $ng = Nagios::Plugin::Getopt->new( + usage => 'Usage: %s -H <host> --c <community> [...]', + version => 1, + blurb => 'Checks the state of a router\'s BGP peerings', + ); + + $ng->arg( + spec => 'host|H=s', + help => 'Hostname or IP address of the server to check against', + required => 1, + ); + $ng->arg( + spec => 'port|p=i', + help => 'SNMP port, default: %s', + default => 161, + ); + $ng->arg( + spec => 'community|c=s', + help => 'SNMP community to use, default: %s', + default => 'public', + ); + $ng->arg( + spec => 'snmpver=i', + help => 'SNMP version to use (1 or 2), default: %s', + default => 2, + ); + + $ng->getopts; + + return; +} + +sub collect { + + my ( $session, $error ) = Net::SNMP->session( + -hostname => $ng->host, + -community => $ng->community, + -port => $ng->port, + -version => $ng->snmpver, + ); + unless ( defined $session ) { + nagios_exit( CRITICAL, $error ); + } + + my ( %alarms, 
@text ); + foreach my $type (qw/red yellow/) { + my $identifier = 'jnx' . ucfirst($type) . 'AlarmCount'; + my $oid = $name2oid{$identifier}; + my $response = $session->get_table( + -baseoid => $oid, + -maxrepetitions => 15, + ) or nagios_exit( CRITICAL, $session->error ); + + my $value = ( values %{$response} )[0]; + $alarms{$type} = $value; + + push @text, "$value $type alarms"; + } + + push @warn, @text if $alarms{'yellow'} > 0; + push @crit, @text if $alarms{'red'} > 0; + push @ok, @text; + + return; +} + +sub run { + init; + collect; + + print join( "\n", @ok ), "\n" if $ng->verbose; + + nagios_exit( + check_messages( + critical => \@crit, + warning => \@warn, + ok => \@ok, + join => ', ', + ) + ); + return; +} + +# call run() if we are running interactively or via ePN +__PACKAGE__->run() if ( !caller || (caller)[0] eq 'Embed::Persistent' ); + +1; + +# vim: ts=4 sw=4 sts=4 et diff --git a/modules/nagios_common/files/check_commands/check_jnx_alarms.cfg b/modules/nagios_common/files/check_commands/check_jnx_alarms.cfg new file mode 100644 index 0000000..53f2ba7 --- /dev/null +++ b/modules/nagios_common/files/check_commands/check_jnx_alarms.cfg @@ -0,0 +1,4 @@ +define command { + command_name check_jnx_alarms + command_line $USER1$/check_jnx_alarms -H $HOSTADDRESS$ -c $ARG1$ +} diff --git a/modules/nagios_common/manifests/commands.pp b/modules/nagios_common/manifests/commands.pp index 7896ea1..25a2d6a 100644 --- a/modules/nagios_common/manifests/commands.pp +++ b/modules/nagios_common/manifests/commands.pp @@ -25,7 +25,7 @@ 'libnet-ssleay-perl', 'libio-socket-ssl-perl', 'libio-socket-inet6-perl', - # check_bgp + # check_bgp/check_jnx_alarms 'libnet-snmp-perl', 'libtime-duration-perl', ]: @@ -49,6 +49,7 @@ 'check_to_check_nagios_paging', 'check_ifstatus_nomon', 'check_bgp', + 'check_jnx_alarms', 'check_ores_workers', ] : require => File["${config_dir}/commands"], diff --git a/modules/netops/manifests/check.pp b/modules/netops/manifests/check.pp index 
5d24f16..b2e5b08 100644 --- a/modules/netops/manifests/check.pp +++ b/modules/netops/manifests/check.pp @@ -13,6 +13,9 @@ # [*snmp_community*] # The SNMP community to use to poll the device. Optional # +# [*alarms*] +# Whether to perform chassis alarms checks. Defaults to false. +# # [*interfaces*] # Whether to perform interface status checks. Defaults to false. # @@ -30,6 +33,7 @@ $ipv4, $ipv6=undef, $snmp_community=undef, + $alarms=false, $bgp=false, $interfaces=false, ) { @@ -46,6 +50,15 @@ } } + if $alarms { + @monitoring::service { "${title} Juniper alarms": + host => $title, + group => 'routers', + description => 'Juniper alarms', + check_command => "check_jnx_alarms!${snmp_community}", + } + } + if $interfaces { @monitoring::service { "${title} interfaces": host => $title, diff --git a/modules/netops/manifests/monitoring.pp b/modules/netops/manifests/monitoring.pp index 3d66f60..1c2ee64 100644 --- a/modules/netops/manifests/monitoring.pp +++ b/modules/netops/manifests/monitoring.pp @@ -14,6 +14,7 @@ # core/mgmt routers $defaults = { snmp_community => $passwords::network::snmp_ro_community, + alarms => true, interfaces => true, } $routers = { -- To view, visit https://gerrit.wikimedia.org/r/281467 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I97d19bfaee083f8c9e9b70faf19985057a46914e Gerrit-PatchSet: 4 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Faidon Liambotis <[email protected]> Gerrit-Reviewer: Faidon Liambotis <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
