Hi, a DRBD monitor script is available at <http://ben.timby.com/?p=12>. Unfortunately, it no longer works with DRBD 8.3, since the /proc/drbd output format has changed somewhat. This is an updated script which seems to work fine with DRBD 8.3.8. I have also updated the checks a little bit and made the output prettier :) Best regards, gulikoza
#!/usr/bin/perl -w
# Copyright (c) 2007 Ben Timby
# Written by Ben Timby <bti...@gmail.com>
# Based on check_drbd by Igor Genibel <i...@jexiste.org>
# Updated for DRBD 8.3 by gulikoza <gulikoza at users.sourceforge.net>
#
# Released under the GNU Public License
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#Checking remote hosts requires ssh key authentication be configured. See the link
#below for information on configuring this.
#
#http://sial.org/howto/openssh/publickey-auth/
#
#~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
#Usage: ./drbd.monitor options [remote host list]
#
#If no host list is given, the local host is checked.
#
#Required Options
#
#   -p host : name of host which must be "Primary"
#
#Optional Options
#
#   -h      : prints this message
#   -d      : print debugging information to STDERR
#
#These options apply only if checking remote hosts
#
#   -u user : ssh username for remote host checks
#   -i path : ssh identity (key) file for remote host checks
#
#Example (checks drbd0 on remote node1, ensuring that it is primary):
#
#./drbd.monitor -du mon -i /home/mon/.ssh/id_rsa -p node1 node1
#
#Example (check drbd1 locally ensuring that it is primary):
#
#./drbd.monitor -p node1 localhost
#~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
# /etc/mon/mon.cf:
# --
#hostgroup servers thishost remotehost
#
#watch servers
#    service drbd
#        interval 1m
#        monitor drbd.monitor -p thishost -i /home/mon/.ssh/id_rsa -u mon
#        period wd {Mon-Fri} hr {7am-10pm}
#            alertevery 1h
#            alertafter 2 30m
#            alert mail.alert root@localhost
# --
#~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

use strict;
use File::Temp qw/ :mktemp /;
use Getopt::Std;
use Sys::Hostname;

# Status file read for local checks; remote checks read a temp copy instead.
my $LOCAL_PROC = '/proc/drbd';

# Option specification handed to Getopt::Std.
my $OPTSPEC = 'hdu:i:p:';

# State shared between the subs and the main section.
my $debug    = 0;                                # set to 1 by -d
my @code_msg = ('OK', 'WARNING', 'CRITICAL');    # indexed by exit code
my $max_code = 0;                                # worst status seen so far
my $msg      = '';                               # accumulated per-device detail

# debug(@parts)
# Prints the given values on one "DEBUG: " line on STDERR when -d is active.
# Undefined values are shown as the word "undef".
sub debug {
    return unless $debug;

    print STDERR "DEBUG: ";
    for my $parm (@_) {
        my $text = defined $parm ? $parm : 'undef';
        print STDERR "$text ";
    }
    print STDERR "\n";
    return;
}

# _device_status($up_role, $dev, $cs, $st1, $st2, $ds1, $ds2, $percent, $extra)
# Maps the fields of one DRBD device line to (exit code, message text).
# $percent and $extra are only used when the device is synchronising.
#
# Alerts have this priority:
#  - we are sync'ing (WARNING),
#  - we are inconsistent (CRITICAL),
#  - peer is inconsistent (WARNING),
#  - we lost the other host (CRITICAL) and we are secondary,
#  - we lost the other host (WARNING) and we are primary,
#  - we should be primary or secondary but we are not (WARNING),
#  - Connected and UpToDate (OK),
#  - not Connected (CRITICAL),
#  - in any other case, something is bad (CRITICAL).
sub _device_status {
    my ($up_role, $dev, $cs, $st1, $st2, $ds1, $ds2, $percent, $extra) = @_;

    if ($cs =~ /Sync(?:Source|Target)/) {
        return (1, "$dev: $cs - $percent% - $extra");
    }
    if ($ds1 eq 'Inconsistent') {
        return (2, "$dev: $cs - $st1/$st2 - $ds1 (should be UpToDate)");
    }
    if ($ds2 eq 'Inconsistent') {
        return (1, "$dev: $cs - $st1/$st2 - $ds1/$ds2 (should be UpToDate)");
    }
    if ($st2 eq 'Unknown') {
        # The old test compared the expected role with lowercase 'primary',
        # which never matches "Primary"/"Secondary", so every lost peer was
        # CRITICAL and the WARNING case was unreachable.  Use the node's
        # actual role, as the priority list above describes.
        my $code = $st1 eq 'Primary' ? 1 : 2;
        return ($code, "$dev: $cs - $st1/$st2 (should not be Unknown)");
    }
    if (defined $up_role and $up_role ne $st1) {
        return (1, "$dev: $cs - $st1 (should be $up_role) / $st2 - $ds1/$ds2");
    }
    if ($cs eq 'Connected' && $ds1 eq 'UpToDate') {
        return (0, "$dev: $cs - $st1/$st2 $ds1/$ds2");
    }
    if ($cs ne 'Connected') {
        return (2, "$dev: $cs (should be Connected) - $st1/$st2 $ds1/$ds2");
    }

    # Any case we didn't think about is CRITICAL
    return (2, "$dev: $cs - $st1/$st2 - $ds1/$ds2 (Unknown error!)");
}

# parse($file, $up_role)
# Reads DRBD status text (the format of /proc/drbd) from $file and folds the
# state of every device line into $max_code and $msg.
#   $file    : path to read; anything other than /proc/drbd is treated as a
#              temp file holding a remote host's output
#   $up_role : role the checked host is expected to have ("Primary" or
#              "Secondary")
# Prints a CRITICAL line and exits 2 if the file cannot be opened or reports
# a DRBD version older than 0.7.
sub parse {
    my ($file, $up_role) = @_;
    debug("up_role: ", $up_role);

    my @buf;

    # Store the stats in a buffer
    open(my $drbd, '<', $file) or do {
        print "CRITICAL drbd module not loaded!\n";
        exit 2;
    };
    while (my $line = <$drbd>) {
        # ssh -t delivers CRLF line ends, so strip both characters.
        $line =~ tr/\r\n//d;
        if ($line =~ /^version: (\d+)\.(\d+)\.(\d+)/) {
            if ($1 == 0 && $2 < 7) {
                print "CRITICAL DRBD version too old for this script (< v0.7.x)\n";
                close($drbd);
                # remove the tmpfile if not local
                unlink $file if $file ne $LOCAL_PROC;
                exit 2;
            }
        }
        push @buf, $line;
        debug("parse[" . scalar(@buf) . "]: \"$line\"");
    }
    close($drbd);

    # Parse the buffer
    for my $i (0 .. $#buf) {
        debug("line: \"$buf[$i]\"");

        #0: cs:Connected ro:Secondary/Secondary ds:UpToDate/UpToDate
        next unless $buf[$i] =~ m{^\s+([0-9]+): cs:(\w+) ro:(\w+)/(\w+) ds:(\w+)/(\w+).*$};
        my ($dev, $cs, $st1, $st2, $ds1, $ds2) = ($1, $2, $3, $4, $5, $6);
        debug("dev=$dev cs=$cs st1=$st1 st2=$st2 ds1=$ds1 ds2=$ds2");

        # Get information about sync and ETA: the progress bar is expected
        # two lines below the device line and the "finish:" line three below.
        my ($percent, $extra) = ('', '');
        if ($cs =~ /Sync(?:Source|Target)/) {
            if ($i + 2 <= $#buf && $buf[$i + 2] =~ /(\d+\.\d+)%/) {
                $percent = $1;
            }
            if ($i + 3 <= $#buf) {
                $extra = $buf[$i + 3];
                $extra =~ s/^[ \t]+//;
            }
        }

        my ($code, $text) = _device_status($up_role, $dev, $cs, $st1, $st2,
                                           $ds1, $ds2, $percent, $extra);

        # merge our status with globals...
        $max_code = $code if $code > $max_code;
        $msg .= "\n[" . $text . "]";
    }
    return;
}

# ssh($hostspec, $key)
# Fetches /proc/drbd from a remote host over ssh and stores it in a temp file.
#   $hostspec : "user@host" passed to ssh
#   $key      : identity file passed to ssh -i
# Returns the temp file path; the caller is responsible for unlinking it.
# Prints a CRITICAL line and exits 2 on timeout (10 seconds), ssh failure or
# empty output.  Dies if the temp file cannot be created or written.
#
# NOTE(review): $hostspec and $key are interpolated into a shell command, so
# they must come from a trusted command line / mon configuration.
sub ssh {
    my ($hostspec, $key) = @_;
    debug("hostspec: ", $hostspec, " key: ", $key);

    my $output;
    my $failed = 0;

    # Set a execution timeout
    eval {
        local $SIG{ALRM} = sub { die "alarm\n" };
        alarm 10;
        $output = `ssh -ttt $hostspec -i $key cat /proc/drbd 2> /dev/null`;
        $failed = 1 if $? != 0;
        alarm 0;
    };
    if (my $err = $@) {
        die $err unless $err eq "alarm\n";    # propagate unexpected errors
        # Timeout reached
        print "CRITICAL Timeout reached\n";
        exit 2;
    }

    # Timeout not reached
    if ($failed) {
        print "CRITICAL Host ssh service unreachable\n";
        exit 2;
    }
    if (!defined $output or $output eq '') {
        print "CRITICAL module not loaded on host\n";
        exit 2;
    }

    # mkstemp creates and opens the file atomically, so nobody can pre-create
    # or symlink the predictable /tmp name between naming and opening it.
    my ($tmp_fh, $tmpfile) = eval { mkstemp("/tmp/tmpfileXXXXX") };
    die "Unable to create tmpfile!\n" unless defined $tmp_fh;

    my $written = print {$tmp_fh} $output;
    my $closed  = close($tmp_fh);
    unless ($written and $closed) {
        unlink $tmpfile;
        die "Unable to write tmpfile!\n";
    }

    return $tmpfile;
}

# usage([$error])
# Prints the optional error text on STDOUT and the help text on STDERR, then
# exits.  The exit code is 2 when an error text is given, so that a
# misconfigured monitor is reported as a failure, and 0 otherwise (e.g. -h).
sub usage {
    my ($error) = @_;

    if (defined $error) {
        print "Error in parameters. $error\n\n";
    }

    print STDERR <<"EOF";
mon System Monitor
DRBD monitor

Usage: $0 options [remote host list]

Required Options

   -p host : name of host which must be "Primary"

Optional Options

   -h      : prints this message
   -d      : print debugging information to STDERR

These options apply only if checking remote hosts

   -u user : ssh username for remote host checks
   -i path : ssh identity (key) file for remote host checks

Example (checks drbd0 on remote node1, ensuring that it is primary):

$0 -du mon -i /home/mon/.ssh/id_rsa -p node1 node1

Example (check drbd1 locally ensuring that it is primary):

$0 -p node1 localhost
EOF

    exit(defined $error ? 2 : 0);
}

# ---------------------------------------------------------------------------
# Main: parse options, check every host on the command line, report the
# worst status found and use it as the exit code.
# ---------------------------------------------------------------------------
my %opt;
getopts($OPTSPEC, \%opt) or usage("Invalid command line options!");
usage() if $opt{h};
$debug = 1 if $opt{d};

debug("user:", $opt{u});
debug("key file:", $opt{i});
debug("primary host:", $opt{p});
debug("#argv: ", $#ARGV);

if (not defined $opt{p}) {
    usage("You must define a primary host with -p!");
}

my $local_host = hostname();

# "localhost" is an alias for this machine, both in -p and in the host list.
my $primary = $opt{p} eq 'localhost' ? $local_host : $opt{p};

# Without a host list the loop would never run and "OK" would be reported
# without checking anything, so fall back to the local host.
my @peers = @ARGV ? @ARGV : ('localhost');

for my $arg (@peers) {
    my $peer = $arg eq 'localhost' ? $local_host : $arg;
    debug("peer: ", "\"$peer\"");
    debug("pri: ", "\"$primary\"");

    my $role = $peer eq $primary ? 'Primary' : 'Secondary';

    if ($peer ne $local_host) {
        if (not defined $opt{u} or not defined $opt{i}) {
            usage("You must define a user with -u and a identity file with -i when checking remote hosts!");
        }
        my $proc = ssh($opt{u} . '@' . $peer, $opt{i});
        debug("proc: ", $proc);
        parse($proc, $role);
        unlink $proc;
    }
    else {
        debug("parsing local file: /proc/drbd");
        parse($LOCAL_PROC, $role);
    }
}

print $code_msg[$max_code] . $msg . "\n";
exit $max_code;
_______________________________________________ mon mailing list mon@linux.kernel.org http://linux.kernel.org/mailman/listinfo/mon