--- On Mon, 1/3/10, Lawrence Giam <[email protected]> wrote:
> Hi
> All,
>
> I am trying to setup
> a monitoring system using nagios to monitor the Slony
> replication. I did some
> changes to the psql_replication_check.pl script but I
> am not getting the
> correct response back from the script. The script is
> supposed to check the
> threshold but after shutting down the slon daemon on the
> slave, the result
> return is still positive.
>
> I hereby include the
> part of the script that I have changed.
>
> my $query = 'SELECT
> * FROM _abc.sl_status' ;
>
> # Get the
> results
> ## Update to use sl_status
> ## tuple[0] : st_origin
> ## tuple[1]
> : st_received
> ## tuple[2] : st_last_event
> ## tuple[3] :
> st_last_event_ts
> ## tuple[4] : st_last_received
> ## tuple[5] :
> st_last_received_ts
> ## tuple[6] : st_last_received_event_ts
> ## tuple[7] :
> st_lag_num_events
> ## tuple[8] : st_lag_time
> @tuple =
> $res->fetchrow;
>
> # Debugging
> #
> Uncomment the below to swap the minute for seconds.
> This is to
> simulate
> # crazy replication times for when replication is not
> falling
> behind.
> #$rep_time[1] = $rep_time[2]
>
> # Check for a
> warning
> if ($tuple[8] >= $threshold_warning and $tuple[8] <
> $threshold_critical)
> {
>
> print("WARNING: ST_Origin $tuple[0], ST_Received
> $tuple[1], Behind $tuple[8]
> minutes\n");
>
> exit(1);
> }
> # Or for a
> critical
> elsif ($tuple[8] >=
> $threshold_critical)
> {
>
> print("CRITICAL: ST_Origin $tuple[0], ST_Received
> $tuple[1], Behind $tuple[8]
> minutes\n");
>
> exit(2);
> }
> # Otherwise,
> everything is ok
> else
> {
>
> printf("OK: ST_Origin $tuple[0], ST_Received
> $tuple[1], Behind $tuple[8]
> minute%s\n",$tuple[8] == 1 ? "" :
> "s"
> );
>
> exit(0);
> }
>
> I am trying to use
> the sl_status st_lag_time to check the lag difference but
> somehow the script is
> not right. Can anyone help me change the
> script?
>
I have a similar script (see below). You might also be interested in checking
out check_postgres.pl, as they've just implemented a Slony lag check too.
Glyn
--------------
#!/usr/bin/perl
# $Id: test.pl,v 1.0 2008-01-30 12:00:30 Glyn Astill Exp $#
#
# Nagios-style check of Slony replication lag.
#
# Reads every row of "_<cluster>".sl_status and reports, per row, the event
# lag (st_lag_num_events) and the time lag in seconds (st_lag_time).  A row
# counts as a problem when a positive -e / -t threshold is given and the
# corresponding lag exceeds it.
#
# Exit status:
#   0  OK        no row exceeded a threshold
#   2  CRITICAL  a threshold was exceeded, or the query could not be run
#   3  UNKNOWN   could not connect to the database
# A usage error terminates through die().
use DBI;
use strict;
use warnings;
use Getopt::Long qw/GetOptions/;
Getopt::Long::Configure('no_ignore_case');

my $problems = 0;
my $result   = '';
my $USAGE    = '-h <host> -p <port> -db <database> -u <username> (not recommended' . "\n"
             . '-P <password>) -c <cluster> -e <lag events> -t <lag seconds>';

##
## Command line options
##
##http://www.perl.com/doc/manual/html/lib/Getopt/Long.html
my %opt;
die $USAGE unless
    GetOptions(\%opt,
        'host|H=s',
        'port=s',
        'dbname|db=s',
        'dbuser|u=s',
        'dbpass|P=s',
        'cluster|c=s',
        'events|e:i',
        'lagtime|t:i',
    )
    and keys %opt
    and ! @ARGV;

# Only pass the connection parameters that were actually supplied, so that an
# omitted option neither interpolates undef nor sends an empty "key=" pair.
my $dsn = 'DBI:Pg:'
        . join(';', map { "$_=$opt{$_}" } grep { defined $opt{$_} } qw(dbname host port));

#This should use a pgpass file automatically if password not specified
my $dbh = DBI->connect($dsn, $opt{dbuser}, $opt{dbpass});
if (!$dbh) {
    # Keep the reason on the first output line: that is the line Nagios shows
    # as the status text.
    print "POSTGRES_REPLICATION_LAG UNKNOWN: Cannot connect to Postgres server: $DBI::errstr\n";
    exit(3);
}

# The cluster name comes from the command line; let the driver quote it as an
# identifier instead of splicing it between hand-written double quotes.
my $schema = $dbh->quote_identifier('_' . (defined $opt{cluster} ? $opt{cluster} : ''));
my $query  = 'SELECT st_origin, st_received, st_lag_num_events, '
           . 'round(extract(epoch from st_lag_time)) from ' . $schema . '.sl_status';

my $sth = $dbh->prepare($query);
if (!defined($sth)) {
    report_and_exit($dbh, undef, 2,
        "POSTGRES_REPLICATION_LAG CRITICAL: Cannot prepare $DBI::errstr\n");
}
if (!$sth->execute) {
    report_and_exit($dbh, $sth, 2,
        "POSTGRES_REPLICATION_LAG CRITICAL: Cannot execute $DBI::errstr\n");
}

# One output line per sl_status row.
while (my @node = $sth->fetchrow_array) {
    my ($origin, $received, $lag_events, $lag_seconds) = @node;

    $result .= "Subscriber " . text_of($received) . " on Origin " . text_of($origin)
             . " : Event lag=" . text_of($lag_events);
    my $events_over = excess_over($opt{events}, $lag_events);
    if (defined $events_over) {
        $result .= " (behind " . $events_over . ") ";
        $problems++;
    }

    $result .= " Time lag=" . text_of($lag_seconds) . "s";
    my $seconds_over = excess_over($opt{lagtime}, $lag_seconds);
    if (defined $seconds_over) {
        $result .= " (behind " . $seconds_over . "s) ";
        $problems++;
    }

    $result .= "\n";
}

# fetchrow_array returns an empty list both at end of data and on error, so a
# failed fetch must not be mistaken for a clean (and possibly short) result.
if ($sth->err) {
    report_and_exit($dbh, $sth, 2,
        "POSTGRES_REPLICATION_LAG CRITICAL: Cannot fetch $DBI::errstr\n");
}

my ($label, $status) = $problems > 0 ? ('CRITICAL', 2) : ('OK', 0);
report_and_exit($dbh, $sth, $status,
    "POSTGRES_REPLICATION_LAG $label: " . $result . "\n");

# Print the final status message, release the statement and connection
# handles (when present), and exit with the given status code.
sub report_and_exit {
    my ($db_handle, $st_handle, $exit_status, $message) = @_;
    print $message;
    $st_handle->finish     if $st_handle;
    $db_handle->disconnect if $db_handle;
    exit($exit_status);
}

# Return the value for display, rendering an undefined value (e.g. a column
# that came back as NULL) as an empty string.
sub text_of {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Return how far $value is above $limit, or undef when there is nothing to
# report: no threshold given, a threshold that is not positive, no value, or
# a value that does not exceed the threshold.
sub excess_over {
    my ($limit, $value) = @_;
    return undef unless defined $limit && defined $value;
    return undef unless $limit > 0 && $limit < $value;
    return $value - $limit;
}
__END__
_______________________________________________
Slony1-general mailing list
[email protected]
http://lists.slony.info/mailman/listinfo/slony1-general