Here's what I use. It runs from cron every 5 minutes on the standby server
and works like a charm.
 
#!/bin/sh
# Settings for the standby-side check of the master Nagios server.

# Name of the machine this script runs on (used in the alert mails).
HOST=$(hostname)
# Master Nagios server that is probed through NRPE.
NAGIOS_HOST="nagios1.mysite.com"
# check_nrpe plugin used to run the probe.
NRPE="/usr/local/nagios/libexec/check_nrpe"
# External command file of the local Nagios instance.
CMD_FILE="/usr/local/nagios/var/rw/nagios.cmd"
# File remembering the exit status of the previous probe.
CHECK_STATUS="/usr/local/nagios/var/master.status"
# Log of state changes.
LOG_FILE="/usr/local/nagios/var/master.status.log"
# Helper commands and the recipient for notification mails.
echocmd="/bin/echo -e"
mailcmd="/bin/mail"
mailto="[EMAIL PROTECTED]"
# Timestamps: epoch seconds for Nagios commands, readable form for the log.
datetime=$(date '+%s')
datelog=$(date '+%F %T')

## functions
# down_mail: mail an ALERT saying this standby host has switched to ACTIVE
# mode and that the master needs attention.
# Globals read: echocmd, mailcmd, mailto, HOST, NAGIOS_HOST.
# Defined with the POSIX "name()" form; the "function" keyword is not
# understood by every /bin/sh.
down_mail() {
  # $echocmd / $mailcmd / $mailto are expanded unquoted on purpose: echocmd
  # carries an option ("-e") and mailto may list several recipients.
  # shellcheck disable=SC2086
  $echocmd "Slave Nagios server $HOST has entered ACTIVE mode and taken over network monitoring responsibilities!\nSomeone needs to check out $NAGIOS_HOST" \
    | $mailcmd -s "ALERT: $HOST Has Entered ACTIVE Mode" $mailto
}

# up_mail: mail a RECOVERY notice saying this standby host has gone back to
# STANDBY mode because the master is running again.
# Globals read: echocmd, mailcmd, mailto, HOST, NAGIOS_HOST.
# Defined with the POSIX "name()" form; the "function" keyword is not
# understood by every /bin/sh.
up_mail() {
  # The subject must stay on a single line: a newline inside the -s argument
  # would break the Subject header of the outgoing mail.
  # $echocmd / $mailcmd / $mailto are expanded unquoted on purpose: echocmd
  # carries an option ("-e") and mailto may list several recipients.
  # shellcheck disable=SC2086
  $echocmd "Slave Nagios server $HOST has returned to STANDBY mode because master server $NAGIOS_HOST is now running." \
    | $mailcmd -s "RECOVERY: $HOST Has Entered STANDBY Mode" $mailto
}

# warn_mail: mail a WARNING saying the master's status could not be
# determined and the standby was not activated; the last five lines of the
# log file are appended to the message.
# Globals read: echocmd, mailcmd, mailto, NAGIOS_HOST, LOG_FILE.
# Defined with the POSIX "name()" form; the "function" keyword is not
# understood by every /bin/sh.
warn_mail() {
  # LOG_FILE is quoted so a path containing whitespace still reaches tail as
  # one argument.
  # $echocmd / $mailcmd / $mailto are expanded unquoted on purpose: echocmd
  # carries an option ("-e") and mailto may list several recipients.
  # shellcheck disable=SC2086
  $echocmd "Warning: status of master Nagios server $NAGIOS_HOST is unknown.\nStandby server was NOT activated.\nSomeone needs to check the script $0\nHere is a tail of the logfile:\n $(tail -n 5 "$LOG_FILE")" \
    | $mailcmd -s "WARNING: master nagios server status UNKNOWN" $mailto
}

## end functions

## Make sure the directory that holds the status file exists.
status_dir=$(dirname "$CHECK_STATUS")
if ! test -d "$status_dir"; then
  mkdir -p "$status_dir"
fi

## Without the command pipe there is no way to switch the local Nagios
## between active and standby, so log the problem and give up.
if ! test -p "$CMD_FILE"; then
  echo "[$datelog] error writing to command file" | tee -a "$LOG_FILE"
  exit 1
fi

## Run the check; any arguments given to this script are passed to check_nrpe.
"$NRPE" -H "$NAGIOS_HOST" "$@" -c check_nagios > /dev/null 2>&1
n=$?

## Previous result. On the first run (or if the status file is empty) there
## is nothing to compare with, which is treated as a state change.
last=unknown
if test -s "$CHECK_STATUS"; then
  last=$(cat "$CHECK_STATUS")
fi

## React only when the result differs from the previous run.
if [ "$last" != "$n" ]; then
  echo "[$datelog] state change detected: from $last to $n" | tee -a "$LOG_FILE"
  case "$n" in
    0)
      # Master answers OK: this host goes back to standby.
      echo "[$datelog] master is OK: disabling notifications and svc checks" | tee -a "$LOG_FILE"
      echo "[$datetime] DISABLE_NOTIFICATIONS;$datetime" >> "$CMD_FILE"
      echo "[$datetime] STOP_EXECUTING_SVC_CHECKS" >> "$CMD_FILE"
      up_mail
      ;;
    2)
      # Master check returned 2: this host takes over monitoring.
      echo "[$datelog] master is NOT OK: enabling notifications and svc checks" | tee -a "$LOG_FILE"
      echo "[$datetime] ENABLE_NOTIFICATIONS;$datetime" >> "$CMD_FILE"
      echo "[$datetime] START_EXECUTING_SVC_CHECKS" >> "$CMD_FILE"
      down_mail
      ;;
    *)
      # Any other exit status: do not switch modes, just warn.
      echo "[$datelog] master status warning or unknown" | tee -a "$LOG_FILE"
      warn_mail
      ;;
  esac
else
  # No change: only log when the script runs during minute 00 of an hour.
  if [ "$(date +%M)" -eq 0 ]; then
    echo "[$datelog] no change detected: current status is $n" | tee -a "$LOG_FILE"
  fi
fi

## write current state to status file
echo "$n" > "$CHECK_STATUS"
 

________________________________

From: [EMAIL PROTECTED]
[mailto:[EMAIL PROTECTED] On Behalf Of Lehman,
John
Sent: Wednesday, October 25, 2006 1:39 PM
To: Nagios Users mailinglist
Subject: [Nagios-users] failover



Question for everyone.

 

I have 2 nagios servers and one is active all the time and the other is
standby.

 

I have read the documentation on implementing failover, but I am finding it
difficult to implement.

 

Is there anyone that could give me sample scripts which "on the standby
host" would "check the nagios master" and if the nagios master were down
then the standby would become the active host?

 

 

I am losing my mind trying to implement from the documentation and I would
appreciate any advice on this one.

 

John



-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Nagios-users mailing list
Nagios-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nagios-users
::: Please include Nagios version, plugin version (-v) and OS when reporting 
any issue. 
::: Messages without supporting info will risk being sent to /dev/null

Reply via email to