Here's what I use: Running in cron every 5 minutes. Works like a charm.

#!/bin/sh
#
# Standby-side Nagios failover check: configuration section.
# Every value below is read by the functions and main logic that follow.

# Name of this (standby) machine, used in the notification mails.
HOST=$(hostname)

# Master Nagios server that is queried through NRPE.
NAGIOS_HOST="nagios1.mysite.com"

# NRPE client plugin used to run the remote check.
NRPE="/usr/local/nagios/libexec/check_nrpe"

# External command file of the local Nagios instance.
CMD_FILE="/usr/local/nagios/var/rw/nagios.cmd"

# File holding the exit status seen on the previous run, and its log.
CHECK_STATUS="/usr/local/nagios/var/master.status"
LOG_FILE="/usr/local/nagios/var/master.status.log"

# External helper commands and the mail recipient.
echocmd="/bin/echo -e"
mailcmd="/bin/mail"
mailto="[EMAIL PROTECTED]"

# Timestamps: seconds since the epoch, and a human-readable form for the log.
datetime=$(date +%s)
datelog=$(date "+%F %T")
## functions

# Functions use the POSIX "name() { ...; }" form so the script also runs when
# /bin/sh is not bash (the "function" keyword is a bash/ksh extension).
# $echocmd and $mailcmd are expanded unquoted on purpose: $echocmd holds a
# command plus its option and must be split into words. $mailto is left
# unquoted so a space-separated recipient list still becomes separate arguments.

# Append a timestamped message to the log file and echo it to stdout.
#   $* - message text
log_msg() {
  echo "[$datelog] $*" | tee -a "$LOG_FILE"
}

# Mail sent when this standby switches to ACTIVE mode.
down_mail() {
  $echocmd "Slave Nagios server $HOST has entered ACTIVE mode and taken over network monitoring responsibilities!\nSomeone needs to check out $NAGIOS_HOST" \
    | $mailcmd -s "ALERT: $HOST Has Entered ACTIVE Mode" $mailto
}

# Mail sent when this standby returns to STANDBY mode.
up_mail() {
  $echocmd "Slave Nagios server $HOST has returned to STANDBY mode because master server $NAGIOS_HOST is now running." \
    | $mailcmd -s "RECOVERY: $HOST Has Entered STANDBY Mode" $mailto
}

# Mail sent when the master check returned neither 0 nor 2; includes the
# last five lines of the log file.
warn_mail() {
  $echocmd "Warning: status of master Nagios server $NAGIOS_HOST is unknown.\nStandby server was NOT activated.\nSomeone needs to check the script $0\nHere is a tail of the logfile:\n $(tail -n 5 "$LOG_FILE")" \
    | $mailcmd -s "WARNING: master nagios server status UNKNOWN" $mailto
}

## end functions

# Make sure the directory for the status file exists.
status_dir=$(dirname "$CHECK_STATUS")
if ! test -d "$status_dir"; then
  mkdir -p "$status_dir"
fi

# The external command file must be a named pipe, otherwise nothing can be
# sent to the local Nagios process.
if ! test -p "$CMD_FILE"; then
  log_msg "error writing to command file"
  exit 1
fi

## Run the check
# "$@" forwards any extra command-line arguments to check_nrpe unchanged.
"$NRPE" -H "$NAGIOS_HOST" "$@" -c check_nagios > /dev/null 2>&1
n=$?

# Status recorded by the previous run. On the very first run the file does not
# exist yet; use an empty value instead of letting cat fail, so the comparison
# below simply reports a state change.
if test -r "$CHECK_STATUS"; then
  last=$(cat "$CHECK_STATUS")
else
  last=""
fi

## big ugly if
# Both sides are quoted so an empty or multi-word $last cannot turn the test
# into a syntax error.
if [ "$last" != "$n" ]; then
  log_msg "state change detected: from $last to $n"
  if test "$n" -eq 0; then
    log_msg "master is OK: disabling notifications and svc checks"
    echo "[$datetime] DISABLE_NOTIFICATIONS;$datetime" >> "$CMD_FILE"
    echo "[$datetime] STOP_EXECUTING_SVC_CHECKS" >> "$CMD_FILE"
    up_mail
  elif test "$n" -eq 2; then
    log_msg "master is NOT OK: enabling notifications and svc checks"
    echo "[$datetime] ENABLE_NOTIFICATIONS;$datetime" >> "$CMD_FILE"
    echo "[$datetime] START_EXECUTING_SVC_CHECKS" >> "$CMD_FILE"
    down_mail
  else
    log_msg "master status warning or unknown"
    warn_mail
  fi
else
  # No change: only log when the current minute is 00, to keep the log small.
  # "[" is used deliberately; it compares "08"/"09" as decimal numbers.
  if [ "$(date +%M)" -eq 0 ]; then
    log_msg "no change detected: current status is $n"
  fi
fi
## end big ugly if

## write current state to status file
echo "$n" > "$CHECK_STATUS"

________________________________ From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] On Behalf Of Lehman, John Sent: Wednesday, October 25, 2006 1:39 PM To: Nagios Users mailinglist Subject: [Nagios-users] failover Question for everyone. I have 2 nagios servers and one is active all the time and the other is standby. I have read the documentation on implementing failover but I am finding it difficult implementing. Is there anyone that could give me sample scripts which "on the standby host" would "check the nagios master" and if the nagios master were down then the standby would become the active host? I am losing my mind trying to implement from the documentation and I would appreciate any advice on this one. John ------------------------------------------------------------------------- Using Tomcat but need to do more? Need to support web services, security? 
Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 _______________________________________________ Nagios-users mailing list Nagios-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nagios-users ::: Please include Nagios version, plugin version (-v) and OS when reporting any issue. ::: Messages without supporting info will risk being sent to /dev/null