Hello,

I've found that mon trap support was a bit broken.  I put 
together a patch which fixed several things.

From me:
* Only alert if opstatus is different from old_opstatus
  (avoids sending the alert every time a trap indicating
  failure is received. I don't believe alertafter is checked).
* In call alert, allow upalert, traps and traptimeouts to all
  change the command line at the same time instead of just one.
  (so trap-based upalerts are actually sent as upalerts)
* In trap timeout, only send the alarm if the previous alert
  was NOT a timeout.  (so that if it times out five times in
  a row, you only get one alert)

From Tom Scanlan
 http://www.mail-archive.com/mon@linux.kernel.org/msg00341.html 
* Make timeout alerts work
* Make alertafter work for trap timeouts

The patch is all rolled into one for ease of use.

The patch is attached, and may also be found at:
 http://www.vanderbijlfamily.com/software/windschecker/mon.diff

Enjoy!

Ryan
-- 
Ryan VanderBijl
--- mon 2001-09-08 09:42:05.000000000 -0400
+++ mon.ryan    2002-12-11 14:18:09.000000000 -0500
@@ -3947,6 +3947,11 @@
             $noalert++;
         }
     }
+
+    if ( $trap{"spc"} == $old_status && $old_status == $STAT_FAIL) {
+        $noalert++;
+    }
+
     #### else just fall through and send alert
 
     do_alert (
@@ -3969,22 +3974,32 @@
 #
 sub handle_trap_timeout {
     my ($group, $service) = @_;
-    my ($tmnow);
+    my ($tmnow, $sendalert);
 
     $tmnow = time;
+    $sendalert = 0;
 
     my $sref = \%{$watch{$group}->{$service}};
     $sref->{"_failure_count"}++;
+    $sref->{"_consec_failures"}++;
     $sref->{"_last_failure"} = $tmnow;
-    $sref->{"_first_failure"} = $tmnow if ($sref->{"_op_status"} != $STAT_FAIL);
+    if ($sref->{"_op_status"} != $STAT_FAIL) {
+       $sref->{"_first_failure"} = $tmnow;
+        $sendalert = 1;
+    }
     set_op_status ($group, $service, $STAT_FAIL);
+    if ($sref->{"_last_summary"} ne "trap timeout") {
+        $sendalert = 1;
+    }
     $sref->{"_last_summary"} = "trap timeout";
     $sref->{"_last_detail"} = "";
     shift @last_failures if (@last_failures > $CF{"MAX_KEEP"});
     push @last_failures, "$group $service $tm $sref->{_last_summary}";
     syslog ('crit', "failure for $last_failures[-1]");
 
-    do_alert ($group, $service, undef, undef, $FL_TRAPTIMEOUT);
+    if ($sendalert) {
+        do_alert ($group, $service, "Trap timeout", 1, $FL_TRAPTIMEOUT);
+    }
 }
 
 
@@ -4594,11 +4609,6 @@
             undef ($ENV{"MON_TRAP_INTENDED"}) if (defined($ENV{"MON_TRAP_INTENDED"}));
         }
 
-       my $t;
-       $t = "-u" if ($args{"flags"} & $FL_UPALERT);
-       $t = "-T" if ($args{"flags"} & $FL_TRAP);
-       $t = "-O" if ($args{"flags"} & $FL_TRAPTIMEOUT);
-
        my @execargs = (
            $alert,
            "-s", "$args{service}",
@@ -4607,10 +4617,16 @@
            "-t", "$tmnow",
        );
 
+       my $t = "";
+       $t = "-T" if ($args{"flags"} & $FL_TRAP);
+       $t = "-O" if ($args{"flags"} & $FL_TRAPTIMEOUT);
+
        if ($t) {
            push @execargs, $t;
        }
 
+       push @execargs, "-u" if ($args{"flags"} & $FL_UPALERT);
+
        if ($args{"args"} ne "") {
            push @execargs, quotewords('\s+',0,$args{"args"});
        }

Reply via email to