Oops, I was just looking at some messages on the list I'd flagged as "do something with this" and I found that Ed Ravin sent in a much nicer fix for this back in October! Searching for a public URL to it, I see he reposted it with an additional improvement in February.
http://www.mail-archive.com/mon%40linux.kernel.org/msg00339.html

As penance, here are some more improvements. These incorporate Ed's ideas, plus some more of my own.

- ICMP messages are output in the detail section.
- If fping outputs a host we didn't ask for, or doesn't output one we did ask for, an error is thrown.
- Rather than discarding fping's output when it exits with a weird status, treat this as another type of error to output.
- Make it easier to add new errors which are treated like this.
- Don't run traceroute for hosts fping couldn't look up.
- Make -a more robust by testing whether any hosts were @alive rather than whether @unreachable == @ARGV.
- Regularize the way the monitor outputs blank lines.

Since Jim'd mailed me saying he'd committed the previous two patches I'd sent, this one is relative to them.

diff -r -X /home/roderick/.diff-exclude -u base.3/mon.d/fping.monitor base.4/mon.d/fping.monitor --- base.3/mon.d/fping.monitor Fri Jun 7 13:04:22 2002 +++ base.4/mon.d/fping.monitor Fri Jun 7 14:28:56 2002 @@ -61,8 +61,13 @@ my @unreachable; my @alive; -my @addr_not_found; my @slow; +my @other_prob; # details for other per-host problems +my @error; # other errors which I'll give non-zero exit for +my @icmp; # ICMP messages output by fping +my %addr_unknown; + +my %want_host = map { $_ => 1 } @ARGV; # hosts fping hasn't output yet while (<IN>) { @@ -70,10 +75,15 @@ if (/^(\S+).*unreachable/) { push (@unreachable, $1); + delete $want_host{$1} + or push @error, "unreachable host `$1' wasn't asked for"; } elsif (/^(\S+) is alive \((\S+)/) { + delete $want_host{$1} + or push @error, "reachable host `$1' wasn't asked for"; + if ($opt{"s"} && $2 > $opt{"s"}) { push (@slow, [$1, $2]); @@ -87,83 +97,113 @@ elsif (/^(\S+)\s+address\s+not\s+found/) { - push @addr_not_found, $1; + $addr_unknown{$1} = 1; + push @other_prob, "$1 address not found"; push @unreachable, $1; + delete $want_host{$1} + or push @error, "unknown host `$1' wasn't asked for"; } - # fping can
output a number of messages in addition to the eventual - # reachable/unreachable. Ignore them since we'll also get the main - # "unreachable" message). + # ICMP Host Unreachable from 1.2.3.4 for ICMP Echo sent to 2.4.6.8 + # (among others) - elsif (/^ICMP .+ from \S+ for ICMP Echo sent to /) + elsif (/^ICMP (.*) for ICMP Echo sent to (\S+)/) { - # do nothing + push @icmp, $_; } else { - print STDERR "unidentified output from fping: [$_]\n"; + push @error, "unidentified output from fping: [$_]"; } } +for my $host (keys %want_host) { + push @other_prob, "$host not listed in fping's output"; + push @unreachable, $host; +} + close (IN); $END_TIME = time; my $retval = $? >> 8; -if ($retval == 3) +if ($retval < 3) { - print "fping: invalid cmdline arguments [$CMD @ARGV]\n"; - exit 1; + # do nothing } -elsif ($retval == 4) +elsif ($retval == 3) { - print "fping: system call failure\n"; - exit 1; + push @error, "fping: invalid cmdline arguments [$CMD @ARGV]"; } -elsif ($retval == 1 || $retval == 2 || @slow != 0) +elsif ($retval == 4) { - print join (" ", sort (@unreachable, map { $_->[0] } @slow)), "\n\n"; + push @error, "fping: system call failure"; } -elsif ($retval == 0) +else { - print "\n"; + push @error, "unknown return code ($retval) from fping"; } -else -{ - print "unknown return code ($retval) from fping\n"; +if (@error) { + print "unusual errors\n"; +} +else { + my @fail = sort @unreachable, map { $_->[0] } @slow; + # This line is intentionally blank if there are no failures. + print "@fail\n"; } +print "\n"; print "start time: " . localtime ($START_TIME) . "\n"; print "end time : " . localtime ($END_TIME) . "\n"; -print "duration : " . ($END_TIME - $START_TIME) . " seconds\n\n"; +print "duration : " . ($END_TIME - $START_TIME) . 
" seconds\n"; + +if (@error != 0) +{ + print <<EOF; + +------------------------------------------------------------------------------ +unusual errors +------------------------------------------------------------------------------ +EOF + print join ("\n", @error), "\n"; +} if (@unreachable != 0) { print <<EOF; + ------------------------------------------------------------------------------ unreachable hosts ------------------------------------------------------------------------------ EOF - print join ("\n", @unreachable), "\n\n"; + print join ("\n", @unreachable), "\n"; - if (@addr_not_found != 0) - { - print "address not found for @addr_not_found\n"; - } + print "\nother problems:\n", join "\n", @other_prob, '' + if @other_prob; +} + +if (@icmp != 0) +{ + print <<EOF; - print "\n"; +------------------------------------------------------------------------------ +ICMP messages +------------------------------------------------------------------------------ +EOF + print join "\n", @icmp, ''; } if (@slow != 0) { print <<EOF; + ------------------------------------------------------------------------------ slow hosts (response time which exceeds $opt{s}ms) ------------------------------------------------------------------------------ @@ -180,6 +220,7 @@ if (@alive != 0) { print <<EOF; + ------------------------------------------------------------------------------ reachable hosts rtt ------------------------------------------------------------------------------ @@ -196,25 +237,28 @@ # if ($opt{"T"} && @unreachable) { + my $header_output = 0; foreach my $host (@unreachable) { + next if $addr_unknown{$host}; + print $header_output++ ? 
"\n" : <<EOF; + +------------------------------------------------------------------------------ +traceroute to unreachable hosts +------------------------------------------------------------------------------ +EOF system ("traceroute -w 3 $host 2>&1"); } - - print "\n"; } +exit 1 if @error; + # # fail only if all hosts do not respond # if ($opt{"a"}) { - if (@unreachable == @ARGV) - { - exit 1; - } - - exit 0; + exit(@alive ? 0 : 1); } exit 1 if (@slow != 0); -- Roderick Schertler [EMAIL PROTECTED]