Oops, I was just looking at some messages on the list I'd flagged as "do
something with this" and I found that Ed Ravin sent in a much nicer fix
for this back in October! Searching for a public URL to it, I see he
reposted it with an additional improvement in February.
http://www.mail-archive.com/mon%40linux.kernel.org/msg00339.html
As penance, here are some more improvements. These incorporate Ed's
ideas, plus some more of my own.
- ICMP messages are output in the detail section
- If fping outputs a host we didn't ask for, an error is reported; if it
doesn't output one we did ask for, that host is listed as unreachable
with a note under "other problems".
- Rather than discarding fping's output when it exits with a weird
status, treat this as another type of error to output.
- Make it easier to add new errors which are treated like this.
- Don't run traceroute for hosts fping couldn't look up.
- Make -a more robust by testing whether any hosts were @alive
rather than whether @unreachable == @ARGV.
- Regularize the way the monitor outputs blank lines.
Since Jim'd mailed me saying he'd committed the previous two patches I'd
sent, this one is relative to them.
diff -r -X /home/roderick/.diff-exclude -u base.3/mon.d/fping.monitor base.4/mon.d/fping.monitor
--- base.3/mon.d/fping.monitor Fri Jun 7 13:04:22 2002
+++ base.4/mon.d/fping.monitor Fri Jun 7 14:28:56 2002
@@ -61,8 +61,13 @@
my @unreachable;
my @alive;
-my @addr_not_found;
my @slow;
+my @other_prob; # details for other per-host problems
+my @error; # other errors which I'll give non-zero exit for
+my @icmp; # ICMP messages output by fping
+my %addr_unknown;
+
+my %want_host = map { $_ => 1 } @ARGV; # hosts fping hasn't output yet
while (<IN>)
{
@@ -70,10 +75,15 @@
if (/^(\S+).*unreachable/)
{
push (@unreachable, $1);
+ delete $want_host{$1}
+ or push @error, "unreachable host `$1' wasn't asked for";
}
elsif (/^(\S+) is alive \((\S+)/)
{
+ delete $want_host{$1}
+ or push @error, "reachable host `$1' wasn't asked for";
+
if ($opt{"s"} && $2 > $opt{"s"})
{
push (@slow, [$1, $2]);
@@ -87,83 +97,113 @@
elsif (/^(\S+)\s+address\s+not\s+found/)
{
- push @addr_not_found, $1;
+ $addr_unknown{$1} = 1;
+ push @other_prob, "$1 address not found";
push @unreachable, $1;
+ delete $want_host{$1}
+ or push @error, "unknown host `$1' wasn't asked for";
}
- # fping can output a number of messages in addition to the eventual
- # reachable/unreachable. Ignore them since we'll also get the main
- # "unreachable" message).
+ # ICMP Host Unreachable from 1.2.3.4 for ICMP Echo sent to 2.4.6.8
+ # (among others)
- elsif (/^ICMP .+ from \S+ for ICMP Echo sent to /)
+ elsif (/^ICMP (.*) for ICMP Echo sent to (\S+)/)
{
- # do nothing
+ push @icmp, $_;
}
else
{
- print STDERR "unidentified output from fping: [$_]\n";
+ push @error, "unidentified output from fping: [$_]";
}
}
+for my $host (keys %want_host) {
+ push @other_prob, "$host not listed in fping's output";
+ push @unreachable, $host;
+}
+
close (IN);
$END_TIME = time;
my $retval = $? >> 8;
-if ($retval == 3)
+if ($retval < 3)
{
- print "fping: invalid cmdline arguments [$CMD @ARGV]\n";
- exit 1;
+ # do nothing
}
-elsif ($retval == 4)
+elsif ($retval == 3)
{
- print "fping: system call failure\n";
- exit 1;
+ push @error, "fping: invalid cmdline arguments [$CMD @ARGV]";
}
-elsif ($retval == 1 || $retval == 2 || @slow != 0)
+elsif ($retval == 4)
{
- print join (" ", sort (@unreachable, map { $_->[0] } @slow)), "\n\n";
+ push @error, "fping: system call failure";
}
-elsif ($retval == 0)
+else
{
- print "\n";
+ push @error, "unknown return code ($retval) from fping";
}
-else
-{
- print "unknown return code ($retval) from fping\n";
+if (@error) {
+ print "unusual errors\n";
+}
+else {
+ my @fail = sort @unreachable, map { $_->[0] } @slow;
+ # This line is intentionally blank if there are no failures.
+ print "@fail\n";
}
+print "\n";
print "start time: " . localtime ($START_TIME) . "\n";
print "end time : " . localtime ($END_TIME) . "\n";
-print "duration : " . ($END_TIME - $START_TIME) . " seconds\n\n";
+print "duration : " . ($END_TIME - $START_TIME) . " seconds\n";
+
+if (@error != 0)
+{
+ print <<EOF;
+
+------------------------------------------------------------------------------
+unusual errors
+------------------------------------------------------------------------------
+EOF
+ print join ("\n", @error), "\n";
+}
if (@unreachable != 0)
{
print <<EOF;
+
------------------------------------------------------------------------------
unreachable hosts
------------------------------------------------------------------------------
EOF
- print join ("\n", @unreachable), "\n\n";
+ print join ("\n", @unreachable), "\n";
- if (@addr_not_found != 0)
- {
- print "address not found for @addr_not_found\n";
- }
+ print "\nother problems:\n", join "\n", @other_prob, ''
+ if @other_prob;
+}
+
+if (@icmp != 0)
+{
+ print <<EOF;
- print "\n";
+------------------------------------------------------------------------------
+ICMP messages
+------------------------------------------------------------------------------
+EOF
+ print join "\n", @icmp, '';
}
if (@slow != 0)
{
print <<EOF;
+
------------------------------------------------------------------------------
slow hosts (response time which exceeds $opt{s}ms)
------------------------------------------------------------------------------
@@ -180,6 +220,7 @@
if (@alive != 0)
{
print <<EOF;
+
------------------------------------------------------------------------------
reachable hosts rtt
------------------------------------------------------------------------------
@@ -196,25 +237,28 @@
#
if ($opt{"T"} && @unreachable)
{
+ my $header_output = 0;
foreach my $host (@unreachable)
{
+ next if $addr_unknown{$host};
+ print $header_output++ ? "\n" : <<EOF;
+
+------------------------------------------------------------------------------
+traceroute to unreachable hosts
+------------------------------------------------------------------------------
+EOF
system ("traceroute -w 3 $host 2>&1");
}
-
- print "\n";
}
+exit 1 if @error;
+
#
# fail only if all hosts do not respond
#
if ($opt{"a"})
{
- if (@unreachable == @ARGV)
- {
- exit 1;
- }
-
- exit 0;
+ exit(@alive ? 0 : 1);
}
exit 1 if (@slow != 0);
--
Roderick Schertler
[EMAIL PROTECTED]