Package: munin-node
Version: 1.4.5-3
Severity: normal
Tags: patch
hddtemp_smartctl has numerous bugs and annoymances e.g.:
. Fails to collect anything on systems with large numbers of drives due
to timeouts.
. Fails to collect anything on systems with one or more faulty or slow
drives due to timeouts.
. Fails to collect anything on busy (e.g. I/O) systems due to timeouts
. Arbitrarily sets graph minimum to zero celsius - I have three drives
here which say:
Min/Max recommended Temperature: -4/72 Celsius
Min/Max Temperature Limit: -9/77 Celsius
and five which say:
Min/Max recommended Temperature: 0/60 Celsius
Min/Max Temperature Limit: -40/70 Celsius
. Doesn't query temperature limits from drives
. On Linux systems only the first 26 drives are checked (e.g. not /dev/sdaa,
/dev/sdab etc.).
. Fails to detect smartctl '--nocheck' option on Squeeze+
I think this patch fixes those bugs sufficiently to get it working on my
system.
Cheers,
Tim.
--- /usr/share/munin/plugins/hddtemp_smartctl 2010-10-05 14:39:07.0
+0100
+++ hddtemp_smartctlmt 2011-07-31 14:01:23.415941442 +0100
@@ -23,6 +23,13 @@
not return its temperature when only the -A option is
used.
dev_$dev - monitoring device for one drive, e.g. twe0
+ maxchild - Maximum number of smartctl child processes to run at any
+one time - defaults to 4
+ minstartdelaysecs - Time in seconds to wait between spawning child
+ processes, defaults to 0.1
+ smartctltimeoutsecs - Maximum time to wait for a child process to run
+ in case of a broken drive etc. Defaults to 5
+
If the smartctl enviroment variable is not set the plugin will
search your $PATH, /usr/bin, /usr/sbin, /usr/local/bin and
@@ -89,11 +96,29 @@
smartctl then hdparm will be used. Note that hdparm isn't available
on all platforms.
+=head1 BUGS
+
+ * Uses part of the device name as the munin data source fieldname,
+which can and does frequently change. Should be derived from the
+drive model+serial number instead.
+ * All modern drives make their recommended/absolute min/max
+rated temperatures available - see 'smartctl -l scttemp' this
+info should be provided by this plugin when run with the config
+argument.
+
=cut
use strict;
+use Errno;
+use POSIX;
+use IO::File;
+sub pipe_from_fork ($);
my $DEBUG = $ENV{'MUNIN_DEBUG'} || 0;
+my $maxchild = $ENV{'maxchild'} || 4;
+my $minstartdelaysecs = $ENV{'minstartdelaysecs'} || 0.1;
+my $smartctltimeoutsecs = $ENV{'timeout'} || 5;
+
my $smartctl = exists $ENV{smartctl} ? $ENV{smartctl} : '';
@@ -116,8 +141,8 @@
# Check version of smartctl to determine --nocheck capabilities
my $use_nocheck = 0;
-if ($smartctl and `$smartctl --version` =~ / version (\d+\.\d+) /i) {
-$use_nocheck = $1 = 5.37;
+if ($smartctl and `$smartctl --help` =~ /nocheck/) {
+$use_nocheck = 1;
warn [DEBUG] Smartctl supports --nocheck\n if $DEBUG;
}
@@ -141,7 +166,7 @@
my @drivesSCSI;
if (-d '/sys/block/') {
opendir(SCSI, '/sys/block/');
-@drivesSCSI = grep /sd[a-z]/, readdir SCSI;
+@drivesSCSI = grep /sd[a-z]+/, readdir SCSI;
closedir(SCSI);
}
@@ -185,7 +210,6 @@
}
} elsif ($ARGV[0] eq 'config') {
print graph_title HDD temperature\n;
-print graph_args --base 1000 -l 0\n;
print graph_vlabel temp in °C\n;
print graph_category sensors\n;
print graph_info This graph shows the temperature in degrees Celsius of
the hard drives in the machine.\n;
@@ -194,7 +218,14 @@
}
}
-foreach my $drive (@drives) {
+my %smartreaders;
+my @remainingdrives = @drives;
+
+while (@remainingdrives || keys %smartreaders) {
+ # Fire-up another smartctl?
+ if (@remainingdrives ((keys %smartreaders) $maxchild)) {
+
+my $drive = shift @remainingdrives;
warn [DEBUG] Processing $drive\n if $DEBUG;
my $fulldev = device_for_drive($drive);
@@ -209,29 +240,88 @@
}
my $cmd = command_for_drive_device($drive, $fulldev, $use_nocheck);
- warn [DEBUG] Command for $drive is % $cmd %\n if $DEBUG;
+$smartreaders{$drive}{'cmd'} = $cmd;
- my $output = `$cmd`;
- if ($? ne 0) {
- print $drive.value U\n;
- print $drive.extinfo Command $cmd on drive $drive failed: $?. The
plugin needs to have read permission on all monitored devices.\n;
- warn [ERROR] Command $cmd on drive $drive failed: $?. The plugin needs
to have read permission on all monitored devices.\n;
- next;
+# This did use perl's open |- syntax, but this didn't work in 5.8.0 :-(
+$smartreaders{$drive}{'fh'} = IO::File-new;
+$smartreaders{$drive}{'pid'} = pipe_from_fork($smartreaders{$drive}{'fh'});
+
+if ($smartreaders{$drive}{'pid'}) { # parent process
+ # switch to non-blocking reading of the pipe
+ use Fcntl;
+ my $flags = 0;
+ fcntl($smartreaders{$drive}{'fh'}, F_GETFL, $flags) or die Couldn't get