#!@@PERL@@ -w
# -*- perl -*-

use strict;
use warnings;

=head1 NAME

hddtemp_smartctl - Plugin to monitor harddrive temperatures through
SMART

=head1 CONFIGURATION

This plugin needs to run as root or some other user that has access to
the harddrive devices.

The following environment variables are used

  smartctl   - path to smartctl executable
  drives     - List drives to monitor. E.g. "env.drives hda hdc".
  type_$dev  - device type for one drive, e.g. "env.type_sda 3ware,0"
               or more typically "env.type_sda ata" if sda is a SATA
               disk.
  args_$dev  - additional arguments to smartctl for one drive,
               e.g. "env.args_hda -v 194,10xCelsius". Use this to make
               the plugin use the --all or -a option if your disk will
               not return its temperature when only the -A option is
               used.
  dev_$dev   - monitoring device for one drive, e.g. twe0

If the "smartctl" environment variable is not set the plugin will
search your $PATH, /usr/bin, /usr/sbin, /usr/local/bin and
/usr/local/sbin for a file called "smartctl", and use that.

If the "drives" environment variable is not set, the plugin will
attempt to search for drives to probe.

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=head1 AUTHOR

Copyright (c) 2005, Lutz Peter Christoph
All rights reserved.

2016-08-27, Gabriele Pohl (contact@dipohl.de)
            Fix for github issue #690

=head1 LICENSE

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

  * The name and aliases of Lutz Peter Christoph ("Lupe Christoph",
    "Lutz Christoph") may not be used to endorse or promote products
    derived from this software without specific prior written
    permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=head1 NOTES

Note for users of RAID controllers (smartmontools currently only
supports 3ware): you can specify the drives attached to your RAID
controller(s) as raiddev_num (e.g. sda_0). Then you must specify the
type like this: type_sda_0 3ware,0.

Recent versions of the kernel driver use a separate major device
number for monitoring purposes, like /dev/twe or /dev/twa. This can be
put in the e.g. dev_sda environment variable, to allow the user to
keep sda as the name of the disk.

To avoid spinning up sleeping disks smartctl will use the --nocheck
parameter. If this parameter isn't supported by your version of
smartctl then hdparm will be used. Note that hdparm isn't available on
all platforms.

=cut

use File::Spec::Functions qw(splitdir);

use lib $ENV{'MUNIN_LIBDIR'};
use Munin::Plugin;

my $DEBUG = $ENV{'MUNIN_DEBUG'} || 0;

# ---------------------------------------------------------------------------
# Locate the smartctl executable: either the one named in the environment or
# the first executable "smartctl" found in $PATH and a few well-known places.
# ---------------------------------------------------------------------------
my $smartctl;

if (exists $ENV{smartctl}) {
    $smartctl = $ENV{smartctl};
    if (defined $ARGV[0] and $ARGV[0] eq 'autoconf') {
        # The real "autoconf" section follows later. But here we need to
        # check for requirements, too.
        if (! -e $smartctl) {
            print "no (Predefined smartctl ($smartctl) does not exist)\n";
            exit 0;
        } elsif (! -x $smartctl) {
            print "no (Predefined smartctl ($smartctl) is not executable)\n";
            exit 0;
        }
    } else {
        # immediate failure is allowed outside of "autoconf"
        die "$smartctl does not exist\n" unless (-e $smartctl);
        die "$smartctl is not executable\n" unless (-x $smartctl);
    }
} else {
    # Not defined in %ENV? Check obvious places.
    # An unset PATH is treated as empty instead of triggering a warning.
    my @dirs = split(':', defined $ENV{PATH} ? $ENV{PATH} : '');
    push (@dirs, qw(/usr/bin /usr/sbin /usr/local/bin /usr/local/sbin) );

    until ($smartctl or @dirs == 0) {
        my $dir = shift @dirs;
        my $path = $dir.'/smartctl';
        $smartctl = $path if -x $path;
    }

    unless ($smartctl) {
        if (defined $ARGV[0] and $ARGV[0] eq 'autoconf') {
            print "no ('smartctl' executable not found)\n";
            exit 0;
        } else {
            die "'smartctl' executable not found\n";
        }
    }
}

# Check version of smartctl to determine --nocheck capabilities
my $use_nocheck = 0;
if ($smartctl and `$smartctl --version` =~ / release (\d+\.\d+) /i) {
    $use_nocheck = $1 >= 5.37;
    # Only claim support when the version comparison actually succeeded.
    warn "[DEBUG] Smartctl supports --nocheck\n" if $DEBUG and $use_nocheck;
}

# hdparm is used as a fallback
my $hdparm = `sh -c 'command -v hdparm'`;
chomp $hdparm;

# ---------------------------------------------------------------------------
# Build the list of drives: platform specific discovery first, then an
# explicit "drives" environment variable overrides whatever was found.
# ---------------------------------------------------------------------------
my @drives;

# Try to get a default set of drives
if ($^O eq 'linux') {
    # On Linux, we know how to enumerate ide drives.
    my @drivesIDE;
    if (-d '/proc/ide' and opendir(my $ide_dh, '/proc/ide')) {
        @drivesIDE = grep /hd[a-z]/, readdir $ide_dh;
        closedir($ide_dh);
    }

    # Look for SCSI / SATA and NVMe drives in /sys. The directory is read
    # once and filtered twice.
    my @drivesSCSI;
    my @drivesNVME;
    if (-d '/sys/block/' and opendir(my $block_dh, '/sys/block/')) {
        my @block_entries = readdir $block_dh;
        closedir($block_dh);
        @drivesSCSI = grep /sd[a-z]/, @block_entries;
        @drivesNVME = grep /nvme[0-9]+n[0-9]+/, @block_entries;
    }

    # Get list of all drives we found
    @drives = (@drivesIDE, @drivesSCSI, @drivesNVME);
} elsif ($^O eq 'freebsd') {
    if (opendir(my $dev_dh, '/dev')) {
        @drives = grep /^(ada?|da)[0-9]+$/, readdir $dev_dh;
        closedir($dev_dh);
    }
} elsif ($^O eq 'solaris') {
    # Keep only the basename of each matching raw disk device.
    @drives = map { s@.*/@@ ; $_ } glob '/dev/rdsk/c*t*d*s2';
}

@drives = split ' ', $ENV{drives} if exists $ENV{drives};

# Sort list of drives
@drives = sort @drives;

warn "[DEBUG] Drives: ",join(', ',@drives),"\n" if $DEBUG;

# ---------------------------------------------------------------------------
# Handle the "autoconf" and "config" plugin arguments.
# ---------------------------------------------------------------------------
if (defined $ARGV[0]) {
    if ($ARGV[0] eq 'autoconf') {
        if (@drives) {
            # Probe only the first drive; --nocheck is deliberately not
            # passed here (third argument omitted).
            my $cmd = command_for_drive_device($drives[0],
                                               device_for_drive($drives[0]));
            if (`$cmd` =~ /Temperature/) {
                print "yes\n";
            } else {
                print "no (first drive not supported, configure the plugin)\n";
            }
            exit 0;
        } else {
            print "no (no drives known)\n";
            exit 0;
        }
    } elsif ($ARGV[0] eq 'config') {
        print "graph_title HDD temperature\n";
        print "graph_vlabel Degrees Celsius\n";
        print "graph_category sensors\n";
        print "graph_info This graph shows the temperature in degrees Celsius of the hard drives in the machine.\n";
        foreach (@drives) {
            my @dirs = splitdir($_);
            print clean_fieldname($_) . ".label " . $dirs[-1] . "\n";
            print clean_fieldname($_) . ".max 100\n";
            print clean_fieldname($_) . ".warning 57\n";
            print clean_fieldname($_) . ".critical 60\n";
        }
        exit 0;
    }
}

# ---------------------------------------------------------------------------
# Fetch: query every drive and print one value per field.
# ---------------------------------------------------------------------------
foreach my $drive (@drives) {
    warn "[DEBUG] Processing $drive\n" if $DEBUG;
    my $fulldev = device_for_drive($drive);

    # The field name must be the same one that "config" announced, so it is
    # passed through clean_fieldname() here as well.
    my $field = clean_fieldname($drive);

    # Fall back to using hdparm for detecting disks in stand-by only if nocheck
    # isn't supported (hdparm isn't available on all platforms).
    if (!$use_nocheck && $hdparm && $fulldev =~ /\/dev\/[sh]d?/) {
        if (`$hdparm -C $fulldev 2>/dev/null` =~ /standby/) {
            warn "[DEBUG] Drive $fulldev is in standby mode, not checking\n" if $DEBUG;
            next;
        }
    }

    my $cmd = command_for_drive_device($drive, $fulldev, $use_nocheck);
    warn "[DEBUG] Command for $drive is % $cmd %\n" if $DEBUG;

    my $output = `$cmd`;
    my $cmd_exit = $?;    # capture immediately, before anything can reset $?

    # Strip header
    $output =~ s/.*?\n\n//s;
    # Strip trailer
    $output =~ s/Please specify device type with the -d option.\n//s;
    $output =~ s/Use smartctl -h to get a usage summary//s;
    $output =~ s/\n+$//s;

    if ($cmd_exit != 0) {
        print "$field.value U\n";
        if ($cmd_exit == -1) {
            warn "[ERROR] Command $cmd on drive $drive failed to execute: $!";
        } else {
            my $smartctl_exit = $cmd_exit >> 8;
            print "$field.extinfo Command '$cmd' on drive $drive failed with exit($smartctl_exit)\n";
            # exit (2) is a normal state with directive "--nocheck=standby"
            # when device is in STANDBY or SLEEP mode
            if ($smartctl_exit == 2 and $use_nocheck) {
                if ($output =~ /(?:standby|sleep)/i) {
                    next;
                }
            }
            warn "[ERROR] Command $cmd on drive $drive failed with exit($smartctl_exit): $output";
        }
        next;
    }

    # Try the known temperature report formats in turn. For the attribute
    # table rows the tenth whitespace separated column is reported.
    if ($output =~ /Current Drive Temperature:\s*(\d+)/) {
        print "$field.value $1\n";
    } elsif ($output =~ /^(194 Temperature_(Celsius|Internal).*)/m) {
        my @F = split /\s+/, $1;
        print "$field.value $F[9]\n";
    } elsif ($output =~ /^(231 Temperature_Celsius.*)/m) {
        my @F = split ' ', $1;
        print "$field.value $F[9]\n";
    } elsif ($output =~ /^(190 (Airflow_Temperature_Cel|Temperature_Case).*)/m) {
        my @F = split ' ', $1;
        print "$field.value $F[9]\n";
    } elsif ($output =~ /Temperature:\s*(\d+) Celsius/) {
        print "$field.value $1\n";
    } else {
        print "$field.value U\n";
        print "$field.extinfo Temperature not detected in smartctl output\n";
    }
}

# device_for_drive($drive)
#
# Map a configured drive name to the device path handed to smartctl.
#
#   $drive - drive name as listed in @drives (e.g. "sda" or "sda_0")
#
# Returns "/dev/<name>" ("/dev/rdsk/<name>" on Solaris). <name> is the value
# of the "dev_$drive" environment variable if it exists, otherwise the drive
# name with a trailing "_<one or two digits>" suffix removed.
sub device_for_drive {
    my ($drive) = @_;

    # The purpose of the following regular expression (removing a numeric
    # suffix starting with an underscore) is a mystery. But it is probably
    # meant to detect a partition and select the parent block device in such
    # a case. (The NOTES section of this file also documents names of the
    # form "raiddev_num", e.g. "sda_0", which carry such a suffix.)
    # In order to avoid misinterpreting a trailing serial number as such a
    # partition number, we limit the number of numeric characters after the
    # underscore to just one or two.
    my $dev = $drive =~ /(.*)(?:_\d{1,2})$/ ? $1 : $drive;

    my $fulldev = '/dev/';
    $fulldev .= 'rdsk/' if $^O eq 'solaris';
    $fulldev .= exists $ENV{'dev_'.$drive} ? $ENV{'dev_'.$drive} : $dev;
    return $fulldev;
}

# command_for_drive_device($drive, $fulldev, $use_nocheck)
#
# Assemble the smartctl command line for one drive.
#
#   $drive       - drive name, used to look up "args_$drive" and
#                  "type_$drive" in the environment
#   $fulldev     - device path appended as the final argument
#   $use_nocheck - optional; when true "--nocheck=standby" is added
#
# Returns the command as a single string.
sub command_for_drive_device {
    my ($drive, $fulldev, $use_nocheck) = @_;

    my $cmd = $smartctl.' -A ';
    $cmd .= '--nocheck=standby ' if $use_nocheck;
    $cmd .= $ENV{'args_'.$drive}.' ' if exists $ENV{'args_'.$drive};
    $cmd .= '-d '.$ENV{'type_'.$drive}.' ' if exists $ENV{'type_'.$drive};
    $cmd .= $fulldev;
    return $cmd;
}