#!/usr/bin/perl -w

=head1 NAME

iostat_ios - Show IO-operation latency per low-level block device.

=head1 APPLICABLE SYSTEMS

Any Linux system

=head1 CONFIGURATION

None needed

=head1 USAGE

Link this into /etc/munin/plugins/ and restart the munin-node.

=head1 INTERPRETATION

The plugin shows the average time an IO operation needs to complete
for each disk or block device on the system.  Simple partitioned disks
will only show as the whole disk.  LVM volumes, RAID devices and other
derived block devices are not tracked individually.

When the IO-operation time here increases it is to be expected that
the iowait on the CPU graph increases, but please read about how
iowait reporting works to fully understand that number.

=head1 MAGIC MARKERS

  #%# family=legacy
  #%# capabilities=autoconf

=head1 BUGS

Should show more useful device names/labels for device-mapper devices,
such as those used by LVM and so on.

=head1 AUTHOR

Copyright (C) 2007-2011 Nicolai Langfeldt

Copyright (C) 2004 Per Buer

=head1 LICENSE

GPLv2

=cut

use strict;
use warnings;

use IO::File;
use Storable qw(nstore retrieve);
use Munin::Plugin;

# Where the counters from the previous run are kept between invocations.
use constant STATEFILE => "$ENV{MUNIN_PLUGSTATE}/iostat-ios.state";

# Maps a field prefix ("dev<major>_<minor>") to the kernel device name.
# Filled in by get_ios() as a side effect and read by print_config(), so it
# has to be declared before either of them can be called.
my %name;

if (defined($ARGV[0]) and $ARGV[0] eq 'autoconf') {
    if (-r "/proc/diskstats" or -r "/proc/partitions") {
        print "yes\n";
    }
    else {
        print "no (no /proc/diskstats or /proc/partitions)\n";
    }
    exit 0;
}

if (defined($ARGV[0]) and $ARGV[0] eq 'config') {
    print_config();
    exit;
}

# Fetch: compare the current counters with the ones saved by the previous
# run, then save the current counters for the next run.
my $r       = get_ios();
my ($old_r) = get_state();

if ($old_r) {
    cmp_io($old_r, $r);
}
else {
    warn "iostat_ios: No historic data present\n";
}

store_state($r);

# filter($major, $minor, $tmpnam)
#
# Decide whether a block device should be graphed.
# Returns 1 to keep the device, 0 to ignore it.  $minor is accepted but
# not used in the decision.
sub filter {
    my ($major, $minor, $tmpnam) = @_;

    # Ignore derived logical block devices.
    if (defined $major) {
        return 0 if $major == 1;      # RAM devices
        return 0 if $major == 9;      # MD devices
        return 0 if $major == 58;     # LVM devices
        return 0 if $major == 253;    # LVM2 devices
    }

    # Ignore partitions; only whole disks are graphed.
    if (defined $tmpnam) {
        # devfs style names: .../part1
        return 0 if $tmpnam =~ /part\d+$/;

        # sda1, hdb2, vdc3, xvda1 - and sdaa1 on hosts with more than 26 disks.
        return 0 if $tmpnam =~ /^\s*(?:sd|hd|x?vd)[a-z]+\d+\s*$/;

        # nvme0n1p1 and mmcblk0p1: the partition is marked by a "p<N>" suffix.
        return 0 if $tmpnam =~ /^\s*(?:nvme\d+n|mmcblk)\d+p\d+\s*$/;
    }

    return 1;
}

# get_ios($opt)
#
# Read the per-device IO counters from /proc/diskstats or, when that file
# is not readable, from /proc/partitions.
#
# As a side effect %name is filled with the device name for every field
# prefix found.
#
# Returns, when $opt is true, an array ref of field prefixes
# ("dev<major>_<minor>") in file order; otherwise a hash ref of the form
# { major => { minor => { rio, rtime, wio, wtime } } }.
#
# Dies if the statistics file cannot be opened.
sub get_ios {
    my ($opt) = @_;

    # Zero-based positions of: major, minor, name, reads completed, time
    # spent reading, writes completed, time spent writing.  /proc/partitions
    # carries an extra "#blocks" column in front of the device name, so
    # everything from the name onwards is shifted by one.
    my ($source, @columns);
    if (-r "/proc/diskstats") {
        $source  = "/proc/diskstats";
        @columns = (0, 1, 2, 3, 6, 7, 10);
    }
    else {
        $source  = "/proc/partitions";
        @columns = (0, 1, 3, 4, 7, 8, 11);
    }

    my $parts = IO::File->new($source, 'r')
        or die("unable to open $source\n");

    my %R;
    my @dev;

    while (my $line = <$parts>) {
        # split ' ' discards leading whitespace, so the column positions do
        # not depend on how the kernel pads the major number.
        my ($maj, $min, $name, $rio, $rtime, $wio, $wtime)
            = (split ' ', $line)[@columns];

        # Skip blank lines and the /proc/partitions header line.
        next unless defined $maj and defined $min;
        next unless $maj =~ /^\d+$/ and $min =~ /^\d+$/;

        # Skip devices where any of the four counters is missing or zero.
        next unless ($wio and $rio and $rtime and $wtime);

        next if filter($maj, $min, $name) == 0;

        $R{$maj}{$min} = {
            rio   => $rio,
            rtime => $rtime,
            wio   => $wio,
            wtime => $wtime,
        };

        my $label = "dev${maj}_${min}";
        push @dev, $label;
        $name{$label} = $name;
    }

    $parts->close();

    return $opt ? \@dev : \%R;
}

# print_config()
#
# Print the graph-level attributes followed by the field definitions for
# every device get_ios() reports.
sub print_config {
    # The value of graph_info has to stay on a single output line, so it is
    # assembled from pieces instead of being written as one wrapped literal.
    my $graph_info = join ' ',
        'For each applicable disk device this plugin shows the latency',
        '(or delay) for I/O operations on that disk device.',
        'The delay is in part made up of waiting for the disk to flush the',
        'data, and if data arrives at the disk faster than it can read or',
        'write it then the delay time will include the time needed for',
        'waiting in the queue.';

    print "graph_title IO Service time\n",
          "graph_args --base 1000 --logarithmic\n",
          "graph_category disk\n",
          "graph_vlabel seconds\n",
          "graph_info $graph_info\n";

    for my $d (@{ get_ios(1) }) {
        # The cdef divides the reported value by 1000 for display; the
        # graph's vlabel is "seconds".
        print "${d}_rtime.label ", $name{$d}, " read\n",
              "${d}_rtime.type GAUGE\n",
              "${d}_rtime.cdef ${d}_rtime,1000,/\n",
              "${d}_wtime.label ", $name{$d}, " write\n",
              "${d}_wtime.type GAUGE\n",
              "${d}_wtime.cdef ${d}_wtime,1000,/\n";
        print_thresholds($d);
    }
}

# cmp_io($old_io, $new_io)
#
# Print the read and write ".value" lines for every device in $new_io,
# using $old_io (same structure, from the previous run) as the baseline.
sub cmp_io {
    my ($old_io, $new_io) = @_;

    for my $maj (sort keys %{$new_io}) {
        for my $min (sort keys %{ $new_io->{$maj} }) {
            my $new = $new_io->{$maj}{$min};

            # Look the old sample up without autovivifying anything: a
            # device may have appeared since the previous run.
            my $old;
            if (ref($old_io) eq 'HASH' and ref($old_io->{$maj}) eq 'HASH') {
                $old = $old_io->{$maj}{$min};
            }

            my $dev = "dev${maj}_${min}";

            print "${dev}_rtime.value ",
                  _avg_latency($old, $new, 'rio', 'rtime'), "\n",
                  "${dev}_wtime.value ",
                  _avg_latency($old, $new, 'wio', 'wtime'), "\n";
        }
    }
}

# _avg_latency($old, $new, $ops_key, $time_key)
#
# Average time per operation between two samples: the growth of the time
# counter divided by the growth of the operation counter.  Returns 'U'
# (Munin's "unknown") when there is no usable old sample, or when either
# counter did not grow (idle device, or counters went backwards).
sub _avg_latency {
    my ($old, $new, $ops_key, $time_key) = @_;

    return 'U'
        unless ref($old) eq 'HASH'
        and defined $old->{$ops_key}
        and defined $old->{$time_key};

    my $ops_diff  = $new->{$ops_key} - $old->{$ops_key};
    my $time_diff = $new->{$time_key} - $old->{$time_key};

    return ($ops_diff > 0 and $time_diff > 0)
        ? $time_diff / $ops_diff
        : 'U';
}

# store_state($R)
#
# Save the counters in STATEFILE for the next run.
sub store_state {
    my ($R) = @_;

    nstore($R, STATEFILE)
        or warn "iostat_ios: could not write state file " . STATEFILE . "\n";

    return;
}

# get_state()
#
# Load the counters saved by the previous run.
# Returns the saved hash ref, or nothing when STATEFILE is not readable,
# cannot be decoded, or does not hold a hash.
sub get_state {
    return unless -r STATEFILE;

    # If retrieve() dies, $R simply stays undef.  There is no need to
    # unlink() the file, since store_state() overwrites it anyway.
    my $R;
    eval { $R = retrieve(STATEFILE); };

    return unless ref($R) eq 'HASH';
    return $R;
}