#!/usr/bin/perl -w

use strict;
use warnings;

=head1 NAME

irqstats - Munin plugin to monitor individual interrupts on a Linux machine

=head1 APPLICABLE SYSTEMS

Any Linux system

=head1 CONFIGURATION

None needed.

Optionally, specified devices may be skipped. Each comma-separated entry
is matched as a regular expression against the device label; empty
entries are ignored.

  [irqstats]
  env.skipdevs blkif,vif

=head1 USAGE

Link this plugin to /etc/munin/plugins/ and restart the munin-node.

When the plugin is invoked under a name of the form C<irqstats_N>, only
the counters of CPU number N are reported instead of the sum over all
CPUs.

=head1 INTERPRETATION

The plugin simply shows the number of interrupts on each interrupt
line. The lines are identified by the attached equipment rather than
the IRQ numbers. High interrupt rates may coincide with high loads on
disk, network or other I/O, this is normal.

=head1 MAGIC MARKERS

  #%# family=auto
  #%# capabilities=autoconf

=head1 BUGS

None known

=head1 AUTHOR

Unknown

=head1 LICENSE

GPLv2

=cut

use Munin::Plugin;
use Scalar::Util qw(looks_like_number);

my $skipdevs = $ENV{'skipdevs'} || "";

# Split the skip list once, up front, instead of once per input line.
# Empty entries are dropped: an empty pattern in a match makes Perl reuse
# the last successfully matched pattern, which is never what is wanted here.
my @skip_patterns = grep { length } split(',', $skipdevs);

if (defined $ARGV[0] && $ARGV[0] eq 'autoconf') {
    if (-r '/proc/interrupts') {
        print "yes\n";
        exit(0);
    }
    else {
        print "no (file /proc/interrupts not readable)\n";
        exit(1);
    }
}

# sparc machines get their CPU list from /proc/stat and use a different
# field count when splitting /proc/interrupts lines (see the main loop).
my $sun;
$sun = 1 if (`uname -m` =~ /sparc/);

my @cpus;    # one entry per CPU
my $cpus;    # number of CPUs, i.e. number of counter columns per line

# The interrupt table is needed on every platform, so open it before
# branching.  (Previously the sparc branch never opened it and the main
# loop read from an undefined filehandle.)
open my $in, '<', '/proc/interrupts'
    or die "Can't open /proc/interrupts: $!\n";

if ($sun) {
    # NOTE(review): no header line is consumed here; this assumes the
    # sparc /proc/interrupts has no "CPU0 CPU1 ..." header - confirm.
    @cpus = grep { /^cpu\d+/ } `cat /proc/stat`;
    $cpus = scalar @cpus;
}
else {
    # The first line of /proc/interrupts is the "CPU0 CPU1 ..." header.
    my $header = <$in>;
    @cpus = split(' ', defined $header ? $header : '');
    $cpus = scalar @cpus;
}

# Per-CPU mode: the CPU index is taken from the name the plugin was
# invoked as (irqstats_<N>).
my $cpu;
if ($0 =~ /(?:.*\/)?irqstats_(.+)/) {
    $cpu = $1;
    # Reject non-numeric suffixes as well as indexes past the last CPU;
    # a non-numeric value would otherwise be silently treated as CPU 0.
    if ($cpu !~ /^\d+$/ or $cpu > $#cpus) {
        die "Requested CPU $cpu out of bounds (0..$#cpus)\n";
    }
}

my @irqs;

# sum(LIST) - add up the numeric elements of LIST.
# Elements that do not look like numbers (undef, empty strings, text)
# count as 0, which avoids warnings on padded or short rows.
# Returns 0 for an empty list.
sub sum {
    my $sum = 0;
    for my $value (@_) {
        $sum += $value if looks_like_number($value);
    }
    return $sum;
}

LINE: while (my $line = <$in>) {
    my ($irq, $label, $type);

    # The split limit keeps the trailing description in a single field.
    my @data = split(' ', $line, $sun ? $cpus + 2 : $cpus + 3);
    chomp @data;

    $irq = shift @data;
    next LINE unless defined($irq) and length $irq;
    chop $irq;    # drop the last character of the first field (the ':')

    if ($irq =~ /^\d+$/) {
        # For numbered interrupts the format is set
        $label = pop @data;
        $type  = pop @data;

        # Short lines can leave these undefined; normalise so the
        # matches below do not warn.
        $label = '' unless defined $label;
        $type  = '' unless defined $type;

        # work around "MPIC \d" on powerpc
        if ($type =~ /MPIC/ && $label =~ s/^([12])\s+//) {
            $type .= " $1";
        }

        # handle hwirq: drop it when equal to the IRQ number,
        # otherwise keep it in brackets after the device name
        if ($label =~ /^(\d+)\s+(.+)/) {
            $label = $1 eq $irq ? $2 : "$2 [$1]";
        }

        # handle level/edge
        $label =~ s/^(Level|Edge)\s+//;
    }
    else {
        # For named interrupts it's more confusing, slice and dice
        # to divorce data from commentary
        $label = join(" ", @data[$cpus .. $#data]);
        @data  = @data[0 .. $cpus - 1];
    }

    # skip devices specified in config
    for my $skipdev (@skip_patterns) {
        next LINE if ($label =~ $skipdev);
    }

    # NOTE(review): in per-CPU mode, rows with fewer counters than CPUs
    # (e.g. ERR:/MIS:) yield an undefined count for the higher CPUs.
    # The original filter for this was disabled:
    # next if defined($cpu) and $cpus > @data;

    push @irqs, {
        irq   => $irq,
        label => $label,
        count => defined($cpu) ? $data[$cpu] : sum(@data),
    };
}
close $in;

if (defined $ARGV[0] && $ARGV[0] eq 'config') {
    print 'graph_title Individual interrupts',
        defined($cpu) ? " on CPU $cpu\n" : "\n";

    # NOTE(review): the here-document and the graph_order line below were
    # garbled in the reviewed copy of this file (text between '<' and '>'
    # was lost).  They are reconstructed from the upstream Munin irqstats
    # plugin - verify against the authoritative source before merging.
    print <<EOM;
graph_args --base 1000 --logarithmic
graph_vlabel interrupts / \${graph_period}
graph_category system
graph_info Shows the number of different IRQs received by the kernel.  High disk or network traffic can cause a high number of interrupts (with good hardware and drivers this will be less so). Sudden high interrupt activity with no associated higher system activity is not normal.
EOM
    print join(' ', 'graph_order', map { "i" . $_->{irq} } @irqs), "\n";

    for my $irq (@irqs) {
        # Prefer the device label; fall back to the IRQ name when there
        # is no label or the label is longer than 47 characters.
        my $f = ($irq->{label} || $irq->{irq});
        $f = $irq->{irq} if (length($f) > 47);

        print "i", $irq->{irq}, '.label ', $f, "\n";

        if ($irq->{label}) {
            print "i", $irq->{irq}, '.info Interrupt ', $irq->{irq},
                ', for device(s): ', $irq->{label}, "\n";
        }
        elsif ($irq->{irq} =~ /NMI/i) {
            print "i", $irq->{irq}, ".info Nonmaskable interrupt. Either 0 or quite high. If it's normally 0 then just one NMI will often mark some hardware failure.\n";
        }
        elsif ($irq->{irq} =~ /LOC/i) {
            print "i", $irq->{irq}, ".info Local (per CPU core) APIC timer interrupt. Until 2.6.21 normally 250 or 1000 per second. On modern 'tickless' kernels it more or less reflects how busy the machine is.\n";
        }

        print "i", $irq->{irq}, ".type DERIVE\n";
        print "i", $irq->{irq}, ".min 0\n";
        print_thresholds("i$irq->{irq}");
    }
}
else {
    print "i", $_->{irq}, '.value ', $_->{count}, "\n" for @irqs;
}
__END__