#! /usr/bin/perl -w # fixscript will replace this line with code to load INN::Config use strict; # @(#)scanspool # # Originally written by Landon Curt Noll (chongo was here /\../\) # # This code is placed in the public domain. # # As this may take a while to run, using the -v switch is recommended to see # how far the program has progressed. # # This program will scan first the active file, noting problems such as: # # - malformed lines # - duplicate entries # - newsgroups aliased to a non-existent newsgroup # - newsgroups aliased to a newsgroup that is also aliased # # Then it will examine all articles under your news spool directory, # complaining about articles that: # # - have a basename that starts with a leading 0 # - have a basename that is out of range according to the low and high # water marks of the active file # - do not contain a Newsgroups header field # - are in a directory for which there is no newsgroup in the active file # - are in a newsgroup to which they do not belong # # scanspool will also complain about directories with an all-digit component, # in case the parent directory is a newsgroup with a high water mark below that # all-digit component, as the name of such directories will conflict with a # future article file with the same name. # # scanspool understands aliased groups. Thus, if an article is posted # to foo.old.name that is aliased to foo.bar, it will be expected to # be found under foo.bar and not foo.old.name. # # scanspool assumes that the path of a valid newsgroup's directory # from the top of the spool tree will not contain any "." character. # Thus, directories such as out.going, tmp.dir, in.coming and # news.archive will not be searched. This program also assumes that # article basenames contain only decimal digits. Last, files under # the top level directory "lost+found" are not scanned. # # The output of scanspool will start with one of 4 forms: # # FATAL: fatal or internal error (to stderr) # # WARNING: active or article format problem, (to stderr) # newsgroup alias problem, find(1) error, # article open error # # path/123: basename starts with 0, (to stdout) # article number out of range, # article in the wrong directory, # article in a directory not related to # an active non-aliased newsgroup # # \t ... verbose message starting with a tab (to stdout) # Data structures # my %gname2type; # $gname2type{$name} # $name - newsgroup name in foo.dot.form # produces => 4th active field (y, n, x, ...) # alias type is "=", not "=foo.bar" # my %realgname; # $realgname{$name} # $name - newsgroup name in foo.dot.form # produces => newsgroup name in foo.dot.form # if type is =, this will be a.b, not $name # my %lowart; # $lowart{$name} # $name - newsgroup name in foo.dot.form # produces => lowest article allowed in the group # if type is =, this is not valid # my %highart; # $highart{$name} # $name - newsgroup name in foo.dot.form # produces => highest article allowed in the group # if type is =, this is not valid # If $highart{$name} < $lowart{$name}, # then the group should be empty use Getopt::Std; # Define variables Getopt::Std uses for --help and --version. $Getopt::Std::STANDARD_HELP_VERSION = 1; our $VERSION = $INN::Config::version; $VERSION =~ s/INN //; $0 =~ s!.*/!!; my $usage = "Usage: $0 [-cnrv] [-a active-file] [-s spool-dir] Perform a sanity scan over all articles in Options: -a active-file active file to use (default /active) -c check article filenames, don't scan the articles -n don't throttle innd -r remove articles reported to have a problem -s spool-dir spool tree (default ) -v verbose mode verbose messages begin with a tab show articles found in non-active directories "; sub HELP_MESSAGE { print $usage; exit(0); } # global constants # my $prog = $0; # our name my $spool = "$INN::Config::patharticles"; my $activefile = "$INN::Config::active"; my $ctlinnd = "$INN::Config::pathbin/ctlinnd"; my $reason = "running scanspool"; # throttle reason # parse args # my %opt; getopts("a:cnrs:v", \%opt) || die $usage; $activefile = $opt{'a'} if defined $opt{'a'}; $spool = $opt{'s'} if defined $opt{'s'}; # throttle innd unless -n # if (!defined $opt{'n'}) { system("$ctlinnd throttle '$reason' >/dev/null 2>&1"); } # process the active file # parse_active($activefile); # check the spool directory # check_spool($spool); # unthrottle innd unless -n # if (!defined $opt{'n'}) { system("$ctlinnd go '$reason' >/dev/null 2>&1"); } # all done exit(0); # parse_active - parse the active file # # From the active file, fill out the %gname2type (type of newsgroup) # and %realgname (real/non-aliased name of group), %lowart & %highart # (low and high article numbers). This routine will also check for # aliases to missing groups, or groups that are also aliases. # sub parse_active { my ($active) = @_; # the name of the active file to use my $ACTIVE; # active file handle my $line; # active file line my $name; # name of newsgroup my $low; # low article number my $high; # high article number my $type; # type of newsgroup (4th active field) my $alias; # realname of an aliased group my $linenum; # active file line number # if verbose (-v), say what we are doing print "\tscanning $active\n" if defined $opt{'v'}; # open the active file open($ACTIVE, '<', $active) || fatal(1, "cannot open $active"); # parse each line $linenum = 0; while ($line = <$ACTIVE>) { # count the line ++$linenum; # verify that we have a correct number of tokens if ($line !~ /^\S+ 0*(\d+) 0*(\d+) \S+$/o) { problem("WARNING: active line is malformed at line $linenum"); next; } ($name, $high, $low, $type) = $line =~ /^(\S+) 0*(\d+) 0*(\d+) (\S+)$/o; # watch for duplicate entries if (defined $realgname{$name}) { problem("WARNING: ignoring duplicate group: $name" . ", at active line $linenum"); next; } # record which type it is $gname2type{$name} = $type; # record the low and high article numbers $lowart{$name} = $low; $highart{$name} = $high; # determine the directory and real group name # no special rule for "j" or "x" if ($type =~ /^=(.+)/o) { $alias = $1; $gname2type{$name} = "="; # rename type to be just = } else { $alias = $name; } $realgname{$name} = $alias; } # close the active file close $ACTIVE; # be sure that any alias type is aliased to a real group foreach my $ngname (keys %realgname) { # skip if not an alias type next if $gname2type{$ngname} ne "="; # be sure that the alias exists $alias = $realgname{$ngname}; if (!defined $realgname{$alias}) { problem("WARNING: alias for $ngname: $alias, is not a group"); next; } # be sure that the alias is not an alias of something else if ($gname2type{$alias} eq "=") { problem("WARNING: alias for $ngname: $alias, is also an alias"); next; } } return; } # problem - report a problem to stdout # # Print a message to stdout. Parameters are space separated. # A final newline is appended to it. # # usage: # problem(arg, arg2, ...) # sub problem { # print the line with the header field and newline my $line = join(" ", @_); print STDERR $line, "\n"; return; } # rm_file - remove a file # # Remove the file given as argument, if the -r flag is used. # Otherwise, it is a no-op. # Report a fatal error to stderr and exit if the removal fails. # # usage: # rm_file(filename) # sub rm_file { my ($filename) = @_; if (defined $opt{'r'}) { unlink("$filename") or fatal(4, "cannot remove $filename"); problem("$filename: successfully removed"); } return; } # fatal - report a fatal error to stderr and exit # # Print a message to stderr. The message has the program name prepended # to it. Parameters are space separated. A final newline is appended # to it. This function exits with the code of exitval. # # usage: # fatal(exitval, arg, arg2, ...) # sub fatal { my ($exitval, @args) = @_; # firewall, in case we're called with less than two arguments if ($#_ < 1) { print STDERR "FATAL: fatal() called with only ", $#_ + 1, " arguments\n"; if ($#_ < 0) { $exitval = -1; } } # print the error message my $line = join(" ", @args); print STDERR "$prog: $line\n"; # unthrottle innd unless -n # if (!defined $opt{'n'}) { system("$ctlinnd go '$reason' >/dev/null 2>&1"); } # exit exit $exitval; } # check_spool - check the articles found in the spool directory # # This subroutine will check all articles found under the $spool directory. # It will examine only file path that do not contain any "." or whitespace # character, and whose basename is completely numeric. Files under # lost+found will also be ignored. # # given: # $spooldir - top of tree # sub check_spool { my ($spooldir) = @_; # top of article tree my $filename; # article pathname under $spool my $artgrp; # group of an article my $artnum; # article number in a group my $prevgrp = ''; # previous different value of $artgrp my $preverrgrp = ''; # previous non-active $artgrp my $ARTICLE; # article handle my $aline; # header line from an article my $newsgroupsField; # in a continuation line, reading Newsgroups my @group; # array of groups from the Newsgroups header field my $remove; # mark an article as to be removed my $FINDFILE; # find command pipe handle # if verbose, say what we are doing print "\tfinding articles under $spooldir\n" if defined $opt{'v'}; # move to the $spool directory chdir $spooldir or fatal(2, "cannot chdir to $spool"); # start finding files # if ( !open( $FINDFILE, '-|', "find . \\( -type d -o -type f -o -type l \\) -name '[0-9]*' " . "-print 2>&1", ) ) { fatal(3, "cannot start find in $spool"); } # process each history line # while ($filename = <$FINDFILE>) { # if the line contains "find:", assume it is a find error and print it chomp($filename); if ($filename =~ /find:\s/o) { problem("WARNING:", $filename); next; } # remove the leading "./" that find put in our path $filename =~ s#^\./##o; # skip if this path has a "." in it (beyond a leading "./") next if ($filename =~ /\./o); # skip if lost+found next if ($filename =~ m:^lost+found/:o); # skip if not a numeric basename next if ($filename !~ m:/\d+$:o); # skip timehash and timecaf spools next if $filename =~ /^time-\d\d\/\d\d/ or $filename =~ /^time-\d\d\/..\/\d\d/ or $filename =~ /^timecaf-\d\d\/\d\d/; # get the article's newsgroup name (based on its path from $spool) $artgrp = $filename; $artgrp =~ s#/\d+$##o; $artgrp =~ s#/#.#go; # get the article number (based on its path from $spool) $artnum = $filename; $artnum =~ s#^.+/##o; # warn if a directory with an all-digit component is found, and the # parent directory ($artgrp) is an active newsgroup with a high water # mark below that all-digit component if (-d "$filename") { if (defined $gname2type{$artgrp} && $highart{$artgrp} > 1 && $highart{$artgrp} < $artnum) { problem( "WARNING:", "$filename: directory with an all-digit component", "(will conflict with a future article with the same name", "in $artgrp)", ); } next; } # if verbose (-v), then note if our group changed if (defined $opt{'v'} && $artgrp ne $prevgrp) { print "\t$artgrp\n"; $prevgrp = $artgrp; } # note if the article is not in a directory that is used by # a real (non-aliased) group in the active file # # If we complained about this group before, don't complain again. # If verbose, note files that could be removed. # if (!defined $gname2type{$artgrp} || $gname2type{$artgrp} =~ /[=jx]/o) { if ($preverrgrp ne $artgrp) { problem("$artgrp: not an active group directory (probably " . "a removed newsgroup; see articles with -v)"); $preverrgrp = $artgrp; } if (defined $opt{'v'}) { problem("$filename: article found in non-active directory"); } rm_file($filename); next; } # check on the article number if ($artnum =~ m/^0/o) { problem("$filename: article basename starts with a 0"); rm_file($filename); next if defined $opt{'r'}; } if (defined $gname2type{$artgrp}) { if ($lowart{$artgrp} > $highart{$artgrp}) { problem("$filename: active indicates group should be empty"); rm_file($filename); next if defined $opt{'r'}; } elsif ($artnum < $lowart{$artgrp}) { problem("$filename: article number is too low " . "(first article has number $lowart{$artgrp} " . "in the active file)"); rm_file($filename); next if defined $opt{'r'}; } elsif ($artnum > $highart{$artgrp}) { problem("$filename: article number is too high " . "(last article has number $highart{$artgrp} " . "in the active file"); rm_file($filename); next if defined $opt{'r'}; } } # if check filenames only (-c), then do nothing else with the file next if (defined $opt{'c'}); # don't open a control or junk, they can be from anywhere next if ($artgrp eq "control" || $artgrp =~ /^control\./ || $artgrp eq "junk"); if (-z "$filename") { problem("WARNING: $filename: empty file"); rm_file($filename); next; } # try open the file if (!open $ARTICLE, '<', $filename) { # the find is now gone (expired?), give up on it problem("WARNING: cannot open $filename"); next; } @group = (); $newsgroupsField = 0; $remove = 0; # read until the Newsgroups header field is found AREADLINE: while ($aline = <$ARTICLE>) { # catch the Newsgroups header field if ($aline =~ /^Newsgroups:/io or ($aline =~ /^\s+/o and $newsgroupsField > 0)) { # convert $aline into a comma separated list of groups # remove both CR and LF from the header field body (because # the article may be stored in wire-format) $aline =~ s/^Newsgroups://io; $aline =~ tr/ \t\r\n//d; # form an array of newsgroups, and retain them (in case of a # folded Newsgroups header field) push(@group, split(",", $aline)); $newsgroupsField = 1; } elsif ($aline =~ /^\r?\n$/o) { # end of headers problem("WARNING: $filename: no Newsgroups header field"); $remove++; last; } else { # do not continue parsing the headers as we have just found out # the whole contents of the Newsgroups header field last if $newsgroupsField > 0; } if (eof $ARTICLE) { problem("WARNING: $filename: article with no body"); $remove++; } } # check if the Newsgroups header field contains our group if ($newsgroupsField > 0) { my $found = 0; for (my $j = 0; $j <= $#group; ++$j) { # look at the group if (exists $realgname{ $group[$j] } and $realgname{ $group[$j] } eq $artgrp) { # this article was posted to this group $found++; last; } } if ($found == 0) { # no group or group alias was found problem("$filename: does not belong in $artgrp" . " according to its Newsgroups header field"); $remove++; } } # close the article close $ARTICLE; # remove the article if asked to rm_file($filename) if $remove > 0; } # all done with the find close $FINDFILE; return; }