#! /usr/bin/perl -w # use lib '@LIBPERLDIR@'; use INN::Config; # If running inside INN, uncomment the above and point to INN::Config. # # Author: James Brister -- berkeley-unix -- # Start Date: Sat, 10 Oct 1998 21:40:11 +0200 # Project: INN # File: pullnews # # History: # Full changelog can be found in the Git commit history of the # INN project. Major changes are: # # December 2023: # Julien Élie fixed the computation of the Bytes header field, # the use of trailing LF in rnews batches (-r option), and # trailing CRLF in generated header fields. # # February-March 2023: # Julien Élie added TLS support for both downstream and upstream # servers. Also made pullnews robust on socket timeout and lock # file handling, and added -L (largest article size wanted). # # January 2010: # Geraint A. Edwards added header-only feeding (-B); # added ability to hashfeed (-a) - uses MD5 - Diablo-compatible; # enabled -m to remove header fields matching (or not) a given # regexp; minor bug fix to rnews when -O; improved rnews # reporting. # # December 2008: # Matija Nalis added -O (optimized mode, checking whether the # downstream server already has the article to download). # Bug fixes too. # # May 2008: # Geraint A. Edwards greatly improved pullnews, adding # -b, -C, -d, -G, -H, -k, -l, -m, -M, -n, -P, -Q, -R, -t, -T, -w # and improving -s as well as fixing some bugs. # He also integrated the backupfeed contrib script by Kai # Henningsen, adding -f, -F, -N, -S, -z and -Z to pullnews. # # Description: A simple pull feeder. Connects to multiple upstream # machines (in the guise of a reader), and pulls over articles # and feeds them to a downstream server (in the guise of a # feeder). # # Uses a simple configuration file: /pullnews.marks when # run as the news user, or otherwise ~/pullnews.marks, to define # which machines to pull articles from and which groups at each # machine to pull over. There is also support for more specific # configurations like cross-posted newsgroups to kill, thanks to # the -m flag which allows articles with header fields matching # regexp to be dropped. # # A configuration file looks like: # # data.pa.vix.com # news.software.nntp 0 0 # comp.lang.c 0 0 # news.uu.net username password # uunet.announce 0 0 # uunet.help 0 0 # news.server.com:563_TLS username password # news.admin.hierarchies 0 0 # # Hostname lines have no leading space and may have an optional # username and password after the hostname; all the # subsequent group lines for that host must have leading # spaces. The two integers on the group line will be updated by # the program when it runs. They are the Unix time the group was # accessed, and the highest numbered article that was pulled # over. # # If you have INN and the script is able to successfully include your # INN::Config module, the value of $INN::Config::pathdb will override it # if pullnews is run as the news user. my $pathdb = $ENV{HOME}; require 5.004; $SIG{INT} = \&outtaHere; $SIG{QUIT} = \&bail; use Net::NNTP 2.18; # With libnet 1.0606 (10-Dec-1998) because older versions # issued MODE READER with Net::NNTP::new(). use Getopt::Std; use IO::Handle; use POSIX qw(ceil floor); use strict; $0 =~ s!.*/!!; $pathdb = $INN::Config::pathdb if $INN::Config::pathdb && $INN::Config::runasuser && $INN::Config::runasuser eq getpwuid($<); my $defaultConfig = "${pathdb}/pullnews.marks"; my $defaultPort = 119; my $defaultHost = "localhost"; my $defaultCheckPoint = 0; my $defaultRetries = 0; my $defaultDebug = 0; my $defaultRetryTime = 1; my $defaultProgressWidth = 50; my $defaultMaxArts; my $lockfile; my $runEndBlock = 0; # Check whether pullnews is run inside INN. my $use_inn_shlock = 0; eval { require INN::Utils::Shlock; import INN::Utils::Shlock; $use_inn_shlock = 1; }; # In case pullnews is run outside INN, fall back to call flock(2) # and its corresponding Perl function instead of shlock (a program # shipped with INN). # Note that this Perl function does not work as expected on all # existing systems (for instance on Solaris). if (not $use_inn_shlock) { use Fcntl; use Fcntl qw(:flock); } END { return unless $runEndBlock; # In case we bail out, while holding a lock. if ($use_inn_shlock) { INN::Utils::Shlock::releaselocks(); } elsif (defined $lockfile) { flock(LOCK, LOCK_UN); unlink $lockfile; } } my $usage = "Usage: $0 [-BhnOqRx] [-a hashfeed] [-b fraction] [-c config] [-C width] [-d level] [-f fraction] [-F fakehop] [-g groups] [-G newsgroups] [-H headers] [-k checkpt] [-l logfile] [-L size] [-m header_pats] [-M num] [-N num] [-p port] [-P hop_limit] [-Q level] [-r file] [-s host[:port][_tlsmode]] [-S num] [-t retries] [-T seconds] [-w num] [-z num] [-Z num] [upstream_host ...] Options: -a hashfeed only feed article if the MD5 hash of the Message-ID matches hashfeed (where hashfeed is of the form value/mod, value/mod:offset, start-end/mod, or start-end/mod:offset). The algorithm used is compatible with the one used by Diablo; see the pullnews man page for more details. -b fraction backtrack on server numbering reset. The proportion (0.0 to 1.0) of a group's articles to pull when the server's article number is less than our high for that group. When fraction is 1.0, pull all the articles on the server. The default is to do nothing. -B feed is header-only (headers plus one blank line). Add the Bytes header field if needed. Keep body if control article. -c config specify the configuration file instead of the default file located in ${pathdb}/pullnews.marks when run as the news user, or otherwise in ~/pullnews.marks (the running user's home directory). -C width use width characters for progress (default is $defaultProgressWidth). -d level set debugging level to this integer (default is $defaultDebug). -f fraction proportion of articles to get in each group (0.0 to 1.0). -F fakehop prepend fakehop as a host to the Path header field body. -g groups specify a collection of groups to get. The value must be a single argument with commas between group names: -g comp.lang.c,comp.lang.lisp,comp.lang.python The groups must be defined in the config file somewhere. Only the hosts that carry those groups will be contacted. -G newsgroups add these groups to the configuration (see -g and -w). -h print this message. -H headers remove these named header fields (colon-separated list). -k checkpt checkpoint the config file every checkpt articles (default is $defaultCheckPoint). A value of 0 means normally (at end). -l logfile log progress/stats to logfile (default is stdout). -L size largest wanted article size in bytes for articles to download. -m 'Hdr1:regexp1 !Hdr2:regexp2 #Hdr3:regexp3 !#Hdr4:regexp4 ...' feed article only if: the Hdr1 header field body matches regexp1; and the Hdr2 header field body does not match regexp2; also, process the message thus: if the Hdr3 header field body matches regexp3, remove that header field; if the Hdr4 header field body does not match regexp4, remove that header field. -M num maximum number of articles (per group) to process before bailing out. -n do nothing -- just fake it. -N num timeout length when establishing NNTP connection. -O optimized mode (may help for big articles/slow link to upstream hosts). Check whether an article exists before downloading it. -p port specify the port to connect to in order to feed articles (default is $defaultPort). -P hop_limit count hops ('!') in the Path header field body, feed article only if: hop_limit is '+num' and hop_count is more than num; or hop_limit is '-num' and hop_count is less than num. -q $0 will normally be verbose about what it is doing. This option will make it quiet. -Q level set the quietness level (-Q 2 is equivalent to -q). -r file rather than feeding to a server, $0 will instead create an rnews-compatible file. -R be a reader (use MODE READER and POST) -s host[:port][_tlsmode] specify the downstream hostname, optional port and optional use of TLS (default is $defaultHost, port $defaultPort unless overridden by -p, without TLS). tlsmode is either TLS (for implicit TLS) or STARTTLS (for explicit TLS). -S num specify the maximum time (in seconds) to run. -t retries number of attempts to connect to a server (default is $defaultRetries, see also -T). -T secs time (in seconds) to pause between retries (default is $defaultRetryTime, see also -t). -w num set highwater mark to num (if num is negative, use Current+num instead); a num of 0 will re-get all articles on the server; but a num of -0 will get no old articles, set mark to Current. -x insert an Xref header field in any article that lacks one. -z num time (in seconds) to sleep between articles. -Z num time (in seconds) to sleep between groups. "; sub HELP_MESSAGE { print $usage; exit(0); } use vars qw($opt_a $opt_b $opt_B $opt_c $opt_C $opt_d $opt_f $opt_F $opt_g $opt_G $opt_h $opt_H $opt_k $opt_l $opt_L $opt_m $opt_M $opt_n $opt_N $opt_O $opt_p $opt_P $opt_q $opt_Q $opt_r $opt_R $opt_s $opt_S $opt_t $opt_T $opt_w $opt_x $opt_z $opt_Z); getopts("a:b:Bc:C:d:f:F:g:G:hH:k:l:L:m:M:nN:Op:P:qQ:r:Rs:S:t:T:w:xz:Z:") || die $usage; HELP_MESSAGE() if defined $opt_h; my @groupsToGet = (); # Empty list means all groups in config file. my @groupsToAdd = (); my $rnews = $opt_r; my $groupFile = $opt_c || $defaultConfig; my $localServer = $opt_s || $defaultHost; my $localPort = $opt_p || $defaultPort; my $quiet = $opt_q; my $hashfeed = $opt_a || ''; my $header_only = $opt_B; my $watermark = $opt_w; my $retries = $opt_t || $defaultRetries; my $retryTime = $opt_T || $defaultRetryTime; my $checkPoint = $opt_k || $defaultCheckPoint; my $debug = $opt_d || $defaultDebug; my $progressWidth = $opt_C || $defaultProgressWidth; my $maxArts = $opt_M || $defaultMaxArts; my $no_op = $opt_n || 0; my $reader = $opt_R || 0; my $quietness = $opt_Q || 0; my $skip_headers = lc($opt_H || ''); my $logFile = '>&STDOUT'; $logFile = ">>$opt_l" if $opt_l; my @hdr_to_match = split(/\s+/, $opt_m) if defined $opt_m; my $pathSteps = $opt_P if defined $opt_P; my $path_limit; # Check whether we should use TLS for the downstream server. my $localTLS = 0; $localTLS = 1 if ($localServer =~ s/_STARTTLS$//); $localTLS = 2 if ($localServer =~ s/_TLS$//); die "Missing IO::Socket::SSL Perl module for TLS with downstream server\n" if $localTLS > 0 && !Net::NNTP::can_ssl(); # Find a possible port at the end of the news server name. # Count the number of ":" to check that it is not an IPv6 address. my @colons = split(/:/, $localServer); if ((scalar(@colons) == 2) and ($localServer =~ s/:(\d+)$//)) { $localPort = $1; die "can\'t have both ``-p'' and a port in ``-s''\n" if $opt_p; } die "can\'t have both ``-s'' and ``-r''\n" if $opt_s && $opt_r; die "``-b'' value not 0.0-1.0: $opt_b\n" if defined $opt_b and $opt_b !~ /^([01](\.0*)?|0?\.\d+)$/; die "``-C'' value not an integer: $opt_C\n" if $progressWidth !~ m!^\d+$!; die "``-d'' value not an integer: $opt_d\n" if $debug !~ m!^\d+$!; die "``-f'' value not 0.0-1.0: $opt_f\n" if defined $opt_f and $opt_f !~ /^([01](\.0*)?|0?\.\d+)$/; die "``-F'' value not a hostname: $opt_F\n" if defined $opt_F and $opt_F !~ m!^[\w\-\.]+$!; die "``-k'' value not an integer: $opt_k\n" if $checkPoint !~ m!^\d+$!; die "``-M'' value not an integer: $opt_M\n" if defined $maxArts and $maxArts !~ m!^\d+$!; die "``-N'' value not an integer: $opt_N\n" if defined $opt_N and $opt_N !~ /^\d+$/; die "``-p'' value not an integer: $opt_p\n" if $localPort !~ m!^\d+$!; if (defined $pathSteps) { die "``-P'' value not a signed integer: $opt_P\n" if $pathSteps !~ /^[-+](\d+)$/; $path_limit = $1; } die "option ``-r -'' needs ``-l'' option\n" if defined $opt_r and $opt_r eq '-' and not $opt_l; die "``-S'' value not an integer: $opt_S\n" if defined $opt_S and $opt_S !~ /^\d+$/; die "``-t'' value not an integer: $opt_t\n" if $retries !~ m!^\d+$!; die "``-w'' value not an integer: $opt_w\n" if defined $watermark and $watermark !~ /^-?\d+$/; die "``-z'' value not an integer: $opt_z\n" if defined $opt_z and $opt_z !~ /^\d+$/; die "``-Z'' value not an integer: $opt_Z\n" if defined $opt_Z and $opt_Z !~ /^\d+$/; if ($hashfeed ne '') { my $a_err = "``-a'' value not in format ``start[-end]/mod[:offset]'': $opt_a\n"; die $a_err if $opt_a !~ m!^(\d+)(?:-(\d+))?/(\d+)(?:[:_](\d+))?$!; $hashfeed = { 'low' => $1, 'high' => $2 || $1, 'modulus' => $3, 'offset' => $4 || 0, }; die $a_err if $hashfeed->{'low'} > $hashfeed->{'high'} or $hashfeed->{'modulus'} == 0 or $hashfeed->{'offset'} > 12; if ($hashfeed->{'low'} == 1 and $hashfeed->{'high'} == $hashfeed->{'modulus'}) { $hashfeed = ''; } else { require Digest::MD5; Digest::MD5->import(qw/md5/); } } $quiet = 1 if $quietness > 1; my %NNTP_Args = (); $NNTP_Args{'Timeout'} = $opt_N if defined $opt_N; # Protocol debugging. $NNTP_Args{'Debug'} = 1 if $debug >= 4; @groupsToGet = map { s!^\s*(\S+)\s*!$1!; $_ } split(",", $opt_g) if $opt_g; @groupsToAdd = map { s!^\s*(\S+)\s*!$1!; $_ } split(",", $opt_G) if $opt_G; $| = 1; my $servers = {}; my $sname = undef; my %fed = (); my %refused = (); my %rejected = (); my $pulled = {}; my %passwd = (); my $upstreamParams = {}; my %info = ( fed => 0, refused => 0, rejected => 0, bytes => 0, ); if ($rnews) { if ($no_op) { print "Would write to rnews file $rnews\n"; } else { open(RNEWS, ">$rnews") || die "can't open rnews-format output: $rnews: $!\n"; } $info{'rnews'}->{bytes} = 0; $info{'rnews'}->{fed} = 0; } open(LOG, $logFile) || die "can't open logfile ($logFile)!: $!\n"; # Forces a flush after each write or print. my $oldfh = select; $| = 1; select LOG; $| = 1; select $oldfh; $lockfile = $groupFile . '.pid'; # Acquire a lock. if ($use_inn_shlock) { INN::Utils::Shlock::lock($lockfile) or die "cannot create lockfile $lockfile\n"; } else { sysopen(LOCK, "$lockfile", O_RDWR | O_CREAT, 0644) or die "cannot create lockfile $lockfile: $!\n"; $oldfh = select; select LOCK; $| = 1; select $oldfh; if (!flock(LOCK, LOCK_EX | LOCK_NB)) { seek LOCK, 0, 0; my $otherpid = ; chomp $otherpid; die "Another pullnews (pid: $otherpid) seems to be running.\n"; } print LOCK "$$\n"; } # Now that a lock file has been created, ensure we release it when this process # ends or is stopped. $runEndBlock = 1; print LOG scalar(localtime(time)), " start\n\n" unless $quiet; if (@groupsToGet && !$quiet) { print LOG "Checking for specific groups:\n"; map { printf LOG "\t%s\n", $_ } @groupsToGet; print LOG "\n"; } open(FILE, "<$groupFile") || die "can't open group file $groupFile\n"; my $array_order = 0; while () { next if m!^\s*\#! || m!^\s*$!; if (m!^(\S+)(\s+(\S+)\s+(\S+))?\s*$!) { $sname = $1; $servers->{$sname} = {}; $servers->{$sname}->{_order} = $array_order++; $passwd{$sname} = [$3, $4] if defined $3 and $3 ne ""; $upstreamParams->{$sname} = {}; # Check whether we should use TLS. my $upstreamName = $sname; $upstreamParams->{$sname}->{tls} = 0; $upstreamParams->{$sname}->{tls} = 1 if ($upstreamName =~ s/_STARTTLS$//); $upstreamParams->{$sname}->{tls} = 2 if ($upstreamName =~ s/_TLS$//); die "Missing IO::Socket::SSL Perl module for TLS with $upstreamName\n" if $upstreamParams->{$sname}->{tls} > 0 && !Net::NNTP::can_ssl(); # Find a possible port at the end of the news server name. # Count the number of ":" to check that it is not an IPv6 address. my @colons = split(/:/, $upstreamName); if ((scalar(@colons) == 2) and ($upstreamName =~ s/:(\d+)$//)) { $upstreamParams->{$sname}->{port} = $1; } else { $upstreamParams->{$sname}->{port} = $defaultPort; } $upstreamParams->{$sname}->{name} = $upstreamName; } elsif (m!^\s+(\S+)\s+(\d+)\s+(\d+)!) { my ($group, $date, $high) = ($1, $2, $3); $servers->{$sname}->{$group} = [$date, $high]; } elsif (m!^\s+(\S+)\s*$!) { # Assume this is a new group. my ($group, $date, $high) = ($1, 0, 0); print LOG "Looking for new group $group on $sname\n" unless $quiet; $servers->{$sname}->{$group} = [$date, $high]; } else { die "Fatal error in $groupFile: $.: $_\n"; } } close FILE; my @servers = @ARGV ? @ARGV : sort { $servers->{$a}->{_order} <=> $servers->{$b}->{_order} } keys %$servers; die "No servers!\n" if !@servers; my $localcxn; my $art_total_count = 0; # The number of articles transferred during the session. if (not $rnews) { my $additionalString = "without TLS"; $additionalString = "with explicit TLS" if $localTLS == 1; $additionalString = "with implicit TLS" if $localTLS == 2; print LOG "Connecting to downstream host: $localServer " . "port: $localPort $additionalString..." unless $quiet; my %localopts = ("Port" => "$localPort", "Reader" => $reader, %NNTP_Args); $localopts{'SSL'} = 1 if $localTLS == 2; $localcxn = Net::NNTP->new($localServer, %localopts) || die " Can't connect to server $localServer\n"; # Negotiate explicit TLS after MODE READER but before AUTHINFO. if ($localTLS == 1 && !$localcxn->starttls()) { die " Can't use STARTTLS on $localServer: " . $localcxn->code() . " " . join('//', split(/\r?\n/, $localcxn->message())) . "\n"; } if (exists $passwd{$localServer} && !$localcxn->authinfo(@{ $passwd{$localServer} })) { warn sprintf( " failed to authorize: %s %s\n", $localcxn->code(), join('//', split(/\r?\n/, $localcxn->message())), ); } } if (not $quiet and not $quietness) { print LOG " done.\n\n"; print LOG "Legend: ``.'' is an article the downstream server refused\n"; print LOG " ``*'' is an article the downstream server rejected\n"; print LOG " ``+'' is an article the downstream server accepted\n"; print LOG " ``x'' is an article the upstream server couldn't "; print LOG "give out\n"; print LOG " ``m'' is an article skipped due to headers (-a, -m or -P)\n"; print LOG "\n"; print LOG "Writing to rnews-format output: $rnews\n\n" if $rnews; } my $connectionAttempts = 0; my %groupsStarted = (); UPSTREAM: foreach my $server (@servers) { my ($username, $passwd); my $upstreamName = $upstreamParams->{$server}->{name}; my $upstreamPort = $upstreamParams->{$server}->{port}; my $upstreamTLS = $upstreamParams->{$server}->{tls}; foreach my $addGroup (@groupsToAdd) { next if defined $servers->{$server}->{$addGroup}; $servers->{$server}->{$addGroup} = [0, 0]; } if (@groupsToGet > 0) { my $ok; foreach my $sgroup (keys %{ $servers->{$server} }) { $ok = 1 if grep($_ eq $sgroup, @groupsToGet); } if (!$ok) { # User gave -g and the server doesn't have those groups. warn "Skipping server $upstreamName: " . "doesn't have specified groups.\n"; next; } } if (exists $passwd{$server}) { ($username, $passwd) = @{ $passwd{$server} }; } if (!exists($servers->{$server})) { warn "No such upstream host $upstreamName configured.\n"; next; } my $shash = $servers->{$server}; # No need to connect to the upstream server when there is no newsgroup # to fetch. (The value 1 is for the "_order" key in the hash.) next if keys(%{$shash}) == 1; my $upstream; my $additionalString = "without TLS"; $additionalString = "with explicit TLS" if $upstreamTLS == 1; $additionalString = "with implicit TLS" if $upstreamTLS == 2; my %upstreamopts = ("Port" => "$upstreamPort", %NNTP_Args); $upstreamopts{'SSL'} = 1 if $upstreamTLS == 2; # Double braces to make a loop that next can control. { { print LOG "Connecting to upstream server: $upstreamName " . "port: $upstreamPort $additionalString..." unless $quiet; $upstream = Net::NNTP->new($upstreamName, %upstreamopts); if (!$upstream && $connectionAttempts < $retries) { print LOG " failed. Let's attempt again.\n" unless $quiet; $connectionAttempts++; sleep $retryTime; next; } } } if (!$upstream) { print LOG " failed.\n" unless $quiet; warn "can't connect to upstream server $upstreamName: $!\n"; next; } else { print LOG " done.\n" unless $quiet; } # Negotiate explicit TLS after MODE READER (sent by default by Net::NNTP # just after the connection) but before AUTHINFO. if ($upstreamTLS == 1 && !$upstream->starttls()) { warn sprintf( "can't use STARTTLS: %s %s\n", $upstream->code(), join('//', split(/\r?\n/, $upstream->message())), ); next; } if ($username && !$upstream->authinfo($username, $passwd)) { warn sprintf( "failed to authorize: %s %s\n", $upstream->code(), join('//', split(/\r?\n/, $upstream->message())), ); next; } $info{server}->{$server}->{bytes} = 0; $info{server}->{$server}->{fed} = 0; $info{server}->{$server}->{refused} = 0; $info{server}->{$server}->{rejected} = 0; foreach my $group (sort keys %{ $servers->{$server} }) { next if $group eq '_order'; next if (@groupsToGet && !grep ($_ eq $group, @groupsToGet)); my $ret; # crossFeedGroup returns 1 when the newsgroup is successfully pulled. # If the GROUP command returns an unexpected result, 0 is returned (but # we do not want to abort on the first error). # If the connection is no longer active with the upstream server, # 2 is returned. $ret = crossFeedGroup($upstream, $localcxn, $server, $group, $shash); last if defined $opt_S and time >= $^T + $opt_S; sleep $opt_Z if defined $opt_Z; if ($ret == 2) { if ($connectionAttempts < $retries) { print LOG "Let's attempt again.\n" unless $quiet; $connectionAttempts++; sleep $retryTime; redo UPSTREAM; } } } $upstream->quit(); last if defined $opt_S and time >= $^T + $opt_S; } continue { # Reinitialize the counter for the next server. $connectionAttempts = 0; %groupsStarted = (); } saveConfig(); stats() unless $quiet; if ($rnews) { if (not $no_op and not close RNEWS) { print LOG "\nRNEWS close failure: $!"; } unlink $rnews if -f $rnews and not -s $rnews; } print LOG "\nDone ", scalar(localtime(time)), "\n" unless $quiet; cleanLock(); exit(0); ############################################################################### sub stats { my $ltotal = 0; my $reftotal = 0; my $rejtotal = 0; my $sum; map { $reftotal += $refused{$_} } keys %refused; map { $rejtotal += $rejected{$_} } keys %rejected; map { $ltotal += $fed{$_} } keys %fed; $sum = $reftotal + $rejtotal + $ltotal; if ($quiet) { printf LOG localtime() . " [$$] %d article%s to $localServer\n", $sum, ($sum != 1 ? "s" : ""); } elsif ($rnews) { printf LOG "\n%d article%s written to $rnews\n", $sum, ($sum != 1 ? "s were" : " was"); } else { printf LOG "\n%d article%s offered to server on $localServer\n", $sum, ($sum != 1 ? "s were" : " was"); } return if ($sum == 0); if ($quiet) { print LOG localtime() . " [$$] $ltotal ok, $reftotal ref, $rejtotal rej\n"; } elsif (not $rnews) { printf LOG "%d article%s accepted\n", $ltotal, ($ltotal != 1 ? "s were" : " was") if ($ltotal != 0); printf LOG "%d article%s refused\n", $reftotal, ($reftotal != 1 ? "s were" : " was") if ($reftotal != 0); printf LOG "%d article%s rejected\n", $rejtotal, ($rejtotal != 1 ? "s were" : " was") if ($rejtotal != 0); } map { my $server = $_; my $width = 0; print LOG "\nUpstream server " . $upstreamParams->{$server}->{name} . ":\n" if not $quiet; map { $width = length if length > $width; } sort keys %{ $pulled->{$server} } if not $quiet; map { if ($quiet) { printf LOG "%s [$$] from %s $_ %s\n", localtime(), $upstreamParams->{$server}->{name}, $pulled->{$server}->{$_}; } else { printf LOG "\t%${width}s %d\n", $_, $pulled->{$server}->{$_}; } } sort keys %{ $pulled->{$server} }; } sort keys %{$pulled}; } sub saveConfig { return if $no_op; local $SIG{INT} = 'IGNORE'; local $SIG{QUIT} = 'IGNORE'; open(FILE, ">$groupFile") || die "can't open $groupFile: $!\n"; my $server; my $group; print LOG "\nSaving config\n" unless $quiet; print FILE "# Format: (date is epoch seconds)\n"; print FILE "# hostname[:port][_tlsmode] [username password]\n"; print FILE "# group date high\n"; foreach $server ( sort { $servers->{$a}->{_order} <=> $servers->{$b}->{_order} } keys %$servers ) { print FILE "$server"; if (defined $passwd{$server}) { printf FILE " %s %s", $passwd{$server}->[0], $passwd{$server}->[1]; } print FILE "\n"; foreach $group (sort keys %{ $servers->{$server} }) { next if $group eq '_order'; my ($date, $high) = @{ $servers->{$server}->{$group} }; printf FILE " %s %d %d\n", $group, $date, $high; } } close FILE; } sub outtaHere { saveConfig(); cleanLock(); exit(0); } sub cleanLock { # Unlock. if ($use_inn_shlock) { INN::Utils::Shlock::unlock($lockfile) if defined $lockfile; } else { flock(LOCK, LOCK_UN); unlink $lockfile if defined $lockfile; } } sub bail { warn "received QUIT signal. Not saving config.\n"; cleanLock(); exit(0); } sub crossFeedGroup { my ($fromServer, $toServer, $server, $group, $shash) = @_; my ($date, $high) = @{ $shash->{$group} }; my ($prevDate, $prevHigh) = @{ $shash->{$group} }; my ($narticles, $first, $last, $name) = $fromServer->group($group); my $count = 0; my $len = 0; # Received article length (bytes) (for stats). my $code; my $startTime = time; my ($prevRefused, $prevRejected) = ($info{refused}, $info{rejected}); if (!defined($narticles)) { # Group command failed. warn sprintf( "Group command failed for $group: %s %s\n", $fromServer->code() || 'NO_CODE', join('//', split(/\r?\n/, $fromServer->message())), ); return 0; } if (not $quiet) { printf LOG "\n%s:\n", $name; printf LOG "\tLast checked: %s\n", $prevDate ? scalar(localtime($prevDate)) : "never"; printf LOG "\t%d article%s available (first %d, last %d)\n", $narticles, $narticles != 1 ? "s" : "", $first, $last; } # Do not set several times the water mark to another value. Just go on # downloading articles from the last retrieved one when the connection # timed out. if (defined($watermark) and !exists($groupsStarted{$group})) { printf LOG "\tOur previous highest: %d\n", $prevHigh if not $quiet; $high = $watermark; $high = $last + $watermark if substr($watermark, 0, 1) eq '-'; $high = 0 if $high < 0; $shash->{$group} = [time, $high]; $prevHigh = $high; $groupsStarted{$group} = 1; } printf LOG "\tOur current highest: %d", $high if not $quiet; return 0 if !$name; if ($narticles == 0) { print LOG " (nothing to get)\n" unless $quiet; # Just update the time; keep the last known high watermark. $shash->{$group} = [time, $high]; return 1; } my $toget = (($last - $high) < $narticles ? $last - $high : $narticles); $toget = ceil($toget * $opt_f) if defined $opt_f; if ($last < $high and $opt_b) { $high = $first + floor(($last - $first + 1) * (1 - $opt_b)); $toget = $last - $high; print LOG " (reset highwater mark to $high)" unless $quiet; } elsif ($prevHigh == -1 || $last <= $prevHigh) { # We connected OK but there's nothing there, or we just want # to reset our highwater mark. $shash->{$group} = [time, $high]; print LOG " (nothing to get)\n" unless $quiet; return 1; } print LOG " (at most $toget articles to get)\n" unless $quiet; my $i; my @warns; my $skip_article; my $overview; my $begin = ($first > $high ? $first : $high + 1); for ($i = $begin; $i <= $last; $i++) { $skip_article = 0; last if defined $maxArts and $count >= $maxArts; last if defined $opt_f and $count >= $toget; $count++; $art_total_count++; sleep $opt_z if defined $opt_z and $count > 1; # Do not download articles whose size exceeds the largest wanted size. # Field 3 contains the Message-ID, field 5 the article size. if (defined($opt_L)) { # Retrieve overview data by chunks, so that articles keep being # downloaded instead of a possible long wait at the start of the # process of each newsgroup. if (($count % $progressWidth) == 1) { # Do not directly use $i + $progressWidth, as the result may # exceed the maximum article number supported by the server. my @range = ( $i, $i + $progressWidth - 1 > $last ? $last : $i + $progressWidth - 1, ); $overview = $fromServer->xover(\@range); } if (defined($$overview{$i}[5]) and $$overview{$i}[5] > $opt_L) { print LOG "." unless $quiet; print LOG "\tDEBUGGING $i\t-- not downloading " . "article $$overview{$i}[3] " . "which has $$overview{$i}[5] bytes\n" if $debug >= 1; $skip_article = 1; } } # "Optimized mode" -- check if the article is wanted # *before* downloading it. if (not $skip_article and defined($opt_O)) { # 223 n article retrieved # -- request text separately (after STAT) # 423 no such article number in this group # 430 no such article found my $org_msgid = $fromServer->nntpstat($i); my $org_code = $fromServer->code(); # Continue if the article exists on the upstream server. if ($org_code == 223 and not $rnews) { my $new_msgid = $toServer->nntpstat($org_msgid); my $new_code = $toServer->code(); print LOG "\n\tDEBUGGING $i\t$org_msgid ($org_code) => $new_code\n" if $debug >= 3; # Skip the article if it already exists # on the downstream server. if ($new_code == 223) { print LOG "." unless $quiet; print LOG "\tDEBUGGING $i\t-- not downloading " . "already existing message $org_msgid code=$new_code\n" if $debug >= 1; $skip_article = 1; } } } my $add_bytes_header = 0; my $is_control_art = 0; my $article; if (not $skip_article and $header_only) { $article = $fromServer->head($i); if ($fromServer->code() == 221) { my $has_bytes_header = 0; for my $hdr (@$article) { if (lc(substr($hdr, 0, 6)) eq 'bytes:') { $has_bytes_header = 1; } elsif (lc(substr($hdr, 0, 8)) eq 'control:') { $is_control_art = 1; last; } } $add_bytes_header = 1 if not $has_bytes_header; push @{$article}, "\r\n" if not $is_control_art; } } if (not $skip_article and (not $header_only or $is_control_art or $add_bytes_header)) { $article = $fromServer->article($i); } if ($article) { my $msgid; my $xref = 0; my $headers = 1; my $line_len = 0; my $idx_blank_pre_body; # Index of the blank line # between headers/body. my $tx_len = 0; # Transmitted article length (bytes) # (for rnews, the Bytes header field). my @header_nums_to_go = (); my $match_all_hdrs = 1; # Assume no headers to match. my $skip_due_to_hdrs = 0; # Set to 1 if triggered by -P, # 2 if by -m, 3 if by -a. my %m_found_hdrs = (); my $curr_hdr = ''; for (my $idx = 0; $idx < @{$article}; $idx++) { $line_len = length($article->[$idx]); $len += $line_len; # Dot-stuffing already undone by Net::NNTP. $tx_len += $line_len; $info{server}->{$server}->{bytes} += $line_len; $info{bytes} += $line_len; next if not $headers; $idx_blank_pre_body = $idx; $curr_hdr = lc($1) if $article->[$idx] =~ /^([^:[:blank:]]+):/; if ($article->[$idx] eq "\r\n") { $headers = 0; next; } if ($match_all_hdrs and @hdr_to_match and $article->[$idx] =~ /^[^[:blank:]]/) { # Check header field matches -m flag if new header field. # Unfold this header field body (with following lines). my $unfolded_art_hdr = $article->[$idx]; for ( my $idx_step = $idx + 1; $idx_step < @$article and $article->[$idx_step] =~ /^[[:space:]](.+)/; $idx_step++ ) { # While next line is continuation... my $more_line = $1; chomp $unfolded_art_hdr; $unfolded_art_hdr .= $more_line; } my ($hdr_un, $val_un) = split(':', $unfolded_art_hdr, 2); $val_un = '' if not defined $val_un; $val_un =~ s/^\s*//; my $remove_hdr = 0; for my $tuple_match (@hdr_to_match) { my ($hdr_m, $val_m) = split(':', $tuple_match, 2); my $negate_h = ($hdr_m =~ s/^!//); my $remove_h = ($hdr_m =~ s/^#//); next if lc($hdr_un) ne lc($hdr_m); $m_found_hdrs{ lc($hdr_m) } = 1 if not $remove_h; if ($negate_h) { if ($val_un =~ /$val_m/i) { print LOG "\tDEBUGGING $i\t-- $hdr_un [$val_un]\n" if $debug >= 2; if (not $remove_h) { $match_all_hdrs = 0; } } elsif ($remove_h) { $remove_hdr = 1; } } elsif (not $val_un =~ /$val_m/i) { print LOG "\tDEBUGGING $i\t++ $hdr_un [$val_un]\n" if $debug >= 2; if (not $remove_h) { $match_all_hdrs = 0; } } elsif ($remove_h) { $remove_hdr = 1; } last if not $match_all_hdrs; } push @header_nums_to_go, $idx if $remove_hdr; } if (grep { $curr_hdr eq $_ } split(':', $skip_headers)) { print LOG "\tDEBUGGING $i\tskip_hdr $idx\t$curr_hdr\n" if $debug >= 2; push @header_nums_to_go, $idx; } if ($article->[$idx] =~ m!^message-id:\s*(\S+)!i) { $msgid = $1; } if ($article->[$idx] =~ m!^Path:\s*!i) { if (defined $pathSteps) { my $path_count = $article->[$idx]; $path_count = ($path_count =~ s@!@@g) || 0; if (substr($pathSteps, 0, 1) eq '-') { $skip_due_to_hdrs = 1 if $path_count >= $path_limit; } elsif (substr($pathSteps, 0, 1) eq '+') { $skip_due_to_hdrs = 1 if $path_count <= $path_limit; } } if ($skip_due_to_hdrs) { print LOG "\tDEBUGGING $i\tNpath_skip_art $i\n" if $debug >= 2; } else { if (defined $opt_F) { $tx_len += length($opt_F) + 1; $article->[$idx] =~ s/^Path:\s*/$&$opt_F!/i; } } } if ($opt_x && $article->[$idx] =~ m!^xref:!i) { $xref = 1; } } if ( @hdr_to_match and (not $match_all_hdrs or scalar(grep { !/^!?#/ } @hdr_to_match) != keys %m_found_hdrs) ) { $skip_due_to_hdrs = 2; } while (@header_nums_to_go) { # Start from last, so numbers are not affected. my $idx = pop @header_nums_to_go; my $cut = join("\n\t", splice(@{$article}, $idx, 1)); $tx_len -= length($cut); $idx_blank_pre_body--; print LOG "\tDEBUGGING $i\tcut1 $cut\n" if $debug >= 2; while ($article->[$idx] =~ /^[[:space:]](.+)/) { # Folded lines. my $cut = join("\n\t", splice(@{$article}, $idx, 1)); $tx_len -= length($cut); $idx_blank_pre_body--; print LOG "\tDEBUGGING $i\tcut_ $cut\n" if $debug >= 2; } } if (!$msgid) { warn "No Message-ID header field found in article\n"; next; } else { print LOG "\tDEBUGGING $i\tMessage-ID: $msgid\n" if $debug >= 1; } # Some old servers lack Xref header fields, which bothers # a downstream INN if it has xrefslave set to true, so add one # just before the blank line. if ($opt_x && !$xref) { warn "No Xref header field found in article, adding\n"; my $xref_h = "Xref: " . $upstreamParams->{$server}->{name} . " $group:$i\r\n"; splice(@{$article}, $idx_blank_pre_body, 0, $xref_h); $tx_len += length($xref_h); $idx_blank_pre_body++; } if ($add_bytes_header) { # Compute the number of bytes the same way the :bytes # metadata item would do. The additional Bytes header # field is not counted (as it does not form part of the # article in a header-only feed), as well as header fields # removed by pullnews. my $bytes_h = "Bytes: $tx_len\r\n"; splice(@{$article}, $idx_blank_pre_body, 0, $bytes_h); $idx_blank_pre_body++; } if ($header_only and not $is_control_art and @{$article} > $idx_blank_pre_body + 1) { splice(@{$article}, $idx_blank_pre_body + 1); } if (not $skip_due_to_hdrs and ref $hashfeed) { my $hash_val = unpack( 'N', substr(md5($msgid), 12 - $hashfeed->{'offset'}, 4), ) % $hashfeed->{'modulus'} + 1; $skip_due_to_hdrs = 3 if $hash_val < $hashfeed->{'low'} or $hash_val > $hashfeed->{'high'}; } $pulled->{$server}->{$group}++; if ($skip_due_to_hdrs) { print LOG "m" unless $quiet; if ($debug >= 2) { print LOG "\tDEBUGGING $i\tskip_art: " . ( $skip_due_to_hdrs == 1 ? 'hopsPath' : ( $skip_due_to_hdrs == 2 ? 'hdr' : ( $skip_due_to_hdrs == 3 ? 'hashfeed' : 'unknown' ) ) ) . "\n"; } } elsif ($rnews) { # Change all trailing CRLF to LF as rnews expects articles # in native format (rnews converts them to wire format when # processing them). # Net::NNTP has already removed dot-stuffing. my $artwithoutCR = ""; foreach my $line (@{$article}) { $line =~ s/\r\n$/\n/; $artwithoutCR .= $line; } printf RNEWS "#! rnews %d\n", length($artwithoutCR); print RNEWS $artwithoutCR; print LOG "+" unless $quiet; $fed{$group}++; $info{'rnews'}->{fed}++; $info{fed}++; } else { if ($no_op) { print "Would offer $msgid\n"; } elsif ($reader and not $toServer->post($article)) { # 240 article posted ok # 340 send article to be posted. # End with . # 440 posting not allowed # 441 posting failed my $code = $toServer->code(); my $msg = $toServer->message(); $msg =~ s/^340 .*?\n(?=.)//o; if ($msg =~ /^240 /) { print LOG "+" unless $quiet; push @warns, "Post $i ok ($code): $msg"; $fed{$group}++; $info{server}->{$server}->{fed}++; $info{fed}++; } elsif ($msg =~ /^435 / or $msg =~ /duplicate message-id/io) { print LOG "." unless $quiet; push @warns, "Post $i to server declined ($code): $msg" if $msg !~ /^435 $msgid$/ and $msg !~ /duplicate message-id/io; $refused{$group}++; $info{server}->{$server}->{refused}++; $info{refused}++; } else { warn "\nPost $i to server failed ($code): $msg\n"; $toServer->quit(); saveConfig(); exit(1); } print LOG "\tDEBUGGING $i\tPost $code: Msg: <" . join('//', split(/\r?\n/, $toServer->message())) . ">\n" if $debug >= 1; } elsif (not $reader and not $toServer->ihave($msgid, $article)) { # 235 article transferred ok # 335 send article to be transferred. # End with . # 435 article not wanted -- do not send it # 436 transfer failed -- try again later # 437 article rejected -- do not try again my $code = $toServer->code(); my $msg = $toServer->message(); if ($code == 435) { print LOG "." unless $quiet; $refused{$group}++; $info{server}->{$server}->{refused}++; $info{refused}++; } elsif ($code == 437) { print LOG "*" unless $quiet; $rejected{$group}++; $info{server}->{$server}->{rejected}++; $info{rejected}++; } else { warn "\nTransfer to server failed ($code): $msg\n"; $toServer->quit(); saveConfig(); exit(1); } print LOG "\tDEBUGGING $i\tPost $code: Msg: <" . join('//', split(/\r?\n/, $msg)) . ">\n" if $debug >= 1; } else { my $code = $toServer->code(); my $msg = $toServer->message(); print LOG "+" unless $quiet; print LOG "\tDEBUGGING $i\tPost $code: Msg: <" . join('//', split(/\r?\n/, $msg)) . ">\n" if $debug >= 1; $fed{$group}++; $info{server}->{$server}->{fed}++; $info{fed}++; } } $shash->{$group} = [time, $high = $i]; } elsif ($skip_article) { # Optimized mode (-O) or article size check (-L) decided to skip # this article... # The "." resulting treatment has already been output. $refused{$group}++; $info{server}->{$server}->{refused}++; $info{refused}++; $shash->{$group} = [time, $high = $i]; } else { if ($fromServer->code() == 430 or $fromServer->code() == 423) { # No such article, do not retry. $shash->{$group} = [time, $high = $i]; } else { if ($fromServer->code() == 421) { # 421 pseudo response code given by Net::Cmd (of which # Net::NNTP is a subclass) when the connection is no longer # active. warn "\nArticle retrieval failed (" . join('//', split(/\r?\n/, $fromServer->message())) . ")\n\n"; return 2; } else { warn "\nUnexpected response from server (" . $fromServer->code() . " " . join('//', split(/\r?\n/, $fromServer->message())) . ")\n"; saveConfig(); exit(1); } } print LOG "x" unless $quiet; printf LOG ( "\tDEBUGGING $i\t-- article unavailable %s %s\n", $fromServer->code(), join('//', split(/\r?\n/, $fromServer->message())), ) if $debug >= 1; } saveConfig() if $checkPoint and ($art_total_count % $checkPoint) == 0; print LOG "\n" if (!$quiet && (($count % $progressWidth) == 0)); last if defined $opt_S and time >= $^T + $opt_S; } print LOG "\n" unless $quiet; print LOG join("\n\t", '', @warns) . "\n\n" if @warns; my $elapsed_time = time - $startTime + 1; if ($quiet) { my $rejectedDiff = $info{rejected} - $prevRejected; my $refusedDiff = $info{refused} - $prevRefused; my $destServer = ($localServer ne $defaultHost ? " to $localServer" : ''); print LOG localtime() . " [$$] " . $upstreamParams->{$server}->{name} . "$destServer $name $narticles" . " $first-$last : $count $prevHigh-" . ($high == $last ? '' : $high) . " $refusedDiff $rejectedDiff\n" unless $prevHigh == $high and $count == 0; } else { printf LOG "%s article%s retrieved in %d seconds (%d bytes, %d cps)\n", $count, ($count == 1 ? "" : "s"), $elapsed_time, $len, int($len * 100 / $elapsed_time) / 100; } return 1; }