#!/usr/bin/perl -wpi.bak
# html-update-link-dates
# Michael Ernst
# Time-stamp: <2018-02-06 20:49:42 mernst>
### NOTE: This script is deprecated. It has moved to
### https://github.com/plume-lib/html-tools
# usage:
# html-update-link-dates file [...]
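# For any link that is followed by a date (and optionally a file size),
# in one of the forms
#   <a href="FILE">text</a> (14 Mar 1967),
#   <a href="FILE">text</a> (14 Mar 1967, 19 Mbytes)
#   <a href="FILE">text</a> (PDF, 14 Mar 1967),
#   <a href="FILE">text</a> (PDF, 14 Mar 1967, 19 Mbytes)
# update the date and file size text to match FILE.  If none of the dates
# or sizes in an HTML file actually change, the HTML file's original access
# and modification dates are restored, so an unchanged page does not look
# newly modified.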
# For examples, see http://homes.cs.washington.edu/~mernst/software/ and
# http://pag.csail.mit.edu/daikon/download/.
# Warning: if the filename is the HTML file being edited, the date/size may
# be incorrect.
# To do:
# Need to operate by paragraphs, not by lines.
use FindBin ();
use lib "$FindBin::Bin";
use checkargs;
require POSIX;
require Time::Local;
# Forward declaration for Perl 5.6.
# The sub is called from the top-level s///e before its definition appears
# later in the file, so its prototype has to be declared here first.
sub file_date_replacement ( $$$$$ );
# Runs at compile time, before the implicit -p loop opens any file:
#  * normalizes and validates every file named on the command line,
#  * records each file's group and access/modification times in the package
#    hashes %file_gid, %file_atime and %file_mtime (read by the END block), and
#  * builds the regex fragments $longmonthre, $datere, $sizere and
#    $datesizere (package globals) used by the main substitution and by
#    file_date_replacement.
# Dies if an argument is not a readable plain file.
BEGIN
  { for my $file (@ARGV)
      { # $file aliases the @ARGV element, so this also changes which file
        # the -p loop will open.  A directory argument "dir/" means
        # "dir/index.html"; the slash must be kept (the same mapping is
        # applied to link targets in file_date_replacement).
        $file =~ s{/$}{/index.html};
        if (! (-f $file && -r $file))
          { die "$file isn't a readable normal file"; }
        # stat fields: 5 = gid, 8 = atime, 9 = mtime.
        my ($gid, $atime, $mtime) = (stat($file))[5, 8, 9];
        $file_gid{$file} = $gid;
        $file_atime{$file} = $atime;
        $file_mtime{$file} = $mtime; }

    # What should I do about May??  ("May" is both the abbreviated and the
    # full month name, so it cannot tell us which style the page uses.)
    $longmonthre = '(?:January|February|March|April|May|June|July|August|September|October|November|December)';
    # Day, month (three-letter abbreviation or full name), four-digit year.
    $datere = '(?:[0123]?[0-9] (?:[A-Z][a-z][a-z]|' . $longmonthre . ') [0-9]{4})';
    # Number followed by "bytes", "kbytes", "Mbytes" or "Gbytes".
    $sizere = '(?:[0-9.]+ [kMG]?bytes)';
    # A date optionally followed by ", size"; this is the only capturing
    # group among these fragments.
    $datesizere = '(' . $datere . '(?:, ' . $sizere . ')?)';
  }
# Main edit, applied to every input line by the -p loop.  Matches a link
# followed by a date (and optional size) and lets file_date_replacement
# recompute that date/size from the linked file.  The five capture groups
# correspond to file_date_replacement's five arguments:
#   $1  an optional "<!--" (link is commented out) plus the opening <a href="
#   $2  the link target (the href value)
#   $3  the rest of the anchor through </a>, an optional ", by/from Name"
#       credit, and the " (", " (PDF, " or ", " that introduces the date
#   $4  the old date with optional ", size" (the group inside $datesizere)
#   $5  an optional closing parenthesis
s/((?:<!--\s*)?<a href=\")([^\" >]+)(\">.*?<\/a>(?:,? (?:by|from) [A-Za-z]+(?: [A-Za-z]+)?)?(?: \((?:PDF, )?|, ))$datesizere([\)]?)/file_date_replacement($1,$2,$3,$4,$5)/eg;
# Runs once after all files have been processed.  Every file that still has
# an entry in %file_atime gets its recorded access and modification times
# put back, and its recorded group restored.  (file_date_replacement deletes
# a file's entries when it changes that file's date/size text, so such files
# keep their new timestamps.)
END
  { foreach my $edited (keys %file_atime)
      { my $atime = $file_atime{$edited};
        my $mtime = $file_mtime{$edited};
        my $gid = $file_gid{$edited};
        utime($atime, $mtime, $edited);
        # $< is the real userid for whoever is running this program.
        chown($<, $gid, $edited); }
    # Ordinary, non-erroneous termination
    exit(0);
  }
###########################################################################
# Compute the replacement text for one matched link.
# Arguments (the five captures of the main substitution):
#   $pre       text preceding the link target
#   $file      the link target; a leading "/" is mapped under /www/,
#              anything else is taken relative to the directory of the
#              HTML file being processed ($ARGV)
#   $mid       text between the link target and the date
#   $old_date  the date currently in the page, optionally followed by
#              ", size"
#   $post      text following the date/size
# Returns "$pre$file$mid" . new date[, new size] . "$post".  A size is
# emitted only if the old text had one.  If the linked file does not exist,
# the placeholder "00 Mth 0000" (and ", 00 Mbytes") is used instead.
# Side effect: if the date or size differs from the old text, the current
# HTML file's entries in %file_atime, %file_mtime and %file_gid are deleted.
# check_args comes from the checkargs module; presumably it verifies the
# argument count and returns the arguments -- verify against that module.
sub file_date_replacement ( $$$$$ )
{ my ($pre, $file, $mid, $old_date, $post) = check_args(5, @_);
  # print "\#$pre\#$file\#$mid\#$old_date\#$post\#\n";

  # Directory part of the current HTML file: strip the last path component
  # but keep the slash in front of it ("" if there is no directory part).
  my $dir = $ARGV;
  $dir =~ s/(^|\/)[^\/]*$/$1/;

  # Split "date, size" into its two parts; $old_size stays undefined when
  # the page shows only a date.
  my $old_size;
  if ($old_date =~ /^($datere), ($sizere)/)
    { ($old_date, $old_size) = ($1, $2); }
  # Keep the page's month style: full month name vs. abbreviation.
  my $longmonth = ($old_date =~ m/$longmonthre/);

  # Locate the linked file on disk.
  my $full_file = $file;
  if ($full_file !~ s:^/:/www/:)
    { $full_file = $dir . $file; }
  $full_file = simplify_path_name($full_file);
  $full_file =~ s/\/$/\/index.html/;

  if (! -f $full_file)
    { my $placeholder = "00 Mth 0000";
      if (defined($old_size))
        { $placeholder .= ", 00 Mbytes"; }
      # Don't print warning if commented out.  (An empty pattern here would
      # silently reuse the last successfully matched regex, so the HTML
      # comment opener is tested for explicitly.)
      if ($pre !~ /<!--/)
        { print STDERR "html-update-link-dates: Didn't find $full_file\n"; }
      return "$pre$file${mid}$placeholder$post"; }

  my $date = file_date($full_file, $longmonth);
  my $size = file_size($full_file);

  # If this script changed the HTML file, then don't reset its write date.
  # This means that browsers which check the modification time before
  # downloading a new version will get the latest version.  And indeed the
  # text has changed, though not substantially...
  if (($date ne $old_date) || (defined($old_size) && ($size ne $old_size)))
    { delete $file_atime{$ARGV};
      delete $file_mtime{$ARGV};
      delete $file_gid{$ARGV}; }

  if (defined($old_size))
    { $date .= ", $size"; }
  return "$pre$file$mid$date$post";
}
# Return the modification date of FILE as "DD MMM YYYY", with a leading zero
# on the day removed (e.g. "4 Mar 1967").
# If a second argument is given and true, the full month name is used
# instead of the abbreviation.
sub file_date ( $;$ )
{ my ($file, @optional) = check_args_range(1, 2, @_);
  my $longmonth = (scalar(@optional) == 1) && $optional[0];

  # stat field 9 is the modification time; localtime fields 3..5 are the
  # day of month, month and year.
  my $mtime = (stat($file))[9];
  my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];

  my $dateformat = "%d %b %Y";
  if ($longmonth)
    { $dateformat = "%d %B %Y"; }

  # Only the calendar date matters, so the time-of-day fields are zero.
  my $date = POSIX::strftime($dateformat, 0, 0, 0, $mday, $mon, $year);
  $date =~ s/^0//;
  # print STDERR "file_date($file) => $date\n";
  return $date;
}
# Return human-readable file size in form NN [kMG]bytes.
# Sizes under 1000 bytes are reported exactly ("999 bytes").  Larger sizes
# are scaled by a decimal unit (1000, 10^6 or 10^9) and shown with about two
# significant digits, e.g. "1.0 kbytes", "950 kbytes", "19 Mbytes".
# The switch to the next unit happens at 95% of it (950000, 950000000).
# NOTE(review): sizes from 95% to just under 99.5% of a unit (e.g. 950000
# through 994999 bytes) come out as "0.9" of that unit, because only one
# significant digit survives the truncation below -- confirm this is wanted.
sub file_size ( $ )
{ my ($file) = check_args(1, @_);
  # stat field 7 is the size in bytes.
  my $size = (stat($file))[7];
  if ($size < 1000)
    { return "$size bytes"; }

  my ($divisor, $metric_letter) =
      ($size < 950000)    ? (1000,       "k")
    : ($size < 950000000) ? (1000000,    "M")
    :                       (1000000000, "G");

  # This truncates, but it really ought to round instead.  Do so by adding
  # 5 to the third most significant digit (since we truncate to two
  # significant digits).
  my $to_add = "5" . ("0" x (length($size)-3));
  # Lexical: the value must not leak into (or linger in) a package global.
  my $trunc_size = sprintf("%f", ($size+$to_add)/$divisor);
  if ($trunc_size =~ m/^([0-9][0-9])([0-9]*)(\.[0-9]*)?$/)
    { # Two or more integer digits: keep the first two, zero the rest and
      # drop the fraction.
      $trunc_size = $1 . "0" x length($2); }
  elsif ($trunc_size !~ s/^([0-9]\.?[0-9])([0-9]*)$/$1/)
    { # One integer digit: keep it plus one decimal place.
      die "What trunc_size? " . $trunc_size; }
  return $trunc_size . " " . $metric_letter . "bytes";
}
## Lifted from em_util.pm, 9/8/97
# Simplify a directory path textually (the file system is not consulted):
#  * runs of slashes are collapsed to a single slash,
#  * "." components in the middle of the path are dropped, and
#  * "/dir/../" is reduced to "/" whenever "dir" is a real name.
# A ".." that has no preceding name to cancel is kept, so "a/../../b" is
# left alone rather than being turned into the different path "a/b".
# A leading "./" or leading "name/../" (no slash in front) and a trailing
# "/.." are not simplified.
# e.g. simplify_path_name("./bink/baz/foo/../foo2/../../bar") is
# "./bink/bar"
sub simplify_path_name ( $ )
{
  my ($result) = check_args(1, @_);
  # Collapse repeated slashes first, so that an empty component can never
  # be mistaken for a directory name when cancelling ".." below.
  $result =~ s%//+%/%g;
  # Drop "." components; repeat because matches may overlap ("/././").
  1 while $result =~ s%/\./%/%;
  # Cancel one "name/.." pair at a time until none is left.  The lookahead
  # refuses to treat ".." itself as the name being cancelled.
  1 while $result =~ s%/(?!\.\./)[^/\n]+/\.\./%/%;
  return $result;
}