#!/usr/bin/perl -wpi.bak
# html-update-link-dates
# Michael Ernst
# Time-stamp: <2018-02-06 20:49:42 mernst>

### NOTE: This script is deprecated.  It has moved to
### https://github.com/plume-lib/html-tools

# usage:
#   html-update-link-dates file [...]

# For any text of the form
#   text (14 Mar 1967),
#   text (14 Mar 1967, 19 Mbytes)
#   text (PDF, 14 Mar 1967),
#   text (PDF, 14 Mar 1967, 19 Mbytes)
# update the date and file size text, but don't update the HTML file's
# modification date.
# For examples, see http://homes.cs.washington.edu/~mernst/software/ and
# http://pag.csail.mit.edu/daikon/download/.

# Warning: if the filename is the HTML file being edited, the date/size may
# be incorrect.

# To do:
#   Need to operate by paragraphs, not by lines.

use FindBin ();
use lib "$FindBin::Bin";
use checkargs;
require POSIX;
require Time::Local;

# Forward declaration for Perl 5.6.
sub file_date_replacement ( $$$$$ );

# Runs before the implicit -p loop: validates every file named on the
# command line, remembers its group and access/modification times so that
# END can restore them, and builds the date/size patterns used below.
BEGIN {
    for my $file (@ARGV) {
        # $file aliases the @ARGV element, so this rewrites the argument in
        # place.  A directory argument ("dir/") means "dir/index.html"; the
        # slash must be kept, exactly as file_date_replacement does.
        $file =~ s/\/$/\/index.html/;
        if (! (-f $file && -r $file)) {
            die "$file isn't a readable normal file";
        }
        # stat fields: 5 = gid, 8 = atime, 9 = mtime.
        my ($gid, $atime, $mtime) = (stat($file))[5, 8, 9];
        $file_gid{$file}   = $gid;
        $file_atime{$file} = $atime;
        $file_mtime{$file} = $mtime;
    }

    # What should I do about May??
    $longmonthre = '(?:January|February|March|April|May|June|July|August|September|October|November|December)';
    $datere = '(?:[0123]?[0-9] (?:[A-Z][a-z][a-z]|' . $longmonthre . ') [0-9]{4})';
    $sizere = '(?:[0-9.]+ [kMG]?bytes)';
    $datesizere = '(' . $datere . '(?:, ' . $sizere . ')?)';
}

# Per-line rewrite (the -p switch supplies the read/print loop around this
# statement).
# NOTE(review): this pattern appears to have lost text (possibly when the
# file was extracted): it contains only four capture groups although
# $1..$5 are passed below, and it has `$` where the usage comment above
# shows parentheses.  Verify against the upstream html-tools copy before
# relying on it.
s/( ]+)(\">.*?<\/a>(?:,? (?:by|from) [A-Za-z]+(?: [A-Za-z]+)?)?(?: $(?:PDF, )?|, ))$datesizere([$]?)/file_date_replacement($1,$2,$3,$4,$5)/eg;

# Restores the saved access/modification times and group of every file whose
# entry is still present in %file_atime (file_date_replacement deletes the
# entry for a file whose displayed date or size actually changed).
END {
    for my $file (keys %file_atime) {
        utime($file_atime{$file}, $file_mtime{$file}, $file);
        # $< is the real userid for whoever is running this program.
        chown($<, $file_gid{$file}, $file);
    }
    # Ordinary, non-erroneous termination
    exit(0);
}


###########################################################################

# Build the replacement text for one matched link.
# Arguments (all taken from the substitution's capture groups):
#   $pre      - text before the link target
#   $file     - the link target as written in the HTML
#   $mid      - text between the link target and the date
#   $old_date - the date currently shown, optionally followed by ", SIZE"
#   $post     - text after the date/size
# Returns "$pre$file$mid" followed by the fresh date (and size, if one was
# shown before) and $post.  If the target file does not exist, a placeholder
# date/size is returned instead.
sub file_date_replacement ( $$$$$ ) {
    my ($pre, $file, $mid, $old_date, $post) = check_args(5, @_);
    # print "\#$pre\#$file\#$mid\#$old_date\#$post\#\n";

    # Directory part (with trailing slash) of the HTML file being edited;
    # relative link targets are resolved against it.
    my $dir = $ARGV;
    $dir =~ s/(^|\/)[^\/]*$/$1/;

    # Split "DATE, SIZE" into its two parts when a size is present.
    my $old_size;
    if ($old_date =~ /^($datere), ($sizere)/) {
        $old_date = $1;
        $old_size = $2;
    }
    # Preserve the month style (full name vs. abbreviation) already in use.
    my $longmonth = ($old_date =~ m/$longmonthre/);

    # Absolute targets are looked up under /www; everything else is relative
    # to the directory of the current HTML file.
    my $full_file = $file;
    if ($full_file !~ s:^/:/www/:) {
        $full_file = $dir . $file;
    }
    $full_file = simplify_path_name($full_file);
    $full_file =~ s/\/$/\/index.html/;

    if (! -f $full_file) {
        my $date = "00 Mth 0000";
        if (defined($old_size)) {
            $date .= ", 00 Mbytes";
        }
        # Don't print warning if commented out.
        # NOTE(review): the pattern below is empty, which Perl interprets as
        # "reuse the last successfully matched pattern"; text seems to be
        # missing here -- confirm the intended test against upstream.
        if (($pre =~ //)) {
            print STDERR "html-update-link-dates: Didn't find $full_file\n";
        }
        return "$pre$file${mid}$date$post";
    }

    my $date = file_date($full_file, $longmonth);
    my $size = file_size($full_file);

    # If this script changed the HTML file, then don't reset its write date.
    # This means that browsers which check the modification time before
    # downloading a new version will get the latest version.  And indeed the
    # text has changed, though not substantially...
    if (($date ne $old_date)
        || (defined($old_size) && ($size ne $old_size))) {
        delete $file_atime{$ARGV};
        delete $file_mtime{$ARGV};
        delete $file_gid{$ARGV};
    }

    if (defined($old_size)) {
        $date .= ", $size";
    }
    return "$pre$file$mid$date$post";
}

# Return human-readable file date in form DD MMM YYYY
# If second argument is provided and true, use full month name, not abbrev.
sub file_date ( $;$ ) {
    # $file is the path to examine; an optional true second argument selects
    # the full month name (%B) instead of the three-letter form (%b).
    my ($file, @rest) = check_args_range(1, 2, @_);
    my $longmonth = (scalar(@rest) == 1) && $rest[0];
    # stat field 9 is the modification time.
    my $mtime = (stat($file))[9];
    # localtime fields 3..5 are day of month, month and year.
    my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];
    my $dateformat = ($longmonth ? "%d %B %Y" : "%d %b %Y");
    my $date = POSIX::strftime($dateformat, 0, 0, 0, $mday, $mon, $year);
    # %d is zero-padded; drop a leading zero from the day.
    $date =~ s/^0//;
    # print STDERR "file_date($file) => $date\n";
    return $date;
}


# Return human-readable file size in form NN [kMG]bytes
# (sizes below 1000 are returned as "NN bytes" with no prefix).
sub file_size ( $ ) {
    my ($file) = check_args(1, @_);
    # stat field 7 is the size in bytes.
    my $size = (stat($file))[7];
    if ($size < 1000) {
        return "$size bytes";
    }

    # Pick the unit.  The cut-offs are 950000 and 950000000 rather than
    # powers of 1000.
    my ($divisor, $metric_letter);
    if ($size < 950000) {
        $divisor = 1000;
        $metric_letter = "k";
    } elsif ($size < 950000000) {
        $divisor = 1000000;
        $metric_letter = "M";
    } else {
        $divisor = 1000000000;
        $metric_letter = "G";
    }

    # This truncates, but it really ought to round instead.  Do so by adding
    # 5 to the third most significant digit (since we truncate to two
    # significant digits).
    my $to_add = "5" . ("0" x (length($size)-3));
    my $trunc_size = sprintf("%f", ($size+$to_add)/$divisor);
    if ($trunc_size =~ m/^([0-9][0-9])([0-9]*)(\.[0-9]*)?$/) {
        # Two or more integer digits: keep the first two, zero the rest, and
        # drop the fraction.
        $trunc_size = $1 . "0" x length($2);
    } elsif ($trunc_size !~ s/^([0-9]\.?[0-9])([0-9]*)$/$1/) {
        # Otherwise a single integer digit is expected: keep it plus one
        # digit after the decimal point.
        die "What trunc_size? " . $trunc_size;
    }
    return $trunc_size . " " . $metric_letter . "bytes";
}


## Lifted from em_util.pm, 9/8/97
# Simplify a directory path by removing "name/../" pairs and collapsing
# repeated slashes.
# e.g. simplify_path_name("./bink/baz/foo/../foo2/../../bar") is
# "./bink/bar"
# A "." or ".." component is never treated as the directory that a following
# ".." cancels, so "a/../../b" is left alone rather than being turned into
# "a/b" (which would name a different file).
sub simplify_path_name ( $ ) {
    my ($path) = check_args(1, @_);
    my $result = $path;
    # Remove one "/name/../" at a time until none is left.  The lookahead
    # rejects "name" being "." or "..".
    1 while $result =~ s%/(?!\.\.?/)[^/\n]*/\.\./%/%;
    # Collapse every run of slashes to a single slash.
    $result =~ s%/{2,}%/%g;
    return $result;
}