#!/usr/bin/perl -w # # Copyright (C) 2007 Alex Schroeder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . require LWP; use Getopt::Std; our ($opt_v, $opt_w, $opt_f); # We make our own specialization of LWP::UserAgent that asks for # user/password if document is protected. { package RequestAgent; @ISA = qw(LWP::UserAgent); sub new { my $self = LWP::UserAgent::new(@_); $self; } sub get_basic_credentials { my($self, $realm, $uri) = @_; return split(':', $main::opt_w, 2); } } my $usage = qq{$0 [-i URL] [-t SECONDS] \t[-u USERNAME] [-p PASSWORD] [-w USERNAME:PASSWORD] \t[-f FORMAT] [-a TAG] [-d TAG] [TARGET] TARGET is the base URL for the wiki. Visiting this URL should show you its homepage. You add a TAG using -a and delete it using -d. Multiple tags can be separated by a space or a comma. FORMAT defaults to [[tag:TheTag]]. If you use just words, specify -f1. Provide the page names to retag on STDIN or use -i to point to a page. You can use the index action with the raw parameter. See example below. The list of page names should use the MIME type text/plain. By default, retag will tag a page every five seconds. Use -t to override this. SECONDS is the number of seconds to wait between requests. The edits will show up on the list of changes as anonymous edits. If you want to provide a USERNAME, you can use -u to do so. If you want to tag pages on a locked wiki, you need to provide a PASSWORD using -p. On the other hand, if your wiki is protected by so-called "basic authentication" -- that is, if you need to provide a username and password before you can even view the site -- then you can pass those along using the -w option. Separate username and password using a colon. Example: retag -i 'http://www.emacswiki.org/cgi-bin/alex?search=tag%3Akitsunemori+2006+2007;context=0;raw=1' \\ -u AlexSchroeder -a MondayGroup http://www.emacswiki.org/cgi-bin/alex }; sub UrlEncode { my $str = shift; return '' unless $str; my @letters = split(//, $str); my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')', '#'); foreach my $letter (@letters) { my $pattern = quotemeta($letter); if (not grep(/$pattern/, @safe)) { $letter = sprintf("%%%02x", ord($letter)); } } return join('', @letters); } sub GetRaw { my ($uri) = @_; my $ua = RequestAgent->new; my $response = $ua->get($uri); print "no response\n" unless $response->code; print "GET ", $response->code, " ", $response->message, "\n" if $opt_v; return $response->content if $response->is_success; } my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)"; sub PostRaw { my ($uri, $id, $data, $username, $password) = @_; my $ua = RequestAgent->new; my $response = $ua->post($uri, {title=>$id, text=>$data, raw=>1, question=>1, recent_edit=>'on', username=>$username, pwd=>$password}); my $status = $response->code . ' ' . $response->message; warn "POST $id failed: $status.\n" unless $response->is_success; } sub tag { my ($target, $interval, $username, $password, $pageref, $addref, $delref) = @_; foreach my $id (@$pageref) { print "$id\n"; my $page = UrlEncode ($id); my $data = GetRaw("$target?action=browse;id=$page;raw=1"); # Every page starts with a new copy. my %tags = map { $_ => 1 } @$addref; # The current code does not remove tags sprinkled all over the # page. The code will in fact add those tags to the final tagline. if ($data =~ /\n\nTags: (.*)/) { my $tags = $1; if ($opt_f) { foreach my $tag (split /,\s*/, $1) { $tags{$tag} = 1; } } else { while ($tags =~ /\[\[tag:$FreeLinkPattern(\|[^]|]+)?\]\]/ogi) { $tags{$1} = 1; } } foreach my $tag (@$delref) { delete $tags{$tag}; } } my $newtags; if ($opt_f) { $newtags = join(', ', sort keys %tags); } else { $newtags = join(' ', map { "\[\[tag:$_\]\]" } sort keys %tags); } # The code will not remove the tagline if the last tag is removed. # It will add a tagline if there is none. $data =~ s/\n\nTags: .*/\n\nTags: $newtags/ or $data .= "\n\nTags: $newtags"; PostRaw($target, $id, $data, $username, $password); sleep($interval); } } sub main { our($opt_h, $opt_i, $opt_t, $opt_d, $opt_u, $opt_p); getopts('hvi:t:u:p:w:a:d:f:'); die $usage if $opt_h; die "Missing tags to add or delete. Use -a TAG or -d TAG.\n" unless $opt_a or $opt_d; my $interval = $opt_t ? $opt_t : 5; my (@add, @delete); @add = split(/[ ,]+/, $opt_a) if $opt_a; @delete = split(/[ ,]+/, $opt_d) if $opt_d; my $username = $opt_u; my $password = $opt_p; my $target = shift(@ARGV); die "You need to provide exactly one target URL. Use -h for more help.\n" unless $target and not @ARGV; my @pages = (); if ($opt_i) { my $data = GetRaw($opt_i); @pages = split(/\n/, $data); } else { print "List of pages:\n"; while () { chomp; push(@pages, $_); } } die "The list of pages is missing. Use -i.\n" unless @pages; tag($target, $interval, $username, $password, \@pages, \@add, \@delete); } main();