#!/usr/bin/env perl # Construct .javadoc-index.el file for javadoc-lookup (look up Java docs from Emacs). # Michael Ernst # Run like this: # javadoc-index-to-alist > ~/.javadoc-index.el # With no arguments, it uses a default list from file ~/.javadoc-index-files . # This runs javadoc-escape-anchors if necessary (see its documentation). use strict; use English; $WARNING = 1; my $debug = 0; # $debug = 1; use File::Find; use FindBin qw($Bin); if (scalar(@ARGV) == 0) { @ARGV = read_and_glob_files("$ENV{HOME}/.javadoc-index-files"); # print STDERR "ARGV (" . scalar(@ARGV) . " elements): @ARGV\n"; } my $current_dir = `pwd`; chomp($current_dir); $current_dir = "file:" . $current_dir . "/"; print ";; For use by Emacs function javadoc-lookup.\n"; # Not $PROGRAM_NAME because we aren't using the English module. print ";; Created by $0.\n"; print ";; Command line: $0 @ARGV\n"; my %refs = (); my %javadoc_ignored_prefixes = (); my $sort_sublists = 0; # enable for debugging for my $file (@ARGV) { if (! -e $file) { print STDERR "Didn't find $file\n"; next; } # No need to run javadoc-escape-anchors on the index file itself; # just open it and use it. open(INDEX, $file) || die "Couldn't open $file"; my $this_dir = $file; $this_dir =~ s%(^|/)[^/]*$%$1%; if ($this_dir =~ m/^\//) { # absolute directory $this_dir = "file:$this_dir"; } else { # perhaps ought to simplify $this_dir = $current_dir . $this_dir; } my $prefix = $this_dir; $prefix =~ s/\/index-files\//\//; $javadoc_ignored_prefixes{$prefix} = 1; while () { if (m%^
public interface <(?:b|strong)>%) { if ($debug) { print STDERR "Considering: $_"; } } if (m%^
public interface <(?:b|strong|span class="strong")>(\w+)%) { $refs{$1} .= " \"" . $this_dir . $1 . ".html\""; } # Get rid of type parameters; a user never wants to look up their names. # This cannot just use the "g" modifier (global replace), because # type parameters may be nested: Enum>. if (m/type parameter in/) { if ($debug) { print STDERR "Replacing 'type parameter in': $_"; } } # "<"; any number of type annotations; "?" or "\@[^ ]+ )*(\?|]*?">[^ ]*?)( extends (\@[^ &]+ |\@[^ ]+ )*(]*>[^ <]*(<\?(,\?)*>)?|[^ &<>]+))?,?)+>%%i) { if ($debug) { print STDERR "After one replacement : $_"; } # empty body } if (m/\"type parameter in/) { die "Substitution failed in $file, still has \"type parameter in\":\n$_"; } # This regexp is intentionally not anchored at its beginning or end, # because sometimes line breaks are missing. # (Problem: Distributed XOM Javadocs do not use ... tags. while (m%
[ \t]*(?:)?(?:<(?:B|STRONG|span class="strong"|span class="typeNameLink")>)?([^<]*)(\([^<]*\))?(?:)?(?:)?(\(.*?\))?(\.| - )%ig) { # Either $3 or $4 could be set (but not both). # my ($this_ref,$this_item,$this_args1,$this_args2) = ($1,$2,$3,$4); my ($this_ref,$this_item) = ($1,$2); if ($this_ref =~ /\"/) { die "In $file, bad this_ref:\n this_ref=$this_ref\n this_item=$this_item\n in $_"; } if ($this_item =~ /\"/) { die "In $file, bad this_item:\n this_ref=$this_ref\n this_item=$this_item\n in $_"; } $this_ref = $this_dir . $this_ref; $this_ref =~ s%/[^/]+/\.\./%/%g; $this_ref =~ s%/(\./)+%/%g; $this_item =~ s%<%<%g; $this_item =~ s%>%>%g; if ($debug) { print ";; <<<$this_dir>>><<<$this_ref>>><<<$this_item>>>\n"; } # if ($debug) { print ";; <<<$this_dir>>><<<$this_ref>>><<<$this_item>>><<<$this_args>>>\n"; } $refs{$this_item} .= " \"$this_ref\""; } } } # I can't require javadoc-lookup because it loads .javadoc-index.el. # print "(eval-when-compile '(require 'javadoc-lookup))\n"; print "(setq javadoc-html-refs '(\n"; # Reverse the list so that, when completion-ignore-case is t, "Class" # takes priority over the less-likely-to-be-desired "CLASS". for my $item (reverse sort keys %refs) { my $refs_sans_leading_trailing = $refs{$item}; $refs_sans_leading_trailing =~ s/^ "//; $refs_sans_leading_trailing =~ s/"$//; my $sorted_refs; if ($sort_sublists) { # Sort for reproducibility of results. Problem: user has no control # over which version is chosen, if duplicate Javadoc exists. my @refs = split('" "', $refs_sans_leading_trailing); $sorted_refs = join('" "', sort(@refs)); } else { # Prioritize the first-seen version of Javadoc for any class.
# NOTE(review): several constructs above look truncated -- "while ()" has no
# filehandle operand (presumably INDEX, which is opened just before it; confirm),
# and the regexps and the "Enum>." example appear to be missing
# angle-bracketed text.  Verify them before relying on these patterns.
$sorted_refs = $refs_sans_leading_trailing;
  }
  # One alist entry per item: the item name, then its quoted reference(s).
  print " (\"$item\" \"$sorted_refs\")\n";
}
print "))\n";

# Emit one anchored, regexp-quoted pattern for each directory prefix that
# was recorded while the index files were being read.
print "(setq javadoc-ignored-prefixes (list\n";
for my $prefix (sort keys %javadoc_ignored_prefixes) {
  print " (concat \"^\" (regexp-quote \"$prefix\"))\n";
}
print "))\n";


# Read a file that lists glob patterns, one per line, and return the list
# of file names that the patterns expand to.
#   $files_filename: name of the file containing the glob patterns.
# Leading and trailing whitespace is stripped from each line; empty lines
# and lines that start with "#" are ignored.  A pattern that matches
# nothing is reported on STDERR and skipped.
# Dies if $files_filename cannot be opened.
sub read_and_glob_files ( $ ) {
  my ($files_filename) = @_;
  my @result = ();
  # Three-argument open with a lexical handle, so that the file name can
  # never be interpreted as an open mode or a pipe.
  open(my $files_fh, '<', $files_filename)
    || die("Could not open $files_filename");
  # Read one line at a time rather than slurping the file into a list.
  while (my $index_filename = <$files_fh>) {
    # The debugging output shows the raw line, before it is trimmed.
    if ($debug) { print STDERR "globbing: $index_filename\n"; }
    $index_filename =~ s/^\s+//;
    $index_filename =~ s/\s+$//;
    if (($index_filename eq "") || ($index_filename =~ /^#/)) {
      next;
    }
    # Use of "sort" makes the results more deterministic, easier to compare.
    my @globbed_files = sort(glob($index_filename));
    if ($debug) { print STDERR "Result: " . scalar(@globbed_files) . " files\n"; }
    if (scalar(@globbed_files) == 0) {
      print STDERR "Glob not found: $index_filename\n";
      next;
    }
    push @result, @globbed_files;
    if ($debug) { print STDERR "result now has " . scalar(@result) . " elements\n"; }
  }
  close($files_fh);
  return @result;
}


# Files on which javadoc-escape-anchors has been run.
my %jea_files = ();

### It seems that this isn't necessary any longer.  (7/8/2007)
# Run javadoc-escape-anchors on the given file, unless the file's first
# line already says that it was processed, or this subroutine has already
# handled the file during this run.
#   $file: name of the file to check and, if necessary, process.
# Dies if $file cannot be opened.  A failing command is reported on STDERR
# but is not fatal.
sub javadoc_escape_anchors_file ( $ ) {
  my ($file) = @_;
  if ($jea_files{$file}) {
    return;
  }
  open(my $index_fh, '<', $file) || die "Couldn't open $file";
  my $firstline = <$index_fh>;
  close($index_fh);
  # Use "!~" here: "!" binds more tightly than "=~", so the former
  # "! $firstline =~ /.../" matched against the negated value and was never
  # true.  An empty file yields an undefined first line, which also counts
  # as "not yet processed".
  if (! defined($firstline)
      || $firstline !~ /processed by javadoc-escape-anchors/) {
    my $command = "javadoc-escape-anchors $file";
    # List form: $file is passed as a single argument without going through
    # a shell, so spaces or metacharacters in the name are harmless.
    my $result = system("javadoc-escape-anchors", $file);
    if ($result != 0) {
      print STDERR "Failure $? for shell command $command\n";
    }
  }
  # Do this even on error, since we'll just get the same error over and over.
  $jea_files{$file} = 1;
}