#!/usr/bin/perl # edbrowse: line editor/browser use IO::Handle; use IO::Socket; use Time::Local; =head1 Author Karl Dahlke eklhad@gmail.com =HEAD1 Current Maintainer Chris Brannon maintainers@edbrowse.org http://edbrowse.org =head1 Copyright Notice This program is copyright (C) (C) Karl Dahlke, 2000-2003. It is made available, by the author, under the terms of the General Public License (GPL), as articulated by the Free Software Foundation. It may be used for any purpose, and redistributed, provided this copyright notice is included. =head1 Redirection This program, and its associated documentation, are becoming quite large. Therefore the documentation has been moved to a separate html file. Please visit: http://edbrowse.org/usersguide.html Realize that this documentation covers the C version of edbrowse. Development of the Perl version stopped years ago, and there have been significant changes. If you have lynx on hand, you can run: lynx -dump http://edbrowse.org/usersguide.html > usersguide.txt If you are using lynx to download the actual program, do this: lynx -source 'http://edbrowse.org/edbrowse.pl' > edbrowse.pl =cut $version = "1.5.17"; @agents = ("edbrowse/$version"); $agent = $agents[0]; # It's tempting to let perl establish the global variables as you go. # Let's try not to do this. # That's where all the side effects are - that's where the bugs come in. # Below are the global variables, with some explanations. $debug = 0; # general debugging $errorExit = 0; $ismc = 0; # is mail client $zapmail = 0; # just get rid of the mail $maxfile = 40000000; # Max size of an editable file. $eol = "\r\n"; # end-of-line, as far as http is concerned $doslike = 0; # Is it a Dos-like OS? $doslike = 1 if $^O =~ /^(dos|win|mswin)/i; $errorMsg = ""; # Set this if the last operation produced an error. $inglob = 0; # Are we in global mode, under a g// operation? $onloadSubmit = 0; $inscript = 0; # plowing through javascript $filesize = 0; # size of file just read or written $global_lhs_rhs = 0; # remember lhs and rhs across sessions $caseInsensitive = 0; # Do we send crnl or nl after the lines in a text buffer? # What is the standard - I think it's DOS newlines. $textAreaCR = 1; $pdf_convert = 1; # convert pdf to html $fetchFrames = 1; # fetch the frames into a web page $allsub = 0; # enclose all superscripts and subscripts $allowCookies = 1; # allow all cookies. %cookies = (); # the in-memory cookie jar %authHist = (); # authorization strings by domain $authAttempt = 0; # count authorization attempts for this page $ssl_verify = 1; # By default we verify all certs. $ssl = undef; # ssl connection $ctx = undef; # ssl certificate $allowReferer = 1; # Allow referer header by default. $referer = ""; # refering web page $reroute = 1; # follow http redirections to find the actual web page $rerouteCount = 0; # but prevent infinite loops %didFrame = (); # which frames have we fetched already $passive = 1; # ftp passive mode on by default. $nostack = 0; # suppress stacking of edit sessions $last_z = 1; # line count for the z command $endmarks = 0; # do we print ^ $ at the start and end of lines? $subprint = 0; # print lines after substitutions? $delprint = 0; # print line after delete $dw = 0; # directory write enabled $altattach = 0; # attachments are really alternative presentations of the same email $do_input = 0; # waiting for the next input from the tty $intFlag = 0; # control c was hit $intMsg = "operation interrupted"; # Interrupt handler, for control C. # Close file handle if we were reading from disk or socket. sub intHandler() { $intFlag = 1; if($do_input) { print "\ninterrupt, type qt to quit completely\n"; return; } # Reading from an http server. close FH if defined FH; # Kill ftp data connection if open. close FDFH if defined FDFH; # and mail connection or ftp control connection close SERVER_FH if defined SERVER_FH; # And listening ftp socket. close FLFH if defined FLFH; exit 1 if $ismc; } # intHandler $SIG{INT} = \&intHandler; # A quieter form of die, without the edbrowse line number, which just confuses people. sub dieq($) { my $msg = shift; print "fatal: $msg\n"; exit 1; } # dieq @weekDaysShort = ("Sun","Mon","Tue","Wed","Thu","Fri","Sat"); @monthsShort = ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"); sub mailTimeString() { my ($ss, $nn, $hh, $dd, $mm, $yy, $wd) = localtime time; my $wds = $weekDaysShort[$wd]; my $mths = $monthsShort[$mm]; return sprintf "%s, %02d %s %d %02d:%02d:%02d", $wds, $dd, $mths, $yy+1900, $hh, $nn, $ss; } # mailTimeString # ubackup is set when the command has changed something. # The previous text, stored in the save_* variables, # is copied into the last* variables. If you type u, # the last* variables and current variables are swapped. $ubackup = 0; # Did we successfully read the edbrowse config file? # If so, set some variables. $myname = $annoyFile = $junkFile = $addressFile = ""; %adbook = (); $adbooktime = 0; @inmailserver = (); # list of pop3 servers $mailDir = ""; $localMail = -1; $whichMail = 0; # which account to use $smMail = ""; $naccounts = 0; # number of pop accounts $outmailserver = ""; # smtp $smtplogin = ""; # smtp login my $mailToSend = ""; @pop3login = (); @pop3password = (); @replyAddress = (); @fromSource = (); @fromDest = (); $serverLine = ""; # line received from mail or ftp server # web express configuration variables and arrays. %shortcut = (); %commandList = (); %commandCheck = (); $currentShortcut = ""; $currentCommandList = ""; # Specify the start and end of a range for an operation. # 1,3m5 will set these variables to 1, 3, and 5. $startRange = $endRange = $dest = 0; # The input command, but only the one-letter commands. $icmd = ""; # Now the command that is actually executed is in $cmd. # This is usually the same as $icmd, but not always. # 8i becomes 7a, for instance. $cmd = ""; # The valid edbrowse commands. $valid_cmd = "aAbBcdefghHiIjJklmnpqrsStuvwz=^@<"; # Commands that can be done in browse mode. $browse_cmd = "AbBdefghHIjJklmnpqsuvwz=^@<"; # Commands for directory mode. $dir_cmd = "AbdefghHklnpqsvwz=^@<"; # Commands that work at line number 0, in an empty file. $zero_cmd = "aAbefhHqruwz=^@<"; # Commands that expect a space afterward. $spaceplus_cmd = "befrw"; # Commands that should have no text after them. $nofollow_cmd = "aAcdhHijlmnptu="; # Commands that can be done after a g// global directive. $global_cmd = "dIjJlmnpst"; # Show the error message, not just the question mark, after these commands. $showerror_cmd = "Abefqrw^@"; $helpall = 0; # show the error message all the time # Remember that two successive q's will quit the session without changes. # here we must track which session, by number, you were trying to quit. $lastq = $lastqq = -1; # For any variable x, there are usually multiple copies of x, one per session. # These are housed in an array @x. # In contrast, the variable $x holds $x[$context], # according to the current context. # I hope this isn't too confusing. $context = 0; # dot and dol, current and last line numbers. @dot = (0); $dot = $dot[0]; @dol = (0); $dol = $dol[0]; @factive = (1); # which sessions are active # Retain file names, and whether the text has been modified. @fname = (""); $fname = $fname[0]; $baseref = ""; # usually the same as $fname @fmode = (0); # file modes $fmode = $fmode[0]; $binmode = 1; # binary file $nlmode = 2; # newline apended $browsemode = 4; # browsing html text $changemode = 8; # something has changed in this file $dirmode = 16; # directory mode $firstopmode = 32; # first operation issued - undo is possible $nobrowse = "not in browse mode"; # common error message $nixbrowse = "command not available in browse mode"; $nixdir = "command not available in directory mode"; sub dirBrowseCheck($) { my $cmd = shift; $fmode&$browsemode and $errorMsg = "$cmd $nixbrowse", $inglob = 0, return 0; $fmode&$dirmode and $errorMsg = "$cmd $nixdir", $inglob = 0, return 0; return 1; } # dirBrowseCheck # retain base directory name when scanning a directory @dirname = (""); $dirname = $dirname[0]; # Remember substitution strings. @savelhs = (); # save left hand side $savelhs = $savelhs[0]; @saverhs = (); # save right hand side $saverhs = $saverhs[0]; # month hash, to encode dates. %monhash = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6, jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12); $home = $ENV{HOME}; defined $home and length $home or dieq 'home directory not defined by $HOME.'; -d $home or dieq "$home is not a directory."; # Establish the trash bin, for deleted files. $rbin = "$home/.trash"; if(! -d $rbin) { $rbin = "" unless mkdir $rbin, 0700; } # Config file for this browser. # Sample file is available at http://edbrowse.org/sample.perl.ebrc $rcFile = "$home/.ebprc"; # Last http header, normally deleted before you read the web page. $ebhttp = "$rbin/eb.http"; truncate $ebhttp, 0; # When we need a temp file. $ebtmp = "$rbin/eb.tmp"; # A file containing SSL certificates in PEM format, concatinated together. # This will be used for certificate verification. $ebcerts = "$home/.ssl-certs"; # file for persistant cookies. $ebcooks = "$home/.cookies"; sub fillJar() ; fillJar(); # fill up that cooky jar # Let's see if we can read the config file? if(open FH, $rcFile) { my $sort = 0; while() { s/^\s+//; s/^#.*$//; next if /^$/; s/\s+$//; my ($server, $login, $passwd, $retpath, $key, $value); if(/^([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]+)\s*:\s*([^:\s]*)/) { ($server, $login, $passwd, $retpath) = ($1, $2, $3, $4); my $smtpbox = $5; if($server =~ s/^\*\s*//) { dieq "multiple accounts are marked as local, with a star." if $localMail >= 0; $localMail = $naccounts; $smtpbox = $server unless length $smtpbox; $outmailserver = $smtpbox; $smtplogin = $login; } $inmailserver[$naccounts] = $server; $pop3login[$naccounts] = $login; $pop3password[$naccounts] = $passwd; $replyAddress[$naccounts] = $retpath; ++$naccounts; next; } # describing a mail server # Now look form keyword = string. # Initial < is shorthand for cmd = s/^\]+)\s*>\s*(.+)$/) { push @fromSource, lc $1; push @fromDest, $2; next; } dieq "from filter \"$value\" does not look like \"emailAddress > file\"."; } # from if($key eq "agent") { push @agents, $value; next; } # agent # web express keywords if($key eq "shortcut") { if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) { dieq "shortcut $currentShortcut has not been assigned a url"; } $value =~ /^[\w-]+$/ or dieq "the name of a shortcut must consist of letters digits or dashes, $value is invalid"; $currentShortcut = $value; # Start out with no post processing commands. $shortcut{$value}{after} = []; $shortcut{$value}{sort} = sprintf "%04d", $sort; ++$sort; $currentCommandList = ""; next; } # shortcut if($key eq "cmdlist") { if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) { dieq "shortcut $currentShortcut has not been assigned a url"; } $currentShortcut = ""; my $check = 0; $check = 1 if $value =~ s/^\+//; $value =~ /^[\w-]+$/ or dieq "the name of a command list must consist of letters digits or dashes, $value is invalid."; $currentCommandList = $value; $commandList{$value} = []; $commandCheck{$value} = $check; next; } # cmdlist if($key eq "cmd") { length $currentShortcut or length $currentCommandList or dieq "postprocessing command is not part of a command list or shortcut"; my $cref; # command reference $cref = $shortcut{$currentShortcut}{after} if length $currentShortcut; $cref = $commandList{$currentCommandList} if length $currentCommandList; # is this a command list? if($value =~ /^[a-zA-Z_-]+$/ and defined $commandList{$value}) { my $cpush = $commandList{$value}; push @$cref, @$cpush; } else { push @$cref, $value; } next; } # cmd if($key eq "url") { length $currentShortcut or dieq "$key command without a current shortcut"; $shortcut{$currentShortcut}{url} = $value; next; } # url if($key eq "desc") { length $currentShortcut or dieq "$key command without a current shortcut"; $shortcut{$currentShortcut}{desc} = $value; next; } # desc dieq "Unrecognized keyword <$key> in config file."; } dieq "garbled line <$_> in config file."; } # loop over lines in config file close FH; if(length $currentShortcut and ! defined $shortcut{$currentShortcut}{url}) { dieq "shortcut $currentShortcut has not been assigned a url"; } if($naccounts) { $localMail = 0 if $naccounts == 1; dieq "None of the pop3 accounts is marked as local." if $localMail < 0; dieq "fullname not specified in the config file." if ! length $myname; } # mail accounts } # open succeeded # One array holds all the lines of text (without the newlines) # for all the files in all the sessions. # Within a given session, the actual file is represented by a list of numbers, # indexes into this large array. # Note that when text is copied, we actually copy the strings in the array. # I could just have different lines use the same index, thus pointing to the # same string, and there would be no need to copy that string, # but then I'd have to maintain reference counts on all these strings, # and that would make the program very messy! @text = (); # If a file has 30 lines, it is represented by 30 numbers, # indexes into @text above. # Should we use an array of numbers, or a string of numbers # represented by decimal digits? # Both are painful, in different ways. # Consider inserting a block of text, a very common operation. # In a list, we would have to slide all the following numbers down. # Granted, that's better than copying all those lines of text down, # but it's still a pain to program, and somewhat inefficient. # If we use strings, we take the original string of numbers, # break it at the insert point, and make a new string # by concatenating these two pieces with the new block. # The same issues arise when deleting text near the top of a file. # This and other considerations push me towards strings. # I currently use 6 characters for a line number, and a seventh for the g// flag. $lnwidth = 7; # width of a line number field in $map $lnwidth1 = $lnwidth - 1; $lnformat = "%6d "; $lnspace = ' ' x $lnwidth; $lnmax = 999999; # Note that line 0 never maps to anything in @text. @map = ($lnspace); $map = $map[0]; # The 26 labels, corresponding to the lower case letters. # These are stored in a packed string, like $map above. # labels also holds the filetype suffixes when in directory mode. @labels = ($lnspace x 26); $labels = $labels[0]; # offset into $labels, where directory suffixes begin. $dirSufStart = 26 * $lnwidth; # The anchor/form/input tags, for browsing. # The browse tags are in an array of hashes. # Each hash has tag=tagname, # and attrib=value for each attrib=value in the tag. # Be advised that certain special tags, such as those defining # title and description and keywords, are placed in btag[0]. @btags = (); $btags = $btags[0]; # When we focus on an input field, for edit or manipulation, # we need its type, size, and list of options. $inf = ""; # current text displayed by this input field. $itype = ""; # Type of the input field. $isize = 0; # size of the input field. $iopt = {}; # hash of input options in a discrete list. $irows = $icols = 0; # for a text area window. $iwrap = ""; # Can we scroll beyond this window? $itag = undef; # the input tag from which the previous variables were derived. $iline = 0; # line where this input field was found. $ifield = 0; # field number, within the line, the nth input field on the line. $itagnum = 0; # tag number for this input field. $inorange = "this input directive cannot be applied to a range of lines"; $inoglobal = "this input directive cannot be applied globally"; # last* and save* variables mirror the variables that define your session. # This supports the undo command. $lastdot = $savedot = $lastdol = $savedol = 0; $lastmap = $savemap = $lastlabels = $savelabels = ""; # Variables to format text, i.e. break lines at sentence/phrase boundaries. $refbuf = ""; # The new, reformatted buffer. $lineno = $colno = 0; # line/column number $optimalLine = 80; # optimal line length $cutLineAfter = 36; # cut sentence or phrase after this column $paraLine = 120; # longer lines are assumed to be self-contained paragraphs $longcut = 0; # last cut of a long line $lspace = 3; # last space value, 3 = paragraph $lperiod = $lcomma = $lright = $lany = 0; # columns for various punctuations $idxperiod = $idxcomma = $idxright = $idxany = 0; # Push the entire edit session onto a stack, for the back key. # A hash will hold all the variables that make a session, # such as $map, $fname, $btags, etc. @backup = (); $backup = $backup[0]; $hexChars = "0123456789abcdefABCDEF"; # Valid delimiters for search/substitute. # note that \ is conspicuously absent, not a valid delimiter. # I alsso avoid nestable delimiters such as parentheses. # And no alphanumerics please -- too confusing. $valid_delim = "-_=!|#*;:`\"',./?+@"; # $linePending holds a line of text that you accidentally typed in # while edbrowse was in command mode. # When you see the question mark, immediately type a+ to recover the line. $linePending = undef; # That's it for the globals, here comes the code. # First a few support routines. # Strip white space from either side. sub stripWhite($) { my $line = shift; $$line =~ s/^\s+//; $$line =~ s/\s+$//; } # stripWhite # Is a filename a URL? # If it is, return the transport protocol, e.g. http. sub is_url($) { my $line = shift; return 'http' if $line =~ m,^http://[^\s],i; return 'https' if $line =~ m,^https://[^\s],i; return 'gopher' if $line =~ m,^gopher://[^\s],i; return 'telnet' if $line =~ m,^telnet://[^\s],i; return 'ftp' if $line =~ m,^ftp://[^\s],i; # I assume that the following will be regular http. # Strip off the ?this=that stuff $line =~ s:\?.*::; # Strip off the file name and .browse suffix. $line =~ s:/.*::; $line =~ s/\.browse$//; $line =~ s/:\d+$//; return 0 if $line !~ /\w\.\w.*\w\.\w/; # we need at least two internal dots # Look for an ip address, four numbers and three dots. return 'http' if $line =~ /^\d+\.\d+\.\d+\.\d+$/; $line =~ s/.*\.//; return 'http' if index(".com.biz.info.net.org.gov.edu.us.uk.au.ca.de.jp.be.nz.sg.", ".$line.") >= 0; } # is_url # Apply a (possibly) relative path to a preexisting url. # The new url is returned. # resolveUrl("http://www.eklhad.net/linux/index.html", "app") returns # "http://www.eklhad.net/linux/app" sub resolveUrl($$) { my ($line, $href) = @_; my $scheme; $line = "" unless defined $line; $line =~ s/\.browse$//; # debug print - this is a very subtle routine. print "resolve($line, $href)\n" if $debug >= 2; # Some people, or generators, actually write http://../whatever.html $href =~ s/^http:(\.+)/$1/i; $href =~ s,^http://(\.*/),$1,i; return $href unless length $href and length $line and ! is_url($href); if(substr($href, 0, 1) ne '/') { $line =~ s/\?.*//; # hope this is right if(substr($href, 0, 1) ne '?') { if($line =~ s,^/[^/]*$,, or $line =~ s,([^/])/[^/]*$,$1,) { # We stripped off the last directory $line .= '/'; } else { if($scheme = is_url $line) { $line .= '/'; } else { $line = ""; } } # stripping off last directory } # doesn't start with ? } elsif($scheme = is_url $line) { # Keep the scheme and server, lose the filename $line =~ s/\?.*//; # hope this is right $line =~ s,^($scheme://[^/]*)/.*,$1,i; } else { $line = ""; } return $line.$href; } # resolveUrl # Prepare a string for http transmition. # No, I really don't know which characters to encode. # I'm probably encoding more than I need to -- hope that's ok. sub urlEncode($) { $_ = shift; s/([^-\w .@])/sprintf('%%%02X',ord($1))/ge; y/ /+/; return $_; } # urlEncode sub urlDecode($) { $_ = shift; y/+/ /; s/%([0-9a-fA-F]{2})/chr hex "$1"/ge; return $_; } # urlDecode # The javascript unescape function, sort of sub unescape($) { $_ = shift; s/(%|\\u00)([0-9a-fA-F]{2})/chr hex "$2"/ge; s/&#(\d+);/chr "$1"/ge; return $_; } # unescape # Pull the subject out of a sendmail url. sub urlSubject($) { my $href = shift; if($$href =~ s/\?(.*)$//) { my @pieces = split '&', $1; foreach my $j (@pieces) { next unless $j =~ s/^subject=//i; my $subj = urlDecode $j; stripWhite \$subj; return $subj; } # loop } # attributes after the email return ""; } # urlSubject # Get raw text ready for html display. sub textUnmeta($) { my $tbuf = shift; return unless length $$tbuf; $$tbuf =~ s/&/&/g; $$tbuf =~ s//>/g; $$tbuf =~ s/^/

/;
$$tbuf =~ s/$/<\/PRE>

\n/; } # textUnmeta # Derive the alt description for an image or hyperlink. sub deriveAlt($$) { my $h = shift; my $href = shift; my $alt = $$h{alt}; $alt = "" unless defined $alt; stripWhite \$alt; # Some alt descriptions are flat-out useless. $alt =~ s/^[^\w]+$//; return $alt if length $alt; if(!length $href) { $href = $$h{href}; $href = "" unless defined $href; } $alt = $href; $alt =~ s/^javascript.*$//i; $alt =~ s/^\?//; $alt =~ s:\?.*::s; $alt =~ s:.*/::; $alt =~ s/\.[^.]*$//; $alt =~ s:/$::; return $alt; } # deriveAlt # Pull the reference out of a javascript openWindow() call. $foundFunc = ""; sub javaWindow($) { my $jc = shift; # java call my $page = ""; $foundFunc = ""; $page = $1 if $jc =~ /(?:open|location|window)[\w.]* *[(=] *["']([\w._\/:,=@&?+-]+)["']/i; return $page if length $page; return "submit" if $jc =~ /\bsubmit *\(/i; while($jc =~ /(\w+) *\(/g) { my $f = $1; my $href = $$btags[0]{fw}{$f}; if($href) { $href =~ s/^\*//; $foundFunc = $f; $page = $href; } } return $page; } # javaWindow # Try to find the Java functions sub javaFunctions($) { my $tbuf = shift; my $flc = 0; # function line count my $f; # java function while($$tbuf =~ /(.+)/g) { my $line = $1; if($line =~ /function *(\w+)\(/) { $f = $1; print "java function $f\n" if $debug >= 6; $flc = 1; } my $win = javaWindow $line; if(length $win) { if($flc) { if(not defined $$btags[0]{fw}{$f}) { $$btags[0]{fw}{$f} = "*$win"; print "$f: $win\n" if $debug >= 3; } } elsif($win ne "submit") { my $h = {}; push @$btags, $h; $attrhidden = hideNumber($#$btags); $$h{ofs1} = length $refbuf; my $alt = deriveAlt($h, $win); $alt = "relocate" unless length $alt; createHyperLink($h, $win, $alt); } } next unless $flc; ++$flc; $flc = 0 if $flc == 12; } # loop over lines } # javaFunctions # Mixed case. sub mixCase($) { my $w = lc shift; $w =~ s/\b([a-z])/uc $1/ge; # special McDonald code $w =~ s/Mc([a-z])/"Mc".uc $1/ge; return $w; } # mixCase # Create a hyperlink where there was none before. sub createHyperLink($$$) { my ($h, $href, $desc) = @_; $$h{tag} = "a"; $$h{bref} = $baseref; $$h{href} = $href; $refbuf .= "\x80$attrhidden" . "{$desc}"; $colno += 2 + length $desc; $$h{ofs2} = length $refbuf; $lspace = 0; } # createHyperLink # meta html characters. # There's lots more -- this is just a starter. %charmap = ( # Normal ascii symbols gt => '>', lt => '<', quot => '"', plus => '+', minus => '-', colon => ':', apos => '`', star => '*', comma => ',', period => '.', dot => ".", dollar => '$', percnt => '%', amp => '&', # International letters ntilde => "\xf1", Ntilde => "\xd1", agrave => "\xe0", Agrave => "\xc0", egrave => "\xe8", Egrave => "\xc8", igrave => "\xec", Igrave => "\xcc", ograve => "\xf2", Ograve => "\xd2", ugrave => "\xf9", Ugrave => "\xd9", auml => "\xe4", Auml => "\xc4", euml => "\xeb", Euml => "\xcb", iuml => "\xef", Iuml => "\xcf", ouml => "\xf6", Ouml => "\xd6", uuml => "\xfc", Uuml => "\xdc", yuml => "\xff", Yuml => 'Y', aacute => "\xe1", Aacute => "\xc1", eacute => "\xe9", Eacute => "\xc9", iacute => "\xed", Iacute => "\xcd", oacute => "\xf3", Oacute => "\xd3", uacute => "\xfa", Uacute => "\xda", yacute => "\xfd", Yacute => "\xdd", atilde => "\xe3", Atilde => "\xc3", itilde => 'i', Itilde => 'I', otilde => "\xf5", Otilde => "\xd5", utilde => 'u', Utilde => 'U', acirc => "\xe2", Acirc => "\xc2", ecirc => "\xea", Ecirc => "\xca", icirc => "\xee", Icirc => "\xce", ocirc => "\xf4", Ocirc => "\xd4", ucirc => "\xfb", Ucirc => "\xdb", # Other 8-bit symbols. # I turn these into their 8 bit equivalents, # then a follow-on routine turns them into words for easy reading. # Some speech adapters do this as well, saying "cents" for the cents sign, # but yours may not, so I do some of these translations for you. # But not here, because some people put the 8-bit cents sign in directly, # rather then ¢, so I've got to do that translation later. pound => "\xa3", cent => "\xa2", sdot => "\xb7", middot => "\xb7", edot => 'e', nbsp => ' ', times => "\xd7", divide => "\xf7", deg => "\xb0", frac14 => "\xbc", half => "\xbd", frac34 => "\xbe", frac13 => "1/3", frac23 => "2/3", copy => "\xa9", reg => "\xae", trade => "(TM)", ); %symbolmap = ( a => "945", b => "946", g => "947", d => "948", e => "949", z => "950", h => "951", q => "952", i => "953", k => "954", l => "955", m => "956", n => "957", x => "958", o => "959", p => "960", r => "961", s => "963", t => "964", u => "965", f => "966", c => "967", y => "968", w => "969", 177 => "8177", # kludge!! I made up 8177 198 => "8709", 219 => "8660", 209 => "8711", 229 => "8721", 206 => "8712", 207 => "8713", 242 => "8747", 192 => "8501", 172 => "8592", 174 => "8594", 165 => "8734", 199 => "8745", 200 => "8746", 64 => "8773", 182 => "8706", 185 => "8800", 162 => "8242", 163 => "8804", 179 => "8805", 204 => "8834", 205 => "8838", 201 => "8835", 203 => "8836", 202 => "8839", 208 => "8736", ); # map certain font=symbol characters to words %symbolWord = ( 176 => "degrees", 188 => "1fourth", 189 => "1half", 190 => "3fourths", 215 => "times", 247 => "divided by", 913 => "Alpha", 914 => "Beta", 915 => "Gamma", 916 => "Delta", 917 => "Epsilon", 918 => "Zeta", 919 => "Eta", 920 => "Theta", 921 => "Iota", 922 => "Kappa", 923 => "Lambda", 924 => "Mu", 925 => "Nu", 926 => "Xi", 927 => "Omicron", 928 => "Pi", 929 => "Rho", 931 => "Sigma", 932 => "Tau", 933 => "Upsilon", 934 => "Phi", 935 => "Chi", 936 => "Psi", 937 => "Omega", 945 => "alpha", 946 => "beta", 947 => "gamma", 948 => "delta", 949 => "epsilon", 950 => "zeta", 951 => "eta", 952 => "theta", 953 => "iota", 954 => "kappa", 955 => "lambda", 956 => "mu", 957 => "nu", 958 => "xi", 959 => "omicron", 960 => "pi", 961 => "rho", 962 => "sigmaf", 963 => "sigma", 964 => "tau", 965 => "upsilon", 966 => "phi", 967 => "chi", 968 => "psi", 969 => "omega", 8177 => "+-", # kludge!! I made up 8177 8242 => "prime", 8501 => "aleph", 8592 => "left arrow", 8594 => "arrow", 8660 => "double arrow", 8706 => "d", 8709 => "empty set", 8711 => "del", 8712 => "member of", 8713 => "not a member of", 8721 => "sum", 8734 => "infinity", 8736 => "angle", 8745 => "intersect", 8746 => "union", 8747 => "integral", 8773 => "congruent to", 8800 => "not equal", 8804 => "less equal", 8805 => "greater equal", 8834 => "proper subset of", 8835 => "proper superset of", 8836 => "not a subset of", 8838 => "subset of", 8839 => "superset of", ); # Map an html meta character using the above hashes. # Usually run from within a global substitute. sub metaChar($) { my $meta = shift; if($meta =~ /^#(\d+)$/) { return chr $1 if $1 <= 255; return "'" if $1 == 8217; return "\x82$1#" if $symbolWord{$1}; return "?"; } my $real = $charmap{$meta}; defined $real or $real = "?"; return $real; } # metaChar # Translate number. # This is highly specific to my web pages - doesn't work in general! sub metaSymbol($) { my $meta = shift; $meta =~ s/^&#//; $meta =~ s/;$//; my $real = $symbolmap{$meta}; return "?" unless $real; return "&#$real;"; } # metaSymbol # replace VAR with $VAR, as defined by the environment. sub envVar($) { my $var = shift; my $newvar = $ENV{$var}; if(defined $newvar) { # There shouldn't be any whitespace at the front or back. stripWhite \$newvar; return $newvar if length $newvar; } length $errorMsg or $errorMsg = "environment variable $var not set"; return ""; } # envVar # Replace the variables in a line, using the above. sub envLine($) { my $line = shift; $errorMsg = ""; # $errorMsg will be set if something goes wrong. $line =~ s,^~/,\$HOME/,; $line =~ s/\$([a-zA-Z]\w*)/envVar($1)/ge; return $line; } # envLine # The filename can be specified using environment variables, # and shell meta characters such as *. # But not if it's a url. sub envFile($) { my $filename = shift; $errorMsg = ""; if(! is_url($filename)) { $filename = envLine($filename); return if length $errorMsg; my @filelist; # This is real kludgy - I just don't understand how glob works. if($filename =~ / / and $filename !~ /"/) { @filelist = glob '"'.$filename.'"'; } else { @filelist = glob $filename; } $filelist[0] = $filename if $#filelist < 0; $errorMsg = "wild card expansion produces multiple files" if $#filelist; $filename = $filelist[0]; } return $filename; } # envFile # Drop any active edit sessions that have no text, and no associated file. # This housecleaning routine is run on every quit or backup command. sub dropEmptyBuffers() { foreach my $cx (0..$#factive) { next if $cx == $context; next unless $factive[$cx]; next if length $fname[$cx]; next if $dol[$cx]; $factive[$cx] = undef; } } # dropEmptyBuffers # Several small functions to switch between contexts, i.e. editing sessions. # In all these functions, we have to map between our context numbers, # that start with 0, and the user's session numbers, that start with 1. # C and fortran programmers will be use to this problem. # Is a context different from the currently running context? sub cxCompare($) { my $cx = shift; $errorMsg = "session 0 is invalid", return 0 if $cx < 0; return 1 if $cx != $context; # ok ++$cx; $errorMsg = "you are already in session $cx"; return 0; } # cxCompare # Is a context active? sub cxActive($) { my $cx = shift; return 1 if $factive[$cx]; ++$cx; $errorMsg = "session $cx is not active"; return 0; } # cxActive # Switch to another editing session. # This assumes cxCompare has succeeded - we're moving to a different context. # Pass the context number and an interactive flag. sub cxSwitch($$) { my ($cx, $ia) = @_; # Put the variables in a known start state if this is a virgin session. cxReset($cx, 0) if ! defined $factive[$cx]; $dot[$context] = $dot, $dot = $dot[$cx]; $dol[$context] = $dol, $dol = $dol[$cx]; $fname[$context] = $fname, $fname = $fname[$cx]; $dirname[$context] = $dirname, $dirname = $dirname[$cx]; $map[$context] = $map, $map = $map[$cx]; $labels[$context] = $labels, $labels = $labels[$cx]; $btags = $btags[$cx]; $backup[$context] = $backup, $backup = $backup[$cx]; if(!$global_lhs_rhs) { $savelhs[$context] = $savelhs, $savelhs = $savelhs[$cx]; $saverhs[$context] = $saverhs, $saverhs = $saverhs[$cx]; } $fmode[$context] = $fmode, $fmode = $fmode[$cx]; # But we don't replicate the last* variables per context, # so your ability to undo is destroyed if you switch contexts. $fmode &= ~$firstopmode; if($ia) { if(defined $factive[$cx]) { print ((length($fname[$cx]) ? $fname[$cx] : "no file")."\n"); } else { print "new session\n"; } } $factive[$cx] = 1; $context = $cx; return 1; } # cxSwitch # Can we trash the data in a context? # If so, trash it, and reset all the variables. # The second parameter is a close directive. # If nonzero, we clear out empty buffers associated with # text areas in the fill-out forms (browse mode). # A value of 1, as opposed to 2, means close down the entire session. sub cxReset($$) { my ($cx, $close) = @_; if(defined $factive[$cx]) { # We might be trashing data, make sure that's ok. $fname[$cx] = $fname, $fmode[$cx] = $fmode if $cx == $context; if($fmode[$cx]&$changemode and !( $fmode[$cx]&$dirmode) and $lastq != $cx and length $fname[$cx] and ! is_url($fname[$cx])) { $errorMsg = "expecting `w'"; $lastqq = $cx; if($cx != $context) { ++$cx; $errorMsg .= " on session $cx"; } return 0; } # warning message if($close) { dropEmptyBuffers(); if($close&1) { # And we're closing this session. $factive[$cx] = undef; $backup[$cx] = undef; } } } # session was active # reset the variables $dot[$cx] = $dol[$cx] = 0; $map[$cx] = $lnspace; $fname[$cx] = ""; $dirname[$cx] = ""; $labels[$cx] = $lnspace x 26; $btags[$cx] = []; $savelhs[$cx] = $saverhs[$cx] = undef; $fmode[$cx] = 0; if($cx == $context) { $dot = $dol = 0; $map = $map[$cx]; $fname = ""; $labels = $labels[$cx]; $btags = $btags[$cx]; $global_lhs_rhs or $savelhs = $saverhs = undef; $fmode = 0; } # current context return 1; } # cxReset # Pack all the information about the current context into a hash. # This will be pushed onto a virtual stack. # When you enter the back key, it all gets unpacked again, # to restore your session. sub cxPack() { my $h = { dot =>$dot, dol => $dol, map => $map, labels => $labels, lastdot =>$lastdot, lastdol => $lastdol, lastmap => $lastmap, lastlabels => $lastlabels, fname => $fname, dirname => $dirname, fmode => $fmode&~$changemode, savelhs => $savelhs, saverhs => $saverhs, btags => $btags, }; return $h; } # cxPack sub cxUnpack($) { my $h = shift; return if ! defined $h; $dot = $$h{dot}; $lastdot = $$h{lastdot}; $dol = $$h{dol}; $lastdol = $$h{lastdol}; $map = $$h{map}; $lastmap = $$h{lastmap}; $labels = $$h{labels}; $lastlabels = $$h{lastlabels}; $fmode = $$h{fmode}; $fname = $$h{fname}; $dirname = $$h{dirname}; if(!$global_lhs_rhs) { $savelhs = $$h{savelhs}; $saverhs = $$h{saverhs}; } $btags[$context] = $btags = $$h{btags}; } # cxUnpack # find an available session and load it with some initial data. # Returns the context number. sub cxCreate($$) { my ($text_ptr, $filename) = @_; # Look for an unused buffer my ($cx, $j); for($cx=0; $cx<=$#factive; ++$cx) { last unless defined $factive[$cx]; } cxReset($cx, 0); $factive[$cx] = 1; $fname[$cx] = $filename; my $bincount = $$text_ptr =~ y/\0\x80-\xff/\0\x80-\xff/; if($bincount*4 - 10 < length $$text_ptr) { # A text file - remove crlf in the dos world. $$text_ptr =~ s/\r\n/\n/g if $doslike; } else { $fmode[$cx] |= $binmode; } $fmode[$cx] |= $nlmode unless $$text_ptr =~ s/\n$//; $j = $#text; if(length $$text_ptr) { push @text, split "\n", $$text_ptr, -1; } if(!lineLimit(0)) { my $newpiece = $lnspace; ++$dol[$cx], $newpiece .= sprintf($lnformat, $j) while ++$j <= $#text; $map[$cx] = $newpiece; $dot[$cx] = $dol[$cx]; } else { warn $errorMsg; } return $cx; } # cxCreate # See if @text is too big. # Pass the number of lines we will be adding. sub lineLimit($) { my $more = shift; return 0 if $#text + $more <= $lnmax; $errorMsg = "Your limit of 1 million lines has been reached.\nSave your files, then exit and restart this program."; return 1; } # lineLimit # Hide and reveal numbers that are internal to the line. # These numbers indicate links and input fields, and are not displayed by the next routine. sub hideNumber($) { my $n = shift; $n =~ y/0-9/\x85-\x8e/; return $n; } # hideNumber sub revealNumber($) { my $n = shift; $n =~ y/\x85-\x8f/0-9/; return $n; } # revealNumber sub removeHiddenNumbers($) { my $t = shift; $$t =~ s/\x80[\x85-\x8f]+([<>{])/$1/g; $$t =~ s/\x80[\x85-\x8f]+\*//g; } # removeHiddenNumbers # Small helper function to retrieve the text for line number n. # If the second parameter is set, hidden numbers are left in place; # otherwise they are stripped out via removeHiddenNumbers(). sub fetchLine($$) { my $n = shift; my $show = shift; return "" unless $n; # should never happen my $t = $text[substr($map, $n*$lnwidth, $lnwidth1)]; removeHiddenNumbers(\$t) if $show and $fmode&$browsemode; return $t; } # fetchLine # Here's the same function, but for another context. sub fetchLineContext($$$) { my $n = shift; my $show = shift; my $cx = shift; $t = $text[substr($map[$cx], $n*$lnwidth, $lnwidth1)]; removeHiddenNumbers(\$t) if $show and $fmode[$cx]&$browsemode; return $t; } # fetchLineContext # Print size of the text in buffer. sub apparentSize() { my $j = 0; $j += length(fetchLine($_, 1)) + 1 foreach (1..$dol); --$j if $fmode&$nlmode; print "$j\n"; } # apparentSize # Read a line from stdin. # Could be a command, could be text going into the buffer. sub readLine() { my ($i, $j, $c, $d, $line); getline: { $intFlag = 0; $do_input = 1; $line = ; $do_input = 0; redo getline if $intFlag and ! defined $line; # interrupt $intFlag = 0; } exit 0 unless defined $line; # EOF $line =~ s/\n$//; # A bug in my keyboard causes nulls to be entered from time to time. $line =~ s/\0/ /g; return $line if $line !~ /~/; # shortcut # We have to process it, character by character. my $line2 = ""; for($i=0; $i= 16; my $val = $j*16; $d = substr $line, $i+2, 1; $j = index $hexChars, $d; next if $j < 0; $j -= 6 if $j >= 16; $val += $j; # We don't use this mechanism to enter normal ascii characters. next if $val >= 32 and $val < 127; # And don't stick a newline in the middle of an entered line. next if $val == 10; $c = chr $val; $i += 2; } # loop over input chars return $line2; } # readLine # Read a block of lines into the buffer. sub readLines() { my $tbuf = ""; # Put the pending line in first, if it's there. my $line = $linePending; $line = readLine() unless defined $line; while($line ne ".") { $tbuf .= "$line\n"; $line = readLine(); } # loop gathering input lines return addTextToSession(\$tbuf) if length $tbuf; $dot = $endRange; $dot = 1 if $dot == 0 and $dol; return 1; } # readLines # Display a line. Show line number if $cmd is n. # Expand binary characters if $cmd is l. # Pass the line number. sub dispLine($) { my $ln = shift; print "$ln " if $cmd eq 'n'; my $line = fetchLine($ln, 1); # Truncate, if the line is pathologically long. $line = substr($line, 0, 500) . "..." if length($line) > 500; print '^' if $endmarks and ($endmarks == 2 or $cmd eq 'l'); if($cmd eq 'l') { $line =~ y/\10\11/<>/; $line =~ s/([\0-\x1f\x80-\xff])/sprintf("~%02x",ord($1))/ge; } else { # But we always remap return, null, and escape $line =~ s/(\00|\r|\x1b)/sprintf("~%02x",ord($1))/ge; } print $line; print dirSuffix($ln); print '$' if $endmarks and ($endmarks == 2 or $cmd eq 'l'); print "\n"; } # dispLine # If we've printed a line in directory mode, and the entry isn't # a regular file, we've got to find and print the special character at the end. # / means directory, for example. # This is used by the previous routine, among others. sub dirSuffix($) { my $ln = shift; my $suf = ""; if($fmode&$dirmode) { $suf = substr($labels, $dirSufStart + 2*$ln, 2); $suf =~ s/ +$//; } return $suf; } # dirSuffix # Routines to help format a string, i.e. cut at sentence boundaries. # This isn't real smart; it will happily split Mr. Flintstone. sub appendWhiteSpace($$) { my($chunk, $breakable) = @_; my $nlc = $chunk =~ y/\n//d; # newline count if($breakable) { # Don't interrogate the last few characters of a huge string -- that's inefficient. my $short = substr $refbuf, -2; my $l = length $refbuf; $lperiod = $colno, $idxperiod = $l if $short =~ /[.!?:][)"|}]?$/; $lcomma = $colno, $idxcomma = $l if $short =~ /[-,;][)"|]?$/; $lright = $colno, $idxright = $l if $short =~ /[)"|]$/; $lany = $colno, $idxany = $l; # Tack short fragment onto previous long line. if($longcut and ($nlc or $lperiod == $colno) and $colno <= 14) { substr($refbuf, $longcut, 1) = " "; $chunk = "", $nlc = 1 unless $nlc; } # pasting small fragment onto previous line } # allowing line breaks $nlc = 0 if $lspace == 3; if($nlc) { $nlc = 1 if $lspace == 2; $refbuf .= "\n"; $refbuf .= "\n" if $nlc > 1; $colno = 1; $longcut = $lperiod = $lcomma = $lright = $lany = 0; $lspace = 3 if $lspace >= 2 or $nlc > 1; $lspace = 2 if $lspace < 2; } $refbuf .= $chunk; $lspace = 1 if length $chunk; $colno += $chunk =~ y/ / /; $colno += 4 * ($chunk =~ y/\t/\t/); } # appendWhiteSpace sub appendPrintable($) { my $chunk = shift; $refbuf .= $chunk; $colno += length $chunk; $lspace = 0; return if $colno <= $optimalLine; # Oops, line is getting long. Let's see where we can cut it. my ($i, $j) = (0, 0); if($lperiod > $cutLineAfter) { $i = $lperiod, $j = $idxperiod; } elsif($lcomma > $cutLineAfter) { $i = $lcomma, $j = $idxcomma; } elsif($lright > $cutLineAfter) { $i = $lright, $j = $idxright; } elsif($lany > $cutLineAfter) { $i = $lany, $j = $idxany; } return unless $j; # nothing we can do about it $longcut = 0; $longcut = $j if $i != $lperiod; substr($refbuf, $j, 1) = "\n"; $colno -= $i; $lperiod -= $i; $lcomma -= $i; $lright -= $i; $lany -= $i; } # appendPrintable # Break up a line using the above routines. sub breakLine($) { my $t = shift; my $ud = $$t =~ s/\r$//; if($lspace eq "2l") { $$t =~ s/^/\r/ if length $$t; $lspace = 2; } $$t =~ s/^/\r/ if length $$t > $paraLine; my $rc = $$t =~ y/\r/\n/; $ud |= $$t =~ s/[ \t]+$//gm; $ud |= $$t =~ s/([^ \t\n])[ \t]{2,}/$1 /g; $ud |= $$t =~ s/([^ \t\n])\t/$1 /g; $ud |= $$t =~ s/ +\t/\t/g; $lspace = 2 if $lspace < 2; # should never happen $lspace = 3 unless length $$t; return $ud if ! $rc and length $$t < $optimalLine; $rc |= $ud; # The following 120 comes from $paraLine. $$t =~ s/(\n.{120})/\n$1/g; $$t =~ s/(.{120,}\n)/$1\n/g; $refbuf = ""; $colno = 1; $longcut = $lperiod = $lcomma = $lright = $lany = 0; while($$t =~ /(\s+|[^\s]+)/g) { my $chunk = $1; if($chunk =~ /\s/) { appendWhiteSpace($chunk, 1); } else { appendPrintable($chunk); } } if($lspace < 2) { # line didn't have a \r at the end # We might want to paste the last word back on. appendWhiteSpace("\n", 1); chop $refbuf; } $rc = 1 if $refbuf =~ /\n/; return 0 unless $rc; $$t = $refbuf; $lspace = "2l" if length $refbuf > $paraLine; return 1; } # breakLine # Check the syntax of a regular expression, before we pass it to perl. # If perl doesn't like it, it dies, and you've lost your edits. # The first char is the delimiter -- we stop at the next delimiter. # The regexp, up to the second delimiter, is returned, # along with the remainder of the string in the second return variable. # return (regexp, remainder), or return () if there is a problem. # As usual, $errorMsg will be set. # Pass the line containing the regexp, and a flag indicating # left or right side of a substitute. sub regexpCheck($$) { my ($line, $isleft) = @_; my ($c, $d); # We wouldn't be here if the line was empty. my $delim = substr $line, 0, 1; index($valid_delim, $delim) >= 0 or $errorMsg = "invalid delimiter $delim", return (); $line = substr $line, 1; # remove lead delimiter # Remember whether a character is "on deck", ready to be modified by * etc. my $ondeck = 0; my $offdeck = ' '; my $exp = ""; my $cc = 0; # in character class my $paren = 0; # nested parentheses while(length $line) { $c = substr $line, 0, 1; if($c eq '\\') { $errorMsg = "line ends in backslash", return () if length($line) == 1; $d = substr $line, 1, 1; $ondeck = 1; $offdeck = ' '; # I can't think of any reason to remove the escape \ from any character, # except ()|, where we reverse the sense of escape, # and \& on the right, which becomes &. if(index("()|", $d) >= 0 and ! $cc and $isleft) { $ondeck = 0, ++$paren if $c eq '('; --$paren if $c eq ')'; $errorMsg = "Unexpected closing )", return () if $paren < 0; $c = ''; } $c = '' if $d eq '&' and ! $isleft; $exp .= "$c$d"; $line = substr $line, 2; next; } # escape character # Break out if you've hit the delimiter $paren or $c ne $delim or last; # Not the delimiter, I'll assume I can copy it over to $exp. # But I have to watch out for slash, which is *my* delimiter. $exp .= '\\' if $c eq '/'; # Then there's ()|, which I am reversing the sense of escape. $exp .= '\\' if index("()|", $c) >= 0 and $isleft; # Sometimes $ is interpolated when I don't want it to be. # Even if there is no alphanumeric following, a bare $ seems to cause trouble. # Escape it, unless followed by delimiter, or digit (rhs). if($c eq '$') { $exp .= '\\' if $isleft and length($line) > 1 and substr($line, 1, 1) ne $delim; $exp .= '\\' if ! $isleft and $line !~ /^\$\d/; } if($c eq '^') { $exp .= '\\' if $isleft and $cc != length $exp; } # And we have to escape every @, to avoid interpolation. # Good thing we don't have to escape %, # or it might mess up our % remembered rhs logic. $exp .= '\\' if $c eq '@'; # Turn & into $& $exp .= '$' if $c eq '&' and ! $isleft; # Finally push the character. $exp .= $c; $line = substr $line, 1; # Are there any syntax checks I need to make on the rhs? # I don't think so. next if ! $isleft; if($cc) { # character class # All that matters here is the ] $cc = 0 if $c eq ']'; next; } # Modifiers must have a preceding character. # Except ? which can reduce the greediness of the others. if($c eq '?' and $offdeck ne '?') { $ondeck = 0; $offdeck = '?'; next; } if(index("?+*", $c) >= 0 or $c eq '{' and $line =~ s/^(\d+,?\d*})//) { my $mod = ( $c eq '{' ? "{$1" : $c); $errorMsg = "$mod modifier has no preceding character", return () if ! $ondeck; $ondeck = 0; $offdeck = $c; $exp .= "$1" if $c eq '{'; next; } # modifier $ondeck = 1; $offdeck = ' '; $cc = length $exp if $c eq '['; } # loop over chars in the pattern $cc == 0 or $errorMsg = "no closing ]", return (); $paren == 0 or $errorMsg = "no closing )", return (); if(! length $exp and $isleft) { $exp = $savelhs; $errorMsg = "no remembered search string", return () if ! defined $exp; } $savelhs = $exp if $isleft; if(! $isleft) { if($exp eq '%') { $exp = $saverhs; $errorMsg = "no remembered replacement string", return () if ! defined $exp; } elsif($exp eq '\\%') { $exp = '%'; } $saverhs = $exp; } # rhs return ($exp, $line); } # regexpCheck # Get the start or end of a range. # Pass the line containing the address. sub getRangePart($) { my $line = shift; my $ln = $dot; if($line =~ s/^(\d+)//) { $ln = $1; } elsif($line =~ s/^\.//) { # $ln is already set to dot } elsif($line =~ s/^\$//) { $ln = $dol; } elsif($line =~ s/^'([a-z])//) { $ln = substr $labels, (ord($1) - ord('a'))*$lnwidth, $lnwidth; $errorMsg = "label $1 not set", return () if $ln eq $lnspace; } elsif($line =~ m:^([/?]):) { $errorMsg = "search string not found", return () if $dot == 0; my $delim = $1; my @pieces = regexpCheck($line, 1); return () if $#pieces < 0; my $exp = $pieces[0]; $line = $pieces[1]; my $icase = ""; # case independent $icase = "i" if $caseInsensitive; if($delim eq substr $line, 0, 1) { $line = substr $line, 1; if('i' eq substr $line, 0, 1) { $line = substr $line, 1; $icase = 'i'; } } my $incr = ($delim eq '/' ? 1 : -1); # Recompile the regexp after each command, but don't compile it on every line. # Is there a better way to do this, besides using eval? my $notfound = 0; eval ' while(1) { $ln += $incr; $ln = 1 if $ln > $dol; $ln = $dol if $ln == 0; last if fetchLine($ln, 1) =~ ' . "/$exp/o$icase; " . '$notfound = 1, last if $ln == $dot; } # looking for match '; # end evaluated string $errorMsg = "search string not found", return () if $notfound; } # search pattern # Now add or subtract from this base line number while($line =~ s/^([+-])(\d*)//) { my $add = ($2 eq "" ? 1 : $2); $ln += ($1 eq '+' ? $add : -$add); } $errorMsg = "line number too large", return () if $ln > $dol; $errorMsg = "negative line number", return () if $ln < 0; return ($ln, $line); } # getRangePart # Read the data as a string from a url. # Data is retrieved using http, https, or ftp. # Parameters: url, post data, result buffer. # You can return 0 (failure) and leave text and the buffer, # and I'll report the error, and still assimilate the buffer. sub readUrl($$$) { my ($filename, $post, $tbuf) = @_; my $rc = 1; # return code, success $lfsz = 0; # local file size my $rsize = 0; # size read my $weburl; my $scheme; my $encoding = ""; my $pagetype = ""; my %url_desc = (); # Description of the current URL # I don't know if we need a full url encode or what?? # This is a major kludge! I just don't understand this. $filename =~ s/ /%20/g; $filename =~ s/[\t\r\n]//g; # I don't know what http://foo@this.that.com/file.htm means, # but I see it all the time. $filename =~ s,^http://[^/]*@,http://,i; $$tbuf = ""; # start with a clear buffer $errorMsg = "too many nested frames", return 0 unless $rerouteCount; --$rerouteCount; # split into machine, file, and post parameters separate: { my $oldname = $filename; # remember where we started my $authinfo = ""; # login password for web sites that return error 401 $scheme = is_url $filename; # scheme could have changed $weburl = 0; $weburl = 1 if $scheme =~ /^https?$/; if(!length $post and $filename =~ s/^(.*?)(\?.*)$/$1/ ) { $post = $2; } # $post should be url encoded, but sometimes it's not, and I don't know why. $post =~ y/ /+/; my $postfilename = ""; # We assume $post starts with ? or *, if it is present at all. my $meth = "GET"; my $postapplic = ""; if(substr($post, 0, 1) eq '*') { $meth = "POST"; } else { $postfilename = $post; } print "$meth: $post\n" if $debug >= 2; $filename =~ s,^$scheme://,,i; my $serverPort = 80; $serverPort = 443 if $scheme eq 'https'; $serverPort = 21 if $scheme eq 'ftp'; $serverPort = 23 if $scheme eq 'telnet'; my $serverPortString = ""; my $server = $filename; $server =~ s,/.*,,; # Sometimes we need to do this -- got me hanging! $server =~ s/%([0-9a-fA-F]{2})/chr hex "$1"/ge; if($server =~ s/:(\d+)$//) { $serverPort = $1; } # If a server is on port 443, assume it speaks SSL. # This is a real bastardization of the html standard, # but it's the explorer standard. Need I say more? $scheme = 'https' if$serverPort == 443; $serverPortString = ":$serverPort" if $serverPort != 80; $filename =~ s,^[^/]*,,; # Lots of http servers can't handle /./ or /../ or // $filename =~ s:/{2,}:/:g; # Oops, put internal http:// back the way it was. # The bug is caused by a line like this. #

# Because it's post, the get parameters after the ? are still here. # And I just turned http:// into http:/ # This is very rare, but it happened to me, so I'm trying to fix it. $filename =~ s,http:/,http://,gi; $filename =~ s,ftp:/,ftp://,gi; $filename =~ s:^/(\.{1,2}/)+:/:; $filename =~ s:/(\./)+:/:g; 1 while $filename =~ s:/[^/]+/\.\./:/:; $filename =~ s:^/(\.\./)+:/:; # Ok, create some more variables so we either fetch this file # or convert it if it's pdf. # Too bad I did all this work, and the pdf converter doesn't work for crap. # Probably because pdf is irreparably inaccessible. # Thanks a lot adobe! my $go_server = $server; my $go_port = $serverPort; my $go_portString = $serverPortString; my $go_file = $filename; my $go_post = $post; my $go_postfilename = $postfilename; my $go_meth = $meth; if($filename =~ /\.pdf$/ and $pdf_convert) { ($meth eq "GET" and $scheme eq "http") or $errorMsg = "online conversion from pdf to html only works when the pdf file is accessed via the http get method\ntype pr to download pdf in raw mode", return 0; $go_server="access.adobe.com"; $go_port = 80; $go_portString = ""; $go_file = "/perl/convertPDF.pl"; # It would be simpler if this bloody form wer get, but it's post. $go_meth = "POST"; $go_post = "http://$server$serverPortString$filename$postfilename"; $go_post = "*submit=submit&url=" . urlEncode($go_post); $go_postfilename = ""; } # redirecting to adobe to convert pdf if($go_meth eq "POST") { $postapplic = "Pragma: no-cache$eol" . "Cache-Control: no-cache$eol" . "Content-Type: application/x-www-form-urlencoded$eol" . "Content-Length: " . (length($go_post)-1) . $eol; } my $newname = ""; $authAttempt = 0; makeconnect: { my $chunk; $lfsz = 0; $$tbuf = ""; $go_file = "/" if ! length $go_file; %url_desc = (SCHEME => $scheme, SERVER => $go_server, PORT => $go_port, PATH => $go_file, method => $go_meth); $url_desc{content} = substr($go_post, 1) if length $go_post; # Kinda silly. # If you're using digest authentication with the POST method, # the content needs to be digestified. # This is for message integrity checking, when that option is used. # Consider completely replacing $go_x variables with elements of the %url_desc # hash? There is massive redundancy here. my $domainCookies = ""; $domainCookies = fetchCookies(\%url_desc) if $allowCookies; # Grab the cookies. my $send_server = # Send this to the http server - maybe via SSL "$go_meth $go_file$go_postfilename HTTP/1.0$eol" . # Do we need $go_portString here??? # If I put it in, paypal doesn't work. "Host: $go_server$eol" . (length $referer ? "Referer: $referer$eol" : "") . $domainCookies . $authinfo . "Accept: text/*, audio/*, image/*, application/*, message/*$eol" . "Accept: audio-file, postscript-file, mail-file, default, */*;q=0.01$eol" . "Accept-Encoding: gzip, compress$eol" . "Accept-Language: en$eol" . "User-Agent: $agent$eol" . $postapplic . $eol; # blank line at the end # send data after if post method $send_server .= substr($go_post, 1) if $go_meth eq "POST"; if($debug >= 4) { my $temp_server = $send_server; $temp_server =~ y/\r//d; print $temp_server; } if($scheme eq 'http') { # Connect to the http server. my $iaddr = inet_aton($go_server) or $errorMsg = "cannot identify $go_server on the network", return 0; my $paddr = sockaddr_in($go_port, $iaddr); my $proto = getprotobyname('tcp'); socket(FH, PF_INET, SOCK_STREAM, $proto) or $errorMsg = "cannot allocate a socket", return 0; connect(FH, $paddr) or $errorMsg = "cannot connect to $go_server", return 0; FH->autoflush(1); print FH $send_server; # Send the HTTP request message # Now retrieve the page and update the user after every 100K of data. my $last_fk = 0; STDOUT->autoflush(1) if ! $doslike; while(defined($rsize = sysread FH, $chunk, 100000)) { print "sockread $rsize\n" if $debug >= 5; $$tbuf .= $chunk; $lfsz += $rsize; last if $rsize == 0; my $fk = int($lfsz/100000); if($fk > $last_fk) { print "."; $last_fk = $fk; } last if $lfsz >= $maxfile; } close FH; print "\n" if $last_fk; STDOUT->autoflush(0) if ! $doslike; $lfsz <= $maxfile or $errorMsg = "file is too large, limit 40MB", return 0; defined $rsize or $$tbuf = "", $errorMsg = "error reading data from the socket", return 0; } elsif ($scheme eq 'https') { $lfsz = do_ssl($go_server, $go_port, $send_server, $tbuf); Net::SSLeay::free($ssl) if defined $ssl; Net::SSLeay::CTX_free($ctx) if defined $ctx; return 0 unless $lfsz; } elsif ($scheme eq 'ftp') { $lfsz = ftp_connect($go_server, $go_port, $go_file, $tbuf); return 0 unless $lfsz; } elsif ($scheme eq "telnet") { if($go_server =~ s/^([^:@]*):([^:@]*)@//) { print "This URL gives a suggested username of $1 and password of $2\n" . "to be used with the telnet connection you are about to establish.\n"; # See RFC 1738, section 3.8. The username and password in a telnet URL # are advisory. There is no standard method of logging into telnet services. # I guess this is especially useful for public services, which offer guest accounts and such. } print "Starting telnet.\n"; system("telnet $go_server $go_port"); return 1; } else { $errorMsg = "this browser cannot access $scheme URLs.", return 0; } # We got the web page. # But it might be a redirection to another url. if($weburl and $$tbuf =~ /^http\/[\d.]+ 30[12]/i) { if($$tbuf =~ /\nlocation:[ \t]+(.*[^\s])[ \t\r]*\n/i) { $newname = $1; print "relocate $newname\n" if $debug >= 2; }} if($rc and ! length $newname and # Some web sites serve up pages with no headers at all! # aspace.whizy.com/forum/ultimate.cgi $$tbuf =~ /^http/i and $$tbuf =~ /^http\/[\d.]+ 404 /i) { $errorMsg = "file not found on the remote server"; $rc = 0; } # not found # there is yet another way to redirect to a url if($rc and $$tbuf =~ /]*(url=|\d+;)['"]?([^'">\s]+)/i) { $newname = $2; print "refresh $newname\n" if $debug >= 2; # This is almost always an absolute url, even without the http prefix, # but sometimes it's relative. Got me hanging! # Here's a looser criterion for web url. if($newname =~ /^[\w,-]+\.[\w,-]+\.[\w,-]/) { $newname = "http://$newname"; } } # Extract information from the http header - primarily cookies. $encoding = $pagetype = ""; if($$tbuf =~ s/^(http\/\d.*?\r?\n\r?\n)//si) { my $header = $1; my @lines = split "\n", $header; open BFH, ">>$ebhttp"; if(defined BFH) { print BFH $header; close BFH; } $authinfo = ""; while(my $hline = shift @lines) { $hline =~ s/\r$//; print "$hline\n" if $debug >= 4; setCookies($hline, \%url_desc) if $hline =~ /^Set-Cookie:/i and $allowCookies; $authinfo = parseWWWAuth($hline, \%url_desc) if $hline =~ /^WWW-Authenticate/i; return 0 if $authinfo eq "x"; # I shouldn't really discard things like charset=blablabla, # but I don't really know what to do with it anyways. $hline =~s/;.*//; $encoding = lc $1 if $hline =~ /^content-encoding:\s+['"]?(\w+)['"]?\s*$/i; $pagetype = lc $1 if $hline =~ /^content-type:\s+['"]?([^\s'"]+)['"]?\s*$/i; } # loop over lines ++$authAttempt, redo makeconnect if length $authinfo; } else { # http header extracted if($scheme =~ /^https?$/) { $errorMsg = "http response doesn't have a head-body structure"; $rc = 0; } else { # For now, this means ftp. # We could have retrieved an html page via ftp, but probably not. # Turn off browse command. $cmd = 'e' unless $$tbuf =~ /^<[hH!]/; } } } # makeconnect # cookies that are set via http-equiv # The content of the cookie must be quoted. while($$tbuf =~ /= 2; if($newname ne $oldname) { # It's not really diferent if one has :80 and the other doesn't. # I wouldn't code this up if it didn't really happen. See www.claritin.com $oldname =~ s,^HTTP://,http://,; $oldname =~ s,^(http://)?([^/]*):80/,$1$2/,; $oldname =~ s,^(http://)?([^/]*):80$,$1$2,; $newname =~ s,^HTTP://,http://,; $newname =~ s,^(http://)?([^/]*):80/,$1$2/,; $newname =~ s,^(http://)?([^/]*):80$,$1$2,; if($oldname ne $newname) { if(--$rerouteCount) { print "$newname\n" if $debug >= 1; # Post method becomes get after redirection, I think. # $post = "" if length $post and $newname =~ /\?[^\/]*$/; $post = ""; $filename = $newname; redo separate; } $errorMsg = "too many url redirections"; $rc = 0; }}} # automatic url redirection $changeFname = "$scheme://$server$serverPortString$filename$postfilename"; } # separate # Check for complressed data. if($rc and $lfsz and length $encoding and $pagetype =~ /^text/i) { print "$lfsz\ndecoding $encoding\n" if $debug >= 2; my $program = ""; my $csuf = ""; # compression suffix $program = "zcat", $csuf = "gz" if $encoding eq "gzip"; $program = "zcat", $csuf = "Z" if $encoding eq "compress"; length $program or $errorMsg = "unrecognized compression method", return 0; $cfn = "$ebtmp.$csuf"; # compressed file name open FH, ">$cfn" or $errorMsg = "cannot create temp file $cfn", return 0; binmode FH, ':raw' if $doslike; print FH $$tbuf or $errorMsg = "cannot write to temp file $cfn", return 0; close FH; unlink $ebtmp; if(! system "$program $ebtmp.$csuf >$ebtmp 2>/dev/null") { # There are web pages out there that are improperly compressed. # We'll call it good if we got any data at all. $errorMsg = "could not uncompress the data", return 0 unless (stat($ebtmp))[7]; } # Read in the uncompressed data. $$tbuf = ""; open FH, $ebtmp or $errorMsg = "cannot open the uncompressed file $ebtmp", return 0; $lfsz = (stat(FH))[7]; $lfsz <= $maxfile or $errorMsg = "uncompressed file is too large, limit 40MB", close FH, return 0; binmode FH, ':raw' if $doslike; $rsize = sysread FH, $$tbuf, $lfsz; close FH; $rsize and $rsize == $lfsz or $errorMsg = "cannot read the uncompressed data from $ebtmp", return 0; unlink $ebtmp; } # compressed data if($rc and $fetchFrames) { $errorMsg = ""; # This really isn't right - to do this here I mean. # If a line of javascript happens to contain a frame tag # I'm going to fetch that frame and put it in right here. # Hopefully that won't happen. # Note that the entire frame tag must be on one line. $$tbuf =~ s/(\0\x80-\xff]+>)/readFrame($1)/gei; $rc = 0 if length $errorMsg; } # looking for frames return $rc; } # readUrl # Read a frame. sub readFrame($) { my $tag = shift; my $saveFname = $changeFname; my($tc, $fbuf, $src, $name); $tag =~ s/\bsrc *= */src=/gi; $tag =~ s/\bname *= */name=/gi; $tc = $tag; if($tc =~ s/^.*\bsrc=//s) { $src = $tc; $src =~ s/ .*//s; $src =~ s/^['"]//; $src =~ s/['"]?>?$//; if(length $src) { print "fetch frame $src\n" if $debug >= 1; $src = resolveUrl($saveFname, $src); if($didFrame{$src}) { print "already fetched\n" if $debug >= 2; $changeFname = $saveFname; return ""; } $didFrame{$src} = 1; print "* $src\n" if $debug >= 1; $name = ""; $tc = $tag; if($tc =~ s/^.*\bname=//s) { $tc =~ s/ .*//s; $tc =~ s/^['"]//; $tc =~ s/['"]?>?$//; $name = urlDecode $tc if length $tc; } # name attribute if(readUrl($src, "", \$fbuf)) { # Replace the tag with the data, and some stuff prepended. $name = " $name" if length $name; $tag = "

Frame$name:

\n\n"; $changeFname = $saveFname; return $tag.$fbuf; } # frame read successfully }} # src attribute present $changeFname = $saveFname; return $tag; } # readFrame # Adjust the map of line numbers -- we have inserted text. # Also shift the downstream labels. # Pass the string containing the new line numbers, and the dest line number. sub addToMap($$) { my ($newpiece, $dln) = @_; my $offset = length($newpiece)/$lnwidth; $offset > 0 or die "negative offset in addToMap"; my ($i, $j); foreach $i (0..25) { my $ln = substr($labels, $i*$lnwidth, $lnwidth); # line number next if $ln eq $lnspace or $ln <= $dln; substr($labels, $i*$lnwidth, $lnwidth) = sprintf($lnformat, $ln + $offset); } # loop over 26 labels $j = ($dln+1) * $lnwidth; substr($map, $j, 0) = $newpiece; $dot = $dln + $offset; $dol += $offset; $fmode |= $changemode|$firstopmode; $ubackup = 1; } # addToMap # Fold in the text buffer (parameter) at $endRange (global variable). # Assumes the text has the last newline on it. sub addTextToSession($) { my $tbuf = shift; # text buffer return 1 unless length $$tbuf; $fmode &= ~$nlmode if $endRange == $dol; if(not $$tbuf =~ s/\n$// and $endRange == $dol) { $fmode |= $nlmode; print "no trailing newline\n" if ! ($fmode&$binmode) and $cmd ne 'b'; } # missing newline my $j = $#text; my $newpiece = ""; # At this point $tbuf could be empty, whence split doesn't work properly. # This only happens when reading a file containing one blank line. if(length $$tbuf) { push @text, split "\n", $$tbuf, -1; } else { push @text, ""; } $#text = $j, return 0 if lineLimit 0; $newpiece .= sprintf($lnformat, $j) while ++$j <= $#text; addToMap($newpiece, $endRange); return 1; } # addTextToSession # Read a file into memory. # As described earlier, the lines are appended to @text. # Then the indexes for those lines are pasted into $map, # using addToMap(). # Check to see if the data is binary, and set $fmode accordingly. # Parameters are the filename or URL, and the post data (for URLs). sub readFile($$) { my ($filename, $post) = @_; my $tbuf; # text buffer my $rc = 1; # return code, success $filesize = 0; my $rsize = 0; # size read my $j; if(is_url $filename) { $rerouteCount = 24; %didFrame = (); $rc = readUrl($filename, $post, \$tbuf); $filesize = length $tbuf; return 0 unless $rc + $filesize; } else { # url or file open FH, "<$filename" or $errorMsg = "cannot open $filename, $!", return 0; # Check for directory here if(-d FH) { close FH; $j = $filename; $j =~ s,/$,,; $j .= "/*"; my @dirlist; if($j =~ / /) { @dirlist = glob '"'.$j.'"'; } else { @dirlist = glob $j; } if($#dirlist < 0) { $dot = $endRange; $filesize = 0; return $rc; } # empty directory $dirname = $j; $dirname =~ s/..$//; # get rid of /* return 0 if lineLimit($#dirlist + 1); $filesize = 0; $tbuf = ""; $j = $dirSufStart; substr($labels, $j, 2) = " "; foreach (@dirlist) { my $entry = $_; $entry =~ s,.*/,,; # leave only the file $entry =~ s/\n/\t/g; my $suf = ""; $suf .= '@' if -l; if(! -f) { $suf .= '/' if -d; $suf .= '|' if -p; $suf .= '*' if -b; $suf .= '<' if -c; $suf .= '^' if -S; } # not a regular file $filesize += length($entry) + length($suf) + 1; if($dol) { $entry .= $suf; } else { $suf .= " "; $j += 2; substr($labels, $j, 2) = substr($suf, 0, 2); } $tbuf .= "$entry\n"; } $dol or $fmode = $dirmode, print "directory mode\n"; return addTextToSession(\$tbuf); } # directory -f FH or $errorMsg = "$filename is not a regular file", close FH, return 0; $filesize = (stat(FH))[7]; if(! $filesize) { close FH; $dot = $endRange; $filesize = 0; return $rc; } # empty file $filesize <= $maxfile or $errorMsg = "file is too large, limit 40MB", close FH, return 0; binmode FH, ':raw' if $doslike; $rsize = sysread(FH, $tbuf, $filesize) if $filesize; close FH; $rsize == $filesize or $errorMsg = "cannot read the contents of $filename,$!", return 0; } # reading url or regular file my $bincount = $tbuf =~ y/\0\x80-\xff/\0\x80-\xff/; if($bincount*4 - 10 < $filesize) { # A text file - remove crlf in the dos world. $tbuf =~ s/\r\n/\n/g if $doslike; } elsif(! ($fmode&$binmode)) { # If it wasn't before, it is now a binary file. print "binary data\n"; $fmode |= $binmode; } $rc &= addTextToSession(\$tbuf); return $rc; } # readFile # Write a range into a file. # Pass the mode and filename. sub writeFile($$) { my ($mode, $filename) = @_; $errorMsg = "cannot write to a url", return 0 if is_url($filename); $dol or $errorMsg = "writing an empty file", return 0; open FH, "$mode$filename" or $errorMsg = "cannot create $filename, $!", return 0; $filesize = 0; binmode FH, ':raw' if $doslike and $fmode&$binmode; if($startRange) { foreach my $i ($startRange..$endRange) { my $nl = ($fmode&$nlmode && $i == $dol ? "" : "\n"); my $suf = dirSuffix($i); my $outline = fetchLine($i, 1).$suf.$nl; print FH $outline or $errorMsg = "cannot write to $filename, $!", close FH, return 0; $filesize += length $outline; } # loop over range } # nonempty file close FH; # This is not an undoable operation, nor does it change data. # In fact the data is "no longer modified" if we have written all of it. $fmode &= ~$changemode if $dol == 0 or $startRange == 1 and $endRange == $dol; return 1; } # writeFile # Read from another context. # Pass the context number. sub readContext($) { my $cx = shift; cxCompare($cx) and cxActive($cx) or return 0; my $dolcx = $dol[$cx]; $filesize = 0; if($dolcx) { return 0 if lineLimit $dolcx; $fmode &= ~$nlmode if $endRange == $dol; my $newpiece = ""; foreach my $i (1..$dolcx) { my $inline = fetchLineContext($i, 1, $cx); my $suf = ""; if($fmode[$cx] & $dirmode) { $suf = substr($labels[$cx], $dirSufStart + 2*$i, 2); $suf =~ s/ +$//; } $inline .= $suf; push @text, $inline; $newpiece .= sprintf $lnformat, $#text; $filesize += length($inline) + 1; } # end loop copying lines addToMap($newpiece, $endRange); if($fmode[$cx]&$nlmode) { --$filesize; $fmode |= $nlmode if $endRange == $dol; } $fmode |= $binmode, print "binary data\n" if $fmode[$cx]&$binmode and ! ($fmode&$binmode); } # nonempty buffer return 1; } # readContext # Write to another context. # Pass the context number. sub writeContext($) { my $cx = shift; my $dolcx = $endRange - $startRange + 1; $dolcx = 0 if ! $startRange; return 0 if ! cxCompare($cx) or !cxReset($cx, 1) or lineLimit $dolcx; my $mapcx = $lnspace; $filesize = 0; if($startRange) { foreach my $i ($startRange..$endRange) { $outline = fetchLine($i, 0); $outline .= dirSuffix($i); push @text, $outline; $mapcx .= sprintf $lnformat, $#text; $filesize += length($outline) + 1; } # end loop copying lines $fmode[$cx] = $fmode & ($binmode|$browsemode); $fmode[$cx] |= $nlmode, --$filesize if $fmode&$nlmode and $endRange == $dol; } # nonempty file $map[$cx] = $mapcx; $dot[$cx] = $dol[$cx] = $dolcx; $factive[$cx] = 1; $fname[$cx] = ""; $btags[$cx] = $btags; return 1; } # writeContext # Move or copy a block of text. sub moveCopy() { $dest++; # more convenient $endr1 = $endRange+1; # more convenient $dest <= $startRange or $dest >= $endr1 or $errorMsg = "destination lies inside the block to be moved or copied", return 0; if($cmd eq 'm' and ($dest == $endr1 or $dest == $startRange)) { $errorMsg = "no change" if ! $inglob; return 0; } my $starti = $startRange*$lnwidth; my $endi = $endr1*$lnwidth; my $desti = $dest * $lnwidth; my $offset = $endr1 - $startRange; my ($i, $j); # The section of the map that represents the range. my $piece_r = substr $map, $starti, $endi-$starti; my $piece_n = ""; # the new line numbers, if the text is copied. if($cmd eq 't') { return 0 if lineLimit $offset; for($j=0; $j $dol; $fmode &= ~$nlmode if $endRange == $dol and $cmd eq 'm'; } # Now for the labels my ($lowcut, $highcut, $p2len); if($dest <= $startRange) { $lowcut = $dest; $highcut = $endr1; $p2len = $startRange - $dest; } else { $lowcut = $startRange; $highcut = $dest; $p2len = $dest - $endr1; } foreach $i (0..25) { my $ln = substr($labels, $i*$lnwidth, $lnwidth); # line number next if $ln eq $lnspace or $ln < $lowcut; if($ln >= $highcut) { $ln += $offset if $cmd eq 't'; } elsif($ln >= $startRange and $ln <= $endRange) { $ln += ($dest < $startRange ? -$p2len : $p2len) if $cmd eq 'm'; $ln += $offset if $cmd eq 't' and $dest < $startRange; } elsif($dest < $startRange) { $ln += $offset; } else { $ln -= $offset if $cmd eq 'm'; } substr($labels, $i*$lnwidth, $lnwidth) = sprintf $lnformat, $ln; } # loop over labels $dol += $offset if $cmd eq 't'; $dot = $endRange; $dot += ($dest < $startRange ? -$p2len : $p2len) if $cmd eq 'm'; $dot = $dest + $offset - 1 if $cmd eq 't'; $fmode |= $changemode|$firstopmode; $ubackup = 1; return 1; } # moveCopy # Delete a block of text. # Pass the range to delete. sub delText($$) { my ($sr, $er) = @_; # local start and end range my ($i, $j); $fmode &= ~$nlmode if $er == $dol; $j = $er - $sr + 1; substr($map, $sr*$lnwidth, $j*$lnwidth) = ""; # Move the labels. foreach $i (0..25) { my $ln = substr($labels, $i*$lnwidth, $lnwidth); # line number next if $ln eq $lnspace or $ln < $sr; substr($labels, $i*$lnwidth, $lnwidth) = ($ln <= $er ? $lnspace : (sprintf $lnformat, $ln - $j)); } # loop over labels $dol -= $j; $dot = $sr; --$dot if $dot > $dol; $fmode |= $changemode|$firstopmode; $ubackup = 1; return 1; } # delText # Delete files from a directory as you delete lines. # It actually moves them to your trash bin. sub delFiles() { $dw or $errorMsg = "directories are readonly, type dw to enable directory writes", return 0; $dw == 2 or length $rbin or $errorMsg = "could not create .trash under your home directory, to hold the deleted files", return 0; my $ln = $startRange; my $cnt = $endRange - $startRange + 1; while($cnt--) { my $f = fetchLine($ln, 0); if($dw == 2 or dirSuffix($ln) =~ /^@/) { unlink "$dirname/$f" or $errorMsg = "could not remove $f, $!", return 0; } else { rename "$dirname/$f", "$rbin/$f" or $errorMsg = "Could not move $f to the trash bin, $!, set dx mode to actually remove the file", return 0; } delText($ln, $ln); substr($labels, $dirSufStart + 2*$ln, 2) = ""; } return 1; } # delFiles # Join lines from startRange to endRange. sub joinText() { $errorMsg = "cannot join one line", return 0 if $startRange == $endRange; return 0 if lineLimit 1; my ($i, $line); $line = ""; foreach $i ($startRange..$endRange) { $line .= ' ' if $cmd eq 'J' and $i > $startRange; $line .= fetchLine($i, 0); } push @text, $line; substr($map, $startRange*$lnwidth, $lnwidth) = sprintf $lnformat, $#text; delText($startRange+1, $endRange); $dot = $startRange; return 1; } # joinText # Substitute text on the lines in $startRange through $endRange. # We could be changing the text in an input field. # If so, we'll call infReplace(). # Also, we might be indirectory mode, whence we must rename the file. sub substituteText($) { my $line = shift; my $whichlink = ""; $whichlink = $1 if $line =~ s/^(\d+)//; length $line or $errorMsg = "no regular expression after $icmd", return -1; if($fmode&$dirmode) { $dw or $errorMsg = "directories are readonly, type dw to enable directory writes", return -1; } my ($i, $j, $exp, $rhs, $qrhs, $lastSubst, @pieces, $blmode); if($line ne "bl") { $blmode = 0; @pieces = regexpCheck($line, 1); return -1 if $#pieces < 0; $exp = $pieces[0]; $line = $pieces[1]; length $line or $errorMsg = "missing delimiter", return -1; @pieces = regexpCheck($line, 0); return -1 if $#pieces < 0; $rhs = $pieces[0]; $line = $pieces[1]; } else { $blmode = 1, $lspace = 3; } my $gflag = ""; my $nflag = 0; my $iflag = ""; $iflag = "i" if $caseInsensitive; $subprint = 1; # default is to print the last line substituted $lastSubst = 0; if(! $blmode) { if(length $line) { $subprint = 0; # necessarily starts with the delimiter substr($line, 0, 1) = ""; while(length $line) { $gflag = 'g', next if $line =~ s/^g//; $subprint = 2, next if $line =~ s/^p//; $iflag = 'i', next if $line =~ s/^i//; if($line =~ s/^(\d+)//) { ! $nflag or $errorMsg = "multiple numbers after the third delimiter", return -1; $nflag = $1; $nflag > 0 and $nflag <= 999 or $errorMsg = "numeric suffix out of range, please use [1-999]", return -1; next; } # number $errorMsg = "unexpected substitution suffix after the third delimiter"; return -1; } # loop gathering suffix flags ! $gflag or ! $nflag or $errorMsg = "cannot use both a numeric suffix and the `g' suffix simultaneously", return -1; # s/x/y/1 is very inefficient. $nflag = 0 if $nflag == 1; } # closing delimiter $qrhs = $rhs; # quote-fixed right hand side if($rhs =~ /^[ul]c$/) { $qrhs = "$qrhs \$&"; $iflag .= 'e' if !$nflag; } elsif($rhs eq "ue") { $qrhs = "unescape \$&"; $iflag .= 'e' if !$nflag; } elsif($rhs eq "mc") { $qrhs = "mixCase \$&"; $iflag .= 'e' if !$nflag; } else { if($nflag) { $qrhs =~ s/"/\\"/g; $qrhs = '"'.$qrhs.'"'; } } # I don't understand it, but $&[x] means something to perl. # So when I replace j with &[x], becomeing $&[x], it blows up. # Thus I escape open brackets and braces in the rhs. # Hopefully you won't escape them on the command line - you have no reason to. # If you do they'll be doubly escaped, and that's bad. $qrhs =~ s/([\[{])/\\$1/g; # } } else { $subprint = 0; } # blmode or not # Substitute the input fields first. if($cmd eq 'I') { my $yesdot = 0; my $foundFields = 0; foreach $i ($startRange..$endRange) { my $rc = infIndex($i, $whichlink); next unless $rc; $foundFields = 1; $rc > 0 or $dot = $i, $inglob = 0, return -1; my $newinf = $inf; if(!$nflag) { eval '$rc = $newinf =~ ' . "s/$exp/$qrhs/$iflag$gflag; "; } else { $j = 0; eval '$newinf =~ ' . "s/$exp/++\$j == $nflag ? $qrhs : \$&/ge$iflag; "; $rc = ($j >= $nflag); } next unless $rc; $dot = $i; infReplace($newinf) or return -1; $yesdot = $dot; } # loop over lines if(! $yesdot) { if(!$inglob) { $errorMsg = "no match" if $foundFields; } return 0; } dispLine($yesdot) if $subprint == 2 or ! $inglob and $subprint == 1; return 1; } # input fields # Not an input field, just text, so replace it. # Once again, use the eval construct. # This time we might be substituting across an entire range. @pieces = (); $errorMsg = ""; eval ' for($i=$startRange; $i<=$endRange; ++$i) { my $temp = fetchLine($i, 0);' . ($blmode ? 'my $subst = breakLine(\$temp);' : (!$nflag ? 'my $subst = $temp =~ ' . "s/$exp/$qrhs/o$iflag$gflag; " : 'my $subst = 0; my $k = 0; $temp =~ ' . "s/$exp/++\$k == $nflag ? $qrhs : \$&/oge$iflag; " . '$subst = ($k >= $nflag); ' )) . 'next unless $subst; if($fmode&$dirmode) { if($temp =~ m,[/\n],) { $errorMsg = "cannot embed slash or newline in a directory name"; $inglob = 0; last; } my $dest = "$dirname/$temp"; my $src = fetchLine($i, 0); $src = "$dirname/$src"; if($src ne $dest) { if(-e $dest or -l $dest) { $errorMsg = "destination file already exists"; $inglob = 0; last; } rename $src, $dest or $errorMsg = "cannot move file to $temp", $inglob = 0, last; } # source and dest are different } # directory @pieces = split "\n", $temp, -1; @pieces = ("") if $temp eq ""; last if lineLimit $#pieces+1; $j = $#text; push @text, @pieces; @pieces = (); substr($map, $i*$lnwidth, $lnwidth) = sprintf $lnformat, ++$j; if($j < $#text) { my $newpiece = ""; $newpiece .= sprintf $lnformat, $j while ++$j <= $#text; addToMap($newpiece, $i); $j = length($newpiece) / $lnwidth; $endRange += $j; $i += $j; } dispLine($i) if $subprint == 2; $lastSubst = $i; $fmode |= $changemode|$firstopmode; $ubackup = 1; last if $intFlag; } '; # eval string return 0 if length $errorMsg; if(! $lastSubst) { $errorMsg = ($blmode ? "no change" : "no match") if ! $inglob; return 0; } $dot = $lastSubst; dispLine($dot) if $subprint == 1 and ! $inglob; if($intFlag and ! $inglob) { $errorMsg = $intMsg, return 0; } return 1; } # substituteText # Follow a hyperlink to another web page. sub hyperlink($) { my $whichlink = shift; $cmd = 'b'; $errorMsg = "cannot use the g$whichlink command in directory mode", return 0 if $fmode&$dirmode; $startRange == $endRange or $errorMsg = "go command does not expect a range", return 0; my $h; # hyperlink tag my @links = (); # links on this line my @bref = (); # baseref values my ($j, $line, $href); if($fmode&$browsemode) { $line = fetchLine $endRange, 0; while($line =~ /\x80([\x85-\x8f]+){/g) { $j = revealNumber $1; $h = $$btags[$j]; $href = $$h{href}; $errorMsg = "hyperlink found without a url?? internal error", return 0 unless defined $href; push @links, $href; push @bref, $$h{bref}; } # loop } # browse mode if($#links < 0) { $line = fetchLine $endRange, 1; stripWhite \$line; $line =~ s/[\s"']+/ /g; if(length $line) { while($line =~ /([^ ]+)/g) { $href = $1; $href =~ s/^[^\w]+//; $href =~ s/[^\w]+$//; if(is_url $href) { push @links, $href; } else { $href =~ s/^mailto://i; push @links, "mailto:$href" if $href =~ /^[\w.,-]+@[\w,-]+\.[\w,.-]+$/; } } } # loop over words } # looking for url in text mode $j = $#links + 1; $j or $errorMsg = "no links present", return 0; length $whichlink or $j == 1 or $errorMsg = "multiple links, please use g [1,$j]", return 0; $whichlink = 1 if ! length $whichlink; if($whichlink == 0 or $whichlink > $j) { $errorMsg = $j > 1 ? "invalid link, please use g [1,$j]" : "this line only has one link"; return 0; } --$whichlink; $href = $links[$whichlink]; if($href =~ s/^mailto://i) { $cmd = 'e'; return 1, "\x80mail\x80$href"; } # mailto $href =~ /^javascript:/i and $errorMsg = "sorry, this link calls a javascript function", return 0; return 1, $href if $href =~ /^#/; $line = resolveUrl(($#bref >= 0 ? $bref[$whichlink] : ""), $href); print "* $line\n"; return 1, $line; } # hyperlink # Follow an internal link to a section of the document. sub findSection($) { my $section = shift; foreach my $i (1..$dol) { my $t = fetchLine $i, 0; while($t =~ /\x80([\x85-\x8f]+)\*/g) { my $j = revealNumber $1; my $h = $$btags[$j]; return $i if $$h{name} eq $section; } } return 0; } # findSection # Return the number of unbalanced punctuation marks at the start and end of the line. sub unbalanced($$$) { my ($c, $d, $ln) = @_; my $curline = fetchLine($ln, 1); # Escape these characters, so we know they are literal. $c = "\\$c"; $d = "\\$d"; while($curline =~ s/$c[^$c$d]*$d//) { ; } my $forward = $curline =~ s/$c//g; $forward = 0 if $forward eq ""; my $backward = $curline =~ s/$d//g; $backward = 0 if $backward eq ""; return $backward, $forward; } # unbalanced # Find the line that balances the unbalanced punctuation. sub balanceLine($) { my $line = shift; my ($c, $d); # balancing characters my $openlist = "{([<`"; my $closelist = "})]>'"; my $alllist = "{}()[]<>`'"; my $level = 0; my ($i, $direction, $forward, $backward); if(length $line) { $line =~ /^[\{\}\(\)\[\]<>`']$/ or $errorMsg = "you must specify exactly one of $alllist after the B command", return 0; $c = $line; if(index($openlist, $c) >= 0) { $d = substr $closelist, index($openlist, $c), 1; $direction = 1; } else { $d = $c; $c = substr $openlist, index($closelist, $d), 1; $direction = -1; } ($backward, $forward) = unbalanced($c, $d, $endRange); if($direction > 0) { ($level = $forward) or $errorMsg = "line does not contain an open $c", return 0; } else { ($level = $backward) or $errorMsg = "line does not contain an open $d", return 0; } } else { # character specified by the user or not? # Look for anything unbalanced, probably a brace. foreach $i (0..2) { $c = substr $openlist, $i, 1; $d = substr $closelist, $i, 1; ($backward, $forward) = unbalanced($c, $d, $endRange); ! $backward or ! $forward or $errorMsg = "both $c and $d are unbalanced on this line, try B$c or B$d", return 0; ($level = $backward + $forward) or next; $direction = 1; $direction = -1 if $backward; last; } $level or $errorMsg = "line does not contain an unbalanced brace, parenthesis, or bracket", return 0; } # explicit character passed in, or look for one my $selected = ($direction > 0) ? $c : $d; # Now search for the balancing line. $i = $endRange; while(($i += $direction) > 0 and $i <= $dol) { ($backward, $forward) = unbalanced($c, $d, $i); if($direction > 0 and $backward >= $level or $direction < 0 and $forward >= $level) { $dot = $i; dispLine($dot); return 1; } $level += ($forward-$backward) * $direction; } # loop over lines $errorMsg = "cannot find the line that balances $selected"; return 0; } # balanceLine # Apply a regular expression to each line, and then execute # a command for each matching, or nonmatching, line. # This is the global feature, g/re/p, which gives us the word grep. sub doGlobal($) { my $line = shift; my ($i, $j, $exp, @pieces); length $line or $errorMsg = "no regular expression after $icmd", return 0; @pieces = regexpCheck($line, 1); return 0 if $#pieces < 0; $exp = $pieces[0]; $line = $pieces[1]; length $line or $errorMsg = "missing delimiter", return 0; $line =~ s/^.(i?)\s*//; my $iflag = $1; $iflag = "i" if $caseInsensitive; # Clean up any previous stars. substr($map, $_*$lnwidth+$lnwidth1, 1) = ' ' foreach (1.. $dol); # Find the lines that match the pattern. my $gcnt = 0; # global count eval ' for($i=$startRange, $j=$i*$lnwidth+$lnwidth1; $i<=$endRange; ++$i, $j+=$lnwidth) { substr($map, $j, 1) = "*", ++$gcnt if fetchLine($i, 1)' . ($cmd eq 'g' ? ' =~ ' : ' !~ ') . "/$exp/o$iflag; }"; $gcnt or $errorMsg = ($cmd eq 'g' ? "no lines match the g pattern" : "all lines match the v pattern"), return 0; # Now apply $line to every line with a * $inglob = 1; $errorMsg = ""; $line = 'p' if ! length $line; my $origdot = $dot; my $yesdot = 0; my $nodot = 0; my $stars = 1; global:while($gcnt and $stars) { $stars = 0; for($i=1; $i<=$dol; ++$i) { last global if $intFlag; next unless substr($map, $i*$lnwidth+$lnwidth1, 1) eq '*'; $stars = 1,--$gcnt; substr($map, $i*$lnwidth+$lnwidth1, 1) = ' '; $dot = $i; # ready to run the command if(evaluate($line)) { $yesdot = $dot; --$i if $ubackup; # try this line again, in case we deleted or moved it } else { # Subcommand might turn global flag off. $nodot = $dot, $yesdot = 0, last global if ! $inglob; } } } $inglob = 0; # yesdot could be 0, even upon success, if all lines are deleted via g/re/d if($yesdot or ! $dol) { $dot = $yesdot; dispLine($dot) if ($cmd eq 's' or $cmd eq 'I') and $subprint == 1; } elsif($nodot) { $dot = $nodot; } else { $dot = $origdot; $errorMsg = "none of the marked lines were successfully modified" if $errorMsg eq ""; } $errorMsg = $intMsg if $errorMsg eq "" and $intFlag; return ! length $errorMsg; } # doGlobal # Reveal the links to other web pages, or the email links. sub showLinks() { my ($i, $j, $h, $href, $line); my $addrtext = ""; if($fmode&$browsemode) { $line = fetchLine $endRange, 0; while($line =~ /\x80([\x85-\x8f]+){(.*?)}/g) { $j = revealNumber $1; $i = $2; $h = $$btags[$j]; $href = $$h{href}; $href = "" unless defined $href; if($href =~ s/^mailto://i) { $addrtext .= "$i:$href\n"; } else { $href = resolveUrl($$h{bref}, $href); $addrtext .= "\n$i\n\n"; } } # loop } # browse mode if(! length $addrtext) { length $fname or $errorMsg = "no file name", return 0; if(is_url($fname)) { $href = $fname; $href =~ s/\.browse$//; $j = $href; $j =~ s,^https?://,,i; $j =~ s,.*/,,; $addrtext = "\n$j\n\n"; } else { $addrtext = $fname."\n"; } } $addrtext =~ s/\n$//; $j = $#text; push @text, split "\n", $addrtext, -1; $#text = $j, return 0 if lineLimit 0; $h = cxPack(); cxReset($context, 0) or return 0; $$h{backup} = $backup if defined $backup; $backup = $h; print((length($addrtext)+1)."\n"); $dot = $dol = $#text - $j; my $newpiece = $lnspace; $newpiece .= sprintf($lnformat, $j) while ++$j <= $#text; $map = $newpiece; return 1; } # showLinks # All other editors let you stack and undo hundreds of operations. # If I'm writing a new editor, why don't I do that? # I don't know; guess I don't have the time. # And in my 20 years experience, I have rarely felt the need # to undo several operations. # I'm usually undoing the last typo, and that's it. # So I allow you to undo the last operation, only. # Get ready for a possible undo command. sub readyUndo() { return if $fmode & $dirmode; $savedot = $dot, $savedol = $dol; $savemap = $map, $savelabels = $labels; } # readyUndo sub goUndo() { # swap, so we can undo our undo. I do this alot. my $temp; $temp = $ dot, $dot = $lastdot, $lastdot = $temp; $temp = $ dol, $dol = $lastdol, $lastdol = $temp; $temp = $ map, $map = $lastmap, $lastmap = $temp; $temp = $ labels, $labels = $lastlabels, $lastlabels = $temp; } # goUndo # Replace labels with their lines in shell escapes. sub expandLabeledLine($) { my$x = shift; my $n = ord($x) - ord('a'); my $ln = substr $labels, $n*$lnwidth, $lnwidth; $ln ne $lnspace or $errorMsg = "label $x not set", return ""; return fetchLine($ln, 1); } # expandLabeledLine # Run a shell escape sub shellEscape($) { my $line = shift; # Expand 'a through 'z labels $errorMsg = ""; $line =~ s/\B'([a-z])\b/expandLabeledLine($1)/ge; return 0 if length $errorMsg; $line =~ s/'_/$fname/g; $line =~ s/'\./fetchLine($dot,1)/ge; if($doslike) { # Just run system and hope for the best. system $line; } else { # Unix has a concept of shells. my $shell = $ENV{SHELL}; $shell = "/bin/sh" if ! defined $shell; if(length $line) { system $shell, "-c", $line; } else { system $shell; } } # dos or unix print "ok\n"; return 1; } # shellEscape # Implement various two letter commands. # Most of these set and clear modes. sub twoLetter($) { my $line = shift; my ($i, $j); if($line eq "qt") { exit 0; } if($line =~ s/^cd\s+// or $line =~ s/^cd$//) { $cmd = 'e'; # so error messages are printed if(length $line) { my $temppath = `pwd`; chomp $temppath; if($line eq "-") { $errorMsg = "you have no previous directory", return 0 unless defined $oldpath; chdir $oldpath or $errorMsg = "cannot change to previous directory $oldpath", return 0; } else { $line = envFile($line); return 0 if length $errorMsg; chdir $line or $errorMsg = "invalid directory", return 0; } $oldpath = $temppath; } print `pwd`; return 1; } if($line eq "rf") { $cmd = 'e'; if($fmode & $browsemode) { $cmd = 'b'; $fname =~ s/.browse$//; } length $fname or $errorMsg = "no file name", return 0; $nostack = 1; return -1, "$cmd $fname"; } if($line eq "et") { $cmd = 'e'; $fmode&$browsemode or $errorMsg = $nobrowse, return 0; foreach $i (1..$dol) { $text[substr($map, $i*$lnwidth, $lnwidth1)] = fetchLine($i,1); } $fmode &= ~($browsemode|$firstopmode|$changemode); $btags = []; # don't need those any more. print "editing as pure text\n" if $helpall; return 1; } if($line eq "ub") { $fmode&$browsemode or $errorMsg = $nobrowse, return 0; dropEmptyBuffers(); # Backing out. $map = $$btags[0]{map1}; $fname = $$btags[0]{fname}; $fmode = $$btags[0]{fmode}; $labels = $$btags[0]{labels}; $dot = $$btags[0]{dot}; $dol = $$btags[0]{dol1}; apparentSize(); return 1; } # reverse browse if($line eq "f/" or $line eq "w/") { $i = $fname; $i =~ s,.*/,, or $errorMsg = "filename does not contain a slash", return 0; print "$i\n" if $helpall; substr($line, 1, 1) = " $i"; return -1, $line; } if($line =~ /^f[dkt]$/) { $fmode&$browsemode or $errorMsg = $nobrowse, return 0; my $key = "title"; $key = "keywords" if $line eq "fk"; $key = "description" if $line eq "fd"; my $val = $$btags[0]{$key}; if(defined $val) { print "$val\n"; } else { print "no $key\n"; } return 1; } if($line =~ /^sm(\d*)$/) { $cmd = 'e'; $smMail = $1; $altattach = 0; $j = sendMailCurrent(); $j and print "ok\n"; return $j; } # simple commands if($line eq "sg") { $global_lhs_rhs = 1; print "substitutions global\n" if $helpall; return 1; } if($line eq "sl") { $global_lhs_rhs = 0; print "substitutions local\n" if $helpall; return 1; } if($line eq "ci") { $caseInsensitive = 1; print "case insensitive\n" if $helpall; return 1; } if($line eq "cs") { $caseInsensitive = 0; print "case sensitive\n" if $helpall; return 1; } if($line eq "dr") { $dw = 0; print "directories readonly\n" if $helpall; return 1; } if($line eq "dw") { $dw = 1; print "directories writable\n" if $helpall; return 1; } if($line eq "dx") { $dw = 2; print "directories writable with delete\n" if $helpall; return 1; } if($line eq "dp") { $delprint ^= 1; print ($delprint ? "delete print\n" : "delete quiet\n"); return 1; } if($line eq "rh") { $reroute ^= 1; print ($reroute ? "redirect html\n" : "do not redirect html\n"); return 1; } if($line eq "pm") { $passive ^= 1; print ($passive ? "passive ftp\n" : "active ftp\n"); return 1; } if($line eq "ph") { $pdf_convert ^= 1; print ($pdf_convert ? "pdf to html conversion\n" : "pdf raw\n"); return 1; } if($line eq "vs") { $ssl_verify ^= 1; print ($ssl_verify ? "verify ssl connections\n" : "do not verify ssl connections (less secure)\n"); return 1; } if($line eq "ac") { $allowCookies ^= 1; print ($allowCookies ? "accept cookies\n" : "reject cookies\n"); return 1; } if($line eq "sr") { $allowReferer ^= 1; print ($allowReferer ? "send refering web page\n" : "don't send refering web page\n"); return 1; } if($line =~ s/^db *//) { if($line =~ /^\d$/) { $debug = $line, return 1; } else { $errorMsg = "please set debug level, 0 through 7", return 0; } } if($line =~ s/^ua *//) { if($line =~ /^\d+$/) { $errorMsg = "Agent number $line is not defined", return 0 if ! defined$agents[$line]; $agent = $agents[$line], return 1; } else { $errorMsg = "please set user agent, 0 through ".$#agents, return 0; } } # ua number if($line eq "ff") { $fetchFrames ^= 1; print ($fetchFrames ? "fetch frames\n" : "do not fetch frames\n"); return 1; } if($line eq "tn") { $textAreaCR ^= 1; print ($textAreaCR ? "dos newlines on text areas\n" : "unix newlines on text areas\n"); return 1; } if($line eq "eo") { $endmarks = 0; print "end markers off\n" if $helpall; return 1; } if($line eq "el") { $endmarks = 1; print "end markers list\n" if $helpall; return 1; } if($line eq "ep") { $endmarks = 2; print "end markers on\n" if $helpall; return 1; } return -1,"^".length($1) if $line =~ /^(\^+)$/; return stripChild() if $line eq "ws"; return unstripChild() if $line eq "us"; return -1, $line; # no change } # twoLetter # Evaluate the entered command. # This is indirectly recursive, as in g/z/ s/x/y/ # Pass the command line, and return success or failure. sub evaluate($) { my $line = shift; my ($i, $j, @pieces, $h, $href); my $postspace = 0; my $postBrowse; my $nsuf = -1; # numeric suffix my $cx; # context specified -- always $nsuf - 1 my $section = ""; # section within a document my $post = ""; # for post cgi method $nostack = 0; # suppress stacking of edit sessions $referer = ""; $referer = $fname if $allowReferer; $referer =~ s/\.browse$//; $cmd = ""; # We'll allow whitespace at the start of an entered command. $line =~ s/^\s*//; # Watch for successive q commands. $lastq = $lastqq, $lastqq = -1; if(!$inglob) { # We'll allow comments in an edbrowse script return 1 if $line =~ /^#/; return shellEscape $line if $line =~ s/^!\s*//; # Web express shortcuts if($line =~ s/^@ *//) { if(! length $line) { my @shortList = (); foreach $i (sort keys %shortcut) { $j = $i; my ($desc, $sort); defined ($desc = $shortcut{$i}{desc}) and $j .= " = $desc"; $j = "|$j"; defined ($sort = $shortcut{$i}{sort}) and $j = "$sort$j"; $j .= "\n"; push @shortList, $j; } # loop over shortcuts foreach (sort @shortList) { s/^.*?\|//; print $_; } return 1; } $cmd = '@'; ($j, $line, $postBrowse) = webExpress($line); return 0 unless $j; $line =~ s%^%b http://%; if($line =~ /\*/) { $post = $line; $post =~ s/.*\*/*/; $line =~ s/\*.*//; } } # Predefined command sets. if($line =~ s/^< *//) { if(!length $line) { foreach $i (sort keys %commandList) { print "$i\n"; } return 1; } $i = $commandList{$line}; defined $i or $errorMsg = "command set $line is not recognized", return 0; return evaluateSequence($i, $commandCheck{$line}); } # command set # Two letter commands. ($j, $line) = twoLetter($line); return $j if $j >= 0; } # not in global $startRange = $endRange = $dot; # default, if no range given $line = '+' if ! length $line; $line = ($dol ? 1 : 0) . $line if substr($line, 0, 1) eq ','; if($line =~ /^j/i) { $endRange = $dot + 1; $errorMsg = "line number too large", return "" if $endRange > $dol; } elsif(substr($line, 0, 1) eq '=') { $startRange = $endRange = $dol; } elsif($line =~ /^[wgv]/ and $line !~ /^g\s*\d*$/) { $startRange = 1, $endRange = $dol; $startRange = 0 if ! $dol; } elsif($line =~ s/^;//) { $endRange = $dol; } else { @pieces = getRangePart($line); $inglob = 0, return 0 if $#pieces < 0; $startRange = $endRange = $pieces[0]; $line = $pieces[1]; if($line =~ s/^,//) { $endRange = $dol; # new default if($line =~ /^[-'.\$+\d\/?]/) { @pieces = getRangePart($line); $inglob = 0, return 0 if $#pieces < 0; $endRange = $pieces[0]; $line = $pieces[1]; } # second address } # comma present } # end standard range processing # lc lower case, uc upper case $line =~ s:^([lmu]c|ue)$:s/.*/$1/:; if($line eq "bl") { # break the line dirBrowseCheck("break line") or return 0; $line = "sbl"; } $cmd = substr($line, 0, 1); if(length $cmd) { $line = substr($line, 1); } else { $cmd = 'p'; } $icmd = $cmd; $startRange <= $endRange or $errorMsg = "bad range", return 0; index($valid_cmd, $cmd) >= 0 or $errorMsg = "unknown command $cmd", $inglob = 0, return 0; # Change some of the command codes, depending on context $cmd = 'I' if $cmd eq 'i' and $line =~ /^[$valid_delim\d<*]/o; $cmd = 'I' if $cmd eq 's' and $fmode&$browsemode; $cmd = 's' if $cmd eq 'S'; my $writeMode = ">"; if($cmd eq "w" and substr($line, 0, 1) eq "+") { $writeMode = ">>"; $line =~ s/^.//; } !($fmode&$dirmode) or index($dir_cmd, $cmd) >= 0 or $errorMsg = "$icmd $nixdir", $inglob = 0, return 0; !($fmode&$browsemode) or index($browse_cmd, $cmd) >= 0 or $errorMsg = "$icmd $nixbrowse", $inglob = 0, return 0; $startRange > 0 or index($zero_cmd, $cmd) >= 0 or $errorMsg = "zero line number", return 0; $postspace = 1 if $line =~ s/^\s+//; if(index($spaceplus_cmd, $cmd) >= 0 and ! $postspace and length $line and $line !~ /^\d+$/) { $errorMsg = "no space after command"; return 0; } # env variable and wild card expansion if(index("brewf", $cmd) >= 0 and length $line) { $line = envFile($line); return 0 if length $errorMsg; } if($cmd eq 'B') { return balanceLine($line); } if($cmd eq 'z') { $startRange = $endRange + 1; $endRange = $startRange; $startRange <= $dol or $errorMsg = "line number too large", return 0; $cmd = 'p'; $line = $last_z if ! length $line; if($line =~ /^(\d+)\s*$/) { $last_z = $1; $last_z = 1 if $last_z == 0; $endRange += $last_z - 1; $endRange = $dol if $endRange > $dol; } else { $errorMsg = "z command should be followed by a number", return 0; } $line = ""; } # move/copy destination, the third address if($cmd eq 'm' or $cmd eq 't') { length $line or $errorMsg = "no move/copy destination", $inglob = 0, return 0; $line =~ /^[-'.\$+\d\/?]/ or $errorMsg = "invalid move/copy destination", $inglob = 0, return 0; @pieces = getRangePart($line); $inglob = 0, return 0 if $#pieces < 0; $dest = $pieces[0]; $line = $pieces[1]; $line =~ s/^\s*//; } # move copy destination if($cmd eq 'a') { ($line eq "+") ? ($line = "") : ($linePending = undef); } else { $linePending = undef; } ! length $line or index($nofollow_cmd, $cmd) < 0 or $errorMsg = "unexpected text after the $icmd command", $inglob = 0, return 0; # We don't need trailing whitespace, except for substitute or global substitute. index("sgvI", $cmd) >= 0 or $line =~ s/\s*$//; ! $inglob or index($global_cmd, $cmd) >= 0 or $errorMsg = "the $icmd command cannot be applied globally", $inglob = 0, return 0; if($cmd eq 'h') { $errorMsg = "no errors" if ! length $errorMsg; print $errorMsg,"\n"; return 1; } if($cmd eq 'H') { $helpall ^= 1; print "help messages on\n" if $helpall; return 1; } # H if(index("lpn", $cmd) >= 0) { foreach $i ($startRange..$endRange) { dispLine($i); $dot = $i; last if $intFlag; } return 1; } if($cmd eq '=') { print $endRange,"\n"; return 1; } if($cmd eq 'u') { $fmode&$firstopmode or $errorMsg = "nothing to undo", return 0; goUndo(); return 1; } # u if($cmd eq 'k') { $line =~ /^[a-z]$/ or $errorMsg = "please enter k[a-z]", return 0; $startRange == $endRange or $errorMsg = "cannot label an entire range", return 0; substr($labels, (ord($line) - ord('a'))*$lnwidth, $lnwidth) = sprintf $lnformat, $endRange; return 1; } $nsuf = $line if $line =~ /^\d+$/ and ! $postspace; $cx = $nsuf - 1; if($cmd eq 'f') { if($nsuf >= 0) { (cxCompare($cx) and cxActive($cx)) or return 0; $j = $fname[$cx]; print(length($j) ? $j : "no file"); print " [binary]" if $fmode[$cx]&$binmode; print "\n"; return 1; } if(length $line) { $errorMsg = "cannot change the name of a directory", return 0 if $fmode&$dirmode; $fname = $line; } else { print(length($fname) ? $fname : "no file"); print " [binary]" if $fmode&$binmode; print "\n"; } return 1; } # f if($cmd eq 'q') { $nsuf < 0 or (cxCompare($cx) and cxActive($cx)) or return 0; if($nsuf < 0) { $cx = $context; $errorMsg = "unexpected text after the $icmd command", return 0 if length $line; } cxReset($cx, 1) or return 0; return 1 if $cx != $context; # look around for another active session while(1) { $cx = 0 if ++$cx > $#factive; exit 0 if $cx == $context; next if ! defined $factive[$cx]; cxSwitch($cx, 1); return 1; } } # q if($cmd eq 'w') { if($nsuf >= 0) { $writeMode eq ">" or $errorMsg = "sorry, append to buffer not yet implemented", return 0; return writeContext($cx) } $line = $fname if ! length $line; if($fmode&$dirmode and $line eq $fname) { $errorMsg = "cannot write to the directory; files are modified as you go"; return 0; } return writeFile($writeMode, $line) if length $line; $errorMsg = "no file specified"; return 0; } # w # goto a file in a directory if($fmode&$dirmode and $cmd eq 'g' and ! length $line) { $cmd = 'e'; $line = $dirname . '/' . fetchLine($endRange, 0); } if($cmd eq 'e') { return (cxCompare($cx) and cxSwitch($cx, 1)) if $nsuf >= 0; if(!length $line) { $j = $context + 1; print "session $j\n"; return 1; } } # e if($cmd eq 'g' and $line =~ /^\d*$/) { ($j, $line) = hyperlink($line); return 0 unless $j; # Go on to browse the file. } # goto link if($cmd eq '^') { ! length $line or $nsuf >= 0 or $errorMsg = "unexpected text after the ^ command", return 0; $nsuf = 1 if $nsuf < 0; while($nsuf) { $errorMsg = "no previous text", return 0 if ! defined $backup; cxReset($context, 2) or return 0; $h = $backup; $backup = $$h{backup}; cxUnpack($h); --$nsuf; } # Should this print be inside or outside the loop? if($dot) { dispLine($dot); } else { print "empty file\n"; } return 1; } # ^ if($cmd eq 'A') { return showLinks(); } # A if($icmd eq 's' or $icmd eq 'S') { # A few shorthand notations. if($line =~ /^([,.;:!?)"-])(\d?)$/) { my $suffix = $2; $line = "$1 +"; # We have to escape the question mark and period $line =~ s/^([?.])/\\$1/; $line = "/$line/$1\\n"; $line .= "/$suffix" if length $suffix; } } # original command was s readyUndo if ! $inglob; if($cmd eq 'g' or $cmd eq 'v') { return doGlobal($line); } # global if($cmd eq 'I') { $fmode&$browsemode or $errorMsg = $nobrowse, $inglob = 0, return 0; if($line =~ /^\d*\?/) { # status $inglob and $errorMsg = $inoglobal, $inglob = 0, return 0; $startRange == $endRange or $errorMsg = $inorange, return 0; infIndex($endRange, $line) > 0 or return 0; infStatus($line); return 1; } # get info on input field if($line =~ /^\d*([=<])/) { my $asg = $1; $subprint = 1; my $yesdot = 0; my $t = $line; $t =~ s/^\d*[=<]//; if($asg eq '<') { if($t =~ /^\d+$/) { my $cx = $t-1; cxCompare($cx) and cxActive($cx) or $inglob = 0, return 0; my $dolcx = $dol[$cx]; $dolcx == 1 or $errorMsg = "session $t should contain exactly one line", $inglob = 0, return 0; $t = fetchLineContext(1, 1, $cx); } else { $errorMsg = ""; $t = envFile $t; length($errorMsg) and $inglob = 0, return 0; open FH, $t or $errorMsg = "cannot open $t, $!", $inglob = 0, return 0; $t = ; defined $t or $errorMsg = "empty file", $inglob = 0, return 0; if(defined ) { close FH; $errorMsg = "file contains more than one line"; $inglob = 0; return 0; } close FH; $t =~ s/[\r\n]+$//; } } # I 0 and infReplace($t) or $inglob = 0, return 0; $yesdot = $dot; } # loop over lines if($yesdot) { dispLine($yesdot) if ! $inglob; return 1; } $errorMsg = "no input fields present" if ! $inglob; return 0; } # i= if($line =~ /^\d*\*$/) { $inglob and $errorMsg = $inoglobal, $inglob = 0, return 0; $startRange == $endRange or $errorMsg = $inorange, return 0; infIndex($endRange, $line) > 0 or return 0; ($j, $line, $post) = infPush(); # return code of -1 means there's more to do. return $j unless $j < 0; } elsif( $line !~ m&^\d*[$valid_delim]&o) { $errorMsg = "unknown input field directive, please use I? or I= or I/text/replacement/"; return 0; } } # input field # Pull section indicator off of a url. $section = $1 if $cmd eq 'b' and $line =~ s/(#.*)//; if(($cmd eq 'b' or $cmd eq 'e') and length $line) { $h = undef; $h = cxPack() if $dol and ! $nostack; cxReset($context, 0) or return 0; $startRange = $endRange = 0; $changeFname = ""; if($line =~ /^\x80mail\x80(.*)$/) { # special code for sendmail link $href = $1; my $subj = urlSubject(\$href); $subj = "Comments" unless length $subj; if(lineLimit 2) { $i = 0; } else { $i = 1; push @text, "To: $href"; $map .= sprintf($lnformat, $#text); push @text, "Subject: $subj"; $map .= sprintf($lnformat, $#text); $dot = $dol = 2; print "SendMail link. Compose your mail, type sm to send, then ^ to get back.\n"; apparentSize(); } } else { $fname = $line; $i = readFile($fname, $post); $fmode &= ~($changemode|$firstopmode); } $filesize = -1, cxUnpack($h), return 0 if !$i and ! $dol and is_url($fname); if(defined $h) { $$h{backup} = $backup if defined $backup; $backup = $h; } return 0 if ! $i; $fname = $changeFname if length $changeFname; $cmd = 'e' if $fmode&$binmode or ! $dol; return 1 if $cmd eq 'e'; } if($cmd eq 'b') { if(! ($fmode&$browsemode)) { readyUndo(); print("$filesize\n"), $filesize = -1 if $filesize >= 0; render() or return 0; if(defined $postBrowse) { $$btags[0]{pb} = $postBrowse; evaluateSequence($postBrowse, 0); if($$btags[0]{dol2} > $dol) { $fmode &= ~($changemode|$firstopmode); apparentSize(); } } } else { $errorMsg = "already browsing", return 0 if ! length $section; } return 1 if ! length $section; $section =~ s/^#//; $j = findSection($section); $errorMsg = "cannot locate section #$section", return 0 unless $j; $dot = $j; dispLine($dot); return 1; } # b if($cmd eq 'm' or $cmd eq 't') { return moveCopy(); } if($cmd eq 'i') { $cmd = 'a'; --$startRange, --$endRange; } if($cmd eq 'c') { delText($startRange, $endRange) or return 0; $endRange = --$startRange; $cmd = 'a'; } if($cmd eq 'a') { return readLines(); } if($cmd eq 'd') { $i = ($endRange == $dol); if($fmode & $dirmode) { $j = delFiles(); } else { $j = delText($startRange, $endRange); } $inglob = 0 if ! $j; if($j and $delprint and ! $inglob) { $i ? print "end of file\n" : dispLine($dot); } return $j; } # d if($cmd eq 'j' or $cmd eq 'J') { return joinText(); } # j if($cmd eq 'r') { return readContext($cx) if $nsuf >= 0; return readFile($line, "") if length $line; $errorMsg = "no file specified"; return 0; } # r if($cmd eq 's' or $cmd eq 'I') { $j = substituteText($line); $inglob = $j = 0 if $j < 0; return $j; } # substitute $errorMsg = "command $icmd not yet implemented"; $inglob = 0; return 0; } # evaluate sub evaluateSequence($$) { my $commands = shift; my $check = shift; foreach my $go (@$commands) { $inglob = 0; $intFlag = 0; $filesize = -1; my $rc = evaluate($go); print "$filesize\n" if $filesize >= 0; $rc or ! $check or return 0; } return 1; } # evaluateSequence # Hash to map html tags onto their English descriptions. # For instance, P maps to "paragraph". # Most of the tags, such as FONT, map to nothing, # whence they are thrown away. # The first two characters are not part of the description. # It forms a number that describes the nestability of the tag. # Bit 1 means the tag should be nested, like parentheses. # In fact all the bit1 tags should nest amongst eachother, unlike #
    (nesting error). # Bit 2 means a tag may appear inside itself, like nested lists. # Bit 4 means the tag implies a paragraph break. # Bit 8 means we retain attributes on the positive tag. # bit 16 means to close an open anchor *before* applying this tag %tagdesc = ( sub => "11a subscript", font => " 3a font", center => " 3centered text", sup => "11a superscript", title => "17the title", head => "17the html header information", body => "27the html body", bgsound => "24background music", meta => " 8a meta tag", base => " 8base reference for relative URLs", img => " 8an image", br => " 0a line break", p => "20a paragraph", blockquote => "20a quoted paragraph", div => "20a divided section", h => "21a header", dt => "20a term", dd => "20a definition", hr => "16a horizontal line", ul => "23a bullet list", ol => "23a numbered list", dl => "23a definition list", li => "16a list item", form => "25a form", input => "24an input item", a => "25an anchor", frame => "28a frame", map => "28An image map", area => "24an image map area", # I've seen tables nested inside tables -- I don't know why! table => "31a table", tr => "19a table row", td => "19a table entry", th => "19a table heading", pre => " 5a preformatted section", xmp => " 5a preformatted section", address => " 5a preformatted section", script => " 1a script", style => " 1a style block", noframes => " 1noframe section", select => "25an option list", textarea => "25an input text area", option => "24a select option", # The following tags convey formatting information that is eventually # discarded, but I'll track them for a while, # just to verify nestability. em => " 1a block of emphasized text", strong => " 1a block of emphasized text", b => " 1a block of bold text", i => " 1a block of italicized text", code => " 1a block of sample code", samp => " 1a block of sample code", ); # We encode tags in a @tag attribute=value attribute=value ...@ format, # though of course we don't use the @ sign. # We use \x80, which should not appear in international text. # I simply hard code it - it makes things simpler. # Support routine, to encode a tag. # Run from within a global substitute. # Pas the name of the tag, slash, and tag arguments sub processTag($$$) { my ($tag, $slash, $attributes) = @_; my $nlcount = $attributes =~ y/\n/\n/; # newline count my $doat = 0; # do attributes $tag = lc $tag; my $desc = $tagdesc{$tag}; if(defined $desc) { $doat = (substr($desc, 0, 2) & 8); } else { $tag = "z"; } # Do we need to gather up the attributes? if(!$doat or $slash eq "/") { # Guess not, just return the tag. return "" if $tag eq "z" and ! $nlcount; return "\x80$tag$slash$nlcount\x80"; } # Process each whitespace separated chunk, taking quotes into account. # note that name="foo"size="1" is suppose to be two separate tags; # God help us! # Borrow a global variable, even though this may not be an input tag. $itag = {tag => $tag}; push @$btags, $itag; $attributes =~ s/( # replace the entire matched text \w+ # attribute name (?>\s*=\s* # as in name=value (?> # a sequence of choices [^\s"']+ # regular printable characters | "[^"]*" # double quoted string | '[^']*' # single quoted string ) # one of three formats )? # =value )/processAttr($1)/xsge; # Capture description and keywords. if($tag eq "meta") { my $val = $$itag{name}; if(defined $val) { $val = lc $val; if($val eq "description" or $val eq "keywords") { my $content = $$itag{content}; if(defined $content) { stripWhite \$content; $$btags[0]{$val} = $content if length $content; } # content } # description or keywords } # name= pop @$btags; return "" unless $nlcount; return "\x80z$nlcount\x80"; } # meta tag my $tagnum = $#$btags; return "\x80$tag$nlcount,$tagnum\x80"; } # processTag # Support routine, to crack attribute=value. sub processAttr($) { my $line = shift; # Get rid of spaces around first equals. $line =~ s/^([^=\s]*)\s*=\s*/$1=/; # Get rid of the quotes. $line =~ s/("[^"]*"|'[^']*')/substr($1,1,-1)/sge; my $attr = lc $line; $attr =~ s/\s*=.*//s; return "" unless $attr =~ /^\w+$/; $line =~ s/^[^=]*=//s or $line = ""; $line =~ s/&([a-zA-Z]+|#\d+);/metaChar($1)/ge; $$itag{$attr} = $line; return ""; } # processAttr # Support routine, to encode a bang tag. # Run from within a global substitute. sub processBangtag($) { my $item = shift; if($item eq "'" or $item eq '"') { return (length $bangtag ? " " : $item); } if(substr($item, 0, 1) eq '<') { return "" if length $bangtag; return $item if $item eq "<"; $bangtag = substr $item, 1; return " my $l = length($bangtag) - 1; $l &= ~1; # back down to an even number return " " if $l and ! length $item; # lone > inside a comment $bangtag = ""; return ">"; } # processBangtag # Turn <>'" in javascript into spaces, as we did above. sub processScript($) { my $item = shift; if(length($item) < 5) { return ($inscript ? " " : $item); } # now $item is