# STD.pm # # Copyright 2007-2012, Larry Wall # # You may copy this software under the terms of the Artistic License, # version 2.0 or later. grammar STD:ver<6.0.0.alpha>:auth; use DEBUG; use NAME; use Stash; use Cursor; our $ALL; =begin comment Contextuals used in STD ======================= # per parse my $*ACTIONS; # class or object which defines reduce actions my $*SETTINGNAME; # name of core setting my $*TMP_PREFIX; # where to put tmp files my $*ORIG; # the original program string my @*ORIG; # same thing as individual chars my @*MEMOS; # per-position info such as ws and line number my $*HIGHWATER; # where we were last looking for things my $*HIGHMESS; # current parse failure message my $*HIGHEXPECT; # things we were looking for at the bleeding edge my $*IN_PANIC; # don't panic recursively # symbol table management our $ALL; # all the stashes, keyed by id my $*CORE; # the CORE scope my $*SETTING; # the SETTING scope my $*GLOBAL; # the GLOBAL scope my $*PROCESS; # the PROCESS scope my $*UNIT; # the UNIT scope my $*CURLEX; # current lexical scope info my $*CURPKG; # current package scope my %*MYSTERY; # names we assume may be post-declared functions # tree attributes, marked as propagating up (u) down (d) or up-and-down (u/d) my %*LANG; # (d) braided languages: MAIN, Q, Regex, etc my $*IN_DECL; # (d) a declarator is looking for a name to declare my $*HAS_SELF; # (d) in a context where 'self' exists my $*SCOPE = ""; # (d) which scope declarator we're under my $*MULTINESS; # (d) which multi declarator we're under my $*PKGDECL ::= ""; # (d) current package declarator my $*NEWPKG; # (u/d) new package being declared my $*NEWLEX; # (u/d) new lex info being declared my $*DECLARAND; # (u/d) new object associated with declaration my $*GOAL ::= "(eof)"; # (d) which special terminator we're most wanting my $*IN_REDUCE; # (d) attempting to parse an [op] construct my $*IN_META; # (d) parsing a metaoperator like [..] my $*QUASIMODO; # (d) don't carp about quasi variables my $*LEFTSIGIL; # (u) sigil of LHS for item vs list assignment my $*QSIGIL; # (d) sigil of current interpolation my $*INVOCANT_OK; # (d) parsing a list that allows an invocant my $*INVOCANT_IS; # (u) invocant of args match my $*BORG; # (u/d) who to blame if we're missing a block =end comment =begin notes Some rules are named by syntactic category plus an additional symbol specified in adverbial form, either in bare :name form or in :sym form. (It does not matter which form you use for identifier symbols, except that to specify a symbol "sym" you must use the :sym form of adverb.) If you use the rule within the rule, it will parse the symbol at that point. At the final reduction point of a rule, if $sym has been set, that is used as the final symbol name for the rule. This need not match the symbol specified as part the rule name; that is just for disambiguating the name. However, if no $sym is set, the original symbol will be used by default. This grammar relies on transitive longest-token semantics. =end notes method p6class () { ::STD::P6 } method TOP ($STOP = '') { my $lang = self.cursor_fresh( self.p6class ); if $STOP { my $*GOAL ::= $STOP; $lang.unitstop($STOP).comp_unit; } else { $lang.comp_unit; } } ############## # Precedence # ############## # The internal precedence levels are *not* part of the public interface. # The current values are mere implementation; they may change at any time. # Users should specify precedence only in relation to existing levels. # Some special tags: # :iffy - operator returns a Bool or works in boolean context # :diffy - operator produces result different type from arguments # :fiddly - # :pure - constant %term = (:dba('term') , :prec); constant %methodcall = (:dba('methodcall') , :prec, :assoc, :uassoc, :fiddly, :!pure); constant %autoincrement = (:dba('autoincrement') , :prec, :assoc, :uassoc, :!pure); constant %exponentiation = (:dba('exponentiation') , :prec, :assoc, :pure); constant %symbolic_unary = (:dba('symbolic unary') , :prec, :assoc, :uassoc, :pure); constant %multiplicative = (:dba('multiplicative') , :prec, :assoc, :pure); constant %additive = (:dba('additive') , :prec, :assoc, :pure); constant %replication = (:dba('replication') , :prec, :assoc, :pure); constant %concatenation = (:dba('concatenation') , :prec, :assoc, :pure); constant %junctive_and = (:dba('junctive and') , :prec, :assoc, :pure); constant %junctive_or = (:dba('junctive or') , :prec, :assoc, :pure); constant %named_unary = (:dba('named unary') , :prec, :assoc, :uassoc, :pure); constant %structural = (:dba('structural infix'), :prec, :assoc, :diffy); constant %chaining = (:dba('chaining') , :prec, :assoc, :diffy, :iffy, :pure); constant %tight_and = (:dba('tight and') , :prec, :assoc); constant %tight_or = (:dba('tight or') , :prec, :assoc); constant %conditional = (:dba('conditional') , :prec, :assoc, :iffy); constant %item_assignment = (:dba('item assignment') , :prec, :assoc, :!pure); constant %list_assignment = (:dba('list assignment') , :prec, :assoc, :fiddly, :!pure); constant %loose_unary = (:dba('loose unary') , :prec, :assoc, :uassoc, :pure); constant %comma = (:dba('comma') , :prec, :assoc, :nextterm, :fiddly, :pure); constant %list_infix = (:dba('list infix') , :prec, :assoc, :pure); constant %list_prefix = (:dba('list prefix') , :prec, :assoc, :uassoc); constant %loose_and = (:dba('loose and') , :prec, :assoc); constant %loose_or = (:dba('loose or') , :prec, :assoc); constant %LOOSEST = (:dba('LOOSEST') , :prec); constant %terminator = (:dba('terminator') , :prec, :assoc); # "epsilon" tighter than terminator #constant $LOOSEST = %LOOSEST; constant $LOOSEST = "a=!"; # XXX preceding line is busted constant $item_assignment_prec = 'i='; constant $methodcall_prec = 'y='; ############## # Categories # ############## # Categories are designed to be easily extensible in derived grammars # by merely adding more rules in the same category. The rules within # a given category start with the category name followed by a differentiating # adverbial qualifier to serve (along with the category) as the longer name. # The endsym context, if specified, says what to implicitly check for in each # rule right after the initial . Normally this is used to make sure # there's appropriate whitespace. # Note that endsym isn't called if # isn't called. my $*endsym = "null"; my $*endargs = -1; proto token category {*} token category:category { } token category:sigil { } proto token sigil {*} token category:twigil { } proto token twigil is endsym {*} token category:special_variable { } proto token special_variable {*} token category:comment { } proto token comment {*} token category:version { } proto token version {*} token category:module_name { } proto token module_name {*} token category:value { } proto token value {*} token category:term { } proto token term {*} token category:numeric { } proto token numeric {*} token category:quote { } proto token quote () {*} token category:prefix { } proto token prefix is unary is defequiv(%symbolic_unary) {*} token category:infix { } proto token infix is binary is defequiv(%additive) {*} token category:postfix { } proto token postfix is unary is defequiv(%autoincrement) {*} token category:dotty { } proto token dotty is endsym {*} token category:circumfix { } proto token circumfix {*} token category:postcircumfix { } proto token postcircumfix is unary {*} # unary as far as EXPR knows... token category:quote_mod { } proto token quote_mod {*} token category:trait_mod { } proto token trait_mod is endsym {*} token category:initializer { } proto token initializer is endsym {*} token category:type_declarator { } proto token type_declarator is endsym {*} token category:scope_declarator { } proto token scope_declarator is endsym {*} token category:package_declarator { } proto token package_declarator is endsym {*} token category:multi_declarator { } proto token multi_declarator is endsym {*} token category:routine_declarator { } proto token routine_declarator is endsym {*} token category:regex_declarator { } proto token regex_declarator is endsym {*} token category:statement_prefix { } proto rule statement_prefix () {*} token category:feed_separator { } proto token feed_separator {*} token category:statement_control { } proto rule statement_control is endsym {*} token category:statement_mod_cond { } proto rule statement_mod_cond is endsym {*} token category:statement_mod_loop { } proto rule statement_mod_loop is endsym {*} token category:infix_prefix_meta_operator { } proto token infix_prefix_meta_operator is binary {*} token category:infix_postfix_meta_operator { } proto token infix_postfix_meta_operator ($op) is binary {*} token category:infix_circumfix_meta_operator { } proto token infix_circumfix_meta_operator is binary {*} token category:postfix_prefix_meta_operator { } proto token postfix_prefix_meta_operator is unary {*} token category:prefix_postfix_meta_operator { } proto token prefix_postfix_meta_operator is unary {*} token category:prefix_circumfix_meta_operator { } proto token prefix_circumfix_meta_operator is unary {*} token category:terminator { } proto token terminator {*} token unspacey { <.unsp>? } token begid { } token endid { > } token spacey { > } token keyspace { [ > || <.panic: "Whitespace required after keyword"> ] } token nofun { } # Note, don't reduce on a bare sigil unless you don't want a twigil or # you otherwise don't care what the longest token is. token sigil:sym<$> { } token sigil:sym<@> { } token sigil:sym<%> { } token sigil:sym<&> { } token twigil:sym<.> { } token twigil:sym { } token twigil:sym<^> { } token twigil:sym<:> { } token twigil:sym<*> { } token twigil:sym { } token twigil:sym<=> { } token twigil:sym<~> { } # overridden in subgrammars token stopper { } # hopefully we can include these tokens in any outer LTM matcher regex stdstopper { :temp $*STUB = return self if @*MEMOS[self.pos] :exists; :dba('standard stopper') [ | | | | > | $ # unlikely, check last (normal LTM behavior) ] { @*MEMOS[$¢.pos] ||= 1; } } token longname { {} [ > ]* } token name { [ | * | + ] } token morename { :my $*QSIGIL ::= ''; '::' [ || > [ | | :dba('indirect name') '(' ~ ')' ] || <.panic: "Name component may not be null"> ]? } ############################## # Quote primitives # ############################## # assumes whitespace is eaten already method peek_delimiters { my $pos = self.pos; my $startpos = $pos; my $char = substr(self.orig,$pos++,1); if $char ~~ /^\s$/ { self.panic("Whitespace character is not allowed as delimiter"); # "can't happen" } elsif $char ~~ /^\w$/ { self.panic("Alphanumeric character is not allowed as delimiter"); } elsif $char eq '' { self.panic("No delimiter found"); } elsif not ord $char { self.panic("Null character is not allowed as delimiter"); } elsif %STD::close2open{$char} { self.panic("Use of a closing delimiter for an opener is reserved"); } elsif $char eq ':' { self.panic("Colons may not be used to delimit quoting constructs"); } my $rightbrack = %STD::open2close{$char}; if not defined $rightbrack { return $char, $char; } while substr(self.orig,$pos,1) eq $char { $pos++; } my $len = $pos - $startpos; my $start = $char x $len; my $stop = $rightbrack x $len; return $start, $stop; } role startstop[$start,$stop] { token starter { $start } token stopper { $stop } } role stop[$stop] { token starter { } token stopper { $stop } } role unitstop[$stop] { token unitstopper { $stop } } token unitstopper { $ } method balanced ($start,$stop) { self.mixin( ::startstop[$start,$stop] ); } method unbalanced ($stop) { self.mixin( ::stop[$stop] ); } method unitstop ($stop) { self.mixin( ::unitstop[$stop] ); } method truly ($bool,$opt) { return self if $bool; self.sorry("Cannot negate $opt adverb"); self; } token charname { [ | | .*? ] || <.sorry: "Unrecognized character name"> .*? } token charnames { \s* [<.ws>] +% [','\s*] } token charspec { [ | :dba('character name') '[' ~ ']' | \d+ | <[ ?..Z \\.._ ]> | <.obsbrack> | <.sorry: "Unrecognized \\c character"> . ] } proto token backslash {*} proto token escape {*} token starter { } token escape:none { } # and this is what makes nibbler polymorphic... method nibble ($lang) { self.cursor_fresh($lang).nibbler; } # note: polymorphic over many quote languages, we hope token nibbler { :my $text = ''; :my $from = self.pos; :my $to = $from; :my @nibbles = (); :my $multiline = 0; { $.from = self.pos; } [ > [ || { push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to; my $n = $[*-1]; my @n = @$n; push @nibbles, $[*-1]; push @nibbles, @n; push @nibbles, $[*-1]; $text = ''; $to = $from = $¢.pos; } || { push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to; push @nibbles, $[*-1]; $text = ''; $to = $from = $¢.pos; } || . { my $ch = substr(self.orig, $¢.pos-1, 1); $text ~= $ch; $to = $¢.pos; if $ch ~~ "\n" { $multiline++; } } ] ]* { push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to or !@nibbles; $ = \@nibbles; $.pos = $¢.pos; $ :delete; $ :delete; $ :delete; $ :delete; $*LAST_NIBBLE = $¢; $*LAST_NIBBLE_MULTILINE = $¢ if $multiline; } } token babble ($l) { :my $lang = $l; :my $start; :my $stop; <.ws> [ <.ws> { my $kv = $[*-1]; $lang = ($lang.tweak(| ($kv. => $kv.)) or $lang.sorry("Unrecognized adverb :" ~ $kv. ~ '(' ~ $kv. ~ ')')); } ]* $ = { ($start,$stop) = $¢.peek_delimiters(); $lang = $start ne $stop ?? $lang.balanced($start,$stop) !! $lang.unbalanced($stop); [$lang,$start,$stop]; } } our @herestub_queue; class Herestub { has Str $.delim; has $.orignode; has $.lang; } role herestop { token starter { } regex stopper { ^^ {} $=[\h*?] :r $*DELIM \h* <.unv>?? $$ \v? } } # XXX be sure to temporize @herestub_queue on reentry to new line of heredocs method heredoc () { my $*CTX ::= self.callm if $*DEBUG +& DEBUG::trace_call; my $here = self; while my $herestub = shift @herestub_queue { my $*DELIM = $herestub.delim; my $lang = $herestub.lang.mixin( ::herestop ); my $doc; if ($doc) = $here.nibble($lang) { $here = $doc.trim_heredoc(); $herestub.orignode = $doc; } else { self.panic("Ending delimiter $*DELIM not found"); } } return self.cursor($here.pos); # return to initial type } token quibble ($l) { :my ($lang, $start, $stop); { my $B = $; ($lang,$start,$stop) = @$B; } $start [ $stop || <.panic: "Couldn't find terminator $stop"> ] { $lang<_herelang> and $¢.queue_heredoc($[0], $lang<_herelang>) } } method queue_heredoc($delim, $lang) { push @herestub_queue, ::Herestub.new( delim => $delim, lang => $lang, orignode => self); return self; } token quotepair { :my $key; :my $value; ':' :dba('colon pair (restricted)') [ | '!' [ <.sorry: "Argument not allowed on negated pair"> ]? { $key = $.Str; $value = 0; } | { $key = $.Str; } [ || <.unsp>? { $value = $; } || { $value = 1; } ] | $=(\d+) $=(<[a..z]>+) [ <.sorry: "2nd argument not allowed on pair"> ]? { $key = $.Str; $value = $.Str; } ] $ = {$key} $ = {$value} } token quote:sym<「 」> { :dba('perfect quotes') "「" ~ "」" ).unbalanced("」"))> } token quote:sym<' '> { :dba('single quotes') "'" ~ "'" ).tweak(:q).unbalanced("'"))> } token quote:sym<‘ ’> { :dba('single quotes') "‘" ~ "’" ).tweak(:q).unbalanced("’"))> } token quote:sym<" "> { :dba('double quotes') '"' ~ '"' ).tweak(:qq).unbalanced('"'))> } token quote:sym<“ ”> { :dba('double quotes') '“' ~ '”' ).tweak(:qq).unbalanced('”'))> } token circumfix:sym<« »> { :dba('shell-quote words') '«' ~ '»' ).tweak(:qq).tweak(:ww).balanced('«','»'))> } token circumfix:sym«<< >>» { :dba('shell-quote words') '<<' ~ '>>' ).tweak(:qq).tweak(:ww).balanced('<<','>>'))> } token circumfix:sym«< >» { :dba('quote words') '<' ~ '>' [ [ ' > <.obs('', '$' ~ '*IN.lines (or add whitespace to suppress warning)')> ]? # XXX fake out gimme5 [ ' > <.obs('<>', "lines() to read input,\n or ('') to represent the null string,\n or () to represent the empty list")> ]? ).tweak(:q).tweak(:w).balanced('<','>'))> ] } ################## # Lexer routines # ################## token ws { :temp $*STUB = return self if @*MEMOS[self.pos] :exists; :my $startpos = self.pos; :my $*HIGHEXPECT = {}; :dba('whitespace') [ | \h+ { @*MEMOS[$¢.pos] = $startpos; } # common case | ::: { @*MEMOS[$startpos]:delete; } <.sorry: "Whitespace is required between alphanumeric tokens"> # must \s+ between words ] || [ | <.unsp> | <.vws> <.heredoc> | <.unv> | $ { $¢.moreinput } ]* { if ($¢.pos == $startpos) { @*MEMOS[$¢.pos]:delete; } else { @*MEMOS[$¢.pos] = $startpos; @*MEMOS[$¢.pos] = @*MEMOS[$startpos] if @*MEMOS[$startpos] :exists; } } } token unsp { [ \\ :dba('unspace') [ | <.vws> | <.unv> | $ { $¢.moreinput } ]* ]+ } token vws { :dba('vertical whitespace') [ [ | \v | '#DEBUG -1' { say "DEBUG"; $*DEBUG = -1; } \V* \v | '<<<<<<<' :: >>>>>>' > <.sorry: 'Found a version control conflict marker'> \V* \v | '=======' :: .*? \v '>>>>>>>' \V* \v # ignore second half ] ]+ } # We provide two mechanisms here: # 1) define $*moreinput, or # 2) override moreinput method method moreinput () { $*moreinput.() if $*moreinput; self; } token unv { :dba('horizontal whitespace') [ | \h+ | ^^ <.pod_comment> | \h* ]+ } token comment:sym<#`(...)> { '#`' :: [ || <.panic: "Opening bracket is required for #` comment"> ] <.quibble($¢.cursor_fresh( %*LANG ))> } token comment:sym<#(...)> { '#' <.suppose ))> * \h* [ '#' | $$ ] > # extra stuff on line after closer? > <.worry: "Embedded comment seems to be missing backtick"> } token comment:sym<#=(...)> { '#=' :: ))> } token comment:sym<#=> { '#=' :: $ = [\N*] } token comment:sym<#> { '#' {} \N* } token ident { <.alpha> \w* } token apostrophe { <[ ' \- ]> } token identifier { <.ident> [ <.apostrophe> <.ident> ]* } # XXX We need to parse the pod eventually to support $= variables. token pod_comment { ^^ \h* '=' <.unsp>? [ | 'begin' \h+ :: [ || .*? "\n" [ :r \h* '=' <.unsp>? 'end' \h+ $ » \N* ] || .Str eq 'END'}> .* || { my $id = $.Str; self.panic("=begin $id without matching =end $id"); } ] | 'begin' » :: \h* [ $$ || '#' || <.sorry: "Unrecognized token after =begin"> \N* ] [ .*? "\n" \h* '=' <.unsp>? 'end' » \N* || { self.panic("=begin without matching =end"); } ] | 'for' » :: \h* [ || $$ || '#' || <.sorry: "Unrecognized token after =for"> \N* ] [.*? ^^ \h* $$ || .*] | :: [ <.panic: "Obsolescent pod format, please use =begin/=end instead"> ]? [||\s||<.sorry: "Illegal pod directive">] \N* ] } # suppress fancy end-of-line checking token embeddedblock { # encapsulate braided languages :temp %*LANG; :my $*SIGNUM; :my $*GOAL ::= '}'; :temp $*CURLEX; :dba('embedded block') <.newlex> <.finishlex> '{' :: [ :lang(%*LANG
) ] [ '}' || <.panic: "Unable to parse statement list; couldn't find right brace"> ] } token binints { [<.ws><.ws>] +% ',' } token binint { <[ 0..1 ]>+ [ _ <[ 0..1 ]>+ ]* } token octints { [<.ws><.ws>] +% ',' } token octint { <[ 0..7 ]>+ [ _ <[ 0..7 ]>+ ]* } token hexints { [<.ws><.ws>] +% ',' } token hexint { <.xdigit>+ [ _ <.xdigit>+ ]* } token decints { [<.ws><.ws>] +% ',' } token decint { \d+ [ _ \d+ ]* } token integer { [ | 0 [ b '_'? | o '_'? | x '_'? | d '_'? | .Str ~ " if you mean that") }> ] | ] > <.sorry: "Decimal point must be followed by digit">]? > [ <.sorry: "Only isolated underscores are allowed inside numbers"> ]? } token radint { [ | | and not defined $ }> ] } token escale { <[Ee]> <[+\-]>? } # careful to distinguish from both integer and 42.method token dec_number { :dba('decimal number') [ | $ = [ '.' ] ? | $ = [ '.' ] ? | $ = [ ] ] [ <.sorry: "Number contains two decimal points (missing 'v' for version number?)"> ['.'\d+]+ ]? [ <.sorry: "Only isolated underscores are allowed inside numbers"> ]? } token alnumint { [ <[ 0..9 a..z A..Z ]>+ [ _ <[ 0..9 a..z A..Z ]>+ ]* ] } token rad_number { ':' $ = [\d+] <.unsp>? # XXX optional dot here? {} # don't recurse in lexer :dba('number in radix notation') :s [ || '<' [ | $ = [ '.' ] | $ = [ '.' ] | $ = [ ] ] [ '*' [ '**' || <.sorry: "Base is missing ** exponent part"> ] ]? '>' # { make radcalc($, $, $, $) } || || || <.panic: "Malformed radix number"> ] } token obsbrack { '{' <.obs('curly brackets','square brackets')> } token terminator:sym<)> { } token terminator:sym<]> { ']' } token terminator:sym<}> { '}' } # XXX should eventually be derived from current Unicode tables. constant %open2close = ( "\x0028" => "\x0029", "\x003C" => "\x003E", "\x005B" => "\x005D", "\x007B" => "\x007D", "\x00AB" => "\x00BB", "\x0F3A" => "\x0F3B", "\x0F3C" => "\x0F3D", "\x169B" => "\x169C", "\x2018" => "\x2019", "\x201A" => "\x2019", "\x201B" => "\x2019", "\x201C" => "\x201D", "\x201E" => "\x201D", "\x201F" => "\x201D", "\x2039" => "\x203A", "\x2045" => "\x2046", "\x207D" => "\x207E", "\x208D" => "\x208E", "\x2208" => "\x220B", "\x2209" => "\x220C", "\x220A" => "\x220D", "\x2215" => "\x29F5", "\x223C" => "\x223D", "\x2243" => "\x22CD", "\x2252" => "\x2253", "\x2254" => "\x2255", "\x2264" => "\x2265", "\x2266" => "\x2267", "\x2268" => "\x2269", "\x226A" => "\x226B", "\x226E" => "\x226F", "\x2270" => "\x2271", "\x2272" => "\x2273", "\x2274" => "\x2275", "\x2276" => "\x2277", "\x2278" => "\x2279", "\x227A" => "\x227B", "\x227C" => "\x227D", "\x227E" => "\x227F", "\x2280" => "\x2281", "\x2282" => "\x2283", "\x2284" => "\x2285", "\x2286" => "\x2287", "\x2288" => "\x2289", "\x228A" => "\x228B", "\x228F" => "\x2290", "\x2291" => "\x2292", "\x2298" => "\x29B8", "\x22A2" => "\x22A3", "\x22A6" => "\x2ADE", "\x22A8" => "\x2AE4", "\x22A9" => "\x2AE3", "\x22AB" => "\x2AE5", "\x22B0" => "\x22B1", "\x22B2" => "\x22B3", "\x22B4" => "\x22B5", "\x22B6" => "\x22B7", "\x22C9" => "\x22CA", "\x22CB" => "\x22CC", "\x22D0" => "\x22D1", "\x22D6" => "\x22D7", "\x22D8" => "\x22D9", "\x22DA" => "\x22DB", "\x22DC" => "\x22DD", "\x22DE" => "\x22DF", "\x22E0" => "\x22E1", "\x22E2" => "\x22E3", "\x22E4" => "\x22E5", "\x22E6" => "\x22E7", "\x22E8" => "\x22E9", "\x22EA" => "\x22EB", "\x22EC" => "\x22ED", "\x22F0" => "\x22F1", "\x22F2" => "\x22FA", "\x22F3" => "\x22FB", "\x22F4" => "\x22FC", "\x22F6" => "\x22FD", "\x22F7" => "\x22FE", "\x2308" => "\x2309", "\x230A" => "\x230B", "\x2329" => "\x232A", "\x23B4" => "\x23B5", "\x2768" => "\x2769", "\x276A" => "\x276B", "\x276C" => "\x276D", "\x276E" => "\x276F", "\x2770" => "\x2771", "\x2772" => "\x2773", "\x2774" => "\x2775", "\x27C3" => "\x27C4", "\x27C5" => "\x27C6", "\x27D5" => "\x27D6", "\x27DD" => "\x27DE", "\x27E2" => "\x27E3", "\x27E4" => "\x27E5", "\x27E6" => "\x27E7", "\x27E8" => "\x27E9", "\x27EA" => "\x27EB", "\x2983" => "\x2984", "\x2985" => "\x2986", "\x2987" => "\x2988", "\x2989" => "\x298A", "\x298B" => "\x298C", "\x298D" => "\x298E", "\x298F" => "\x2990", "\x2991" => "\x2992", "\x2993" => "\x2994", "\x2995" => "\x2996", "\x2997" => "\x2998", "\x29C0" => "\x29C1", "\x29C4" => "\x29C5", "\x29CF" => "\x29D0", "\x29D1" => "\x29D2", "\x29D4" => "\x29D5", "\x29D8" => "\x29D9", "\x29DA" => "\x29DB", "\x29F8" => "\x29F9", "\x29FC" => "\x29FD", "\x2A2B" => "\x2A2C", "\x2A2D" => "\x2A2E", "\x2A34" => "\x2A35", "\x2A3C" => "\x2A3D", "\x2A64" => "\x2A65", "\x2A79" => "\x2A7A", "\x2A7D" => "\x2A7E", "\x2A7F" => "\x2A80", "\x2A81" => "\x2A82", "\x2A83" => "\x2A84", "\x2A8B" => "\x2A8C", "\x2A91" => "\x2A92", "\x2A93" => "\x2A94", "\x2A95" => "\x2A96", "\x2A97" => "\x2A98", "\x2A99" => "\x2A9A", "\x2A9B" => "\x2A9C", "\x2AA1" => "\x2AA2", "\x2AA6" => "\x2AA7", "\x2AA8" => "\x2AA9", "\x2AAA" => "\x2AAB", "\x2AAC" => "\x2AAD", "\x2AAF" => "\x2AB0", "\x2AB3" => "\x2AB4", "\x2ABB" => "\x2ABC", "\x2ABD" => "\x2ABE", "\x2ABF" => "\x2AC0", "\x2AC1" => "\x2AC2", "\x2AC3" => "\x2AC4", "\x2AC5" => "\x2AC6", "\x2ACD" => "\x2ACE", "\x2ACF" => "\x2AD0", "\x2AD1" => "\x2AD2", "\x2AD3" => "\x2AD4", "\x2AD5" => "\x2AD6", "\x2AEC" => "\x2AED", "\x2AF7" => "\x2AF8", "\x2AF9" => "\x2AFA", "\x2E02" => "\x2E03", "\x2E04" => "\x2E05", "\x2E09" => "\x2E0A", "\x2E0C" => "\x2E0D", "\x2E1C" => "\x2E1D", "\x2E20" => "\x2E21", "\x3008" => "\x3009", "\x300A" => "\x300B", "\x300C" => "\x300D", "\x300E" => "\x300F", "\x3010" => "\x3011", "\x3014" => "\x3015", "\x3016" => "\x3017", "\x3018" => "\x3019", "\x301A" => "\x301B", "\x301D" => "\x301E", "\xFD3E" => "\xFD3F", "\xFE17" => "\xFE18", "\xFE35" => "\xFE36", "\xFE37" => "\xFE38", "\xFE39" => "\xFE3A", "\xFE3B" => "\xFE3C", "\xFE3D" => "\xFE3E", "\xFE3F" => "\xFE40", "\xFE41" => "\xFE42", "\xFE43" => "\xFE44", "\xFE47" => "\xFE48", "\xFE59" => "\xFE5A", "\xFE5B" => "\xFE5C", "\xFE5D" => "\xFE5E", "\xFF08" => "\xFF09", "\xFF1C" => "\xFF1E", "\xFF3B" => "\xFF3D", "\xFF5B" => "\xFF5D", "\xFF5F" => "\xFF60", "\xFF62" => "\xFF63", ); constant %close2open = invert %open2close; token opener { <[ \x0028 \x003C \x005B \x007B \x00AB \x0F3A \x0F3C \x169B \x2018 \x201A \x201B \x201C \x201E \x201F \x2039 \x2045 \x207D \x208D \x2208 \x2209 \x220A \x2215 \x223C \x2243 \x2252 \x2254 \x2264 \x2266 \x2268 \x226A \x226E \x2270 \x2272 \x2274 \x2276 \x2278 \x227A \x227C \x227E \x2280 \x2282 \x2284 \x2286 \x2288 \x228A \x228F \x2291 \x2298 \x22A2 \x22A6 \x22A8 \x22A9 \x22AB \x22B0 \x22B2 \x22B4 \x22B6 \x22C9 \x22CB \x22D0 \x22D6 \x22D8 \x22DA \x22DC \x22DE \x22E0 \x22E2 \x22E4 \x22E6 \x22E8 \x22EA \x22EC \x22F0 \x22F2 \x22F3 \x22F4 \x22F6 \x22F7 \x2308 \x230A \x2329 \x23B4 \x2768 \x276A \x276C \x276E \x2770 \x2772 \x2774 \x27C3 \x27C5 \x27D5 \x27DD \x27E2 \x27E4 \x27E6 \x27E8 \x27EA \x2983 \x2985 \x2987 \x2989 \x298B \x298D \x298F \x2991 \x2993 \x2995 \x2997 \x29C0 \x29C4 \x29CF \x29D1 \x29D4 \x29D8 \x29DA \x29F8 \x29FC \x2A2B \x2A2D \x2A34 \x2A3C \x2A64 \x2A79 \x2A7D \x2A7F \x2A81 \x2A83 \x2A8B \x2A91 \x2A93 \x2A95 \x2A97 \x2A99 \x2A9B \x2AA1 \x2AA6 \x2AA8 \x2AAA \x2AAC \x2AAF \x2AB3 \x2ABB \x2ABD \x2ABF \x2AC1 \x2AC3 \x2AC5 \x2ACD \x2ACF \x2AD1 \x2AD3 \x2AD5 \x2AEC \x2AF7 \x2AF9 \x2E02 \x2E04 \x2E09 \x2E0C \x2E1C \x2E20 \x3008 \x300A \x300C \x300E \x3010 \x3014 \x3016 \x3018 \x301A \x301D \xFD3E \xFE17 \xFE35 \xFE37 \xFE39 \xFE3B \xFE3D \xFE3F \xFE41 \xFE43 \xFE47 \xFE59 \xFE5B \xFE5D \xFF08 \xFF1C \xFF3B \xFF5B \xFF5F \xFF62 ]> } grammar P6 is STD { ################### # Top-level rules # ################### # Note: we only check for the stopper. We don't check for ^ because # we might be embedded in something else. rule comp_unit { :my $*begin_compunit = 1; :my $*endargs = -1; :my %*LANG; :my $*PKGDECL ::= ""; :my $*IN_DECL = ''; :my $*HAS_SELF = ''; :my $*DECLARAND; :my $*OFTYPE; :my $*NEWPKG; :my $*NEWLEX; :my $*QSIGIL ::= ''; :my $*IN_META = ''; :my $*QUASIMODO; :my $*SCOPE = ""; :my $*LEFTSIGIL; :my $*PRECLIM; :my %*MYSTERY = (); :my $*INVOCANT_OK; :my $*INVOCANT_IS; :my $*CURLEX; :my $*MULTINESS = ''; :my $*SIGNUM = 0; :my $*MONKEY_TYPING = False; :my %*WORRIES; :my @*WORRIES; :my $*FATALS = 0; :my $*IN_SUPPOSE = False; :my $*CCSTATE = ''; :my $*CURPKG; { %*LANG
= ::STD::P6 ; %*LANG = ::STD::Q ; %*LANG = ::STD::Quasi ; %*LANG = ::STD::Regex ; %*LANG = ::STD5 ; %*LANG = ::STD5::Regex ; @*WORRIES = (); self.load_setting($*SETTINGNAME); my $oid = $*SETTING.id; my $id = 'MY:file<' ~ $*FILE ~ '>'; $*CURLEX = Stash.new( 'OUTER::' => [$oid], '!file' => $*FILE, '!line' => 0, '!id' => [$id], ); $ALL.{$id} = $*CURLEX; $*UNIT = $*CURLEX; $ALL. = $*UNIT; self.finishlex; # $¢ = self.cursor_fresh($*CURLEX<$?LANGNAME>); } <.unitstart> [ || <.panic: "Confused"> ] # "CHECK" time... $ = { $*CURLEX } { $¢.explain_mystery(); if @*WORRIES { note "Potential difficulties:\n " ~ join( "\n ", @*WORRIES) ~ "\n"; } die "Check failed\n" if $*FATALS; } } # Note: because of the possibility of placeholders we can't determine arity of # the block syntactically, so this must be determined via semantic analysis. # Also, pblocks used in an if/unless statement do not treat $_ as a placeholder, # while most other blocks treat $_ as equivalent to $^x. Therefore the first # possible place to check arity is not here but in the rule that calls this # rule. (Could also be done in a later pass.) token pblock () { :temp $*CURLEX; :dba('parameterized block') [ | '{' > || { if $*BORG and $*BORG. { if $*BORG. { my $m = "Function '" ~ $*BORG. ~ "' needs parens to avoid gobbling block" ~ $*BORG..locmess; $*BORG..panic($m ~ "\nMissing block (apparently gobbled by '" ~ $*BORG. ~ "')"); } else { my $m = "Expression needs parens to avoid gobbling block" ~ $*BORG..locmess; $*BORG..panic($m ~ "\nMissing block (apparently gobbled by expression)"); } } elsif %*MYSTERY { $¢.panic("Missing block (apparently gobbled by undeclared routine?)"); } else { $¢.panic("Missing block"); } } ] [ | <.newlex(1)> :my $*GOAL := '{'; <.getsig> | <.newlex(1)> <.getsig> ] } # this is a hook for subclasses token unitstart { } token lambda { '->' | '<->' } # Look for an expression followed by a required lambda. token xblock { :my $*GOAL ::= '{'; :my $*BORG = {}; <.ws> # XXX { $*BORG. //= $.cursor(self.pos) } <.ws> } token block () { :temp $*CURLEX; :dba('scoped block') [ || <.panic: "Missing block"> ] <.newlex> <.checkyada> } token blockoid { # encapsulate braided languages :temp %*LANG; :my $*SIGNUM; <.finishlex> [ | '{YOU_ARE_HERE}' <.you_are_here> | :dba('block') '{' ~ '}' :: <.curlycheck(1)> | <.panic: 'Missing block'> | <.panic: "Malformed block"> ] } token curlycheck($code) { [ || # (usual case without comments) { @*MEMOS[$¢.pos] = 2; } || > || <.unv> $$ { @*MEMOS[$¢.pos] = 2; } || <.unsp>? { @*MEMOS[$¢.pos] = $code; } ] } token regex_block { # encapsulate braided languages :temp %*LANG; :temp %*RX; :my $lang = %*LANG; :my $*GOAL ::= '}'; [ <.ws> { my $kv = $[*-1]; $lang = ($lang.tweak(|($kv..Str => $kv.)) or $lang.panic("Unrecognized adverb :" ~ $kv. ~ '(' ~ $kv. ~ ')')); } ]* [ | '{*}' $ = {1;} | [ '{' [ '}' || <.panic: "Unable to parse regex; couldn't find right brace"> ] ] ] <.curlycheck(1)> } # statement semantics rule statementlist { :my $*INVOCANT_OK = 0; :temp $*MONKEY_TYPING; :dba('statement list') '' [ | $ | > | [ +% ]* { self.mark_sinks($) } ] } # embedded semis, context-dependent semantics rule semilist { :my $*INVOCANT_OK = 0; :dba('semicolon list') '' [ | > | [ ]* ] } token label { :my $label; ':' <.ws> [ .Str) }> <.worry("Redeclaration of '$label'")> ]? # add label as a pseudo constant { $¢.add_constant($label,self.label_id); } } token statement { :my $*endargs = -1; :my $*QSIGIL ::= 0; > # this could either be a statement that follows a declaration # or a statement that is within the block of a code declaration .bless($¢); }> [ |