// This file contains JavaScript code inside { curly braces }. // This file contains Bikeshed markup inside /* comments */. // This file contains PEG.js grammar rules which are converted to // railroad diagrams in the spec and executable JavaScript. { var base = options.base || {scheme: 'about'}; var url = options.url; var encodingOverride = options.encodingOverride || 'utf-8'; /* This function accepts a variable number of arguments. It copies the 'exception' property from the first object which defines such to the first object. It then returns the first object */ function copy(base) { var result = base; if (!result.exception) { for (var i = 1; iURLs, each returning a set of components, namely one or more of the following: scheme, scheme-data, username, password, host, port, path, query, and fragment. Initialize $result to the value of @FileUrl, @NonRelativeUrl, or @RelativeUrl depending on which one first matches the input, and then modify $result as follows: * If @Query is present in the input, set $result.query to this value. * If @Fragment is present in the input, set $result.fragment to this value. * If $result.scheme has a default port, and if $result.port is equal to that default, then delete the $port property from $result. Return $result. */ Url = base:(FileUrl / NonRelativeUrl / RelativeUrl) query:('?' Query)? fragment:('#' Fragment)? { var result = copy(base, query && query[1], fragment && fragment[1]); if (query) { result.query = query[1] }; if (fragment) { result.fragment = fragment[1].toString() }; if (Url.DEFAULT_PORT[result.scheme] == result.port) { delete result.port; } return result } /* Three production rules are defined for files, numbered from top to bottom. Examples and evaluation instructions for each:
  1. file:c:\foo\bar.html
    Initialize $result to an empty object, and then modify it as follows: * Set $result.scheme to the value returned by @FileLikeScheme. * Set $result.path to the value returned by @Path. * Remove the first element from $result.path if it is an empty string and if there is a second element which has a non-empty value. * Construct a string using the alphabetic character following the first ":" in the input concatenated with a ":". Prepend this string to $result.path.
  2. /C|\foo\bar
    Indicate a conformance error. Initialize $result to an empty object, and then modify it as follows: * Set $result.scheme to "file". * Set $result.path to the value returned by @Path. * Remove the first element from $result.path if it is an empty string and if there is a second element which has a non-empty value. * Construct a string consisting of the character following the initial "/" (if any) in the production rule concatenated with a ":". Prepend this string to the $result.path array.
  3. file:/example.com/
    Initialize $result to an empty object, and then modify it as follows: * Set $result.scheme to the value returned by @FileLikeScheme * If the @Host is present in the input, set $result.host to the value returned by the @Host production rule * If the @Host is not present and no slashes precede the @Path in the input, then the $base.path minus the last element is prepended to the $result.path. * Set $result.path to the value returned by @Path.
Return $result. Note: at the present time, file URLs are generally not interoperable, and therefore are effectively implementation defined. Furthermore, the parsing rules in this section have not enjoyed wide review, and therefore are more likely to be subject to change than other parts of this specification. People with input on this matter are encouraged to add comments to bug 23550. */
FileUrl
  = scheme:FileLikeScheme ':' letter:[a-zA-Z] [:|] [\\/]? segments:Path
    {
      // copy() returns its first argument after moving any exception
      // recorded on the parsed path onto it.
      var parsed = copy({path: segments}, segments);
      parsed.scheme = scheme;

      // Loose comparisons: a missing second element also counts as
      // "not empty" here, so a lone empty segment is dropped as well.
      var leadingEmpty = parsed.path[0] == '';
      if (leadingEmpty && parsed.path[1] != '') {
        parsed.path.shift();
      }

      // The drive letter becomes the first path segment, e.g. "c:".
      parsed.path.unshift(letter + ':');
      return parsed;
    }
  / '/'* letter:[a-zA-Z] '|' '/'? segments:Path
    {
      var parsed = copy({path: segments}, segments);
      // This form is always flagged, replacing any exception copied above.
      parsed.exception = 'Legacy compatibility issue';
      parsed.scheme = 'file';

      var leadingEmpty = parsed.path[0] == '';
      if (leadingEmpty && parsed.path[1] != '') {
        parsed.path.shift();
      }

      parsed.path.unshift(letter + ':');
      return parsed;
    }
  / scheme:FileLikeScheme ':' authority:('/' '/' Host)? slashes:'/'* segments:Path
    {
      var parsed = copy({path: segments}, segments);

      if (authority) {
        // authority is the matched sequence: slash, slash, Host.
        parsed.host = authority[2];
      } else if (slashes.length === 0) {
        // No host and no leading slash: resolve against the base path
        // with its last element removed.
        var merged = base.path.slice(0, -1);
        merged.push.apply(merged, parsed.path);
        parsed.path = merged;
      }

      parsed.scheme = scheme;
      return parsed;
    }

/*
javascript:alert("Hello, world!");
  • This rule is only to be evaluated if the value of @Scheme does not match any relative scheme. Set encoding override to "utf-8". Initialize $result to be a JSON object with $scheme set to be the result returned by @Scheme, and $scheme_data set to the result returned by @SchemeData. Return $result. Note: the resolution of bug 26338 may change how encoding override is handled. Note: the resolution of bug 27233 may add support for relative URLs for unknown schemes. */
NonRelativeUrl
  = name:Scheme ':' &{ return Url.RELATIVE_SCHEME.indexOf(name) < 0 } payload:SchemeData
    {
      // Non-relative URLs always use utf-8, whatever the caller asked for.
      encodingOverride = 'utf-8';

      // copy() moves any exception recorded on the scheme data onto the
      // result object and returns that object.
      var parsed = {scheme: name, scheme_data: payload};
      return copy(parsed, payload);
    }

/* Four production rules are defined for relative URLs, numbered from top to bottom. Examples and evaluation instructions for each:
    1. http://user:pass@example.org:21/foo/bar
      If anything other than two forward solidus characters ("//") immediately follows the first colon in the input, indicate a conformance error. Initialize $result to the value returned by @Authority. Modify $result as follows: * If @RelativeScheme is present in the input, then set $result.scheme to this value. * If @RelativeScheme is not present in the input, then set $result.scheme to the value of $base.scheme. * If @Path is present in the input, set $result.path to its value.
    2. This rule is only to be evaluated if the value of @RelativeScheme does not match $base.scheme.

      ftp:/example.com/ parsed using a base of http://example.org/foo/bar

      Indicate a conformance error. Initialize $result to the value returned by @Authority. Modify $result as follows: * Set $result.scheme to the value returned by @RelativeScheme. * If $result.host is either an empty string or contains a colon, then terminate parsing with a parse exception. * If @Path is present in the input, set $result.path to its value.
    3. http:foo/bar
      Indicate a conformance error. Initialize $result to be an empty object. Modify $result as follows: * Set $result.scheme to the value returned by @RelativeScheme. * Set $result.host to $base.host. * Set $result.path to the path concatenation of $base.path and @Path.
    4. /foo/bar
      If $base.scheme is not a relative scheme, terminate parsing with a parse exception. Initialize $result to be an empty object. Modify $result as follows: * Set $result.scheme to $base.scheme. * Set $result.host to $base.host. * Set $result.path to @Path * Replace $result.path by the path concatenation of $base.path and $result.path.
    Return $result. */
RelativeUrl
  = scheme:(RelativeScheme ':')? slash1:[/\\] slash2:[/\\] authority:Authority path:([/\\] Path)?
    {
      // Declared with var so the result never leaks into (or is clobbered
      // through) a global variable.
      // copy() moves any exception recorded on the path onto the authority
      // object and returns that object.
      var result = copy(authority, path && path[1]);
      if (path) result.path = path[1];

      // Normalise to lowercase so every alternative stores the scheme in
      // the same form regardless of the case used in the input.
      if (scheme) {
        result.scheme = scheme[0].toLowerCase();
      } else {
        result.scheme = base.scheme;
      }

      // Any backslash used where a slash is expected is a conformance error.
      if (slash1 == '\\' || slash2 == '\\' || (path && path[0] == '\\')) {
        result.exception = 'Backslash ("\\") used as a delimiter';
      }

      return result;
    }
  / scheme:RelativeScheme &{ return base.scheme != scheme.toLowerCase() } ':' slash1:[\\/]? authority:Authority path:([/\\] Path)?
    {
      // The predicate above compares the lowercased scheme, so input such as
      // "HTTP:" is recognised as matching a base scheme of "http".
      var result = copy(authority, path && path[1]);
      if (path) result.path = path[1];

      result.exception = 'Expected a slash ("/")';
      result.scheme = scheme.toLowerCase();

      // The host may be a String object, hence the loose comparison with ''.
      if (!result.host || result.host == '') error('Empty host');
      if (result.host.indexOf(':') != -1) error('Invalid host');

      if (slash1 == '\\' || (path && path[0] == '\\')) {
        result.exception = 'Backslash ("\\") used as a delimiter';
      }

      return result;
    }
  / scheme:RelativeScheme ':' path:Path
    {
      var result = copy({path: path}, path);
      result.exception = 'Expected a slash ("/")';
      result.scheme = scheme.toLowerCase();
      result.host = base.host;
      result.path = Url.pathConcat(base.path, result.path);
      return result;
    }
  / path:Path
    {
      // A bare path can only be resolved against a base that has a
      // relative scheme.
      if (Url.RELATIVE_SCHEME.indexOf(base.scheme) == -1) {
        error("relative URL provided with a non-relative base");
      }

      var result = copy({path: path}, path);
      result.scheme = base.scheme;
      result.host = base.host;
      result.path = Url.pathConcat(base.path, result.path);
      return result;
    }

/* Only one "file like" relative scheme is defined at this time: "file". This scheme is to be matched case insensitively. This production rule is to return the scheme value, lowercased. */
FileLikeScheme
  = scheme:"file"i { return scheme.toLowerCase() }

/* Six relative schemes are defined. They are to be matched against the input in a case insensitive manner.
Set encoding override to "utf-8" if the scheme matches "wss" or "ws". Return the scheme as a lowercased string. Note: the resolution of bug 26338 may change how encoding override is handled. */
RelativeScheme
  = rs:("ftp"i / "gopher"i / "https"i / "http"i) { return rs.toLowerCase() }
  / rs:"wss"i { encodingOverride = 'utf-8'; return rs.toLowerCase() }
  / rs:"ws"i { encodingOverride = 'utf-8'; return rs.toLowerCase() }

/* A scheme consists of the alphabetic letters "a" through "z" or "A" through "Z", followed by zero or more alphabetic characters or any of the following special characters: hyphen-minus (U+002D), plus sign (U+002B) or full stop (U+002E). Return the results as a lowercased string. */
Scheme
  = scheme:([a-zA-Z] [-a-zA-Z+.]*)
    { return (scheme[0] + scheme[1].join('')).toLowerCase() }

/* Initialize $result to an empty object, then modify it as follows: * If @User is present, set $result.username to its value. * If @Password is present, set $result.password to its value. * Set $result.host to the portion of the value returned by @Host that follows the last "@" sign, if any. If no "@" signs are present in the return value from the @Host production, then set $result.host to the entire value. * If one or more "@" signs are present in the value returned by the @Host production, then perform the following steps: * Indicate a conformance error. * Initialize $info to the value of '%40' plus the portion of the @Host that precedes the last "@" sign. Replace all remaining "@" signs in $info with the string "%40". * If @Password is present in the input, append $info to $result.password. * If @Password is not present in input and @User is present, append $info to $result.username. * If @User is not present in input, set $result.username to $info. * If a user name is present and $result.host is an empty string, terminate parsing with a parse exception. * If @Port is present, set $result.port to its value. Return $result. */
Authority
  = userpass:( User (':' Password)? '@' )? host:Host port:(':' Port)?
    {
      // Declared with var so the result never leaks into a global.
      // copy() moves the first exception found on host, user, password or
      // port onto the fresh result object and returns that object.
      var result = copy({}, host, userpass && userpass[0], userpass && userpass[1] && userpass[1][1], port && port[1]);

      if (userpass) {
        result.username = userpass[0];
        if (userpass[1]) result.password = userpass[1][1];
      }

      // Everything after the last "@" is the host; anything before it
      // belongs to the credentials and must be percent encoded.
      var pieces = host.split('@');
      result.host = pieces.pop();
      if (pieces.length > 0) {
        result.exception = 'At sign ("@") in user or password needs to be percent encoded';
        var info = '%40' + pieces.join('%40');
        if (result.password != null) {
          result.password += info;
        } else if (result.username != null) {
          result.username += info;
        } else {
          // No user in the input: start the user name from the encoded
          // text instead of appending to an undefined value.
          result.username = info;
        }
      }

      if (result.username != null && result.host == '') {
        error('Empty host');
      }

      if (port) result.port = port[1];
      return result;
    }

/* Consume all characters until either a solidus (U+002F), a reverse solidus (U+005C), a question mark (U+003F), a number sign (U+0023), a commercial at (U+0040), a colon (U+003A), or the end of string is encountered. Return the cleansed result using the default encode set. */
User
  = user:[^/\\?#@:]*
    { return cleanse(user, Url.DEFAULT_ENCODE_SET, 'user') }

/* Consume all characters until either a solidus (U+002F), a reverse solidus (U+005C), a question mark (U+003F), a number sign (U+0023), a commercial at (U+0040), or the end of string is encountered. Return the cleansed result using the default encode set. */
Password
  = password:[^/\\?#@]*
    { return cleanse(password, Url.DEFAULT_ENCODE_SET, 'password') }

/* If the input contains an @IPv6Addr, return "[" plus the result returned by @IPv6Addr plus "]". If the input contains an @IPv4Addr, return the result returned by @IPv4Addr. Otherwise: * If any U+0009, U+000A, U+000D, U+200B, U+2060, or U+FEFF characters are present in the input, remove those characters and indicate a conformance error. * Let $domain be the result of host parsing the value. If this results in a failure, terminate processing with a parse exception. If host parsing returned a value that was different than what was provided as input, indicate a conformance error. * Try parsing $domain as an @IPv4Addr.
If this succeeds, replace $domain with the result. * Validate the $domain as follows: * split the string at U+002E (full stop) characters * If any of the pieces, other than the first one, are empty strings, indicate a conformance error. * Return $domain. Note: the resolution of bug 25334 may change what codepoints are allowed in a domain. Note: the resolution of bug 27266 may change the way domain names and trailing dots are handled. */ Host = '[' addr:IPv6Addr ']' &{ return lookahead(/^([\\\/?#:]|$)/) } { return '[' + addr + ']' } / addr:IPv4Addr &{ return lookahead(/^([\\\/?#:]|$)/) } { return addr } / host:[^:/\\?#]* { var warn = null; for (var i=0; i -1) { before.splice(index, 1); } else { warn = "Domain name contains an IDNA mapped character"; } }); } /* If the result can be parsed as an IPv4 address, return that instead */ try { return UrlParser.parse(host, {startRule: 'IPv4Addr'}); } catch (e) { } /* warn if NFC normalization changed the URL */ before = host; host = host.normalize('NFC'); if (host != before) { warn = "Domain name contains an non-NFC normalized character"; } if (/[\u0000\u0009\u000A\u000D\u0020#%\/:?\[\\\]]/.test(host)) { var c = host.match(/[\u0000\u0009\u000A\u000D\u0020#%\/:?\[\\\]]/)[0]; error('Invalid domain character U+' + ("000" + c.charCodeAt(0).toString(16)).slice(-4).toUpperCase()); } host = host.split('.'); for (var i=0; iparse exception and terminate processing unless there are exactly six @H16 values and one @LS32 value. * If there are consecutive colon characters present in the input, indicate a parse exception and terminate processing if the total number of values (@H16 or @LS32) is more than six. * Unless there is a @LS32 value present, indicate a parse exception and terminate processing if consecutive colon characters are present in the input or if there are more than one @LS32 value after the consecutive colons. 
Perform the following steps: * Append "0" values to $pre while the sum of the lengths of the $pre and $post arrays is less than six. * Append a "0" value to $pre if no @LS32 item is present in the input and the sum of the lengths of the $pre and $post array is seven. * Append $last to $pre. Return the ipv6 serialized value of $pre as a string. Note: the resolution of bug 27234 may add support for link-local addresses. */ IPv6Addr = addr:(((H16 ':')* ':')? (H16 ':')* (H16 / LS32)) { var pre = []; var post = []; var ipv4 = null; if (addr[0]) { for (var i=0; i 6) { error('malformed IPv6 Address') } } else { if (addr[1].length != 6 || addr[2].indexOf('.')==-1) { error('malformed IPv6 Address') } }; for (var i=0; i 1) { error('malformed IPv6 Address') }; if (addr[2].indexOf('.') == -1) { post.push(addr[2]) } else { ipv4 = addr[2] }; return Url.canonicalizeIpv6(pre, post, ipv4) } /* If any but the last @Number is greater or equal to 256, terminate processing with a parse exception. If the last @Number is greater than or equal to 256 to the power of (5 minus the number of @Number's present in the input), terminate processing with a parse exception. Unless four @Number's are present, indicate a conformance error. Initialize $n to the last @Number. If the first @Number is present, add it's value times 256**3 to $n. If the second @Number is present, add it's value times 256**2 to $n. If the third @Number is present, add it's value times 256 to $n. Initialize $result to an empty array. Four times do the following: * Prepend the value of $n modulo 256 to $result. * Set $n to the value of the integer quotient of $n divided by 256. Join the values in $result with a Full Stop (U+002E) character, and return the results as a string. Note: the resolution of bug 26431 may change this definition. */ IPv4Addr = addr:((Number '.' (Number '.' (Number '.')?)?)? 
Number)
    {
      // addr[1] is the last Number; addr[0], when present, nests the
      // leading "Number ." groups: [first, '.', [second, '.', [third, '.']]].
      var n = addr[1];
      var warn = addr[1].exception;

      if (addr[0]) {
        if (addr[0][0] >= 256) error('IPv4 address component out of range');
        if (addr[0][0].exception) warn = addr[0][0].exception;
        n += addr[0][0]*256*256*256;

        if (addr[0][2]) {
          if (addr[0][2][0] >= 256) error('IPv4 address component out of range');
          if (addr[0][2][0].exception) warn = addr[0][2][0].exception;
          n += addr[0][2][0]*256*256;

          if (addr[0][2][2]) {
            if (addr[0][2][2][0] >= 256) error('IPv4 address component out of range');
            if (addr[0][2][2][0].exception) warn = addr[0][2][2][0].exception;
            n += addr[0][2][2][0]*256;
            if (addr[1] >= 256) error('IPv4 address component out of range');
          } else {
            if (addr[1] >= 256*256) error('IPv4 address component out of range');
            warn = 'Missing IPv4 component';
          }
        } else {
          if (addr[1] >= 256*256*256) error('IPv4 address component out of range');
          warn = 'Missing IPv4 component';
        }
      } else {
        if (addr[1] >= 256*256*256*256) error('IPv4 address component out of range');
        warn = 'Missing IPv4 component';
      }

      // Serialise n as four dotted decimal bytes, least significant first.
      addr = [];
      for (var i=0; i<4; i++) {
        addr.unshift(n % 256);
        n = Math.floor(n/256);
      }
      addr = addr.join('.');

      // A String object is used so the warning can travel with the value.
      if (warn) {
        addr = new String(addr);
        addr.exception = warn;
      }
      return addr;
    }

/* Three production rules, with uppercase and percent encoded variants, are defined for numbers. Parse the values as hexadecimal, octal, and decimal integers respectively. Indicate a conformance error if the value is hexadecimal or octal. Return the result as an integer. */
Number
  = '0' ('x' / 'X') digits:[0-9a-fA-F]+
    {
      // Boxed like the other alternatives: a property set on a primitive
      // number is discarded, which silently lost this warning.
      var result = new Number(parseInt(digits.join(''), 16));
      result.exception = 'Hexadecimal IPV4 component';
      return result;
    }
  / '0' digits:[0-7]+
    {
      var result = new Number(parseInt(digits.join(''), 8));
      result.exception = 'Octal IPV4 component';
      return result;
    }
  / digits:[0-9]+
    {
      // Explicit radix: input such as "08" reaches this alternative and
      // must be read as decimal.
      return new Number(parseInt(digits.join(''), 10));
    }

/* Return up to four hexadecimal characters as a string. */
H16
  = a:[0-9A-Fa-f] b:[0-9A-Fa-f]? c:[0-9A-Fa-f]? d:[0-9A-Fa-f]?
    { return a + (b ? b : '') + (c ? c : '') + (d ? d : '') }

/* Return four decimal bytes separated by full stop characters as a string. */
LS32
  = a:DecimalByte '.' b:DecimalByte '.' c:DecimalByte '.' d:DecimalByte
    {
      // Each DecimalByte is already a string, so the four values are
      // joined directly.
      // NOTE(review): IPv6Addr tries H16 before LS32, and H16 accepts any
      // leading decimal digit, so this rule may never be reached - confirm.
      return a + '.' + b + '.' + c + '.' + d;
    }

/* Decimal bytes are a string of up to three decimal digits. If the results converted to an integer are greater than 255, terminate processing with a parse exception. */
DecimalByte
  = a:[0-9] b:[0-9]? c:[0-9]?
    {
      // The mandatory digit comes first: an optional digit placed before a
      // mandatory one consumes the only digit of a short value and makes
      // the whole rule fail.
      var text = a + (b ? b : '') + (c ? c : '');
      if (parseInt(text, 10) > 255) error('IPv4 address component out of range');
      return text;
    }

/* Consume all characters until either a solidus (U+002F), a reverse solidus (U+005C), a question mark (U+003F), a number sign (U+0023), or the end of string is encountered. Cleanse result using null as the encode set. Remove leading U+0030 code points from result until either the leading code point is not U+0030 or result is one code point. If any characters that remain are not decimal digits: * If $input was not set, terminate processing with a parse exception. * Truncate $result starting with the first non-digit character. * Indicate a conformance error. Return the result as a string. Note: the resolution of bug 26446 may change port from a string to a number. */
Port
  = port:[^/\\?#]*
    {
      port = cleanse(port, null, 'port');
      var warn = port.exception;

      // Strip leading zeros but keep one digit.
      port = port.replace(/^0+(\d)/, '$1');

      if (!/^\d*$/.test(port)) {
        if (url) {
          // A url object is being updated: keep the leading digits and warn.
          warn = 'Invalid port number';
          port = port.replace(/\D.*/, '');
        } else {
          error('Invalid port number');
        }
      }

      if (warn) {
        port = new String(port);
        port.exception = warn;
      }
      return port;
    }

/* If any of the path separators are a reverse solidus ("\"), indicate a conformance error. Extract all the pathnames into an array. Process each name as follows: * Cleanse the name using the default encode set as the encode set. * If the name is "." or "%2e" (case insensitive), then process this name based on the position in the array: * If the position is other than the last, remove the name from the list. * If the array is of length 1, replace the entry with an empty string.
* Otherwise, leave the entry as is. * If the name is "..", ".%2e", "%2e.", or "%2e%2e" (all to be compared in a case insensitive manner), then process this name based on the position in the array: * If the position is the first, then remove it. * If the position is other than the last, then remove it and the one before it. * If the position is the last, then remove it and the one before it, then append an empty string. Return the array. Note: the resolution of bug 24163 may change what characters to escape in the path. */ Path = path:([^/\\?#]* [/\\])* basename:[^/\\?#]* { var warn = null; path.push([basename]); for (var i=0; icleansed result using null as the encode set. Note: the resolution of bug 24246 may change what characters to escape in the scheme data. */ SchemeData = data:[^?#]* { return cleanse(data, null, 'scheme data'); } /* Consume all characters until either a number sign (U+0023) or the end of string is encountered. Return the cleansed result using the the result using the query encode set. The query encode set is defined to be bytes that are less than 0x21, greater than 0x7E, or one of 0x22, 0x23, 0x3C, 0x3E, and 0x60. Note: the resolution of bug 27280 may change how code points < 0x20 are handled. */ Query = query:[^#]* { return cleanse(query, Url.QUERY_ENCODE_SET, 'query') } /* Consume all remaining characters in the input. Return the cleansed result using the simple encode set. Note: the resolution of bug 27252 may change what characters to escape in the fragment. Note: the resolution of bug 26988 may add support for parsing URLs without decoding the fragment identifier. Setter Rules {#setter-rules} ============= */ Fragment = fragment:.* { return cleanse(fragment, Url.SIMPLE_ENCODE_SET, 'fragment') } /* Set $url.scheme to value returned by @Scheme. */ setProtocol = scheme:Scheme ':'? .* { url._scheme = scheme } /* If $url.scheme_data is not null, return. Set $url.username to the percent encoded value using the username encode set. 
*/
setUsername
  = user:(.*)
    {
      if (url._scheme_data == null) {
        url._username = Url.percentEncode(user.join(''), Url.USERNAME_ENCODE_SET);
      }
    }

/* If $url.scheme_data is not null, return. Set $url.password to the percent encoded value using the password encode set. */
setPassword
  = password:(.*)
    {
      if (url._scheme_data == null) {
        url._password = Url.percentEncode(password.join(''), Url.PASSWORD_ENCODE_SET);
      }
    }

/* If $url.scheme_data is not null, return. Set $url.host to the value returned by @Host. If @Port is present, set $url.port to its value. */
setHost
  = host:Host port:(':' Port)? ([/\\?#]? (.*))?
    {
      if (url._scheme_data == null) {
        // Explicit terminator: the two statements must not run together.
        url._host = host;
        if (port) url._port = port[1];
      }
    }

/* If $url.scheme_data is not null, return. Set $url.host to the value returned by @Host. */
setHostname
  = host:Host [:/\\?#]? (.*)
    {
      if (url._scheme_data == null) {
        url._host = host;
      }
    }

/* If $url.scheme_data is not null or $url.scheme is "file", return. Set $url.port to the value returned by @Port. */
setPort
  = port:Port [/\\?#]? (.*)
    {
      if (url._scheme_data == null && url._scheme != 'file') {
        url._port = port;
      }
    }

/* If $url.scheme_data is not null, return. Set $url.path to the value returned by @Path. */
setPathname
  = [/\\]? path:Path [/\\?#]? (.*)
    {
      if (url._scheme_data == null) {
        url._path = path;
      }
    }

/* Set $url.query to the percent encoded value after the initial question mark (U+003F), if any, using the query encode set. */
setSearch
  = '?'? query:(.*)
    { url._query = Url.percentEncode(query.join(''), Url.QUERY_ENCODE_SET) }

/* Set $url.fragment to the percent encoded value after the initial number sign (U+0023), if any, using the simple encode set */
setHash
  = '#'? fragment:(.*)
    { url._fragment = Url.percentEncode(fragment.join(''), Url.SIMPLE_ENCODE_SET) }