/** * Represents a parsing action; typically not created directly via `new`. */ export class Parser { /** * The parsing action. Takes a parsing Context and returns an ActionResult * representing success or failure. */ action: (context: Context) => ActionResult; /** * Creates a new custom parser that performs the given parsing action. */ constructor(action: (context: Context) => ActionResult) { this.action = action; } /** * Returns a parse result with either the value or error information. */ parse(input: string): ParseOK | ParseFail { const location = { index: 0, line: 1, column: 1 }; const context = new Context({ input, location }); const result = this.skip(eof).action(context); if (result.type === "ActionOK") { return { type: "ParseOK", value: result.value, }; } return { type: "ParseFail", location: result.furthest, expected: result.expected, }; } /** * Returns the parsed result or throws an error. */ tryParse(input: string): A { const result = this.parse(input); if (result.type === "ParseOK") { return result.value; } const { expected, location } = result; const { line, column } = location; const message = `parse error at line ${line} column ${column}: ` + `expected ${expected.join(", ")}`; throw new Error(message); } /** * Combines two parsers one after the other, yielding the results of both in * an array. */ and(parserB: Parser): Parser<[A, B]> { return new Parser((context) => { const a = this.action(context); if (a.type === "ActionFail") { return a; } context = context.moveTo(a.location); const b = context.merge(a, parserB.action(context)); if (b.type === "ActionOK") { const value: [A, B] = [a.value, b.value]; return context.merge(b, context.ok(b.location.index, value)); } return b; }); } /** Parse both and return the value of the first */ skip(parserB: Parser): Parser { return this.and(parserB).map(([a]) => a); } /** Parse both and return the value of the second */ next(parserB: Parser): Parser { return this.and(parserB).map(([, b]) => b); } /** * Try to parse using the current parser. If that fails, parse using the * second parser. */ or(parserB: Parser): Parser { return new Parser((context) => { const a = this.action(context); if (a.type === "ActionOK") { return a; } return context.merge(a, parserB.action(context)); }); } /** * Parse using the current parser. If it succeeds, pass the value to the * callback function, which returns the next parser to use. */ chain(fn: (value: A) => Parser): Parser { return new Parser((context) => { const a = this.action(context); if (a.type === "ActionFail") { return a; } const parserB = fn(a.value); context = context.moveTo(a.location); return context.merge(a, parserB.action(context)); }); } /** * Yields the value from the parser after being called with the callback. */ map(fn: (value: A) => B): Parser { return this.chain((a) => { return ok(fn(a)); }); } /** * Returns the callback called with the parser. */ thru(fn: (parser: this) => B): B { return fn(this); } /** * Returns a parser which parses the same value, but discards other error * messages, using the ones supplied instead. */ desc(expected: string[]): Parser { return new Parser((context) => { const result = this.action(context); if (result.type === "ActionOK") { return result; } return { type: "ActionFail", furthest: result.furthest, expected }; }); } /** * Wraps the current parser with before & after parsers. */ wrap(before: Parser, after: Parser): Parser { return before.next(this).skip(after); } /** * Ignores content before and after the current parser, based on the supplied * parser. */ trim(beforeAndAfter: Parser): Parser { return this.wrap(beforeAndAfter, beforeAndAfter); } /** * Repeats the current parser between min and max times, yielding the results * in an array. */ repeat(min = 0, max = Infinity): Parser { if (!isRangeValid(min, max)) { throw new Error(`repeat: bad range (${min} to ${max})`); } if (min === 0) { return this.repeat(1, max).or(ok([])); } return new Parser((context) => { const items: A[] = []; let result = this.action(context); if (result.type === "ActionFail") { return result; } while (result.type === "ActionOK" && items.length < max) { items.push(result.value); if (result.location.index === context.location.index) { throw new Error( "infinite loop detected; don't call .repeat() with parsers that can accept zero characters" ); } context = context.moveTo(result.location); result = context.merge(result, this.action(context)); } if (result.type === "ActionFail" && items.length < min) { return result; } return context.merge(result, context.ok(context.location.index, items)); }); } /** * Returns a parser that parses between min and max times, separated by the separator * parser supplied. */ sepBy(separator: Parser, min = 0, max = Infinity): Parser { if (!isRangeValid(min, max)) { throw new Error(`sepBy: bad range (${min} to ${max})`); } if (min === 0) { return this.sepBy(separator, 1, max).or(ok([])); } // We also know that min=1 due to previous checks, so we can skip the call // to `repeat` here if (max === 1) { return this.map((x) => [x]); } return this.chain((first) => { return separator .next(this) .repeat(min - 1, max - 1) .map((rest) => { return [first, ...rest]; }); }); } /** * Returns a parser that adds name and start/end location metadata. */ node(name: S): Parser> { return all(location, this, location).map(([start, value, end]) => { const type = "ParseNode"; return { type, name, value, start, end } as const; }); } } function isRangeValid(min: number, max: number): boolean { return ( min <= max && min >= 0 && max >= 0 && Number.isInteger(min) && min !== Infinity && (Number.isInteger(max) || max === Infinity) ); } /** * Result type from `node`. See `node` for more details. */ export interface ParseNode { type: "ParseNode"; name: S; value: A; start: SourceLocation; end: SourceLocation; } /** * Parser that yields the current `SourceLocation`, containing properties * `index`, `line` and `column`. */ export const location = new Parser((context) => { return context.ok(context.location.index, context.location); }); /** * Returns a parser that yields the given value and consumes no input. */ export function ok(value: A): Parser { return new Parser((context) => { return context.ok(context.location.index, value); }); } /** * Returns a parser that fails with the given messages and consumes no input. */ export function fail(expected: string[]): Parser { return new Parser((context) => { return context.fail(context.location.index, expected); }); } /** * This parser succeeds if the input has already been fully parsed. */ export const eof = new Parser<"">((context) => { if (context.location.index < context.input.length) { return context.fail(context.location.index, [""]); } return context.ok(context.location.index, ""); }); /** Returns a parser that matches the exact text supplied. */ export function text(string: A): Parser { return new Parser((context) => { const start = context.location.index; const end = start + string.length; if (context.input.slice(start, end) === string) { return context.ok(end, string); } return context.fail(start, [string]); }); } /** * Returns a parser that matches the entire regular expression at the current * parser position. */ export function match(regexp: RegExp): Parser { for (const flag of regexp.flags) { switch (flag) { case "i": // ignoreCase case "s": // dotAll case "m": // multiline case "u": // unicode continue; default: throw new Error("only the regexp flags 'imsu' are supported"); } } const sticky = new RegExp(regexp.source, regexp.flags + "y"); return new Parser((context) => { const start = context.location.index; sticky.lastIndex = start; const match = context.input.match(sticky); if (match) { const end = start + match[0].length; const string = context.input.slice(start, end); return context.ok(end, string); } return context.fail(start, [String(regexp)]); }); } /** A tuple of parsers */ // eslint-disable-next-line @typescript-eslint/no-explicit-any type ManyParsers = { [P in keyof A]: Parser; }; /** Parse all items, returning their values in the same order. */ // eslint-disable-next-line @typescript-eslint/no-explicit-any export function all(...parsers: ManyParsers): Parser { // TODO: This could be optimized with a custom parser, but I should probably add // benchmarking first to see if it really matters enough to rewrite it return parsers.reduce((acc, p) => { return acc.chain((array) => { return p.map((value) => { return [...array, value]; }); }); }, ok([])); } /** Parse using the parsers given, returning the first one that succeeds. */ export function choice[]>( ...parsers: Parsers ): Parser> { // TODO: This could be optimized with a custom parser, but I should probably add // benchmarking first to see if it really matters enough to rewrite it return parsers.reduce((acc, p) => { return acc.or(p); }); } /** * Takes a lazily invoked callback that returns a parser, so you can create * recursive parsers. */ export function lazy(fn: () => Parser): Parser { // NOTE: This parsing action overwrites itself on the specified parser. We're // assuming that the same parser won't be returned to multiple `lazy` calls. I // never heard of such a thing happening in Parsimmon, and it doesn't seem // likely to happen here either. I assume this is faster than using variable // closure and an `if`-statement here, but I honestly don't know. const parser: Parser = new Parser((context) => { parser.action = fn().action; return parser.action(context); }); return parser; } /** * Represents a location in the input (source code). Keeps track of `index` (for * use with `.slice` and such), as well as `line` and `column` for displaying to * users. */ export interface SourceLocation { /** The string index into the input (e.g. for use with `.slice`) */ index: number; /** * The line number for error reporting. Only the character `\n` is used to * signify the beginning of a new line. */ line: number; /** * The column number for error reporting. */ column: number; } /** * Represents the result of a parser's action callback. */ export type ActionResult = ActionOK | ActionFail; /** * Represents a successful result from a parser's action callback. This is made * automatically by calling `context.ok`. Make sure to use `context.merge` * when writing a custom parser that executes multiple parser actions. */ export interface ActionOK { type: "ActionOK"; location: SourceLocation; value: A; furthest: SourceLocation; expected: string[]; } /** * Represents a successful result from a parser's action callback. This is made * automatically by calling `context.ok`. Make sure to use `context.merge` * when writing a custom parser that executes multiple parser actions. */ export interface ActionFail { type: "ActionFail"; furthest: SourceLocation; expected: string[]; } function union(a: string[], b: string[]): string[] { return [...new Set([...a, ...b])]; } /** * Represents the current parsing context. */ class Context { /** the string being parsed */ input: string; /** the current parse location */ location: SourceLocation; constructor(options: { input: string; location: SourceLocation }) { this.input = options.input; this.location = options.location; } /** * Returns a new context with the supplied location and the current input. */ moveTo(location: SourceLocation): Context { return new Context({ input: this.input, location, }); } private _internal_move(index: number): SourceLocation { if (index === this.location.index) { return this.location; } const start = this.location.index; const end = index; const chunk = this.input.slice(start, end); let { line, column } = this.location; for (const ch of chunk) { if (ch === "\n") { line++; column = 1; } else { column++; } } return { index, line, column }; } /** * Represents a successful parse ending before the given `index`, with the * specified `value`. */ ok(index: number, value: A): ActionResult { return { type: "ActionOK", value, location: this._internal_move(index), furthest: { index: -1, line: -1, column: -1 }, expected: [], }; } /** * Represents a failed parse starting at the given `index`, with the specified * list `expected` messages (note: this list usually only has one item). */ fail(index: number, expected: string[]): ActionResult { return { type: "ActionFail", furthest: this._internal_move(index), expected, }; } /** * Merge two sequential `ActionResult`s so that the `expected` and location data * is preserved correctly. */ merge(a: ActionResult, b: ActionResult): ActionResult { if (b.furthest.index > a.furthest.index) { return b; } const expected = b.furthest.index === a.furthest.index ? union(a.expected, b.expected) : a.expected; if (b.type === "ActionOK") { return { type: "ActionOK", location: b.location, value: b.value, furthest: a.furthest, expected, }; } return { type: "ActionFail", furthest: a.furthest, expected, }; } } /** * Represents a successful parse result. */ export interface ParseOK { type: "ParseOK"; /** The parsed value */ value: A; } /** * Represents a failed parse result, where it failed, and what types of * values were expected at the point of failure. */ export interface ParseFail { type: "ParseFail"; /** The input location where the parse failed */ location: SourceLocation; /** List of expected values at the location the parse failed */ expected: string[]; }