// Token taxonomy for Flash. The set covers the whole v1 surface and no more —
// module imports and declarations, the control-flow / error / comptime
// keywords, the operator and compound-assignment families, the literal forms
// (int, float, string, multiline string, char), and the comment and
// doc-comment trivia. It is the single source of truth the lexer and parser
// fan out from; new syntax adds a variant here first.

use "support" as sup

// Every token kind, grouped by role. Each `kw_*` variant has a matching
// spelling in `keywords.get`; a keyword added here needs an arm there too.
pub const Kind = enum {
    // literals + names
    ident,
    int,
    float, // decimal float literal, e.g. 3.14 or 1.5e-3; value passes through to Zig verbatim
    string,
    multiline_str, // a `\\…` raw multiline-string line (one per physical line)
    char, // 'c'
    builtin, // #name(...)
    doc_comment, // `///…` doc-comment line (content-bearing; kept and re-emitted)
    line_comment, // `//…` line comment in any non-doc shape (`//`, `////`, `//!`); kept for the formatter, filtered out before the parse

    // keywords
    kw_use,
    kw_as,
    kw_link,
    kw_fn,
    kw_export,
    kw_extern,
    kw_callconv,
    kw_align,
    kw_linksection,
    kw_pub,
    kw_inline,
    kw_comptime,
    kw_const,
    kw_var,
    kw_orelse,
    kw_if,
    kw_else,
    kw_while,
    kw_for,
    kw_in,
    kw_break,
    kw_continue,
    kw_return,
    kw_try,
    kw_catch,
    kw_defer,
    kw_errdefer,
    kw_packed, // the layout modifier on a struct definition (`packed struct`)
    kw_struct,
    kw_enum,
    kw_union,
    kw_switch,
    kw_asm,
    kw_error,
    kw_test, // `test "name" { … }` — a top-level test-block declaration

    // value keywords — the reserved literal words. They parse only in value
    // position (parsePrimary), never as a bindable identifier, so `true`,
    // `false`, `null`, `undefined`, and `unreachable` cannot be shadowed by a
    // name and lower to the identical Zig keyword.
    kw_true,
    kw_false,
    kw_null,
    kw_undefined,
    kw_unreachable,

    // primitive-type keywords — reserved type-position words. Each names a Zig
    // primitive but, being reserved, cannot be shadowed by a binding: `noreturn`
    // (the empty return type), `anytype` (an inferred parameter type), and
    // `anyopaque` (an incomplete pointee type). All three lower verbatim.
    kw_noreturn,
    kw_anytype,
    kw_anyopaque,

    // punctuation
    l_paren,
    r_paren,
    l_brace,
    r_brace,
    l_bracket,
    r_bracket,
    comma,
    colon,
    colon_equal, // :=
    dot,
    equal,
    arrow, // ->
    fat_arrow, // => — a switch prong separator
    star, // *
    underscore, // a lone _

    // operators
    plus, // +
    plus_plus, // ++ — array / slice concatenation
    star_star, // ** — array repetition
    plus_percent, // +% — wrapping addition
    minus, // -
    minus_percent, // -% — wrapping subtraction
    slash, // /
    percent, // %
    star_percent, // *% — wrapping multiplication (the base `*` is `star`, in punctuation)
    eq_eq, // ==
    bang_eq, // !=
    lt, // <
    lt_eq, // <=
    lt_lt, // <<
    gt, // >
    gt_eq, // >=
    gt_gt, // >>
    amp, // &
    amp_amp, // &&
    pipe, // |
    pipe_pipe, // ||
    caret, // ^
    tilde, // ~
    bang, // !
    question, // ?
    dot_dot, // ..
    ellipsis3, // ... — an inclusive switch range (lo...hi)

    // compound assignment
    plus_eq, // +=
    minus_eq, // -=
    star_eq, // *=
    slash_eq, // /=
    percent_eq, // %=
    amp_eq, // &=
    pipe_eq, // |=
    caret_eq, // ^=
    lt_lt_eq, // <<=
    gt_gt_eq, // >>=
    plus_percent_eq, // +%= — wrapping add-assign
    minus_percent_eq, // -%= — wrapping subtract-assign
    star_percent_eq, // *%= — wrapping multiply-assign

    // trivia / control
    // (the comment trivia kinds are `doc_comment` and `line_comment`, in the
    // first group; only the two control kinds live here)
    eof,
    invalid,
}

// One token: its kind plus where it sits in the source. A token carries no
// text of its own — `lexeme` recovers the text from the original buffer.
pub const Token = struct {
    kind Kind,
    // Byte range into the source buffer, [start, end). Keeping spans instead
    // of copied slices means the lexer allocates nothing and every token can
    // point back at the original text for diagnostics.
    start u32,
    end u32,
    // Source line of the token. The tests in this file use 1 for a token on
    // the first line — presumably 1-based; confirm against the lexer.
    line u32,

    // Returns the token's text: the slice `src[start..end]`. `src` is expected
    // to be the buffer the span was recorded against — nothing here checks
    // that, so a span applied to a different buffer yields the wrong text or
    // a range that does not fit the slice.
    pub fn lexeme(self Token, src []u8) []u8 {
        return src[self.start..self.end]
    }
}

// The keyword table, as a flat linear scan. The set is small (43 words) and
// frozen with the v1 grammar, and `eql` rejects on length before it compares
// bytes, so the scan is cheap where it matters — identifiers that are not
// keywords fall through on length alone almost every time. A flat chain is
// also the shape the formatter lays out best; a comptime string map would buy
// nothing at this size.
// Keyword lookup. `keywords` has no fields; it is a namespace holding `get`.
pub const keywords = struct {
    // Maps a spelling to its keyword kind, or returns null when `text` is not
    // one of the 43 reserved words. An arm fires only when `sup.eql` reports
    // `text` equal to the literal (presumably a byte-wise slice comparison in
    // the style of Zig's `std.mem.eql` — confirm in `support`). The
    // "non-keywords miss the table" test pins that prefixes, extensions, other
    // letter case and the empty string all return null.
    //
    // The arms follow the order of the `kw_*` variants in `Kind`. The
    // spellings are pairwise distinct, so at most one arm can match. A new
    // keyword needs a variant in `Kind`, an arm here, and a line in the
    // "every keyword maps to its kind" test.
    pub fn get(text []u8) ?Kind {
        // keywords
        if sup.eql(u8, text, "use") {
            return .kw_use
        }
        if sup.eql(u8, text, "as") {
            return .kw_as
        }
        if sup.eql(u8, text, "link") {
            return .kw_link
        }
        if sup.eql(u8, text, "fn") {
            return .kw_fn
        }
        if sup.eql(u8, text, "export") {
            return .kw_export
        }
        if sup.eql(u8, text, "extern") {
            return .kw_extern
        }
        if sup.eql(u8, text, "callconv") {
            return .kw_callconv
        }
        if sup.eql(u8, text, "align") {
            return .kw_align
        }
        if sup.eql(u8, text, "linksection") {
            return .kw_linksection
        }
        if sup.eql(u8, text, "pub") {
            return .kw_pub
        }
        if sup.eql(u8, text, "inline") {
            return .kw_inline
        }
        if sup.eql(u8, text, "comptime") {
            return .kw_comptime
        }
        if sup.eql(u8, text, "const") {
            return .kw_const
        }
        if sup.eql(u8, text, "var") {
            return .kw_var
        }
        if sup.eql(u8, text, "orelse") {
            return .kw_orelse
        }
        if sup.eql(u8, text, "if") {
            return .kw_if
        }
        if sup.eql(u8, text, "else") {
            return .kw_else
        }
        if sup.eql(u8, text, "while") {
            return .kw_while
        }
        if sup.eql(u8, text, "for") {
            return .kw_for
        }
        if sup.eql(u8, text, "in") {
            return .kw_in
        }
        if sup.eql(u8, text, "break") {
            return .kw_break
        }
        if sup.eql(u8, text, "continue") {
            return .kw_continue
        }
        if sup.eql(u8, text, "return") {
            return .kw_return
        }
        if sup.eql(u8, text, "try") {
            return .kw_try
        }
        if sup.eql(u8, text, "catch") {
            return .kw_catch
        }
        if sup.eql(u8, text, "defer") {
            return .kw_defer
        }
        if sup.eql(u8, text, "errdefer") {
            return .kw_errdefer
        }
        if sup.eql(u8, text, "packed") {
            return .kw_packed
        }
        if sup.eql(u8, text, "struct") {
            return .kw_struct
        }
        if sup.eql(u8, text, "enum") {
            return .kw_enum
        }
        if sup.eql(u8, text, "union") {
            return .kw_union
        }
        if sup.eql(u8, text, "switch") {
            return .kw_switch
        }
        if sup.eql(u8, text, "asm") {
            return .kw_asm
        }
        if sup.eql(u8, text, "error") {
            return .kw_error
        }
        if sup.eql(u8, text, "test") {
            return .kw_test
        }
        // value keywords
        if sup.eql(u8, text, "true") {
            return .kw_true
        }
        if sup.eql(u8, text, "false") {
            return .kw_false
        }
        if sup.eql(u8, text, "null") {
            return .kw_null
        }
        if sup.eql(u8, text, "undefined") {
            return .kw_undefined
        }
        if sup.eql(u8, text, "unreachable") {
            return .kw_unreachable
        }
        // primitive-type keywords
        if sup.eql(u8, text, "noreturn") {
            return .kw_noreturn
        }
        if sup.eql(u8, text, "anytype") {
            return .kw_anytype
        }
        if sup.eql(u8, text, "anyopaque") {
            return .kw_anyopaque
        }
        // No arm matched: `text` is not a keyword.
        return null
    }
}

// One assertion per `kw_*` variant, in `Kind` order — 43 in total. It guards
// against an arm of `get` returning the wrong kind or going missing.
test "every keyword maps to its kind" {
    try sup.expectEqual(Kind.kw_use, keywords.get("use"))
    try sup.expectEqual(Kind.kw_as, keywords.get("as"))
    try sup.expectEqual(Kind.kw_link, keywords.get("link"))
    try sup.expectEqual(Kind.kw_fn, keywords.get("fn"))
    try sup.expectEqual(Kind.kw_export, keywords.get("export"))
    try sup.expectEqual(Kind.kw_extern, keywords.get("extern"))
    try sup.expectEqual(Kind.kw_callconv, keywords.get("callconv"))
    try sup.expectEqual(Kind.kw_align, keywords.get("align"))
    try sup.expectEqual(Kind.kw_linksection, keywords.get("linksection"))
    try sup.expectEqual(Kind.kw_pub, keywords.get("pub"))
    try sup.expectEqual(Kind.kw_inline, keywords.get("inline"))
    try sup.expectEqual(Kind.kw_comptime, keywords.get("comptime"))
    try sup.expectEqual(Kind.kw_const, keywords.get("const"))
    try sup.expectEqual(Kind.kw_var, keywords.get("var"))
    try sup.expectEqual(Kind.kw_orelse, keywords.get("orelse"))
    try sup.expectEqual(Kind.kw_if, keywords.get("if"))
    try sup.expectEqual(Kind.kw_else, keywords.get("else"))
    try sup.expectEqual(Kind.kw_while, keywords.get("while"))
    try sup.expectEqual(Kind.kw_for, keywords.get("for"))
    try sup.expectEqual(Kind.kw_in, keywords.get("in"))
    try sup.expectEqual(Kind.kw_break, keywords.get("break"))
    try sup.expectEqual(Kind.kw_continue, keywords.get("continue"))
    try sup.expectEqual(Kind.kw_return, keywords.get("return"))
    try sup.expectEqual(Kind.kw_try, keywords.get("try"))
    try sup.expectEqual(Kind.kw_catch, keywords.get("catch"))
    try sup.expectEqual(Kind.kw_defer, keywords.get("defer"))
    try sup.expectEqual(Kind.kw_errdefer, keywords.get("errdefer"))
    try sup.expectEqual(Kind.kw_packed, keywords.get("packed"))
    try sup.expectEqual(Kind.kw_struct, keywords.get("struct"))
    try sup.expectEqual(Kind.kw_enum, keywords.get("enum"))
    try sup.expectEqual(Kind.kw_union, keywords.get("union"))
    try sup.expectEqual(Kind.kw_switch, keywords.get("switch"))
    try sup.expectEqual(Kind.kw_asm, keywords.get("asm"))
    try sup.expectEqual(Kind.kw_error, keywords.get("error"))
    try sup.expectEqual(Kind.kw_test, keywords.get("test"))
    try sup.expectEqual(Kind.kw_true, keywords.get("true"))
    try sup.expectEqual(Kind.kw_false, keywords.get("false"))
    try sup.expectEqual(Kind.kw_null, keywords.get("null"))
    try sup.expectEqual(Kind.kw_undefined, keywords.get("undefined"))
    try sup.expectEqual(Kind.kw_unreachable, keywords.get("unreachable"))
    try sup.expectEqual(Kind.kw_noreturn, keywords.get("noreturn"))
    try sup.expectEqual(Kind.kw_anytype, keywords.get("anytype"))
    try sup.expectEqual(Kind.kw_anyopaque, keywords.get("anyopaque"))
}

// Near-misses around one keyword (`use`) plus the degenerate inputs; each
// must return null.
test "non-keywords miss the table" {
    // an ordinary identifier unrelated to any keyword
    try sup.expectEqual(null, keywords.get("flash"))
    // a proper prefix of a keyword
    try sup.expectEqual(null, keywords.get("us"))
    // a keyword with one extra trailing byte
    try sup.expectEqual(null, keywords.get("usee"))
    // a keyword in different letter case
    try sup.expectEqual(null, keywords.get("Use"))
    // the enum variant's name rather than the keyword's spelling
    try sup.expectEqual(null, keywords.get("kw_use"))
    // the empty string
    try sup.expectEqual(null, keywords.get(""))
}

// `x` sits at byte offset 6 of the buffer; the span is half-open, so the end
// is 7 and the lexeme is the single byte `x`.
test "lexeme slices the token's span out of the source" {
    const src []u8 = "const x = 1"
    const t Token = .{ .kind = .ident, .start = 6, .end = 7, .line = 1 }
    try sup.expectEqualStrings("x", t.lexeme(src))
}

// start = 0 and end = the buffer's length (3): the widest span the buffer
// admits, so the lexeme is the whole source.
test "lexeme spans the whole buffer at the extremes" {
    const src []u8 = "use"
    const t Token = .{ .kind = .kw_use, .start = 0, .end = 3, .line = 1 }
    try sup.expectEqualStrings("use", t.lexeme(src))
}