// Flash formatter — AST back to canonical Flash source text. // // `flashc fmt` is gofmt / zig fmt for Flash: it parses a `.flash` file and // re-emits it in one canonical layout. This module is the renderer — the // inverse of lower.zig. Where lowering walks the AST to *Zig* text, the // formatter walks the same AST to *Flash* text, so the two are mirror images // and the emitter inventory lines up one-to-one (emitType / emitExpr / // emitStmt / emitFn / …). The canonical layout is the lowering's layout rules // transposed to Flash spelling: 4-space indent, one blank line between // top-level units, the same brace-spacing zig fmt uses, mandatory braces. // // The Flash spelling is the lowering mapping read in reverse — the implicit // `const` pointee that lowering makes explicit is dropped again here: // // const NAME = @import("X") <- use X (a bare module import) // const Y = @import("X") <- use X as Y // const Y = @import("X.zig") <- use "X" as Y (a sibling file; Flash names // the stem only — no backend extension) // comptime { _ = @import(M) } <- link "M" (kept as `link` lines, not // folded into a comptime block) // @name(a, …) <- #name(a, …) (the '#' builtin sigil) // fn f(a: T) R <- fn f(a T) R (type after the name, no // colon; a missing return is simply omitted) // []const T / []T <- []T / []mut T (const-pointee default: the // [*]const T / *const T <- [*]T / *T implicit `const` is dropped) // x and y / x or y <- x && y / x || y (the logical operators keep // their Flash spelling) // const x = e (untyped) <- x := e (the short-declaration canon) // if (c) { … } <- if c { … } (statement conditions carry // while (c) { … } <- while c { … } no parentheses) // for (xs) |x| { … } <- for x in xs { … } // for (lo..hi) |i| { … } <- for i in lo..hi { … } // // A value `if` is the one conditional that keeps its parentheses // (`if (c) a else b`), matching the surface grammar. Statements carry no // trailing semicolon. 
// Source blank lines are preserved (collapsed to one), between statements and
// between top-level items alike: the formatter keeps the author's grouping, so
// a tight run of `use` (or a run of `link`) stays tight, and a blank line the
// author wrote before an item renders as exactly one blank.
//
// Three guarantees back the rewrite, each gated by the test suite: a parse
// error refuses the file untouched (a formatter never destroys code); every
// comment in the input appears exactly once in the output; and formatting
// never changes the emitted Zig — lower(parse(src)) equals
// lower(parse(fmt(src))) byte for byte — so a reformat can never alter a
// program's meaning. The formatter is also idempotent: fmt(fmt(src)) == fmt(src).

const std = @import("std");
const ast = @import("ast.zig");
const token = @import("token.zig");
const parser = @import("parser.zig");
const lower = @import("lower.zig");
const Parser = parser.Parser;

// Re-exported for the integration suite (tests/fmt_examples.zig): the lexer the
// formatter is built on, so a test can tokenize a formatted result to compare
// comment multisets without a second lexer module — which would place
// src/lexer.zig in two module graphs at once (a compile error).
pub const Lexer = @import("lexer.zig").Lexer;

// The error set of the renderer. Every Printer method writes through the
// output buffer, so running out of memory while growing it is the only failure
// rendering itself can report.
pub const Error = error{OutOfMemory};

// Format `src` to canonical Flash text. Runs its own parser; a parse error
// propagates as parser.Error.UnexpectedToken (the caller reads the parser's
// diagnostic and leaves the file untouched). The returned slice is arena-owned.
//
// `arena` is handed to both the parser and the renderer; `src` is the complete
// text of one `.flash` file. The parser's collected comments are passed on to
// `render` together with the original buffer.
pub fn format(arena: std.mem.Allocator, src: []const u8) parser.Error![]const u8 {
    var p = Parser.init(arena, src);
    const program = try p.parseProgram();
    return render(arena, program, p.comments, src);
}

// Render an already-parsed program. `comments` is the source-ordered line
// comments the parser collected aside (the formatter reattaches them); `src`
// is the original buffer, used to recover blank-line and comment positions
// from the AST's source slices.
pub fn render(arena: std.mem.Allocator, program: ast.Program, comments: []const token.Token, src: []const u8) Error![]const u8 {
    var printer: Printer = .{ .arena = arena, .src = src, .comments = comments };
    const items = program.items;

    // True until something has been written; it suppresses a blank line at
    // the very top of the output.
    var at_file_start = true;

    for (items, 0..) |item, idx| {
        // Standalone comments that precede this item come first — the file
        // header before the first item, a comment block before a declaration.
        const lead_off = printer.anchorOffset(itemLeadAnchor(item));
        if (lead_off) |off| {
            const flushed = try printer.flushStandalone(off, 0, at_file_start);
            if (flushed) at_file_start = false;
        }

        // Blank lines between top-level items are PRESERVED, not imposed: a
        // source blank before the item (or its lead-in comment) renders as one
        // blank, and the author's tight grouping of consecutive declarations
        // (a run of `use`, a block of `pub const` re-exports) is kept tight.
        // The lowering's own "one blank between units" rule is for generated
        // Zig; the formatter keeps what the author wrote. A `comptime { … }`
        // block's only stored slice is its first statement, one line below the
        // `comptime {` head, so the blank check steps up to the head line.
        if (!at_file_start) {
            var blank_anchor = lead_off;
            if (item == .comptime_block) {
                if (lead_off) |first_stmt| blank_anchor = prevLineBreakOffset(printer.src, first_stmt);
            }
            if (blank_anchor) |off| {
                if (blankBeforeOffset(printer.src, off)) try printer.raw("\n");
            }
        }
        at_file_start = false;

        // The following item (or end of file) bounds this item's block-close
        // comment flushes.
        printer.boundary = printer.nextOffset(items, idx + 1);

        switch (item) {
            .use_decl => |use_decl| try printer.emitUseDeclAt(use_decl),
            // The lowering folds a `link` run into one `comptime { … }` block;
            // the formatter keeps the `link "M"` lines the author wrote.
            .link_decl => |link_decl| try printer.print("link \"{s}\"", .{link_decl.module}),
            .const_decl => |const_decl| try printer.emitConstDecl(const_decl),
            .fn_decl => |fn_decl| try printer.emitFn(fn_decl),
            .comptime_block => |body| {
                try printer.raw("comptime ");
                try printer.emitBlockBody(body, 0);
            },
            .test_decl => |test_decl| {
                try printer.print("test {s} ", .{test_decl.name});
                try printer.emitBlockBody(test_decl.body, 0);
            },
        }

        // A same-line comment attaches before the item's line is closed.
        const tail_off = printer.anchorOffset(itemTailAnchor(item));
        try printer.flushTrailing(tail_off);
        try printer.raw("\n");
    }

    // End of file: emit every comment that has not been placed yet, at depth 0
    // (a file-tail comment, or one a placement heuristic could not site earlier).
    _ = try printer.flushStandalone(printer.src.len, 0, at_file_start);
    return printer.buf.toOwnedSlice(arena);
}

const Printer = struct {
    arena: std.mem.Allocator,
    src: []const u8,
    buf: std.ArrayList(u8) = .empty,

    // The source-ordered line comments to reattach while walking the AST, and a
    // cursor into them. The walk merges the two streams: at each element it
    // flushes the comments that precede it (standalone, on their own lines) and
    // appends a same-line one as a trailing comment. Every comment is emitted
    // exactly once — anything not placed earlier is flushed at end of file.
    comments: []const token.Token,
    c_idx: usize = 0,

    // The source offset just past the construct currently being emitted — the
    // exclusive upper bound for a block-close comment flush, so a block never
    // adopts comments that belong to a later sibling. Set by each sequence loop
    // (items, statements) to the next element's offset; saved and restored
    // around nested blocks.
boundary: usize = 0,

// Append `s` to the output buffer verbatim. Fails only when the buffer cannot
// grow.
fn raw(self: *Printer, s: []const u8) Error!void {
    try self.buf.appendSlice(self.arena, s);
}

// Append `fmt` expanded with `args` to the output buffer. Fails only when the
// buffer cannot grow.
fn print(self: *Printer, comptime fmt: []const u8, args: anytype) Error!void {
    try self.buf.print(self.arena, fmt, args);
}

// Emit the leading whitespace for nesting level `depth`: four spaces per level,
// the canonical Flash indent. flushBlockClose measures an owner's column as
// `(inner_depth - 1) * 4`, so the unit written here must stay four columns wide.
fn indent(self: *Printer, depth: usize) Error!void {
    for (0..depth) |_| try self.raw("    ");
}

// --- comment plumbing ------------------------------------------------

// The byte offset of an AST source slice into `src`, or null when the slice
// is empty or not a view into the buffer (a defensive guard — every AST
// string is meant to be a real source slice). A null `slice` also yields null.
fn anchorOffset(self: *Printer, slice: ?[]const u8) ?usize {
    const a = slice orelse return null;
    if (a.len == 0) return null;
    // Compare raw addresses: the slice counts as an anchor only when its first
    // byte lies inside `src`.
    const base = @intFromPtr(self.src.ptr);
    const ap = @intFromPtr(a.ptr);
    if (ap < base or ap >= base + self.src.len) return null;
    return ap - base;
}

// The lead-anchor offset of items[idx], or end of file when idx is past the
// last item — the boundary for the preceding item's block-close flush. An item
// whose lead anchor cannot be located also falls back to end of file.
fn nextOffset(self: *Printer, items: []const ast.Item, idx: usize) usize {
    if (idx >= items.len) return self.src.len;
    return self.anchorOffset(itemLeadAnchor(items[idx])) orelse self.src.len;
}

// Emit every pending standalone comment whose start is before `limit`, each
// on its own line at `depth`. A source blank line before a comment is
// preserved (collapsed to one), except before the first emitted line when
// `suppress_leading_blank`. Returns whether any comment was emitted.
fn flushStandalone(self: *Printer, limit: usize, depth: usize, suppress_leading_blank: bool) Error!bool {
    const first_idx = self.c_idx;
    while (self.c_idx < self.comments.len) : (self.c_idx += 1) {
        const comment = self.comments[self.c_idx];
        if (comment.start >= limit) break;

        // Only the first line written by this flush can have its leading
        // blank suppressed; every later comment keeps its source blank.
        const is_first_line = self.c_idx == first_idx;
        const blank_allowed = !(is_first_line and suppress_leading_blank);
        if (blank_allowed and blankBeforeOffset(self.src, comment.start)) try self.raw("\n");

        try self.indent(depth);
        try self.raw(comment.lexeme(self.src));
        try self.raw("\n");
    }
    // The cursor moved exactly when at least one comment was written.
    return self.c_idx != first_idx;
}

// Attach a trailing comment to the line being written. When the next pending
// comment is a trailing one that sits on the same source line as the element
// anchored at `anchor` (no newline between the two), it is appended to the
// current output line after a single space and consumed. A null `anchor`, an
// exhausted comment list, or a comment that fails either check leaves the
// output and the cursor untouched.
fn flushTrailing(self: *Printer, anchor: ?usize) Error!void {
    const anchor_off = anchor orelse return;
    if (self.c_idx >= self.comments.len) return;

    const next = self.comments[self.c_idx];
    if (!commentIsTrailing(self.src, next.start)) return;
    if (!noNewlineBetween(self.src, anchor_off, next.start)) return;

    try self.raw(" ");
    try self.raw(next.lexeme(self.src));
    self.c_idx += 1;
}

// At a block's closing brace (its statements were at `inner_depth`), flush
// the pending comments that belong inside — those before `boundary` (the
// next sibling after the block) AND indented past the block's owner. The
// offset bound stops a block from adopting a later sibling's comments; the
// relative-column rule keeps a comment that lines up with the owner outside.
// Together they site a block-final comment without the brace's own offset,
// which the tree does not carry.
fn flushBlockClose(self: *Printer, inner_depth: usize, boundary: usize) Error!void {
    // Column of the block's owner, one level out from its statements (a
    // depth-0 block has no outer level, so the column saturates at 0).
    const owner_column: usize = (inner_depth -| 1) * 4;
    while (self.c_idx < self.comments.len) : (self.c_idx += 1) {
        const pending = self.comments[self.c_idx];
        // Stop at the first comment that belongs to a later sibling or that
        // lines up with (or sits left of) the owner.
        if (pending.start >= boundary or commentColumn(self.src, pending.start) <= owner_column) break;

        if (blankBeforeOffset(self.src, pending.start)) try self.raw("\n");
        try self.indent(inner_depth);
        try self.raw(pending.lexeme(self.src));
        try self.raw("\n");
    }
}

// --- items -----------------------------------------------------------

// A top-level function: its doc block, then the signature and body, both at
// depth 0. No line break is written here — the caller (render) flushes a
// trailing comment and emits the line break, so a same-line comment on a
// one-line declaration can still attach. The depth-aware work lives in
// emitFnAt, which a struct method reuses.
fn emitFn(self: *Printer, f: ast.FnDecl) Error!void {
    const top_level: usize = 0;
    try self.emitDoc(f.doc, top_level);
    try self.emitFnAt(f, top_level);
}

// Emit a function whose signature starts at the current column and whose
// closing brace returns to `depth`. Flash spells parameters `name type`
// (no colon), drops the `->` before the return type, and omits the return
// entirely when absent (the lowering's `void` is implicit). A bodyless
// `extern` prototype simply ends — Flash has no terminating `;`.
fn emitFnAt(self: *Printer, f: ast.FnDecl, depth: usize) Error!void {
    // Modifier keywords, in their fixed output order.
    const modifiers = [_]struct { on: bool, text: []const u8 }{
        .{ .on = f.is_pub, .text = "pub " },
        .{ .on = f.is_export, .text = "export " },
        .{ .on = f.is_extern, .text = "extern " },
        .{ .on = f.is_inline, .text = "inline " },
    };
    for (modifiers) |modifier| {
        if (modifier.on) try self.raw(modifier.text);
    }

    try self.print("fn {s}(", .{f.name});
    for (f.params, 0..) |param, position| {
        if (position > 0) try self.raw(", ");
        if (param.is_comptime) try self.raw("comptime ");
        // An unnamed parameter is spelled `_`.
        try self.raw(param.name orelse "_");
        try self.raw(" ");
        try self.emitType(param.type);
    }
    try self.raw(")");

    // An explicit `callconv(…)` sits between the parameter list and the
    // return type. The formatter emits it only when the source wrote one —
    // the implicit C ABI of a bare `export fn` is the lowering's to add, so
    // re-emitting it here would invent surface the author did not write.
    if (f.call_conv) |convention| {
        try self.raw(" callconv(");
        try self.emitExpr(convention);
        try self.raw(")");
    }

    // The return type follows directly, on the same physical line as the
    // `)` (which is how the parser knows it is a return, not the next item).
    if (f.ret) |return_type| {
        try self.raw(" ");
        try self.emitType(return_type);
    }

    if (f.body) |statements| {
        try self.raw(" ");
        try self.emitBlockBody(statements, depth);
    }
}

// A top-level constant: its doc block, then the declaration, both at depth 0.
// The caller (render) flushes a trailing comment and emits the line break.
fn emitConstDecl(self: *Printer, c: ast.ConstDecl) Error!void {
    const top_level: usize = 0;
    try self.emitDoc(c.doc, top_level);
    try self.emitConstDeclAt(c, top_level);
}

// Emit `[pub ](const|var) NAME[ T] = value` at the current column, ending at
// the value with no trailing newline. `depth` threads into the value so a
// multiline string or a nested type definition lays out one level deeper. A
// top-level constant is never rewritten to `:=` (the short declaration is
// statement-only grammar).
// NOTE(review): earlier documentation of this function also listed an
// `align(e)` clause, but nothing here emits one — confirm whether
// ast.ConstDecl can carry an alignment that should be printed.
fn emitConstDeclAt(self: *Printer, c: ast.ConstDecl, depth: usize) Error!void {
    if (c.is_pub) try self.raw("pub ");
    const keyword: []const u8 = if (c.is_mut) "var " else "const ";
    try self.raw(keyword);
    try self.raw(c.name);

    if (c.type) |declared_type| {
        try self.raw(" ");
        try self.emitType(declared_type);
    }

    switch (c.value) {
        // A multiline string writes its own `=` and continues on the lines below.
        .multiline_str => |lines| {
            try self.raw(" ");
            try self.emitMultilineRhs(lines, depth);
        },
        else => {
            try self.raw(" = ");
            try self.emitValue(c.value, depth);
        },
    }
}

// Emit one import as `[pub ]use TARGET[ as ALIAS]`. A quoted file import
// names the module stem in quotes (`use "syscalls" as sys`); a bare module
// import names it unquoted (`use flibc`). The same form serves at the top
// level and inside a struct body.
fn emitUseDeclAt(self: *Printer, u: ast.UseDecl) Error!void {
    if (u.is_pub) try self.raw("pub ");
    try self.raw("use ");

    // A file import is quoted; a bare module import is not.
    if (u.is_file) {
        try self.print("\"{s}\"", .{u.module});
    } else {
        try self.raw(u.module);
    }

    const alias = u.alias orelse return;
    try self.raw(" as ");
    try self.raw(alias);
}

// Lay out a multiline-string value in assignment-RHS position. The caller
// has emitted the left-hand side and a trailing space, up to but not
// including the `=`. This writes the `=`, a line break, and then every string
// line prefixed with `\\` at `depth + 1` — the lowering's layout minus its
// trailing `;`. Output ends on the last `\\` line with no trailing newline
// (the caller closes the statement). `depth` is the statement's own indent.
fn emitMultilineRhs(self: *Printer, lines: []const []const u8, depth: usize) Error!void {
    const line_depth = depth + 1;
    try self.raw("=\n");
    for (lines, 0..) |text, position| {
        // Line breaks go between the string's lines, never after the last.
        if (position > 0) try self.raw("\n");
        try self.indent(line_depth);
        try self.raw("\\\\");
        try self.raw(text);
    }
}

// Emit the value of a binding or constant. A struct/enum/union type
// definition goes to emitTypeDef, which lays it out across multiple lines with
// its closing brace at `depth`; every other value goes to emitExprAt.
fn emitValue(self: *Printer, value: ast.Expr, depth: usize) Error!void {
    const is_type_def = switch (value) {
        .struct_def, .enum_def, .union_def => true,
        else => false,
    };
    if (is_type_def) {
        try self.emitTypeDef(value, depth);
    } else {
        try self.emitExprAt(value, depth);
    }
}

// Lay out a `struct { … }` / `enum { … }` / `union(…) { … }` definition.
// Fields/variants sit one per line at `depth + 1` with a trailing comma; the
// closing brace returns to `depth`. Flash spells a field `name type` (no
// colon), exactly as a parameter.
fn emitTypeDef(self: *Printer, x: ast.Expr, depth: usize) Error!void {
    // Every arm follows the same shape: header, one member per line (standalone
    // comments, doc block, the member, a trailing comma, a same-line comment),
    // associated declarations, block-final comments, closing brace. Only the
    // first member suppresses a leading blank line before its comments.
    switch (x) {
        .struct_def => |sd| {
            try self.raw("struct {\n");
            const sb = self.boundary; // the offset just past the whole struct
            for (sd.fields, 0..) |f, i| {
                // A member's lead anchor is its first doc line, else its name.
                if (self.anchorOffset(if (f.doc.len > 0) f.doc[0] else f.name)) |off| {
                    _ = try self.flushStandalone(off, depth + 1, i == 0);
                }
                try self.emitDoc(f.doc, depth + 1);
                try self.indent(depth + 1);
                try self.raw(f.name);
                try self.raw(" ");
                try self.emitType(f.type);
                if (f.default) |d| {
                    try self.raw(" = ");
                    try self.emitExpr(d);
                }
                try self.raw(",");
                try self.flushTrailing(self.anchorOffset(f.name));
                try self.raw("\n");
            }
            try self.emitContainerDecls(sd.decls, sd.fields.len != 0, depth, sb);
            try self.flushBlockClose(depth + 1, sb);
            try self.indent(depth);
            try self.raw("}");
        },
        .enum_def => |ed| {
            try self.raw("enum");
            if (ed.tag_type) |t| {
                try self.raw("(");
                try self.raw(t);
                try self.raw(")");
            }
            try self.raw(" {\n");
            const sb = self.boundary; // the offset just past the whole enum
            for (ed.variants, 0..) |v, i| {
                if (self.anchorOffset(if (v.doc.len > 0) v.doc[0] else v.name)) |off| {
                    _ = try self.flushStandalone(off, depth + 1, i == 0);
                }
                try self.emitDoc(v.doc, depth + 1);
                try self.indent(depth + 1);
                try self.raw(v.name);
                if (v.value) |val| {
                    try self.raw(" = ");
                    try self.emitExpr(val.*);
                }
                try self.raw(",");
                try self.flushTrailing(self.anchorOffset(v.name));
                try self.raw("\n");
            }
            try self.emitContainerDecls(ed.decls, ed.variants.len != 0, depth, sb);
            try self.flushBlockClose(depth + 1, sb);
            try self.indent(depth);
            try self.raw("}");
        },
        .union_def => |ud| {
            try self.raw("union");
            if (ud.tag) |t| {
                try self.raw("(");
                try self.raw(t);
                try self.raw(")");
            }
            try self.raw(" {\n");
            const sb = self.boundary; // the offset just past the whole union
            for (ud.variants, 0..) |v, i| {
                if (self.anchorOffset(if (v.doc.len > 0) v.doc[0] else v.name)) |off| {
                    _ = try self.flushStandalone(off, depth + 1, i == 0);
                }
                try self.emitDoc(v.doc, depth + 1);
                try self.indent(depth + 1);
                try self.raw(v.name);
                // A payload type renders `name type` (no colon); a bare name
                // is a void variant.
                if (v.payload) |ty| {
                    try self.raw(" ");
                    try self.emitType(ty);
                }
                try self.raw(",");
                try self.flushTrailing(self.anchorOffset(v.name));
                try self.raw("\n");
            }
            try self.emitContainerDecls(ud.decls, ud.variants.len != 0, depth, sb);
            try self.flushBlockClose(depth + 1, sb);
            try self.indent(depth);
            try self.raw("}");
        },
        // Callers route only the three definition tags here.
        else => unreachable,
    }
}

// Associated declarations follow a container's fields/variants, each
// preceded by a blank line (one after the member block, one between decls) —
// the idiomatic container layout. A container whose first member is a
// declaration gets no leading blank. `sb` is the offset just past the whole
// container, restored as the boundary when the decls are done.
fn emitContainerDecls(self: *Printer, decls: []const ast.ContainerDecl, has_members: bool, depth: usize, sb: usize) Error!void {
    for (decls, 0..) |d, idx| {
        if (idx != 0 or has_members) try self.raw("\n");
        // The separating blank (if any) was just written, so the comments
        // that lead this declaration never add a second one.
        const lead = self.anchorOffset(declLeadAnchor(d));
        if (lead) |off| _ = try self.flushStandalone(off, depth + 1, true);
        // The next declaration (or the container boundary) bounds this
        // one's method-body block-close flushes, so a method never
        // adopts a comment that belongs to a later method.
        self.boundary = if (idx + 1 < decls.len)
            (self.anchorOffset(declLeadAnchor(decls[idx + 1])) orelse sb)
        else
            sb;
        switch (d) {
            .method => |m| {
                try self.emitDoc(m.doc, depth + 1);
                try self.indent(depth + 1);
                try self.emitFnAt(m, depth + 1);
            },
            .constant => |c| {
                try self.emitDoc(c.doc, depth + 1);
                try self.indent(depth + 1);
                try self.emitConstDeclAt(c, depth + 1);
            },
            .use_import => |u| {
                try self.indent(depth + 1);
                try self.emitUseDeclAt(u);
            },
        }
        try self.flushTrailing(self.anchorOffset(declTailAnchor(d)));
        try self.raw("\n");
    }
    self.boundary = sb;
}

// --- statements ------------------------------------------------------

// Emit a brace-delimited block body, opening at the current column. An empty
// statement list collapses to `{}`; a non-empty one opens `{`, lays out one
// statement per line at `depth + 1`, and closes `}` back at `depth`.
fn emitBlockBody(self: *Printer, stmts: []const ast.Stmt, depth: usize) Error!void {
    if (stmts.len == 0) {
        try self.raw("{}");
        return;
    }
    try self.raw("{\n");
    try self.emitBlock(stmts, depth + 1);
    try self.indent(depth);
    try self.raw("}");
}

// Emit a block's statements, each on its own line at the given indent depth.
// A source blank line between two statements is preserved (collapsed to a
// single blank); there is never a blank after the opening `{` (the first
// statement carries none) or before the closing `}`.
fn emitBlock(self: *Printer, stmts: []const ast.Stmt, depth: usize) Error!void {
    const bb = self.boundary; // the offset just past this whole block
    // True until the block's first line (a comment or a statement) is written.
    var first = true;
    for (stmts, 0..) |s, idx| {
        const aoff = self.anchorOffset(stmtAnchor(s));
        if (aoff) |off| {
            if (try self.flushStandalone(off, depth, first)) first = false;
        }
        if (!first) {
            if (aoff) |off| if (blankBeforeOffset(self.src, off)) try self.raw("\n");
        }
        first = false;
        // The next statement (or the block boundary, for the last) bounds
        // this statement's own inner block-close flushes.
        self.boundary = if (idx + 1 < stmts.len)
            (self.anchorOffset(stmtAnchor(stmts[idx + 1])) orelse bb)
        else
            bb;
        try self.indent(depth);
        try self.emitStmt(s, depth);
        try self.flushTrailing(aoff);
        try self.raw("\n");
    }
    self.boundary = bb;
    // Comments between the last statement and the closing brace that are
    // indented past the block's owner belong inside; flush them here.
    try self.flushBlockClose(depth, bb);
}

// Emit a `///` doc-comment block: one line per entry at `depth`, the three
// slashes plus the preserved content. An empty `doc` emits nothing.
fn emitDoc(self: *Printer, doc: []const []const u8, depth: usize) Error!void {
    for (doc) |line| {
        try self.indent(depth);
        try self.raw("///");
        try self.raw(line);
        try self.raw("\n");
    }
}

// Emit one statement starting at the current column, with no trailing
// newline (the caller closes the line). `depth` is the statement's own indent;
// nested blocks and multi-line values are laid out relative to it.
fn emitStmt(self: *Printer, s: ast.Stmt, depth: usize) Error!void {
    switch (s) {
        .discard => |x| {
            // A multiline string writes its own `=` (see emitMultilineRhs).
            if (x == .multiline_str) {
                try self.raw("_ ");
                try self.emitMultilineRhs(x.multiline_str, depth);
            } else {
                try self.raw("_ = ");
                try self.emitExprAt(x, depth);
            }
        },
        .bind => |b| {
            // The short-declaration canon: an untyped, non-`align`,
            // non-`comptime` immutable binding renders `name := value`,
            // whatever spelling the author used. `name := e` and an untyped
            // `const name = e` lower identically, so this changes only the
            // surface form, never the meaning. Every other binding — `var`, a
            // typed or aligned `const`, a `comptime` local — keeps its keyword
            // form (`:=` has no typed, mutable, or comptime spelling).
            const short = !b.is_mut and b.type == null and b.align_expr == null and !b.is_comptime;
            if (short) {
                try self.raw(b.name);
                if (b.value == .multiline_str) {
                    // " :" plus the helper's "=" spells `:=` ahead of the lines.
                    try self.raw(" :");
                    try self.emitMultilineRhs(b.value.multiline_str, depth);
                } else {
                    try self.raw(" := ");
                    try self.emitValue(b.value, depth);
                }
            } else {
                if (b.is_comptime) try self.raw("comptime ");
                try self.raw(if (b.is_mut) "var " else "const ");
                try self.raw(b.name);
                if (b.type) |ty| {
                    try self.raw(" ");
                    try self.emitType(ty);
                }
                if (b.align_expr) |ae| {
                    try self.raw(" align(");
                    try self.emitExpr(ae);
                    try self.raw(")");
                }
                if (b.value == .multiline_str) {
                    try self.raw(" ");
                    try self.emitMultilineRhs(b.value.multiline_str, depth);
                } else {
                    try self.raw(" = ");
                    try self.emitValue(b.value, depth);
                }
            }
        },
        .assign => |a| {
            try self.emitExprAt(a.target, depth);
            try self.raw(" ");
            try self.raw(a.op); // "=", "+=", … verbatim
            try self.raw(" ");
            try self.emitExprAt(a.value, depth);
        },
        // The `:=` canon extends to destructures: an immutable one renders
        // `a, b := e` whether the author wrote that or `const a, b = e`
        // (a destructure has no type, `align`, or `comptime` spelling to
        // block the rewrite); a mutable one keeps `var a, b = e`.
        .destructure => |d| {
            if (d.is_mut) try self.raw("var ");
            for (d.names, 0..) |maybe, i| {
                if (i != 0) try self.raw(", ");
                // A skipped position is spelled `_`.
                try self.raw(maybe orelse "_");
            }
            try self.raw(if (d.is_mut) " = " else " := ");
            try self.emitValue(d.value, depth);
        },
        .destructure_assign => |da| {
            for (da.targets, 0..) |t, i| {
                if (i != 0) try self.raw(", ");
                try self.emitExprAt(t, depth);
            }
            try self.raw(" = ");
            try self.emitExprAt(da.value, depth);
        },
        .if_stmt => |iff| try self.emitIf(iff, depth),
        .defer_stmt => |inner| {
            try self.raw("defer ");
            try self.emitStmt(inner.*, depth);
        },
        .errdefer_stmt => |inner| {
            try self.raw("errdefer ");
            try self.emitStmt(inner.*, depth);
        },
        .defer_block => |stmts| {
            try self.raw("defer ");
            try self.emitBlockBody(stmts, depth);
        },
        .errdefer_block => |stmts| {
            try self.raw("errdefer ");
            try self.emitBlockBody(stmts, depth);
        },
        .while_stmt => |w| {
            if (w.is_inline) try self.raw("inline ");
            try self.raw("while ");
            try self.emitExprAt(w.cond, depth);
            if (w.capture) |cap| {
                try self.raw(" |");
                try self.raw(cap);
                try self.raw("|");
            }
            try self.raw(" ");
            try self.emitLoopBody(w.body, w.else_body, w.else_capture, depth);
        },
        .for_stmt => |fr| {
            if (fr.is_inline) try self.raw("inline ");
            try self.raw("for ");
            for (fr.captures, 0..) |c, i| {
                if (i != 0) try self.raw(", ");
                try self.raw(c);
            }
            try self.raw(" in ");
            try self.emitExprAt(fr.iter, depth);
            // A range loop carries its upper bound separately: `lo..hi`.
            if (fr.range_hi) |hi| {
                try self.raw("..");
                try self.emitExprAt(hi, depth);
            }
            try self.raw(" ");
            try self.emitLoopBody(fr.body, fr.else_body, null, depth);
        },
        .expr => |x| try self.emitExprAt(x, depth),
    }
}

// `if cond { … }`, with an `else { … }` arm or, when the else body is exactly
// one nested if, an idiomatic `else if … { … }` chain. The condition carries
// no parentheses (the statement form).
//
// An else arm that binds a capture (` else |err| { … }`) is never collapsed
// into a chain: the `else if` spelling written here has no place for the
// binding, so collapsing would silently drop `|err|` and change the program.
// Such an arm keeps its braces even when its body is a single nested `if`.
fn emitIf(self: *Printer, iff: ast.If, depth: usize) Error!void {
    try self.raw("if ");
    try self.emitExprAt(iff.cond, depth);
    if (iff.capture) |cap| {
        try self.raw(" |");
        try self.raw(cap);
        try self.raw("|");
    }
    try self.raw(" ");
    const after_if = self.boundary;
    if (iff.else_body) |eb| {
        // The then-body's block-close is bounded by the else clause, so it
        // does not adopt comments that belong to the else arm.
        self.boundary = self.anchorOffset(elseAnchor(eb)) orelse after_if;
        try self.emitBlockBody(iff.body, depth);
        self.boundary = after_if;
        try self.raw(" else ");
        const chains = iff.else_capture == null and eb.len == 1 and eb[0] == .if_stmt;
        if (chains) {
            try self.emitIf(eb[0].if_stmt, depth);
        } else {
            // ` else { … }`, the error capture printed as ` else |err| { … }`.
            if (iff.else_capture) |cap| {
                try self.raw("|");
                try self.raw(cap);
                try self.raw("| ");
            }
            try self.emitBlockBody(eb, depth);
        }
    } else {
        try self.emitBlockBody(iff.body, depth);
    }
}

// A loop body with its optional `else` arm (`while`/`for … else`). Mirrors
// emitIf's else handling: the body's block-close is bounded by the else
// clause so it does not adopt the else arm's comments; the capture (the
// `while` error binding) prints as ` else |err| { … }`.
fn emitLoopBody(self: *Printer, body: []ast.Stmt, else_body: ?[]ast.Stmt, else_capture: ?[]const u8, depth: usize) Error!void {
    const after_loop = self.boundary;
    if (else_body) |eb| {
        self.boundary = self.anchorOffset(elseAnchor(eb)) orelse after_loop;
        try self.emitBlockBody(body, depth);
        self.boundary = after_loop;
        try self.raw(" else ");
        if (else_capture) |cap| {
            try self.raw("|");
            try self.raw(cap);
            try self.raw("| ");
        }
        try self.emitBlockBody(eb, depth);
    } else {
        try self.emitBlockBody(body, depth);
    }
}

// --- expressions -----------------------------------------------------

// The depth-0 wrapper, for inline-only callers (type length / sentinel
// expressions, struct-field and enum-variant defaults) where an expression
// never spans multiple lines.
fn emitExpr(self: *Printer, x: ast.Expr) Error!void {
    try self.emitExprAt(x, 0);
}

// Emit an expression at indentation `depth`. Most forms are single-line and
// thread `depth` unchanged; the multi-line forms — a labeled block and the
// `switch` expression — lay their inner statements / prongs out at `depth + 1`
// and close at `depth`.
fn emitExprAt(self: *Printer, x: ast.Expr, depth: usize) Error!void {
    switch (x) {
        // Atoms carry their text directly; it is written out as stored.
        .int, .float, .string, .char, .ident, .value_word => |s| try self.raw(s),
        .multiline_str => |lines| {
            // Reached only outside a const/binding/discard value (a call
            // argument, an asm template). Indentation before `\\` does not
            // affect the value; the byte-exact layout is guaranteed for the
            // routed value positions, not here (the same deliberate limit the
            // lowering carries).
            try self.raw("\n");
            for (lines) |ln| {
                try self.raw("\\\\");
                try self.raw(ln);
                try self.raw("\n");
            }
        },
        // Postfix forms: the base expression first, then the suffix.
        .member => |m| {
            try self.emitExprAt(m.base.*, depth);
            try self.raw(".");
            try self.raw(m.field);
        },
        .deref => |d| {
            try self.emitExprAt(d.*, depth);
            try self.raw(".*");
        },
        .optional_unwrap => |u| {
            try self.emitExprAt(u.*, depth);
            try self.raw(".?");
        },
        .call => |c| {
            try self.emitExprAt(c.callee.*, depth);
            try self.emitArgs(c.args, depth);
        },
        .index => |ix| {
            try self.emitExprAt(ix.base.*, depth);
            try self.raw("[");
            try self.emitExprAt(ix.index.*, depth);
            try self.raw("]");
        },
        .slice => |s| {
            try self.emitExprAt(s.base.*, depth);
            try self.raw("[");
            try self.emitExprAt(s.lo.*, depth);
            // The `..` is padded on both sides when either bound asks for it
            // (see sliceBoundSpaces); otherwise it stays tight.
            const spaced = sliceBoundSpaces(s.lo.*) or (s.hi != null and sliceBoundSpaces(s.hi.?.*));
            if (spaced) try self.raw(" ");
            try self.raw("..");
            if (s.hi) |hi| {
                if (spaced) try self.raw(" ");
                try self.emitExprAt(hi.*, depth);
            }
            if (s.sentinel) |sen| {
                try self.raw(" :");
                try self.emitExprAt(sen.*, depth);
            }
            try self.raw("]");
        },
        .builtin_call => |b| {
            // The AST holds the bare intrinsic name; Flash spells it with the
            // '#' sigil (the lowering's '@' is the Tier-0 backend's).
            try self.raw("#");
            try self.raw(b.name);
            try self.emitArgs(b.args, depth);
        },
        // A prefix operator sits tight against its operand.
        .unary => |u| {
            try self.raw(u.op);
            try self.emitExprAt(u.operand.*, depth);
        },
        // A binary operator takes one space on each side.
        .binary => |b| {
            try self.emitExprAt(b.lhs.*, depth);
            try self.raw(" ");
            try self.raw(b.op); // verbatim — `&&` / `||` keep their Flash spelling
            try self.raw(" ");
            try self.emitExprAt(b.rhs.*, depth);
        },
        .struct_lit => |fields| {
            // An empty literal and a single unnamed value print tight
            // (`.{}` / `.{x}`); every other shape is padded (`.{ a, b }`).
            const spaced = !(fields.len == 0 or (fields.len == 1 and fields[0].name == null));
            try self.raw(if (spaced) ".{ " else ".{");
            for (fields, 0..) |f, idx| {
                if (idx != 0) try self.raw(", ");
                if (f.name) |n| {
                    try self.raw(".");
                    try self.raw(n);
                    try self.raw(" = ");
                }
                try self.emitExprAt(f.value, depth);
            }
            try self.raw(if (spaced) " }" else "}");
        },
        .typed_lit => |tl| {
            // Same padding rule as `.struct_lit`, with the type expression
            // written directly in front of the opening brace.
            try self.emitExprAt(tl.type.*, depth);
            const spaced = !(tl.fields.len == 0 or (tl.fields.len == 1 and tl.fields[0].name == null));
            try self.raw(if (spaced) "{ " else "{");
            for (tl.fields, 0..) |f, idx| {
                if (idx != 0) try self.raw(", ");
                if (f.name) |n| {
                    try self.raw(".");
                    try self.raw(n);
                    try self.raw(" = ");
                }
                try self.emitExprAt(f.value, depth);
            }
            try self.raw(if (spaced) " }" else "}");
        },
        .type_lit => |t| try self.emitType(t.*),
        .enum_lit => |v| {
            try self.raw(".");
            try self.raw(v);
        },
        .error_lit => |n| {
            try self.raw("error.");
            try self.raw(n);
        },
        .error_set => |names| {
            // Zero or one name prints tight (`error{}` / `error{A}`); two or
            // more are padded (`error{ A, B }`).
            const spaced = names.len > 1;
            try self.raw(if (spaced) "error{ " else "error{");
            for (names, 0..) |n, idx| {
                if (idx != 0) try self.raw(", ");
                try self.raw(n);
            }
            try self.raw(if (spaced) " }" else "}");
        },
        // Container definitions are handed to emitTypeDef with the whole node.
        .struct_def, .enum_def, .union_def => try self.emitTypeDef(x, depth),
        .group => |g| {
            try self.raw("(");
            try self.emitExprAt(g.*, depth);
            try self.raw(")");
        },
        // A value `if` keeps its parentheses, the one conditional that does —
        // `if (cond) a else b`, exactly as the surface grammar requires.
        .if_expr => |iff| {
            try self.raw("if (");
            try self.emitExprAt(iff.cond.*, depth);
            try self.raw(") ");
            try self.emitExprAt(iff.then.*, depth);
            try self.raw(" else ");
            try self.emitExprAt(iff.else_.*, depth);
        },
        // `switch subject { … }` — the subject carries no parentheses (the
        // statement-header form); prongs lay out one per line at depth + 1.
        .switch_expr => |sw| {
            try self.raw("switch ");
            try self.emitExprAt(sw.subject.*, depth);
            try self.raw(" {\n");
            const swb = self.boundary; // the offset just past the whole switch
            var firstm = true;
            for (sw.prongs, 0..) |prong, pidx| {
                // A prong with patterns anchors on its first pattern; a prong
                // without any (the `else` prong) has no lead offset, so no
                // standalone comment is flushed ahead of it here.
                const lead = if (prong.patterns.len > 0) self.anchorOffset(exprAnchor(prong.patterns[0].lo)) else null;
                if (lead) |off| {
                    if (try self.flushStandalone(off, depth + 1, firstm)) firstm = false;
                }
                // `firstm` is cleared unconditionally here, so it is true only
                // while handling the first prong; the conditional clear above
                // is subsumed by this one.
                firstm = false;
                // The next prong (or the switch boundary, for the last)
                // bounds this prong's own inner block-close flushes, so a
                // block-bodied prong never adopts a later prong's comments.
                self.boundary = if (pidx + 1 < sw.prongs.len) (self.anchorOffset(prongAnchor(sw.prongs[pidx + 1])) orelse swb) else swb;
                try self.indent(depth + 1);
                if (prong.is_else) {
                    try self.raw("else");
                } else {
                    // `a, b, lo...hi` — comma-separated patterns, a range
                    // pattern written with the three-dot form.
                    for (prong.patterns, 0..) |pat, idx| {
                        if (idx != 0) try self.raw(", ");
                        try self.emitExprAt(pat.lo, depth + 1);
                        if (pat.hi) |hi| {
                            try self.raw("...");
                            try self.emitExprAt(hi, depth + 1);
                        }
                    }
                }
                try self.raw(" => ");
                if (prong.capture) |cap| {
                    try self.raw("|");
                    try self.raw(cap);
                    try self.raw("| ");
                }
                try self.emitExprAt(prong.body, depth + 1);
                // Every prong ends with a comma; the trailing-comment flush
                // for the prong's lead anchor runs before the line break.
                try self.raw(",");
                try self.flushTrailing(lead);
                try self.raw("\n");
            }
            // Restore the switch-wide boundary before the closing-brace flush.
            self.boundary = swb;
            try self.flushBlockClose(depth + 1, swb);
            try self.indent(depth);
            try self.raw("}");
        },
        // An optional `label: ` prefix, then the block via emitBlockBody.
        .block_expr => |blk| {
            if (blk.label) |label| {
                try self.raw(label);
                try self.raw(": ");
            }
            try self.emitBlockBody(blk.body, depth);
        },
        .try_expr => |t| {
            try self.raw("try ");
            try self.emitExprAt(t.*, depth);
        },
        // `lhs catch handler`, or `lhs catch |err| handler` with a capture.
        .catch_expr => |c| {
            try self.emitExprAt(c.lhs.*, depth);
            try self.raw(" catch ");
            if (c.capture) |cap| {
                try self.raw("|");
                try self.raw(cap);
                try self.raw("| ");
            }
            try self.emitExprAt(c.handler.*, depth);
        },
        .asm_expr => |a| try self.emitAsm(a, depth),
        // `break`, then ` :label` and ` value` when each is present.
        .brk => |b| {
            try self.raw("break");
            if (b.label) |l| {
                try self.raw(" :");
                try self.raw(l);
            }
            if (b.value) |v| {
                try self.raw(" ");
                try self.emitExprAt(v.*, depth);
            }
        },
        .cont => try self.raw("continue"),
        .ret => |maybe| {
            try self.raw("return");
            if (maybe) |vals| {
                // The value list re-emits as written: `return v` for one
                // value, `return a, b` for the multi-return sugar (a
                // written `return .{ a, b }` is ONE struct_lit value, so
                // each spelling round-trips to itself).
                try self.raw(" ");
                for (vals, 0..) |v, idx| {
                    if (idx != 0) try self.raw(", ");
                    try self.emitExprAt(v, depth);
                }
            }
        },
    }
}

// A parenthesized argument list: `(a, b, c)`, one space after each comma and
// `()` when `args` is empty. Each argument is emitted at `depth`.
fn emitArgs(self: *Printer, args: []const ast.Expr, depth: usize) Error!void {
    try self.raw("(");
    for (args, 0..) |a, idx| {
        if (idx != 0) try self.raw(", ");
        try self.emitExprAt(a, depth);
    }
    try self.raw(")");
}

// --- types -----------------------------------------------------------

// The Flash spelling of a type — the lowering's mapping in reverse. The
// const-pointee default is implicit, so the pointer families drop the
// explicit `const` the lowering adds (`[]T`, `*T`, `[*]T`), and `mut` opts a
// pointee back into mutability. `argv` / `cstr` are ordinary names here: the
// builtin-alias expansion is the lowering's, not the surface's.
fn emitType(self: *Printer, t: ast.TypeRef) Error!void {
    switch (t) {
        .name => |n| try self.raw(n),
        // Slices: `[]T`, `[]mut T`, and the sentinel forms `[:s]T` / `[:s]mut T`.
        .slice => |inner| {
            try self.raw("[]");
            try self.emitType(inner.*);
        },
        .slice_mut => |inner| {
            try self.raw("[]mut ");
            try self.emitType(inner.*);
        },
        .slice_sentinel => |sp| {
            try self.raw("[:");
            try self.emitExpr(sp.sentinel.*);
            try self.raw("]");
            try self.emitType(sp.elem.*);
        },
        .slice_sentinel_mut => |sp| {
            try self.raw("[:");
            try self.emitExpr(sp.sentinel.*);
            try self.raw("]mut ");
            try self.emitType(sp.elem.*);
        },
        // Many-item pointers: `[*]` followed by the `mut` / `volatile`
        // qualifiers, `mut` always ahead of `volatile`.
        .many_ptr => |inner| {
            try self.raw("[*]");
            try self.emitType(inner.*);
        },
        .many_ptr_mut => |inner| {
            try self.raw("[*]mut ");
            try self.emitType(inner.*);
        },
        .many_ptr_volatile => |inner| {
            try self.raw("[*]volatile ");
            try self.emitType(inner.*);
        },
        .many_ptr_mut_volatile => |inner| {
            try self.raw("[*]mut volatile ");
            try self.emitType(inner.*);
        },
        .many_ptr_sentinel => |sp| {
            try self.raw("[*:");
            try self.emitExpr(sp.sentinel.*);
            try self.raw("]");
            try self.emitType(sp.elem.*);
        },
        .many_ptr_sentinel_mut => |sp| {
            try self.raw("[*:");
            try self.emitExpr(sp.sentinel.*);
            try self.raw("]mut ");
            try self.emitType(sp.elem.*);
        },
        // Single-item pointers, with the same qualifier order.
        .ptr => |inner| {
            try self.raw("*");
            try self.emitType(inner.*);
        },
        .ptr_mut => |inner| {
            try self.raw("*mut ");
            try self.emitType(inner.*);
        },
        .ptr_volatile => |inner| {
            try self.raw("*volatile ");
            try self.emitType(inner.*);
        },
        .ptr_mut_volatile => |inner| {
            try self.raw("*mut volatile ");
            try self.emitType(inner.*);
        },
        // Arrays: `[N]T`, `[N:s]T`, and the inferred-length `[_]T` / `[_:s]T`.
        // Length and sentinel are expressions, emitted at depth 0.
        .array => |arr| {
            try self.raw("[");
            try self.emitExpr(arr.len.*);
            try self.raw("]");
            try self.emitType(arr.elem.*);
        },
        .array_sentinel => |a| {
            try self.raw("[");
            try self.emitExpr(a.len.*);
            try self.raw(":");
            try self.emitExpr(a.sentinel.*);
            try self.raw("]");
            try self.emitType(a.elem.*);
        },
        .array_inferred => |elem| {
            try self.raw("[_]");
            try self.emitType(elem.*);
        },
        .array_inferred_sentinel => |sp| {
            try self.raw("[_:");
            try self.emitExpr(sp.sentinel.*);
            try self.raw("]");
            try self.emitType(sp.elem.*);
        },
        .optional => |inner| {
            try self.raw("?");
            try self.emitType(inner.*);
        },
        // `Set!T`, or the bare `!T` when no error set is stored.
        .errunion => |eu| {
            if (eu.set) |s| try self.emitType(s.*);
            try self.raw("!");
            try self.emitType(eu.payload.*);
        },
        .fn_type => |ft| {
            // `fn(P, …) R` — Flash writes the parameter list tight after `fn`
            // (no space), and omits the return when absent.
            try self.raw("fn(");
            for (ft.params, 0..) |p, idx| {
                if (idx != 0) try self.raw(", ");
                try self.emitType(p);
            }
            try self.raw(")");
            if (ft.ret) |r| {
                try self.raw(" ");
                try self.emitType(r.*);
            }
        },
        // `Name(arg, …)` — the arguments are expressions, not types.
        .generic => |g| {
            try self.raw(g.name);
            try self.raw("(");
            for (g.args, 0..) |arg, idx| {
                if (idx != 0) try self.raw(", ");
                try self.emitExpr(arg);
            }
            try self.raw(")");
        },
        .tuple => |elems| {
            // `(A, B)` — canonical form: one space after each comma, no
            // trailing comma (a tolerated source trailing comma drops).
            try self.raw("(");
            for (elems, 0..) |e, idx| {
                if (idx != 0) try self.raw(", ");
                try self.emitType(e);
            }
            try self.raw(")");
        },
    }
}

// `asm [volatile] (…)` — inline assembly, the structure transposed from the
// lowering (the template and constraint strings are a foreign sublanguage
// that passes through unchanged; only the operand types and value expressions
// take Flash spelling, via emitType / emitExpr). An asm output operand keeps
// its `-> T` arrow, which the surface retains for this position.
fn emitAsm(self: *Printer, a: ast.AsmExpr, depth: usize) Error!void {
    try self.raw("asm ");
    if (a.is_volatile) try self.raw("volatile ");
    try self.raw("(");

    const has_outputs = a.outputs.len > 0;
    const has_inputs = a.inputs.len > 0;
    const template_is_multiline = a.template.* == .multiline_str;

    // Compact form: a single-line template with no operand sections stays on
    // one line, the clobbers (if any) following after ` ::: `.
    if (!template_is_multiline and !has_outputs and !has_inputs) {
        try self.emitExprAt(a.template.*, depth);
        if (a.clobbers) |clobber_expr| {
            try self.raw(" ::: ");
            try self.emitExprAt(clobber_expr.*, depth);
        }
        try self.raw(")");
        return;
    }

    // Expanded form. A multi-line template starts on a fresh line with each
    // `\\` row at depth + 1; any other template stays on the `asm (` line.
    switch (a.template.*) {
        .multiline_str => |rows| {
            try self.raw("\n");
            for (rows) |row| {
                try self.indent(depth + 1);
                try self.raw("\\\\");
                try self.raw(row);
                try self.raw("\n");
            }
        },
        else => {
            try self.emitExprAt(a.template.*, depth);
            try self.raw("\n");
        },
    }

    // The sections are positional, so a later one forces every earlier one to
    // print even when empty: clobbers imply inputs, inputs imply outputs.
    const print_inputs = a.clobbers != null or has_inputs;
    const print_outputs = print_inputs or has_outputs;
    if (print_outputs) {
        try self.indent(depth + 1);
        try self.raw(":");
        try self.emitAsmOperandList(a.outputs, depth);
    }
    if (print_inputs) {
        try self.indent(depth + 1);
        try self.raw(":");
        try self.emitAsmOperandList(a.inputs, depth);
    }

    // With clobbers the `)` closes on the clobber line itself; without them
    // it takes its own line at `depth`.
    if (a.clobbers) |clobber_expr| {
        try self.indent(depth + 1);
        try self.raw(": ");
        try self.emitExprAt(clobber_expr.*, depth);
    } else {
        try self.indent(depth);
    }
    try self.raw(")");
}

// One asm operand section, written after its `:`. An empty section is just
// the line break. Otherwise every operand ends with `,` and a line break; the
// first follows the colon after one space, the rest start a new line at
// depth + 1 followed by one space.
fn emitAsmOperandList(self: *Printer, ops: []const ast.AsmOperand, depth: usize) Error!void {
    if (ops.len == 0) {
        try self.raw("\n");
        return;
    }
    for (ops, 0..) |operand, pos| {
        if (pos > 0) try self.indent(depth + 1);
        try self.raw(" ");
        try self.emitAsmOperand(operand, depth);
        try self.raw(",\n");
    }
}

// A single operand: `[name] constraint (body)`, where the body is either the
// `-> T` result type or a value expression.
fn emitAsmOperand(self: *Printer, op: ast.AsmOperand, depth: usize) Error!void {
    try self.raw("[");
    try self.raw(op.name);
    try self.raw("] ");
    try self.raw(op.constraint);
    try self.raw(" (");
    switch (op.body) {
        .expr => |value| try self.emitExprAt(value, depth),
        .ret_type => |result_type| {
            try self.raw("-> ");
            try self.emitType(result_type);
        },
    }
    try self.raw(")");
}
};

// Reports whether a slice bound makes the `..` take a space on each side: a
// binary operation or a `catch` does, every other expression form does not
// (the same rule the lowering applies).
fn sliceBoundSpaces(x: ast.Expr) bool {
    return x == .binary or x == .catch_expr;
}

// Reports whether the line directly above the one containing byte `offset`
// holds nothing but spaces, tabs, or carriage returns (an empty line counts).
// That is the author's paragraph break before the statement or comment at
// `offset`. Always false when `offset` is on the first line.
fn blankBeforeOffset(src: []const u8, offset: usize) bool {
    // The '\n' that ends the previous line; none means there is no such line.
    const prev_end = std.mem.lastIndexOfScalar(u8, src[0..offset], '\n') orelse return false;
    // The previous line starts just past the '\n' before it, or at byte 0.
    const prev_start = if (std.mem.lastIndexOfScalar(u8, src[0..prev_end], '\n')) |brk| brk + 1 else 0;
    return std.mem.trim(u8, src[prev_start..prev_end], " \t\r").len == 0;
}

// The offset of the newline ending the line *before* `offset`'s line, or null
// when `offset` is already on the first line. Used to step a blank-line check up
// one line, for a construct whose first stored slice is one line below its head
// (a `comptime { … }` block, anchored at its first statement).
fn prevLineBreakOffset(src: []const u8, offset: usize) ?usize {
    // The last '\n' before `offset` is the break that ends the previous line;
    // when there is none, `offset` sits on the first line.
    return std.mem.lastIndexOfScalar(u8, src[0..offset], '\n');
}

// The source column of byte `offset`: the number of bytes between the start of
// its line and `offset`. For a comment, this is how deeply it is indented.
fn commentColumn(src: []const u8, offset: usize) usize {
    const line_start = if (std.mem.lastIndexOfScalar(u8, src[0..offset], '\n')) |brk| brk + 1 else 0;
    return offset - line_start;
}

// Reports whether the comment starting at `start` trails other content, i.e.
// some byte other than a space, tab, or carriage return precedes it on the
// same source line. A false result means the comment stands alone on its line.
fn commentIsTrailing(src: []const u8, start: usize) bool {
    const line_start = if (std.mem.lastIndexOfScalar(u8, src[0..start], '\n')) |brk| brk + 1 else 0;
    return std.mem.trim(u8, src[line_start..start], " \t\r").len != 0;
}

// Reports whether [from, to) holds no newline, i.e. both offsets sit on the
// same physical line. An inverted range or a `to` past the end of `src`
// answers false.
fn noNewlineBetween(src: []const u8, from: usize, to: usize) bool {
    if (from > to or to > src.len) return false;
    return std.mem.indexOfScalar(u8, src[from..to], '\n') == null;
}

// The lead anchor of a top-level item: a source slice on the first line of the
// item's rendered form, counting a leading doc comment as part of the item. It
// locates the comments that come before the item so they can be flushed first.
fn itemLeadAnchor(it: ast.Item) ?[]const u8 {
    switch (it) {
        .use_decl => |use_item| return use_item.module,
        .link_decl => |link_item| return link_item.module,
        // A documented declaration leads with its first doc line.
        .const_decl => |const_item| return if (const_item.doc.len > 0) const_item.doc[0] else const_item.name,
        .fn_decl => |fn_item| return if (fn_item.doc.len > 0) fn_item.doc[0] else fn_item.name,
        // A comptime block anchors on its first statement; empty, it has none.
        .comptime_block => |body| return if (body.len > 0) stmtAnchor(body[0]) else null,
        // The quoted name lexeme is a source slice on the head line.
        .test_decl => |test_item| return test_item.name,
    }
}

// The tail anchor of a top-level item — a slice on the declaration's own first
// line (past any doc comment), used to attach a same-line trailing comment.
fn itemTailAnchor(it: ast.Item) ?[]const u8 {
    return switch (it) {
        .use_decl => |u| u.module,
        .link_decl => |l| l.module,
        .const_decl => |c| c.name,
        .fn_decl => |f| f.name,
        // A comptime block has no tail anchor, so no trailing comment attaches.
        .comptime_block => null,
        .test_decl => |t| t.name,
    };
}

// The lead / tail anchors of a container's associated declaration, as for items.
// The lead anchor is the first doc line when the declaration carries a doc
// comment, otherwise its name (or, for an import, its module).
fn declLeadAnchor(d: ast.ContainerDecl) ?[]const u8 {
    return switch (d) {
        .method => |m| if (m.doc.len > 0) m.doc[0] else m.name,
        .constant => |c| if (c.doc.len > 0) c.doc[0] else c.name,
        .use_import => |u| u.module,
    };
}

// The tail anchor ignores any doc comment: always the name (or module) slice.
fn declTailAnchor(d: ast.ContainerDecl) ?[]const u8 {
    return switch (d) {
        .method => |m| m.name,
        .constant => |c| c.name,
        .use_import => |u| u.module,
    };
}

// The boundary anchor of an `else` arm: its first statement's anchor. Null for
// an empty arm (`else {}`), which then simply keeps the enclosing boundary —
// also shielding the eb[0] index from the empty slice.
fn elseAnchor(eb: []ast.Stmt) ?[]const u8 {
    return if (eb.len > 0) stmtAnchor(eb[0]) else null;
}

// The boundary anchor of a switch prong: its first pattern. The `else` prong
// has no patterns; its body stands in — for a block body, the first statement
// (mirroring elseAnchor). Null falls back to the whole switch's boundary.
fn prongAnchor(p: ast.SwitchProng) ?[]const u8 {
    if (p.patterns.len > 0) return exprAnchor(p.patterns[0].lo);
    return switch (p.body) {
        .block_expr => |blk| elseAnchor(blk.body),
        else => exprAnchor(p.body),
    };
}

// A representative source slice on a statement's first physical line, used to
// recover its position for blank-line preservation. Null for the keyword-only
// forms (a bare `break` / `continue`) that store no anchor — they simply take no
// preserved blank. Every slice indexed here is length-checked first, so a
// statement with an empty list yields null rather than an out-of-bounds index.
fn stmtAnchor(s: ast.Stmt) ?[]const u8 {
    return switch (s) {
        .discard => |x| exprAnchor(x),
        .bind => |b| b.name,
        // A destructure anchors on its first real name — a `_` skip stores no
        // source slice, and the same-line comma rule keeps every name on the
        // statement's first line anyway.
        .destructure => |d| for (d.names) |maybe| {
            if (maybe) |name| break name;
        } else null,
        .assign => |a| exprAnchor(a.target),
        // Anchors on the first target; guarded like the other slice accesses
        // here so an empty target list gives null.
        .destructure_assign => |da| if (da.targets.len > 0) exprAnchor(da.targets[0]) else null,
        .if_stmt => |iff| exprAnchor(iff.cond),
        .while_stmt => |w| exprAnchor(w.cond),
        // A `for` anchors on its first capture, falling back to the iterable.
        .for_stmt => |fr| if (fr.captures.len > 0) fr.captures[0] else exprAnchor(fr.iter),
        .defer_stmt => |inner| stmtAnchor(inner.*),
        .errdefer_stmt => |inner| stmtAnchor(inner.*),
        // The block forms anchor on their first statement (like a top-level
        // comptime block); an empty block has no anchor.
        .defer_block, .errdefer_block => |stmts| if (stmts.len > 0) stmtAnchor(stmts[0]) else null,
        .expr => |x| exprAnchor(x),
    };
}

// The leftmost source slice of an expression (recursing into the head of a
// postfix / binary chain), or null for the forms whose head is a keyword or a
// synthesized node. Used only to locate a statement's first line.
fn exprAnchor(e: ast.Expr) ?[]const u8 {
    return switch (e) {
        .int, .float, .string, .char, .ident, .value_word, .enum_lit, .error_lit => |s| s,
        .multiline_str => |lines| if (lines.len > 0) lines[0] else null,
        .member => |m| exprAnchor(m.base.*),
        .deref => |d| exprAnchor(d.*),
        .optional_unwrap => |u| exprAnchor(u.*),
        .call => |c| exprAnchor(c.callee.*),
        .index => |ix| exprAnchor(ix.base.*),
        .slice => |s| exprAnchor(s.base.*),
        .builtin_call => |b| b.name,
        .unary => |u| u.op,
        .binary => |b| exprAnchor(b.lhs.*),
        .group => |g| exprAnchor(g.*),
        .if_expr => |iff| exprAnchor(iff.cond.*),
        .switch_expr => |sw| exprAnchor(sw.subject.*),
        .try_expr => |t| exprAnchor(t.*),
        .catch_expr => |c| exprAnchor(c.lhs.*),
        .typed_lit => |tl| exprAnchor(tl.type.*),
        // A `.{ … }` literal leads with `.{`, which is not a stored slice; use
        // its first field's name or value so a `return .{ … }` / `_ = .{ … }`
        // statement still has an anchor (without one, a leading comment would be
        // pushed past the statement instead of in front of it).
        .struct_lit => |fields| if (fields.len == 0) null else if (fields[0].name) |n| n else exprAnchor(fields[0].value),
        .error_set => |names| if (names.len > 0) names[0] else null,
        // A `return` anchors on its first value. A bare `return`, or a value
        // list that is present but empty, has no anchor.
        .ret => |m| if (m) |vals| (if (vals.len > 0) exprAnchor(vals[0]) else null) else null,
        .brk => |b| if (b.value) |v| exprAnchor(v.*) else null,
        else => null,
    };
}

// --- tests ---------------------------------------------------------------

const testing = std.testing;

// Parse `src` into a program with a fresh parser; a parse error propagates.
fn parseProg(arena: std.mem.Allocator, src: []const u8) parser.Error!ast.Program {
    var p = Parser.init(arena, src);
    return p.parseProgram();
}

// The line-comment lexemes of `src`, sorted, for a multiset comparison.
// The returned slice is allocated from `arena`.
fn sortedComments(arena: std.mem.Allocator, src: []const u8) ![]const []const u8 {
    var list: std.ArrayList([]const u8) = .empty;
    var lx = Lexer.init(src);
    while (true) {
        const t = lx.next();
        if (t.kind == .eof) break;
        if (t.kind == .line_comment) try list.append(arena, t.lexeme(src));
    }
    const slice = try list.toOwnedSlice(arena);
    std.mem.sort([]const u8, slice, {}, lessStr);
    return slice;
}

// Byte-wise lexicographic ordering, the comparator for std.mem.sort above.
fn lessStr(_: void, a: []const u8, b: []const u8) bool {
    return std.mem.lessThan(u8, a, b);
}

// The three gates, run on any source (with or without comments): formatting
// never changes the emitted Zig (lower(parse(src)) == lower(parse(fmt(src)))),
// the formatter is idempotent (fmt(fmt(src)) == fmt(src)), and every comment in
// the input appears exactly once in the output (multiset equality).
fn expectStable(src: []const u8) !void { var a = std.heap.ArenaAllocator.init(testing.allocator); defer a.deinit(); const arena = a.allocator(); const lowered_src = try lower.emit(arena, try parseProg(arena, src)); const formatted = try format(arena, src); const lowered_fmt = try lower.emit(arena, try parseProg(arena, formatted)); try testing.expectEqualStrings(lowered_src, lowered_fmt); const formatted2 = try format(arena, formatted); try testing.expectEqualStrings(formatted, formatted2); // comment multiset in == out const in_comments = try sortedComments(arena, src); const out_comments = try sortedComments(arena, formatted); try testing.expectEqual(in_comments.len, out_comments.len); for (in_comments, out_comments) |ic, oc| try testing.expectEqualStrings(ic, oc); } fn expectFormat(src: []const u8, want: []const u8) !void { var a = std.heap.ArenaAllocator.init(testing.allocator); defer a.deinit(); try testing.expectEqualStrings(want, try format(a.allocator(), src)); } test "hello: imports, links, an exported entry, binds and calls" { // A plain untyped immutable binding renders in the `:=` short-declaration // canon, whether the source wrote `:=` or `const x =`. 
try expectFormat( \\use flibc \\ \\link "flibc_start" \\link "flibc_mem" \\ \\export fn main(_ usize, _ argv) noreturn { \\ const msg = "hello from flash\n" \\ _ = flibc.sys.write_fd(1, msg.ptr, msg.len) \\ flibc.exit() \\} , \\use flibc \\ \\link "flibc_start" \\link "flibc_mem" \\ \\export fn main(_ usize, _ argv) noreturn { \\ msg := "hello from flash\n" \\ _ = flibc.sys.write_fd(1, msg.ptr, msg.len) \\ flibc.exit() \\} \\ ); } test "types: pointer, slice, sentinel, optional, error-union, fn-type spellings round-trip" { try expectStable( \\fn pass(p *u32, q *mut u32, m []u8, w []mut u8, s [*:0]u8) *u32 { \\ return q \\} \\ \\const VTable = struct { \\ alloc *fn(*mut anyopaque, usize) ?[*]mut u8, \\ free *fn(*mut anyopaque, []mut u8) void, \\} \\ \\fn dup(path cstr) AllocError!i32 { \\ return error.OutOfMemory \\} ); } test "control flow: if/else-if, while-capture, range-for, switch, defer round-trip" { try expectStable( \\fn run(n usize) void { \\ for i in 0..n { \\ if i == 0 { \\ continue \\ } else if i == 1 { \\ defer cleanup() \\ } else { \\ work(i) \\ } \\ } \\ while it.next() |x| { \\ _ = x \\ } \\ switch tag { \\ 0 => low(), \\ 1, 2 => mid(), \\ else => high(), \\ } \\} ); } test "containers: struct with fields and a method, enum, tagged union round-trip" { try expectStable( \\const Point = struct { \\ x i32, \\ y i32 = 0, \\ \\ fn sum(self Point) i32 { \\ return self.x + self.y \\ } \\} \\ \\const Color = enum(u8) { \\ red, \\ green = 5, \\ blue, \\} \\ \\const Tok = union(enum) { \\ eof, \\ int usize, \\} ); } test "enum and union bodies with methods, constants, and imports round-trip" { try expectStable( \\const Color = enum(u8) { \\ red, \\ green = 5, \\ \\ use "names" as names \\ \\ const COUNT usize = 2 \\ \\ /// the canonical default \\ pub fn default() Color { \\ return .red \\ } \\} \\ \\const Tok = union(enum) { \\ eof, \\ int usize, \\ \\ fn isEof(self Tok) bool { \\ return self == .eof \\ } \\} ); } test "expressions: builtins, logical 
operators, casts, struct literals round-trip" { try expectStable( \\fn f(a bool, b bool) usize { \\ if a && b || c { \\ return #intCast(x) \\ } \\ p := P{ .x = 1, .y = 2 } \\ q := .{ 1, 2, 3 } \\ return value orelse 0 \\} ); } test "doc comments are preserved on the declaration they lead" { try expectStable( \\/// the maximum \\/// width \\pub const MAX = 80 \\ \\/// add two numbers \\fn add(a i32, b i32) i32 { \\ return a + b \\} ); } test "blank lines between statements are preserved, collapsed to one" { try expectFormat( \\fn f() void { \\ a() \\ \\ \\ b() \\ c() \\} , \\fn f() void { \\ a() \\ \\ b() \\ c() \\} \\ ); } test "value if-expression keeps its parentheses" { try expectStable( \\fn pick(c bool) usize { \\ return if (c) 1 else 2 \\} ); } test "the := canon: a plain bind rewrites; typed, var, comptime keep their keyword" { try expectFormat( \\fn f() void { \\ const b = 2 \\ const c i32 = 3 \\ var d = 4 \\ comptime const e = 5 \\} , \\fn f() void { \\ b := 2 \\ const c i32 = 3 \\ var d = 4 \\ comptime const e = 5 \\} \\ ); } test "the := canon round-trips and stays stable across binding kinds" { try expectStable( \\fn f() void { \\ a := compute() \\ const b = other() \\ const c usize = 3 \\ var d = 4 \\ const g usize align(16) = 6 \\} ); } test "a standalone comment leads a statement; a trailing one rides its line" { try expectFormat( \\fn f() void { \\ // compute the sum \\ s := a + b // the running total \\ return s \\} , \\fn f() void { \\ // compute the sum \\ s := a + b // the running total \\ return s \\} \\ ); } test "trailing comments ride enum variants" { try expectFormat( \\const Kind = enum { \\ command, // the first token \\ path, // a later token \\} , \\const Kind = enum { \\ command, // the first token \\ path, // a later token \\} \\ ); } test "a file-header block and a doc comment are both preserved" { try expectFormat( \\// header line one \\// header line two \\ \\/// a doc \\pub const MAX = 80 , \\// header line one \\// header line 
two \\ \\/// a doc \\pub const MAX = 80 \\ ); } test "a blank line before a top-level comptime block is preserved" { try expectFormat( \\fn shim() void { \\ work() \\} \\ \\comptime { \\ #export(&shim, .{ .name = "_start" }) \\} , \\fn shim() void { \\ work() \\} \\ \\comptime { \\ #export(&shim, .{ .name = "_start" }) \\} \\ ); } test "a module-head //! comment leads the file" { try expectFormat( \\//! module documentation \\ \\use flibc , \\//! module documentation \\ \\use flibc \\ ); } test "a comment-only file emits its comments" { try expectFormat( \\// just a comment \\// and another , \\// just a comment \\// and another \\ ); } test "a block-final comment stays inside the block" { try expectFormat( \\fn f() void { \\ work() \\ // trailing note inside the block \\} , \\fn f() void { \\ work() \\ // trailing note inside the block \\} \\ ); } test "consecutive top-level declarations keep the author's blank-line grouping" { try expectFormat( \\pub const A = x.A \\pub const B = x.B \\ \\pub const C = x.C , \\pub const A = x.A \\pub const B = x.B \\ \\pub const C = x.C \\ ); } test "a comment leads a return-struct-literal statement, not pushed past it" { try expectFormat( \\fn f() T { \\ // build the result \\ return .{ .key = .none } \\} , \\fn f() T { \\ // build the result \\ return .{ .key = .none } \\} \\ ); } test "a trailing comment on a method's statement stays in that method" { try expectFormat( \\const S = struct { \\ fn a(self S) void { \\ return \\ } \\ \\ fn b(self S) void { \\ v := f() // a trailing note \\ } \\} , \\const S = struct { \\ fn a(self S) void { \\ return \\ } \\ \\ fn b(self S) void { \\ v := f() // a trailing note \\ } \\} \\ ); } test "comment-rich source: every comment survives, output is stable" { try expectStable( \\// a leading file comment \\ \\use flibc // the C runtime \\ \\/// the entry \\export fn main(_ usize, _ argv) noreturn { \\ // set up \\ n := count() // how many \\ for i in 0..n { \\ // each iteration \\ step(i) \\ 
} \\ // tear down \\ flibc.exit() \\} ); } test "composite-type alias declarations round-trip" { try expectStable( \\const F = *fn(u8) u8 \\const O = ?u8 \\const S = []u8 \\const M = *mut fn() void \\ \\fn take(g Get([]u8)) void { \\ _ = g \\} ); } test "defer/errdefer block form round-trips, comments riding inside" { try expectStable( \\fn run(fd i32) !void { \\ defer { \\ // release in reverse order \\ close(fd) \\ close(fd + 1) \\ } \\ errdefer { \\ close(0) \\ } \\ defer close(fd) \\ return \\} ); } test "test blocks round-trip, comments riding inside" { try expectStable( \\// suite header \\use std \\ \\test "first" { \\ // inside the body \\ n := 1 \\ _ = n \\} \\ \\test "empty" {} ); } test "loop else arms and the if else-capture round-trip, comments riding inside" { try expectStable( \\fn f(xs []u8, c bool) void { \\ if next() |v| { \\ consume(v) \\ } else |err| { \\ // the failure arm \\ log(err) \\ } \\ while next() |v| { \\ consume(v) \\ } else |err| { \\ log(err) \\ } \\ while c { \\ // body comment stays in the body \\ step() \\ } else { \\ done() \\ } \\ for x in xs { \\ consume(x) \\ } else { \\ done() \\ } \\} ); } test "inline loops round-trip: inline for across its shapes, inline while unchanged" { try expectStable( \\fn f(xs []u8, n usize) void { \\ inline for x in xs { \\ consume(x) \\ } \\ inline for i in 0..n { \\ consume(i) \\ } else { \\ done() \\ } \\ inline for x, i in xs { \\ // comment rides the unrolled body \\ consume(i) \\ } \\ inline while n > 0 { \\ step() \\ } \\} ); } test "an empty else arm round-trips (the elseAnchor guard)" { // `else {}` has no first statement to anchor the boundary on; the guard // keeps the enclosing boundary instead of indexing the empty arm. 
try expectStable( \\fn f(xs []u8, c bool) void { \\ if c {} else {} \\ while c {} else {} \\ for x in xs {} else {} \\} ); } test "a block-bodied prong does not adopt a later prong's standalone comment" { // Without the per-prong boundary narrowing, prong `.a`'s block-close // flush was bounded by the whole switch and pulled `.b`'s interior // comment back to the end of `.a`'s body. try expectStable( \\fn f(k Kind) void { \\ switch k { \\ .a => { \\ work() \\ }, \\ .b => { \\ // interior comment of prong b \\ more() \\ }, \\ } \\} ); } test "a block-bodied prong does not adopt a later prong's trailing comments" { // The adoption was not limited to standalone comments: trailing comments // on statements inside a later block-bodied prong were pulled back too, // degraded to standalone lines at the end of the earlier prong's body. try expectStable( \\fn f(k Kind) void { \\ switch k { \\ .a => { \\ work() \\ }, \\ .b => { \\ if cond { \\ x() // trailing on x \\ y() // trailing on y \\ } \\ }, \\ } \\} ); } test "a leading comment inside a block-bodied prong stays inside" { // The inverse drift: a standalone comment as the first line inside a // prong's block was re-sited out of the prong when an earlier // block-bodied prong's close flush reached past the prong header. 
try expectStable( \\fn f(k Kind) void { \\ switch k { \\ .a => { \\ work() \\ }, \\ .b => |w| { \\ // leading interior comment \\ if w { \\ x() \\ } \\ }, \\ else => { \\ // the else prong holds its own comment too \\ done() \\ }, \\ } \\} ); } test "tuple types and multi-return round-trip, comments riding inside" { try expectStable( \\const Pair = (u8, bool) \\ \\fn pair() (u8, bool) { \\ // both spellings hold \\ return 42, true \\} \\ \\fn lit() Pair { \\ return .{ 7, false } \\} \\ \\fn first(t (u8, (u8, bool))) u8 { \\ return (t[0] + t[1][0]) * 1 \\} ); } test "a tuple type's trailing comma drops to the canonical spelling" { try expectFormat( \\fn pair() (u8, bool,) { \\ return 42, true \\} , \\fn pair() (u8, bool) { \\ return 42, true \\} \\ ); } test "destructures round-trip, comments riding inside" { try expectStable( \\fn pair() (u8, bool) { \\ return 42, true \\} \\ \\fn demo() void { \\ // both skips hold \\ tok, _ := pair() \\ _, ok := pair() \\ var x, y = pair() \\ x, y = pair() // the assignment list is verbatim \\ arr[0], y = pair() \\ _ = tok \\ _ = ok \\ _ = x \\} ); } test "the ':=' canon extends to destructures: 'const' rewrites, 'var' keeps its keyword" { try expectFormat( \\fn demo() void { \\ const a, b = pair() \\ var x, y = pair() \\ _ = .{ a, b, x, y } \\} , \\fn demo() void { \\ a, b := pair() \\ var x, y = pair() \\ _ = .{ a, b, x, y } \\} \\ ); }