// diff-corpus — the stage0-vs-stage1 differential harness.
//
// Runs two flashc binaries over every .flash file in the given directories
// and asserts byte-identical observable behaviour in three modes:
//
//   * transpile     (flashc FILE)               — stdout, stderr, exit status
//   * token stream  (flashc --dump-tokens FILE) — stdout, stderr, exit status
//   * formatting    (flashc fmt COPY)           — the rewritten file's bytes,
//                                                 stderr, exit status
//
// Files that the compiler rejects participate too: both binaries must then
// reject with the same diagnostics and the same exit status, so the corpus
// covers the error surface as well as the happy path. Any divergence is
// reported and the run exits non-zero. This byte-equality check is the
// license every hybrid module swap must renew.
//
// The stage1 binary renders diagnostics with a source caret by default;
// stage0 is frozen and prints the bare one-line form. Every stage1
// invocation therefore carries --plain-diagnostics, which restores the
// frozen bytes — so the comparison keeps pinning the diagnostic text itself.
//
// Usage: diff-corpus STAGE0_FLASHC STAGE1_FLASHC DIR [DIR ...]

const std = @import("std");
const Io = std.Io;

// Scratch directory (relative to the working directory) that receives the
// per-binary copies used by the fmt mode.
const tmp_root = ".zig-cache/diff-corpus";

// Upper bound, in bytes, on any file this harness reads back into memory.
const max_file_size: usize = 1 << 20;

// Printed when too few arguments are given. It names both binaries and the
// mandatory first directory, matching what main actually reads from argv.
const usage_text = "usage: diff-corpus STAGE0_FLASHC STAGE1_FLASHC DIR [DIR ...]\n";

// Entry point. argv[1] is the stage0 binary, argv[2] the stage1 binary and
// argv[3..] the corpus directories. Prints one line per diverging mode and a
// summary line; exits with status 1 when any mode diverged, and returns
// error.BadArguments when fewer than three arguments were supplied.
pub fn main(init: std.process.Init) !void {
    const io = init.io;
    const arena = init.arena.allocator();

    var stdout_buf: [4096]u8 = undefined;
    var stdout_obj = std.Io.File.stdout().writer(io, &stdout_buf);
    const out = &stdout_obj.interface;
    // Best-effort flush on the normal return paths; the mismatch path below
    // flushes explicitly because std.process.exit does not run defers.
    defer out.flush() catch {};

    const args = try init.minimal.args.toSlice(arena);
    if (args.len < 4) {
        try out.writeAll(usage_text);
        return error.BadArguments;
    }
    const stage0 = args[1];
    const stage1 = args[2];

    try Io.Dir.cwd().createDirPath(io, tmp_root);

    var mismatches: usize = 0;
    var files: usize = 0;

    for (args[3..]) |dir_path| {
        // Collect the directory's .flash entries and sort them, so the
        // report order is stable across runs and platforms.
        var dir = try Io.Dir.cwd().openDir(io, dir_path, .{ .iterate = true });
        defer dir.close(io);

        var names: std.ArrayList([]const u8) = .empty;
        var it = dir.iterate();
        while (try it.next(io)) |entry| {
            if (entry.kind != .file) continue;
            if (!std.mem.endsWith(u8, entry.name, ".flash")) continue;
            // The name is copied before the next call to the iterator.
            try names.append(arena, try arena.dupe(u8, entry.name));
        }
        std.mem.sort([]const u8, names.items, {}, lessThanStr);

        for (names.items) |name| {
            const path = try std.fs.path.join(arena, &.{ dir_path, name });
            files += 1;
            mismatches += try compareFile(arena, io, out, stage0, stage1, path);
        }
    }

    if (mismatches > 0) {
        try out.print("diff-corpus: {d} mismatch(es) across {d} files\n", .{ mismatches, files });
        try out.flush();
        std.process.exit(1);
    }
    try out.print("diff-corpus: {d} files x 3 modes, no differences\n", .{files});
}

// Byte-wise lexicographic "less than" for std.mem.sort over file names.
fn lessThanStr(_: void, a: []const u8, b: []const u8) bool {
    return std.mem.lessThan(u8, a, b);
}

// One observed run: captured streams plus how the process ended.
const Observed = struct {
    stdout: []u8,
    stderr: []u8,
    term: std.process.Child.Term,
};

// Runs argv once and returns its captured stdout, stderr and termination.
// The captured buffers are allocated from `arena`.
fn runOnce(arena: std.mem.Allocator, io: Io, argv: []const []const u8) !Observed {
    const r = try std.process.run(arena, io, .{ .argv = argv });
    return .{ .stdout = r.stdout, .stderr = r.stderr, .term = r.term };
}

// True when two runs ended the same way and produced identical bytes on
// both stdout and stderr.
fn sameObserved(a: Observed, b: Observed) bool {
    return std.meta.eql(a.term, b.term) and
        std.mem.eql(u8, a.stdout, b.stdout) and
        std.mem.eql(u8, a.stderr, b.stderr);
}

// Returns a copy of `path` with every '/' and '\\' replaced by '_', so the
// result can be used as a single file name inside tmp_root.
fn flattenPath(arena: std.mem.Allocator, path: []const u8) ![]u8 {
    const flat = try arena.dupe(u8, path);
    for (flat) |*c| {
        if (c.* == '/' or c.* == '\\') c.* = '_';
    }
    return flat;
}

// Compare one source file across the three modes. Returns the number of
// modes (0–3) in which the two binaries diverged. One line is written to
// `out` for each diverging mode. Errors from spawning a binary or from file
// I/O are propagated to the caller.
fn compareFile(
    arena: std.mem.Allocator,
    io: Io,
    out: *Io.Writer,
    stage0: []const u8,
    stage1: []const u8,
    path: []const u8,
) !usize {
    var bad: usize = 0;

    // The two read-only modes share a shape: same argv tail, two binaries.
    const direct_modes = [_]struct { tag: []const u8, flag: ?[]const u8 }{
        .{ .tag = "transpile", .flag = null },
        .{ .tag = "tokens", .flag = "--dump-tokens" },
    };
    for (direct_modes) |mode| {
        var argv: std.ArrayList([]const u8) = .empty;
        try argv.append(arena, stage0);
        if (mode.flag) |f| try argv.append(arena, f);
        try argv.append(arena, path);
        const a = try runOnce(arena, io, argv.items);

        // stage1 gets the same tail, preceded by --plain-diagnostics.
        var argv1: std.ArrayList([]const u8) = .empty;
        try argv1.append(arena, stage1);
        try argv1.append(arena, "--plain-diagnostics");
        if (mode.flag) |f| try argv1.append(arena, f);
        try argv1.append(arena, path);
        const b = try runOnce(arena, io, argv1.items);

        if (!sameObserved(a, b)) {
            bad += 1;
            try out.print("diff-corpus: {s}: {s} differs\n", .{ path, mode.tag });
        }
    }

    // fmt rewrites in place, so each binary formats its own pristine copy
    // and the comparison is over the resulting bytes.
    const src = try Io.Dir.cwd().readFileAlloc(io, path, arena, .limited(max_file_size));
    const flat = try flattenPath(arena, path);
    const copy0 = try std.fmt.allocPrint(arena, "{s}/s0_{s}", .{ tmp_root, flat });
    const copy1 = try std.fmt.allocPrint(arena, "{s}/s1_{s}", .{ tmp_root, flat });
    try Io.Dir.cwd().writeFile(io, .{ .sub_path = copy0, .data = src });
    try Io.Dir.cwd().writeFile(io, .{ .sub_path = copy1, .data = src });

    const f0 = try runOnce(arena, io, &.{ stage0, "fmt", copy0 });
    const f1 = try runOnce(arena, io, &.{ stage1, "--plain-diagnostics", "fmt", copy1 });
    const out0 = try Io.Dir.cwd().readFileAlloc(io, copy0, arena, .limited(max_file_size));
    const out1 = try Io.Dir.cwd().readFileAlloc(io, copy1, arena, .limited(max_file_size));

    // Only the termination, stderr and the rewritten bytes are compared;
    // fmt's stdout is not part of the check.
    // NOTE(review): stderr is compared verbatim although the two copies have
    // different names (s0_ vs s1_); presumably fmt diagnostics do not embed
    // the file path — confirm.
    const fmt_same = std.meta.eql(f0.term, f1.term) and
        std.mem.eql(u8, f0.stderr, f1.stderr) and
        std.mem.eql(u8, out0, out1);
    if (!fmt_same) {
        bad += 1;
        try out.print("diff-corpus: {s}: fmt differs\n", .{path});
    }

    return bad;
}