package docs import ( "bytes" "errors" "fmt" "os" "path/filepath" "strings" ) // Marker spellings for `eeco docs compact`. Fixed in slice 1; a future // slice can introduce a config knob for custom markers if a user needs // it. const ( startMarker = "" endMarker = "" ) // CompactRegion records one marked region that was (or would be, in // dry-run) moved to the archive. Line numbers are 1-based and inclusive, // covering the start marker line through the end marker line. type CompactRegion struct { StartLine int EndLine int } // CompactReport summarises a compact run. It is returned in both the // dry-run and write paths so a CLI caller can render the same summary // either way. type CompactReport struct { Source string Archive string Regions []CompactRegion ArchiveExists bool DryRun bool } // Compact moves every region of source delimited by // `` / `` into // archive, leaving a pointer stub in place at the source (marker mode). // It is a thin wrapper over the shared compact engine; the regions to // move are discovered from explicit markers. The public signature is // unchanged. func Compact(source, archive string, dryRun bool) (CompactReport, error) { return compact(source, archive, dryRun, func(raw []byte) ([]CompactRegion, error) { return scanArchiveRegions(raw) }) } // CompactKeepLast moves heading-delimited regions of source into archive // (heading mode). prefix is a heading-line prefix such as "## Snapshot" // whose `#` run fixes the section level; the keepLast most-recent // matching sections (newest first, top of file) are kept and everything // older is archived. It shares every move mechanic with Compact via the // compact engine — only region discovery differs. Heading mode refuses // to run on a source that still carries explicit archive markers (the // two modes are mutually exclusive). 
func CompactKeepLast(source, archive string, dryRun bool, prefix string, keepLast int) (CompactReport, error) {
	// Bind prefix and keepLast here so the engine only ever sees the
	// generic "bytes in, regions out" discovery signature.
	findByHeading := func(raw []byte) ([]CompactRegion, error) {
		return scanHeadingRegions(raw, prefix, keepLast)
	}
	return compact(source, archive, dryRun, findByHeading)
}

// compact is the shared engine behind Compact (marker mode) and
// CompactKeepLast (heading mode). find discovers the regions to move;
// everything downstream — the archive stat, the dry-run / no-region
// early return, splitRegions, appendArchive, and the source rewrite — is
// identical across both modes. Both paths are absolute. The
// repo-relativity check belongs in the CLI layer where the repo root is
// known; this function trusts both paths.
//
// Behaviour:
//   - Markers / headings inside fenced code blocks (``` or ~~~) are ignored.
//   - Unmatched, nested, or out-of-order markers return an error.
//   - With dryRun=true, nothing is written; the report still names every
//     region that would move.
//   - Re-running with no discoverable regions is an idempotent no-op
//     (returns an empty Regions slice and writes nothing).
//   - The archive file is created on first run and appended to on later
//     runs; the appended content is a deterministic concatenation of the
//     cut regions, each preceded by a one-line provenance header.
//   - The source-doc trailing newline is preserved exactly.
//
// The header carries no date or wall-clock content so byte output is
// reproducible across runs.
func compact(source, archive string, dryRun bool, find func([]byte) ([]CompactRegion, error)) (CompactReport, error) { report := CompactReport{ Source: source, Archive: archive, DryRun: dryRun, } raw, err := os.ReadFile(source) if err != nil { return report, fmt.Errorf("read source: %w", err) } regions, err := find(raw) if err != nil { return report, err } report.Regions = regions if _, err := os.Stat(archive); err == nil { report.ArchiveExists = true } else if !errors.Is(err, os.ErrNotExist) { return report, fmt.Errorf("stat archive: %w", err) } if len(regions) == 0 || dryRun { return report, nil } // Source path is given as an absolute path; the archive header // records the source by basename to keep the header short and avoid // leaking the operator's local layout. The stub references the // archive by its path relative to the source's directory so a // reader can follow the pointer without guessing where the archive // lives. sourceTag := filepath.Base(source) stubTarget, relErr := filepath.Rel(filepath.Dir(source), archive) if relErr != nil { stubTarget = filepath.Base(archive) } stubTarget = filepath.ToSlash(stubTarget) archiveAddition, sourceRewrite := splitRegions(raw, regions, sourceTag, stubTarget) if err := appendArchive(archive, archiveAddition, !report.ArchiveExists); err != nil { return report, fmt.Errorf("write archive: %w", err) } if err := os.WriteFile(source, sourceRewrite, 0o644); err != nil { return report, fmt.Errorf("rewrite source: %w", err) } return report, nil } // scanArchiveRegions walks src line-by-line tracking fenced-code state // and returns every paired start/end region. Markers inside a fence are // ignored. A start without a matching end, an end without an open start, // or a second start before the first end is a hard error. 
func scanArchiveRegions(src []byte) ([]CompactRegion, error) { var regions []CompactRegion inFence := false openStart := 0 // 1-based line number of the open start marker; 0 = no open start lines := splitLinesKeepEOL(src) for i, line := range lines { lineNo := i + 1 trimmed := strings.TrimRight(line, "\r\n") // Track fenced code boundaries. The trim handles indented fences too // (a fence may carry leading whitespace). stripped := strings.TrimLeft(trimmed, " \t") if strings.HasPrefix(stripped, "```") || strings.HasPrefix(stripped, "~~~") { inFence = !inFence continue } if inFence { continue } marker := strings.TrimSpace(trimmed) switch marker { case startMarker: if openStart != 0 { return nil, fmt.Errorf("%s line %d: nested start marker (previous still open at line %d)", "compact", lineNo, openStart) } openStart = lineNo case endMarker: if openStart == 0 { return nil, fmt.Errorf("%s line %d: end marker with no matching start", "compact", lineNo) } regions = append(regions, CompactRegion{StartLine: openStart, EndLine: lineNo}) openStart = 0 } } if openStart != 0 { return nil, fmt.Errorf("compact line %d: start marker with no matching end", openStart) } return regions, nil } // headingSection is one matched heading-mode section: the 1-based line of // the matched heading and the 1-based line of the boundary that // terminates it (exclusive — the section spans [start, end)). At EOF the // terminating boundary is len(lines)+1. type headingSection struct { start int end int } // headingLevel returns the ATX-heading level of line (the number of // leading `#` characters) when line is a heading, or 0 when it is not. A // heading is a run of one or more `#` at the start of the line (after // optional leading whitespace) followed by a space or the line end. The // trailing newline is ignored. 
func headingLevel(line string) int { s := strings.TrimLeft(strings.TrimRight(line, "\r\n"), " \t") n := 0 for n < len(s) && s[n] == '#' { n++ } if n == 0 || (n < len(s) && s[n] != ' ') { return 0 } return n } // scanHeadingRegions discovers archivable regions by heading rather than // by explicit markers. prefix is a heading-line prefix such as // "## Snapshot"; its `#` run fixes the section level L. A *matched* // section opens at a heading of exactly level L whose trimmed text has // the given prefix, and runs until the next *boundary* heading (any // heading of level <= L) or EOF — so a section can never swallow a later // same-or-higher heading such as a live "## Next session" tail. The N // most-recent matched sections (newest first, i.e. topmost in the file) // are kept; everything older is archivable, and adjacent archivable // sections coalesce into one CompactRegion per contiguous run. Headings // inside fenced code blocks are ignored, mirroring scanArchiveRegions. // // Heading mode is mutually exclusive with explicit markers: if the // source already contains a paired archive-marker region, this returns // an error rather than silently mixing the two schemes. func scanHeadingRegions(src []byte, prefix string, keepLast int) ([]CompactRegion, error) { level := headingLevel(prefix) if level == 0 { return nil, fmt.Errorf("compact: --heading %q is not a markdown heading (expected a leading '#' run, e.g. \"## Snapshot\")", prefix) } if keepLast < 0 { return nil, fmt.Errorf("compact: --keep-last must be >= 0 (got %d)", keepLast) } // Any explicit archive markers (a complete pair, or even a malformed // unmatched/nested one) mean the source is set up for marker mode; // refuse rather than silently mix the two schemes. Inline prose // mentions are unaffected — scanArchiveRegions only matches standalone // marker lines. 
if markers, err := scanArchiveRegions(src); err != nil || len(markers) > 0 { return nil, errors.New("source contains explicit archive markers; remove them or drop --keep-last") } wantPrefix := strings.TrimSpace(prefix) lines := splitLinesKeepEOL(src) var matched []headingSection openStart := 0 // 1-based line of the currently open matched section; 0 = none inFence := false for i, line := range lines { lineNo := i + 1 stripped := strings.TrimLeft(strings.TrimRight(line, "\r\n"), " \t") if strings.HasPrefix(stripped, "```") || strings.HasPrefix(stripped, "~~~") { inFence = !inFence continue } if inFence { continue } lvl := headingLevel(line) if lvl == 0 || lvl > level { continue // body line (a deeper heading does not split the section) } // A boundary heading (lvl <= level) closes any open matched section. if openStart != 0 { matched = append(matched, headingSection{start: openStart, end: lineNo}) openStart = 0 } // The boundary is itself a new matched section only when it is at // exactly level L and carries the prefix. if lvl == level && strings.HasPrefix(strings.TrimSpace(line), wantPrefix) { openStart = lineNo } } if openStart != 0 { matched = append(matched, headingSection{start: openStart, end: len(lines) + 1}) } if keepLast >= len(matched) { return nil, nil // nothing older than the kept window — idempotent no-op } archivable := matched[keepLast:] // newest-on-top: keep the first keepLast // Coalesce adjacent archivable sections (section_i.end == the next // section's start) into maximal contiguous runs; each run is one // region whose EndLine is the last line before its terminating // boundary. 
var regions []CompactRegion for i := 0; i < len(archivable); { runStart := archivable[i].start runEnd := archivable[i].end j := i + 1 for j < len(archivable) && archivable[j].start == runEnd { runEnd = archivable[j].end j++ } regions = append(regions, CompactRegion{StartLine: runStart, EndLine: runEnd - 1}) i = j } return regions, nil } // splitRegions partitions src into (archiveBytes, sourceBytes) using the // pre-validated regions. Each cut region (markers + body) is appended to // archiveBytes after a one-line provenance header. The same region is // replaced in sourceBytes with a single-line pointer stub that names the // archive destination. func splitRegions(src []byte, regions []CompactRegion, sourceTag, stubTarget string) (archiveAddition, sourceRewrite []byte) { lines := splitLinesKeepEOL(src) newline := dominantNewline(lines) stub := fmt.Sprintf("> _archived to `%s` (eeco docs compact)._%s", stubTarget, newline) var archive bytes.Buffer var out bytes.Buffer cursor := 0 for _, r := range regions { for ; cursor < r.StartLine-1; cursor++ { out.WriteString(lines[cursor]) } out.WriteString(stub) archive.WriteString("") archive.WriteString(newline) for j := r.StartLine - 1; j < r.EndLine; j++ { archive.WriteString(lines[j]) } // Guarantee a blank line between consecutive archive blocks. If // the cut content already ended with a newline (the end-marker // line normally does), one extra newline is enough; if it did // not, add two. last := lines[r.EndLine-1] if !strings.HasSuffix(last, "\n") { archive.WriteString(newline) } archive.WriteString(newline) cursor = r.EndLine } for ; cursor < len(lines); cursor++ { out.WriteString(lines[cursor]) } return archive.Bytes(), out.Bytes() } // dominantNewline picks the newline style used most often in lines, with // a "\n" fallback for files with no newlines at all. 
func dominantNewline(lines []string) string {
	crlf, lf := 0, 0
	for _, line := range lines {
		// CRLF must be tested first: a CRLF line also ends in "\n".
		switch {
		case strings.HasSuffix(line, "\r\n"):
			crlf++
		case strings.HasSuffix(line, "\n"):
			lf++
		}
	}
	// Ties (including "no newlines at all") resolve to LF.
	if crlf > lf {
		return "\r\n"
	}
	return "\n"
}

// splitLinesKeepEOL returns the lines of src with their trailing newline
// (LF or CRLF) preserved. An unterminated final line is returned as-is.
// Empty input yields a nil slice.
func splitLinesKeepEOL(src []byte) []string {
	var lines []string
	for len(src) > 0 {
		i := bytes.IndexByte(src, '\n')
		if i < 0 {
			lines = append(lines, string(src))
			break
		}
		lines = append(lines, string(src[:i+1]))
		src = src[i+1:]
	}
	return lines
}

// appendArchive appends content to archive, creating the file (and any
// parent directories) on first write. When the archive already exists
// and is non-empty, a single blank line is written between the prior
// content and the new content so successive runs do not glue blocks
// together visually.
//
// An existing archive is opened in append mode and only the separator
// and the new content are written. Earlier archive content is never
// truncated or rewritten, so a failed write cannot destroy it.
//
// createNew selects the create path; the caller decides it from an
// earlier stat of archive.
func appendArchive(archive string, content []byte, createNew bool) error {
	if createNew {
		if err := os.MkdirAll(filepath.Dir(archive), 0o755); err != nil {
			return err
		}
		return os.WriteFile(archive, content, 0o644)
	}

	// No O_CREATE: a missing archive on this path is an error, as it was
	// when the file was read back in full.
	f, err := os.OpenFile(archive, os.O_RDWR|os.O_APPEND, 0o644)
	if err != nil {
		return err
	}
	err = appendWithSeparator(f, content)
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	return err
}

// appendWithSeparator writes content to the end of f, preceded by
// whatever newlines are needed to leave exactly one blank line after the
// existing content. Nothing is inserted when f is empty.
func appendWithSeparator(f *os.File, content []byte) error {
	info, err := f.Stat()
	if err != nil {
		return err
	}

	var sep string
	if size := info.Size(); size > 0 {
		// Only the final byte matters for choosing the separator.
		var last [1]byte
		if _, err := f.ReadAt(last[:], size-1); err != nil {
			return err
		}
		sep = "\n"
		if last[0] != '\n' {
			sep = "\n\n"
		}
	}

	// One Write call so separator and content land together.
	payload := make([]byte, 0, len(sep)+len(content))
	payload = append(payload, sep...)
	payload = append(payload, content...)
	_, err = f.Write(payload)
	return err
}