// Package gates runs cross-cutting policy gates that compose multiple // scans over a project tree. Today the only gate is check-attribution, // which combines a tracked-file scan (delegated to // internal/workflow.Detector — the same primitive comment-hygiene uses) // with a commit-body scan applying a stricter, trailer-anchored pattern // set. The package is consumed by the eeco gates CLI verb; it depends // on git being on PATH for the commit-body scan and the tracked-files // enumeration. package gates import ( "bytes" "errors" "fmt" "os" "os/exec" "path/filepath" "regexp" "strings" "github.com/ajhahnde/eeco/internal/workflow" ) // Pattern fragments are assembled at runtime so this source stays // self-clean for eeco's own comment-hygiene scan (Constraint 3 — // mirrors the discipline in internal/workflow/attribution.go and // internal/hooks/commitmsg.go). var ( gateCoAuthored = "[Cc]o-" + "[Aa]uthored-" + "[Bb]y" gateGenVerb = "[Gg]enerated" gateRobotEmoji = "\\x{1F916}" ) // strictTrailerPatterns is the commit-body pattern set — same shape as // internal/hooks/commitmsg.go. Trailer-anchored Co-Authored-By rules so // a docs commit subject like "remove the Co-Authored-By trailer" does // not false-fire, plus the robot-emoji Generated-with signature. var strictTrailerPatterns = []*regexp.Regexp{ regexp.MustCompile(`(?im)^` + gateCoAuthored + `:.*claude`), regexp.MustCompile(`(?im)^` + gateCoAuthored + `:.*anthropic`), regexp.MustCompile(`(?im)^` + gateCoAuthored + `:.*noreply@anthropic`), regexp.MustCompile(gateRobotEmoji + `[^\n]{0,20}` + gateGenVerb), } // textExtensions is the default extension allowlist for the file scan // — same set the existing scripts/check_comment_hygiene.sh in // downstream consumers uses, extended with the Go-side extensions a // Go project carries. var textExtensions = map[string]bool{ ".md": true, ".sh": true, ".go": true, ".zig": true, ".S": true, ".inc": true, ".zon": true, ".yml": true, ".yaml": true, ".txt": true, ".ld": true, ".json": true, ".toml": true, } // Options governs CheckAttribution scope. Zero value scans nothing — // callers must set at least one of ScanFiles / ScanCommits. type Options struct { // Paths overrides the default tracked-files enumeration when set. // Each entry is repo-relative. Paths []string // Range is the commit-body git range (e.g. "origin/main..HEAD"). An // empty value selects the default: origin/main..HEAD when // resolvable, otherwise HEAD~10..HEAD with a notice. Range string // ScanFiles enables the tracked-tree file scan. ScanFiles bool // ScanCommits enables the commit-body scan. ScanCommits bool // Excludes are additional repo-relative paths to skip during the // file scan; the gate's own source is already excluded. Excludes []string } // Finding is one policy hit. Path/Line/Excerpt set for file hits; // Commit/Line/Excerpt set for commit-body hits. type Finding struct { Path string Line int Commit string Excerpt string } // Result groups findings with non-fatal notices the caller should // surface to stderr (for example the HEAD~10 range fallback). type Result struct { Findings []Finding Notices []string } // CheckAttribution runs the configured scans against workdir (a git // repository). Returns the combined Result and a non-nil error only on // infrastructure failure (workdir is not a repo, git is unavailable). // A clean result is Result{} with both slices nil; a finding-only // outcome returns the populated Result and a nil error so callers can // distinguish "ran and found things" from "could not run". func CheckAttribution(workdir string, opts Options) (Result, error) { var res Result if opts.ScanFiles { fs, err := scanFiles(workdir, opts) if err != nil { return res, err } res.Findings = append(res.Findings, fs...) } if opts.ScanCommits { cs, notices, err := scanCommits(workdir, opts) if err != nil { return res, err } res.Findings = append(res.Findings, cs...) res.Notices = append(res.Notices, notices...) } return res, nil } func scanFiles(workdir string, opts Options) ([]Finding, error) { paths := opts.Paths if len(paths) == 0 { out, err := runGit(workdir, "ls-files") if err != nil { return nil, fmt.Errorf("git ls-files: %w", err) } for p := range strings.SplitSeq(strings.TrimRight(out, "\n"), "\n") { p = strings.TrimSpace(p) if p == "" { continue } if !isTextExtension(p) { continue } paths = append(paths, p) } } excluded := make(map[string]bool, len(opts.Excludes)+1) excluded["internal/gates/attribution.go"] = true for _, e := range opts.Excludes { excluded[filepath.ToSlash(e)] = true } det, err := workflow.NewDetector(nil) if err != nil { return nil, fmt.Errorf("build detector: %w", err) } var findings []Finding for _, rel := range paths { if excluded[rel] { continue } full := filepath.Join(workdir, rel) b, err := os.ReadFile(full) if err != nil { continue } // Cheap binary sniff so a JSON-like blob with a NUL skips. if bytes.IndexByte(b[:min(len(b), 8000)], 0) != -1 { continue } for _, hit := range det.Scan(rel, string(b)) { excerpt := readLine(b, hit.Line) findings = append(findings, Finding{ Path: rel, Line: hit.Line, Excerpt: excerpt, }) } } return findings, nil } func scanCommits(workdir string, opts Options) ([]Finding, []string, error) { var notices []string rng := opts.Range if rng == "" { if _, err := runGit(workdir, "rev-parse", "--verify", "--quiet", "origin/main"); err == nil { rng = "origin/main..HEAD" } else { rng = "HEAD~10..HEAD" notices = append(notices, "origin/main not resolvable; commit-body scan range falls back to "+rng) } } out, err := runGit(workdir, "rev-list", rng) if err != nil { // Empty range (e.g. HEAD has no ancestor for HEAD~10..HEAD in a // shallow repo): treat as no commits, not an infrastructure // failure. The notice already names the fallback range. return nil, notices, nil } var findings []Finding for sha := range strings.FieldsSeq(out) { body, err := runGit(workdir, "log", "-1", "--format=%B", sha) if err != nil { continue } for _, p := range strictTrailerPatterns { loc := p.FindStringIndex(body) if loc == nil { continue } line := strings.Count(body[:loc[0]], "\n") + 1 excerpt := strings.TrimRight(body[loc[0]:loc[1]], "\r\n") findings = append(findings, Finding{ Commit: shortSHA(sha), Line: line, Excerpt: excerpt, }) break // one hit per commit is enough — keep reports terse } } return findings, notices, nil } func runGit(workdir string, args ...string) (string, error) { cmd := exec.Command("git", args...) cmd.Dir = workdir var stdout, stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { var exitErr *exec.ExitError if errors.As(err, &exitErr) { return "", fmt.Errorf("git %s: %s", strings.Join(args, " "), strings.TrimSpace(stderr.String())) } return "", fmt.Errorf("git %s: %w", strings.Join(args, " "), err) } return stdout.String(), nil } func shortSHA(sha string) string { if len(sha) >= 7 { return sha[:7] } return sha } func isTextExtension(path string) bool { ext := filepath.Ext(path) return textExtensions[strings.ToLower(ext)] } // readLine returns the 1-indexed line of b, with trailing CR/LF // stripped. An out-of-range line returns "". func readLine(b []byte, n int) string { if n <= 0 { return "" } cur := 1 start := 0 for i, c := range b { if c != '\n' { continue } if cur == n { return strings.TrimRight(string(b[start:i]), "\r") } cur++ start = i + 1 } if cur == n { return strings.TrimRight(string(b[start:]), "\r") } return "" }