#!/usr/bin/env python3
"""cc-audit — lint any CLAUDE.md / AGENTS.md against the 12-rule baseline.

Usage:
    python cc_audit.py                    # scans ./CLAUDE.md, ./AGENTS.md
    python cc_audit.py path/to/file.md    # explicit path
    python cc_audit.py --json             # machine-readable output

Checks:
    1. File exists and is non-empty
    2. Size below the 200-line compliance cliff
    3. Each of the 12 baseline rules has some signal (keyword match)
    4. No forbidden anti-patterns (e.g. leaked paypal links, huge token dumps)
    5. Project-specifics section is present
    6. YAML frontmatter is valid if present

Exit code 0 if the file passes, 1 if it has warnings, 2 if broken.
"""
from __future__ import annotations

import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable

# --- 12-rule keyword signals ---------------------------------------------
# We don't require exact wording; we look for any of the signal words for
# the rule. A rule is "covered" if at least one signal appears somewhere
# in the file. These are intentionally permissive — missing a rule entirely
# is the problem, not paraphrasing it.
# Each entry is (rule label, signal phrases). Matching is a case-insensitive
# substring test, so one hit on any phrase marks the rule as covered.
RULE_SIGNALS: list[tuple[str, list[str]]] = [
    ("1: think before coding",
     ["assumption", "think before", "surface tradeoffs", "push back"]),
    ("2: simplicity first",
     ["simplicity", "minimum code", "speculative", "simplest"]),
    ("3: surgical changes",
     ["surgical", "touch only", "adjacent code", "match existing style"]),
    ("4: goal-driven execution",
     ["goal-driven", "success criteria", "define success", "until verified"]),
    ("5: don't make the model do non-language work",
     ["non-language", "deterministic code", "deterministic logic",
      "retry policy", "routing is code"]),
    ("6: hard token budget",
     ["token budget", "budget", "spiral", "ceiling", "re-chew"]),
    ("7: surface conflicts",
     ["surface conflict", "two pattern", "pick one", "conflict"]),
    ("8: read before you write",
     ["read before", "understand adjacent", "adjacent code"]),
    ("9: tests gated by correctness",
     ["tests are gated", "behavior, not shape", "assertions", "not just"]),
    ("10: checkpoints for long operations",
     ["checkpoint", "long-running", "commit between", "multi-step"]),
    ("11: convention beats novelty",
     ["convention", "established pattern", "novelty"]),
    ("12: fail visibly",
     ["fail visibly", "partial failure", "silent", "skipped rows",
      "truncated output"]),
]

# Each entry is (regex, message). Patterns are searched case-insensitively
# against the raw file text; any hit marks the file as broken.
ANTI_PATTERNS: list[tuple[str, str]] = [
    (r"paypal\.me/[\w-]+", "leaked paypal link — remove before committing"),
    (r"ghp_[A-Za-z0-9]{10,}", "leaked GitHub PAT token"),
    (r"sk-[A-Za-z0-9]{20,}", "leaked API key (sk-...)"),
    (r"\bAKIA[0-9A-Z]{16}\b", "leaked AWS access key"),
    (r"\bpassword\s*[:=]\s*['\"]", "literal password in clear text"),
]

COMPLIANCE_CLIFF = 200  # lines — past this, agent compliance drops sharply
IDEAL_MAX = 150


# --- core logic -----------------------------------------------------------

@dataclass
class Result:
    """Outcome of auditing a single instruction file.

    ``unusable`` is set when the path exists but there is nothing to audit
    (empty file, directory, unreadable file); the reason is recorded in
    ``notes``. It is the last field so existing positional construction
    keeps working.
    """

    path: str
    exists: bool
    lines: int = 0
    rules_hit: list[str] = field(default_factory=list)
    rules_missing: list[str] = field(default_factory=list)
    anti_patterns: list[str] = field(default_factory=list)
    has_project_specifics: bool = False
    size_warning: str | None = None
    notes: list[str] = field(default_factory=list)
    unusable: bool = False

    @property
    def score(self) -> int:
        """Return the 0–100 compliance score.

        Up to 80 points for rule coverage, 10 for a project-specifics
        section, 10 for having no anti-patterns, minus 10 when the file is
        past the compliance cliff. Missing or unusable files score 0.
        """
        if not self.exists or self.unusable:
            return 0
        points = int(len(self.rules_hit) / len(RULE_SIGNALS) * 80)
        if self.has_project_specifics:
            points += 10
        if not self.anti_patterns:
            points += 10
        if self.size_warning:
            points -= 10
        return max(0, min(100, points))

    @property
    def status(self) -> str:
        """Return ``"missing"``, ``"broken"``, ``"warn"`` or ``"pass"``."""
        if not self.exists:
            return "missing"
        if self.unusable or self.anti_patterns:
            return "broken"
        if len(self.rules_missing) > 4 or self.size_warning:
            return "warn"
        return "pass"


def _count_lines(text: str) -> int:
    """Count lines the way an editor does.

    A trailing newline terminates the last line rather than starting a new
    one, and an empty string has zero lines.
    """
    if not text:
        return 0
    return text.count("\n") + (0 if text.endswith("\n") else 1)


def _frontmatter_problem(text: str) -> str | None:
    """Return a message if YAML frontmatter is opened but never closed.

    Only the delimiter structure is checked; the YAML body is not parsed
    because the standard library has no YAML parser.
    """
    rows = text.splitlines()
    if not rows or rows[0].rstrip() != "---":
        return None  # no frontmatter present
    if any(row.rstrip() in {"---", "..."} for row in rows[1:]):
        return None
    return "YAML frontmatter opened with '---' but never closed"


def audit_text(text: str, path: str = "<text>") -> Result:
    """Audit already-loaded file content.

    Args:
        text: Full content of the instruction file.
        path: Label stored in ``Result.path``.

    Returns:
        A ``Result`` with ``exists=True``. Whitespace-only content is marked
        ``unusable`` (status ``"broken"``) but the remaining checks still
        run, so the rule lists are populated as for any other file.
    """
    r = Result(path=path, exists=True, lines=_count_lines(text))

    if not text.strip():
        r.unusable = True
        r.notes.append("file is empty")

    low = text.lower()
    for name, signals in RULE_SIGNALS:
        covered = any(signal.lower() in low for signal in signals)
        (r.rules_hit if covered else r.rules_missing).append(name)

    r.anti_patterns = [
        message
        for pattern, message in ANTI_PATTERNS
        if re.search(pattern, text, flags=re.IGNORECASE)
    ]

    if re.search(r"project\s*specific", low):
        r.has_project_specifics = True
    else:
        r.notes.append("no 'project specifics' section — add repo-specific rules")

    if r.lines > COMPLIANCE_CLIFF:
        r.size_warning = (
            f"file is {r.lines} lines — past the ~{COMPLIANCE_CLIFF}-line "
            "compliance cliff; compliance drops sharply beyond this"
        )
    elif r.lines > IDEAL_MAX:
        r.notes.append(
            f"file is {r.lines} lines — above the ideal {IDEAL_MAX}; "
            "consider trimming"
        )

    # Informational only: a frontmatter problem does not change the status.
    problem = _frontmatter_problem(text)
    if problem:
        r.notes.append(problem)

    return r


def audit(path: Path) -> Result:
    """Audit the instruction file at *path*.

    Never raises for filesystem problems: a missing path yields status
    ``"missing"``; a directory or unreadable file yields status ``"broken"``
    with the reason in ``notes``.
    """
    label = str(path)
    if not path.exists():
        return Result(path=label, exists=False, notes=[f"file not found: {path}"])
    if not path.is_file():
        return Result(path=label, exists=True, unusable=True,
                      notes=[f"not a regular file: {path}"])
    try:
        # errors="replace" keeps the audit going on files with stray bytes.
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        return Result(path=label, exists=True, unusable=True,
                      notes=[f"cannot read file: {exc}"])
    return audit_text(text, path=label)


def find_default_files(cwd: Path) -> list[Path]:
    """Return the well-known agent instruction files that exist under *cwd*."""
    candidates = ["CLAUDE.md", "AGENTS.md", ".cursorrules",
                  ".github/copilot-instructions.md"]
    return [cwd / c for c in candidates if (cwd / c).exists()]


def render_text(results: Iterable[Result]) -> str:
    """Format *results* as a human-readable report, one section per file."""
    out: list[str] = []
    for r in results:
        out.append(f"=== {r.path} ===")
        if not r.exists:
            out.append(" ⨯ file not found")
            out.append("")
            continue
        out.append(
            f" status: {r.status.upper()} score: {r.score}/100 "
            f"lines: {r.lines}"
        )
        out.append(f" rules covered: {len(r.rules_hit)}/{len(RULE_SIGNALS)}")
        if r.rules_missing:
            out.append(" missing rules:")
            out.extend(f" - {m}" for m in r.rules_missing)
        if r.anti_patterns:
            out.append(" anti-patterns:")
            out.extend(f" ⨯ {a}" for a in r.anti_patterns)
        if r.size_warning:
            out.append(f" ⚠ {r.size_warning}")
        out.extend(f" · {n}" for n in r.notes)
        out.append("")
    return "\n".join(out)


def render_json(results: Iterable[Result]) -> str:
    """Serialize *results* as an indented JSON array of per-file objects."""
    payload = [
        {
            "path": r.path,
            "exists": r.exists,
            "status": r.status,
            "score": r.score,
            "lines": r.lines,
            "rules_hit": r.rules_hit,
            "rules_missing": r.rules_missing,
            "anti_patterns": r.anti_patterns,
            "has_project_specifics": r.has_project_specifics,
            "size_warning": r.size_warning,
            "notes": r.notes,
        }
        for r in results
    ]
    return json.dumps(payload, indent=2)


def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return the process exit code.

    Returns:
        0 if every file passes, 1 if the worst result is a warning, 2 if any
        file is broken or missing (or no file could be found at all).
    """
    # __doc__ is None when docstrings are stripped (python -OO).
    description = (__doc__ or "cc-audit").splitlines()[0]
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("paths", nargs="*", type=Path,
                        help="CLAUDE.md / AGENTS.md files to audit (default: "
                             "auto-detect from cwd)")
    parser.add_argument("--json", action="store_true",
                        help="emit machine-readable JSON")
    args = parser.parse_args(argv)

    if not args.paths:
        args.paths = find_default_files(Path.cwd())
        if not args.paths:
            print("no CLAUDE.md / AGENTS.md found in cwd", file=sys.stderr)
            return 2

    results = [audit(target) for target in args.paths]

    if args.json:
        print(render_json(results))
    else:
        print(render_text(results))

    # exit codes: 0 pass, 1 warn, 2 broken/missing
    exit_codes = {"pass": 0, "warn": 1, "broken": 2, "missing": 2}
    return max((exit_codes.get(r.status, 0) for r in results), default=0)


if __name__ == "__main__":
    sys.exit(main())