package workflow

import (
	"os"
	"path/filepath"
	"regexp"
	"slices"
	"strings"
	"time"
)

// GitWriteGuardResult is the outcome of the git-write guard over a
// candidate Bash command. Decision is decisionDeny or decisionAllow. On a
// deny, Reason is the operator-facing explanation carried back as the
// PreToolUse permission-decision reason. On an allow, Consumed lists the
// one-shot sentinel kinds ("commit" / "tag") the caller must remove — the
// guard leaves consumption to the caller so a deny (an unauthorized op, or
// a gate finding on an authorized commit) never burns the authorization.
// A command with no commit / tag mutation is an allow with empty Consumed.
type GitWriteGuardResult struct {
	// Decision is decisionAllow or decisionDeny.
	Decision string
	// Reason is the operator-facing explanation; populated on a deny.
	Reason string
	// Consumed names the sentinel kinds ("tag", "commit") the caller must
	// remove; populated on an allow that used an authorization.
	Consumed []string
}

// Values carried in GitWriteGuardResult.Decision.
const (
	decisionAllow = "allow"
	decisionDeny  = "deny"
)

// sentinelTTL is how long an authorization sentinel stays valid after the
// operator sets it via `eeco authorize`. A stale sentinel is cleared and
// treated as unauthorized, so a forgotten authorization cannot linger.
const sentinelTTL = 15 * time.Minute

// gitGlobalValueOpts are the git global options that consume the following
// token as their value, used when walking past global options to the
// subcommand. It mirrors the set in isGitCommit (commitguard.go); a second
// copy here keeps ScanCommitGuard untouched while classifyGitWrite reuses
// the same walk for any subcommand.
//
// NOTE(review): an option that takes a separate value but is missing from
// this set makes the walk read its value as the subcommand. Newer git
// releases appear to accept `--attr-source <tree-ish>` in separated form —
// confirm, and if so add it here and in isGitCommit together.
var gitGlobalValueOpts = map[string]bool{
	"-C": true, "-c": true, "--git-dir": true, "--work-tree": true,
	"--namespace": true, "--exec-path": true, "--super-prefix": true,
	"--config-env": true,
}

// tagMutationFlags are the `git tag` flags that turn a tag op into a
// mutation (create / annotate / sign / delete / move). A bare `git tag`,
// `git tag -l`, or `git tag -n` is a read-only listing and passes.
var tagMutationFlags = map[string]bool{
	// Short forms.
	"-a": true,
	"-d": true,
	"-f": true,
	"-s": true,

	// Long forms.
	"--annotate":      true,
	"--create-reflog": true,
	"--delete":        true,
	"--file":          true,
	"--force":         true,
	"--message":       true,
	"--sign":          true,
}

var (
	// shellWrappers lists the command fragments that mark a shell wrapper:
	// a git op quoted inside one is a single opaque argument to the
	// tokenizer, so a match on any of these switches on the raw-string
	// backstop below (the original note says this mirrors
	// pre-commit-guard.sh:128-133).
	shellWrappers = []string{
		"bash -c",
		"sh -c",
		"zsh -c",
		" -lc ",
		"eval ",
	}

	// reWrappedGitCommit finds `git commit` in raw command text: `git` must
	// not be preceded by an alphanumeric or underscore, any run of
	// whitespace may separate the two words, and `commit` must be followed
	// by whitespace or the end of the text.
	reWrappedGitCommit = regexp.MustCompile(`(^|[^[:alnum:]_])git[[:space:]]+commit([[:space:]]|$)`)

	// reWrappedGitTag is the same pattern for `git tag`.
	reWrappedGitTag = regexp.MustCompile(`(^|[^[:alnum:]_])git[[:space:]]+tag([[:space:]]|$)`)
)

// ScanGitWriteGuard is the git-write guard installed as a PreToolUse hook,
// generalizing the attribution-only commit-guard. A pending `git commit` or
// a `git tag` MUTATION is denied unless the matching one-shot authorization
// sentinel (set by `eeco authorize`, 15-min TTL) is present. An authorized
// commit additionally runs eeco's CI-parity gates (attribution +
// workspace-path leak), so an authorized write still cannot carry a leak
// into history.
//
// Parameters: command is the PreToolUse Bash command, cwd the repo it
// targets, stateDir the directory holding the sentinels (the original note
// gives it as "/state", presumably under the workspace — confirm), and
// workspaceName the engine dir name used to build the leak pattern.
//
// Posture (locked decision #2): the write-verb classifier fails CLOSED — a
// command that cannot be tokenized cleanly but whose raw text shows a
// commit / tag is denied. Everything downstream (the leak / attribution
// fold-in) degrades OPEN, so the git pre-commit hook and CI stay the hard
// gates and a session is never wedged. A deny is reported through
// Decision; per the original note the runner turns it into the JSON
// permission-decision body and always exits 0.
func ScanGitWriteGuard(det *Detector, command, cwd, stateDir, workspaceName string) GitWriteGuardResult { commit, tagMut := classifyCommand(command) if !commit && !tagMut { return GitWriteGuardResult{Decision: decisionAllow} } var consumed []string // (1a) git tag mutation: user-only, gated first. A combined // `git tag v1 && git commit` falls through to the commit gate after the // tag sentinel is queued for consumption. if tagMut { if !sentinelAuthorized(stateDir, "tag") { return GitWriteGuardResult{Decision: decisionDeny, Reason: tagDenyReason} } consumed = append(consumed, "tag") } // (1b) git commit: require authorization. if commit { if !sentinelAuthorized(stateDir, "commit") { return GitWriteGuardResult{Decision: decisionDeny, Reason: commitDenyReason} } // (2) authorized commit: fold in the CI-parity gates (degrade-open). // A finding denies but PRESERVES the sentinel (do not queue "commit" // for consumption) so a re-commit after the fix works. if problems := commitGateFindings(det, command, cwd, workspaceName); len(problems) > 0 { return GitWriteGuardResult{Decision: decisionDeny, Reason: gateDenyReason(problems)} } consumed = append(consumed, "commit") } return GitWriteGuardResult{Decision: decisionAllow, Consumed: consumed} } const ( commitDenyReason = "eeco git-write-guard: git commit is user-driven — the user commits himself. " + "After explicit authorization, run `eeco authorize commit` to allow one commit " + "(15-min, one-shot), then re-run." tagDenyReason = "eeco git-write-guard: git tag mutation is user-only. " + "To allow one tag op, run `eeco authorize tag` (15-min, one-shot). " + "Read-only tag ops (git tag, git tag -l, …) are never blocked." ) // gateDenyReason renders the deny message for an authorized commit that // tripped the leak / attribution gates. The authorization sentinel is // preserved, so the operator fixes the listed problems and re-commits. 
func gateDenyReason(problems []string) string { return "eeco git-write-guard: commit blocked — " + strings.Join(problems, "; ") + ". Fix these, then re-commit (authorization preserved)." } // classifyCommand reports whether the command invokes `git commit` and // whether it invokes a `git tag` MUTATION, across every segment of a // compound command. When the command cannot be tokenized cleanly it fails // CLOSED, trusting a raw substring match (locked decision #2). A shell // wrapper (bash -c / eval / …) triggers a raw backstop regardless, since // the tokenizer cannot see a git op hidden inside the wrapper's quoted arg. func classifyCommand(command string) (commit, tagMut bool) { if commandParseOK(command) { for _, words := range commandSegments(command) { verb, mut := classifyGitWrite(words) switch verb { case "commit": commit = true case "tag": if mut { tagMut = true } } } } else { // Fail CLOSED: an unbalanced-quote command we cannot tokenize is // denied if its raw text shows a commit / tag write. if strings.Contains(command, "git commit") { commit = true } if strings.Contains(command, "git tag") { tagMut = true } } if hasShellWrapper(command) { if reWrappedGitCommit.MatchString(command) { commit = true } if reWrappedGitTag.MatchString(command) { tagMut = true } } return commit, tagMut } // classifyGitWrite inspects one segment's word list and returns the git // subcommand ("" when the segment is not a git invocation) and, for a // `git tag`, whether it is a mutation. It reuses isEnvAssign / isGitProg // and the global-option walk from isGitCommit, generalized to any // subcommand. 
func classifyGitWrite(words []string) (verb string, tagMutation bool) { i := 0 for i < len(words) && isEnvAssign(words[i]) { i++ } if i >= len(words) || !isGitProg(words[i]) { return "", false } i++ // past git for i < len(words) { w := words[i] if w == "--" { return "", false // end of options without a subcommand } if strings.HasPrefix(w, "-") { if gitGlobalValueOpts[w] { i += 2 } else { i++ } continue } if w != "tag" { return w, false } return "tag", tagIsMutation(words[i+1:]) } return "", false } // tagIsMutation reports whether the args after `git tag` denote a mutation: // a name argument (create) or any mutation flag (annotate / sign / delete / // force / message / file). A bare listing (`git tag`, `-l`, `-n`) is not. func tagIsMutation(rest []string) bool { for _, a := range rest { if !strings.HasPrefix(a, "-") { return true // a name arg ⇒ create } if tagMutationFlags[a] || strings.HasPrefix(a, "-m") || strings.HasPrefix(a, "--message") || strings.HasPrefix(a, "--file") { return true } } return false } // hasShellWrapper reports whether the command contains a known shell // wrapper that could hide a git op inside a quoted argument. func hasShellWrapper(command string) bool { for _, w := range shellWrappers { if strings.Contains(command, w) { return true } } return false } // commandParseOK reports whether command tokenizes cleanly — every quote is // closed. The guard fails CLOSED when this is false (locked decision #2). It // mirrors lex's single-quote, double-quote, and backslash handling so its // verdict matches the tokenizer the classifier relies on. 
func commandParseOK(command string) bool { i, n := 0, len(command) for i < n { switch command[i] { case '\'': i++ for i < n && command[i] != '\'' { i++ } if i >= n { return false // unterminated single quote } i++ case '"': i++ for i < n && command[i] != '"' { if command[i] == '\\' && i+1 < n { i += 2 continue } i++ } if i >= n { return false // unterminated double quote } i++ case '\\': if i+1 < n { i += 2 } else { i++ } default: i++ } } return true } // sentinelAuthorized reports whether a one-shot authorization sentinel for // kind ("commit"/"tag") exists and is within its TTL. A stale sentinel is // removed and reported unauthorized, so a forgotten authorization never // lingers (it is also cleared at session start in C4b). func sentinelAuthorized(stateDir, kind string) bool { path := filepath.Join(stateDir, "git-"+kind+"-authorized") info, err := os.Stat(path) if err != nil { return false } if time.Since(info.ModTime()) > sentinelTTL { _ = os.Remove(path) // stale ⇒ clear, treat as unauthorized return false } return true } // commitGateFindings runs eeco's CI-parity gates over an authorized commit // and returns the operator-facing problems (empty = clean). Every check // degrades open: an unreadable diff or a message it cannot statically // resolve yields no finding, so the git pre-commit hook + CI stay the hard // gates (locked decision #2). It folds three families: AI-attribution // (det, eeco's comment-hygiene equivalent) over the assembled message, the // staged diff, and the raw command; plus a workspace-path leak over staged // additions (leak-guard's pattern). func commitGateFindings(det *Detector, command, cwd, workspaceName string) []string { var problems []string add := func(p string) { if !slices.Contains(problems, p) { problems = append(problems, p) } } scanAttr := func(where, text string) { for _, f := range det.Scan(where, text) { add(f.Msg + " in " + where) } } // Attribution in the assembled message of each commit segment. 
for _, words := range commandSegments(command) { if verb, _ := classifyGitWrite(words); verb != "commit" { continue } if msg := assembleMessage(words, cwd); msg != "" { scanAttr("commit message", msg) } } // Attribution + workspace-path leak in the staged additions. if diff := stagedDiff(cwd); diff != "" { scanAttr("staged diff", diff) for _, line := range scanDiffWorkspaceLeak(diff, workspaceName) { add("workspace path in staged content: " + strings.TrimSpace(line)) } } // Attribution embedded with a real newline in the raw command (a trailer // or generated-by line inside -m). scanAttr("command", command) return problems } // scanDiffWorkspaceLeak returns the added diff lines that reference an // engine subdirectory under the workspace (the state/memory/… dirs) — the // workspace-path leak leak-guard catches in tracked files, applied here to // the prospective staged content. An empty workspaceName disables the scan // (no pattern to build). Only added lines (`+`, excluding the `+++` header) // are scanned. func scanDiffWorkspaceLeak(diff, workspaceName string) []string { if workspaceName == "" { return nil } re := regexp.MustCompile(regexp.QuoteMeta(workspaceName) + `/(?:` + reAlt(engineSubdirs) + `)/`) var out []string for _, line := range splitLines(diff) { if !strings.HasPrefix(line, "+") || strings.HasPrefix(line, "+++") { continue } if re.MatchString(line) { out = append(out, strings.TrimPrefix(line, "+")) } } return out }