package projecttype import ( "context" "encoding/json" "fmt" "os" "path/filepath" "slices" "sort" "strings" ) // DefaultThreshold is the deterministic-confidence floor at or above // which Detect accepts the marker-scan result without prompting. It // backs the init_detection_threshold config key. const DefaultThreshold = 0.7 // minAIConfidence is the floor below which an AI-fallback classification // is rejected: the result is re-offered to the operator (when a Prompter // is available) or degraded to generic. const minAIConfidence = 0.5 // Source records which pipeline layer produced a Result. type Source string const ( SourceMarker Source = "marker-scan" SourceFlag Source = "type-flag" SourceInteractive Source = "interactive-prompt" SourceAI Source = "ai-fallback" SourceFallback Source = "generic-fallback" ) // Result is the outcome of Detect. type Result struct { Category Category // Confidence is the marker-scan confidence in [0,1] for a // deterministic result. An operator pick or a forced --type is 1.0; an // AI result carries the model's reported confidence. Confidence float64 // Dirs is the knowledge-directory set to scaffold: the catalog dirs // for Category, plus any AI-proposed deviations when Source is // SourceAI. Dirs []string Source Source Justification string } // Prompter asks the operator to resolve an ambiguous detection. A nil // Prompter makes Detect non-interactive (layer 3 is skipped). type Prompter interface { // Pick presents the candidate categories best-first and the catalog // (for descriptions and the generic escape) and returns the operator's // choice. When describe is true the operator asked to describe the // project freely; freeText carries that description and Detect routes // to the AI layer. A non-nil error aborts detection. Pick(candidates []Category, cat *Catalog) (choice Category, describe bool, freeText string, err error) } // AIFunc runs one gated AI pass and returns the model's raw text. A nil // AIFunc means no AI fallback is available and Detect degrades to // generic where the pipeline would otherwise call it. type AIFunc func(ctx context.Context, prompt string) (string, error) // Options configures one Detect call. type Options struct { // RepoRoot is the directory the deterministic layers scan. RepoRoot string // Threshold overrides DefaultThreshold when > 0. Threshold float64 // Forced short-circuits the whole pipeline with an operator-supplied // --type value. An unknown value is an error. Forced Category // ForceAI routes straight to the AI layer (the --ai flag), skipping // the deterministic accept and the interactive prompt. ForceAI bool // Prompter resolves ambiguity interactively; nil disables layer 3. Prompter Prompter // AI runs the layer-4 fallback; nil disables layer 4. AI AIFunc } func (o Options) threshold() float64 { if o.Threshold > 0 { return o.Threshold } return DefaultThreshold } // Detect classifies opt.RepoRoot through the four-layer pipeline and // returns the resolved category and its scaffold dir-set. It never // errors on an unclassifiable tree: the terminal fallback is generic. func Detect(ctx context.Context, cat *Catalog, opt Options) (Result, error) { if cat == nil { return Result{}, fmt.Errorf("nil catalog") } if opt.Forced != "" { if !cat.Has(opt.Forced) { return Result{}, fmt.Errorf("unknown project type %q", opt.Forced) } return Result{ Category: opt.Forced, Confidence: 1.0, Dirs: cat.DirsFor(opt.Forced), Source: SourceFlag, }, nil } if opt.ForceAI { return aiLayer(ctx, cat, opt, "") } scores := scoreRepo(opt.RepoRoot) top, second := topTwo(scores) conf := confidence(scores[top], scores[second]) if top != "" && conf >= opt.threshold() { return Result{ Category: top, Confidence: conf, Dirs: cat.DirsFor(top), Source: SourceMarker, }, nil } candidates := rankedCandidates(scores) if opt.Prompter != nil { choice, describe, freeText, err := opt.Prompter.Pick(candidates, cat) if err != nil { return Result{}, err } if describe { return aiLayer(ctx, cat, opt, freeText) } if !cat.Has(choice) { return Result{}, fmt.Errorf("operator chose unknown project type %q", choice) } return Result{ Category: choice, Confidence: 1.0, Dirs: cat.DirsFor(choice), Source: SourceInteractive, }, nil } // Non-interactive: accept the best deterministic guess if there is // one, otherwise fall back to generic. if top != "" { return Result{ Category: top, Confidence: conf, Dirs: cat.DirsFor(top), Source: SourceMarker, }, nil } return genericResult(cat, "no marker or convention identified the project"), nil } // aiLayer runs the layer-4 fallback. It degrades to generic when no // AIFunc is wired, the call fails, the response is malformed, or the // reported confidence is below minAIConfidence and no Prompter can // re-offer the top candidates. func aiLayer(ctx context.Context, cat *Catalog, opt Options, desc string) (Result, error) { if opt.AI == nil { return genericResult(cat, "AI fallback not configured"), nil } tree := topLevelEntries(opt.RepoRoot) prompt, err := buildDetectPrompt(cat, tree, desc) if err != nil { return genericResult(cat, "AI fallback prompt build failed: "+err.Error()), nil } raw, err := opt.AI(ctx, prompt) if err != nil { return genericResult(cat, "AI fallback unavailable: "+err.Error()), nil } parsed, ok := parseAIDetect(raw) if !ok || !cat.Has(Category(parsed.Category)) { return genericResult(cat, "AI fallback returned no usable classification"), nil } chosen := Category(parsed.Category) if parsed.Confidence < minAIConfidence { if opt.Prompter != nil { choice, describe, _, perr := opt.Prompter.Pick(topThree(cat, parsed), cat) if perr != nil { return Result{}, perr } if !describe && cat.Has(choice) { return Result{ Category: choice, Confidence: 1.0, Dirs: cat.DirsFor(choice), Source: SourceInteractive, }, nil } } return genericResult(cat, "AI fallback confidence too low"), nil } return Result{ Category: chosen, Confidence: clamp01(parsed.Confidence), Dirs: mergeDirs(cat.DirsFor(chosen), parsed.Dirs), Source: SourceAI, Justification: strings.TrimSpace(parsed.Justification), }, nil } func genericResult(cat *Catalog, why string) Result { return Result{ Category: Generic, Confidence: 0, Dirs: cat.DirsFor(Generic), Source: SourceFallback, Justification: why, } } // scoreRepo accumulates per-category votes from the marker-file scan // (layer 1) and the conventional-directory scan (layer 2). func scoreRepo(repoRoot string) map[Category]float64 { scores := make(map[Category]float64) if repoRoot == "" { return scores } for marker, votes := range markerRules { if rootHas(repoRoot, marker) { for _, v := range votes { scores[v.cat] += v.weight } } } for dir, votes := range signalRules { if rootHasDir(repoRoot, dir) { for _, v := range votes { scores[v.cat] += v.weight } } } return scores } // confidence is the share of the winning score over itself plus the // runner-up: 1.0 when only one category scores, lower as the runner-up // closes in. It deliberately ignores the long tail of small votes so a // clear leader is not diluted by many partial matches. func confidence(top, second float64) float64 { if top <= 0 { return 0 } return top / (top + second) } func topTwo(scores map[Category]float64) (top, second Category) { var topV, secondV float64 for _, cat := range sortedCats(scores) { v := scores[cat] switch { case v > topV: second, secondV = top, topV top, topV = cat, v case v > secondV: second, secondV = cat, v } } return top, second } func rankedCandidates(scores map[Category]float64) []Category { cats := sortedCats(scores) sort.SliceStable(cats, func(i, j int) bool { return scores[cats[i]] > scores[cats[j]] }) out := make([]Category, 0, len(cats)) for _, c := range cats { if scores[c] > 0 { out = append(out, c) } } return out } // sortedCats returns the scored categories in deterministic name order // so the score walk and tie-breaks do not depend on map iteration order. func sortedCats(scores map[Category]float64) []Category { out := make([]Category, 0, len(scores)) for c := range scores { out = append(out, c) } slices.Sort(out) return out } func rootHas(repoRoot, marker string) bool { if strings.ContainsAny(marker, "*?[") { matches, err := filepath.Glob(filepath.Join(repoRoot, marker)) return err == nil && len(matches) > 0 } _, err := os.Stat(filepath.Join(repoRoot, marker)) return err == nil } func rootHasDir(repoRoot, name string) bool { info, err := os.Stat(filepath.Join(repoRoot, name)) return err == nil && info.IsDir() } func topLevelEntries(repoRoot string) []string { var names []string ents, err := os.ReadDir(repoRoot) if err != nil { return names } for _, e := range ents { if e.Name() == ".git" { continue } name := e.Name() if e.IsDir() { name += "/" } names = append(names, name) } sort.Strings(names) return names } func clamp01(v float64) float64 { switch { case v < 0: return 0 case v > 1: return 1 default: return v } } // mergeDirs returns base with any extra dirs appended that are not // already present, preserving order. It backs the AI layer's bounded // "propose deviations to the dir-set" affordance. func mergeDirs(base, extra []string) []string { seen := make(map[string]struct{}, len(base)) out := make([]string, 0, len(base)+len(extra)) for _, d := range base { seen[d] = struct{}{} out = append(out, d) } for _, d := range extra { d = strings.TrimSpace(d) if d == "" { continue } if _, dup := seen[d]; dup { continue } seen[d] = struct{}{} out = append(out, d) } return out } type aiDetect struct { Category string `json:"category"` Confidence float64 `json:"confidence"` Dirs []string `json:"dirs"` Justification string `json:"justification"` Deviations []string `json:"deviations"` } // parseAIDetect extracts the first JSON object from the model's text // (which may wrap it in prose or a code fence) and unmarshals it. func parseAIDetect(raw string) (aiDetect, bool) { start := strings.IndexByte(raw, '{') end := strings.LastIndexByte(raw, '}') if start < 0 || end < start { return aiDetect{}, false } var d aiDetect if err := json.Unmarshal([]byte(raw[start:end+1]), &d); err != nil { return aiDetect{}, false } d.Dirs = mergeDirs(d.Dirs, d.Deviations) return d, true } // topThree returns the AI-chosen category (when known) plus other known // categories, capped at three, for an operator re-prompt. func topThree(cat *Catalog, d aiDetect) []Category { var out []Category if cat.Has(Category(d.Category)) { out = append(out, Category(d.Category)) } for _, c := range cat.Categories() { if len(out) >= 3 { break } if c == Generic || (len(out) > 0 && c == out[0]) { continue } out = append(out, c) } return out }