// Package ask answers a free-form question about a project with a
// deterministic, no-AI-spend, ranked set of pointers: the matching
// memory facts first (eeco's curated topic→file map) and then the
// best-matching code locations as path:line references.
//
// It is the engine behind `eeco ask`. Where `eeco go` (package brief)
// gives a one-shot project overview, `eeco ask` is the interactive
// counterpart: a fast, precise pointer into the codebase for any
// assistant, beyond the static brief.
//
// The package only reads — the resolved config, the memory store, and
// the repository's tracked files — and writes nothing. It calls no AI
// provider: relevance is a simple word-overlap score, the same
// tokenisation the memory store uses for fact selection. The output
// carries no timestamp and every list is in a stable sort order, so a
// given question over a given tree always produces the same answer.
package ask

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/ajhahnde/eeco/internal/config"
	"github.com/ajhahnde/eeco/internal/gitx"
	"github.com/ajhahnde/eeco/internal/memory"
)

// DefaultLimit is the number of code locations Search returns when the
// caller passes a non-positive limit.
const DefaultLimit = 10

// maxFileBytes caps the size (in bytes, 256 KiB) of a file ask will scan.
// Larger files are skipped: they are almost always generated or vendored,
// and reading them would blow the time budget for a command meant to feel
// instant.
const maxFileBytes = 256 * 1024

// snippetCap bounds the length, in bytes, of a code-line snippet in the
// output so one long minified line cannot dominate the answer.
const snippetCap = 160

// tokenSplit matches the inverse of a word character (letters, digits,
// underscore). Tokenisation mirrors internal/memory/select.go: lowercase,
// split on non-word runs, dedupe.
// A private copy lives here rather than
// widening the memory package's surface — the two are tiny, independent,
// and ask layers its own scoring on top.
var tokenSplit = regexp.MustCompile(`[^\p{L}\p{N}_]+`)

// Result is the structured answer to one question: the matching memory
// facts and the matching code locations, each ranked. It is the data
// behind `eeco ask`, independent of how it is rendered — Render turns it
// into Markdown, RenderJSON into a JSON object, so the two always
// describe the same answer. Both slice fields are always non-nil so the
// JSON form renders an empty list rather than null.
type Result struct {
	// Question is the question as asked, with surrounding whitespace
	// trimmed by Search.
	Question string `json:"question"`
	// Memory holds the matching memory facts, best match first.
	Memory []MemoryHit `json:"memory"`
	// Code holds the matching code locations, best match first.
	Code []CodeHit `json:"code"`
}

// MemoryHit is one memory fact whose name, description, or body shares a
// word with the question. Ref is the repo-relative file the fact points
// at, empty when the fact carries none. Score is the count of distinct
// question terms the fact matched.
type MemoryHit struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	Ref         string `json:"ref"`
	Score       int    `json:"score"`
}

// CodeHit is one line in a tracked source file that shares a word with
// the question. Path is repo-relative and slash-separated, Line is
// 1-based, Text is the trimmed (and length-capped) line, and Score is
// the count of distinct question terms the line matched.
type CodeHit struct {
	Path  string `json:"path"`
	Line  int    `json:"line"`
	Text  string `json:"text"`
	Score int    `json:"score"`
}

// Search answers question for cfg: it scores the memory store and the
// repository's tracked files by word overlap with the question and
// returns the ranked matches. limit caps the number of code locations
// returned (a non-positive limit means DefaultLimit); every matching
// memory fact is returned.
It reads the memory store only when the // workspace is initialised and degrades gracefully when it is not — the // code search still runs, so `eeco ask` is useful in any git repo. // // A non-nil error means a real I/O fault while walking the tree or // reading the store; an unmatched question is not an error (the Result // simply carries empty lists). func Search(cfg *config.Config, question string, limit int) (Result, error) { if cfg == nil { return Result{}, errors.New("ask.Search: nil config") } if limit <= 0 { limit = DefaultLimit } res := Result{ Question: strings.TrimSpace(question), Memory: []MemoryHit{}, Code: []CodeHit{}, } terms := tokenize(question) if len(terms) == 0 { return res, nil } mem, err := searchMemory(cfg, terms) if err != nil { return Result{}, err } res.Memory = mem code, err := searchCode(cfg, terms, limit) if err != nil { return Result{}, err } res.Code = code return res, nil } // searchMemory scores each fact by the number of distinct question terms // found across its name, description, and body. It reads the store // read-only (it does not call memory.Select, which would bump last_used // and re-save). Facts are sorted by score descending, then name // ascending, for a stable order. 
func searchMemory(cfg *config.Config, terms map[string]struct{}) ([]MemoryHit, error) { if !config.IsInitialized(cfg) { return []MemoryHit{}, nil } store, err := memory.Open(cfg) if err != nil { return nil, fmt.Errorf("ask: open memory: %w", err) } facts, err := store.LoadAll() if err != nil { return nil, fmt.Errorf("ask: load memory: %w", err) } hits := []MemoryHit{} for _, f := range facts { if f.Disabled { continue } score := overlapCount(terms, tokenize(f.Name+" "+f.Description+" "+f.Body)) if score == 0 { continue } hits = append(hits, MemoryHit{ Name: f.Name, Description: f.Description, Ref: f.Ref, Score: score, }) } sort.SliceStable(hits, func(i, j int) bool { if hits[i].Score != hits[j].Score { return hits[i].Score > hits[j].Score } return hits[i].Name < hits[j].Name }) return hits, nil } // searchCode scans every tracked text file and scores each line by the // number of distinct question terms it contains. The top limit lines are // returned, ranked by score descending, then path ascending, then line // ascending — a fully deterministic order. func searchCode(cfg *config.Config, terms map[string]struct{}, limit int) ([]CodeHit, error) { files, err := collectFiles(cfg) if err != nil { return nil, err } hits := []CodeHit{} for _, rel := range files { data, err := os.ReadFile(filepath.Join(cfg.RepoRoot, filepath.FromSlash(rel))) if err != nil { // A file listed by git but unreadable now (a race, a broken // symlink) is skipped, not fatal: the answer degrades rather // than aborting. 
continue } if len(data) > maxFileBytes || bytes.IndexByte(data, 0) >= 0 { continue // oversized or binary } for i, raw := range strings.Split(string(data), "\n") { score := overlapCount(terms, tokenize(raw)) if score == 0 { continue } hits = append(hits, CodeHit{ Path: rel, Line: i + 1, Text: snippet(raw), Score: score, }) } } sort.SliceStable(hits, func(i, j int) bool { if hits[i].Score != hits[j].Score { return hits[i].Score > hits[j].Score } if hits[i].Path != hits[j].Path { return hits[i].Path < hits[j].Path } return hits[i].Line < hits[j].Line }) if len(hits) > limit { hits = hits[:limit] } return hits, nil } // collectFiles lists the repository's text-file candidates, repo-relative // and slash-separated. It prefers git's tracked set so build artifacts, // the eeco workspace, and other untracked clutter stay out of the search; // it falls back to a recursive directory walk when git is unavailable or // the repo has no tracked files (the same two-branch strategy the brief // uses for the top-level listing). Either path skips the .git directory // and eeco's own workspace. 
func collectFiles(cfg *config.Config) ([]string, error) { if tracked, err := gitx.TrackedFiles(cfg.RepoRoot); err == nil && len(tracked) > 0 { out := make([]string, 0, len(tracked)) for _, p := range tracked { seg, _, _ := strings.Cut(p, "/") if seg == cfg.WorkspaceName { continue } out = append(out, p) } sort.Strings(out) return out, nil } var out []string err := filepath.WalkDir(cfg.RepoRoot, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { if path == cfg.RepoRoot { return nil } if name := d.Name(); name == ".git" || name == cfg.WorkspaceName { return filepath.SkipDir } return nil } rel, err := filepath.Rel(cfg.RepoRoot, path) if err != nil { return err } out = append(out, filepath.ToSlash(rel)) return nil }) if err != nil { return nil, fmt.Errorf("ask: walk repo: %w", err) } sort.Strings(out) return out, nil } // Render serialises a Result to the Markdown answer. When the answer is // empty it renders a single guidance line instead of empty sections. func Render(r Result) string { var b strings.Builder fmt.Fprintf(&b, "# eeco ask: %q\n\n", r.Question) if len(r.Memory) == 0 && len(r.Code) == 0 { b.WriteString("No matches — try different terms, or run `eeco go` for the project brief.\n") return b.String() } b.WriteString("## Memory\n\n") if len(r.Memory) == 0 { b.WriteString("No matching facts.\n") } else { for _, m := range r.Memory { if m.Ref != "" { fmt.Fprintf(&b, "- %s → `%s`\n", m.Description, m.Ref) } else { fmt.Fprintf(&b, "- %s — %s\n", m.Name, m.Description) } } } b.WriteString("\n## Code\n\n") if len(r.Code) == 0 { b.WriteString("No matching code.\n") } else { for _, c := range r.Code { fmt.Fprintf(&b, "- `%s:%d` %s\n", c.Path, c.Line, c.Text) } } return b.String() } // RenderJSON serialises a Result to an indented JSON object — the // machine-readable counterpart to Render. The three top-level keys // (question, memory, code) are frozen; the arrays are always present, // never null. 
func RenderJSON(r Result) (string, error) { out, err := json.MarshalIndent(r, "", " ") if err != nil { return "", fmt.Errorf("ask: marshal json: %w", err) } return string(out) + "\n", nil } // snippet trims a code line and caps its length so one very long line // cannot dominate the answer. func snippet(line string) string { s := strings.TrimSpace(line) if len(s) > snippetCap { s = s[:snippetCap] + "…" } return s } // tokenize lowercases s, splits it on non-word runs, drops single // characters (which carry little signal and inflate code-search noise), // and returns the distinct tokens as a set. func tokenize(s string) map[string]struct{} { out := map[string]struct{}{} for _, t := range tokenSplit.Split(strings.ToLower(s), -1) { if len(t) <= 1 { continue } out[t] = struct{}{} } return out } // overlapCount returns the number of distinct terms present in both sets. func overlapCount(terms, hay map[string]struct{}) int { short, long := terms, hay if len(hay) < len(terms) { short, long = hay, terms } n := 0 for k := range short { if _, ok := long[k]; ok { n++ } } return n }