package memory import ( "regexp" "sort" "strings" ) // tokenSplit matches the inverse of \w (letters/digits/underscore). // Tokenisation is intentionally simple: lowercase, split on non-word // characters, dedupe. No stopword list — the store is small enough that // recall matters more than precision at this stage. var tokenSplit = regexp.MustCompile(`[^\p{L}\p{N}_]+`) // Select returns the facts whose name or description shares a word with // task, sorted by name. Each selected fact's last_used is bumped to the // store clock and re-saved. Pinned facts participate in selection so // that explicit knowledge still surfaces. func (s *Store) Select(task string) ([]*Fact, error) { facts, err := s.LoadAll() if err != nil { return nil, err } terms := tokenize(task) if len(terms) == 0 { return nil, nil } var out []*Fact for _, f := range facts { if f.Disabled { continue } if overlap(terms, tokenize(f.Name+" "+f.Description)) { out = append(out, f) } } sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name }) now := s.Now().UTC() today := now.Truncate(24 * 60 * 60 * 1e9) // truncate to day; equivalent to date-only for _, f := range out { f.LastUsed = today if err := s.Save(f); err != nil { return out, err } } return out, nil } func tokenize(s string) map[string]struct{} { out := map[string]struct{}{} for _, t := range tokenSplit.Split(strings.ToLower(s), -1) { if t == "" { continue } out[t] = struct{}{} } return out } func overlap(a, b map[string]struct{}) bool { short, long := a, b if len(b) < len(a) { short, long = b, a } for k := range short { if _, ok := long[k]; ok { return true } } return false }