import { AIMessage } from '@langchain/core/messages';
import { StructuredToolInterface } from '@langchain/core/tools';
import { callLlm } from '../model/llm.js';
import { getTools } from '../tools/registry.js';
import { buildSystemPrompt, buildIterationPrompt, loadSoulDocument } from './prompts.js';
import { extractTextContent, hasToolCalls } from '../utils/ai-message.js';
import { InMemoryChatHistory } from '../utils/in-memory-chat-history.js';
import { buildHistoryContext } from '../utils/history-context.js';
import { estimateTokens, CONTEXT_THRESHOLD, KEEP_TOOL_USES } from '../utils/tokens.js';
import { formatUserFacingError, isContextOverflowError } from '../utils/errors.js';
import type { AgentConfig, AgentEvent, ContextClearedEvent, TokenUsage } from '../agent/types.js';
import { createRunContext, type RunContext } from './run-context.js';
import { AgentToolExecutor } from './tool-executor.js';
import { MemoryManager } from '../memory/index.js';
import { runMemoryFlush, shouldRunMemoryFlush } from '../memory/flush.js';
import { resolveProvider } from '../providers.js';

const DEFAULT_MODEL = 'gpt-5.4';
const DEFAULT_MAX_ITERATIONS = 10;
const MAX_OVERFLOW_RETRIES = 2;
const OVERFLOW_KEEP_TOOL_USES = 3;

/**
 * The core agent class that handles the agent loop and tool execution.
 */
export class Agent {
  private readonly model: string;
  private readonly maxIterations: number;
  private readonly tools: StructuredToolInterface[];
  private readonly toolMap: Map<string, StructuredToolInterface>;
  private readonly toolExecutor: AgentToolExecutor;
  private readonly systemPrompt: string;
  private readonly signal?: AbortSignal;
  private readonly memoryEnabled: boolean;

  private constructor(
    config: AgentConfig,
    tools: StructuredToolInterface[],
    systemPrompt: string,
  ) {
    this.model = config.model ?? DEFAULT_MODEL;
    this.maxIterations = config.maxIterations ?? DEFAULT_MAX_ITERATIONS;
    this.tools = tools;
    this.toolMap = new Map(tools.map(t => [t.name, t] as const));
    this.toolExecutor = new AgentToolExecutor(
      this.toolMap,
      config.signal,
      config.requestToolApproval,
      config.sessionApprovedTools,
    );
    this.systemPrompt = systemPrompt;
    this.signal = config.signal;
    this.memoryEnabled = config.memoryEnabled ?? true;
  }
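
  // Usage sketch (illustrative only, not part of the class API; event shapes
  // are the ones this file yields, defined in '../agent/types.js'):
  //
  //   const agent = await Agent.create({ model: DEFAULT_MODEL });
  //   for await (const event of agent.run('What changed in the last release?')) {
  //     if (event.type === 'thinking') console.log('…', event.message);
  //     if (event.type === 'done') console.log(event.answer);
  //   }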

  /**
   * Create a new Agent instance with tools.
   */
  static async create(config: AgentConfig = {}): Promise<Agent> {
    const model = config.model ?? DEFAULT_MODEL;
    const tools = getTools(model);
    const soulContent = await loadSoulDocument();

    let memoryFiles: string[] = [];
    let memoryContext: string | null = null;
    if (config.memoryEnabled !== false) {
      const memoryManager = await MemoryManager.get();
      memoryFiles = await memoryManager.listFiles();
      const session = await memoryManager.loadSessionContext();
      if (session.text.trim()) {
        memoryContext = session.text;
      }
    }

    const systemPrompt = buildSystemPrompt(
      model,
      soulContent,
      config.channel,
      config.groupContext,
      memoryFiles,
      memoryContext,
    );
    return new Agent(config, tools, systemPrompt);
  }

  /**
   * Run the agent and yield events for real-time UI updates.
   * Anthropic-style context management: full tool results are kept during
   * iteration, and the oldest results are cleared once the estimated context
   * size exceeds the threshold.
   */
  async *run(query: string, inMemoryHistory?: InMemoryChatHistory): AsyncGenerator<AgentEvent> {
    const startTime = Date.now();
    if (this.tools.length === 0) {
      yield {
        type: 'done',
        answer: 'No tools available. Please check your API key configuration.',
        toolCalls: [],
        iterations: 0,
        totalTime: Date.now() - startTime,
      };
      return;
    }

    const ctx = createRunContext(query);
    const memoryFlushState = { alreadyFlushed: false };

    // Build the initial prompt with conversation history context
    let currentPrompt = this.buildInitialPrompt(query, inMemoryHistory);

    // Main agent loop
    let overflowRetries = 0;
    while (ctx.iteration < this.maxIterations) {
      ctx.iteration++;

      let response: AIMessage | string;
      let usage: TokenUsage | undefined;
      // Call the model, retrying after clearing old tool results if the
      // provider reports a context overflow
      while (true) {
        try {
          const result = await this.callModel(currentPrompt);
          response = result.response;
          usage = result.usage;
          overflowRetries = 0;
          break;
        } catch (error) {
          const errorMessage = error instanceof Error ? error.message : String(error);
          if (isContextOverflowError(errorMessage) && overflowRetries < MAX_OVERFLOW_RETRIES) {
            overflowRetries++;
            const clearedCount = ctx.scratchpad.clearOldestToolResults(OVERFLOW_KEEP_TOOL_USES);
            if (clearedCount > 0) {
              yield { type: 'context_cleared', clearedCount, keptCount: OVERFLOW_KEEP_TOOL_USES };
              currentPrompt = buildIterationPrompt(
                query,
                ctx.scratchpad.getToolResults(),
                ctx.scratchpad.formatToolUsageForPrompt(),
              );
              continue;
            }
          }
          const totalTime = Date.now() - ctx.startTime;
          const provider = resolveProvider(this.model).displayName;
          yield {
            type: 'done',
            answer: `Error: ${formatUserFacingError(errorMessage, provider)}`,
            toolCalls: ctx.scratchpad.getToolCallRecords(),
            iterations: ctx.iteration,
            totalTime,
            tokenUsage: ctx.tokenCounter.getUsage(),
            tokensPerSecond: ctx.tokenCounter.getTokensPerSecond(totalTime),
          };
          return;
        }
      }

      ctx.tokenCounter.add(usage);

      const responseText = typeof response === 'string' ? response : extractTextContent(response);

      // Emit thinking if there are also tool calls (skip whitespace-only responses)
      if (responseText?.trim() && typeof response !== 'string' && hasToolCalls(response)) {
        const trimmedText = responseText.trim();
        ctx.scratchpad.addThinking(trimmedText);
        yield { type: 'thinking', message: trimmedText };
      }

      // No tool calls = the final answer is in this response
      if (typeof response === 'string' || !hasToolCalls(response)) {
        yield* this.handleDirectResponse(responseText ?? '', ctx);
        return;
      }

      // Execute tools and add results to the scratchpad (response is an AIMessage here)
      for await (const event of this.toolExecutor.executeAll(response, ctx)) {
        yield event;
        if (event.type === 'tool_denied') {
          const totalTime = Date.now() - ctx.startTime;
          yield {
            type: 'done',
            answer: '',
            toolCalls: ctx.scratchpad.getToolCallRecords(),
            iterations: ctx.iteration,
            totalTime,
            tokenUsage: ctx.tokenCounter.getUsage(),
            tokensPerSecond: ctx.tokenCounter.getTokensPerSecond(totalTime),
          };
          return;
        }
      }

      yield* this.manageContextThreshold(ctx, query, memoryFlushState);

      // Build the next iteration prompt with full tool results (Anthropic-style)
      currentPrompt = buildIterationPrompt(
        query,
        ctx.scratchpad.getToolResults(),
        ctx.scratchpad.formatToolUsageForPrompt(),
      );
    }

    // Max iterations reached with no final response
    const totalTime = Date.now() - ctx.startTime;
    yield {
      type: 'done',
      answer: `Reached maximum iterations (${this.maxIterations}). I was unable to complete the research in the allotted steps.`,
      toolCalls: ctx.scratchpad.getToolCallRecords(),
      iterations: ctx.iteration,
      totalTime,
      tokenUsage: ctx.tokenCounter.getUsage(),
      tokensPerSecond: ctx.tokenCounter.getTokensPerSecond(totalTime),
    };
  }
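
  // Context-management sketch (numbers illustrative; CONTEXT_THRESHOLD and
  // KEEP_TOOL_USES come from '../utils/tokens.js', actual values may differ):
  //
  //   estimateTokens(systemPrompt + query + toolResults) = 130_000
  //   CONTEXT_THRESHOLD                                  = 100_000  (assumed)
  //   => manageContextThreshold() optionally flushes to memory, then clears
  //      all but the KEEP_TOOL_USES most recent tool results and emits a
  //      'context_cleared' event; run() then rebuilds the iteration prompt
  //      from the smaller scratchpad.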

  /**
   * Call the LLM with the current prompt.
   * @param prompt - The prompt to send to the LLM
   * @param useTools - Whether to bind tools (default: true). When false,
   *   returns a string directly.
   */
  private async callModel(
    prompt: string,
    useTools: boolean = true,
  ): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
    const result = await callLlm(prompt, {
      model: this.model,
      systemPrompt: this.systemPrompt,
      tools: useTools ? this.tools : undefined,
      signal: this.signal,
    });
    return { response: result.response, usage: result.usage };
  }

  /**
   * Emit the response text as the final answer.
   */
  private async *handleDirectResponse(
    responseText: string,
    ctx: RunContext,
  ): AsyncGenerator<AgentEvent> {
    const totalTime = Date.now() - ctx.startTime;
    yield {
      type: 'done',
      answer: responseText,
      toolCalls: ctx.scratchpad.getToolCallRecords(),
      iterations: ctx.iteration,
      totalTime,
      tokenUsage: ctx.tokenCounter.getUsage(),
      tokensPerSecond: ctx.tokenCounter.getTokensPerSecond(totalTime),
    };
  }

  /**
   * Clear the oldest tool results if the estimated context size exceeds the
   * threshold, flushing to memory first when enabled.
   */
  private async *manageContextThreshold(
    ctx: RunContext,
    query: string,
    memoryFlushState: { alreadyFlushed: boolean },
  ): AsyncGenerator<AgentEvent> {
    const fullToolResults = ctx.scratchpad.getToolResults();
    const estimatedContextTokens = estimateTokens(this.systemPrompt + ctx.query + fullToolResults);
    if (estimatedContextTokens > CONTEXT_THRESHOLD) {
      if (
        this.memoryEnabled &&
        shouldRunMemoryFlush({
          estimatedContextTokens,
          alreadyFlushed: memoryFlushState.alreadyFlushed,
        })
      ) {
        yield { type: 'memory_flush', phase: 'start' };
        const flushResult = await runMemoryFlush({
          model: this.model,
          systemPrompt: this.systemPrompt,
          query,
          toolResults: fullToolResults,
          signal: this.signal,
        }).catch(() => ({ flushed: false, written: false as const }));
        memoryFlushState.alreadyFlushed = flushResult.flushed;
        yield {
          type: 'memory_flush',
          phase: 'end',
          filesWritten: flushResult.written ? [`${new Date().toISOString().slice(0, 10)}.md`] : [],
        };
      }
      const clearedCount = ctx.scratchpad.clearOldestToolResults(KEEP_TOOL_USES);
      if (clearedCount > 0) {
        // Results were cleared, so a later flush may have new material to save
        memoryFlushState.alreadyFlushed = false;
        yield { type: 'context_cleared', clearedCount, keptCount: KEEP_TOOL_USES };
      }
    }
  }

  /**
   * Build the initial prompt with conversation history context if available.
   */
  private buildInitialPrompt(
    query: string,
    inMemoryChatHistory?: InMemoryChatHistory,
  ): string {
    if (!inMemoryChatHistory?.hasMessages()) {
      return query;
    }
    const recentTurns = inMemoryChatHistory.getRecentTurns();
    if (recentTurns.length === 0) {
      return query;
    }
    return buildHistoryContext({
      entries: recentTurns,
      currentMessage: query,
    });
  }
}
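
// Cancellation sketch (illustrative; `config.signal` is threaded through both
// callLlm and AgentToolExecutor above, so aborting the controller should
// surface as an error that run() reports via a final 'done' event):
//
//   const controller = new AbortController();
//   const agent = await Agent.create({ signal: controller.signal });
//   setTimeout(() => controller.abort(), 30_000);
//   for await (const event of agent.run(query)) { /* … */ }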