package com.aicodeassistant.engine; import com.aicodeassistant.llm.ModelCapabilityRegistry; import com.aicodeassistant.llm.ModelRegistry; import com.aicodeassistant.model.Message; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import java.util.List; /** * ContextCascade — 5 层压缩级联统一协调器。 *

*

 *   Level 0: Snip           ← 单条工具结果超预算时,截断中间保留首尾
 *   Level 1: MicroCompact   ← 对旧的可压缩工具结果替换为 "[cleared]"
 *   Level 2: AutoCompact    ← token 率 > 阈值时,三区划分 + LLM 摘要
 *   Level 3: CollapseDrain  ← 紧急情况下激进压缩 (contextWindow*0.5 目标)
 *   Level 4: ReactiveCompact← API 返回 413 时,仅保留 1 轮 + 极度压缩
 * 
*

* 关键设计: * - Level 0-1 每次 API 调用前无条件执行(代价极低) * - Level 2 基于 buffer-based 阈值触发 * - Level 3-4 仅在错误恢复路径触发 * - 状态通过 CascadeState 在层级间传递 * */ @Service public class ContextCascade { private static final Logger log = LoggerFactory.getLogger(ContextCascade.class); // ============ 阈值常量 ============ /** Buffer-based 自动压缩缓冲 token 数(默认值,当 ModelCapabilityRegistry 不可用时使用) */ private static final int AUTOCOMPACT_BUFFER_TOKENS_DEFAULT = 13_000; /** 最大连续自动压缩失败次数(电路断路器) */ private static final int MAX_CONSECUTIVE_FAILURES = 3; /** 工具结果预算占上下文窗口比例 */ private static final double TOOL_RESULT_BUDGET_RATIO = 0.3; /** MicroCompact 保护尾部消息数 */ private static final int MICRO_COMPACT_PROTECTED_TAIL = 10; private final SnipService snipService; private final MicroCompactService microCompactService; private final ContextCollapseService contextCollapseService; private final CompactService compactService; private final TokenCounter tokenCounter; private final ModelRegistry modelRegistry; private final ModelCapabilityRegistry modelCapabilityRegistry; public ContextCascade(SnipService snipService, MicroCompactService microCompactService, ContextCollapseService contextCollapseService, CompactService compactService, TokenCounter tokenCounter, ModelRegistry modelRegistry, ModelCapabilityRegistry modelCapabilityRegistry) { this.snipService = snipService; this.microCompactService = microCompactService; this.contextCollapseService = contextCollapseService; this.compactService = compactService; this.tokenCounter = tokenCounter; this.modelRegistry = modelRegistry; this.modelCapabilityRegistry = modelCapabilityRegistry; } // ============ 级联状态 ============ /** * Token 警告状态 — 多级阈值判断。 */ public record TokenWarningState( boolean isAboveWarningThreshold, // 第一级: ~70% 有效窗口 boolean isAboveErrorThreshold, // 第二级: ~90% 有效窗口 boolean isAboveAutoCompactThreshold, // 第三级: 触发自动压缩 int currentTokens, int contextWindowSize, int autoCompactThreshold ) {} /** * 级联执行结果 — 记录每层的执行情况。 */ public record CascadeResult( List messages, int originalTokens, int finalTokens, boolean snipExecuted, int snipTokensFreed, boolean microCompactExecuted, int microCompactTokensFreed, boolean contextCollapseExecuted, // Level 1.5 是否执行 int contextCollapseCharsFreed, // Level 1.5 释放字符数 boolean autoCompactAttempted, // 是否达到阈值并尝试执行 boolean autoCompactExecuted, CompactService.CompactResult autoCompactResult ) { public int totalTokensFreed() { return originalTokens - finalTokens; } public String summary() { StringBuilder sb = new StringBuilder("ContextCascade: "); sb.append(originalTokens).append(" → ").append(finalTokens).append(" tokens"); if (snipExecuted) sb.append(", Snip: -").append(snipTokensFreed); if (microCompactExecuted) sb.append(", MicroCompact: -").append(microCompactTokensFreed); if (contextCollapseExecuted) sb.append(", Collapse: -").append(contextCollapseCharsFreed).append("chars"); if (autoCompactAttempted && !autoCompactExecuted) sb.append(", AutoCompact: ATTEMPTED_FAILED"); if (autoCompactExecuted && autoCompactResult != null) sb.append(", AutoCompact: ").append(autoCompactResult.summary()); return sb.toString(); } } /** * 自动压缩追踪状态 — 跨轮次持久化。 */ public record AutoCompactTrackingState( boolean compactedThisTurn, int turnCounter, String lastTurnId, int consecutiveFailures ) { public static AutoCompactTrackingState initial() { return new AutoCompactTrackingState(false, 0, "", 0); } public AutoCompactTrackingState withFailure() { return new AutoCompactTrackingState(false, turnCounter, lastTurnId, consecutiveFailures + 1); } public AutoCompactTrackingState withSuccess(String turnId) { return new AutoCompactTrackingState(true, turnCounter + 1, turnId, 0); } public boolean isCircuitBroken() { return consecutiveFailures >= MAX_CONSECUTIVE_FAILURES; } } // ============ 核心:Token 警告状态计算 ============ /** * 计算 Token 警告状态 — buffer-based 精确算法。 * 比 CompactService.shouldAutoCompact() 的 85% 比率更精确。 * * 算法: * effectiveWindow = contextWindow - contextWindow/4 (预留 25% 摘要空间) * autoCompactThreshold = effectiveWindow - AUTOCOMPACT_BUFFER_TOKENS */ public TokenWarningState calculateTokenWarningState( List messages, String model) { int contextWindow = modelRegistry.getContextWindowForModel(model); int reservedForSummary = contextWindow / 4; int effectiveWindow = contextWindow - reservedForSummary; // 动态获取 buffer tokens:优先从 ModelCapabilityRegistry 获取,回退到默认值 int bufferTokens = modelCapabilityRegistry.isRegistered(model) ? modelCapabilityRegistry.getBufferTokens(model) : AUTOCOMPACT_BUFFER_TOKENS_DEFAULT; int autoCompactThreshold = effectiveWindow - bufferTokens; int currentTokens = tokenCounter.estimateTokens(messages); return new TokenWarningState( currentTokens > (int) (autoCompactThreshold * 0.7), // 警告 currentTokens > (int) (autoCompactThreshold * 0.9), // 错误 currentTokens > autoCompactThreshold, // 触发压缩 currentTokens, contextWindow, autoCompactThreshold ); } // ============ 核心:前置级联(每次 API 调用前) ============ /** * 执行前置级联 — Level 0 + Level 1 + Level 2。 * 在 QueryEngine.queryLoop() 的 Step 1 中调用,替代分散的三段代码。 * * @param messages 当前消息列表 * @param model 当前使用的模型 * @param trackingState 自动压缩追踪状态 * @return 级联执行结果 */ public CascadeResult executePreApiCascade( List messages, String model, AutoCompactTrackingState trackingState) { int contextWindow = modelRegistry.getContextWindowForModel(model); int originalTokens = tokenCounter.estimateTokens(messages); List current = messages; boolean snipExecuted = false; int snipTokensFreed = 0; boolean mcExecuted = false; int mcTokensFreed = 0; boolean collapseExecuted = false; int collapseCharsFreed = 0; boolean acAttempted = false; boolean acExecuted = false; CompactService.CompactResult acResult = null; // ===== Level 0: Snip (单条工具结果截断) ===== int toolResultBudget = (int) (contextWindow * TOOL_RESULT_BUDGET_RATIO * 3.5); List afterSnip = snipService.snipToolResults(current, toolResultBudget); int snipBefore = tokenCounter.estimateTokens(current); int snipAfter = tokenCounter.estimateTokens(afterSnip); if (snipAfter < snipBefore) { snipExecuted = true; snipTokensFreed = snipBefore - snipAfter; current = afterSnip; log.debug("Level 0 Snip: freed {} tokens", snipTokensFreed); } // ===== Level 1: MicroCompact (旧工具结果清除) ===== var mcResult = microCompactService.compactMessages(current, MICRO_COMPACT_PROTECTED_TAIL); if (mcResult.tokensFreed() > 0) { mcExecuted = true; mcTokensFreed = mcResult.tokensFreed(); current = mcResult.messages(); log.debug("Level 1 MicroCompact: freed {} tokens", mcTokensFreed); } // ===== Level 1.5: ContextCollapse (三级渐进折叠) ===== boolean collapseAttempted = true; ContextCollapseService.CollapseResult collapseResult = contextCollapseService.progressiveCollapse(current); if (collapseResult.collapsedCount() > 0) { collapseExecuted = true; collapseCharsFreed = collapseResult.estimatedCharsFreed(); current = collapseResult.messages(); log.debug("Level 1.5 ProgressiveCollapse: collapsed {} messages, ~{} chars freed", collapseResult.collapsedCount(), collapseResult.estimatedCharsFreed()); } // ===== Level 2: AutoCompact (LLM 摘要) — 含 Collapse 互斥协调 ===== boolean collapseDidExecute = collapseAttempted && collapseResult.collapsedCount() > 0; int collapseFreedChars = collapseResult.estimatedCharsFreed(); if (collapseDidExecute) { // Collapse 已执行,重新评估是否仍需 AutoCompact TokenWarningState postCollapseWarning = calculateTokenWarningState(current, model); if (!postCollapseWarning.isAboveAutoCompactThreshold()) { log.info("Level 2 AutoCompact 跳过: Collapse 已释放足够空间 " + "(collapseCharsFreed={}, postTokens={}, threshold={})", collapseFreedChars, postCollapseWarning.currentTokens(), postCollapseWarning.autoCompactThreshold()); } else if (!trackingState.isCircuitBroken()) { log.info("Level 2 AutoCompact 触发: Collapse 释放不足 (postTokens={} > threshold={})", postCollapseWarning.currentTokens(), postCollapseWarning.autoCompactThreshold()); acAttempted = true; try { acResult = compactService.compact(current, contextWindow, false); if (acResult.skipReason() == null && !acResult.compactedMessages().isEmpty()) { acExecuted = true; current = acResult.compactedMessages(); log.info("Level 2 AutoCompact completed: {}", acResult.summary()); } } catch (Exception e) { log.error("Level 2 AutoCompact failed", e); } } } else if (!trackingState.isCircuitBroken()) { // Collapse 未执行,保持原有 AutoCompact 判断逻辑 TokenWarningState warning = calculateTokenWarningState(current, model); if (warning.isAboveAutoCompactThreshold()) { log.info("Level 2 AutoCompact triggered: {} tokens > threshold {}", warning.currentTokens(), warning.autoCompactThreshold()); acAttempted = true; try { acResult = compactService.compact(current, contextWindow, false); if (acResult.skipReason() == null && !acResult.compactedMessages().isEmpty()) { acExecuted = true; current = acResult.compactedMessages(); log.info("Level 2 AutoCompact completed: {}", acResult.summary()); } } catch (Exception e) { log.error("Level 2 AutoCompact failed", e); } } } int finalTokens = tokenCounter.estimateTokens(current); CascadeResult result = new CascadeResult(current, originalTokens, finalTokens, snipExecuted, snipTokensFreed, mcExecuted, mcTokensFreed, collapseExecuted, collapseCharsFreed, acAttempted, acExecuted, acResult); if (result.totalTokensFreed() > 0) { log.info(result.summary()); } return result; } // ============ 错误恢复级联(413 时调用) ============ /** * 执行错误恢复级联 — Level 3 (CollapseDrain) + Level 4 (ReactiveCompact)。 * 在 QueryEngine 捕获 413 错误后调用。 * * @param messages 当前消息列表 * @param contextWindow 上下文窗口大小 * @param hasAttemptedReactive 是否已尝试过反应式压缩 * @return 压缩后的消息列表,或 null 表示无法恢复 */ public List executeErrorRecoveryCascade( List messages, int contextWindow, boolean hasAttemptedReactive) { // Level 3: CollapseDrain — 更激进的压缩 (contextWindow * 0.5 目标) try { CompactService.CompactResult drainResult = compactService.compact( messages, (int) (contextWindow * 0.5), true); if (drainResult.skipReason() == null && !drainResult.compactedMessages().isEmpty()) { log.info("Level 3 CollapseDrain: {} → {} tokens", drainResult.beforeTokens(), drainResult.afterTokens()); return drainResult.compactedMessages(); } } catch (Exception e) { log.warn("Level 3 CollapseDrain failed: {}", e.getMessage()); } // Level 4: ReactiveCompact — 最后手段 if (!hasAttemptedReactive) { try { CompactService.CompactResult reactiveResult = compactService.reactiveCompact(messages, contextWindow, false); if (reactiveResult.skipReason() == null) { log.info("Level 4 ReactiveCompact: {} → {} tokens", reactiveResult.beforeTokens(), reactiveResult.afterTokens()); return reactiveResult.compactedMessages(); } } catch (Exception e) { log.error("Level 4 ReactiveCompact failed: {}", e.getMessage()); } } log.error("ContextCascade: 所有恢复策略已耗尽"); return null; } }