// engine.js
// Simplified ReAct core engine
(function(window) {
  'use strict';

  /**
   * Serialise a value for prompt/log display without ever throwing.
   *
   * `JSON.stringify` returns `undefined` for `undefined`/functions and throws
   * on circular structures or BigInt; tool results are produced by arbitrary
   * tools, so neither case may be allowed to abort a reasoning round.
   *
   * @param {*} value - Any value.
   * @returns {string} JSON text, `String(value)` when JSON has no
   *   representation, or a placeholder when serialisation throws.
   */
  function safeStringify(value) {
    try {
      const json = JSON.stringify(value);
      return json === undefined ? String(value) : json;
    } catch (error) {
      return `[unserializable: ${error.message || String(error)}]`;
    }
  }

  /**
   * Pick the retrieved items out of a tool result payload.
   *
   * Precedence (first truthy field wins): `results`, then `matches`, then
   * `text` (wrapped as a single item).
   *
   * @param {object} payload - The `result` object of a tool call.
   * @returns {*} The items (normally an array), or `null` when none of the
   *   known fields is present.
   */
  function extractItems(payload) {
    if (payload.results) {
      // vector_search, keyword_search, search_semantic_groups
      return payload.results;
    }
    if (payload.matches) {
      // grep, regex_search
      return payload.matches;
    }
    if (payload.text) {
      // fetch, fetch_group_text
      return [{ text: payload.text }];
    }
    return null;
  }

  /**
   * Turn an LLM decision into a list of `{ tool, params }` calls.
   *
   * A decision flagged `parallel` is only trusted when `tool_calls` really is
   * an array; otherwise the single-call form is used so that a malformed
   * decision produces a failed tool result instead of an exception.
   *
   * @param {object} decision - Parsed decision with action "use_tool".
   * @returns {Array<{tool: *, params: *}>} Calls to execute.
   */
  function normalizeToolCalls(decision) {
    if (decision.parallel && Array.isArray(decision.tool_calls)) {
      return decision.tool_calls.filter(
        (call) => call !== null && typeof call === 'object'
      );
    }
    return [{ tool: decision.tool, params: decision.params }];
  }

  /**
   * ReAct engine core.
   * Simplified version: overly complex rules and forced pattern matching
   * have been removed.
   *
   * Collaborators are read from the global object at call time:
   * `TokenBudgetManager`, `ToolRegistry`, `SystemPromptBuilder`,
   * `ContextBuilder`, `ReActJsonParser`, `llmCaller` and (optionally) `data`.
   */
  class ReActEngine {
    /**
     * @param {object} [config]
     * @param {number} [config.maxIterations] - Loop limit; falsy values fall back to 5.
     * @param {*} [config.tokenBudget] - Passed through to `TokenBudgetManager`.
     * @param {object} [config.llmConfig] - Passed to the LLM caller as `externalConfig`.
     */
    constructor(config = {}) {
      this.maxIterations = config.maxIterations || 5;
      this.budgetManager = new window.TokenBudgetManager(config.tokenBudget);
      this.toolRegistry = new window.ToolRegistry();
      this.eventHandlers = [];
      this.llmConfig = config.llmConfig || {};

      // Document state (set by buildInitialContext)
      this.hasSemanticGroups = false;
      this.hasVectorIndex = false;
      this.hasChunks = false;

      // De-duplication: content fragments already retrieved (avoid showing twice).
      // NOTE(review): these are never cleared by run(); if one engine instance
      // serves several questions the dedup state carries over — confirm that
      // this is intended.
      this.seenContentHashes = new Set();
      this.seenContentSummaries = new Map(); // hash -> summary
    }

    /**
     * Build the (dynamic) ReAct system prompt for the current document state.
     * @returns {*} Whatever `SystemPromptBuilder.buildReActSystemPrompt` returns.
     */
    getSystemPrompt() {
      return window.SystemPromptBuilder.buildReActSystemPrompt(
        this.hasSemanticGroups,
        this.hasVectorIndex
      );
    }

    /**
     * Build the tool usage guidelines for the tools available in the given
     * document state, and log the available tool names.
     *
     * @param {boolean} [hasSemanticGroups=false]
     * @param {boolean} [hasVectorIndex=false]
     * @param {boolean} [hasChunks=false]
     * @returns {*} Whatever `SystemPromptBuilder.buildToolGuidelines` returns.
     */
    getToolGuidelines(hasSemanticGroups = false, hasVectorIndex = false, hasChunks = false) {
      const availableTools = this.toolRegistry.getAvailableToolDefinitions(
        hasSemanticGroups,
        hasVectorIndex,
        hasChunks
      );
      const availableToolNames = availableTools.map((t) => t.name).join(', ');
      console.log(`[ReActEngine] 可用工具(${availableTools.length}个): ${availableToolNames}`);

      return window.SystemPromptBuilder.buildToolGuidelines(availableTools);
    }

    /**
     * Register an event listener.
     * @param {string} eventType - Event name, or '*' to receive every event.
     * @param {function(*): void} handler - Called with the event payload.
     */
    on(eventType, handler) {
      this.eventHandlers.push({ type: eventType, handler });
    }

    /**
     * Dispatch an event to matching listeners. A throwing listener is logged
     * and does not prevent the remaining listeners from running.
     * @param {string} eventType
     * @param {*} data - Event payload.
     */
    emit(eventType, data) {
      this.eventHandlers
        .filter((h) => h.type === eventType || h.type === '*')
        .forEach((h) => {
          try {
            h.handler(data);
          } catch (e) {
            console.error('[ReActEngine] 事件处理器错误:', e);
          }
        });
    }

    /**
     * Detect the document state (semantic groups / vector index / chunks) from
     * `docContent` and `window.data`, store it on the engine, and delegate the
     * actual context construction to `ContextBuilder.buildInitialContext`.
     *
     * @param {object} docContent - Document payload.
     * @returns {*} Whatever `ContextBuilder.buildInitialContext` returns.
     */
    buildInitialContext(docContent) {
      // Detect document state
      this.hasSemanticGroups = (
        (Array.isArray(docContent.semanticGroups) && docContent.semanticGroups.length > 0) ||
        (Array.isArray(window.data?.semanticGroups) && window.data.semanticGroups.length > 0)
      );
      this.hasVectorIndex = !!(
        docContent.vectorIndexReady ||
        docContent.vectorIndex ||
        window.data?.vectorIndexReady ||
        window.data?.vectorIndex
      );
      this.hasChunks = !!(
        (Array.isArray(docContent.translatedChunks) && docContent.translatedChunks.length > 0) ||
        (Array.isArray(docContent.ocrChunks) && docContent.ocrChunks.length > 0) ||
        (docContent.translation && docContent.translation.length > 0) ||
        (docContent.ocr && docContent.ocr.length > 0) ||
        (Array.isArray(window.data?.translatedChunks) && window.data.translatedChunks.length > 0) ||
        (Array.isArray(window.data?.ocrChunks) && window.data.ocrChunks.length > 0) ||
        (window.data?.translation && window.data.translation.length > 0) ||
        (window.data?.ocr && window.data.ocr.length > 0)
      );

      console.log('[ReActEngine] 文档状态 - 意群:', this.hasSemanticGroups, ', 向量:', this.hasVectorIndex);

      // Use ContextBuilder to build the initial context
      return window.ContextBuilder.buildInitialContext(docContent);
    }

    /**
     * Check whether a context string is effectively empty (metadata only).
     *
     * @param {string} context
     * @returns {boolean} true when the context is missing, shorter than 100
     *   characters, or contains both metadata markers while being at most 800
     *   characters long.
     */
    isContextEmpty(context) {
      if (!context || context.length < 100) return true;

      // Detect whether only the metadata markers are present
      const hasMetadata = context.includes('=== DOCUMENT METADATA ===');
      const hasCritical = context.includes('=== CRITICAL ===');
      const hasActualContent = context.length > 800; // more than 800 chars means real content

      return hasMetadata && hasCritical && !hasActualContent;
    }

    /**
     * Check whether the last two tool calls used the same tool with the same
     * (JSON-serialised) parameters.
     *
     * @param {Array<{tool: *, params: *}>} toolResults
     * @returns {boolean}
     */
    hasRepeatedCalls(toolResults) {
      if (toolResults.length < 2) return false;

      const lastCall = toolResults[toolResults.length - 1];
      const secondLastCall = toolResults[toolResults.length - 2];

      // Tool name and parameters must both match
      if (lastCall.tool !== secondLastCall.tool) return false;

      const lastParams = JSON.stringify(lastCall.params);
      const secondLastParams = JSON.stringify(secondLastCall.params);

      return lastParams === secondLastParams;
    }

    /**
     * Check whether the most recent tool call succeeded but returned an empty
     * list. Both `results` and `matches` (grep-style tools) are recognised, in
     * line with analyzeInformationSufficiency.
     *
     * @param {Array<{result: object}>} toolResults
     * @returns {boolean}
     */
    checkEmptyResults(toolResults) {
      if (toolResults.length === 0) return false;

      const lastResult = toolResults[toolResults.length - 1];

      // Failed or missing results are not "empty" results
      if (!lastResult.result || !lastResult.result.success) return false;

      const items = lastResult.result.results || lastResult.result.matches;

      return Array.isArray(items) && items.length === 0;
    }

    /**
     * Heuristically rate how much content has been retrieved so far.
     *
     * @param {Array<{result: object}>} toolResults
     * @param {string} question - Currently unused; kept for interface stability.
     * @returns {'likely_sufficient'|'maybe_sufficient'|'insufficient'}
     *   'likely_sufficient' for >= 2 productive calls and > 1500 serialised
     *   characters, 'maybe_sufficient' for >= 1 productive call and > 800
     *   characters, otherwise 'insufficient'.
     */
    analyzeInformationSufficiency(toolResults, question) {
      if (toolResults.length === 0) return 'insufficient';

      // Total size of retrieved content
      let totalContentLength = 0;
      let successfulCalls = 0;
      let itemsFound = 0;

      for (const result of toolResults) {
        if (!result.result || !result.result.success) continue;

        // Different tools use different result fields
        const items = extractItems(result.result);

        if (items && items.length > 0) {
          totalContentLength += safeStringify(items).length;
          successfulCalls++;
          itemsFound += items.length;
        }
      }

      console.log(`[ReActEngine] 信息充足性分析 - 总内容长度: ${totalContentLength}, 成功调用: ${successfulCalls}/${toolResults.length}, 检索到 ${itemsFound} 条结果`);

      // Heuristic thresholds (lenient, because de-duplication shrinks content)
      if (successfulCalls >= 2 && totalContentLength > 1500) {
        return 'likely_sufficient';
      } else if (successfulCalls >= 1 && totalContentLength > 800) {
        return 'maybe_sufficient';
      } else {
        return 'insufficient';
      }
    }

    /**
     * Summarise what has been retrieved, for use in prompt notices.
     *
     * @param {Array<{tool: *, result: object}>} toolResults
     * @returns {string} e.g. "3 total items (2 items from grep, 1 items from fetch)",
     *   or 'No content retrieved yet'.
     */
    summarizeRetrievedContent(toolResults) {
      const summaryParts = [];
      let totalItems = 0;

      for (const result of toolResults) {
        if (!result.result || !result.result.success) continue;

        const tool = result.tool;
        const items = extractItems(result.result);
        const count = items ? items.length : 0;

        if (count > 0) {
          totalItems += count;
          summaryParts.push(`${count} items from ${tool}`);
        }
      }

      if (summaryParts.length === 0) {
        return 'No content retrieved yet';
      }

      return `${totalItems} total items (${summaryParts.join(', ')})`;
    }

    /**
     * Reasoning phase: build the prompt, call the LLM and parse its decision.
     * Emits 'reasoning_start' and 'reasoning_complete'.
     *
     * @param {string} systemPrompt - Caller-supplied system prompt; the
     *   engine's own system prompt is appended to it.
     * @param {Array} conversationHistory
     * @param {string} currentContext
     * @param {string} userQuestion
     * @param {Array} [toolResults=[]]
     * @param {number} [iteration] - 1-based loop iteration. Defaults to
     *   `toolResults.length + 1` for callers that do not track it.
     * @returns {Promise<*>} Whatever `ReActJsonParser.parse` returns.
     * @throws Propagates LLM call failures.
     */
    async reasoning(
      systemPrompt,
      conversationHistory,
      currentContext,
      userQuestion,
      toolResults = [],
      iteration = toolResults.length + 1
    ) {
      const reasoningPrompt = this.buildReasoningPrompt(
        currentContext,
        userQuestion,
        toolResults,
        iteration
      );

      this.emit('reasoning_start', { prompt: reasoningPrompt });

      // Augmented system prompt
      const enhancedSystemPrompt = systemPrompt + '\n\n' + this.getSystemPrompt();

      const response = await this.callLLM(enhancedSystemPrompt, conversationHistory, reasoningPrompt);

      this.emit('reasoning_complete', { response });

      // Use the enhanced JSON parser
      return window.ReActJsonParser.parse(response);
    }

    /**
     * Build the reasoning prompt, adding system notices dynamically.
     *
     * @param {string} context - Information known so far.
     * @param {string} question - The user's question.
     * @param {Array<{tool: *, params: *, result: *}>} toolResults
     * @param {number} [iteration] - 1-based loop iteration. Defaults to
     *   `toolResults.length + 1`, which is only accurate when every previous
     *   iteration executed exactly one tool call; run() passes the real value
     *   so that parallel calls do not trigger the final-iteration notice early.
     * @returns {string} Prompt text, sections joined with newlines.
     */
    buildReasoningPrompt(context, question, toolResults, iteration = toolResults.length + 1) {
      const parts = [];

      // 1. User question (always concise)
      parts.push('========================================');
      parts.push('用户问题:');
      parts.push(question);
      parts.push('========================================');
      parts.push('');

      // 2. Information known so far
      parts.push('---');
      parts.push('当前已知信息:');
      parts.push(context);
      parts.push('');

      // 3. Tool call history (if any)
      if (toolResults.length > 0) {
        parts.push('工具调用历史:');
        toolResults.forEach((result, idx) => {
          parts.push(`${idx + 1}. ${result.tool}(${JSON.stringify(result.params)})`);
          // A tool may return undefined or something JSON cannot encode.
          const resultStr = safeStringify(result.result);
          parts.push(` 结果: ${resultStr.length > 300 ? resultStr.slice(0, 300) + '...' : resultStr}`);
        });
        parts.push('');
      }

      // ===== Notice system =====
      const warnings = [];

      // Check 1: force retrieval in the first iteration
      if (iteration === 1) {
        warnings.push('🚨 CRITICAL - FIRST ITERATION:');
        warnings.push(' - The context contains NO document content, only metadata');
        warnings.push(' - You MUST call a tool in this iteration');
        warnings.push(' - DO NOT return action: "answer" in the first iteration');
        warnings.push(' - DO NOT ask the user for more details');
        warnings.push(' - Choose appropriate search keywords based on the question and start retrieving');
        console.log('[ReActEngine] 首轮迭代,强制要求调用工具');
      }

      // Check 2: repeated tool call
      if (this.hasRepeatedCalls(toolResults)) {
        warnings.push('⚠️ You are repeating the same tool call with the same parameters. Consider trying a different tool, different parameters, or providing an answer based on available information.');
        console.log('[ReActEngine] 检测到重复工具调用,添加警告');
      }

      // Check 3: empty result
      if (this.checkEmptyResults(toolResults)) {
        warnings.push('💡 Your last search returned no results. This may mean the information doesn\'t exist in the document, or you need different search terms. Consider answering based on available information or trying a different approach.');
        console.log('[ReActEngine] 检测到空结果,添加提示');
      }

      // Check 4: information sufficiency (tell the LLM what was retrieved)
      const sufficiency = this.analyzeInformationSufficiency(toolResults, question);
      if (sufficiency === 'likely_sufficient' || sufficiency === 'maybe_sufficient') {
        const summary = this.summarizeRetrievedContent(toolResults);
        warnings.push(`💡 INFORMATION RETRIEVED SUMMARY:`);
        warnings.push(` - You have made ${toolResults.length} tool calls`);
        warnings.push(` - Retrieved content includes: ${summary}`);
        warnings.push(` - CRITICAL: Review the "当前已知信息" section above`);
        warnings.push(` - If the information is sufficient to answer the question, provide an answer NOW`);
        warnings.push(` - DO NOT say "文档内容尚未加载" if you can see content above`);
        console.log('[ReActEngine] 信息可能充足,添加强化提示');
      }

      // Check 5: approaching the iteration limit (last two iterations)
      if (iteration >= this.maxIterations - 1) {
        warnings.push(`🚨 FINAL ITERATION WARNING:`);
        warnings.push(` - This is iteration ${iteration}/${this.maxIterations}`);
        warnings.push(` - You MUST provide an answer based on available information`);
        warnings.push(` - Even partial information is better than no answer`);
        warnings.push(` - DO NOT end without attempting to answer`);
        console.log('[ReActEngine] 接近迭代上限,添加紧急警告');
      }

      // Insert the notice section when there is anything to say
      if (warnings.length > 0) {
        parts.push('=== SYSTEM NOTICES ===');
        warnings.forEach((w) => parts.push(w));
        parts.push('');
        console.log(`[ReActEngine] 第${iteration}轮推理,触发${warnings.length}个警告`);
      }

      // 4. Tool guidelines
      parts.push(this.getToolGuidelines(this.hasSemanticGroups, this.hasVectorIndex, this.hasChunks));
      parts.push('');

      // 5. Decision hint (depends on the iteration)
      parts.push('---');
      if (iteration === 1) {
        parts.push('**第一轮决策(必须调用工具)**:');
        parts.push('- 分析用户问题,提取关键概念');
        parts.push('- 选择合适的工具和检索关键词');
        parts.push('- 返回 JSON 格式:{ "action": "use_tool", "thought": "...", "tool": "...", "params": {...} }');
      } else {
        parts.push('**后续轮次决策**:');
        parts.push('- 如果检索到的内容足够回答问题 → 返回答案');
        parts.push('- 如果需要更多信息 → 继续调用工具检索');
        parts.push('- 返回 JSON 格式的决策');
      }
      parts.push('');

      return parts.join('\n');
    }

    /**
     * Call the LLM through `window.llmCaller` with a 60 s timeout option.
     *
     * @param {string} systemPrompt
     * @param {Array} conversationHistory
     * @param {string} userPrompt
     * @returns {Promise<*>} The LLM caller's response.
     * @throws {Error} When `llmCaller` is not loaded; call failures are logged
     *   and rethrown unchanged.
     */
    async callLLM(systemPrompt, conversationHistory, userPrompt) {
      if (!window.llmCaller) {
        throw new Error('LLMCaller未加载');
      }

      try {
        const response = await window.llmCaller.call(
          systemPrompt,
          conversationHistory,
          userPrompt,
          {
            externalConfig: this.llmConfig,
            timeout: 60000
          }
        );
        return response;
      } catch (error) {
        console.error('[ReActEngine] LLM调用失败:', error);
        throw error;
      }
    }

    /**
     * Run the ReAct loop (core flow) as an async generator.
     *
     * Yields progress objects whose `type` is one of: 'context_initialized',
     * 'iteration_start', 'reasoning_start', 'error', 'reasoning_complete',
     * 'tool_call_start', 'tool_call_complete', 'context_pruned',
     * 'context_updated', 'max_iterations_reached', 'final_answer'. Several of
     * them are also emitted to listeners registered with on().
     *
     * A reasoning failure yields 'error', leaves the loop and then falls
     * through to 'max_iterations_reached' plus a fallback 'final_answer'.
     *
     * @param {string} userQuestion
     * @param {object} docContent
     * @param {string} systemPrompt
     * @param {Array} [conversationHistory=[]]
     */
    async *run(userQuestion, docContent, systemPrompt, conversationHistory = []) {
      this.emit('session_start', { question: userQuestion });

      // Build the initial context
      let context = this.buildInitialContext(docContent);
      const toolResults = [];
      let iterations = 0;
      const reactLog = []; // Execution log, shared by reference with yielded payloads

      console.log('[ReActEngine] 初始上下文长度:', context.length);

      yield {
        type: 'context_initialized',
        context: context.slice(0, 500) + '...',
        reactLog
      };

      while (iterations < this.maxIterations) {
        iterations++;

        const iterationPayload = {
          type: 'iteration_start',
          iteration: iterations,
          maxIterations: this.maxIterations
        };
        yield iterationPayload;
        this.emit('iteration_start', iterationPayload);

        yield { type: 'reasoning_start', iteration: iterations };

        let decision;
        try {
          decision = await this.reasoning(
            systemPrompt,
            conversationHistory,
            context,
            userQuestion,
            toolResults,
            iterations
          );
          // A parser that yields nothing usable is handled like any other
          // reasoning failure instead of crashing on `decision.thought`.
          if (decision === null || typeof decision !== 'object') {
            throw new Error('LLM 决策无法解析');
          }
        } catch (error) {
          this.emit('error', { error: error.message || String(error), iteration: iterations });
          yield {
            type: 'error',
            error: '推理失败: ' + (error.message || String(error)),
            iteration: iterations
          };
          break;
        }

        if (decision.thought) {
          reactLog.push({ type: 'thought', iteration: iterations, content: decision.thought });
        }

        yield {
          type: 'reasoning_complete',
          iteration: iterations,
          thought: decision.thought,
          action: decision.action,
          reactLog
        };

        // Answer or use a tool?
        if (decision.action === 'answer') {
          const finalPayload = {
            type: 'final_answer',
            answer: decision.answer,
            iterations: iterations,
            toolCallCount: toolResults.length,
            reactLog
          };
          yield finalPayload;
          this.emit('final_answer', finalPayload);
          this.emit('session_complete', { answer: decision.answer, iterations, reactLog });
          return;
        }

        // Execute tool calls (parallel supported)
        if (decision.action === 'use_tool') {
          const toolCalls = normalizeToolCalls(decision);

          // Announce every call before any of them runs
          for (const call of toolCalls) {
            reactLog.push({
              type: 'action',
              iteration: iterations,
              tool: call.tool,
              params: call.params
            });

            const startPayload = {
              type: 'tool_call_start',
              iteration: iterations,
              tool: call.tool,
              params: call.params,
              parallel: decision.parallel,
              totalCalls: toolCalls.length,
              reactLog
            };
            yield startPayload;
            this.emit('tool_call_start', startPayload);
          }

          // Run all tools concurrently; a throwing tool becomes a failed result
          const executePromises = toolCalls.map(async (call) => {
            let toolResult;
            try {
              toolResult = await this.toolRegistry.execute(call.tool, call.params);
            } catch (error) {
              toolResult = {
                success: false,
                error: error.message || String(error)
              };
            }
            return {
              tool: call.tool,
              params: call.params,
              result: toolResult
            };
          });

          const completedCalls = await Promise.all(executePromises);

          // Report completion
          for (const call of completedCalls) {
            reactLog.push({
              type: 'observation',
              iteration: iterations,
              result: call.result
            });

            const completePayload = {
              type: 'tool_call_complete',
              iteration: iterations,
              tool: call.tool,
              params: call.params,
              result: call.result,
              parallel: decision.parallel,
              reactLog
            };
            yield completePayload;
            this.emit('tool_call_complete', completePayload);
          }

          // Update the context (with de-duplication state)
          for (const call of completedCalls) {
            const newContext = window.ContextBuilder.formatToolResult(
              call.tool,
              call.result,
              this.seenContentHashes,
              this.seenContentSummaries
            );
            context += '\n\n' + newContext;

            toolResults.push({
              tool: call.tool,
              params: call.params,
              result: call.result
            });
          }

          // Token budget check
          const contextTokens = this.budgetManager.estimate(context);
          const budgetLimit = this.budgetManager.allocation.context;

          if (contextTokens > budgetLimit) {
            // `after` reports the budget target, not a measured post-prune size.
            const prunedPayload = {
              type: 'context_pruned',
              before: contextTokens,
              after: budgetLimit,
              iteration: iterations
            };
            yield prunedPayload;
            this.emit('context_pruned', prunedPayload);
            context = window.ContextBuilder.pruneContext(context, budgetLimit);
          }

          yield {
            type: 'context_updated',
            iteration: iterations,
            contextSize: context.length,
            estimatedTokens: this.budgetManager.estimate(context),
            parallelCallsCount: decision.parallel ? toolCalls.length : 0
          };
        } else {
          // Neither "answer" nor "use_tool": the iteration is spent without
          // progress, so make that visible for diagnosis.
          console.warn('[ReActEngine] 未知的决策动作:', decision.action);
        }
      }

      // Iteration limit reached (or the loop was left after an error)
      yield {
        type: 'max_iterations_reached',
        iterations: this.maxIterations,
        toolCallCount: toolResults.length
      };

      const fallbackAnswer = `经过 ${iterations} 轮推理,我收集到了一些信息,但未能在迭代限制内得出完整答案。\n\n基于当前信息:\n\n${context.slice(0, 2000)}\n\n建议:\n1. 提供更具体的问题\n2. 或尝试增加迭代次数限制`;

      const fallbackPayload = {
        type: 'final_answer',
        answer: fallbackAnswer,
        iterations: iterations,
        toolCallCount: toolResults.length,
        fallback: true,
        reactLog
      };
      yield fallbackPayload;
      this.emit('final_answer', fallbackPayload);
      this.emit('session_complete', { answer: fallbackAnswer, iterations, fallback: true, reactLog });
    }
  }

  // Export to the global object
  window.ReActEngine = ReActEngine;

  console.log('[ReActEngine] 核心引擎已加载');

// Fall back to globalThis so the module also loads where `window` is not
// defined (workers, Node); in a browser main thread this is `window` itself.
})(typeof window !== 'undefined' ? window : globalThis);