489 lines
16 KiB
JavaScript
489 lines
16 KiB
JavaScript
// tool-registry.js
|
||
// 工具注册表(从 react-engine.js 提取)
|
||
|
||
(function(window) {
|
||
'use strict';
|
||
|
||
/**
|
||
* 工具注册表
|
||
* 管理所有可用的检索工具
|
||
*/
|
||
class ToolRegistry {
|
||
constructor() {
|
||
this.tools = new Map();
|
||
this.registerBuiltinTools();
|
||
}
|
||
|
||
/**
|
||
* 注册内置工具
|
||
*/
|
||
registerBuiltinTools() {
|
||
// === 搜索工具类 ===
|
||
|
||
// 1. 向量语义搜索
|
||
this.register({
|
||
name: 'vector_search',
|
||
description: '智能语义搜索,理解同义词、相关概念、隐含关系。适合概念性、开放性、探索性问题。',
|
||
parameters: {
|
||
query: { type: 'string', description: '语义描述或问题' },
|
||
limit: { type: 'number', description: '返回结果数量', default: 10 }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.SemanticVectorSearch || !window.SemanticVectorSearch.search) {
|
||
return {
|
||
success: false,
|
||
error: '向量搜索功能未启用,建议使用 keyword_search 或 grep'
|
||
};
|
||
}
|
||
|
||
if (!window.data?.vectorIndex && !window.data?.semanticGroups) {
|
||
return {
|
||
success: false,
|
||
error: '向量索引未构建,建议使用 keyword_search 或 grep'
|
||
};
|
||
}
|
||
|
||
try {
|
||
const results = await window.SemanticVectorSearch.search(params.query, params.limit || 10);
|
||
return {
|
||
success: true,
|
||
count: results.length,
|
||
results: results.map(r => ({
|
||
groupId: r.groupId,
|
||
score: r.score,
|
||
text: r.text,
|
||
keywords: r.keywords
|
||
}))
|
||
};
|
||
} catch (error) {
|
||
return {
|
||
success: false,
|
||
error: `向量搜索失败: ${error.message}`
|
||
};
|
||
}
|
||
}
|
||
});
|
||
|
||
// 2. BM25关键词搜索
|
||
this.register({
|
||
name: 'keyword_search',
|
||
description: '多关键词加权搜索(BM25算法)。适用于精确查找特定关键词组合。',
|
||
parameters: {
|
||
keywords: { type: 'array', description: '关键词数组,如["词1", "词2"]' },
|
||
limit: { type: 'number', description: '返回结果数量', default: 8 }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.BM25Search || !window.BM25Search.search) {
|
||
return {
|
||
success: false,
|
||
error: 'BM25搜索功能未加载,建议使用 grep'
|
||
};
|
||
}
|
||
|
||
if (!window.data?.semanticGroups && !window.data?.ocrChunks && !window.data?.translatedChunks) {
|
||
return {
|
||
success: false,
|
||
error: '文档chunks未生成,建议使用 grep'
|
||
};
|
||
}
|
||
|
||
try {
|
||
const results = await window.BM25Search.search(params.keywords, params.limit || 8);
|
||
return {
|
||
success: true,
|
||
count: results.length,
|
||
results: results.map(r => ({
|
||
groupId: r.groupId,
|
||
score: r.score,
|
||
text: r.text,
|
||
matchedKeywords: r.matchedKeywords
|
||
}))
|
||
};
|
||
} catch (error) {
|
||
return {
|
||
success: false,
|
||
error: `BM25搜索失败: ${error.message}`
|
||
};
|
||
}
|
||
}
|
||
});
|
||
|
||
// 3. GREP字面文本搜索
|
||
this.register({
|
||
name: 'grep',
|
||
description: '字面文本搜索(精确匹配)。支持OR逻辑(用|分隔多个关键词,如"词1|词2|词3")。',
|
||
parameters: {
|
||
query: { type: 'string', description: '搜索关键词或短语' },
|
||
limit: { type: 'number', description: '返回结果数量', default: 20 },
|
||
context: { type: 'number', description: '上下文长度(字符数)', default: 2000 },
|
||
caseInsensitive: { type: 'boolean', description: '是否忽略大小写', default: true }
|
||
},
|
||
execute: async (params) => {
|
||
const docContent = (window.data?.translation || window.data?.ocr || '');
|
||
if (!docContent) {
|
||
return { success: false, error: '文档内容为空' };
|
||
}
|
||
|
||
const query = params.query || '';
|
||
const limit = params.limit || 20;
|
||
const context = params.context || 2000;
|
||
const caseInsensitive = params.caseInsensitive !== false;
|
||
|
||
const results = [];
|
||
const keywords = query.split('|').map(k => k.trim()).filter(k => k);
|
||
|
||
for (const keyword of keywords) {
|
||
const regex = new RegExp(keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), caseInsensitive ? 'gi' : 'g');
|
||
let match;
|
||
while ((match = regex.exec(docContent)) !== null && results.length < limit) {
|
||
const start = Math.max(0, match.index - context);
|
||
const end = Math.min(docContent.length, match.index + keyword.length + context);
|
||
results.push({
|
||
keyword: keyword,
|
||
position: match.index,
|
||
preview: docContent.slice(start, end)
|
||
});
|
||
|
||
if (match.index === regex.lastIndex) regex.lastIndex++;
|
||
}
|
||
|
||
if (results.length >= limit) break;
|
||
}
|
||
|
||
return {
|
||
success: true,
|
||
count: results.length,
|
||
matches: results
|
||
};
|
||
}
|
||
});
|
||
|
||
// 4. 正则表达式搜索
|
||
this.register({
|
||
name: 'regex_search',
|
||
description: '正则表达式搜索,匹配特定格式。适用于:日期、编号、公式引用、图表标注等。',
|
||
parameters: {
|
||
pattern: { type: 'string', description: '正则表达式模式(需转义特殊字符)' },
|
||
limit: { type: 'number', description: '返回结果数量', default: 10 },
|
||
context: { type: 'number', description: '上下文长度(字符数)', default: 1500 }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.AdvancedSearchTools || !window.AdvancedSearchTools.regexSearch) {
|
||
return { success: false, error: 'AdvancedSearchTools未加载' };
|
||
}
|
||
|
||
const docContent = (window.data?.translation || window.data?.ocr || '');
|
||
if (!docContent) {
|
||
return { success: false, error: '文档内容为空' };
|
||
}
|
||
|
||
try {
|
||
const results = window.AdvancedSearchTools.regexSearch(
|
||
params.pattern,
|
||
docContent,
|
||
{
|
||
limit: params.limit || 10,
|
||
context: params.context || 1500
|
||
}
|
||
);
|
||
|
||
return {
|
||
success: true,
|
||
count: results.length,
|
||
matches: results
|
||
};
|
||
} catch (error) {
|
||
return {
|
||
success: false,
|
||
error: error.message || '正则搜索失败'
|
||
};
|
||
}
|
||
}
|
||
});
|
||
|
||
// 5. 布尔逻辑搜索
|
||
this.register({
|
||
name: 'boolean_search',
|
||
description: '布尔逻辑搜索(支持AND/OR/NOT和括号)。语法示例:"(词1 OR 词2) AND 词3 NOT 词4"',
|
||
parameters: {
|
||
query: { type: 'string', description: '布尔查询表达式' },
|
||
limit: { type: 'number', description: '返回结果数量', default: 10 },
|
||
context: { type: 'number', description: '上下文长度(字符数)', default: 1500 }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.AdvancedSearchTools || !window.AdvancedSearchTools.booleanSearch) {
|
||
return { success: false, error: 'AdvancedSearchTools未加载' };
|
||
}
|
||
|
||
const docContent = (window.data?.translation || window.data?.ocr || '');
|
||
if (!docContent) {
|
||
return { success: false, error: '文档内容为空' };
|
||
}
|
||
|
||
try {
|
||
const results = window.AdvancedSearchTools.booleanSearch(
|
||
params.query,
|
||
docContent,
|
||
{
|
||
limit: params.limit || 10,
|
||
context: params.context || 1500
|
||
}
|
||
);
|
||
|
||
return {
|
||
success: true,
|
||
count: results.length,
|
||
matches: results
|
||
};
|
||
} catch (error) {
|
||
return {
|
||
success: false,
|
||
error: error.message || '布尔搜索失败'
|
||
};
|
||
}
|
||
}
|
||
});
|
||
|
||
// === 意群工具类 ===
|
||
|
||
// 6. 搜索意群
|
||
this.register({
|
||
name: 'search_semantic_groups',
|
||
description: '在文档的语义意群中搜索相关内容。返回意群ID、摘要和关键词。',
|
||
parameters: {
|
||
query: { type: 'string', description: '搜索查询' },
|
||
limit: { type: 'number', description: '返回结果数量', default: 5 }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.SemanticTools) {
|
||
return {
|
||
success: false,
|
||
error: 'SemanticTools未加载,建议使用 grep'
|
||
};
|
||
}
|
||
|
||
if (!window.data?.semanticGroups || window.data.semanticGroups.length === 0) {
|
||
return {
|
||
success: false,
|
||
error: '文档意群未生成,建议使用 grep 或 vector_search'
|
||
};
|
||
}
|
||
|
||
const results = window.SemanticTools.searchGroups(params.query, params.limit || 5);
|
||
return {
|
||
success: true,
|
||
results: results.map(r => ({
|
||
groupId: r.groupId,
|
||
summary: r.summary,
|
||
keywords: r.keywords,
|
||
charCount: r.charCount
|
||
}))
|
||
};
|
||
}
|
||
});
|
||
|
||
// 7. 获取意群详细内容
|
||
this.register({
|
||
name: 'fetch_group_text',
|
||
description: '获取指定意群的详细文本内容。granularity可选:summary(摘要), digest(精华), full(全文)。',
|
||
parameters: {
|
||
groupId: { type: 'string', description: '意群ID' },
|
||
granularity: { type: 'string', description: '详细程度', default: 'digest', enum: ['summary', 'digest', 'full'] }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.SemanticTools) {
|
||
throw new Error('SemanticTools未加载');
|
||
}
|
||
const result = window.SemanticTools.fetchGroupText(params.groupId, params.granularity || 'digest');
|
||
return {
|
||
success: true,
|
||
groupId: result.groupId,
|
||
granularity: result.granularity,
|
||
text: result.text,
|
||
charCount: result.text.length
|
||
};
|
||
}
|
||
});
|
||
|
||
// 8. 获取意群完整信息
|
||
this.register({
|
||
name: 'fetch',
|
||
description: '获取意群的完整详细信息(包含完整论述、公式、数据、图表、结构信息)。',
|
||
parameters: {
|
||
groupId: { type: 'string', description: '意群ID' }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.SemanticTools || !window.SemanticTools.fetchGroupDetailed) {
|
||
throw new Error('SemanticTools.fetchGroupDetailed未加载');
|
||
}
|
||
const result = window.SemanticTools.fetchGroupDetailed(params.groupId);
|
||
return {
|
||
success: true,
|
||
groupId: result.groupId,
|
||
text: result.text,
|
||
structure: result.structure,
|
||
keywords: result.keywords,
|
||
summary: result.summary,
|
||
digest: result.digest,
|
||
charCount: result.charCount
|
||
};
|
||
}
|
||
});
|
||
|
||
// 9. 文档结构地图
|
||
this.register({
|
||
name: 'map',
|
||
description: '获取文档整体结构地图(意群ID、字数、关键词、摘要、章节/图表/公式)。适用于了解文档整体脉络。',
|
||
parameters: {
|
||
limit: { type: 'number', description: '返回意群数量', default: 50 },
|
||
includeStructure: { type: 'boolean', description: '是否包含结构信息(章节、图表等)', default: true }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.SemanticTools) {
|
||
return {
|
||
success: false,
|
||
error: 'SemanticTools未加载,可尝试使用 grep'
|
||
};
|
||
}
|
||
|
||
const groups = window.data?.semanticGroups || [];
|
||
|
||
if (groups.length === 0) {
|
||
return {
|
||
success: false,
|
||
error: '文档意群未生成,建议使用 grep 或 vector_search'
|
||
};
|
||
}
|
||
|
||
const limit = Math.min(params.limit || 50, groups.length);
|
||
const includeStructure = params.includeStructure !== false;
|
||
|
||
const mapData = groups.slice(0, limit).map(g => {
|
||
const entry = {
|
||
groupId: g.groupId,
|
||
charCount: g.charCount || 0,
|
||
keywords: g.keywords || [],
|
||
summary: g.summary || ''
|
||
};
|
||
|
||
if (includeStructure && g.structure) {
|
||
entry.structure = {
|
||
sections: g.structure.sections || [],
|
||
figures: g.structure.figures || [],
|
||
formulas: g.structure.formulas || [],
|
||
tables: g.structure.tables || []
|
||
};
|
||
}
|
||
|
||
return entry;
|
||
});
|
||
|
||
return {
|
||
success: true,
|
||
totalGroups: groups.length,
|
||
returnedGroups: mapData.length,
|
||
docGist: window.data?.semanticDocGist || '',
|
||
map: mapData
|
||
};
|
||
}
|
||
});
|
||
|
||
// 10. 列出所有意群概览
|
||
this.register({
|
||
name: 'list_all_groups',
|
||
description: '列出文档中所有意群的概览信息(ID、关键词、摘要)。',
|
||
parameters: {
|
||
limit: { type: 'number', description: '返回数量限制', default: 20 },
|
||
includeDigest: { type: 'boolean', description: '是否包含精华摘要', default: false }
|
||
},
|
||
execute: async (params) => {
|
||
if (!window.SemanticTools) {
|
||
throw new Error('SemanticTools未加载');
|
||
}
|
||
const results = window.SemanticTools.listGroups(params.limit || 20, params.includeDigest || false);
|
||
return {
|
||
success: true,
|
||
count: results.length,
|
||
groups: results
|
||
};
|
||
}
|
||
});
|
||
}
|
||
|
||
/**
|
||
* 注册新工具
|
||
*/
|
||
register(tool) {
|
||
if (!tool.name || !tool.execute) {
|
||
throw new Error('工具必须包含name和execute字段');
|
||
}
|
||
this.tools.set(tool.name, tool);
|
||
}
|
||
|
||
/**
|
||
* 获取所有工具定义
|
||
*/
|
||
getToolDefinitions() {
|
||
return Array.from(this.tools.values()).map(tool => ({
|
||
name: tool.name,
|
||
description: tool.description,
|
||
parameters: tool.parameters
|
||
}));
|
||
}
|
||
|
||
/**
|
||
* 根据文档状态获取可用的工具定义(动态过滤)
|
||
*/
|
||
getAvailableToolDefinitions(hasSemanticGroups = false, hasVectorIndex = false, hasChunks = false) {
|
||
const allTools = Array.from(this.tools.values());
|
||
|
||
const requiresSemanticGroups = ['search_semantic_groups', 'fetch_group_text', 'fetch', 'map', 'list_all_groups'];
|
||
const requiresVectorIndex = ['vector_search'];
|
||
const requiresChunks = ['keyword_search'];
|
||
|
||
const availableTools = allTools.filter(tool => {
|
||
if (requiresSemanticGroups.includes(tool.name)) {
|
||
return hasSemanticGroups;
|
||
}
|
||
if (requiresVectorIndex.includes(tool.name)) {
|
||
return hasVectorIndex;
|
||
}
|
||
if (requiresChunks.includes(tool.name)) {
|
||
return hasSemanticGroups || hasChunks;
|
||
}
|
||
return true;
|
||
});
|
||
|
||
return availableTools.map(tool => ({
|
||
name: tool.name,
|
||
description: tool.description,
|
||
parameters: tool.parameters
|
||
}));
|
||
}
|
||
|
||
/**
|
||
* 执行工具
|
||
*/
|
||
async execute(toolName, params) {
|
||
const tool = this.tools.get(toolName);
|
||
if (!tool) {
|
||
throw new Error(`未找到工具: ${toolName}`);
|
||
}
|
||
try {
|
||
return await tool.execute(params);
|
||
} catch (error) {
|
||
return {
|
||
success: false,
|
||
error: error.message || String(error)
|
||
};
|
||
}
|
||
}
|
||
}
|
||
|
||
// 导出到全局
|
||
window.ToolRegistry = ToolRegistry;
|
||
|
||
console.log('[ToolRegistry] 模块已加载');
|
||
|
||
})(window);
|