586 lines
21 KiB
JavaScript
586 lines
21 KiB
JavaScript
// js/chatbot/agents/semantic-grouper.js
|
||
// -----------------------------------------
|
||
// 意群聚合模块:将现有翻译分段聚合成更大的语义意群
|
||
// 用于长文档(>5万字)的智能分段处理
|
||
|
||
(function(window) {
|
||
'use strict';
|
||
|
||
/**
|
||
* 带重试的LLM调用包装器(指数退避 + 抖动)
|
||
* - 重试条件:408/429/5xx 或明显的网络错误
|
||
* - 默认为 3 次重试,基准延迟 600ms,上限 5000ms
|
||
* @param {Function} fn - 要执行的异步函数
|
||
* @param {Object} opts
|
||
* @param {number} opts.maxRetries
|
||
* @param {number} opts.baseDelay
|
||
* @param {number} opts.maxDelay
|
||
* @returns {Promise<any>} 函数执行结果
|
||
*/
|
||
async function retryWithBackoff(fn, opts = {}) {
|
||
const extractStatusFromMessage = (msg) => {
|
||
if (!msg) return undefined;
|
||
const m = String(msg).match(/\b(\d{3})\b/);
|
||
return m ? parseInt(m[1], 10) : undefined;
|
||
};
|
||
|
||
const shouldRetry = (err) => {
|
||
const status = err && (err.status || extractStatusFromMessage(err.message));
|
||
// 将 401/403 也视作可重试(上游号池问题)
|
||
if (status === 401 || status === 403) return true;
|
||
if (status === 408 || status === 429) return true;
|
||
if (status >= 500 && status <= 599) return true;
|
||
if (!status && (err?.name === 'TypeError' || /fetch|network|timeout/i.test(String(err && err.message)))) {
|
||
return true; // 网络类错误
|
||
}
|
||
return false;
|
||
};
|
||
|
||
const maxRetries = typeof opts.maxRetries === 'number' ? opts.maxRetries : 3;
|
||
const baseDelay = typeof opts.baseDelay === 'number' ? opts.baseDelay : 600;
|
||
const maxDelay = typeof opts.maxDelay === 'number' ? opts.maxDelay : 5000;
|
||
|
||
let lastError = null;
|
||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||
try {
|
||
return await fn();
|
||
} catch (error) {
|
||
lastError = error;
|
||
if (attempt === maxRetries || !shouldRetry(error)) {
|
||
throw error;
|
||
}
|
||
const jitter = Math.floor(Math.random() * 250);
|
||
const delay = Math.min(maxDelay, baseDelay * Math.pow(2, attempt)) + jitter;
|
||
console.warn(`[SemanticGrouper] API调用失败,${delay}ms后重试 (${attempt + 1}/${maxRetries})...`, error?.message || error);
|
||
await new Promise(resolve => setTimeout(resolve, delay));
|
||
}
|
||
}
|
||
if (lastError) throw lastError;
|
||
}
|
||
|
||
/**
|
||
* 将分段数组聚合成意群
|
||
* @param {Array<string>} chunks - 原始分段数组(ocrChunks 或 translatedChunks)
|
||
* @param {Object} options - 配置选项
|
||
* @param {number} options.targetChars - 目标字数(默认 5000)
|
||
* @param {number} options.minChars - 最小字数(默认 2500)
|
||
* @param {number} options.maxChars - 最大字数(默认 6000)
|
||
* @param {Function} options.onProgress - 进度回调函数 (current, total, message)
|
||
* @returns {Promise<Object>} 返回 {groups: 意群数组, enrichedChunks: 带元数据的chunks}
|
||
*/
|
||
async function aggregateIntoSemanticGroups(chunks, options = {}) {
|
||
const {
|
||
targetChars = 5000,
|
||
minChars = 2500,
|
||
maxChars = 6000,
|
||
concurrency = 20, // 恢复默认并发数
|
||
docContext = (window.data && window.data.semanticDocGist) ? window.data.semanticDocGist : '',
|
||
onProgress = null
|
||
} = options;
|
||
|
||
if (!chunks || !Array.isArray(chunks) || chunks.length === 0) {
|
||
console.warn('[SemanticGrouper] 无效的输入分段');
|
||
return { groups: [], enrichedChunks: [] };
|
||
}
|
||
|
||
console.log(`[SemanticGrouper] 开始聚合 ${chunks.length} 个分段,目标字数: ${targetChars}`);
|
||
|
||
// 创建带元数据的chunks
|
||
const enrichedChunks = chunks.map((text, index) => ({
|
||
chunkId: `chunk-${index}`,
|
||
text: text,
|
||
belongsToGroup: null, // 稍后填充
|
||
position: index,
|
||
charCount: text.length
|
||
}));
|
||
|
||
const candidates = [];
|
||
let currentGroup = {
|
||
segments: [],
|
||
texts: [],
|
||
charCount: 0
|
||
};
|
||
|
||
for (let i = 0; i < chunks.length; i++) {
|
||
const chunk = chunks[i] || '';
|
||
const chunkLength = chunk.length;
|
||
|
||
// 决策逻辑:是否将当前块加入当前组
|
||
if (currentGroup.charCount > 0) {
|
||
const potentialTotal = currentGroup.charCount + chunkLength;
|
||
// 情况1:加入后仍在合理范围内(不超过最大)
|
||
if (potentialTotal <= maxChars) {
|
||
currentGroup.segments.push(i);
|
||
currentGroup.texts.push(chunk);
|
||
currentGroup.charCount = potentialTotal;
|
||
continue;
|
||
}
|
||
// 情况2:加入后超过最大限制 -> 完成当前组(最小要求为软约束,允许 < min)
|
||
candidates.push(currentGroup);
|
||
currentGroup = {
|
||
segments: [i],
|
||
texts: [chunk],
|
||
charCount: chunkLength
|
||
};
|
||
continue;
|
||
} else {
|
||
// 空组,直接加入
|
||
currentGroup.segments.push(i);
|
||
currentGroup.texts.push(chunk);
|
||
currentGroup.charCount = chunkLength;
|
||
}
|
||
}
|
||
|
||
// 处理最后一组
|
||
if (currentGroup.segments.length > 0) {
|
||
candidates.push(currentGroup);
|
||
}
|
||
|
||
console.log(`[SemanticGrouper] 初步分组完成,共 ${candidates.length} 个候选意群。开始并发处理,最大并发: ${concurrency}`);
|
||
|
||
// 调用进度回调:开始处理
|
||
if (onProgress && typeof onProgress === 'function') {
|
||
onProgress(0, candidates.length, '开始生成意群摘要和关键词...');
|
||
}
|
||
|
||
const groups = await finalizeGroupsInParallel(candidates, concurrency, docContext, onProgress);
|
||
|
||
// 更新enrichedChunks的belongsToGroup字段
|
||
groups.forEach(group => {
|
||
group.segments.forEach(segmentIndex => {
|
||
enrichedChunks[segmentIndex].belongsToGroup = group.groupId;
|
||
});
|
||
});
|
||
|
||
console.log(`[SemanticGrouper] 聚合完成,生成 ${groups.length} 个意群`);
|
||
|
||
return {
|
||
groups: groups,
|
||
enrichedChunks: enrichedChunks
|
||
};
|
||
}
|
||
|
||
/**
|
||
* 完成意群的处理(生成摘要和关键词)
|
||
* @param {Object} currentGroup - 当前意群
|
||
* @param {Array} groups - 意群数组
|
||
*/
|
||
async function finalizeGroup(currentGroup, groupIndex, docContext) {
|
||
const fullText = currentGroup.texts.join('\n\n');
|
||
const groupId = `group-${groupIndex}`;
|
||
|
||
console.log(`[SemanticGrouper] 处理意群 ${groupId},包含 ${currentGroup.segments.length} 个分段,共 ${currentGroup.charCount} 字`);
|
||
|
||
try {
|
||
// 并发生成摘要、关键词和结构化信息
|
||
const [summary, keywords, structure] = await Promise.all([
|
||
generateSummary(fullText, 400, docContext), // 400字详细摘要
|
||
extractKeywords(fullText, docContext),
|
||
extractStructure(fullText, docContext) // 提取图表、章节、要点
|
||
]);
|
||
|
||
const result = {
|
||
groupId,
|
||
segments: currentGroup.segments, // 保留原始分段索引
|
||
charCount: currentGroup.charCount,
|
||
summary, // 400字详细摘要
|
||
keywords, // 关键词数组
|
||
structure, // 结构化信息(图表、章节、要点)
|
||
fullText // 完整文本
|
||
};
|
||
|
||
console.log(`[SemanticGrouper] 意群 ${groupId} 处理完成:`, {
|
||
segments: currentGroup.segments,
|
||
charCount: currentGroup.charCount,
|
||
keywords: (keywords || []).join(', '),
|
||
figures: structure.figures?.length || 0,
|
||
tables: structure.tables?.length || 0,
|
||
sections: structure.sections?.length || 0
|
||
});
|
||
|
||
return result;
|
||
} catch (error) {
|
||
console.error(`[SemanticGrouper] 处理意群 ${groupId} 失败:`, error);
|
||
|
||
// 降级:不生成摘要,仅保存基本信息
|
||
return {
|
||
groupId,
|
||
segments: currentGroup.segments,
|
||
charCount: currentGroup.charCount,
|
||
summary: `该意群包含 ${currentGroup.segments.length} 个分段,共 ${currentGroup.charCount} 字`,
|
||
keywords: [],
|
||
structure: { figures: [], tables: [], sections: [], keyPoints: [] },
|
||
fullText,
|
||
error: error.message
|
||
};
|
||
}
|
||
}
|
||
|
||
async function finalizeGroupsInParallel(candidates, concurrency, docContext, onProgress) {
|
||
const results = new Array(candidates.length);
|
||
let nextIndex = 0;
|
||
let completedCount = 0;
|
||
const total = candidates.length;
|
||
|
||
async function runNext() {
|
||
const i = nextIndex++;
|
||
if (i >= candidates.length) return;
|
||
|
||
// 添加随机延迟,避免同时发起大量请求
|
||
if (i > 0) {
|
||
await new Promise(resolve => setTimeout(resolve, 200 + Math.random() * 300));
|
||
}
|
||
|
||
results[i] = await finalizeGroup(candidates[i], i, docContext);
|
||
|
||
// 更新进度
|
||
completedCount++;
|
||
if (onProgress && typeof onProgress === 'function') {
|
||
onProgress(completedCount, total, `正在处理意群 ${completedCount}/${total}`);
|
||
}
|
||
|
||
return runNext();
|
||
}
|
||
|
||
const poolSize = Math.max(1, Math.min(concurrency || 1, candidates.length));
|
||
const runners = [];
|
||
for (let i = 0; i < poolSize; i++) {
|
||
runners.push(runNext());
|
||
}
|
||
await Promise.all(runners);
|
||
return results;
|
||
}
|
||
|
||
/**
|
||
* 生成意群摘要
|
||
* @param {string} text - 完整文本
|
||
* @param {number} maxLength - 最大长度
|
||
* @returns {Promise<string>} 摘要文本
|
||
*/
|
||
async function generateSummary(text, maxLength, docContext = '') {
|
||
// 检查依赖
|
||
if (!window.ChatbotCore || typeof window.ChatbotCore.singleChunkSummary !== 'function') {
|
||
console.warn('[SemanticGrouper] ChatbotCore 未加载,使用截断作为摘要');
|
||
return text.substring(0, maxLength) + (text.length > maxLength ? '...' : '');
|
||
}
|
||
|
||
try {
|
||
const config = window.ChatbotCore.getChatbotConfig();
|
||
const apiKey = config.apiKey;
|
||
|
||
if (!apiKey) {
|
||
throw new Error('未配置 API Key');
|
||
}
|
||
|
||
// 限制输入文本长度(避免 token 过多)
|
||
const inputText = text.length > 5000 ? text.substring(0, 5000) + '...' : text;
|
||
|
||
const ctx = (docContext || '').slice(0, 1000);
|
||
const prompt = `${ctx ? `背景(整篇文档总览):\n${ctx}\n\n` : ''}请用不超过${maxLength}字概括以下内容的核心要点,保持专业性和准确性:
|
||
|
||
${inputText}
|
||
|
||
要求:
|
||
1. 概括核心内容和主要观点,并保持与背景一致性
|
||
2. ${maxLength <= 100 ? '极度精简' : '突出重点'}
|
||
3. 不要添加"本段讲述"等描述性前缀`;
|
||
|
||
// 使用重试包装器
|
||
const summary = await retryWithBackoff(async () => {
|
||
return await window.ChatbotCore.singleChunkSummary(
|
||
prompt,
|
||
inputText,
|
||
config,
|
||
apiKey
|
||
);
|
||
}, { maxRetries: 3, baseDelay: 600, maxDelay: 5000 });
|
||
|
||
return summary.trim();
|
||
} catch (error) {
|
||
console.error('[SemanticGrouper] 生成摘要失败:', error);
|
||
// 降级:使用截断
|
||
return text.substring(0, maxLength) + (text.length > maxLength ? '...' : '');
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 提取关键词
|
||
* @param {string} text - 完整文本
|
||
* @returns {Promise<Array<string>>} 关键词数组
|
||
*/
|
||
async function extractKeywords(text, docContext = '') {
|
||
if (!window.ChatbotCore || typeof window.ChatbotCore.singleChunkSummary !== 'function') {
|
||
console.warn('[SemanticGrouper] ChatbotCore 未加载,无法提取关键词');
|
||
return [];
|
||
}
|
||
|
||
try {
|
||
const config = window.ChatbotCore.getChatbotConfig();
|
||
const apiKey = config.apiKey;
|
||
|
||
if (!apiKey) {
|
||
throw new Error('未配置 API Key');
|
||
}
|
||
|
||
const inputText = text.length > 3000 ? text.substring(0, 3000) + '...' : text;
|
||
|
||
const ctx = (docContext || '').slice(0, 500);
|
||
const prompt = `${ctx ? `背景(整篇文档总览):\n${ctx}\n\n` : ''}请从以下内容中提取4-6个最重要的关键词,优先提取:
|
||
1. 专有名词(公司名、人名、产品名、机构名、地名)
|
||
2. 核心概念(重要术语、技术名称)
|
||
3. 主题词(核心话题)
|
||
|
||
要求:
|
||
- **优先级**:实体 > 专业术语 > 概念词
|
||
- **具体性**:优先提取具体名称(如"雷曼公司"而非"公司")
|
||
- **完整性**:保留实体的完整表达(如"雷曼兄弟公司"而非拆分)
|
||
- 用逗号分隔,只返回关键词列表,不要解释
|
||
|
||
文档内容:
|
||
${inputText}`;
|
||
|
||
// 使用重试包装器
|
||
const result = await retryWithBackoff(async () => {
|
||
return await window.ChatbotCore.singleChunkSummary(
|
||
prompt,
|
||
inputText,
|
||
config,
|
||
apiKey
|
||
);
|
||
}, { maxRetries: 3, baseDelay: 600, maxDelay: 5000 });
|
||
|
||
// 解析关键词
|
||
const keywords = result
|
||
.split(/[,,、]/)
|
||
.map(k => k.trim())
|
||
.filter(k => k.length > 0 && k.length < 20)
|
||
.slice(0, 5);
|
||
|
||
return keywords;
|
||
} catch (error) {
|
||
console.error('[SemanticGrouper] 提取关键词失败:', error);
|
||
return [];
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 提取结构化信息(图表、章节、要点)
|
||
* @param {string} text - 完整文本
|
||
* @param {string} docContext - 文档上下文
|
||
* @returns {Promise<Object>} 结构化信息对象
|
||
*/
|
||
async function extractStructure(text, docContext = '') {
|
||
const structure = {
|
||
orderedElements: [], // 按文档顺序的结构化元素
|
||
// 保留旧格式用于兼容
|
||
figures: [],
|
||
tables: [],
|
||
sections: [],
|
||
keyPoints: []
|
||
};
|
||
|
||
try {
|
||
// 使用LLM提取有序的结构化信息
|
||
if (window.ChatbotCore && typeof window.ChatbotCore.singleChunkSummary === 'function') {
|
||
const config = window.ChatbotCore.getChatbotConfig();
|
||
const apiKey = config.apiKey;
|
||
|
||
if (apiKey) {
|
||
const inputText = text.length > 4000 ? text.substring(0, 4000) + '...' : text;
|
||
const ctx = (docContext || '').slice(0, 500);
|
||
const prompt = `${ctx ? `背景(文档总览):\n${ctx}\n\n` : ''}请按照文档顺序,提取以下结构化信息(每行一个,保持原文档顺序):
|
||
|
||
1. 大标题(章节主标题,如"第三章 XXX")- 标记为 [TITLE]
|
||
2. 小节标题(如"3.1 XXX")- 标记为 [SECTION]
|
||
3. 核心要点(重要论点或观点,不超过30字)- 标记为 [POINT]
|
||
4. 图片标题(如"图3.1: XXX")- 标记为 [FIGURE]
|
||
5. 表格标题(如"表3.1: XXX")- 标记为 [TABLE]
|
||
6. 公式标题/说明(如"公式3.1: XXX"或"E=mc²")- 标记为 [FORMULA]
|
||
|
||
格式示例:
|
||
[TITLE] 第三章 理想投资的判断标准
|
||
[SECTION] 3.1 风险与收益的平衡
|
||
[POINT] 投资需要在风险与收益之间寻找最优平衡点
|
||
[FIGURE] 图3.1: 风险收益曲线
|
||
[FORMULA] 公式3.1: 夏普比率 = (Rp - Rf) / σp
|
||
[TABLE] 表3.1: 不同资产类别的历史表现
|
||
|
||
要求:
|
||
- **严格保持原文表达**:标题、图表名称、公式等必须完全引用原文,不要改写或概括,这对后续关键词搜索至关重要
|
||
- 严格按照内容在文档中的出现顺序提取
|
||
- 每个元素不超过50字
|
||
- 只提取最重要的10-15个元素
|
||
- 如果没有某类元素,跳过即可
|
||
|
||
文档内容:
|
||
${inputText}`;
|
||
|
||
try {
|
||
// 使用重试包装器
|
||
const result = await retryWithBackoff(async () => {
|
||
return await window.ChatbotCore.singleChunkSummary(
|
||
prompt,
|
||
inputText,
|
||
config,
|
||
apiKey
|
||
);
|
||
});
|
||
|
||
// 解析结果
|
||
const lines = result.split('\n').map(l => l.trim()).filter(l => l.length > 0);
|
||
|
||
for (const line of lines) {
|
||
if (line.startsWith('[TITLE]')) {
|
||
const content = line.replace('[TITLE]', '').trim();
|
||
structure.orderedElements.push({ type: 'title', content });
|
||
structure.sections.push(content); // 兼容
|
||
} else if (line.startsWith('[SECTION]')) {
|
||
const content = line.replace('[SECTION]', '').trim();
|
||
structure.orderedElements.push({ type: 'section', content });
|
||
structure.sections.push(content); // 兼容
|
||
} else if (line.startsWith('[POINT]')) {
|
||
const content = line.replace('[POINT]', '').trim();
|
||
structure.orderedElements.push({ type: 'keypoint', content });
|
||
structure.keyPoints.push(content); // 兼容
|
||
} else if (line.startsWith('[FIGURE]')) {
|
||
const content = line.replace('[FIGURE]', '').trim();
|
||
structure.orderedElements.push({ type: 'figure', content });
|
||
structure.figures.push(content); // 兼容
|
||
} else if (line.startsWith('[TABLE]')) {
|
||
const content = line.replace('[TABLE]', '').trim();
|
||
structure.orderedElements.push({ type: 'table', content });
|
||
structure.tables.push(content); // 兼容
|
||
} else if (line.startsWith('[FORMULA]')) {
|
||
const content = line.replace('[FORMULA]', '').trim();
|
||
structure.orderedElements.push({ type: 'formula', content });
|
||
}
|
||
}
|
||
|
||
console.log(`[SemanticGrouper] 提取了 ${structure.orderedElements.length} 个有序结构元素`);
|
||
} catch (e) {
|
||
console.warn('[SemanticGrouper] LLM提取结构失败,使用正则降级:', e.message);
|
||
// 降级到正则提取(无顺序)
|
||
fallbackRegexExtraction(text, structure);
|
||
}
|
||
} else {
|
||
// 无API Key,使用正则降级
|
||
fallbackRegexExtraction(text, structure);
|
||
}
|
||
} else {
|
||
// 无ChatbotCore,使用正则降级
|
||
fallbackRegexExtraction(text, structure);
|
||
}
|
||
|
||
return structure;
|
||
} catch (error) {
|
||
console.error('[SemanticGrouper] 提取结构化信息失败:', error);
|
||
return structure;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 降级方案:使用正则提取(不保证顺序)
|
||
*/
|
||
function fallbackRegexExtraction(text, structure) {
|
||
const figureRegex = /(?:图|Figure|Fig\.?)\s*(\d+)[:::]?\s*([^\n]{0,50})/gi;
|
||
const tableRegex = /(?:表|Table)\s*(\d+)[:::]?\s*([^\n]{0,50})/gi;
|
||
const formulaRegex = /(?:公式|Formula|Equation)\s*(\d+)[:::]?\s*([^\n]{0,50})/gi;
|
||
const sectionRegex = /^(?:#+\s*)?(\d+(?:\.\d+)*)\s+([^\n]{3,60})$/gm;
|
||
|
||
let match;
|
||
while ((match = figureRegex.exec(text)) !== null) {
|
||
const content = `图${match[1]}: ${match[2].trim()}`;
|
||
structure.figures.push(content);
|
||
structure.orderedElements.push({ type: 'figure', content });
|
||
}
|
||
while ((match = tableRegex.exec(text)) !== null) {
|
||
const content = `表${match[1]}: ${match[2].trim()}`;
|
||
structure.tables.push(content);
|
||
structure.orderedElements.push({ type: 'table', content });
|
||
}
|
||
while ((match = formulaRegex.exec(text)) !== null) {
|
||
const content = `公式${match[1]}: ${match[2].trim()}`;
|
||
structure.orderedElements.push({ type: 'formula', content });
|
||
}
|
||
while ((match = sectionRegex.exec(text)) !== null) {
|
||
const content = `${match[1]} ${match[2].trim()}`;
|
||
structure.sections.push(content);
|
||
structure.orderedElements.push({ type: 'section', content });
|
||
}
|
||
|
||
// 去重
|
||
structure.figures = [...new Set(structure.figures)].slice(0, 5);
|
||
structure.tables = [...new Set(structure.tables)].slice(0, 5);
|
||
structure.sections = structure.sections.slice(0, 5);
|
||
}
|
||
|
||
/**
|
||
* 快速匹配:根据关键词查找相关意群
|
||
* @param {string} query - 用户查询
|
||
* @param {Array<Object>} groups - 意群数组
|
||
* @returns {Array<Object>} 匹配的意群(按相关度排序)
|
||
*/
|
||
function quickMatch(query, groups) {
|
||
if (!query || !groups || groups.length === 0) {
|
||
return [];
|
||
}
|
||
|
||
const queryLower = query.toLowerCase();
|
||
const queryWords = queryLower.split(/[\s,,、。.]+/).filter(w => w.length > 1);
|
||
|
||
// 为每个意群计算相关度分数
|
||
const scored = groups.map(group => {
|
||
let score = 0;
|
||
|
||
// 关键词匹配(权重 3)
|
||
if (group.keywords && group.keywords.length > 0) {
|
||
group.keywords.forEach(kw => {
|
||
if (queryLower.includes(kw.toLowerCase())) {
|
||
score += 3;
|
||
}
|
||
});
|
||
}
|
||
|
||
// 摘要匹配(权重 2)
|
||
if (group.summary) {
|
||
const summaryLower = group.summary.toLowerCase();
|
||
queryWords.forEach(word => {
|
||
if (summaryLower.includes(word)) {
|
||
score += 2;
|
||
}
|
||
});
|
||
}
|
||
|
||
// digest 匹配(权重 1)
|
||
if (group.digest) {
|
||
const digestLower = group.digest.toLowerCase();
|
||
queryWords.forEach(word => {
|
||
if (digestLower.includes(word)) {
|
||
score += 1;
|
||
}
|
||
});
|
||
}
|
||
|
||
return { group, score };
|
||
});
|
||
|
||
// 按分数降序排序
|
||
scored.sort((a, b) => b.score - a.score);
|
||
|
||
// 返回有分数的意群
|
||
return scored.filter(item => item.score > 0).map(item => item.group);
|
||
}
|
||
|
||
// 导出公共接口
|
||
window.SemanticGrouper = {
|
||
aggregate: aggregateIntoSemanticGroups,
|
||
quickMatch: quickMatch,
|
||
// 辅助方法(供测试使用)
|
||
generateSummary: generateSummary,
|
||
extractKeywords: extractKeywords
|
||
};
|
||
|
||
console.log('[SemanticGrouper] 意群聚合模块已加载');
|
||
|
||
})(window);
|