/**
 * Runs an async function, retrying failed attempts with exponential backoff
 * plus jitter.
 *
 * A failure is retried when its HTTP status is 401/403 (deliberately treated
 * as transient upstream account-pool problems), 408, 429 or 5xx. When no
 * status can be determined, it is retried if it looks like a network failure:
 * the error's name is 'TypeError' or its message mentions
 * fetch/network/timeout. Any other failure is rethrown immediately.
 *
 * The wait before retry k (0-based) is min(maxDelay, baseDelay * 2^k) plus
 * 0-249 ms of random jitter. The jitter is added after the cap, so a wait may
 * exceed maxDelay by up to 249 ms.
 *
 * @param {Function} fn - Async function to execute; called with no arguments.
 * @param {Object} [opts] - Retry settings; null/undefined means defaults.
 * @param {number} [opts.maxRetries=3] - Retries allowed after the first
 *   attempt. Negative values count as 0 and fractions are rounded down, so
 *   fn is always invoked at least once.
 * @param {number} [opts.baseDelay=600] - Base delay in milliseconds.
 * @param {number} [opts.maxDelay=5000] - Cap (ms) applied before jitter.
 * @returns {Promise<*>} Resolves with the first successful result of fn.
 * @throws {*} The error of the last attempt, or the first non-retryable error.
 */
async function retryWithBackoff(fn, opts = {}) {
  // The `= {}` default only covers undefined; an explicit null must not crash.
  const options = opts ?? {};

  const numberOr = (value, fallback) =>
    typeof value === 'number' && !Number.isNaN(value) ? value : fallback;

  // Clamp so that the first attempt always happens and the loop bound is an
  // integer (a fractional bound used to cause one pointless extra sleep).
  const maxRetries = Math.max(0, Math.floor(numberOr(options.maxRetries, 3)));
  const baseDelay = numberOr(options.baseDelay, 600);
  const maxDelay = numberOr(options.maxDelay, 5000);

  // Only 4xx/5xx tokens count as a status: an unrelated number in the text
  // (e.g. "timeout of 300 ms exceeded") must not hide a network failure.
  const statusFromMessage = (message) => {
    if (!message) {
      return undefined;
    }
    const match = String(message).match(/\b([45]\d{2})\b/);
    return match ? Number.parseInt(match[1], 10) : undefined;
  };

  // Prefer an explicit `status` property (number or numeric string), then
  // fall back to scanning the message.
  const resolveStatus = (err) => {
    const raw = err?.status;
    let direct = Number.NaN;
    if (typeof raw === 'number') {
      direct = raw;
    } else if (typeof raw === 'string') {
      direct = Number(raw);
    }
    if (Number.isInteger(direct) && direct > 0) {
      return direct;
    }
    return statusFromMessage(err?.message);
  };

  const isRetryable = (err) => {
    const status = resolveStatus(err);
    if (status !== undefined) {
      return (
        status === 401 ||
        status === 403 ||
        status === 408 ||
        status === 429 ||
        (status >= 500 && status <= 599)
      );
    }
    // No status available: retry only failures that look network-related.
    return (
      err?.name === 'TypeError' ||
      /fetch|network|timeout/i.test(String(err?.message))
    );
  };

  // Every iteration either returns fn's result, rethrows, or sleeps and loops.
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt >= maxRetries || !isRetryable(error)) {
        throw error;
      }
      const jitter = Math.floor(Math.random() * 250);
      const delay = Math.min(maxDelay, baseDelay * Math.pow(2, attempt)) + jitter;
      console.warn(`[SemanticGrouper] API调用失败，${delay}ms后重试 (${attempt + 1}/${maxRetries})...`, error?.message || error);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}