1052 lines
33 KiB
JavaScript
1052 lines
33 KiB
JavaScript
// history_detail_scripts.js - 从 history_detail.html 中提取的 JavaScript 代码
|
||
// 这个文件包含了历史详情页面的主要 JavaScript 逻辑
|
||
|
||
// Reacts to `storage` events for the persisted settings entry and asks the
// chatbot UI to redraw itself so it reflects the changed settings.
window.addEventListener('storage', (event) => {
  // Ignore changes to any other storage key.
  if (event.key !== 'paperBurnerSettings') {
    return;
  }

  // Only act when the chatbot core (with its config accessor) is present.
  const core = window.ChatbotCore;
  if (!core || typeof core.getChatbotConfig !== 'function') {
    return;
  }

  // The UI module is optional; refresh it only when it exposes the hook.
  const ui = window.ChatbotUI;
  if (ui && ui.updateChatbotUI) {
    ui.updateChatbotUI();
  }
});
|
||
|
||
/**
 * Escapes every regular-expression metacharacter in a string so the result
 * can be embedded in a RegExp source and match the input literally.
 * @param {string} string - Raw text to escape.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(string) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return string.replace(metaCharacters, (character) => `\\${character}`);
}
|
||
|
||
/**
 * Builds a whitespace-tolerant, case-insensitive RegExp from literal text.
 *
 * The text is escaped first, every run of whitespace is replaced by `\s+`
 * (so it can match across line breaks and repeated spaces), and the whole
 * pattern is wrapped in optional leading/trailing whitespace.
 *
 * @param {string} exact - Literal text to search for.
 * @returns {RegExp} A new RegExp with the `g` and `i` flags.
 */
function fuzzyRegFromExact(exact) {
  const literal = escapeRegExp(exact);
  // Splitting on whitespace runs and re-joining is the same as replacing them.
  const flexible = literal.split(/\s+/).join('\\s+');
  return new RegExp(`\\s*${flexible}\\s*`, 'gi');
}
|
||
|
||
/**
 * Reports whether the currently loaded record contains non-blank OCR text.
 * Reads `window.data.ocr`; anything that is not a string counts as "no data"
 * instead of raising a TypeError.
 * @returns {boolean} `true` only when `window.data.ocr` is a non-blank string.
 */
function hasOcrData() {
  const ocr = window.data?.ocr;
  return typeof ocr === 'string' && ocr.trim() !== '';
}
|
||
|
||
/**
 * Reports whether the currently loaded record contains non-blank translation
 * text. Reads `window.data.translation`; anything that is not a string counts
 * as "no data" instead of raising a TypeError.
 * @returns {boolean} `true` only when `window.data.translation` is a non-blank string.
 */
function hasTranslationData() {
  const translation = window.data?.translation;
  return typeof translation === 'string' && translation.trim() !== '';
}
|
||
|
||
/**
 * Reports whether the currently loaded record still carries the original PDF.
 * Reads `window.data.metadata.originalPdfBase64` and returns a real boolean
 * rather than handing the (potentially very large) base64 string to callers.
 * @returns {boolean} `true` when a non-empty `originalPdfBase64` value exists.
 */
function hasOriginalPdfData() {
  return Boolean(window.data?.metadata?.originalPdfBase64);
}
|
||
|
||
/**
 * Converts a base64 string (raw or wrapped in a `data:` URL) into a File.
 *
 * For data URLs everything before the first comma is treated as the header:
 * its first `;`-separated segment is used as the MIME type and the remainder
 * of the string is the payload. This also accepts headers carrying extra
 * parameters (for example `;charset=...`), which a strict
 * `data:<type>;base64,<payload>` match would reject and then pass the whole
 * URL to `atob`.
 *
 * @param {string} base64 - Base64 text, with or without a `data:` prefix.
 * @param {string} [filename] - File name; defaults to 'document.pdf'.
 * @returns {File} File holding the decoded bytes. The type is taken from the
 *   data URL header when present, otherwise 'application/pdf'.
 * @throws {DOMException} From `atob` when the payload is not valid base64.
 */
function base64ToFile(base64, filename) {
  let payload = base64;
  let mimeString = 'application/pdf';

  if (base64.startsWith('data:')) {
    const commaIndex = base64.indexOf(',');
    if (commaIndex !== -1) {
      // Header looks like "<media type>[;param=value]*[;base64]".
      const mediaType = base64.slice('data:'.length, commaIndex).split(';')[0].trim();
      if (mediaType) {
        mimeString = mediaType;
      }
      payload = base64.slice(commaIndex + 1);
    }
  }

  // Drop line breaks/spaces that wrapped base64 text may contain.
  const byteString = atob(payload.replace(/\s+/g, ''));

  // atob yields one character per byte; copy the char codes into a byte array.
  const bytes = new Uint8Array(byteString.length);
  for (let i = 0; i < byteString.length; i++) {
    bytes[i] = byteString.charCodeAt(i);
  }

  return new File([bytes], filename || 'document.pdf', { type: mimeString });
}
|
||
|
||
/**
 * Shows a transient toast notification in the top-right corner of the page.
 *
 * Toasts are stacked inside a lazily created `#pbx-toast-container` element,
 * and the slide-in/slide-out keyframes are injected once as
 * `#pbx-toast-styles`. Each toast removes itself after `duration` ms plus a
 * 300 ms exit animation; the container is removed once it has no children.
 *
 * @param {string} message - Text to display (assigned through `textContent`).
 * @param {string} [type='info'] - 'info', 'success', 'error' or 'warning';
 *   other values fall back to the 'info' color.
 * @param {number} [duration=3000] - Milliseconds before the toast starts to leave.
 */
function showToast(message, type = 'info', duration = 3000) {
  // Reuse the shared container when one is already attached.
  let toastContainer = document.getElementById('pbx-toast-container');
  if (!toastContainer) {
    toastContainer = document.createElement('div');
    toastContainer.id = 'pbx-toast-container';
    toastContainer.style.cssText = `
      position: fixed;
      top: 20px;
      right: 20px;
      z-index: 10001;
      display: flex;
      flex-direction: column;
      gap: 10px;
    `;
    document.body.appendChild(toastContainer);
  }

  const colors = {
    info: '#3b82f6',
    success: '#10b981',
    error: '#ef4444',
    warning: '#f59e0b'
  };

  const toast = document.createElement('div');
  toast.style.cssText = `
    padding: 12px 20px;
    background: ${colors[type] || colors.info};
    color: white;
    border-radius: 8px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
    font-size: 14px;
    max-width: 400px;
    word-wrap: break-word;
    animation: slideIn 0.3s ease-out;
  `;

  // Inject the animation keyframes only once per document.
  if (!document.getElementById('pbx-toast-styles')) {
    const style = document.createElement('style');
    style.id = 'pbx-toast-styles';
    style.textContent = `
      @keyframes slideIn {
        from { transform: translateX(100%); opacity: 0; }
        to { transform: translateX(0); opacity: 1; }
      }
      @keyframes slideOut {
        from { transform: translateX(0); opacity: 1; }
        to { transform: translateX(100%); opacity: 0; }
      }
    `;
    document.head.appendChild(style);
  }

  toast.textContent = message;
  toastContainer.appendChild(toast);

  // Auto-dismiss: play the exit animation, then detach the toast.
  setTimeout(() => {
    toast.style.animation = 'slideOut 0.3s ease-out forwards';
    setTimeout(() => {
      // remove() is a no-op on detached nodes, so this cleanup cannot throw
      // when the toast or its container was already removed by other code.
      toast.remove();
      if (toastContainer.children.length === 0) {
        toastContainer.remove();
      }
    }, 300);
  }, duration);
}
|
||
|
||
/**
 * Runs OCR on a file through a fresh `OcrManager` instance.
 *
 * @param {File} file - The file to process.
 * @param {Function} onProgress - Progress callback, forwarded unchanged to
 *   `OcrManager#processFile` (the existing doc describes it as
 *   `(current, total, message)`).
 * @returns {Promise<Object>} Whatever `processFile` resolves with; callers in
 *   this file read `markdown`, `images` and `metadata` from it.
 * @throws {Error} When the global `OcrManager` is not defined.
 */
async function performOcr(file, onProgress) {
  // The OCR module is loaded by a separate script; fail clearly if it is missing.
  if (typeof OcrManager === 'undefined') {
    throw new Error('OCR 模块未加载,请刷新页面重试');
  }

  const manager = new OcrManager();
  return manager.processFile(file, onProgress);
}
|
||
|
||
/**
 * Translates OCR Markdown into Simplified Chinese, chunk by chunk.
 *
 * The text is split with `splitMarkdownIntoChunks(markdown, 2000)` and every
 * chunk is sent through `translateMarkdown` using the 'aliyun' model with an
 * empty API key and the prompts defined below. Chunks are processed
 * sequentially. If a chunk fails, or the translator returns something that is
 * not a string, the untranslated chunk is kept so the result never contains
 * the literal text "undefined"/"null".
 *
 * `window.promptPoolUI` is set to `undefined` for the duration of the run and
 * restored afterwards.
 *
 * @param {string} markdown - Markdown produced by OCR.
 * @param {Function} [onProgress] - Called as `(current, total, message)`:
 *   once with `current = 0` before the loop and once per chunk.
 * @returns {Promise<string>} The translated chunks joined by blank lines, trimmed.
 */
async function performTranslation(markdown, onProgress) {
  // The target language is fixed to Chinese.
  const targetLangName = '中文';

  // Prompts are defined inline (no dependency on getBuiltInPrompts).
  const systemPrompt = '你是一个专业的文档翻译助手,擅长将文本精确翻译为简体中文,同时保留原始的 Markdown 格式。';
  // NOTE: `${content}` is a literal placeholder inside a normal string, not a
  // template-literal substitution; it is passed through as-is.
  const userPromptTemplate = '请将以下英文内容翻译成简体中文。请保持原有的 Markdown 格式和结构。只返回翻译后的内容,不要添加任何解释或说明。\n\n${content}';

  // Always use the 'aliyun' model with an empty front-end API key.
  const selectedModel = 'aliyun';
  const apiKey = '';

  // Long documents are translated in chunks.
  const chunks = splitMarkdownIntoChunks(markdown, 2000);
  const totalChunks = chunks.length;
  const translatedParts = [];

  console.log('[performTranslation] 分块数:', totalChunks);
  console.log('[performTranslation] 输入markdown长度:', markdown?.length);

  onProgress && onProgress(0, totalChunks, '正在翻译...');

  // Temporarily hide promptPoolUI so the translator does not touch UI
  // elements that do not exist on the history detail page.
  const originalPromptPoolUI = window.promptPoolUI;
  window.promptPoolUI = undefined;

  try {
    for (let i = 0; i < chunks.length; i++) {
      onProgress && onProgress(i + 1, totalChunks, `翻译中 (${i + 1}/${totalChunks})`);

      try {
        const chunkResult = await translateMarkdown(
          chunks[i],
          targetLangName,
          selectedModel,
          apiKey,
          '[历史详情页翻译]', // logContext
          systemPrompt, // system prompt for the Chinese translation
          userPromptTemplate, // user prompt template for the Chinese translation
          true, // useCustomPrompts: use the prompts passed above
          true, // processTablePlaceholders
          {} // translateOptions: defaults
        );

        console.log(`[performTranslation] 第${i + 1}块翻译完成, 结果长度:`, chunkResult?.length);
        console.log(`[performTranslation] 第${i + 1}块翻译结果前100字符:`, chunkResult?.substring(0, 100));

        if (typeof chunkResult === 'string') {
          translatedParts.push(chunkResult);
        } else {
          // A nullish/non-string result would otherwise be stringified into
          // the document; keep the source text for this chunk instead.
          console.warn(`[performTranslation] chunk ${i + 1} returned a non-string result; keeping the source text`);
          translatedParts.push(chunks[i]);
        }
      } catch (error) {
        console.error(`[performTranslation] 翻译第 ${i + 1} 块失败:`, error);
        // Keep the source text when a chunk fails to translate.
        translatedParts.push(chunks[i]);
      }
    }
  } finally {
    // Always restore promptPoolUI, even when something above throws.
    window.promptPoolUI = originalPromptPoolUI;
  }

  const translatedText = translatedParts.join('\n\n').trim();
  console.log('[performTranslation] 翻译完成, 最终长度:', translatedText.length);
  return translatedText;
}
|
||
|
||
/**
 * Splits Markdown into chunks of roughly `maxChars` characters, breaking only
 * at line boundaries.
 *
 * Lines are accumulated until adding the next one (plus its newline) would
 * exceed `maxChars`; the accumulated text is then trimmed and emitted.
 * A single line longer than `maxChars` is never split, so such a chunk can
 * exceed the limit. Chunks that are blank after trimming are dropped.
 *
 * @param {string} markdown - Markdown text.
 * @param {number} [maxChars=2000] - Soft upper bound for the size of a chunk.
 * @returns {string[]} The chunks. When nothing is emitted for a string input
 *   the result is `[markdown]`; a non-string input yields `[]` instead of
 *   throwing.
 */
function splitMarkdownIntoChunks(markdown, maxChars = 2000) {
  console.log('[splitMarkdownIntoChunks] 输入长度:', markdown?.length, 'maxChars:', maxChars);

  // Guard: `.split` below would throw on null/undefined/non-string input.
  if (typeof markdown !== 'string') {
    console.warn('[splitMarkdownIntoChunks] expected a string, received:', typeof markdown);
    return [];
  }

  console.log('[splitMarkdownIntoChunks] 输入前100字符:', markdown.substring(0, 100));

  const chunks = [];
  const lines = markdown.split('\n');
  console.log('[splitMarkdownIntoChunks] 行数:', lines.length);

  // Pending lines plus their total length (each line counts its newline).
  let pending = [];
  let pendingLength = 0;

  // Emits the pending lines as one trimmed chunk unless they are blank.
  const flush = (label) => {
    const text = pending.join('\n').trim();
    if (text) {
      console.log(label, chunks.length + 1, '长度:', pendingLength);
      chunks.push(text);
    }
  };

  for (const line of lines) {
    const cost = line.length + 1;
    if (pendingLength + cost > maxChars) {
      flush('[splitMarkdownIntoChunks] 添加块:');
      pending = [];
      pendingLength = 0;
    }
    pending.push(line);
    pendingLength += cost;
  }

  flush('[splitMarkdownIntoChunks] 添加最后块:');

  console.log('[splitMarkdownIntoChunks] 最终块数:', chunks.length);
  chunks.forEach((chunk, i) => console.log(`[splitMarkdownIntoChunks] 块${i + 1} 长度:`, chunk.length));

  return chunks.length > 0 ? chunks : [markdown];
}
|
||
|
||
/**
 * Splits Markdown into heading-delimited chunks (used by the chunk-compare view).
 *
 * A new chunk starts at every line that begins with `#` (after optional
 * whitespace). Lines inside ``` fenced code blocks are never treated as
 * headings. Each chunk is trimmed, and chunks that are empty after trimming
 * (for example blank lines before the first heading) are skipped so that two
 * documents differing only in surrounding blank lines yield the same number
 * of chunks.
 *
 * @param {string} markdown - Markdown text.
 * @returns {string[]} The chunks; `[]` for empty or non-string input, and
 *   `[markdown]` when the text yields no non-empty chunk.
 */
function generateChunks(markdown) {
  if (!markdown || typeof markdown !== 'string') return [];

  const lines = markdown.split(/\r?\n/);
  const chunks = [];
  let buffer = [];
  let inCode = false;

  // Emits the buffered lines as one chunk, ignoring whitespace-only buffers.
  function flush() {
    if (buffer.length) {
      const text = buffer.join('\n').trim();
      if (text) {
        chunks.push(text);
      }
      buffer = [];
    }
  }

  for (const line of lines) {
    // A fence line toggles code mode and always belongs to the current chunk.
    if (/^\s*```/.test(line)) {
      inCode = !inCode;
      buffer.push(line);
      continue;
    }

    if (inCode) {
      buffer.push(line);
      continue;
    }

    // A heading closes the previous chunk and starts a new one.
    if (/^\s*#/.test(line)) {
      flush();
    }

    buffer.push(line);
  }

  flush();
  return chunks.length > 0 ? chunks : [markdown];
}
|
||
|
||
/**
 * Shows a modal confirmation dialog and resolves with the user's choice.
 *
 * The dialog can be dismissed with the confirm button (resolves `true`), or
 * with the cancel button, a click on the backdrop, or the Escape key (all
 * resolve `false`). Every path goes through one `close` routine that removes
 * the overlay and the document-level key listener exactly once, so a later
 * Escape press cannot act on an already closed dialog.
 *
 * NOTE: `title`, `message` and the button labels are inserted as HTML; pass
 * trusted text only. Line breaks in `message` are rendered (`pre-line`).
 *
 * @param {string} title - Dialog title.
 * @param {string} message - Dialog message.
 * @param {string} [confirmText='确认'] - Confirm button label.
 * @param {string} [cancelText='取消'] - Cancel button label.
 * @returns {Promise<boolean>} `true` when confirmed, `false` otherwise.
 */
function showConfirmDialog(title, message, confirmText = '确认', cancelText = '取消') {
  return new Promise((resolve) => {
    // Full-screen backdrop that centers the dialog box.
    const dialogOverlay = document.createElement('div');
    dialogOverlay.id = 'pbx-confirm-dialog-overlay';
    dialogOverlay.style.cssText = `
      position: fixed;
      top: 0;
      left: 0;
      width: 100%;
      height: 100%;
      background: rgba(0, 0, 0, 0.5);
      display: flex;
      align-items: center;
      justify-content: center;
      z-index: 10000;
    `;

    const dialogBox = document.createElement('div');
    dialogBox.style.cssText = `
      background: white;
      border-radius: 12px;
      padding: 24px;
      max-width: 400px;
      width: 90%;
      box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15);
    `;

    dialogBox.innerHTML = `
      <h3 style="margin: 0 0 16px 0; font-size: 18px; color: #1a1a1a;">${title}</h3>
      <p style="margin: 0 0 24px 0; font-size: 14px; color: #666; line-height: 1.6; white-space: pre-line;">${message}</p>
      <div style="display: flex; gap: 12px; justify-content: flex-end;">
        <button id="pbx-dialog-cancel" style="
          padding: 10px 20px;
          border: 1px solid #ddd;
          background: white;
          border-radius: 6px;
          cursor: pointer;
          font-size: 14px;
          color: #666;
        ">${cancelText}</button>
        <button id="pbx-dialog-confirm" style="
          padding: 10px 20px;
          border: none;
          background: #4f46e5;
          color: white;
          border-radius: 6px;
          cursor: pointer;
          font-size: 14px;
          font-weight: 500;
        ">${confirmText}</button>
      </div>
    `;

    dialogOverlay.appendChild(dialogBox);
    document.body.appendChild(dialogOverlay);

    const confirmBtn = dialogBox.querySelector('#pbx-dialog-confirm');
    const cancelBtn = dialogBox.querySelector('#pbx-dialog-cancel');

    let closed = false;

    // Escape closes the dialog as a cancellation.
    const handleEsc = (e) => {
      if (e.key === 'Escape') {
        close(false);
      }
    };

    // Single exit path: tear everything down once, then settle the promise.
    function close(result) {
      if (closed) return;
      closed = true;
      document.removeEventListener('keydown', handleEsc);
      dialogOverlay.remove();
      resolve(result);
    }

    confirmBtn.onclick = () => close(true);
    cancelBtn.onclick = () => close(false);

    // Clicking the backdrop (but not the dialog box itself) cancels.
    dialogOverlay.onclick = (e) => {
      if (e.target === dialogOverlay) {
        close(false);
      }
    };

    document.addEventListener('keydown', handleEsc);
  });
}
|
||
|
||
/**
 * Re-runs OCR (and optionally translation) for the current record, using the
 * original PDF stored in `window.data.metadata.originalPdfBase64`.
 *
 * Steps: decode the PDF, run `performOcr`, write the result into
 * `window.data` (ocr, images, metadata.ocrEngine/pageCount), optionally run
 * `performTranslation` and rebuild the chunk arrays, persist through
 * `saveResultToDB` when that function is available, re-render, and switch to
 * the 'chunk-compare' tab only if this run's translation succeeded (otherwise
 * the 'ocr' tab). A translation failure is reported as a warning and does not
 * discard the OCR result. All other failures are reported through a toast;
 * the function does not reject.
 *
 * @param {boolean} includeTranslation - Whether to translate after OCR.
 * @returns {Promise<void>}
 */
async function triggerReprocess(includeTranslation) {
  const docId = window.docIdForLocalStorage;
  const docName = window.data ? window.data.name : '未知文档';

  if (!docId) {
    showToast('无法获取文档ID,请刷新页面重试。', 'error');
    return;
  }

  // The original PDF must have been stored with the record.
  const pdfBase64 = window.data?.metadata?.originalPdfBase64;
  if (!pdfBase64) {
    showToast('当前记录没有保存原始PDF数据,无法重新处理。', 'error');
    return;
  }

  try {
    const file = base64ToFile(pdfBase64, docName);

    const ocrResult = await performOcr(file, (current, total, msg) => {
      showToast(`${msg || 'OCR 处理中'} (${current}/${total})`, 'info', 5000);
    });

    // Copy the OCR output into the shared record.
    window.data.ocr = ocrResult.markdown;
    if (ocrResult.images && ocrResult.images.length > 0) {
      window.data.images = ocrResult.images;
    }
    if (ocrResult.metadata) {
      window.data.metadata = window.data.metadata || {};
      window.data.metadata.ocrEngine = ocrResult.metadata.engine;
      window.data.metadata.pageCount = ocrResult.metadata.pageCount;
    }

    // Tracks the outcome of THIS run; a translation left over from an
    // earlier run must not decide which tab is shown.
    let translationSucceeded = false;

    if (includeTranslation) {
      try {
        const translationResult = await performTranslation(ocrResult.markdown, (current, total, msg) => {
          showToast(`${msg || '翻译中'} (${current}/${total})`, 'info', 5000);
        });
        window.data.translation = translationResult;

        // Chunk arrays feed the chunk-compare view.
        const ocrChunks = generateChunks(ocrResult.markdown);
        const translatedChunks = generateChunks(translationResult);
        window.data.ocrChunks = ocrChunks;
        window.data.translatedChunks = translatedChunks;
        console.log('[triggerReprocess] 生成分块: ocrChunks=', ocrChunks.length, 'translatedChunks=', translatedChunks.length);
        translationSucceeded = true;
      } catch (translationError) {
        console.error('[triggerReprocess] 翻译失败:', translationError);
        showToast(`翻译失败: ${translationError.message},但 OCR 已完成`, 'warning');
        // Fall through: the OCR result is still saved.
      }
    }

    // Persist the record; guarded like the other callers in this file.
    if (typeof saveResultToDB === 'function') {
      await saveResultToDB(window.data);
    } else {
      console.warn('[triggerReprocess] saveResultToDB is not available; result was not persisted');
    }

    // Refresh the page.
    if (typeof renderDetail === 'function') {
      renderDetail();
    }
    if (typeof showTab === 'function') {
      const showCompare = includeTranslation && translationSucceeded && window.data.translation;
      showTab(showCompare ? 'chunk-compare' : 'ocr');
    }

    showToast('处理完成!', 'success');
  } catch (error) {
    console.error('[triggerReprocess] 处理失败:', error);
    showToast(`处理失败: ${error.message}`, 'error');
  }
}
|
||
|
||
/**
 * Toggles the loading state of a tab button.
 *
 * When loading starts, the button's markup is stashed in
 * `dataset.originalContent`, replaced by a spinner, and the button is
 * disabled. When loading ends, the stashed markup is restored and the stash is
 * cleared. The markup is stashed only once per loading cycle, so calling this
 * with `loading = true` twice cannot overwrite it with the spinner markup.
 * Does nothing when no element has the given id.
 *
 * @param {string} tabId - Id of the tab button element.
 * @param {boolean} loading - `true` to show the spinner, `false` to restore.
 */
function setTabLoadingState(tabId, loading) {
  const btn = document.getElementById(tabId);
  if (!btn) return;

  if (loading) {
    // Keep the first snapshot; a repeated call would otherwise save the spinner.
    if (btn.dataset.originalContent === undefined) {
      btn.dataset.originalContent = btn.innerHTML;
    }
    btn.innerHTML = `<div class="spinner" style="width: 18px; height: 18px; border: 2px solid #e5e7eb; border-top-color: #3b82f6; border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto;"></div>`;
    btn.disabled = true;
    btn.style.minWidth = '60px';
  } else {
    // Compare against undefined so an originally empty button is restored too.
    if (btn.dataset.originalContent !== undefined) {
      btn.innerHTML = btn.dataset.originalContent;
      delete btn.dataset.originalContent;
    }
    btn.disabled = false;
    btn.style.minWidth = '';
  }
}
|
||
|
||
/**
 * Click handler for the OCR ("Word document") tab.
 *
 * Shows the 'ocr' tab directly when OCR text exists. Otherwise, if the
 * original PDF is available, asks the user to confirm and then runs an
 * OCR-only reprocess while the tab button shows its loading state.
 *
 * @returns {Promise<void>}
 */
async function handleOcrTabClick() {
  // Existing OCR text: nothing to generate.
  if (hasOcrData()) {
    showTab('ocr');
    return;
  }

  // Without the original PDF there is nothing to reprocess.
  if (!hasOriginalPdfData()) {
    showToast('当前记录没有保存原始PDF数据,无法重新处理。', 'warning');
    return;
  }

  const userAgreed = await showConfirmDialog(
    '生成Word文档',
    '当前文档尚未进行OCR处理。是否需要启动OCR处理生成Word文档?\n\n处理完成后可在本页面查看和导出。',
    '启动OCR',
    '取消'
  );
  if (!userAgreed) {
    return;
  }

  const tabButtonId = 'tab-ocr';
  setTabLoadingState(tabButtonId, true);
  try {
    // OCR only, no translation.
    await triggerReprocess(false);
  } finally {
    // Always give the button back, whatever happened above.
    setTabLoadingState(tabButtonId, false);
  }
}
|
||
|
||
/**
 * Click handler for the translation tab.
 *
 * Shows the 'translation' tab directly when a translation exists. Otherwise,
 * if the original PDF is available, asks the user to confirm and then runs an
 * OCR + translation reprocess while the tab button shows its loading state.
 *
 * @returns {Promise<void>}
 */
async function handleTranslationTabClick() {
  // Existing translation: nothing to generate.
  if (hasTranslationData()) {
    showTab('translation');
    return;
  }

  // Without the original PDF there is nothing to reprocess.
  if (!hasOriginalPdfData()) {
    showToast('当前记录没有保存原始PDF数据,无法重新处理。', 'warning');
    return;
  }

  const userAgreed = await showConfirmDialog(
    '生成翻译对照文档',
    '当前文档尚未进行翻译处理。是否需要启动OCR+翻译处理?\n\n这将生成原文Word文档和翻译对照文档,处理完成后可在本页面查看和导出。',
    '启动OCR+翻译',
    '取消'
  );
  if (!userAgreed) {
    return;
  }

  const tabButtonId = 'tab-translation';
  setTabLoadingState(tabButtonId, true);
  try {
    // OCR followed by translation.
    await triggerReprocess(true);
  } finally {
    // Always give the button back, whatever happened above.
    setTabLoadingState(tabButtonId, false);
  }
}
|
||
|
||
/**
 * Asks the user how to proceed before the PDF compare view can be shown.
 *
 * - Record has `metadata.contentListJson`: offer to start the MinerU
 *   structured translation.
 * - Record lacks it: offer to reprocess the document with the MinerU engine.
 *
 * In both cases, declining switches back to the 'original-file' tab (when
 * `showTab` is available).
 *
 * @returns {Promise<void>}
 */
async function showPdfCompareConfirmDialog() {
  const metadata = window.data && window.data.metadata;
  const hasMinerUData = metadata && metadata.contentListJson;

  // Shared "user declined" path.
  const returnToOriginalFile = () => {
    if (typeof showTab === 'function') {
      showTab('original-file');
    }
  };

  if (hasMinerUData) {
    // Structured content exists; only the translation is missing.
    const startTranslation = await showConfirmDialog(
      'PDF 对照视图',
      '当前文档尚未进行 MinerU 结构化翻译。是否现在开始翻译?\n\n翻译完成后将显示原文与译文的 PDF 对照视图。',
      '开始翻译',
      '取消'
    );
    if (startTranslation) {
      await executeMinerUStructuredTranslation();
    } else {
      returnToOriginalFile();
    }
    return;
  }

  // No structured content: the document has to go through MinerU first.
  const startReprocess = await showConfirmDialog(
    'PDF 对照视图',
    '当前文档未使用 MinerU 引擎处理,无法进行结构化翻译。\n\n是否重新使用 MinerU 引擎处理文档?\n\n处理完成后将自动进行翻译并显示 PDF 对照视图。',
    '开始处理',
    '取消'
  );
  if (startReprocess) {
    await triggerReprocessWithMinerU();
  } else {
    returnToOriginalFile();
  }
}
|
||
|
||
/**
 * Re-processes the current record's original PDF with the MinerU engine in
 * structured-translation mode, then opens the PDF compare view.
 *
 * The engine is selected by temporarily overriding three localStorage keys
 * (`ocrEngine`, `mineruTranslationMode`, `mineruMode`); their previous values
 * are restored in `finally`. Processing is delegated to
 * `window.processSinglePdf`, and its result is merged into `window.data` and
 * persisted through `saveResultToDB` when available. Failures are reported
 * through a toast; the function does not reject.
 *
 * The stored PDF may be raw base64 or a `data:` URL; a data URL header is
 * stripped before decoding (raw `atob` on a data URL throws).
 *
 * @returns {Promise<void>}
 */
async function triggerReprocessWithMinerU() {
  const docId = window.docIdForLocalStorage;
  const docName = window.data ? window.data.name : '未知文档';

  if (!docId) {
    showToast('无法获取文档ID,请刷新页面重试。', 'error');
    return;
  }

  // The original PDF must have been stored with the record.
  const pdfBase64 = window.data?.metadata?.originalPdfBase64;
  if (!pdfBase64) {
    showToast('当前记录没有保存原始PDF数据,无法重新处理。', 'error');
    return;
  }

  // Check the processing entry point before touching any shared state.
  if (typeof window.processSinglePdf !== 'function') {
    showToast('处理模块未加载,请刷新页面重试。', 'error');
    return;
  }

  setTabLoadingState('tab-pdf-compare', true);

  // Remember the current OCR configuration so it can be restored afterwards.
  const savedEngine = localStorage.getItem('ocrEngine');
  const savedTranslationMode = localStorage.getItem('mineruTranslationMode');
  const savedMineruMode = localStorage.getItem('mineruMode');

  try {
    // Temporarily force MinerU + structured translation.
    localStorage.setItem('ocrEngine', 'mineru');
    localStorage.setItem('mineruTranslationMode', 'structured');
    localStorage.setItem('mineruMode', 'txt');

    // Accept both raw base64 and "data:<type>;base64,<payload>" values.
    const rawBase64 = pdfBase64.startsWith('data:')
      ? pdfBase64.slice(pdfBase64.indexOf(',') + 1)
      : pdfBase64;
    const pdfBytes = Uint8Array.from(atob(rawBase64), c => c.charCodeAt(0));
    const pdfBlob = new Blob([pdfBytes], { type: 'application/pdf' });
    const pdfFile = new File([pdfBlob], docName || 'document.pdf', { type: 'application/pdf' });

    // Translation settings; tolerate a missing loader or a null result.
    const settings = (typeof loadSettings === 'function' ? loadSettings() : null) || {};

    // No API keys are passed: translation goes through the backend proxy.
    const result = await window.processSinglePdf(
      pdfFile,
      null, // mistralKeyObject: not needed for MinerU
      null, // translationKeyObject: not needed with the backend proxy
      'aliyun', // translation model handled by the backend proxy
      null, // translationModelConfig
      settings.maxTokensPerChunk || 2000,
      settings.targetLanguage || 'Chinese',
      () => Promise.resolve(), // acquireSlot
      () => {}, // releaseSlot
      settings.defaultSystemPrompt || '',
      settings.defaultUserPromptTemplate || '',
      settings.useCustomPrompts || false,
      null, // batchContext
      () => {} // onFileSuccess
    );

    if (result.error) {
      throw new Error(result.error);
    }

    console.log('[triggerReprocessWithMinerU] 处理结果:', result);
    console.log('[triggerReprocessWithMinerU] metadata:', result.metadata);
    console.log('[triggerReprocessWithMinerU] contentListJson:', result.metadata?.contentListJson);

    // Merge whatever the processor produced into the shared record.
    if (result.ocr) window.data.ocr = result.ocr;
    if (result.translation) window.data.translation = result.translation;
    if (result.images) window.data.images = result.images;
    if (result.ocrChunks) window.data.ocrChunks = result.ocrChunks;
    if (result.translatedChunks) window.data.translatedChunks = result.translatedChunks;
    if (result.metadata) {
      window.data.metadata = window.data.metadata || {};
      Object.assign(window.data.metadata, result.metadata);
    }

    if (typeof saveResultToDB === 'function') {
      await saveResultToDB(window.data);
    }

    showToast('处理完成!正在加载 PDF 对照视图...', 'success');

    // Reset the render lock (if the page defines one).
    if (typeof renderingTab !== 'undefined') renderingTab = null;

    if (typeof renderDetail === 'function') {
      await renderDetail();
    }

    // Jump to the PDF compare view.
    if (typeof showTabImmediate === 'function') {
      showTabImmediate('pdf-compare');
    } else if (typeof showTab === 'function') {
      showTab('pdf-compare');
    }
  } catch (error) {
    console.error('[triggerReprocessWithMinerU] 处理失败:', error);
    showToast(`处理失败: ${error.message}`, 'error');
  } finally {
    // Restore the previous OCR configuration (remove keys that were unset).
    if (savedEngine !== null) localStorage.setItem('ocrEngine', savedEngine);
    else localStorage.removeItem('ocrEngine');
    if (savedTranslationMode !== null) localStorage.setItem('mineruTranslationMode', savedTranslationMode);
    else localStorage.removeItem('mineruTranslationMode');
    if (savedMineruMode !== null) localStorage.setItem('mineruMode', savedMineruMode);
    else localStorage.removeItem('mineruMode');

    setTabLoadingState('tab-pdf-compare', false);
  }
}
|
||
|
||
/**
 * Runs the MinerU structured translation for the current record and shows
 * its progress inside `#tabContent`.
 *
 * Flow: render a progress panel, extract translatable items from
 * `window.data.metadata.contentListJson`, split them into batches, translate
 * them via `MinerUStructuredTranslation#translateBatches` (backend proxy, no
 * front-end API key), store the result and the list of failed items in
 * `window.data.metadata`, persist through `saveResultToDB` when available,
 * re-render, and finally switch to the 'pdf-compare' tab after 500 ms.
 *
 * On failure the panel is replaced by an error box; the error text is
 * inserted with `textContent`, so it is never interpreted as HTML.
 *
 * @returns {Promise<void>}
 */
async function executeMinerUStructuredTranslation() {
  const logPrefix = '[MinerU结构化翻译]';

  // Proxy base URL: shared ProxyConfig first, then the global override, then '/api'.
  const PROXY_BASE = (typeof window !== 'undefined' && window.ProxyConfig)
    ? window.ProxyConfig.getProxyUrl()
    : (window.PBX_PROXY_BASE_URL || '/api');

  const settings = typeof loadSettings === 'function' ? loadSettings() : {};
  const modelName = "tongyi";

  // Render the progress panel.
  const tabContent = document.getElementById('tabContent');
  tabContent.innerHTML = `
    <div id="structured-translation-progress" style="padding: 24px;">
      <div style="display: flex; align-items: center; margin-bottom: 16px;">
        <div class="spinner" style="width: 24px; height: 24px; border: 3px solid #e5e7eb; border-top-color: #3b82f6; border-radius: 50%; animation: spin 1s linear infinite; margin-right: 12px;"></div>
        <h3 style="margin: 0; font-size: 16px; color: #1f2937;">正在执行 MinerU 结构化翻译...</h3>
      </div>
      <div id="structured-translation-log" style="
        background: #f9fafb;
        border: 1px solid #e5e7eb;
        border-radius: 8px;
        padding: 12px;
        max-height: 300px;
        overflow-y: auto;
        font-family: monospace;
        font-size: 13px;
        color: #6b7280;
      ">
        <div class="log-entry">准备开始翻译...</div>
      </div>
      <div style="margin-top: 16px;">
        <div class="progress-bar" style="
          background: #e5e7eb;
          border-radius: 8px;
          height: 8px;
          overflow: hidden;
        ">
          <div id="structured-translation-progress-bar" style="
            background: linear-gradient(90deg, #3b82f6, #8b5cf6);
            height: 100%;
            width: 0%;
            transition: width 0.3s ease;
          "></div>
        </div>
        <p id="structured-translation-status" style="margin: 8px 0 0 0; font-size: 13px; color: #6b7280;">进度: 0%</p>
      </div>
    </div>
    <style>
      @keyframes spin {
        to { transform: rotate(360deg); }
      }
    </style>
  `;

  const logEl = document.getElementById('structured-translation-log');
  const progressBar = document.getElementById('structured-translation-progress-bar');
  const statusEl = document.getElementById('structured-translation-status');

  // Appends a timestamped line to the on-page log and mirrors it to the console.
  const addLog = (msg) => {
    const entry = document.createElement('div');
    entry.className = 'log-entry';
    entry.textContent = `[${new Date().toLocaleTimeString()}] ${msg}`;
    logEl.appendChild(entry);
    logEl.scrollTop = logEl.scrollHeight;
    console.log(`${logPrefix} ${msg}`);
  };

  // Progress callback handed to the translator.
  const onProgress = (progress) => {
    const pct = progress.percentage || 0;
    progressBar.style.width = `${pct}%`;
    statusEl.textContent = `进度: ${pct}% - ${progress.message || ''}`;
  };

  try {
    addLog('初始化翻译器...');

    // The translator class is provided by another script.
    if (typeof MinerUStructuredTranslation === 'undefined') {
      throw new Error('MinerU 结构化翻译模块未加载');
    }

    const translator = new MinerUStructuredTranslation();
    addLog('翻译器初始化完成');

    const dataObj = window.data;
    console.log('dataObj:', dataObj);

    // Validate with optional chaining: dereferencing a missing level here
    // would raise a TypeError that hides the intended error message.
    if (!dataObj?.metadata?.contentListJson) {
      console.log(!dataObj, !dataObj?.metadata, !dataObj?.metadata?.contentListJson);
      throw new Error('缺少必要的内容数据');
    }

    // Extract the translatable items.
    const contentListJson = dataObj.metadata.contentListJson;
    addLog(`提取可翻译内容...`);

    const translatableContent = translator.extractTranslatableContent(contentListJson);
    addLog(`提取了 ${translatableContent.length} 个片段`);

    // Split into batches.
    const batches = translator.splitIntoBatches(translatableContent);
    addLog(`分为 ${batches.length} 个批次`);

    // Target language; 'custom' reads the custom language name.
    const targetLang = settings.targetLanguage === 'custom'
      ? (settings.customTargetLanguageName || 'Chinese')
      : (settings.targetLanguage || 'Chinese');

    // Translation goes through the backend proxy, so no API key is needed.
    const translationOptions = {
      useBackendProxy: true,
      proxyBase: PROXY_BASE,
      provider: modelName === 'custom' ? 'aliyun' : modelName
    };

    addLog(`开始翻译 (模型: ${modelName}, 目标语言: ${targetLang}, 通过后端代理)...`);

    const translatedContentList = await translator.translateBatches(
      batches,
      targetLang,
      modelName,
      null, // API key: handled by the backend proxy
      {
        ...translationOptions,
        maxRetries: settings.structuredMaxRetries || 2,
        retryDelay: settings.structuredRetryDelayMs || 800
      },
      onProgress,
      () => Promise.resolve(), // acquireSlot
      () => {} // releaseSlot
    );

    addLog('翻译完成,保存数据...');

    // Store the result on the record's metadata.
    if (!dataObj.metadata) dataObj.metadata = {};
    dataObj.metadata.translatedContentList = translatedContentList;
    dataObj.metadata.supportsStructuredTranslation = true;

    // Collect the items the translator flagged as failed.
    const failedItems = [];
    translatedContentList.forEach((it, idx) => {
      if (it && it.failed === true) {
        failedItems.push({
          index: idx,
          type: it.type,
          page_idx: it.page_idx || 0,
          text: translator.extractItemText ? translator.extractItemText(it) : (it.text || '')
        });
      }
    });
    dataObj.metadata.failedStructuredItems = failedItems;
    dataObj.metadata.structuredFailedCount = failedItems.length;

    // Keep a page-level `data` binding (if one exists) in sync with window.data.
    if (typeof data !== 'undefined') {
      data.metadata = dataObj.metadata;
    }
    window.data = dataObj;

    if (typeof saveResultToDB === 'function') {
      await saveResultToDB(dataObj);
      addLog('数据已保存到数据库');
    }

    if (failedItems.length > 0) {
      addLog(`注意: 有 ${failedItems.length} 个片段翻译失败`);
    }

    addLog('正在刷新界面...');

    // Reset the render lock so the following showTab call can run.
    if (typeof renderingTab !== 'undefined') renderingTab = null;

    if (typeof renderDetail === 'function') {
      await renderDetail();
    }

    // Show the PDF compare view after a short delay.
    setTimeout(() => {
      if (typeof showTabImmediate === 'function') {
        showTabImmediate('pdf-compare');
      } else if (typeof showTab === 'function') {
        showTab('pdf-compare');
      }
    }, 500);
  } catch (error) {
    console.error(`${logPrefix} 翻译失败:`, error);
    const errorMessage = error && error.message ? error.message : String(error);
    addLog(`错误: ${errorMessage}`);

    // Replace the progress panel with an error box.
    tabContent.innerHTML = `
      <div class="error-box" style="padding: 24px; text-align: center;">
        <i class="fa fa-exclamation-triangle" style="font-size: 48px; color: #ef4444; margin-bottom: 16px;"></i>
        <h3 style="margin: 0 0 12px 0; color: #991b1b;">翻译失败</h3>
        <p class="structured-translation-error-message" style="margin: 0 0 16px 0; color: #6b7280;"></p>
        <button onclick="showTab('ocr')" style="
          padding: 10px 20px;
          background: #3b82f6;
          color: white;
          border: none;
          border-radius: 8px;
          cursor: pointer;
        ">返回 OCR 内容</button>
      </div>
    `;
    // Insert the message as text so markup in an error string is not rendered.
    const messageEl = tabContent.querySelector('.structured-translation-error-message');
    if (messageEl) {
      messageEl.textContent = errorMessage;
    }

    if (typeof renderingTab !== 'undefined') renderingTab = null;
    if (typeof console.timeEnd === 'function') console.timeEnd('[性能] showTab_总渲染');
  }
}
|
||
|
||
// Wire up the tab buttons once the DOM is ready, then render the detail view.
document.addEventListener('DOMContentLoaded', () => {
  // Button id -> click handler. Buttons missing from the page are skipped.
  const tabHandlers = [
    ['tab-ocr', handleOcrTabClick],
    ['tab-translation', handleTranslationTabClick],
    ['tab-chunk-compare', () => { showTab('chunk-compare'); }],
    ['tab-pdf-compare', () => { showTab('pdf-compare'); }],
    ['tab-original-file', () => { showTab('original-file'); }],
  ];

  for (const [buttonId, onClick] of tabHandlers) {
    const button = document.getElementById(buttonId);
    if (button) {
      button.onclick = onClick;
    }
  }

  // Initial render, when the page provides a renderer.
  if (typeof renderDetail === 'function') {
    renderDetail();
  }
});
|