From 95eb004c1829ce19137a42c5db195e4e53604a15 Mon Sep 17 00:00:00 2001 From: MT-Mint <798521692@qq.com> Date: Wed, 25 Mar 2026 10:47:51 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20pdf=E5=AF=B9=E7=85=A7=EF=BC=8Cword?= =?UTF-8?q?=E6=96=87=E6=A1=A3=EF=BC=8C=E7=BF=BB=E8=AF=91=EF=BC=8C=E5=88=86?= =?UTF-8?q?=E5=9D=97=E5=AF=B9=E6=AF=94=E5=8A=9F=E8=83=BD=E9=83=BD=E5=B7=B2?= =?UTF-8?q?=E7=BB=8F=E6=AD=A3=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- js/history/history_detail_render.js | 25 +++++++++++++++++++++++++ js/history/history_detail_scripts.js | 28 +++++++++++++++++----------- js/process/main.js | 17 +++++++++++++---- local-proxy/routes/documents.js | 5 +++-- 4 files changed, 58 insertions(+), 17 deletions(-) diff --git a/js/history/history_detail_render.js b/js/history/history_detail_render.js index 124e1ea..e51836b 100644 --- a/js/history/history_detail_render.js +++ b/js/history/history_detail_render.js @@ -64,6 +64,25 @@ async function renderDetail() { // 兼容嵌套的 metadata 结构 const meta = data?.metadata?.metadata || data?.metadata || {}; + // 调试:打印后端数据结构 + console.log('[renderDetail] 后端数据结构:', { + id: data.id, + name: data.name, + fileType: data.fileType, + hasMetadata: !!data.metadata, + metadataType: typeof data.metadata, + metadataKeys: data.metadata ? Object.keys(data.metadata) : [], + hasNestedMetadata: !!(data.metadata?.metadata), + nestedMetadataKeys: data.metadata?.metadata ? Object.keys(data.metadata.metadata) : [], + metaKeys: Object.keys(meta), + hasOriginalPdfBase64: !!meta.originalPdfBase64, + hasOcrChunks: !!(meta.ocrChunks?.length), + hasTranslatedChunks: !!(meta.translatedChunks?.length), + hasImages: !!(meta.images?.length), + hasContentListJson: !!meta.contentListJson, + hasTranslatedContentList: !!meta.translatedContentList + }); + const hasMinerUStructuredData = meta.originalPdfBase64 && meta.contentListJson && @@ -72,6 +91,12 @@ async function renderDetail() { const hasOriginalPdf = !!(meta.originalPdfBase64); + console.log('[renderDetail] 检测结果:', { + hasMinerUStructuredData, + hasOriginalPdf, + defaultTab: hasOriginalPdf ? 'original-file' : 'ocr' + }); + document.getElementById('fileName').textContent = data.name; if (fileMetaTimeEl) { fileMetaTimeEl.textContent = `时间: ${new Date(data.time).toLocaleString()}`; diff --git a/js/history/history_detail_scripts.js b/js/history/history_detail_scripts.js index bed2d0a..709d463 100644 --- a/js/history/history_detail_scripts.js +++ b/js/history/history_detail_scripts.js @@ -743,7 +743,8 @@ async function triggerReprocessWithMinerU() { settings.defaultUserPromptTemplate || '', settings.useCustomPrompts || false, null, // batchContext - () => {} // onFileSuccess + () => {}, // onFileSuccess + docId // existingDocId - 更新现有文档而非创建新记录 ); if (result.error) { @@ -765,12 +766,7 @@ async function triggerReprocessWithMinerU() { Object.assign(window.data.metadata, result.metadata); } - // 保存到数据库 - if (window.storageAdapter && typeof window.storageAdapter.saveResultToDB === 'function') { - await window.storageAdapter.saveResultToDB(window.data); - } else if (typeof saveResultToDB === 'function') { - await saveResultToDB(window.data); - } + // 注意:processSinglePdf 已经保存到数据库,无需重复保存 showToast('处理完成!正在加载 PDF 对照视图...', 'success'); @@ -951,6 +947,18 @@ async function executeMinerUStructuredTranslation() { addLog('翻译完成,保存数据...'); + // 从 translatedContentList 生成合并的翻译文本 + const translatedTextParts = []; + translatedContentList.forEach((item) => { + if (item && item.text && item.type === 'text') { + translatedTextParts.push(item.text); + } + }); + const combinedTranslation = translatedTextParts.join('\n\n'); + + // 更新 dataObj 的翻译字段 + dataObj.translation = combinedTranslation; + // 保存到 metadata if (!dataObj.metadata) dataObj.metadata = {}; dataObj.metadata.translatedContentList = translatedContentList; @@ -971,10 +979,8 @@ async function executeMinerUStructuredTranslation() { dataObj.metadata.failedStructuredItems = failedItems; dataObj.metadata.structuredFailedCount = failedItems.length; - // 更新全局数据 - if (typeof data !== 'undefined') { - data.metadata = dataObj.metadata; - } + // 更新全局数据(dataObj 是 window.data 的引用,修改即生效) + // 注意:不再尝试更新可能为 null 的全局 data 变量 window.data = dataObj; // 保存到数据库 diff --git a/js/process/main.js b/js/process/main.js index a35d8d9..1fbc8fa 100644 --- a/js/process/main.js +++ b/js/process/main.js @@ -112,7 +112,8 @@ async function processSinglePdf( defaultUserPromptTemplateSetting, useCustomPromptsSetting, // 新增参数 batchContext, - onFileSuccess + onFileSuccess, + existingDocId // 新增:现有文档ID,用于更新而非创建新记录 ) { let currentMarkdownContent = ''; let currentTranslationContent = ''; @@ -823,8 +824,14 @@ const fileType = fileToProcess.name.split('.').pop().toLowerCase(); ); // 5. 保存结果 - // 结构化翻译完成后:不生成常规译文,以免展示译文/分块对比标签 - currentTranslationContent = ''; + // 从 translatedContentList 生成合并的翻译文本用于数据库保存 + const translatedTextParts = []; + (translatedContentList || []).forEach((item) => { + if (item && item.text && item.type === 'text') { + translatedTextParts.push(item.text); + } + }); + currentTranslationContent = translatedTextParts.join('\n\n'); // 将翻译后的 JSON 保存在元数据中供未来使用 if (!ocrResult.metadata.translatedContentList) { @@ -1090,8 +1097,10 @@ const fileType = fileToProcess.name.split('.').pop().toLowerCase(); } // 保存文档记录 + // 如果提供了 existingDocId,则更新现有文档(UUID格式);否则创建新记录 + const docId = existingDocId || `${fileToProcess.name}_${fileToProcess.size}`; const documentData = { - id: `${fileToProcess.name}_${fileToProcess.size}`, + id: docId, name: fileToProcess.name, size: fileToProcess.size, time: processedAt, diff --git a/local-proxy/routes/documents.js b/local-proxy/routes/documents.js index 7fac363..6085cc4 100644 --- a/local-proxy/routes/documents.js +++ b/local-proxy/routes/documents.js @@ -229,10 +229,11 @@ router.put('/:id', async (req, res, next) => { translationModelName: 'translationModel' }; - // 应用字段映射 + // 应用字段映射:复制到数据库字段名,然后删除前端字段名(避免进入 metadata) for (const [frontendField, dbField] of Object.entries(fieldMapping)) { - if (body[frontendField] !== undefined && body[dbField] === undefined) { + if (body[frontendField] !== undefined) { body[dbField] = body[frontendField]; + delete body[frontendField]; // 删除前端字段,避免进入 metadata } }