feat: pdf数据已经可以正常显示

This commit is contained in:
肖应宇 2026-03-25 09:51:26 +08:00
parent ce324d8da3
commit 17d677923a
10 changed files with 284 additions and 83 deletions

View File

@ -2255,8 +2255,14 @@ async function handleReadClick() {
});
// 保存到数据库
let savedDoc = record;
if (typeof window.storageAdapter !== 'undefined' && typeof window.storageAdapter.saveResultToDB === 'function') {
await window.storageAdapter.saveResultToDB(record);
const result = await window.storageAdapter.saveResultToDB(record);
// 后端模式:使用返回的 UUID
if (result && result.id) {
savedDoc = result;
console.log('[仅阅读] 后端返回的 UUID:', result.id);
}
} else if (typeof saveResultToDB === 'function') {
await saveResultToDB(record);
} else {
@ -2269,8 +2275,8 @@ async function handleReadClick() {
console.log('[仅阅读] 数据已保存,准备跳转...');
// 跳转到历史详情页面
window.location.href = `views/history/history_detail.html?id=${encodeURIComponent(recordId)}`;
// 跳转到历史详情页面(使用保存后的 ID
window.location.href = `views/history/history_detail.html?id=${encodeURIComponent(savedDoc.id)}`;
} catch (error) {
console.error('保存文件到历史记录失败:', error);
showNotification(`保存失败: ${error.message}`, 'error');

View File

@ -49,6 +49,13 @@
return;
}
// 检查 ID 是否是 UUID 格式(后端要求)
const isUUID = docId && /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(docId);
if (!isUUID) {
console.warn('[saveSingleMessage] Non-UUID document ID, skipping backend save:', docId?.substring(0, 30));
return;
}
try {
await window.storageAdapter.saveChatMessage(docId, {
role: message.role,
@ -69,11 +76,17 @@
try {
let history = [];
if (isBackendMode()) {
// 后端模式:从 API 加载
// 检查 ID 是否是 UUID 格式(后端要求)
const isUUID = docId && /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(docId);
if (isBackendMode() && isUUID) {
// 后端模式:从 API 加载(只有 UUID 格式的 ID 才能调用后端)
history = await window.storageAdapter.loadChatHistory(docId);
} else {
// 前端模式localStorage
// 前端模式或非 UUID ID使用 localStorage
if (!isUUID && isBackendMode()) {
console.warn('[loadChatHistory] Non-UUID document ID, using localStorage fallback:', docId?.substring(0, 30));
}
const raw = localStorage.getItem('chatHistory_' + docId);
if (raw) {
history = JSON.parse(raw);

View File

@ -81,14 +81,19 @@ function splitContentSmart(content, maxChunk = 8192) {
}
/**
* 生成当前文档的唯一 ID
* ID 用于区分不同文档的聊天上下文或相关数据存储 (如思维导图数据)
* ID 的生成基于文档名称图片数量OCR 文本长度和翻译文本长度的组合
* 以期在实际使用中具有足够的唯一性
* 获取当前文档的唯一 ID
* 优先使用文档的 id 字段后端返回的 UUID
* 如果没有则基于文档名称图片数量OCR 文本长度和翻译文本长度生成唯一性 ID
*
* @returns {string} 当前文档的唯一 ID
*/
function getCurrentDocId() {
// 优先使用文档的 id 字段(后端 UUID
if (window.data && window.data.id) {
return window.data.id;
}
// 降级:基于内容生成 ID前端模式或旧数据
const doc = getCurrentDocContent();
// 用文件名+图片数量+ocr长度+translation长度做唯一性可根据实际情况调整
return `${doc.name || 'unknown'}_${(doc.images||[]).length}_${(doc.ocr||'').length}_${(doc.translation||'').length}`;

View File

@ -61,14 +61,16 @@ async function renderDetail() {
// 标签按钮原始文件、OCR、仅翻译、分块对比、PDF对照现在始终可见
// 仅保留数据检测用于默认标签选择
const hasMinerUStructuredData =
data.metadata &&
data.metadata.originalPdfBase64 &&
data.metadata.contentListJson &&
data.metadata.translatedContentList &&
data.metadata.supportsStructuredTranslation === true;
// 兼容嵌套的 metadata 结构
const meta = data?.metadata?.metadata || data?.metadata || {};
const hasOriginalPdf = !!(data.metadata && data.metadata.originalPdfBase64);
const hasMinerUStructuredData =
meta.originalPdfBase64 &&
meta.contentListJson &&
meta.translatedContentList &&
meta.supportsStructuredTranslation === true;
const hasOriginalPdf = !!(meta.originalPdfBase64);
document.getElementById('fileName').textContent = data.name;
if (fileMetaTimeEl) {

View File

@ -61,6 +61,9 @@ function showTab(tab) {
* showTab 逻辑移至此处
*/
function showTabImmediate(tab) {
// 确保 data 引用 window.data数据由 renderDetail 设置)
const data = window.data;
// 确保 DOM 缓存已初始化
DOM_CACHE.ensureInitialized();
@ -159,10 +162,22 @@ function showTabImmediate(tab) {
let activeContentElement = null; // 用于 applyAnnotationsToContent
const significantTokenTypes = ['paragraph', 'heading', 'code', 'table', 'blockquote', 'list', 'html', 'hr'];
// 数据校验:确保 data 已加载
if (!data) {
console.error('[showTab] window.data is not loaded!');
document.getElementById('tabContent').innerHTML = `
<div style="padding:40px;text-align:center;color:#dc2626;">
<i class="fa fa-exclamation-triangle" style="font-size:48px;margin-bottom:12px;display:block;"></i>
<p>文档数据加载失败请刷新页面重试</p>
</div>`;
renderingTab = null;
return;
}
// ---- 增加日志 ----
// 日志现在可以准确反映 globalCurrentContentIdentifier
console.log(`[showTab - ${tab}] 即将渲染。当前 window.globalCurrentContentIdentifier:`, window.globalCurrentContentIdentifier);
if (data && data.annotations) {
if (data.annotations) {
console.log(`[showTab - ${tab}] data.annotations (长度 ${data.annotations.length}):`, JSON.parse(JSON.stringify(data.annotations)));
} else {
console.log(`[showTab - ${tab}] data.annotations 不可用或为空。`);
@ -201,8 +216,12 @@ function showTabImmediate(tab) {
// ========== 原始 PDF 查看器iframe 嵌入官方 pdf.js viewer ==========
if (DOM_CACHE.tabs.originalFile) DOM_CACHE.tabs.originalFile.classList.add('active');
// 检查数据
if (!data.metadata || !data.metadata.originalPdfBase64) {
// 检查数据(先检查 data 是否存在)
// 兼容嵌套的 metadata 结构data.metadata.originalPdfBase64 或 data.metadata.metadata.originalPdfBase64
const pdfBase64 = data?.metadata?.originalPdfBase64
|| data?.metadata?.metadata?.originalPdfBase64;
if (!data || !pdfBase64) {
document.getElementById('tabContent').innerHTML = `
<div style="padding:40px;text-align:center;color:#666;">
<i class="fa fa-file-pdf-o" style="font-size:48px;margin-bottom:12px;display:block;"></i>
@ -235,14 +254,14 @@ function showTabImmediate(tab) {
(async () => {
try {
// base64 → Uint8Array
const base64 = data.metadata.originalPdfBase64;
const base64 = pdfBase64;
const raw = atob(base64);
const bytes = new Uint8Array(raw.length);
for (let i = 0; i < raw.length; i++) bytes[i] = raw.charCodeAt(i);
// 统一走后端暴露的 /pdfjs 静态资源,避免 Vite 在开发环境下
// 将 PDF.js viewer 的脚本按模块脚本处理,导致 viewer.js 加载报错。
const viewerBase = '/pdfjs/web/viewer.html';
// 统一走后端暴露的 /pdfjs 静态资源,避免 Vite 在开发环境下
// 将 PDF.js viewer 的脚本按模块脚本处理,导致 viewer.js 加载报错。
const viewerBase = '/pdfjs/web/viewer.html';
const iframe = document.getElementById('pdf-viewer-iframe');
const loading = document.getElementById('pdf-viewer-loading');
@ -301,7 +320,9 @@ function showTabImmediate(tab) {
if (DOM_CACHE.tabs.pdfCompare) DOM_CACHE.tabs.pdfCompare.classList.add('active');
// 检查是否有必要的结构化翻译数据
const hasStructuredData = data.metadata && data.metadata.originalPdfBase64 && data.metadata.contentListJson && data.metadata.translatedContentList;
// 兼容嵌套的 metadata 结构
const metaForCompare = data?.metadata?.metadata || data?.metadata || {};
const hasStructuredData = metaForCompare.originalPdfBase64 && metaForCompare.contentListJson && metaForCompare.translatedContentList;
if (!hasStructuredData) {
// 缺少结构化翻译数据,弹出确认对话框询问用户
@ -335,10 +356,10 @@ function showTabImmediate(tab) {
console.log('[PDFCompareView] 开始初始化 PDF 对照视图');
await pdfCompareView.initialize(
data.metadata.originalPdfBase64,
data.metadata.contentListJson,
data.metadata.translatedContentList,
data.metadata.layoutJson // 传入 layoutJson
metaForCompare.originalPdfBase64,
metaForCompare.contentListJson,
metaForCompare.translatedContentList,
metaForCompare.layoutJson // 传入 layoutJson
);
// 为多轮检索生成chunks如果还没有的话
@ -347,8 +368,8 @@ function showTabImmediate(tab) {
if (typeof generateChunksFromContentList === 'function') {
const chunks = generateChunksFromContentList(
data.metadata.contentListJson,
data.metadata.translatedContentList
metaForCompare.contentListJson,
metaForCompare.translatedContentList
);
window.data.ocrChunks = chunks.ocrChunks;
window.data.translatedChunks = chunks.translatedChunks;

View File

@ -370,7 +370,11 @@ const fileType = fileToProcess.name.split('.').pop().toLowerCase();
refRecord.metadata.failedStructuredItems = newFailed;
refRecord.metadata.structuredFailedCount = newFailed.length;
refRecord.time = new Date().toISOString();
await saveResultToDB(refRecord);
if (window.storageAdapter && typeof window.storageAdapter.saveResultToDB === 'function') {
await window.storageAdapter.saveResultToDB(refRecord);
} else {
await saveResultToDB(refRecord);
}
if (typeof addProgressLog === "function") {
addProgressLog(`${logPrefix} 已将 ${translatedSubset.length} 个片段写回历史记录 ${refId},剩余失败 ${newFailed.length}`);
@ -463,7 +467,11 @@ const fileType = fileToProcess.name.split('.').pop().toLowerCase();
});
refRecord.translation = (refRecord.translatedChunks || []).join('\n\n');
refRecord.time = new Date().toISOString();
await saveResultToDB(refRecord);
if (window.storageAdapter && typeof window.storageAdapter.saveResultToDB === 'function') {
await window.storageAdapter.saveResultToDB(refRecord);
} else {
await saveResultToDB(refRecord);
}
if (typeof addProgressLog === "function") addProgressLog(`${logPrefix} 已将 ${translatedPieces.length} 个失败片段写回历史记录 ${refId}`);
// 准备返回对象并跳过后续的常规保存逻辑
@ -1081,44 +1089,56 @@ const fileType = fileToProcess.name.split('.').pop().toLowerCase();
}
}
if (typeof saveResultToDB === "function") {
await saveResultToDB({
id: `${fileToProcess.name}_${fileToProcess.size}`,
name: fileToProcess.name,
size: fileToProcess.size,
time: processedAt,
ocr: currentMarkdownContent,
translation: currentTranslationContent,
images: currentImagesData,
ocrChunks: ocrChunks,
translatedChunks: translatedChunks,
fileType: fileType,
targetLanguage: targetLanguageValue,
relativePath: relativePath,
sourceArchive: sourceArchive,
originalContent: originalEncoding === 'text' ? originalContent : null,
originalEncoding: originalEncoding,
originalBinary: originalEncoding && originalEncoding !== 'text' && originalBinary ? arrayBufferToBase64(originalBinary) : null,
originalExtension: originalExtension,
// 新增模型元信息OCR/翻译)
ocrEngine: usedOcrEngine || ocrEngineForLog || (typeof window !== 'undefined' ? (window.ocrSettingsManager?.getCurrentConfig()?.engine || null) : null),
ocrSource: usedOcrSource || null,
translationModelName: selectedTranslationModelName || 'none',
translationModelCustomName: (selectedTranslationModelName === 'custom' && translationModelConfig && (translationModelConfig.displayName || translationModelConfig.name)) ? (translationModelConfig.displayName || translationModelConfig.name) : null,
translationModelId: (selectedTranslationModelName === 'custom' && translationModelConfig && translationModelConfig.modelId) ? translationModelConfig.modelId : null,
batchId: batchContext ? batchContext.id : null,
batchOrder: batchContext ? batchContext.order : null,
batchTotal: batchContext ? batchContext.total : null,
batchTemplate: batchContext ? batchContext.template : null,
batchFormats: batchContext ? batchContext.formats : null,
batchStartedAt: batchContext ? batchContext.startedAt : null,
batchOutputLanguage: batchContext ? batchContext.outputLanguage : null,
batchOriginalIndex: batchContext ? batchContext.originalIndex : null,
batchAttempt: batchContext ? batchContext.attempt : null,
batchZip: batchContext ? batchContext.zipOutput : null,
// 新增MinerU 结构化翻译元数据
metadata: Object.keys(metadataToSave).length > 0 ? metadataToSave : null
});
// 保存文档记录
const documentData = {
id: `${fileToProcess.name}_${fileToProcess.size}`,
name: fileToProcess.name,
size: fileToProcess.size,
time: processedAt,
ocr: currentMarkdownContent,
translation: currentTranslationContent,
images: currentImagesData,
ocrChunks: ocrChunks,
translatedChunks: translatedChunks,
fileType: fileType,
targetLanguage: targetLanguageValue,
relativePath: relativePath,
sourceArchive: sourceArchive,
originalContent: originalEncoding === 'text' ? originalContent : null,
originalEncoding: originalEncoding,
originalBinary: originalEncoding && originalEncoding !== 'text' && originalBinary ? arrayBufferToBase64(originalBinary) : null,
originalExtension: originalExtension,
// 新增模型元信息OCR/翻译)
ocrEngine: usedOcrEngine || ocrEngineForLog || (typeof window !== 'undefined' ? (window.ocrSettingsManager?.getCurrentConfig()?.engine || null) : null),
ocrSource: usedOcrSource || null,
translationModelName: selectedTranslationModelName || 'none',
translationModelCustomName: (selectedTranslationModelName === 'custom' && translationModelConfig && (translationModelConfig.displayName || translationModelConfig.name)) ? (translationModelConfig.displayName || translationModelConfig.name) : null,
translationModelId: (selectedTranslationModelName === 'custom' && translationModelConfig && translationModelConfig.modelId) ? translationModelConfig.modelId : null,
batchId: batchContext ? batchContext.id : null,
batchOrder: batchContext ? batchContext.order : null,
batchTotal: batchContext ? batchContext.total : null,
batchTemplate: batchContext ? batchContext.template : null,
batchFormats: batchContext ? batchContext.formats : null,
batchStartedAt: batchContext ? batchContext.startedAt : null,
batchOutputLanguage: batchContext ? batchContext.outputLanguage : null,
batchOriginalIndex: batchContext ? batchContext.originalIndex : null,
batchAttempt: batchContext ? batchContext.attempt : null,
batchZip: batchContext ? batchContext.zipOutput : null,
// 新增MinerU 结构化翻译元数据
metadata: Object.keys(metadataToSave).length > 0 ? metadataToSave : null
};
let savedDocId = documentData.id;
if (typeof window !== 'undefined' && window.storageAdapter && typeof window.storageAdapter.saveResultToDB === 'function') {
// 后端模式:使用 storageAdapter
const result = await window.storageAdapter.saveResultToDB(documentData);
if (result && result.id) {
savedDocId = result.id;
console.log(`${logPrefix} 后端返回的文档 UUID:`, savedDocId);
}
} else if (typeof saveResultToDB === "function") {
// 前端模式:使用本地 IndexedDB
await saveResultToDB(documentData);
}
if (typeof onFileSuccess === 'function') {
@ -1126,6 +1146,7 @@ const fileType = fileToProcess.name.split('.').pop().toLowerCase();
}
return {
file: fileToProcess,
id: savedDocId, // 后端 UUID 或本地 ID
markdown: currentMarkdownContent,
translation: currentTranslationContent,
images: currentImagesData,

View File

@ -190,7 +190,26 @@ class BackendStorage {
if (!AuthManager.isAuthenticated()) {
return this._fallbackTo('saveResultToDB', document);
}
await this.fetchAPI('/documents', { method: 'POST', body: JSON.stringify(document) });
// 判断是更新还是创建UUID 格式的 ID 表示更新现有文档
const isUUID = document.id && /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(document.id);
if (isUUID) {
// 更新现有文档
await this.fetchAPI(`/documents/${document.id}`, {
method: 'PUT',
body: JSON.stringify(document)
});
return document;
} else {
// 创建新文档(后端会生成 UUID
const response = await this.fetchAPI('/documents', {
method: 'POST',
body: JSON.stringify(document)
});
// 返回后端生成的完整文档(包含 UUID
return response;
}
} catch (error) {
console.error('Failed to save document to backend, falling back to local:', error);
return this._fallbackTo('saveResultToDB', document);

View File

@ -16,6 +16,16 @@ function isValidUUID(id) {
return /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(id);
}
/**
 * Validate a document ID supplied by a client.
 *
 * Only non-empty strings that pass `isValidUUID` are accepted. Legacy
 * filename-based IDs are intentionally no longer accepted.
 *
 * @param {*} id - Candidate document ID.
 * @returns {boolean} `true` when `id` is a non-empty string accepted by `isValidUUID`.
 */
function isValidDocumentId(id) {
  const isNonEmptyString = typeof id === 'string' && id !== '';
  return isNonEmptyString ? Boolean(isValidUUID(id)) : false;
}
// 获取文档的聊天历史
router.get('/:documentId/history', async (req, res, next) => {
try {
@ -24,7 +34,11 @@ router.get('/:documentId/history', async (req, res, next) => {
// 验证 UUID 格式
if (!isValidUUID(documentId)) {
return res.status(400).json({ error: 'Invalid document ID format' });
console.warn(`[Chat] Invalid document ID format: ${documentId.substring(0, 50)}...`);
return res.status(400).json({
error: 'Invalid document ID format. Expected UUID.',
hint: 'Document ID should be a UUID like "550e8400-e29b-41d4-a716-446655440000". If you see a filename-based ID, the document may have been created before the UUID fix.'
});
}
// 验证和规范化参数

View File

@ -171,23 +171,24 @@ router.post('/', async (req, res, next) => {
'ocrProvider', 'ocrText', 'ocr', 'ocrMetadata', 'ocrEngine', 'ocrSource',
'translationModel', 'translatedText', 'translation', 'translationMetadata',
'translationModelName', 'summary', 'toc', 'processingTime', 'errorMessage',
'id', 'userId', 'createdAt', 'updatedAt'
'id', 'userId', 'createdAt', 'updatedAt',
'metadata' // 添加 metadata 到已知字段,避免嵌套保存
];
for (const [key, value] of Object.entries(body)) {
if (!knownFields.includes(key) && value !== undefined) {
metadataFields[key] = value;
}
}
// 合并原有 metadata
// 合并原有 metadata 的内容(而不是保存 metadata 字段本身)
if (body.metadata && typeof body.metadata === 'object') {
Object.assign(metadataFields, body.metadata);
}
schemaFields.metadata = Object.keys(metadataFields).length > 0 ? metadataFields : body.metadata;
schemaFields.metadata = Object.keys(metadataFields).length > 0 ? metadataFields : null;
// 移除 undefined 字段
// 移除 undefined 字段,并排除前端传入的 id使用数据库自动生成的 UUID
const cleanData = {};
for (const [key, value] of Object.entries(schemaFields)) {
if (value !== undefined) {
if (value !== undefined && key !== 'id') {
cleanData[key] = value;
}
}
@ -216,13 +217,80 @@ router.post('/', async (req, res, next) => {
router.put('/:id', async (req, res, next) => {
try {
const { id } = req.params;
const body = { ...req.body };
// 字段映射:前端字段名 -> 数据库字段名
const fieldMapping = {
name: 'fileName',
size: 'fileSize',
ocr: 'ocrText',
translation: 'translatedText',
ocrEngine: 'ocrProvider',
translationModelName: 'translationModel'
};
// 应用字段映射
for (const [frontendField, dbField] of Object.entries(fieldMapping)) {
if (body[frontendField] !== undefined && body[dbField] === undefined) {
body[dbField] = body[frontendField];
}
}
// 提取已知字段,其余放入 metadata
const knownFields = [
'fileName', 'fileSize', 'fileType', 'filePath', 'status',
'ocrProvider', 'ocrText', 'ocrMetadata',
'translationModel', 'translatedText', 'translationMetadata',
'summary', 'toc', 'processingTime', 'errorMessage'
];
const updateData = {};
const metadataUpdate = {};
for (const [key, value] of Object.entries(body)) {
if (key === 'id' || key === 'userId' || key === 'createdAt' || key === 'updatedAt') {
continue; // 跳过只读字段
}
if (knownFields.includes(key)) {
updateData[key] = value;
} else if (key === 'metadata') {
// 合并 metadata
metadataUpdate['metadata'] = value;
} else if (!key.startsWith('_')) {
// 其他字段放入 metadata排除内部字段
metadataUpdate[key] = value;
}
}
// 如果有需要放入 metadata 的字段,需要先读取现有 metadata 再合并
if (Object.keys(metadataUpdate).length > 0) {
const existingDoc = await prisma.document.findFirst({
where: { id, userId: req.user.id },
select: { metadata: true }
});
if (existingDoc) {
const existingMeta = existingDoc.metadata || {};
const newMeta = { ...existingMeta };
for (const [key, value] of Object.entries(metadataUpdate)) {
if (key === 'metadata' && typeof value === 'object') {
Object.assign(newMeta, value);
} else {
newMeta[key] = value;
}
}
updateData.metadata = newMeta;
}
}
await prisma.document.updateMany({
where: {
id,
userId: req.user.id
},
data: req.body
data: updateData
});
res.json({ success: true });

View File

@ -1,4 +1,6 @@
import { resolve } from 'path';
import { createReadStream, existsSync } from 'fs';
import { join } from 'path';
// 可选的前端构建配置:不改变现有直出模式
// - 输入:根目录 index.html 与 admin/index.html
@ -16,10 +18,6 @@ export default {
target: 'http://localhost:3456',
changeOrigin: true
},
'/pdfjs': {
target: 'http://localhost:3456',
changeOrigin: true
},
'/mineru': {
target: 'http://localhost:3456',
changeOrigin: true
@ -34,6 +32,40 @@ export default {
optimizeDeps: {
exclude: ['pdfjs-dist']
},
// 开发服务器中间件:服务 pdfjs 静态文件
plugins: [{
name: 'serve-pdfjs',
configureServer(server) {
server.middlewares.use((req, res, next) => {
if (req.url && req.url.startsWith('/pdfjs/')) {
const filePath = join(process.cwd(), req.url);
if (existsSync(filePath)) {
// 设置正确的 MIME 类型
const ext = filePath.split('.').pop()?.toLowerCase();
const mimeTypes = {
'html': 'text/html',
'js': 'application/javascript',
'css': 'text/css',
'json': 'application/json',
'png': 'image/png',
'jpg': 'image/jpeg',
'gif': 'image/gif',
'svg': 'image/svg+xml',
'woff': 'font/woff',
'woff2': 'font/woff2',
'ttf': 'font/ttf',
'pdf': 'application/pdf'
};
const contentType = mimeTypes[ext] || 'application/octet-stream';
res.setHeader('Content-Type', contentType);
createReadStream(filePath).pipe(res);
return;
}
}
next();
});
}
}],
build: {
outDir: 'dist',
emptyOutDir: true,