/** * PDFExporter.js * PDF导出模块 * 负责将翻译内容导出为PDF文件 */ class PDFExporter { constructor(options = {}) { this.options = Object.assign({ fontUrl: 'https://gcore.jsdelivr.net/npm/source-han-sans-cn@1.0.0/SourceHanSansCN-Normal.otf', pdfLibUrl: 'https://gcore.jsdelivr.net/npm/pdf-lib@1.17.1/dist/pdf-lib.min.js', fontkitUrl: 'https://gcore.jsdelivr.net/npm/@pdf-lib/fontkit@1.1.1/dist/fontkit.umd.min.js', bboxNormalizedRange: 1000 }, options); // 库加载状态 this.pdfLibLoaded = false; this.fontkitLoaded = false; } /** * 导出结构化翻译PDF * @param {string} originalPdfBase64 - 原始PDF的Base64编码 * @param {Array} translatedContentList - 翻译内容列表 * @param {Function} showNotification - 通知函数 */ async exportStructuredTranslation(originalPdfBase64, translatedContentList, showNotification = null) { try { // 检查是否有翻译数据 if (!translatedContentList || translatedContentList.length === 0) { if (showNotification) { showNotification('没有翻译内容可导出', 'warning'); } return; } // 检查是否有原始PDF数据 if (!originalPdfBase64) { if (showNotification) { showNotification('原始PDF数据不可用', 'error'); } return; } // 显示进度提示 if (showNotification) { showNotification('正在生成译文PDF,请稍候...', 'info'); } // 动态加载 pdf-lib if (typeof PDFLib === 'undefined') { console.log('[PDFExporter] 正在加载 pdf-lib...'); await this.loadPdfLib(); } const { PDFDocument, rgb } = PDFLib; // 加载原始PDF const pdfBytes = this.base64ToUint8Array(originalPdfBase64); const pdfDoc = await PDFDocument.load(pdfBytes); // 注册 fontkit if (typeof fontkit !== 'undefined') { pdfDoc.registerFontkit(fontkit); console.log('[PDFExporter] fontkit 已注册'); } else { console.warn('[PDFExporter] fontkit 未加载,无法嵌入自定义字体'); } // 加载中文字体 let font = null; try { if (typeof fontkit === 'undefined') { throw new Error('fontkit 未加载,无法嵌入中文字体'); } console.log('[PDFExporter] 正在加载中文字体...'); const fontBytes = await fetch(this.options.fontUrl).then(res => { if (!res.ok) throw new Error(`HTTP ${res.status}: ${res.statusText}`); return res.arrayBuffer(); }); font = await pdfDoc.embedFont(fontBytes); console.log('[PDFExporter] 中文字体加载成功'); } catch (fontError) { console.error('[PDFExporter] 中文字体加载失败:', fontError); if (showNotification) { showNotification('中文字体加载失败,无法导出PDF: ' + fontError.message, 'error'); } throw fontError; } // ✅ 预处理:计算全局字号限制(与Canvas渲染保持一致) const fontSizeLimits = this.preprocessPdfFontSizes(pdfDoc, font, translatedContentList); // 按页面分组翻译内容 const pageContentMap = new Map(); translatedContentList.forEach((item, idx) => { if (item.type !== 'text' || !item.text || !item.bbox) return; const pageIdx = item.page_idx !== undefined ? item.page_idx : 0; if (!pageContentMap.has(pageIdx)) { pageContentMap.set(pageIdx, []); } pageContentMap.get(pageIdx).push({ ...item, originalIndex: idx }); }); const BBOX_NORMALIZED_RANGE = this.options.bboxNormalizedRange; // 遍历每一页,覆盖翻译文本 for (const [pageIdx, items] of pageContentMap.entries()) { if (pageIdx >= pdfDoc.getPageCount()) continue; const page = pdfDoc.getPage(pageIdx); const { width: pageWidth, height: pageHeight } = page.getSize(); // 计算缩放因子 const scaleX = pageWidth / BBOX_NORMALIZED_RANGE; const scaleY = pageHeight / BBOX_NORMALIZED_RANGE; console.log(`[PDFExporter] 页面 ${pageIdx}: PDF尺寸=${pageWidth.toFixed(2)}x${pageHeight.toFixed(2)}pt, 缩放比例=${scaleX.toFixed(3)}x${scaleY.toFixed(3)}`); // 用白色矩形覆盖原文 items.forEach(item => { const bbox = item.bbox; const x = bbox[0] * scaleX; const y = pageHeight - (bbox[3] * scaleY); const width = (bbox[2] - bbox[0]) * scaleX; const height = (bbox[3] - bbox[1]) * scaleY; page.drawRectangle({ x: x, y: y, width: width, height: height, color: rgb(1, 1, 1), }); }); // 绘制翻译文本 items.forEach(item => { const bbox = item.bbox; const text = item.text || ''; if (!text.trim()) return; // 计算bbox在PDF坐标系中的位置 const x = bbox[0] * scaleX; const boxWidth = (bbox[2] - bbox[0]) * scaleX; const boxHeight = (bbox[3] - bbox[1]) * scaleY; const bboxTop = pageHeight - (bbox[1] * scaleY); const bboxBottom = pageHeight - (bbox[3] * scaleY); // 判断是否为短文本(与TextFittingAdapter保持一致) const isShortText = text.length < 50 || (/\n/.test(text) && text.length < 80); // 使用文本布局算法(应用全局字号限制) const layout = this.calculatePdfTextLayout(font, text, boxWidth, boxHeight, isShortText, fontSizeLimits); const { fontSize, lines, lineHeight } = layout; const paddingTop = 2; const paddingX = 2; const availableHeight = boxHeight - paddingTop * 2; // 计算总高度并垂直居中 const totalHeight = lines.length > 0 ? (lines.length - 1) * lineHeight + fontSize : 0; const yOffset = (availableHeight - totalHeight) / 2; // 绘制每一行(PDF坐标系:Y轴从下往上,所以从顶部开始往下绘制) lines.forEach((line, lineIdx) => { // 从顶部开始,每一行往下偏移 const lineY = bboxTop - paddingTop - yOffset - (lineIdx * lineHeight); if (lineY < bboxBottom || lineY > bboxTop) return; page.drawText(line, { x: x + paddingX, y: lineY, size: fontSize, font: font, color: rgb(0, 0, 0), }); }); }); } // 生成PDF const modifiedPdfBytes = await pdfDoc.save(); // 创建Blob并下载 const blob = new Blob([modifiedPdfBytes], { type: 'application/pdf' }); const timestamp = new Date().toISOString().replace(/[:.]/g, '-').split('T')[0]; const filename = `translated_${timestamp}.pdf`; // 下载文件 if (typeof saveAs === 'function') { saveAs(blob, filename); if (showNotification) { showNotification('译文PDF导出成功!', 'success'); } } else { // 后备方案 const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = filename; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); if (showNotification) { showNotification('译文PDF导出成功!', 'success'); } } } catch (error) { console.error('[PDFExporter] 导出PDF失败:', error); if (showNotification) { showNotification('导出失败: ' + error.message, 'error'); } } } /** * 预处理PDF字号:计算全局字号限制(与TextFittingAdapter算法一致) * @param {Object} pdfDoc - pdf-lib文档对象 * @param {Object} font - pdf-lib字体对象 * @param {Array} translatedContentList - 翻译内容列表 * @returns {Object} { shortTextLimit, longTextLimit } 字号限制(单位:pt) */ preprocessPdfFontSizes(pdfDoc, font, translatedContentList) { console.log('[PDFExporter] 开始预处理全局字号(计算百分位数限制)...'); const startTime = performance.now(); const BBOX_NORMALIZED_RANGE = this.options.bboxNormalizedRange; const allScales = []; const allBboxHeights = []; // 收集所有段落的最优缩放因子和bbox高度 translatedContentList.forEach((item, idx) => { if (item.type !== 'text' || !item.text || !item.bbox) return; const bbox = item.bbox; const pageIdx = item.page_idx !== undefined ? item.page_idx : 0; if (pageIdx >= pdfDoc.getPageCount()) return; const page = pdfDoc.getPage(pageIdx); const { width: pageWidth, height: pageHeight } = page.getSize(); const scaleX = pageWidth / BBOX_NORMALIZED_RANGE; const scaleY = pageHeight / BBOX_NORMALIZED_RANGE; const boxWidth = (bbox[2] - bbox[0]) * scaleX; const boxHeight = (bbox[3] - bbox[1]) * scaleY; const text = item.text; // 收集bbox高度(用于计算平均值) allBboxHeights.push(boxHeight); // 检测公式和短文本 const hasFormula = /\$\$?[\s\S]*?\$\$?/.test(text); const isShortText = text.length < 50 || (/\n/.test(text) && text.length < 80); // 计算最优缩放(字号/bbox高度) const optimalScale = this._calculateOptimalScaleForPdf(font, text, boxWidth, boxHeight, hasFormula); // 按字符数加权采样 const unitCount = Math.max(1, Math.floor(text.length / 10)); for (let i = 0; i < unitCount; i++) { allScales.push(optimalScale); } }); // 计算百分位数(缩放因子的百分位数,不是字号) const percentile60 = this._calculatePercentile(allScales, 0.60); const percentile80 = this._calculatePercentile(allScales, 0.80); const result = { shortTextLimitScale: percentile80, // 短文本缩放因子上限(80%百分位) longTextLimitScale: percentile60 // 长文本缩放因子上限(60%百分位) }; console.log(`[PDFExporter] 预处理完成: 样本数=${allScales.length}`); console.log(`[PDFExporter] 百分位数: 60%=${percentile60.toFixed(3)}, 80%=${percentile80.toFixed(3)}, 耗时=${(performance.now() - startTime).toFixed(0)}ms`); console.log(`[PDFExporter] 缩放因子限制: 短文本≤${result.shortTextLimitScale.toFixed(3)} (80%分位), 长文本≤${result.longTextLimitScale.toFixed(3)} (60%分位)`); return result; } /** * 计算单个段落的最优缩放因子(PDF版本) * @private */ _calculateOptimalScaleForPdf(font, text, boxWidth, boxHeight, hasFormula = false) { if (hasFormula) return 0.5; // 公式使用保守缩放 const isCJK = /[\u4e00-\u9fa5]/.test(text); const hasNewlines = /\n/.test(text); const textLength = text.length; const initialLineSkip = isCJK ? 1.5 : 1.3; // 迭代尝试不同缩放因子 for (const scale of [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]) { const fontSize = boxHeight * scale; const estimatedCharWidth = fontSize * (isCJK ? 1.0 : 0.6); const effectiveWidth = boxWidth * 0.9; const charsPerLine = Math.max(1, Math.floor(effectiveWidth / estimatedCharWidth)); const estimatedLines = hasNewlines ? text.split('\n').length : Math.ceil(textLength / charsPerLine); const lineHeight = fontSize * initialLineSkip; const totalHeight = estimatedLines === 1 ? fontSize * 1.2 : (estimatedLines - 1) * lineHeight + fontSize * 1.2; if (totalHeight <= boxHeight) { return scale; // 找到第一个可行的缩放 } } return 0.3; // 最小缩放 } /** * 计算百分位数(线性插值法) * @private */ _calculatePercentile(arr, percentile) { if (arr.length === 0) return 0.85; // 默认值 const sorted = [...arr].sort((a, b) => a - b); const index = percentile * (sorted.length - 1); const lower = Math.floor(index); const upper = Math.ceil(index); const weight = index - lower; if (lower === upper) { return sorted[lower]; } return sorted[lower] * (1 - weight) + sorted[upper] * weight; } /** * 计算PDF文本布局(与Canvas渲染算法一致) * @param {Object} font - pdf-lib字体对象 * @param {string} text - 文本内容 * @param {number} boxWidth - 区域宽度 * @param {number} boxHeight - 区域高度 * @param {boolean} isShortText - 是否为短文本 * @param {Object} fontSizeLimits - 全局字号限制 { shortTextLimit, longTextLimit } * @returns {Object} { fontSize, lines, lineHeight } */ calculatePdfTextLayout(font, text, boxWidth, boxHeight, isShortText = false, fontSizeLimits = null) { // 判断是否为 CJK 语言 const isCJK = /[\u4e00-\u9fa5]/.test(text); // ✅ 使用与Canvas渲染一致的初始行距 const lineSkip = isCJK ? 1.5 : 1.3; // 内边距:对小bbox减少padding避免裁剪 const paddingTop = boxHeight < 20 ? 0.5 : 2; const paddingX = 2; const availableHeight = boxHeight - paddingTop * 2; const availableWidth = boxWidth - paddingX * 2; // 字号范围 const estimatedSingleLineFontSize = boxHeight * 0.8; // 最小字号:动态调整(基于bbox高度) let minFontSize; if (boxHeight < 20) { minFontSize = Math.max(6, boxHeight * 0.35); // 小bbox:最小6px } else { minFontSize = isShortText ? 10 : 8; // 正常bbox:10px/8px } let maxFontSize = Math.min(estimatedSingleLineFontSize * 1.5, boxHeight * 1.2); // ✅ 应用全局缩放因子限制(与Canvas渲染保持一致) if (fontSizeLimits) { const limitScale = isShortText ? fontSizeLimits.shortTextLimitScale : fontSizeLimits.longTextLimitScale; const limitFontSize = boxHeight * limitScale; // 缩放因子 × bbox高度 = 字号上限 maxFontSize = Math.min(maxFontSize, limitFontSize); } const hasNewlines = text.includes('\n'); const textLength = text.length; // 宽度因子 const widthFactors = (textLength < 20 || hasNewlines) ? [1.0] : [1.0, 0.95, 0.90, 0.85, 0.80, 0.75, 0.70]; let bestSolution = null; // 二分查找最大可用字号 for (const widthFactor of widthFactors) { const effectiveWidth = availableWidth * widthFactor; let low = minFontSize; let high = maxFontSize; let foundFontSize = null; let foundLines = null; while (high - low > 0.5) { const mid = (low + high) / 2; const lines = this.wrapTextForPdf(font, text, effectiveWidth, mid); const lineHeight = mid * lineSkip; // 与Canvas渲染保持一致:最后一行使用 mid * 1.2 留出垂直空间 const totalHeight = lines.length === 1 ? mid * 1.2 : (lines.length - 1) * lineHeight + mid * 1.2; if (totalHeight <= availableHeight) { foundFontSize = mid; foundLines = lines; low = mid; } else { high = mid; } } if (foundFontSize && (!bestSolution || foundFontSize > bestSolution.fontSize)) { bestSolution = { fontSize: foundFontSize, widthFactor: widthFactor, lines: foundLines, lineHeight: foundFontSize * lineSkip }; } } // 返回最优解 if (bestSolution) { return bestSolution; } // 后备方案 const fallbackFontSize = minFontSize; const fallbackLineHeight = fallbackFontSize * lineSkip; const allLines = this.wrapTextForPdf(font, text, availableWidth, fallbackFontSize); const maxLines = Math.floor(availableHeight / fallbackLineHeight); const linesToDraw = allLines.slice(0, Math.max(1, maxLines)); return { fontSize: fallbackFontSize, lines: linesToDraw, lineHeight: fallbackLineHeight, widthFactor: 1.0 }; } /** * PDF文本换行(使用pdf-lib字体测量) * @param {Object} font - pdf-lib字体对象 * @param {string} text - 文本内容 * @param {number} maxWidth - 最大宽度 * @param {number} fontSize - 字号 * @returns {Array} 换行后的文本数组 */ wrapTextForPdf(font, text, maxWidth, fontSize) { if (!text) return []; const lines = []; let currentLine = ''; // 按自然断句分段 const segments = text.split(/([。?!,、;:\n])/); for (let segment of segments) { if (!segment) continue; // 标点符号直接加到当前行 if (/^[。?!,、;:]$/.test(segment)) { currentLine += segment; continue; } // 换行符强制换行 if (segment === '\n') { if (currentLine) { lines.push(currentLine); currentLine = ''; } continue; } // 按字符逐个添加 for (let i = 0; i < segment.length; i++) { const char = segment[i]; const testLine = currentLine + char; const width = font.widthOfTextAtSize(testLine, fontSize); if (width > maxWidth && currentLine.length > 0) { lines.push(currentLine); currentLine = char; } else { currentLine = testLine; } } } if (currentLine) { lines.push(currentLine); } return lines.length > 0 ? lines : ['']; } /** * 动态加载 pdf-lib 库和 fontkit */ async loadPdfLib() { // 加载 pdf-lib if (typeof PDFLib === 'undefined') { await new Promise((resolve, reject) => { const script = document.createElement('script'); script.src = this.options.pdfLibUrl; script.onload = () => { console.log('[PDFExporter] pdf-lib 加载成功'); this.pdfLibLoaded = true; resolve(); }; script.onerror = (error) => { console.error('[PDFExporter] pdf-lib 加载失败:', error); reject(new Error('Failed to load pdf-lib library')); }; document.head.appendChild(script); }); } // 加载 fontkit if (typeof fontkit === 'undefined') { await new Promise((resolve, reject) => { const script = document.createElement('script'); script.src = this.options.fontkitUrl; script.onload = () => { console.log('[PDFExporter] fontkit 加载成功'); this.fontkitLoaded = true; resolve(); }; script.onerror = (error) => { console.warn('[PDFExporter] fontkit 加载失败:', error); resolve(); // fontkit失败不阻止流程 }; document.head.appendChild(script); }); } } /** * Base64 转 Uint8Array * @param {string} base64 - Base64编码字符串 * @returns {Uint8Array} 字节数组 */ base64ToUint8Array(base64) { const binaryString = atob(base64); const len = binaryString.length; const bytes = new Uint8Array(len); for (let i = 0; i < len; i++) { bytes[i] = binaryString.charCodeAt(i); } return bytes; } } // 导出模块 if (typeof module !== 'undefined' && module.exports) { module.exports = PDFExporter; }