From fcd490b1f5a851e4c58e178b90edc101ffeeccfe Mon Sep 17 00:00:00 2001
From: MT-Mint <798521692@qq.com>
Date: Wed, 25 Mar 2026 11:11:21 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A6=82=E6=9E=9C=E6=98=AF=E4=B8=AD?=
 =?UTF-8?q?=E6=96=87=E6=96=87=E6=A1=A3=EF=BC=8C=E5=B0=B1=E9=9A=90=E8=97=8F?=
 =?UTF-8?q?pdf=E5=AF=B9=E7=85=A7=E7=BF=BB=E8=AF=91=E5=92=8C=E5=88=86?=
 =?UTF-8?q?=E5=9D=97=E5=AF=B9=E6=AF=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 css/history_detail/02-layout/layout.css |   5 +
 js/history/history_detail_render.js     | 171 ++++++++++++++++++++++++
 2 files changed, 176 insertions(+)
diff --git a/css/history_detail/02-layout/layout.css b/css/history_detail/02-layout/layout.css
index 964aa0e..aa468ef 100644
--- a/css/history_detail/02-layout/layout.css
+++ b/css/history_detail/02-layout/layout.css
@@ -54,6 +54,11 @@ body {
   border-radius: 0; /* 移除圆角，纯下划线风格 */
 }
 
+/* Hidden tab button (used to hide translation-related tabs for Chinese documents) */
+.tab-btn.hidden-tab {
+  display: none !important;
+}
+
 .tab-btn:hover:not(.active) {
   color: var(--slate-700);
   background: transparent; /* 移除悬停背景，保持极简 */
diff --git a/js/history/history_detail_render.js b/js/history/history_detail_render.js
index e51836b..6378c89 100644
--- a/js/history/history_detail_render.js
+++ b/js/history/history_detail_render.js
@@ -1,3 +1,157 @@
+/**
+ * Decide whether a text is predominantly Chinese.
+ * Whitespace is stripped, then the share of characters in U+4E00-U+9FFF is measured.
+ * @param {string} text - Text to inspect; empty or non-string input yields false.
+ * @returns {boolean} - True when more than 30% of the non-whitespace characters match.
+ */
+function isChineseDocument(text) {
+  if (!text || typeof text !== 'string') return false;
+
+  // Strip all whitespace so spacing does not dilute the ratio.
+  const cleanText = text.replace(/\s+/g, '');
+  if (cleanText.length === 0) return false;
+
+  // Matches U+4E00-U+9FFF only. NOTE(review): Japanese kanji fall in the same range - confirm this is acceptable.
+  const chineseCharRegex = /[\u4e00-\u9fff]/g;
+  const chineseChars = cleanText.match(chineseCharRegex);
+
+  if (!chineseChars) return false;
+
+  // Fraction of matched characters among the non-whitespace characters.
+  const ratio = chineseChars.length / cleanText.length;
+
+  // Strictly greater than 0.3 counts as a Chinese document.
+  return ratio > 0.3;
+}
+
+/**
+ * Extract text from the leading pages of a base64-encoded PDF using the global pdfjsLib.
+ * @param {string} base64Data - Base64 payload passed to atob(). NOTE(review): a data-URL prefix would presumably make atob throw - confirm callers strip it.
+ * @param {number} maxPages - Maximum number of pages to read (defaults to 3).
+ * @returns {Promise<string>} Concatenated page text, or '' when the input or pdfjsLib is missing or any step throws.
+ */
+async function extractTextFromPdfBase64(base64Data, maxPages = 3) {
+  if (!base64Data || typeof pdfjsLib === 'undefined') {
+    return '';
+  }
+
+  try {
+    // Decode the base64 string into a Uint8Array of raw bytes.
+    const binaryString = atob(base64Data);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+      bytes[i] = binaryString.charCodeAt(i);
+    }
+
+    // Load the PDF with PDF.js. NOTE(review): nothing here destroys the loading task/document afterwards - confirm whether cleanup is needed.
+    const loadingTask = pdfjsLib.getDocument({ data: bytes });
+    const pdfDocument = await loadingTask.promise;
+
+    // Read at most maxPages pages, one after another, joining text items with spaces.
+    const totalPages = Math.min(pdfDocument.numPages, maxPages);
+    let extractedText = '';
+
+    for (let pageNum = 1; pageNum <= totalPages; pageNum++) {
+      const page = await pdfDocument.getPage(pageNum);
+      const textContent = await page.getTextContent();
+      const pageText = textContent.items.map(item => item.str).join(' ');
+      extractedText += pageText + ' ';
+    }
+
+    return extractedText;
+  } catch (error) {
+    console.error('[extractTextFromPdfBase64] 提取PDF文本失败:', error);
+    return '';
+  }
+}
+
+/**
+ * Detect the document language and toggle the `hidden-tab` class on the translation-related tabs.
+ * For Chinese documents the PDF-compare, translation-only and chunk-compare tabs get the class; otherwise it is removed.
+ * @param {Object} data - Document record; reads data.ocr, data.ocrChunks and data.metadata (optionally nested as metadata.metadata).
+ * @returns {Promise<boolean>} - True when the collected text is classified as Chinese.
+ */
+async function hideTabsForChineseDocument(data) {
+  // Collect text for language detection; the first source that yields non-empty text wins.
+  // Priority: contentListJson > ocr > ocrChunks > originalPdfBase64 (text extracted from the PDF)
+  let textToDetect = '';
+  const meta = data?.metadata?.metadata || data?.metadata || {};
+
+  // 1. Prefer the text entries of contentListJson (described by the original author as MinerU structured data).
+  if (meta.contentListJson && Array.isArray(meta.contentListJson)) {
+    const textItems = meta.contentListJson
+      .filter(item => item.type === 'text' && item.text)
+      .map(item => item.text);
+    textToDetect = textItems.join(' ');
+    console.log('[hideTabsForChineseDocument] 从 contentListJson 提取文本，共', textItems.length, '个文本块');
+  }
+
+  // 2. Otherwise fall back to data.ocr. NOTE(review): data is dereferenced without ?. from here on, unlike the meta lookup above.
+  if (!textToDetect && data.ocr && typeof data.ocr === 'string') {
+    textToDetect = data.ocr;
+    console.log('[hideTabsForChineseDocument] 使用 data.ocr 字段');
+  }
+
+  // 3. Then meta.ocr.
+  if (!textToDetect && meta.ocr && typeof meta.ocr === 'string') {
+    textToDetect = meta.ocr;
+    console.log('[hideTabsForChineseDocument] 使用 metadata.ocr 字段');
+  }
+
+  // 4. Then data.ocrChunks; each chunk is either a string or an object exposing text/content.
+  if (!textToDetect && data.ocrChunks && Array.isArray(data.ocrChunks) && data.ocrChunks.length > 0) {
+    textToDetect = data.ocrChunks.map(chunk =>
+      typeof chunk === 'string' ? chunk : (chunk.text || chunk.content || '')
+    ).join(' ');
+    console.log('[hideTabsForChineseDocument] 从 data.ocrChunks 提取文本');
+  }
+
+  // 5. Then meta.ocrChunks, handled the same way.
+  if (!textToDetect && meta.ocrChunks && Array.isArray(meta.ocrChunks) && meta.ocrChunks.length > 0) {
+    textToDetect = meta.ocrChunks.map(chunk =>
+      typeof chunk === 'string' ? chunk : (chunk.text || chunk.content || '')
+    ).join(' ');
+    console.log('[hideTabsForChineseDocument] 从 metadata.ocrChunks 提取文本');
+  }
+
+  // 6. Last resort: extract text from up to the first 3 pages of the original PDF.
+  if (!textToDetect && meta.originalPdfBase64) {
+    console.log('[hideTabsForChineseDocument] 尝试从原始PDF提取文本进行语言检测...');
+    textToDetect = await extractTextFromPdfBase64(meta.originalPdfBase64, 3);
+  }
+
+  // Classify the collected text, then log the result with a 100-character sample.
+  const isChinese = isChineseDocument(textToDetect);
+
+  console.log('[hideTabsForChineseDocument] 文档语言检测结果:', {
+    isChinese,
+    textLength: textToDetect.length,
+    textSample: textToDetect.substring(0, 100) + '...'
+  });
+
+  // Element IDs of the tabs that are hidden for Chinese documents.
+  const tabsToHide = [
+    'tab-pdf-compare',   // PDF compare
+    'tab-translation',   // translation only
+    'tab-chunk-compare'  // chunk compare
+  ];
+
+  tabsToHide.forEach(tabId => {
+    const tabElement = document.getElementById(tabId);
+    if (tabElement) {
+      if (isChinese) {
+        tabElement.classList.add('hidden-tab');
+        console.log(`[hideTabsForChineseDocument] 隐藏 Tab: ${tabId}`);
+      } else {
+        tabElement.classList.remove('hidden-tab');
+        console.log(`[hideTabsForChineseDocument] 显示 Tab: ${tabId}`);
+      }
+    }
+  });
+
+  return isChinese;
+}
+
 /**
  * 异步渲染历史详情页面的主函数。
  * - 从 URL 查询参数中获取记录 ID。
@@ -153,6 +307,10 @@ async function renderDetail() {
     console.error("initAnnotationSystem is not defined. Check js/annotation_logic.js");
   }
 
+  // ========== 检测文档语言并隐藏不必要的 Tab ==========
+  const isChinese = await hideTabsForChineseDocument(data);
+  // =============================================
+
   // Determine initial tab, AFTER annotations are loaded
   let initialTab = hasOriginalPdf ? 'original-file' : 'ocr'; // 有原始文件时默认显示原始文件
   if (docIdForLocalStorage) {
@@ -164,6 +322,11 @@ async function renderDetail() {
       !(savedTab !== 'ocr' && (!data.translation || data.translation.trim() === ""))
     ) {
       initialTab = savedTab;
+      // 如果是中文文档，且保存的 Tab 是被隐藏的，则切换到默认 Tab
+      if (isChinese && ['translation', 'chunk-compare', 'pdf-compare'].includes(savedTab)) {
+        console.log(`[renderDetail] 中文文档，切换保存的 Tab 从 ${savedTab} 到 ocr`);
+        initialTab = hasOriginalPdf ? 'original-file' : 'ocr';
+      }
     } else if (hasOriginalPdf) {
       // 如果有原始文件，优先显示原始文件
       initialTab = 'original-file';
@@ -174,6 +337,10 @@ async function renderDetail() {
       data.translation && data.translation.trim() !== ""
     ) {
       initialTab = 'chunk-compare';
+      // 如果是中文文档，不显示分块对比
+      if (isChinese) {
+        initialTab = 'ocr';
+      }
     }
   } else if (hasOriginalPdf) {
     // 如果有原始文件，优先显示原始文件
@@ -185,6 +352,10 @@ async function renderDetail() {
     data.translation && data.translation.trim() !== ""
   ) {
     initialTab = 'chunk-compare';
+    // 如果是中文文档，不显示分块对比
+    if (isChinese) {
+      initialTab = 'ocr';
+    }
   }
 
   // 现在，在批注肯定加载完毕后，才调用 showTab