paper-burner/js/ui/ocr-settings.js

617 lines
25 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ui/ocr-settings.js
// OCR 配置管理模块 - 独立于翻译配置
/**
 * OCR settings manager.
 *
 * Responsibilities:
 *  1. Load and save the OCR configuration (kept separate from the translation configuration).
 *  2. Switch between OCR engines and show the matching configuration panel.
 *  3. Show a one-time tip offering to copy the translation Mistral keys into the OCR config.
 *
 * Values are persisted in localStorage. When `window.storageAdapter` exists and its
 * `isFrontendMode` is exactly `false`, the active engine's configuration is additionally
 * read from / written to `settings.ocrConfig` through that adapter.
 */
class OcrSettingsManager {
  constructor() {
    this.BATCH_SIZE = 10; // Batch size for MinerU V2 batch translation.

    // localStorage keys. Every key starts with 'ocr' so it stays isolated from the
    // translation configuration.
    this.keys = {
      engine: 'ocrEngine',
      // Worker auth key (shared by the worker-based engines)
      workerAuthKey: 'ocrWorkerAuthKey',
      // Mistral OCR
      mistralKeys: 'ocrMistralKeys',
      mistralBaseUrl: 'ocrMistralBaseUrl',
      // MinerU
      mineruToken: 'ocrMinerUToken',
      mineruWorkerUrl: 'ocrMinerUWorkerUrl',
      mineruTokenMode: 'ocrMinerUTokenMode',
      mineruEnableOcr: 'ocrMinerUEnableOcr',
      mineruEnableFormula: 'ocrMinerUEnableFormula',
      mineruEnableTable: 'ocrMinerUEnableTable',
      mineruTranslationMode: 'ocrMinerUTranslationMode', // translation mode
      // Doc2X
      doc2xToken: 'ocrDoc2XToken',
      doc2xWorkerUrl: 'ocrDoc2XWorkerUrl',
      doc2xTokenMode: 'ocrDoc2XTokenMode',
      doc2xFormulaMode: 'ocrDoc2XFormulaMode',
      doc2xExportFormat: 'ocrDoc2XExportFormat',
      // Flag recording that the first-time tip was already shown
      firstTimeTipShown: 'ocrFirstTimeTipShown'
    };

    // Cached DOM element references, filled by cacheElements().
    this.elements = {};

    this.init();
  }

  /**
   * Runs onDOMReady() immediately when the document has finished loading,
   * otherwise defers it until DOMContentLoaded.
   */
  init() {
    if (document.readyState === 'loading') {
      document.addEventListener('DOMContentLoaded', () => this.onDOMReady());
    } else {
      this.onDOMReady();
    }
  }

  /**
   * Initialization that needs the DOM: caches elements, loads the stored settings,
   * starts the optional backend load, binds events and shows the first-time tip.
   */
  onDOMReady() {
    this.cacheElements();
    this.loadSettings();
    // Backend mode: asynchronously load settings.ocrConfig and apply it to the form.
    // Not awaited on purpose; the method handles its own errors.
    this._loadFromBackendIfAvailable();
    this.bindEvents();
    this.showFirstTimeTip();
    console.log('[OCR Settings] Initialized');
  }

  /**
   * Caches references to the DOM elements used by this manager.
   * Each entry of `this.elements` is keyed by the element id; missing elements are null.
   */
  cacheElements() {
    const ids = [
      // OCR engine selection
      'ocrEngine', 'localOcrHint',
      // Mistral OCR
      'mistralOcrKeys', 'mistralBaseUrl', 'mistralOcrConfig',
      // MinerU
      'mineruToken', 'mineruWorkerUrl',
      'mineruEnableOcr', 'mineruEnableFormula', 'mineruEnableTable',
      'mineruOcrConfig', 'mineruTranslationModeConfig',
      // Doc2X
      'doc2xToken', 'doc2xWorkerUrl',
      'doc2xFormulaMode', 'doc2xExportFormat', 'doc2xOcrConfig'
    ];
    ids.forEach((id) => {
      this.elements[id] = document.getElementById(id);
    });
    // Radio group selecting the MinerU translation mode.
    this.elements.mineruTranslationModeRadios = document.getElementsByName('mineruTranslationMode');
  }

  /**
   * Default worker URL: `window.ProxyConfig.getProxyUrl()` when ProxyConfig exists,
   * otherwise '/api'.
   * @returns {string}
   */
  _defaultWorkerUrl() {
    return (typeof window !== 'undefined' && window.ProxyConfig) ? window.ProxyConfig.getProxyUrl() : '/api';
  }

  /**
   * Reads the legacy newline-separated Mistral keys from localStorage.
   * @returns {string[]} Trimmed, non-empty keys.
   */
  _readLegacyMistralKeys() {
    return (localStorage.getItem(this.keys.mistralKeys) || '')
      .split('\n')
      .map((key) => key.trim())
      .filter(Boolean);
  }

  /**
   * Reads Mistral keys through `loadModelKeys('mistral')` when such a function is
   * available (on `window` or as a global). Only entries with a non-blank `value`
   * and a status of 'valid' or 'untested' are kept.
   * @returns {string[]} Trimmed key values; empty when no loader is available.
   */
  _readManagedMistralKeys() {
    const loadFn = (typeof window !== 'undefined' && typeof window.loadModelKeys === 'function')
      ? window.loadModelKeys
      : (typeof loadModelKeys === 'function' ? loadModelKeys : null);
    if (!loadFn) {
      return [];
    }
    const all = loadFn('mistral') || [];
    return all
      .filter((k) => k && k.value && k.value.trim() && (k.status === 'valid' || k.status === 'untested'))
      .map((k) => k.value.trim());
  }

  /**
   * Loads every OCR setting from localStorage into the form controls.
   * Controls that are missing from the page are skipped; failures are logged.
   */
  loadSettings() {
    try {
      const el = this.elements;
      const k = this.keys;
      const read = (key, fallback) => localStorage.getItem(key) || fallback;
      // Checkboxes default to enabled: only an explicit 'false' turns them off.
      const isEnabled = (key) => localStorage.getItem(key) !== 'false';

      // Engine selection
      const engine = read(k.engine, 'mineru');
      if (el.ocrEngine) {
        el.ocrEngine.value = engine;
        this.switchEngine(engine); // show the matching configuration panel
      }

      // Mistral OCR
      if (el.mistralOcrKeys) {
        el.mistralOcrKeys.value = read(k.mistralKeys, '');
      }
      if (el.mistralBaseUrl) {
        el.mistralBaseUrl.value = read(k.mistralBaseUrl, 'https://api.mistral.ai');
      }

      // MinerU
      if (el.mineruToken) {
        el.mineruToken.value = read(k.mineruToken, '');
      }
      if (el.mineruWorkerUrl) {
        el.mineruWorkerUrl.value = read(k.mineruWorkerUrl, this._defaultWorkerUrl());
      }
      if (el.mineruEnableOcr) {
        el.mineruEnableOcr.checked = isEnabled(k.mineruEnableOcr);
      }
      if (el.mineruEnableFormula) {
        el.mineruEnableFormula.checked = isEnabled(k.mineruEnableFormula);
      }
      if (el.mineruEnableTable) {
        el.mineruEnableTable.checked = isEnabled(k.mineruEnableTable);
      }

      // Translation mode
      const radios = el.mineruTranslationModeRadios;
      if (radios && radios.length > 0) {
        const savedMode = read(k.mineruTranslationMode, 'standard');
        Array.from(radios).forEach((radio) => {
          if (radio.value === savedMode) {
            radio.checked = true;
          }
        });
      }

      // Doc2X
      if (el.doc2xToken) {
        el.doc2xToken.value = read(k.doc2xToken, '');
      }
      if (el.doc2xWorkerUrl) {
        el.doc2xWorkerUrl.value = read(k.doc2xWorkerUrl, this._defaultWorkerUrl());
      }
      if (el.doc2xFormulaMode) {
        el.doc2xFormulaMode.value = read(k.doc2xFormulaMode, 'dollar');
      }
      if (el.doc2xExportFormat) {
        el.doc2xExportFormat.value = read(k.doc2xExportFormat, '');
      }

      console.log('[OCR Settings] Settings loaded');
    } catch (error) {
      console.error('[OCR Settings] Failed to load settings:', error);
    }
  }

  /**
   * In backend mode, fetches `settings.ocrConfig` through `window.storageAdapter`,
   * applies it to the form and mirrors it into localStorage so modules that still
   * read localStorage stay in sync. Does nothing when no adapter is present, when
   * the adapter is not in backend mode, or when the config has no `engine`.
   * @returns {Promise<void>} Resolves in all cases; failures are logged and ignored.
   */
  async _loadFromBackendIfAvailable() {
    try {
      if (typeof window === 'undefined' || !window.storageAdapter || window.storageAdapter.isFrontendMode !== false) return;
      const settings = await window.storageAdapter.loadSettings();
      const cfg = (settings && settings.ocrConfig && typeof settings.ocrConfig === 'object') ? settings.ocrConfig : null;
      if (!cfg || !cfg.engine) return;
      // Apply the backend configuration to the UI.
      this._applyOcrConfigToDom(cfg);
      // Mirror to localStorage for code paths that still read from it.
      this._mirrorOcrConfigToLocalStorage(cfg);
      console.log('[OCR Settings] Loaded ocrConfig from backend and applied.');
    } catch (e) {
      console.warn('[OCR Settings] Failed to load ocrConfig from backend (ignored):', e?.message || e);
    }
  }

  /**
   * Applies a backend `ocrConfig` object to the existing form controls.
   * Only the fields of the config's own engine are written.
   * @param {Object} cfg - Config object; ignored when falsy or without `engine`.
   */
  _applyOcrConfigToDom(cfg) {
    try {
      if (!cfg || !cfg.engine) return;
      const el = this.elements;
      if (el.ocrEngine) {
        el.ocrEngine.value = cfg.engine;
      }
      this.switchEngine(cfg.engine);

      // Setters tolerate missing elements; setVal also skips null/undefined values.
      const setVal = (target, val) => { if (target && val !== undefined && val !== null) target.value = String(val); };
      const setChk = (target, val) => { if (target) target.checked = !!val; };

      if (cfg.engine === 'mistral') {
        setVal(el.mistralBaseUrl, cfg.baseUrl || 'https://api.mistral.ai');
        if (Array.isArray(cfg.keys)) {
          setVal(el.mistralOcrKeys, cfg.keys.join('\n'));
        }
      } else if (cfg.engine === 'mineru') {
        setVal(el.mineruWorkerUrl, (cfg.workerUrl || '').replace(/\/+$/, ''));
        setVal(el.mineruToken, cfg.token || '');
        // 'mineruAuthKey' is not among the cached elements, so it is looked up here.
        setVal(document.getElementById('mineruAuthKey') || null, cfg.authKey || '');
        setChk(el.mineruEnableOcr, cfg.enableOcr !== false);
        setChk(el.mineruEnableFormula, cfg.enableFormula !== false);
        setChk(el.mineruEnableTable, cfg.enableTable !== false);
        if (el.mineruTranslationModeRadios && cfg.translationMode) {
          Array.from(el.mineruTranslationModeRadios).forEach((r) => { r.checked = (r.value === cfg.translationMode); });
        }
      } else if (cfg.engine === 'doc2x') {
        setVal(el.doc2xWorkerUrl, (cfg.workerUrl || '').replace(/\/+$/, ''));
        setVal(el.doc2xToken, cfg.token || '');
        setVal(el.doc2xFormulaMode, cfg.formulaMode || 'dollar');
        setVal(el.doc2xExportFormat, cfg.exportFormat || '');
      }
    } catch (e) {
      // Best effort: a failure here must not break initialization, but it should be visible.
      console.warn('[OCR Settings] Failed to apply ocrConfig to form (ignored):', e?.message || e);
    }
  }

  /**
   * Writes a backend `ocrConfig` object into the localStorage keys, for modules that
   * still read their configuration from localStorage.
   * @param {Object} cfg - Config object; ignored when falsy or without `engine`.
   */
  _mirrorOcrConfigToLocalStorage(cfg) {
    try {
      if (!cfg || !cfg.engine) return;
      const k = this.keys;
      localStorage.setItem(k.engine, cfg.engine);
      if (cfg.engine === 'mistral') {
        if (Array.isArray(cfg.keys)) localStorage.setItem(k.mistralKeys, cfg.keys.join('\n'));
        localStorage.setItem(k.mistralBaseUrl, cfg.baseUrl || 'https://api.mistral.ai');
      } else if (cfg.engine === 'mineru') {
        localStorage.setItem(k.mineruWorkerUrl, (cfg.workerUrl || '').replace(/\/+$/, ''));
        localStorage.setItem(k.mineruToken, cfg.token || '');
        localStorage.setItem(k.workerAuthKey, cfg.authKey || '');
        localStorage.setItem(k.mineruTokenMode, cfg.tokenMode || 'backend'); // defaults to backend forwarding mode
        localStorage.setItem(k.mineruEnableOcr, cfg.enableOcr !== false);
        localStorage.setItem(k.mineruEnableFormula, cfg.enableFormula !== false);
        localStorage.setItem(k.mineruEnableTable, cfg.enableTable !== false);
        localStorage.setItem(k.mineruTranslationMode, cfg.translationMode || 'standard');
      } else if (cfg.engine === 'doc2x') {
        localStorage.setItem(k.doc2xWorkerUrl, (cfg.workerUrl || '').replace(/\/+$/, ''));
        localStorage.setItem(k.doc2xToken, cfg.token || '');
        localStorage.setItem(k.workerAuthKey, cfg.authKey || '');
        localStorage.setItem(k.doc2xTokenMode, cfg.tokenMode || 'backend'); // defaults to backend forwarding mode
        localStorage.setItem(k.doc2xFormulaMode, cfg.formulaMode || 'dollar');
        localStorage.setItem(k.doc2xExportFormat, cfg.exportFormat || '');
      }
    } catch (e) {
      // Best effort: mirroring is a compatibility aid, so failures are logged and ignored.
      console.warn('[OCR Settings] Failed to mirror ocrConfig to localStorage (ignored):', e?.message || e);
    }
  }

  /**
   * Saves every OCR setting from the form controls to localStorage and, in backend
   * mode, also persists the current engine's config as `settings.ocrConfig`.
   * @returns {Promise<void>} Resolves in all cases; failures are logged.
   */
  async saveSettings() {
    try {
      const el = this.elements;
      const k = this.keys;
      const storeValue = (target, key) => { if (target) localStorage.setItem(key, target.value); };
      const storeChecked = (target, key) => { if (target) localStorage.setItem(key, target.checked); };

      // Engine selection
      storeValue(el.ocrEngine, k.engine);
      // Mistral OCR
      storeValue(el.mistralOcrKeys, k.mistralKeys);
      storeValue(el.mistralBaseUrl, k.mistralBaseUrl);
      // MinerU
      storeValue(el.mineruToken, k.mineruToken);
      storeValue(el.mineruWorkerUrl, k.mineruWorkerUrl);
      storeChecked(el.mineruEnableOcr, k.mineruEnableOcr);
      storeChecked(el.mineruEnableFormula, k.mineruEnableFormula);
      storeChecked(el.mineruEnableTable, k.mineruEnableTable);
      // Translation mode: store the value of the checked radio, if any.
      const radios = el.mineruTranslationModeRadios;
      if (radios && radios.length > 0) {
        const checkedRadio = Array.from(radios).find((r) => r.checked);
        if (checkedRadio) {
          localStorage.setItem(k.mineruTranslationMode, checkedRadio.value);
        }
      }
      // Doc2X
      storeValue(el.doc2xToken, k.doc2xToken);
      storeValue(el.doc2xWorkerUrl, k.doc2xWorkerUrl);
      storeValue(el.doc2xFormulaMode, k.doc2xFormulaMode);
      storeValue(el.doc2xExportFormat, k.doc2xExportFormat);

      console.log('[OCR Settings] Settings saved');

      // Backend mode: write settings.ocrConfig. A backend failure does not undo the
      // localStorage writes above.
      try {
        if (typeof window !== 'undefined' && window.storageAdapter && window.storageAdapter.isFrontendMode === false) {
          const settings = await window.storageAdapter.loadSettings();
          const cfg = this.getCurrentConfig();
          const merged = { ...(settings || {}), ocrConfig: cfg };
          await window.storageAdapter.saveSettings(merged);
          console.log('[OCR Settings] ocrConfig persisted to backend');
        }
      } catch (be) {
        console.warn('[OCR Settings] Persist ocrConfig to backend failed (ignored):', be?.message || be);
      }
    } catch (error) {
      console.error('[OCR Settings] Failed to save settings:', error);
    }
  }

  /**
   * Binds the event listeners that switch panels and auto-save on user input.
   */
  bindEvents() {
    // Engine switch
    if (this.elements.ocrEngine) {
      this.elements.ocrEngine.addEventListener('change', (e) => {
        this.switchEngine(e.target.value);
        this.saveSettings();
      });
    }

    // Auto-save when the MinerU translation mode changes.
    const radios = this.elements.mineruTranslationModeRadios;
    if (radios && radios.length > 0) {
      Array.from(radios).forEach((radio) => {
        radio.addEventListener('change', () => this.saveSettings());
      });
    }

    // Auto-save for every input control.
    const inputIds = [
      'mistralOcrKeys', 'mistralBaseUrl',
      'mineruToken', 'mineruWorkerUrl',
      'mineruEnableOcr', 'mineruEnableFormula', 'mineruEnableTable',
      'doc2xToken', 'doc2xWorkerUrl',
      'doc2xFormulaMode', 'doc2xExportFormat'
    ];
    inputIds.forEach((id) => {
      const el = this.elements[id];
      if (!el) {
        return;
      }
      el.addEventListener('change', () => this.saveSettings());
      // Textareas and text/password inputs also save while typing (debounced).
      if (el.tagName === 'TEXTAREA' || el.type === 'text' || el.type === 'password') {
        el.addEventListener('input', this.debounce(() => this.saveSettings(), 500));
      }
    });

    console.log('[OCR Settings] Events bound');
  }

  /**
   * Switches the OCR engine by hiding every configuration panel and then showing
   * the ones that belong to the selected engine. Unrecognized engines show nothing.
   * @param {string} engine - Engine name: 'none' | 'local' | 'mistral' | 'mineru' | 'doc2x'
   */
  switchEngine(engine) {
    const el = this.elements;

    // Hide all panels, including the MinerU translation-mode panel and the local hint.
    [
      el.mistralOcrConfig,
      el.mineruOcrConfig,
      el.doc2xOcrConfig,
      el.mineruTranslationModeConfig,
      el.localOcrHint
    ].forEach((panel) => {
      if (panel) {
        panel.classList.add('hidden');
      }
    });

    // Panels to reveal per engine; 'none' needs no OCR and therefore shows nothing.
    const panelsByEngine = {
      none: [],
      local: [el.localOcrHint],
      mistral: [el.mistralOcrConfig],
      mineru: [el.mineruOcrConfig, el.mineruTranslationModeConfig],
      doc2x: [el.doc2xOcrConfig]
    };
    const toShow = Object.prototype.hasOwnProperty.call(panelsByEngine, engine) ? panelsByEngine[engine] : [];
    toShow.forEach((panel) => {
      if (panel) {
        panel.classList.remove('hidden');
      }
    });

    console.log(`[OCR Settings] Switched to ${engine}`);
  }

  /**
   * First-time tip (optional feature).
   * When no OCR Mistral keys are stored, translation Mistral keys exist under
   * 'mistralApiKeys', and the tip has not been shown yet, asks the user via
   * `confirm` whether to copy the translation keys into the OCR configuration.
   * The tip is marked as shown regardless of the answer.
   */
  showFirstTimeTip() {
    try {
      const ocrKeys = localStorage.getItem(this.keys.mistralKeys);
      const translationKeys = localStorage.getItem('mistralApiKeys'); // keys used for translation
      const tipShown = localStorage.getItem(this.keys.firstTimeTipShown);

      // Condition: OCR not configured + translation configured + tip never shown.
      if (ocrKeys || !translationKeys || tipShown) {
        return;
      }

      const message =
        '检测到您已配置 Mistral 翻译 API Keys。\n\n' +
        '提示OCR 功能使用独立的 API Key 配置。\n' +
        '是否将翻译配置复制到 OCR 配置中作为初始值?\n\n' +
        '(您可以稍后在设置中单独修改)';
      if (confirm(message)) {
        localStorage.setItem(this.keys.mistralKeys, translationKeys);
        if (this.elements.mistralOcrKeys) {
          this.elements.mistralOcrKeys.value = translationKeys;
        }
        console.log('[OCR Settings] Copied translation keys to OCR config');
      }
      // Mark the tip as shown.
      localStorage.setItem(this.keys.firstTimeTipShown, 'true');
    } catch (error) {
      console.error('[OCR Settings] Failed to show first time tip:', error);
    }
  }

  /**
   * Builds the configuration of the currently selected OCR engine from localStorage.
   * The engine defaults to 'mineru' when none is stored.
   * @returns {Object} Config object whose shape depends on `engine`.
   * @throws {Error} When the stored engine name is not recognized.
   */
  getCurrentConfig() {
    const k = this.keys;
    const engine = localStorage.getItem(k.engine) || 'mineru';
    switch (engine) {
      case 'none':
        return { engine: 'none' };
      case 'local':
        return { engine: 'local' };
      case 'mistral': {
        // Prefer keys from the key manager; fall back to the legacy textarea
        // storage (ocrMistralKeys) when the manager yields nothing or fails.
        let managedKeys = [];
        try {
          managedKeys = this._readManagedMistralKeys();
        } catch (e) {
          console.warn('[OCR Settings] 读取 Mistral Keys 失败,回退 legacy。', e);
        }
        const keys = managedKeys.length > 0 ? managedKeys : this._readLegacyMistralKeys();
        const baseUrl = localStorage.getItem(k.mistralBaseUrl) || 'https://api.mistral.ai';
        return { engine: 'mistral', keys, baseUrl };
      }
      case 'mineru': {
        const defaultUrl = this._defaultWorkerUrl();
        return {
          engine: 'mineru',
          token: localStorage.getItem(k.mineruToken) || '',
          workerUrl: (localStorage.getItem(k.mineruWorkerUrl) || defaultUrl).replace(/\/+$/, ''), // strip trailing slashes
          authKey: localStorage.getItem(k.workerAuthKey) || '',
          tokenMode: localStorage.getItem(k.mineruTokenMode) || 'backend', // defaults to backend forwarding mode
          enableOcr: localStorage.getItem(k.mineruEnableOcr) !== 'false',
          enableFormula: localStorage.getItem(k.mineruEnableFormula) !== 'false',
          enableTable: localStorage.getItem(k.mineruEnableTable) !== 'false',
          translationMode: localStorage.getItem(k.mineruTranslationMode) || 'standard'
        };
      }
      case 'doc2x': {
        const defaultUrl = this._defaultWorkerUrl();
        return {
          engine: 'doc2x',
          token: localStorage.getItem(k.doc2xToken) || '',
          workerUrl: (localStorage.getItem(k.doc2xWorkerUrl) || defaultUrl).replace(/\/+$/, ''), // strip trailing slashes
          authKey: localStorage.getItem(k.workerAuthKey) || '',
          tokenMode: localStorage.getItem(k.doc2xTokenMode) || 'backend', // defaults to backend forwarding mode
          formulaMode: localStorage.getItem(k.doc2xFormulaMode) || 'dollar',
          exportFormat: localStorage.getItem(k.doc2xExportFormat) || ''
        };
      }
      default:
        throw new Error(`Unknown OCR engine: ${engine}`);
    }
  }

  /**
   * Checks whether the configuration of the current OCR engine is complete.
   * @returns {Object} { valid: boolean, message: string } - `message` is empty when valid.
   * @throws {Error} Propagated from getCurrentConfig() for an unrecognized engine.
   */
  validateConfig() {
    const config = this.getCurrentConfig();
    switch (config.engine) {
      case 'none':
      case 'local':
        // No OCR, or local parsing: always valid.
        return { valid: true, message: '' };
      case 'mistral':
        // Keys come from the key manager or the legacy storage (merged by
        // getCurrentConfig); at least one usable key is required.
        if (!Array.isArray(config.keys) || config.keys.length === 0) {
          return { valid: false, message: '请配置 Mistral OCR API Keys' };
        }
        break;
      case 'mineru':
        // Backend forwarding mode (tokenMode other than 'frontend') needs no token
        // configured on the frontend.
        if (config.tokenMode === 'frontend' && !config.token) {
          return { valid: false, message: '请配置 MinerU Token前端透传模式' };
        }
        // NOTE(review): unlike Doc2X below, the Worker URL is only required in
        // frontend mode here — confirm this asymmetry is intended.
        if (config.tokenMode === 'frontend' && !config.workerUrl) {
          return { valid: false, message: '请配置 MinerU Worker URL' };
        }
        break;
      case 'doc2x':
        // Backend forwarding mode (tokenMode other than 'frontend') needs no token
        // configured on the frontend.
        if (config.tokenMode === 'frontend' && !config.token) {
          return { valid: false, message: '请配置 Doc2X Token前端透传模式' };
        }
        if (!config.workerUrl) {
          return { valid: false, message: '请配置 Doc2X Worker URL' };
        }
        break;
    }
    return { valid: true, message: '' };
  }

  /**
   * Debounce helper: the returned function postpones calling `func` until `wait`
   * milliseconds have passed since its most recent invocation.
   * @param {Function} func - Function to debounce.
   * @param {number} wait - Delay in milliseconds.
   * @returns {Function} Debounced wrapper; forwards `this` and the latest arguments.
   */
  debounce(func, wait) {
    let timeout;
    return function (...args) {
      clearTimeout(timeout);
      timeout = setTimeout(() => func.apply(this, args), wait);
    };
  }
}
// Browser context: create the shared manager and publish it on `window`.
if (typeof window !== 'undefined') {
  const sharedManager = new OcrSettingsManager();
  window.ocrSettingsManager = sharedManager;
}
// CommonJS context: export the class when a usable `module.exports` is present.
if (typeof module !== 'undefined') {
  if (module.exports) {
    module.exports = OcrSettingsManager;
  }
}