/** * Paper Burner 本地代理服务器 * * 功能完全等同于 Cloudflare Worker,用户可以本地快速部署使用 * * 支持的服务: * 1. OCR 代理 (MinerU / Doc2X) * 2. 学术搜索代理 (Semantic Scholar / PubMed / CrossRef / OpenAlex / arXiv) * 3. PDF/ZIP 下载代理 * * 使用方法: * 1. npm install * 2. 复制 .env.example 到 .env 并配置 * 3. npm start * * 然后在 Paper Burner 前端设置代理地址为 http://localhost:3456 */ import http from 'http'; import { URL, URLSearchParams } from 'url'; import fetch from 'node-fetch'; import { createReadStream, readFileSync, existsSync } from 'fs'; import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; import { Readable } from 'stream'; import OSS from 'ali-oss'; // Express 应用(持久化 API) import app from './app.js'; // 数据库初始化 import { initDatabase, prisma } from './db/client.js'; const __dirname = dirname(fileURLToPath(import.meta.url)); // ==================== 配置加载 ==================== // 尝试加载 .env 文件 function loadEnv() { const envPaths = [ join(__dirname, '.env'), // local-proxy 目录配置优先(后加载覆盖前者) ]; for (const envPath of envPaths) { if (existsSync(envPath)) { console.log(`[Env] Loading variables from: ${envPath}`); const content = readFileSync(envPath, 'utf-8'); for (const line of content.split('\n')) { const trimmed = line.trim(); if (trimmed && !trimmed.startsWith('#')) { const firstEqualIndex = trimmed.indexOf('='); if (firstEqualIndex !== -1) { const key = trimmed.substring(0, firstEqualIndex).trim(); let value = trimmed.substring(firstEqualIndex + 1).trim(); // Remove surrounding quotes if exist value = value.replace(/^["']|["']$/g, ''); // 后加载的配置覆盖前者(只要值不为空) if (value !== '') { process.env[key] = value; } else if (process.env[key] === undefined) { // 只有当前变量未定义时才设置空值 process.env[key] = value; } } } } } } } loadEnv(); const PORT = parseInt(process.env.PORT || '3456', 10); const MINERU_BASE_URL = 'https://mineru.net/api/v4'; const DOC2X_BASE_URL = 'https://v2.doc2x.noedgeai.com'; // ==================== 数据库初始化 ==================== let dbConnected = false; initDatabase() .then(connected => { dbConnected = connected; if (connected) { console.log('[Database] PostgreSQL connected successfully'); globalThis.__prismaConnected = true; } else { console.warn('[Database] PostgreSQL not configured or connection failed'); } }) .catch(err => { console.error('[Database] Initialization error:', err.message); }); // ==================== OSS 配置 ==================== let ossClient = null; function initOssClient() { const region = process.env.OSS_REGION; const accessKeyId = process.env.OSS_ACCESS_KEY_ID; const accessKeySecret = process.env.OSS_ACCESS_KEY_SECRET; const bucket = process.env.OSS_BUCKET || process.env.OSS_BUCKET_NAME; if (region && accessKeyId && accessKeySecret && bucket) { // 确保 region 格式正确(如 cn-beijing -> oss-cn-beijing) let normalizedRegion = region; if (!region.startsWith('oss-')) { normalizedRegion = `oss-${region}`; } ossClient = new OSS({ region: normalizedRegion, accessKeyId, accessKeySecret, bucket, secure: true // 强制使用 HTTPS }); console.log(`[OSS] Configured with bucket: ${bucket} at region: ${normalizedRegion}`); } else { console.warn('[OSS] OSS_REGION, OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, or OSS_BUCKET not found in environment.'); } } initOssClient(); // ==================== 工具函数 ==================== function jsonResponse(res, data, status = 200, origin = '*') { res.writeHead(status, { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': origin, 'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS', 'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key, X-Api-Key, X-MinerU-Key, X-Doc2X-Key, Authorization, x-goog-api-key, anthropic-version', 'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges', }); res.end(JSON.stringify(data)); } function handleCORS(res, origin = '*') { res.writeHead(204, { 'Access-Control-Allow-Origin': origin, 'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS', 'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key, X-Api-Key, X-MinerU-Key, X-Doc2X-Key, Authorization, x-goog-api-key, anthropic-version', 'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges', 'Access-Control-Max-Age': '86400', }); res.end(); } /** * 从环境变量获取 API Token(完全后端代理模式,不再接受前端传来的 Token) * @param {Object} service - 服务名称 ('MINERU' 或 'DOC2X') * @returns {string|null} - Token 或 null */ function getToken(service) { const envKey = service === 'MINERU' ? 'MINERU_API_TOKEN' : 'DOC2X_API_TOKEN'; let token = process.env[envKey]; if (token) { token = token.replace(/^Bearer\s+/i, '').trim(); const preview = token.length > 12 ? `${token.substring(0, 6)}...${token.substring(token.length - 6)}` : token; console.log(`[${service}] Token from environment: ${preview}`); } else { console.warn(`[${service}] Token not found in environment variable ${envKey}`); } return token || null; } async function readBody(req) { return new Promise((resolve, reject) => { const chunks = []; req.on('data', chunk => chunks.push(chunk)); req.on('end', () => resolve(Buffer.concat(chunks))); req.on('error', reject); }); } // 简易 multipart 解析器 async function parseMultipart(req) { const contentType = req.headers['content-type'] || ''; const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^;]+))/); if (!boundaryMatch) throw new Error('No boundary found'); const boundary = boundaryMatch[1] || boundaryMatch[2]; const body = await readBody(req); const parts = []; const boundaryBuffer = Buffer.from(`--${boundary}`); const endBoundary = Buffer.from(`--${boundary}--`); let start = body.indexOf(boundaryBuffer) + boundaryBuffer.length + 2; // skip \r\n while (start < body.length) { const nextBoundary = body.indexOf(boundaryBuffer, start); if (nextBoundary === -1) break; const partData = body.slice(start, nextBoundary - 2); // remove trailing \r\n const headerEnd = partData.indexOf('\r\n\r\n'); if (headerEnd !== -1) { const headerStr = partData.slice(0, headerEnd).toString(); const content = partData.slice(headerEnd + 4); const nameMatch = headerStr.match(/name="([^"]+)"/); const filenameMatch = headerStr.match(/filename="([^"]+)"/); if (nameMatch) { parts.push({ name: nameMatch[1], filename: filenameMatch ? filenameMatch[1] : null, data: filenameMatch ? content : content.toString() }); } } start = nextBoundary + boundaryBuffer.length + 2; } return parts; } // ==================== MinerU 处理 ==================== // 日志工具函数 function logMinerU(level, message, data = null) { const timestamp = new Date().toISOString(); const prefix = `[${timestamp}] [MinerU] [${level.toUpperCase()}]`; if (data) { console.log(`${prefix} ${message}`, typeof data === 'object' ? JSON.stringify(data, null, 2) : data); } else { console.log(`${prefix} ${message}`); } } async function handleMinerUUpload(req, res, origin) { const startTime = Date.now(); const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; logMinerU('info', `[${requestId}] ====== 上传请求开始 ======`); try { // 1. 解析 multipart 表单 logMinerU('info', `[${requestId}] 步骤1: 解析 multipart 表单数据`); const parseStart = Date.now(); const parts = await parseMultipart(req); const parseTime = Date.now() - parseStart; logMinerU('info', `[${requestId}] 表单解析完成`, { partsCount: parts.length, parseTimeMs: parseTime }); const filePart = parts.find(p => p.name === 'file'); if (!filePart || !filePart.filename) { logMinerU('error', `[${requestId}] 错误: 未找到文件`, { parts: parts.map(p => ({ name: p.name, filename: p.filename })) }); return jsonResponse(res, { error: 'No file provided', requestId }, 400, origin); } logMinerU('info', `[${requestId}] 文件信息`, { filename: filePart.filename, size: filePart.data?.length || 0, sizeReadable: `${((filePart.data?.length || 0) / 1024).toFixed(2)} KB` }); // 2. 获取 Token logMinerU('info', `[${requestId}] 步骤2: 获取 MinerU API Token`); const token = getToken('MINERU'); if (!token) { logMinerU('error', `[${requestId}] 错误: MinerU Token 未配置`); return jsonResponse(res, { error: 'MinerU API Token required. Configure MINERU_API_TOKEN in .env', requestId }, 401, origin); } logMinerU('info', `[${requestId}] Token 验证通过`); // 3. 解析表单字段 logMinerU('info', `[${requestId}] 步骤3: 解析处理参数`); const getField = (name, defaultVal) => { const part = parts.find(p => p.name === name); return part ? part.data : defaultVal; }; const params = { is_ocr: getField('is_ocr', 'true') !== 'false', enable_formula: getField('enable_formula', 'true') !== 'false', enable_table: getField('enable_table', 'true') !== 'false', language: getField('language', 'ch'), data_id: getField('data_id', null), page_ranges: getField('page_ranges', null) }; logMinerU('info', `[${requestId}] 处理参数`, params); // 4. 申请上传链接 logMinerU('info', `[${requestId}] 步骤4: 向 MinerU API 申请上传链接`); logMinerU('debug', `[${requestId}] 请求详情`, { url: `${MINERU_BASE_URL}/file-urls/batch`, method: 'POST', body: { enable_formula: params.enable_formula, enable_table: params.enable_table, language: params.language, files: [{ name: filePart.filename, is_ocr: params.is_ocr, ...(params.data_id && { data_id: params.data_id }), ...(params.page_ranges && { page_ranges: params.page_ranges }) }] } }); const apiStartTime = Date.now(); const uploadUrlResponse = await fetch(`${MINERU_BASE_URL}/file-urls/batch`, { method: 'POST', headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json', }, body: JSON.stringify({ enable_formula: params.enable_formula, enable_table: params.enable_table, language: params.language, files: [{ name: filePart.filename, is_ocr: params.is_ocr, ...(params.data_id && { data_id: params.data_id }), ...(params.page_ranges && { page_ranges: params.page_ranges }), }], }), }); const apiTime = Date.now() - apiStartTime; logMinerU('info', `[${requestId}] MinerU API 响应`, { status: uploadUrlResponse.status, statusText: uploadUrlResponse.statusText, responseTimeMs: apiTime }); if (!uploadUrlResponse.ok) { const errorText = await uploadUrlResponse.text(); logMinerU('error', `[${requestId}] MinerU API 返回错误`, { status: uploadUrlResponse.status, error: errorText }); throw new Error(`MinerU申请上传链接失败: ${errorText}`); } const uploadData = await uploadUrlResponse.json(); logMinerU('debug', `[${requestId}] MinerU API 响应数据`, uploadData); if (uploadData.code !== 0) { logMinerU('error', `[${requestId}] MinerU API 业务错误`, { code: uploadData.code, msg: uploadData.msg }); throw new Error(`MinerU返回错误: ${uploadData.msg}`); } const batchId = uploadData.data.batch_id; const ossUrl = uploadData.data.file_urls[0]; logMinerU('info', `[${requestId}] 获取上传链接成功`, { batchId, ossUrl: ossUrl?.substring(0, 50) + '...' }); // 5. 上传到 OSS logMinerU('info', `[${requestId}] 步骤5: 上传文件到 OSS`); logMinerU('info', `[${requestId}] OSS 上传详情`, { url: ossUrl?.substring(0, 60) + '...', fileSize: filePart.data.length }); const ossStartTime = Date.now(); const ossResponse = await fetch(ossUrl, { method: 'PUT', body: filePart.data, headers: { 'Content-Length': filePart.data.length.toString(), }, }); const ossTime = Date.now() - ossStartTime; logMinerU('info', `[${requestId}] OSS 上传完成`, { status: ossResponse.status, uploadTimeMs: ossTime }); if (!ossResponse.ok) { logMinerU('error', `[${requestId}] OSS 上传失败`, { status: ossResponse.status, statusText: ossResponse.statusText }); throw new Error(`OSS上传失败: ${ossResponse.status}`); } const totalTime = Date.now() - startTime; logMinerU('info', `[${requestId}] ====== 上传请求完成 ======`, { batchId, filename: filePart.filename, totalTimeMs: totalTime, phases: { parse: parseTime, mineruApi: apiTime, ossUpload: ossTime } }); jsonResponse(res, { success: true, batch_id: batchId, file_name: filePart.filename, service: 'mineru', requestId, timing: { totalMs: totalTime } }, 200, origin); } catch (error) { const totalTime = Date.now() - startTime; logMinerU('error', `[${requestId}] ====== 上传请求失败 ======`, { error: error.message, stack: error.stack, totalTimeMs: totalTime }); jsonResponse(res, { error: error.message, requestId }, 500, origin); } } async function handleMinerUResult(req, res, batchId, origin) { try { if (batchId === '__health__') { const token = getToken('MINERU'); if (!token) { return jsonResponse(res, { error: 'MinerU API Token required' }, 401, origin); } return jsonResponse(res, { success: true, service: 'mineru', health: true, timestamp: Date.now() }, 200, origin); } const token = getToken('MINERU'); if (!token) { return jsonResponse(res, { error: 'MinerU API Token required' }, 401, origin); } const response = await fetch(`${MINERU_BASE_URL}/extract-results/batch/${batchId}`, { headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'application/json', }, }); if (!response.ok) { throw new Error(`MinerU查询失败: ${response.statusText}`); } const data = await response.json(); if (data.code !== 0) { throw new Error(`MinerU返回错误: ${data.msg}`); } jsonResponse(res, { success: true, service: 'mineru', ...data.data }, 200, origin); } catch (error) { console.error('[MinerU] Result error:', error.message); jsonResponse(res, { error: error.message }, 500, origin); } } // ==================== Doc2X 处理 ==================== async function handleDoc2XUpload(req, res, origin) { try { const parts = await parseMultipart(req); const filePart = parts.find(p => p.name === 'file'); if (!filePart || !filePart.filename) { return jsonResponse(res, { error: 'No file provided' }, 400, origin); } const token = getToken('DOC2X'); if (!token) { return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin); } console.log(`[Doc2X] Uploading: ${filePart.filename}`); // 请求预上传链接 const preuploadResponse = await fetch(`${DOC2X_BASE_URL}/api/v2/parse/preupload`, { method: 'POST', headers: { 'Authorization': `Bearer ${token}` }, }); if (!preuploadResponse.ok) { throw new Error(`Doc2X预上传失败: ${await preuploadResponse.text()}`); } const preuploadData = await preuploadResponse.json(); if (preuploadData.code !== 'success') { throw new Error(`Doc2X返回错误: ${preuploadData.msg}`); } const { uid, url: uploadUrl } = preuploadData.data; // 上传到 OSS console.log(`[Doc2X] Uploading to OSS: ${filePart.data.length} bytes`); const ossResponse = await fetch(uploadUrl, { method: 'PUT', body: filePart.data, headers: { 'Content-Length': filePart.data.length.toString(), }, }); if (!ossResponse.ok) { throw new Error(`Doc2X OSS上传失败: ${ossResponse.status}`); } jsonResponse(res, { success: true, uid, file_name: filePart.filename, service: 'doc2x' }, 200, origin); } catch (error) { console.error('[Doc2X] Upload error:', error.message); jsonResponse(res, { error: error.message }, 500, origin); } } async function handleDoc2XStatus(req, res, uid, origin) { try { if (uid === '__health__') { const token = getToken('DOC2X'); if (!token) { return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin); } return jsonResponse(res, { success: true, service: 'doc2x', health: true, timestamp: Date.now() }, 200, origin); } const token = getToken('DOC2X'); if (!token) { return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin); } const response = await fetch(`${DOC2X_BASE_URL}/api/v2/parse/status?uid=${uid}`, { headers: { 'Authorization': `Bearer ${token}` }, }); if (!response.ok) { throw new Error(`Doc2X查询失败: ${response.statusText}`); } const data = await response.json(); if (data.code !== 'success') { return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin); } jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin); } catch (error) { console.error('[Doc2X] Status error:', error.message); jsonResponse(res, { error: error.message }, 500, origin); } } async function handleDoc2XConvert(req, res, origin) { try { const body = JSON.parse((await readBody(req)).toString()); const { uid, to = 'md', formula_mode = 'normal', filename, merge_cross_page_forms = false } = body; if (!uid) { return jsonResponse(res, { error: 'uid is required' }, 400, origin); } const token = getToken('DOC2X'); if (!token) { return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin); } const response = await fetch(`${DOC2X_BASE_URL}/api/v2/convert/parse`, { method: 'POST', headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json', }, body: JSON.stringify({ uid, to, formula_mode, ...(filename && { filename }), merge_cross_page_forms }), }); if (!response.ok) { throw new Error(`Doc2X转换失败: ${await response.text()}`); } const data = await response.json(); if (data.code !== 'success') { return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin); } jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin); } catch (error) { console.error('[Doc2X] Convert error:', error.message); jsonResponse(res, { error: error.message }, 500, origin); } } async function handleDoc2XConvertResult(req, res, uid, origin) { try { const token = getToken('DOC2X'); if (!token) { return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin); } const response = await fetch(`${DOC2X_BASE_URL}/api/v2/convert/parse/result?uid=${uid}`, { headers: { 'Authorization': `Bearer ${token}` }, }); if (!response.ok) { throw new Error(`Doc2X查询转换结果失败: ${response.statusText}`); } const data = await response.json(); if (data.code !== 'success') { return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin); } jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin); } catch (error) { console.error('[Doc2X] Convert result error:', error.message); jsonResponse(res, { error: error.message }, 500, origin); } } // ==================== LLM & Mistral API 代理 ==================== async function handleLLMProxy(req, res, targetUrl, provider, origin) { try { const method = req.method; // 根据 provider 获取对应的环境变量 const apiKeyEnvMap = { 'aliyun': 'DASHSCOPE_API_KEY', 'tongyi': 'DASHSCOPE_API_KEY', 'zhipu': 'ZHIPU_API_KEY', 'openai': 'OPENAI_API_KEY', 'deepseek': 'DEEPSEEK_API_KEY', 'anthropic': 'ANTHROPIC_API_KEY', 'gemini': 'GOOGLE_API_KEY', 'mistral': 'MISTRAL_API_KEY', }; const envKeyName = apiKeyEnvMap[provider] || `${provider.toUpperCase()}_API_KEY`; const apiKey = process.env[envKeyName]; const headers = { 'Content-Type': req.headers['content-type'] || 'application/json', 'Accept': req.headers['accept'] || '*/*' }; if (apiKey) { if (provider === 'anthropic') { headers['x-api-key'] = apiKey; headers['anthropic-version'] = req.headers['anthropic-version'] || '2023-06-01'; } else if (provider === 'gemini') { headers['x-goog-api-key'] = apiKey; } else { // OpenAI 格式(包括阿里云百炼、智谱 AI 等) headers['Authorization'] = `Bearer ${apiKey}`; } } else { // 如果后端没有配置环境变量,则穿透使用前端传递的 Header if (req.headers['authorization']) headers['Authorization'] = req.headers['authorization']; if (req.headers['x-api-key']) headers['x-api-key'] = req.headers['x-api-key']; if (req.headers['x-goog-api-key']) headers['x-goog-api-key'] = req.headers['x-goog-api-key']; } const options = { method, headers, redirect: 'follow' }; // 处理 POST/PUT 请求的 Body if (method !== 'GET' && method !== 'HEAD') { const bodyBuffer = await readBody(req); if (bodyBuffer.length > 0) { options.body = bodyBuffer; // 支持 FormData 上传 (如 Mistral 文件上传) if (headers['Content-Type'] && headers['Content-Type'].includes('multipart/form-data')) { headers['Content-Type'] = req.headers['content-type']; // 保留 boundary } } } console.log(`[LLM Proxy] ${method} ${targetUrl} (Provider: ${provider})`); const response = await fetch(targetUrl, options); const contentType = response.headers.get('content-type') || ''; const responseHeaders = { 'Access-Control-Allow-Origin': origin, 'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS', 'Access-Control-Allow-Headers': '*', 'Access-Control-Expose-Headers': '*', }; if (contentType) responseHeaders['Content-Type'] = contentType; res.writeHead(response.status, responseHeaders); // 对于文件流和 SSE 流式输出直接 pipe response.body.pipe(res); } catch (error) { console.error(`[LLM Proxy] Error:`, error.message); if (!res.headersSent) { jsonResponse(res, { error: 'Proxy upstream error', message: error.message }, 503, origin); } else { res.end(); } } } // ==================== 学术搜索代理 ==================== async function proxySemanticScholar(req, res, path, searchParams, origin) { try { const apiKey = req.headers['x-api-key'] || process.env.SEMANTIC_SCHOLAR_API_KEY; const url = `https://api.semanticscholar.org/${path}?${searchParams}`; console.log(`[Semantic Scholar] Proxying: ${url}`); const headers = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' }; if (apiKey) headers['x-api-key'] = apiKey; const response = await fetch(url, { headers }); const data = await response.json(); jsonResponse(res, data, response.status, origin); } catch (error) { console.error('[Semantic Scholar] Error:', error.message); jsonResponse(res, { error: 'Semantic Scholar upstream error', message: error.message }, 503, origin); } } async function proxyPubMed(req, res, path, searchParams, origin) { try { const apiKey = req.headers['x-api-key'] || process.env.PUBMED_API_KEY; const params = new URLSearchParams(searchParams); if (apiKey) params.set('api_key', apiKey); const url = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/${path}?${params}`; console.log(`[PubMed] Proxying: ${url}`); const response = await fetch(url, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } }); const contentType = response.headers.get('content-type') || ''; const text = await response.text(); res.writeHead(response.status, { 'Content-Type': contentType.includes('xml') ? 'application/xml' : 'text/plain', 'Access-Control-Allow-Origin': origin, }); res.end(text); } catch (error) { console.error('[PubMed] Error:', error.message); jsonResponse(res, { error: 'PubMed upstream error', message: error.message }, 503, origin); } } async function proxyCrossRef(req, res, path, searchParams, origin) { try { const url = `https://api.crossref.org/${path}?${searchParams}`; console.log(`[CrossRef] Proxying: ${url}`); const response = await fetch(url, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } }); const data = await response.json(); jsonResponse(res, data, response.status, origin); } catch (error) { console.error('[CrossRef] Error:', error.message); jsonResponse(res, { error: 'CrossRef upstream error', message: error.message }, 503, origin); } } async function proxyOpenAlex(req, res, path, searchParams, origin) { try { const url = `https://api.openalex.org/${path}?${searchParams}`; console.log(`[OpenAlex] Proxying: ${url}`); const response = await fetch(url, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } }); const data = await response.json(); jsonResponse(res, data, response.status, origin); } catch (error) { console.error('[OpenAlex] Error:', error.message); jsonResponse(res, { error: 'OpenAlex upstream error', message: error.message }, 503, origin); } } async function proxyArXiv(req, res, path, searchParams, origin) { try { const url = `http://export.arxiv.org/api/${path}?${searchParams}`; console.log(`[arXiv] Proxying: ${url}`); const response = await fetch(url); const text = await response.text(); res.writeHead(response.status, { 'Content-Type': 'application/xml', 'Access-Control-Allow-Origin': origin, }); res.end(text); } catch (error) { console.error('[arXiv] Error:', error.message); jsonResponse(res, { error: 'arXiv upstream error', message: error.message }, 503, origin); } } // ==================== OSS 上传服务 ==================== async function handleOssUpload(req, res, origin) { try { if (!ossClient) { return jsonResponse(res, { error: 'OSS is not configured on the server' }, 500, origin); } const parts = await parseMultipart(req); const filePart = parts.find(p => p.name === 'file'); if (!filePart || !filePart.filename) { return jsonResponse(res, { error: 'No file provided' }, 400, origin); } console.log(`[OSS Upload] Uploading: ${filePart.filename}`); const ext = filePart.filename.split('.').pop() || 'pdf'; const objectName = `chat-pdfs/${Date.now()}_${Math.random().toString(36).substring(2, 7)}.${ext}`; const result = await ossClient.put(objectName, Buffer.from(filePart.data)); console.log(`[OSS Upload] Upload success: ${result.url}`); // If bucket is not public-read, you might need to generate a signature url // For now we assume the bucket is configured as public readable for this use case, // or the URL is sufficient for the downstream API. // If needed: const signUrl = ossClient.signatureUrl(objectName, { expires: 3600 }); // Some regions use -internal or different endpoints, result.url usually gives the public url let urlToReturn = result.url; // ensure https if (urlToReturn && urlToReturn.startsWith('http://')) { urlToReturn = urlToReturn.replace('http://', 'https://'); } jsonResponse(res, { success: true, url: urlToReturn, file_name: filePart.filename }, 200, origin); } catch (error) { console.error('[OSS Upload] Error:', error.message); jsonResponse(res, { error: error.message }, 500, origin); } } // ==================== ZIP/PDF 代理 ==================== async function handleProxyDownload(req, res, downloadUrl, origin) { try { if (!downloadUrl) { return jsonResponse(res, { error: 'url parameter is required' }, 400, origin); } const method = req.method || 'GET'; console.log(`[Proxy] ${method} ${downloadUrl}`); const headers = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' }; const rangeHeader = req.headers['range']; if (rangeHeader) { headers['Range'] = rangeHeader; } const response = await fetch(downloadUrl, { method, headers, redirect: 'follow' }); if (!response.ok && response.status !== 206) { return jsonResponse(res, { error: `Upstream fetch failed: ${response.status}` }, 502, origin); } const contentLength = response.headers.get('Content-Length'); console.log(`[Proxy] ${method} response: ${response.status}, Content-Length: ${contentLength}`); const responseHeaders = { 'Content-Type': response.headers.get('Content-Type') || 'application/octet-stream', 'Access-Control-Allow-Origin': origin, 'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS', 'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key', 'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges', 'Cache-Control': 'no-store', }; if (contentLength) { responseHeaders['Content-Length'] = contentLength; } if (response.headers.get('Content-Range')) { responseHeaders['Content-Range'] = response.headers.get('Content-Range'); } if (response.headers.get('Accept-Ranges')) { responseHeaders['Accept-Ranges'] = response.headers.get('Accept-Ranges'); } // HEAD 请求不返回 body if (method === 'HEAD') { res.writeHead(response.status, responseHeaders); return res.end(); } // 对于 GET 请求,使用简单的方式:先读取全部数据再发送 // 这样更可靠,虽然会占用更多内存 console.log(`[Proxy] Reading response body...`); const arrayBuffer = await response.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); console.log(`[Proxy] Got ${buffer.length} bytes, sending to client...`); // 更新实际的 Content-Length responseHeaders['Content-Length'] = buffer.length.toString(); res.writeHead(response.status, responseHeaders); res.end(buffer); console.log(`[Proxy] Done sending ${buffer.length} bytes`); } catch (error) { console.error('[Proxy] Download error:', error.message); // 只有在还没发送响应头时才返回错误 if (!res.headersSent) { jsonResponse(res, { error: error.message }, 500, origin); } else { res.end(); } } } // ==================== 主路由 ==================== const server = http.createServer(async (req, res) => { const url = new URL(req.url, `http://${req.headers.host}`); const pathname = url.pathname; const searchParams = url.searchParams.toString(); const origin = req.headers.origin || '*'; // CORS 预检 if (req.method === 'OPTIONS') { return handleCORS(res, origin); } try { // ===== OCR 路由 (兼容 CF Worker) ===== // MinerU if (pathname === '/mineru/upload' && req.method === 'POST') { return await handleMinerUUpload(req, res, origin); } if (pathname.startsWith('/mineru/result/') && req.method === 'GET') { const batchId = pathname.split('/mineru/result/')[1]; return await handleMinerUResult(req, res, batchId, origin); } // Doc2X if (pathname === '/doc2x/upload' && req.method === 'POST') { return await handleDoc2XUpload(req, res, origin); } if (pathname.startsWith('/doc2x/status/') && req.method === 'GET') { const uid = pathname.split('/doc2x/status/')[1]; return await handleDoc2XStatus(req, res, uid, origin); } if (pathname === '/doc2x/convert' && req.method === 'POST') { return await handleDoc2XConvert(req, res, origin); } if (pathname.startsWith('/doc2x/convert/result/') && req.method === 'GET') { const uid = pathname.split('/doc2x/convert/result/')[1]; return await handleDoc2XConvertResult(req, res, uid, origin); } // ZIP 代理 if ((pathname === '/mineru/zip' || pathname === '/doc2x/zip') && (req.method === 'GET' || req.method === 'HEAD')) { const zipUrl = url.searchParams.get('url'); return await handleProxyDownload(req, res, zipUrl, origin); } // ===== LLM & OCR 代理 ===== if (pathname.startsWith('/api/llm/')) { const pathParts = pathname.replace('/api/llm/', '').split('/'); const provider = pathParts[0]; const restPath = pathParts.slice(1).join('/'); let targetBaseUrl = ''; if (provider === 'openai') targetBaseUrl = 'https://api.openai.com'; else if (provider === 'deepseek') targetBaseUrl = 'https://api.deepseek.com'; else if (provider === 'anthropic') targetBaseUrl = 'https://api.anthropic.com'; else if (provider === 'gemini') targetBaseUrl = 'https://generativelanguage.googleapis.com'; else if (provider === 'mistral') targetBaseUrl = 'https://api.mistral.ai'; else if (provider === 'aliyun' || provider === 'tongyi') targetBaseUrl = 'https://dashscope.aliyuncs.com/compatible-mode'; else if (provider === 'zhipu') targetBaseUrl = 'https://open.bigmodel.cn/api/paas/v4'; else return jsonResponse(res, { error: 'Unknown LLM provider' }, 400, origin); // 智谱 API 使用 v4 而非 v1,需要去除 let adjustedPath = restPath; if (provider === 'zhipu' && adjustedPath.startsWith('v1/')) { adjustedPath = adjustedPath.replace('v1/', ''); } let targetUrl = `${targetBaseUrl}/${adjustedPath}`; if (searchParams) targetUrl += `?${searchParams}`; return await handleLLMProxy(req, res, targetUrl, provider, origin); } if (pathname.startsWith('/api/mistral/')) { // 代理 mistral ocr 服务商的直接转发 const restPath = pathname.replace('/api/mistral/', ''); let targetUrl = `https://api.mistral.ai/${restPath}`; if (searchParams) targetUrl += `?${searchParams}`; return await handleLLMProxy(req, res, targetUrl, 'mistral', origin); } // 如果是前端未更新 API 路径,直接发往 /v1/files 等 Mistral 官方路径但指向了 localhost if (pathname.startsWith('/v1/')) { let targetUrl = `https://api.mistral.ai${pathname}`; if (searchParams) targetUrl += `?${searchParams}`; return await handleLLMProxy(req, res, targetUrl, 'mistral', origin); } // ===== OSS 上传 ===== if (pathname === '/api/upload/oss' && req.method === 'POST') { return await handleOssUpload(req, res, origin); } // ===== 学术搜索路由 (兼容 CF Worker) ===== if (pathname.startsWith('/api/semanticscholar/')) { const path = pathname.replace('/api/semanticscholar/', ''); return await proxySemanticScholar(req, res, path, searchParams, origin); } if (pathname.startsWith('/api/pubmed/')) { const path = pathname.replace('/api/pubmed/', ''); return await proxyPubMed(req, res, path, searchParams, origin); } if (pathname.startsWith('/api/crossref/')) { const path = pathname.replace('/api/crossref/', ''); return await proxyCrossRef(req, res, path, searchParams, origin); } if (pathname.startsWith('/api/openalex/')) { const path = pathname.replace('/api/openalex/', ''); return await proxyOpenAlex(req, res, path, searchParams, origin); } if (pathname.startsWith('/api/arxiv/')) { const path = pathname.replace('/api/arxiv/', ''); return await proxyArXiv(req, res, path, searchParams, origin); } // ===== 本地文件读取接口 ===== if (pathname === '/api/local/read-first-input' && req.method === 'GET') { try { const inputDir = join(dirname(__dirname), 'input'); if (!existsSync(inputDir)) { return jsonResponse(res, { error: 'Input directory not found' }, 404, origin); } const fs = await import('fs/promises'); const files = await fs.readdir(inputDir); // 过滤支持的文件格式 (pdf, md, txt 等) const supportedExts = ['.pdf', '.md', '.txt', '.png', '.jpg', '.jpeg']; const validFiles = files.filter(f => supportedExts.some(ext => f.toLowerCase().endsWith(ext))); if (validFiles.length === 0) { return jsonResponse(res, { error: 'No valid files found in input directory' }, 404, origin); } // 取第一个文件 const firstFile = validFiles[0]; const filePath = join(inputDir, firstFile); const stats = await fs.stat(filePath); // 设置类型 const ext = firstFile.toLowerCase().split('.').pop(); const mimeTypes = { 'pdf': 'application/pdf', 'md': 'text/markdown', 'txt': 'text/plain', 'png': 'image/png', 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg' }; const contentType = mimeTypes[ext] || 'application/octet-stream'; // 设置响应头并使用流发送 res.writeHead(200, { 'Content-Type': contentType, 'Content-Length': stats.size.toString(), 'Content-Disposition': `attachment; filename="${encodeURIComponent(firstFile)}"`, 'Access-Control-Allow-Origin': origin, 'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS', 'Access-Control-Expose-Headers': 'Content-Disposition, Content-Length' }); const readStream = createReadStream(filePath); readStream.pipe(res); return; // Stream 将接管响应 } catch (err) { console.error('[Local API] Error reading input:', err); return jsonResponse(res, { error: 'Failed to read input files' }, 500, origin); } } // PDF 下载代理 if (pathname === '/api/pdf/download' && (req.method === 'GET' || req.method === 'HEAD')) { const pdfUrl = url.searchParams.get('url'); return await handleProxyDownload(req, res, pdfUrl, origin); } // ===== 健康检查(兼容两个路径)===== if (pathname === '/health' || pathname === '/api/health') { return jsonResponse(res, { status: 'ok', timestamp: Date.now(), version: '1.0.0', database: dbConnected ? 'connected' : 'not_configured', services: { ocr: { mineru: { enabled: true, hasToken: !!process.env.MINERU_API_TOKEN }, doc2x: { enabled: true, hasToken: !!process.env.DOC2X_API_TOKEN }, }, academic: { semanticscholar: { enabled: true, hasApiKey: !!process.env.SEMANTIC_SCHOLAR_API_KEY }, pubmed: { enabled: true, hasApiKey: !!process.env.PUBMED_API_KEY }, crossref: { enabled: true }, openalex: { enabled: true }, arxiv: { enabled: true }, } } }, 200, origin); } // ===== 持久化 API(交给 Express 处理)===== // 匹配 /api/documents, /api/user, /api/glossary, /api/chat, /api/references, /api/prompt-pool const persistentApiPrefixes = [ '/api/documents', '/api/user', '/api/glossary', '/api/chat', '/api/references', '/api/prompt-pool' ]; if (persistentApiPrefixes.some(prefix => pathname.startsWith(prefix))) { // 交给 Express 应用处理 return app(req, res); } // 404 if (!res.headersSent) { jsonResponse(res, { error: 'Not Found' }, 404, origin); } } catch (error) { console.error('Server error:', error); if (!res.headersSent) { jsonResponse(res, { error: error.message || 'Internal Server Error' }, 500, origin); } } }); // ==================== 启动服务器 ==================== // 设置服务器超时时间(5分钟,用于大文件传输) server.timeout = 300000; server.keepAliveTimeout = 120000; server.headersTimeout = 120000; server.listen(PORT, async () => { // 等待数据库初始化完成 await new Promise(resolve => setTimeout(resolve, 500)); console.log(` ╔═══════════════════════════════════════════════════════╗ ║ Paper Burner Local Proxy Server ║ ╠═══════════════════════════════════════════════════════╣ ║ Port: ${PORT.toString().padEnd(47)} ║ ║ URL: http://localhost:${PORT.toString().padEnd(30)} ║ ╠═══════════════════════════════════════════════════════╣ ║ Database: ${(dbConnected ? '✓ PostgreSQL Connected' : '✗ Not configured').padEnd(38)} ║ ║ Auth: ${(process.env.AUTH_CHECK_URL ? '✓ External' : '✓ Default (xueai.art)').padEnd(38)} ║ ╠═══════════════════════════════════════════════════════╣ ║ LLM Providers: ║ ║ Zhipu AI: ${(process.env.ZHIPU_API_KEY ? '✓ ' + process.env.ZHIPU_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)} ║ ║ Aliyun: ${(process.env.DASHSCOPE_API_KEY ? '✓ ' + process.env.DASHSCOPE_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)} ║ ║ ║ ║ OCR Services: ║ ║ MinerU Token: ${(process.env.MINERU_API_TOKEN ? '✓ Configured' : '✗ Not set').padEnd(36)} ║ ║ Doc2X Token: ${(process.env.DOC2X_API_TOKEN ? '✓ Configured' : '✗ Not set').padEnd(36)} ║ ║ Mistral Key: ${(process.env.MISTRAL_API_KEY ? '✓ ' + process.env.MISTRAL_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)} ║ ║ ║ ║ Academic Search: ║ ║ Semantic Scholar, PubMed, CrossRef, ║ ║ OpenAlex, arXiv ║ ║ ║ ║ OSS Upload: ║ ║ Configured: ${((process.env.OSS_BUCKET || process.env.OSS_BUCKET_NAME) && process.env.OSS_ACCESS_KEY_ID ? '✓ Yes' : '✗ No').padEnd(36)} ║ ╠═══════════════════════════════════════════════════════╣ ║ Persistent API: ║ ║ /api/documents, /api/user, /api/glossary ║ ║ /api/chat, /api/references, /api/prompt-pool ║ ╠═══════════════════════════════════════════════════════╣ ║ 在 Paper Burner 中设置代理地址为: ║ ║ http://localhost:${PORT.toString().padEnd(38)} ║ ╚═══════════════════════════════════════════════════════╝ `); });