paper-burner/local-proxy/server.js

1142 lines
41 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

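/**
 * Paper Burner local proxy server
 *
 * Functionally equivalent to the Cloudflare Worker, so users can deploy and
 * run it locally with minimal setup.
 *
 * Supported services:
 * 1. OCR proxy (MinerU / Doc2X)
 * 2. Academic search proxy (Semantic Scholar / PubMed / CrossRef / OpenAlex / arXiv)
 * 3. PDF/ZIP download proxy
 *
 * Usage:
 * 1. npm install
 * 2. Copy .env.example to .env and fill in the configuration
 * 3. npm start
 *
 * Then set the proxy address in the Paper Burner front end to http://localhost:3456
 */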
import http from 'http';
import { URL, URLSearchParams } from 'url';
import fetch from 'node-fetch';
import { createReadStream, readFileSync, existsSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { Readable } from 'stream';
import OSS from 'ali-oss';
const __dirname = dirname(fileURLToPath(import.meta.url));
// ==================== 配置加载 ====================
// 尝试加载 .env 文件
/**
 * Load KEY=VALUE pairs from the `.env` file next to this script into
 * `process.env`.
 *
 * Blank lines, lines starting with `#`, and lines without `=` are skipped.
 * One leading and one trailing quote character (`"` or `'`) are stripped from
 * each value. A non-empty value always overwrites `process.env[key]`; an empty
 * value is only stored when the variable is not defined yet.
 */
function loadEnv() {
  const candidateFiles = [
    join(__dirname, '.env'), // configuration inside the local-proxy directory
  ];
  for (const file of candidateFiles) {
    if (!existsSync(file)) continue;
    console.log(`[Env] Loading variables from: ${file}`);
    const rows = readFileSync(file, 'utf-8').split('\n');
    rows.forEach((row) => {
      const entry = row.trim();
      if (entry === '' || entry.startsWith('#')) return;
      const separator = entry.indexOf('=');
      if (separator === -1) return;
      const key = entry.slice(0, separator).trim();
      // Strip at most one quote character from each end of the value.
      const value = entry
        .slice(separator + 1)
        .trim()
        .replace(/^["']|["']$/g, '');
      // Non-empty values win; empty values never clobber an existing variable.
      const isDefined = process.env[key] !== undefined;
      if (value !== '' || !isDefined) {
        process.env[key] = value;
      }
    });
  }
}
loadEnv();
/**
 * Parse a TCP port from an environment string.
 *
 * @param {string|undefined} raw - Raw value (typically `process.env.PORT`).
 * @param {number} [fallback=3456] - Port returned when `raw` is missing or is
 *   not an integer in the range 0-65535.
 * @returns {number} A port number that is safe to pass to `server.listen`.
 */
function resolvePort(raw, fallback = 3456) {
  const port = Number.parseInt(raw || '', 10);
  // parseInt yields NaN for garbage; listen() rejects out-of-range values.
  return Number.isInteger(port) && port >= 0 && port <= 65535 ? port : fallback;
}
// Listening port; falls back to 3456 when PORT is unset or invalid.
const PORT = resolvePort(process.env.PORT);
// Base URLs of the upstream OCR services.
const MINERU_BASE_URL = 'https://mineru.net/api/v4';
const DOC2X_BASE_URL = 'https://v2.doc2x.noedgeai.com';
// ==================== OSS 配置 ====================
// Shared OSS client; stays null until all required OSS_* variables are present.
let ossClient = null;

/**
 * Create the module-level OSS client from environment variables.
 *
 * Requires OSS_REGION, OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET and a bucket
 * name (OSS_BUCKET, or OSS_BUCKET_NAME as a fallback). When any of them is
 * missing a warning is printed and `ossClient` is left untouched.
 */
function initOssClient() {
  const {
    OSS_REGION: region,
    OSS_ACCESS_KEY_ID: accessKeyId,
    OSS_ACCESS_KEY_SECRET: accessKeySecret,
  } = process.env;
  const bucket = process.env.OSS_BUCKET || process.env.OSS_BUCKET_NAME;

  const isConfigured = Boolean(region && accessKeyId && accessKeySecret && bucket);
  if (!isConfigured) {
    console.warn('[OSS] OSS_REGION, OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, or OSS_BUCKET not found in environment.');
    return;
  }

  // Make sure the region carries the "oss-" prefix (cn-beijing -> oss-cn-beijing).
  const normalizedRegion = region.startsWith('oss-') ? region : `oss-${region}`;
  ossClient = new OSS({
    region: normalizedRegion,
    accessKeyId,
    accessKeySecret,
    bucket,
    secure: true, // always talk to OSS over HTTPS
  });
  console.log(`[OSS] Configured with bucket: ${bucket} at region: ${normalizedRegion}`);
}
initOssClient();
// ==================== 工具函数 ====================
/**
 * Send `data` as a JSON response together with the proxy's CORS headers.
 *
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {*} data - Value serialized with `JSON.stringify`.
 * @param {number} [status=200] - HTTP status code.
 * @param {string} [origin='*'] - Value for `Access-Control-Allow-Origin`.
 */
function jsonResponse(res, data, status = 200, origin = '*') {
  const allowedMethods = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'];
  const allowedHeaders = [
    'Content-Type',
    'Range',
    'X-Auth-Key',
    'X-Api-Key',
    'X-MinerU-Key',
    'X-Doc2X-Key',
    'Authorization',
    'x-goog-api-key',
    'anthropic-version',
  ];
  const exposedHeaders = ['Content-Length', 'Content-Range', 'Accept-Ranges'];
  const headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': origin,
    'Access-Control-Allow-Methods': allowedMethods.join(', '),
    'Access-Control-Allow-Headers': allowedHeaders.join(', '),
    'Access-Control-Expose-Headers': exposedHeaders.join(', '),
  };
  res.writeHead(status, headers);
  const payload = JSON.stringify(data);
  res.end(payload);
}
/**
 * Answer a CORS preflight request with 204 and no body.
 *
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {string} [origin='*'] - Value for `Access-Control-Allow-Origin`.
 */
function handleCORS(res, origin = '*') {
  const allowedMethods = ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'];
  const allowedHeaders = [
    'Content-Type',
    'Range',
    'X-Auth-Key',
    'X-Api-Key',
    'X-MinerU-Key',
    'X-Doc2X-Key',
    'Authorization',
    'x-goog-api-key',
    'anthropic-version',
  ];
  const exposedHeaders = ['Content-Length', 'Content-Range', 'Accept-Ranges'];
  const preflightHeaders = {
    'Access-Control-Allow-Origin': origin,
    'Access-Control-Allow-Methods': allowedMethods.join(', '),
    'Access-Control-Allow-Headers': allowedHeaders.join(', '),
    'Access-Control-Expose-Headers': exposedHeaders.join(', '),
    'Access-Control-Max-Age': '86400', // preflight result may be cached for 86400 seconds
  };
  res.writeHead(204, preflightHeaders);
  res.end();
}
/**
 * Read an OCR service API token from the environment. Tokens sent by the
 * front end are never used (pure back-end proxy mode).
 *
 * An optional leading "Bearer " prefix and surrounding whitespace are removed.
 * Only a masked preview of the token is ever written to the log.
 *
 * @param {string} service - Service name: 'MINERU' selects MINERU_API_TOKEN,
 *   any other value selects DOC2X_API_TOKEN.
 * @returns {string|null} The normalized token, or null when it is missing or
 *   empty after normalization.
 */
function getToken(service) {
  const envKey = service === 'MINERU' ? 'MINERU_API_TOKEN' : 'DOC2X_API_TOKEN';
  const token = (process.env[envKey] || '').replace(/^\s*Bearer\s+/i, '').trim();
  if (!token) {
    console.warn(`[${service}] Token not found in environment variable ${envKey}`);
    return null;
  }
  // Never print a whole secret: short tokens are hidden completely, long ones
  // reveal only their first and last four characters.
  const preview = token.length >= 16
    ? `${token.slice(0, 4)}...${token.slice(-4)}`
    : `(hidden, ${token.length} chars)`;
  console.log(`[${service}] Token from environment: ${preview}`);
  return token;
}
/**
 * Collect the whole request body into one Buffer.
 *
 * @param {object} req - Event emitter producing 'data', 'end' and 'error'.
 * @returns {Promise<Buffer>} Resolves with the concatenated chunks on 'end';
 *   rejects with the emitted error on 'error'.
 */
async function readBody(req) {
  const received = [];
  return new Promise((resolve, reject) => {
    const onChunk = (piece) => {
      received.push(piece);
    };
    const onEnd = () => {
      resolve(Buffer.concat(received));
    };
    req.on('data', onChunk);
    req.on('end', onEnd);
    req.on('error', reject);
  });
}
/**
 * Minimal multipart/form-data parser.
 *
 * Reads the whole request body into memory and splits it on the boundary
 * announced in the Content-Type header. Only parts that carry a `name="..."`
 * parameter are returned.
 *
 * @param {object} req - Incoming request; `req.headers['content-type']` must
 *   contain the boundary.
 * @returns {Promise<Array<{name: string, filename: (string|null), data: (Buffer|string)}>>}
 *   One entry per part. `data` is a Buffer for parts with a filename and a
 *   string otherwise. Empty when the body contains no opening boundary.
 * @throws {Error} 'No boundary found' when the Content-Type has no boundary.
 */
async function parseMultipart(req) {
  const contentType = req.headers['content-type'] || '';
  const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^;]+))/i);
  if (!boundaryMatch) throw new Error('No boundary found');
  const boundary = (boundaryMatch[1] || boundaryMatch[2]).trim();
  const body = await readBody(req);
  const parts = [];

  // The first delimiter has no preceding CRLF; every later one does. Searching
  // for CRLF + "--boundary" avoids splitting on a boundary-like byte sequence
  // that merely appears in the middle of a line of file content.
  const openingDelimiter = Buffer.from(`--${boundary}`);
  const delimiter = Buffer.from(`\r\n--${boundary}`);
  const opening = body.indexOf(openingDelimiter);
  if (opening === -1) return parts;

  let start = opening + openingDelimiter.length + 2; // skip the CRLF after the delimiter
  while (start < body.length) {
    // Start two bytes early: for an empty part the CRLF that ended the previous
    // delimiter line is also the CRLF that introduces the next delimiter.
    const next = body.indexOf(delimiter, start - 2);
    if (next === -1) break;
    const partData = body.subarray(start, Math.max(start, next));
    const headerEnd = partData.indexOf('\r\n\r\n');
    if (headerEnd !== -1) {
      const headerStr = partData.subarray(0, headerEnd).toString();
      const content = partData.subarray(headerEnd + 4);
      // Require a separator before "name=" so that it cannot match the tail
      // of a preceding filename="..." parameter.
      const nameMatch = headerStr.match(/(?:^|[;\s])name="([^"]+)"/i);
      const filenameMatch = headerStr.match(/(?:^|[;\s])filename="([^"]+)"/i);
      if (nameMatch) {
        parts.push({
          name: nameMatch[1],
          filename: filenameMatch ? filenameMatch[1] : null,
          data: filenameMatch ? content : content.toString(),
        });
      }
    }
    start = next + delimiter.length + 2;
  }
  return parts;
}
// ==================== MinerU 处理 ====================
/**
 * Write one MinerU log line to the console.
 *
 * @param {string} level - Log level; printed upper-cased.
 * @param {string} message - Text appended after the prefix.
 * @param {*} [data=null] - Optional extra value. Objects are printed as
 *   indented JSON, other values as-is. Falsy values are not printed.
 */
function logMinerU(level, message, data = null) {
  const line = `[${new Date().toISOString()}] [MinerU] [${level.toUpperCase()}] ${message}`;
  if (!data) {
    console.log(line);
    return;
  }
  const rendered = typeof data === 'object' ? JSON.stringify(data, null, 2) : data;
  console.log(line, rendered);
}
/**
 * Handle POST /mineru/upload.
 *
 * Steps: parse the multipart form, read the MinerU token from the
 * environment, request an upload URL from the MinerU batch API, PUT the file
 * to that URL, and answer with the batch id. Every step is logged under a
 * per-request id.
 *
 * Responses: 400 when the form has no `file` part, 401 when no token is
 * configured, 500 for any upstream or unexpected failure, 200 on success.
 *
 * @param {object} req - Incoming multipart/form-data request.
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleMinerUUpload(req, res, origin) {
  const startTime = Date.now();
  const requestId = `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  logMinerU('info', `[${requestId}] ====== 上传请求开始 ======`);
  try {
    // 1. Parse the multipart form.
    logMinerU('info', `[${requestId}] 步骤1: 解析 multipart 表单数据`);
    const parseStart = Date.now();
    const parts = await parseMultipart(req);
    const parseTime = Date.now() - parseStart;
    logMinerU('info', `[${requestId}] 表单解析完成`, { partsCount: parts.length, parseTimeMs: parseTime });
    const filePart = parts.find(p => p.name === 'file');
    if (!filePart || !filePart.filename) {
      logMinerU('error', `[${requestId}] 错误: 未找到文件`, { parts: parts.map(p => ({ name: p.name, filename: p.filename })) });
      return jsonResponse(res, { error: 'No file provided', requestId }, 400, origin);
    }
    logMinerU('info', `[${requestId}] 文件信息`, {
      filename: filePart.filename,
      size: filePart.data?.length || 0,
      sizeReadable: `${((filePart.data?.length || 0) / 1024).toFixed(2)} KB`
    });

    // 2. Read the token.
    logMinerU('info', `[${requestId}] 步骤2: 获取 MinerU API Token`);
    const token = getToken('MINERU');
    if (!token) {
      logMinerU('error', `[${requestId}] 错误: MinerU Token 未配置`);
      return jsonResponse(res, {
        error: 'MinerU API Token required. Configure MINERU_API_TOKEN in .env',
        requestId
      }, 401, origin);
    }
    logMinerU('info', `[${requestId}] Token 验证通过`);

    // 3. Read the processing options from the remaining form fields.
    logMinerU('info', `[${requestId}] 步骤3: 解析处理参数`);
    const getField = (name, defaultVal) => {
      const part = parts.find(p => p.name === name);
      return part ? part.data : defaultVal;
    };
    const params = {
      // The boolean switches are on unless the field is literally 'false'.
      is_ocr: getField('is_ocr', 'true') !== 'false',
      enable_formula: getField('enable_formula', 'true') !== 'false',
      enable_table: getField('enable_table', 'true') !== 'false',
      language: getField('language', 'ch'),
      data_id: getField('data_id', null),
      page_ranges: getField('page_ranges', null)
    };
    logMinerU('info', `[${requestId}] 处理参数`, params);

    // 4. Ask MinerU for an upload URL. The payload is built once so the debug
    //    log always shows exactly what is sent.
    logMinerU('info', `[${requestId}] 步骤4: 向 MinerU API 申请上传链接`);
    const batchEndpoint = `${MINERU_BASE_URL}/file-urls/batch`;
    const requestPayload = {
      enable_formula: params.enable_formula,
      enable_table: params.enable_table,
      language: params.language,
      files: [{
        name: filePart.filename,
        is_ocr: params.is_ocr,
        ...(params.data_id && { data_id: params.data_id }),
        ...(params.page_ranges && { page_ranges: params.page_ranges })
      }]
    };
    logMinerU('debug', `[${requestId}] 请求详情`, {
      url: batchEndpoint,
      method: 'POST',
      body: requestPayload
    });
    const apiStartTime = Date.now();
    const uploadUrlResponse = await fetch(batchEndpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestPayload),
    });
    const apiTime = Date.now() - apiStartTime;
    logMinerU('info', `[${requestId}] MinerU API 响应`, { status: uploadUrlResponse.status, statusText: uploadUrlResponse.statusText, responseTimeMs: apiTime });
    if (!uploadUrlResponse.ok) {
      const errorText = await uploadUrlResponse.text();
      logMinerU('error', `[${requestId}] MinerU API 返回错误`, { status: uploadUrlResponse.status, error: errorText });
      throw new Error(`MinerU申请上传链接失败: ${errorText}`);
    }
    const uploadData = await uploadUrlResponse.json();
    logMinerU('debug', `[${requestId}] MinerU API 响应数据`, uploadData);
    if (uploadData.code !== 0) {
      logMinerU('error', `[${requestId}] MinerU API 业务错误`, { code: uploadData.code, msg: uploadData.msg });
      throw new Error(`MinerU返回错误: ${uploadData.msg}`);
    }
    const batchId = uploadData.data?.batch_id;
    const ossUrl = uploadData.data?.file_urls?.[0];
    // Fail with a clear message instead of letting fetch(undefined) throw.
    if (typeof ossUrl !== 'string' || ossUrl === '') {
      logMinerU('error', `[${requestId}] MinerU API 未返回上传链接`, { batchId });
      throw new Error('MinerU未返回上传链接');
    }
    logMinerU('info', `[${requestId}] 获取上传链接成功`, { batchId, ossUrl: `${ossUrl.substring(0, 50)}...` });

    // 5. Upload the file bytes to the URL MinerU handed out.
    logMinerU('info', `[${requestId}] 步骤5: 上传文件到 OSS`);
    logMinerU('info', `[${requestId}] OSS 上传详情`, { url: `${ossUrl.substring(0, 60)}...`, fileSize: filePart.data.length });
    const ossStartTime = Date.now();
    const ossResponse = await fetch(ossUrl, {
      method: 'PUT',
      body: filePart.data,
      headers: {
        'Content-Length': filePart.data.length.toString(),
      },
    });
    const ossTime = Date.now() - ossStartTime;
    logMinerU('info', `[${requestId}] OSS 上传完成`, { status: ossResponse.status, uploadTimeMs: ossTime });
    if (!ossResponse.ok) {
      logMinerU('error', `[${requestId}] OSS 上传失败`, { status: ossResponse.status, statusText: ossResponse.statusText });
      throw new Error(`OSS上传失败: ${ossResponse.status}`);
    }

    const totalTime = Date.now() - startTime;
    logMinerU('info', `[${requestId}] ====== 上传请求完成 ======`, {
      batchId,
      filename: filePart.filename,
      totalTimeMs: totalTime,
      phases: {
        parse: parseTime,
        mineruApi: apiTime,
        ossUpload: ossTime
      }
    });
    jsonResponse(res, {
      success: true,
      batch_id: batchId,
      file_name: filePart.filename,
      service: 'mineru',
      requestId,
      timing: { totalMs: totalTime }
    }, 200, origin);
  } catch (error) {
    const totalTime = Date.now() - startTime;
    logMinerU('error', `[${requestId}] ====== 上传请求失败 ======`, {
      error: error.message,
      stack: error.stack,
      totalTimeMs: totalTime
    });
    jsonResponse(res, { error: error.message, requestId }, 500, origin);
  }
}
/**
 * Handle GET /mineru/result/:batchId.
 *
 * The special id `__health__` only checks that a token is configured. Any
 * other id is looked up through the MinerU batch-result API and the `data`
 * object of the answer is merged into the response.
 *
 * Responses: 401 without a configured token, 400 for an empty or malformed
 * id, 500 for upstream failures, 200 otherwise.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} batchId - Path segment following /mineru/result/.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleMinerUResult(req, res, batchId, origin) {
  try {
    const token = getToken('MINERU');
    if (!token) {
      return jsonResponse(res, { error: 'MinerU API Token required' }, 401, origin);
    }
    if (batchId === '__health__') {
      return jsonResponse(res, { success: true, service: 'mineru', health: true, timestamp: Date.now() }, 200, origin);
    }
    if (!batchId) {
      return jsonResponse(res, { error: 'batch_id is required' }, 400, origin);
    }
    // The id comes straight from the request path. Decode then re-encode it so
    // it stays a single path segment in the upstream URL and is not
    // double-encoded when the client already percent-encoded it.
    let encodedBatchId;
    try {
      encodedBatchId = encodeURIComponent(decodeURIComponent(batchId));
    } catch {
      return jsonResponse(res, { error: 'Invalid batch_id' }, 400, origin);
    }
    const response = await fetch(`${MINERU_BASE_URL}/extract-results/batch/${encodedBatchId}`, {
      headers: {
        'Authorization': `Bearer ${token}`,
        'Accept': 'application/json',
      },
    });
    if (!response.ok) {
      throw new Error(`MinerU查询失败: ${response.statusText}`);
    }
    const data = await response.json();
    if (data.code !== 0) {
      throw new Error(`MinerU返回错误: ${data.msg}`);
    }
    jsonResponse(res, { success: true, service: 'mineru', ...data.data }, 200, origin);
  } catch (error) {
    console.error('[MinerU] Result error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
// ==================== Doc2X 处理 ====================
/**
 * Handle POST /doc2x/upload.
 *
 * Parses the multipart form, requests a pre-upload URL from Doc2X, PUTs the
 * file bytes to that URL and answers with the `uid` Doc2X assigned.
 *
 * Responses: 400 when the form has no `file` part, 401 when no token is
 * configured, 500 for upstream or unexpected failures, 200 on success.
 *
 * @param {object} req - Incoming multipart/form-data request.
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleDoc2XUpload(req, res, origin) {
  try {
    const formParts = await parseMultipart(req);
    const upload = formParts.find((part) => part.name === 'file');
    const hasFile = Boolean(upload && upload.filename);
    if (!hasFile) {
      return jsonResponse(res, { error: 'No file provided' }, 400, origin);
    }

    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }
    console.log(`[Doc2X] Uploading: ${upload.filename}`);

    // Ask Doc2X where the file should be uploaded.
    const authHeaders = { 'Authorization': `Bearer ${token}` };
    const preupload = await fetch(`${DOC2X_BASE_URL}/api/v2/parse/preupload`, {
      method: 'POST',
      headers: authHeaders,
    });
    if (!preupload.ok) {
      throw new Error(`Doc2X预上传失败: ${await preupload.text()}`);
    }
    const preuploadJson = await preupload.json();
    if (preuploadJson.code !== 'success') {
      throw new Error(`Doc2X返回错误: ${preuploadJson.msg}`);
    }
    const { uid, url: uploadUrl } = preuploadJson.data;

    // Send the raw bytes to the storage URL.
    const fileBytes = upload.data;
    console.log(`[Doc2X] Uploading to OSS: ${fileBytes.length} bytes`);
    const putResult = await fetch(uploadUrl, {
      method: 'PUT',
      body: fileBytes,
      headers: {
        'Content-Length': fileBytes.length.toString(),
      },
    });
    if (!putResult.ok) {
      throw new Error(`Doc2X OSS上传失败: ${putResult.status}`);
    }

    const result = {
      success: true,
      uid,
      file_name: upload.filename,
      service: 'doc2x',
    };
    jsonResponse(res, result, 200, origin);
  } catch (error) {
    console.error('[Doc2X] Upload error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
/**
 * Handle GET /doc2x/status/:uid.
 *
 * The special uid `__health__` only checks that a token is configured. Any
 * other uid is forwarded to the Doc2X parse-status API. A non-"success"
 * upstream code is reported with HTTP 200 and `success: false`.
 *
 * Responses: 401 without a configured token, 400 for an empty or malformed
 * uid, 500 for upstream failures, 200 otherwise.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} uid - Path segment following /doc2x/status/.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleDoc2XStatus(req, res, uid, origin) {
  try {
    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }
    if (uid === '__health__') {
      return jsonResponse(res, { success: true, service: 'doc2x', health: true, timestamp: Date.now() }, 200, origin);
    }
    if (!uid) {
      return jsonResponse(res, { error: 'uid is required' }, 400, origin);
    }
    // The uid comes straight from the request path; encode it so characters
    // such as "&" or "=" cannot add extra parameters to the upstream query.
    let encodedUid;
    try {
      encodedUid = encodeURIComponent(decodeURIComponent(uid));
    } catch {
      return jsonResponse(res, { error: 'Invalid uid' }, 400, origin);
    }
    const response = await fetch(`${DOC2X_BASE_URL}/api/v2/parse/status?uid=${encodedUid}`, {
      headers: { 'Authorization': `Bearer ${token}` },
    });
    if (!response.ok) {
      throw new Error(`Doc2X查询失败: ${response.statusText}`);
    }
    const data = await response.json();
    if (data.code !== 'success') {
      return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin);
    }
    jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin);
  } catch (error) {
    console.error('[Doc2X] Status error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
/**
 * Handle POST /doc2x/convert.
 *
 * Expects a JSON object body with `uid` (required) and the optional fields
 * `to` (default 'md'), `formula_mode` (default 'normal'), `filename` and
 * `merge_cross_page_forms` (default false), and forwards them to the Doc2X
 * convert API. A non-"success" upstream code is reported with HTTP 200 and
 * `success: false`.
 *
 * Responses: 400 for a body that is not a JSON object or lacks `uid`, 401
 * without a configured token, 500 for upstream failures, 200 otherwise.
 *
 * @param {object} req - Incoming request carrying the JSON body.
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleDoc2XConvert(req, res, origin) {
  try {
    const rawBody = (await readBody(req)).toString();
    // A malformed body is the client's mistake: answer 400 rather than 500.
    let body;
    try {
      body = JSON.parse(rawBody);
    } catch {
      return jsonResponse(res, { error: 'Invalid JSON body' }, 400, origin);
    }
    if (body === null || typeof body !== 'object') {
      return jsonResponse(res, { error: 'Invalid JSON body' }, 400, origin);
    }
    const { uid, to = 'md', formula_mode = 'normal', filename, merge_cross_page_forms = false } = body;
    if (!uid) {
      return jsonResponse(res, { error: 'uid is required' }, 400, origin);
    }
    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }
    const response = await fetch(`${DOC2X_BASE_URL}/api/v2/convert/parse`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ uid, to, formula_mode, ...(filename && { filename }), merge_cross_page_forms }),
    });
    if (!response.ok) {
      throw new Error(`Doc2X转换失败: ${await response.text()}`);
    }
    const data = await response.json();
    if (data.code !== 'success') {
      return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin);
    }
    jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin);
  } catch (error) {
    console.error('[Doc2X] Convert error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
/**
 * Handle GET /doc2x/convert/result/:uid.
 *
 * Forwards the uid to the Doc2X convert-result API. A non-"success" upstream
 * code is reported with HTTP 200 and `success: false`.
 *
 * Responses: 401 without a configured token, 400 for an empty or malformed
 * uid, 500 for upstream failures, 200 otherwise.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} uid - Path segment following /doc2x/convert/result/.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleDoc2XConvertResult(req, res, uid, origin) {
  try {
    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }
    if (!uid) {
      return jsonResponse(res, { error: 'uid is required' }, 400, origin);
    }
    // The uid comes straight from the request path; encode it so characters
    // such as "&" or "=" cannot add extra parameters to the upstream query.
    let encodedUid;
    try {
      encodedUid = encodeURIComponent(decodeURIComponent(uid));
    } catch {
      return jsonResponse(res, { error: 'Invalid uid' }, 400, origin);
    }
    const response = await fetch(`${DOC2X_BASE_URL}/api/v2/convert/parse/result?uid=${encodedUid}`, {
      headers: { 'Authorization': `Bearer ${token}` },
    });
    if (!response.ok) {
      throw new Error(`Doc2X查询转换结果失败: ${response.statusText}`);
    }
    const data = await response.json();
    if (data.code !== 'success') {
      return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin);
    }
    jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin);
  } catch (error) {
    console.error('[Doc2X] Convert result error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
// ==================== LLM & Mistral API 代理 ====================
/**
 * Forward a request to an LLM provider and stream the answer back.
 *
 * Credentials: when the provider's API key is present in the environment it
 * is attached in the provider's own header style; otherwise the client's
 * `authorization`, `x-api-key` and `x-goog-api-key` headers are passed
 * through unchanged.
 *
 * The upstream body is piped to the client without buffering, so SSE streams
 * and file downloads are relayed as they arrive.
 *
 * @param {object} req - Incoming request; the body is forwarded for every
 *   method except GET and HEAD.
 * @param {object} res - Outgoing response stream.
 * @param {string} targetUrl - Absolute upstream URL, including any query.
 * @param {string} provider - Provider key used to pick the env variable and
 *   the authentication header style.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleLLMProxy(req, res, targetUrl, provider, origin) {
  try {
    const method = req.method;
    // Environment variable holding the API key of each known provider.
    const apiKeyEnvMap = {
      'aliyun': 'DASHSCOPE_API_KEY',
      'tongyi': 'DASHSCOPE_API_KEY',
      'zhipu': 'ZHIPU_API_KEY',
      'openai': 'OPENAI_API_KEY',
      'deepseek': 'DEEPSEEK_API_KEY',
      'anthropic': 'ANTHROPIC_API_KEY',
      'gemini': 'GOOGLE_API_KEY',
      'mistral': 'MISTRAL_API_KEY',
    };
    const envKeyName = apiKeyEnvMap[provider] || `${provider.toUpperCase()}_API_KEY`;
    const apiKey = process.env[envKeyName];
    // The client's Content-Type is forwarded verbatim, which also keeps the
    // multipart boundary intact for form uploads.
    const headers = {
      'Content-Type': req.headers['content-type'] || 'application/json',
      'Accept': req.headers['accept'] || '*/*'
    };
    if (apiKey) {
      if (provider === 'anthropic') {
        headers['x-api-key'] = apiKey;
        headers['anthropic-version'] = req.headers['anthropic-version'] || '2023-06-01';
      } else if (provider === 'gemini') {
        headers['x-goog-api-key'] = apiKey;
      } else {
        // OpenAI-style bearer token (also used for DashScope, Zhipu, etc.).
        headers['Authorization'] = `Bearer ${apiKey}`;
      }
    } else {
      // No key configured on the server: pass the client's credentials through.
      if (req.headers['authorization']) headers['Authorization'] = req.headers['authorization'];
      if (req.headers['x-api-key']) headers['x-api-key'] = req.headers['x-api-key'];
      if (req.headers['x-goog-api-key']) headers['x-goog-api-key'] = req.headers['x-goog-api-key'];
    }
    const options = { method, headers, redirect: 'follow' };
    // Forward the request body for everything except GET/HEAD.
    if (method !== 'GET' && method !== 'HEAD') {
      const bodyBuffer = await readBody(req);
      if (bodyBuffer.length > 0) {
        options.body = bodyBuffer;
      }
    }
    console.log(`[LLM Proxy] ${method} ${targetUrl} (Provider: ${provider})`);
    const response = await fetch(targetUrl, options);
    const contentType = response.headers.get('content-type') || '';
    const responseHeaders = {
      'Access-Control-Allow-Origin': origin,
      'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS',
      'Access-Control-Allow-Headers': '*',
      'Access-Control-Expose-Headers': '*',
    };
    if (contentType) responseHeaders['Content-Type'] = contentType;
    res.writeHead(response.status, responseHeaders);

    const upstream = response.body;
    // pipe() does not forward source errors. Without this listener an
    // upstream failure in mid-stream is an unhandled 'error' event, which
    // terminates the whole process.
    upstream.on('error', (streamError) => {
      console.error('[LLM Proxy] Upstream stream error:', streamError.message);
      res.destroy();
    });
    // Stop reading from upstream once the client connection is gone.
    res.on('close', () => {
      if (typeof upstream.destroy === 'function') upstream.destroy();
    });
    // Stream the body straight through (file downloads and SSE output).
    upstream.pipe(res);
  } catch (error) {
    console.error(`[LLM Proxy] Error:`, error.message);
    if (!res.headersSent) {
      jsonResponse(res, { error: 'Proxy upstream error', message: error.message }, 503, origin);
    } else {
      res.end();
    }
  }
}
// ==================== 学术搜索代理 ====================
/**
 * Proxy a request to the Semantic Scholar API and relay its JSON answer.
 *
 * The API key is taken from the client's `x-api-key` header, falling back to
 * SEMANTIC_SCHOLAR_API_KEY; it is only sent upstream when present.
 *
 * @param {object} req - Incoming request (headers are read).
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} path - Upstream path without a leading slash.
 * @param {string} searchParams - Raw query string without the leading "?".
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>} Answers 503 when the upstream call or JSON parsing fails.
 */
async function proxySemanticScholar(req, res, path, searchParams, origin) {
  try {
    const key = req.headers['x-api-key'] || process.env.SEMANTIC_SCHOLAR_API_KEY;
    const target = `https://api.semanticscholar.org/${path}?${searchParams}`;
    console.log(`[Semantic Scholar] Proxying: ${target}`);
    const requestHeaders = {
      'User-Agent': 'PaperBurner-LocalProxy/1.0',
      ...(key ? { 'x-api-key': key } : {}),
    };
    const upstream = await fetch(target, { headers: requestHeaders });
    const payload = await upstream.json();
    jsonResponse(res, payload, upstream.status, origin);
  } catch (failure) {
    console.error('[Semantic Scholar] Error:', failure.message);
    const errorBody = { error: 'Semantic Scholar upstream error', message: failure.message };
    jsonResponse(res, errorBody, 503, origin);
  }
}
/**
 * Proxy a request to the NCBI E-utilities (PubMed) API and relay the answer
 * as text.
 *
 * The API key is taken from the client's `x-api-key` header, falling back to
 * PUBMED_API_KEY, and is sent upstream as the `api_key` query parameter. The
 * key is masked in the log line.
 *
 * The response Content-Type is `application/xml` when the upstream type
 * mentions "xml" and `text/plain` otherwise.
 *
 * @param {object} req - Incoming request (headers are read).
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {string} path - Upstream path without a leading slash.
 * @param {string} searchParams - Raw query string without the leading "?".
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>} Answers 503 when the upstream call fails.
 */
async function proxyPubMed(req, res, path, searchParams, origin) {
  try {
    const apiKey = req.headers['x-api-key'] || process.env.PUBMED_API_KEY;
    const params = new URLSearchParams(searchParams);
    if (apiKey) params.set('api_key', apiKey);
    const endpoint = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/${path}`;
    const url = `${endpoint}?${params}`;
    // Log a copy of the query with the key masked so the secret never reaches
    // the log output.
    const loggedParams = new URLSearchParams(params);
    if (loggedParams.has('api_key')) loggedParams.set('api_key', '***');
    console.log(`[PubMed] Proxying: ${endpoint}?${loggedParams}`);
    const response = await fetch(url, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } });
    const contentType = response.headers.get('content-type') || '';
    const text = await response.text();
    res.writeHead(response.status, {
      'Content-Type': contentType.includes('xml') ? 'application/xml' : 'text/plain',
      'Access-Control-Allow-Origin': origin,
    });
    res.end(text);
  } catch (error) {
    console.error('[PubMed] Error:', error.message);
    jsonResponse(res, { error: 'PubMed upstream error', message: error.message }, 503, origin);
  }
}
/**
 * Proxy a request to the CrossRef API and relay its JSON answer.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} path - Upstream path without a leading slash.
 * @param {string} searchParams - Raw query string without the leading "?".
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>} Answers 503 when the upstream call or JSON parsing fails.
 */
async function proxyCrossRef(req, res, path, searchParams, origin) {
  try {
    const target = `https://api.crossref.org/${path}?${searchParams}`;
    console.log(`[CrossRef] Proxying: ${target}`);
    const requestHeaders = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' };
    const upstream = await fetch(target, { headers: requestHeaders });
    const payload = await upstream.json();
    jsonResponse(res, payload, upstream.status, origin);
  } catch (failure) {
    console.error('[CrossRef] Error:', failure.message);
    const errorBody = { error: 'CrossRef upstream error', message: failure.message };
    jsonResponse(res, errorBody, 503, origin);
  }
}
/**
 * Proxy a request to the OpenAlex API and relay its JSON answer.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} path - Upstream path without a leading slash.
 * @param {string} searchParams - Raw query string without the leading "?".
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>} Answers 503 when the upstream call or JSON parsing fails.
 */
async function proxyOpenAlex(req, res, path, searchParams, origin) {
  try {
    const target = `https://api.openalex.org/${path}?${searchParams}`;
    console.log(`[OpenAlex] Proxying: ${target}`);
    const requestHeaders = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' };
    const upstream = await fetch(target, { headers: requestHeaders });
    const payload = await upstream.json();
    jsonResponse(res, payload, upstream.status, origin);
  } catch (failure) {
    console.error('[OpenAlex] Error:', failure.message);
    const errorBody = { error: 'OpenAlex upstream error', message: failure.message };
    jsonResponse(res, errorBody, 503, origin);
  }
}
/**
 * Proxy a request to the arXiv export API and relay the answer as XML.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {string} path - Upstream path without a leading slash.
 * @param {string} searchParams - Raw query string without the leading "?".
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>} Answers 503 (JSON) when the upstream call fails.
 */
async function proxyArXiv(req, res, path, searchParams, origin) {
  try {
    const target = `http://export.arxiv.org/api/${path}?${searchParams}`;
    console.log(`[arXiv] Proxying: ${target}`);
    const upstream = await fetch(target);
    const xml = await upstream.text();
    const replyHeaders = {
      'Content-Type': 'application/xml',
      'Access-Control-Allow-Origin': origin,
    };
    res.writeHead(upstream.status, replyHeaders);
    res.end(xml);
  } catch (failure) {
    console.error('[arXiv] Error:', failure.message);
    const errorBody = { error: 'arXiv upstream error', message: failure.message };
    jsonResponse(res, errorBody, 503, origin);
  }
}
// ==================== OSS 上传服务 ====================
/**
 * Handle POST /api/upload/oss: store the uploaded file in the configured OSS
 * bucket and answer with its URL.
 *
 * The object key is `chat-pdfs/<timestamp>_<random>.<ext>`. The extension is
 * taken from the client file name only when it is a short alphanumeric
 * suffix; otherwise `pdf` is used, so the client-controlled name cannot add
 * path segments to the key.
 *
 * Responses: 500 when OSS is not configured or the upload fails, 400 when the
 * form has no `file` part, 200 on success.
 *
 * @param {object} req - Incoming multipart/form-data request.
 * @param {object} res - Response object passed on to `jsonResponse`.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleOssUpload(req, res, origin) {
  try {
    if (!ossClient) {
      return jsonResponse(res, { error: 'OSS is not configured on the server' }, 500, origin);
    }
    const parts = await parseMultipart(req);
    const filePart = parts.find(p => p.name === 'file');
    if (!filePart || !filePart.filename) {
      return jsonResponse(res, { error: 'No file provided' }, 400, origin);
    }
    console.log(`[OSS Upload] Uploading: ${filePart.filename}`);
    // Accept only a plain alphanumeric extension at the very end of the name.
    const extMatch = /\.([A-Za-z0-9]{1,16})$/.exec(filePart.filename);
    const ext = extMatch ? extMatch[1] : 'pdf';
    const objectName = `chat-pdfs/${Date.now()}_${Math.random().toString(36).substring(2, 7)}.${ext}`;
    // File parts are already Buffers; only convert when they are not.
    const payload = Buffer.isBuffer(filePart.data) ? filePart.data : Buffer.from(filePart.data);
    const result = await ossClient.put(objectName, payload);
    console.log(`[OSS Upload] Upload success: ${result.url}`);
    // NOTE: this returns result.url as-is and therefore assumes the object is
    // readable through that URL (e.g. a public-read bucket). For a private
    // bucket a signed URL would be needed instead, e.g.
    // ossClient.signatureUrl(objectName, { expires: 3600 }).
    let urlToReturn = result.url;
    // Always hand out an https URL.
    if (urlToReturn && urlToReturn.startsWith('http://')) {
      urlToReturn = urlToReturn.replace('http://', 'https://');
    }
    jsonResponse(res, {
      success: true,
      url: urlToReturn,
      file_name: filePart.filename
    }, 200, origin);
  } catch (error) {
    console.error('[OSS Upload] Error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
// ==================== ZIP/PDF 代理 ====================
/**
 * Download a remote file (ZIP/PDF) on behalf of the browser and relay it with
 * CORS headers.
 *
 * The client's `Range` header is forwarded. HEAD requests get the upstream
 * headers only. For GET the whole upstream body is read into memory first and
 * then sent with a Content-Length matching the bytes actually delivered.
 *
 * Responses: 400 when `downloadUrl` is missing, unparsable or not http(s),
 * 502 when upstream answers with a non-success status, 500 for unexpected
 * failures.
 *
 * @param {object} req - Incoming request; `method` and `headers.range` are read.
 * @param {object} res - Response object exposing `writeHead`, `end` and `headersSent`.
 * @param {string|null} downloadUrl - Absolute URL of the file to fetch.
 * @param {string} origin - Value for `Access-Control-Allow-Origin`.
 * @returns {Promise<void>}
 */
async function handleProxyDownload(req, res, downloadUrl, origin) {
  try {
    if (!downloadUrl) {
      return jsonResponse(res, { error: 'url parameter is required' }, 400, origin);
    }
    // Reject bad input up front so it is reported as a client error (400)
    // instead of surfacing as a 500 from inside fetch.
    let target;
    try {
      target = new URL(downloadUrl);
    } catch {
      return jsonResponse(res, { error: 'url parameter is not a valid URL' }, 400, origin);
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return jsonResponse(res, { error: 'Only http and https URLs are supported' }, 400, origin);
    }
    const method = req.method || 'GET';
    console.log(`[Proxy] ${method} ${downloadUrl}`);
    const headers = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' };
    const rangeHeader = req.headers['range'];
    if (rangeHeader) {
      headers['Range'] = rangeHeader;
    }
    const response = await fetch(downloadUrl, { method, headers, redirect: 'follow' });
    if (!response.ok && response.status !== 206) {
      return jsonResponse(res, { error: `Upstream fetch failed: ${response.status}` }, 502, origin);
    }
    const contentLength = response.headers.get('Content-Length');
    console.log(`[Proxy] ${method} response: ${response.status}, Content-Length: ${contentLength}`);
    const responseHeaders = {
      'Content-Type': response.headers.get('Content-Type') || 'application/octet-stream',
      'Access-Control-Allow-Origin': origin,
      'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS',
      'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key',
      'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges',
      'Cache-Control': 'no-store',
    };
    if (contentLength) {
      responseHeaders['Content-Length'] = contentLength;
    }
    const contentRange = response.headers.get('Content-Range');
    if (contentRange) {
      responseHeaders['Content-Range'] = contentRange;
    }
    const acceptRanges = response.headers.get('Accept-Ranges');
    if (acceptRanges) {
      responseHeaders['Accept-Ranges'] = acceptRanges;
    }
    // HEAD requests carry no body.
    if (method === 'HEAD') {
      res.writeHead(response.status, responseHeaders);
      return res.end();
    }
    // GET: buffer the whole body before sending. This costs memory but is the
    // simpler, more reliable path.
    console.log(`[Proxy] Reading response body...`);
    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer.from(arrayBuffer);
    console.log(`[Proxy] Got ${buffer.length} bytes, sending to client...`);
    // Report the length of the bytes that are actually sent.
    responseHeaders['Content-Length'] = buffer.length.toString();
    res.writeHead(response.status, responseHeaders);
    res.end(buffer);
    console.log(`[Proxy] Done sending ${buffer.length} bytes`);
  } catch (error) {
    console.error('[Proxy] Download error:', error.message);
    // An error body can only be sent while the headers are still unsent.
    if (!res.headersSent) {
      jsonResponse(res, { error: error.message }, 500, origin);
    } else {
      res.end();
    }
  }
}
// ==================== 主路由 ====================
/**
 * HTTP server and main router.
 *
 * Every request is matched against the route table below in order; the first
 * matching route handles it. Unmatched requests get a JSON 404. OPTIONS
 * requests are answered as CORS preflights, and a request URL that cannot be
 * parsed gets a JSON 400.
 */
const server = http.createServer(async (req, res) => {
  const origin = req.headers.origin || '*';
  // This listener is async, so an exception escaping it would become an
  // unhandled rejection. Parse the URL defensively and answer 400 instead.
  let url;
  try {
    url = new URL(req.url, `http://${req.headers.host || 'localhost'}`);
  } catch {
    return jsonResponse(res, { error: 'Invalid request URL' }, 400, origin);
  }
  const pathname = url.pathname;
  const searchParams = url.searchParams.toString();
  // CORS preflight
  if (req.method === 'OPTIONS') {
    return handleCORS(res, origin);
  }
  try {
    // ===== OCR routes (compatible with the CF Worker) =====
    // MinerU
    if (pathname === '/mineru/upload' && req.method === 'POST') {
      return await handleMinerUUpload(req, res, origin);
    }
    if (pathname.startsWith('/mineru/result/') && req.method === 'GET') {
      const batchId = pathname.split('/mineru/result/')[1];
      return await handleMinerUResult(req, res, batchId, origin);
    }
    // Doc2X
    if (pathname === '/doc2x/upload' && req.method === 'POST') {
      return await handleDoc2XUpload(req, res, origin);
    }
    if (pathname.startsWith('/doc2x/status/') && req.method === 'GET') {
      const uid = pathname.split('/doc2x/status/')[1];
      return await handleDoc2XStatus(req, res, uid, origin);
    }
    if (pathname === '/doc2x/convert' && req.method === 'POST') {
      return await handleDoc2XConvert(req, res, origin);
    }
    if (pathname.startsWith('/doc2x/convert/result/') && req.method === 'GET') {
      const uid = pathname.split('/doc2x/convert/result/')[1];
      return await handleDoc2XConvertResult(req, res, uid, origin);
    }
    // ZIP download proxy
    if ((pathname === '/mineru/zip' || pathname === '/doc2x/zip') && (req.method === 'GET' || req.method === 'HEAD')) {
      const zipUrl = url.searchParams.get('url');
      return await handleProxyDownload(req, res, zipUrl, origin);
    }
    // ===== LLM & OCR proxy =====
    if (pathname.startsWith('/api/llm/')) {
      const pathParts = pathname.replace('/api/llm/', '').split('/');
      const provider = pathParts[0];
      const restPath = pathParts.slice(1).join('/');
      let targetBaseUrl = '';
      if (provider === 'openai') targetBaseUrl = 'https://api.openai.com';
      else if (provider === 'deepseek') targetBaseUrl = 'https://api.deepseek.com';
      else if (provider === 'anthropic') targetBaseUrl = 'https://api.anthropic.com';
      else if (provider === 'gemini') targetBaseUrl = 'https://generativelanguage.googleapis.com';
      else if (provider === 'mistral') targetBaseUrl = 'https://api.mistral.ai';
      else if (provider === 'aliyun' || provider === 'tongyi') targetBaseUrl = 'https://dashscope.aliyuncs.com/compatible-mode';
      else if (provider === 'zhipu') targetBaseUrl = 'https://open.bigmodel.cn/api/paas/v4';
      else return jsonResponse(res, { error: 'Unknown LLM provider' }, 400, origin);
      // The Zhipu base URL already ends in v4, so a leading "v1/" is dropped.
      let adjustedPath = restPath;
      if (provider === 'zhipu' && adjustedPath.startsWith('v1/')) {
        adjustedPath = adjustedPath.replace('v1/', '');
      }
      let targetUrl = `${targetBaseUrl}/${adjustedPath}`;
      if (searchParams) targetUrl += `?${searchParams}`;
      return await handleLLMProxy(req, res, targetUrl, provider, origin);
    }
    if (pathname.startsWith('/api/mistral/')) {
      // Direct forwarding for the Mistral OCR provider.
      const restPath = pathname.replace('/api/mistral/', '');
      let targetUrl = `https://api.mistral.ai/${restPath}`;
      if (searchParams) targetUrl += `?${searchParams}`;
      return await handleLLMProxy(req, res, targetUrl, 'mistral', origin);
    }
    // Front ends that still call official Mistral paths such as /v1/files,
    // but pointed at this proxy, are forwarded to Mistral as well.
    if (pathname.startsWith('/v1/')) {
      let targetUrl = `https://api.mistral.ai${pathname}`;
      if (searchParams) targetUrl += `?${searchParams}`;
      return await handleLLMProxy(req, res, targetUrl, 'mistral', origin);
    }
    // ===== OSS upload =====
    if (pathname === '/api/upload/oss' && req.method === 'POST') {
      return await handleOssUpload(req, res, origin);
    }
    // ===== Academic search routes (compatible with the CF Worker) =====
    if (pathname.startsWith('/api/semanticscholar/')) {
      const path = pathname.replace('/api/semanticscholar/', '');
      return await proxySemanticScholar(req, res, path, searchParams, origin);
    }
    if (pathname.startsWith('/api/pubmed/')) {
      const path = pathname.replace('/api/pubmed/', '');
      return await proxyPubMed(req, res, path, searchParams, origin);
    }
    if (pathname.startsWith('/api/crossref/')) {
      const path = pathname.replace('/api/crossref/', '');
      return await proxyCrossRef(req, res, path, searchParams, origin);
    }
    if (pathname.startsWith('/api/openalex/')) {
      const path = pathname.replace('/api/openalex/', '');
      return await proxyOpenAlex(req, res, path, searchParams, origin);
    }
    if (pathname.startsWith('/api/arxiv/')) {
      const path = pathname.replace('/api/arxiv/', '');
      return await proxyArXiv(req, res, path, searchParams, origin);
    }
    // ===== Local file endpoint: serve the first supported file in ../input =====
    if (pathname === '/api/local/read-first-input' && req.method === 'GET') {
      try {
        const inputDir = join(dirname(__dirname), 'input');
        if (!existsSync(inputDir)) {
          return jsonResponse(res, { error: 'Input directory not found' }, 404, origin);
        }
        const fs = await import('fs/promises');
        const files = await fs.readdir(inputDir);
        // Keep only the supported formats.
        const supportedExts = ['.pdf', '.md', '.txt', '.png', '.jpg', '.jpeg'];
        const validFiles = files.filter(f => supportedExts.some(ext => f.toLowerCase().endsWith(ext)));
        if (validFiles.length === 0) {
          return jsonResponse(res, { error: 'No valid files found in input directory' }, 404, origin);
        }
        // Take the first entry in the order readdir returned it.
        const firstFile = validFiles[0];
        const filePath = join(inputDir, firstFile);
        const stats = await fs.stat(filePath);
        // Pick the MIME type from the extension.
        const ext = firstFile.toLowerCase().split('.').pop();
        const mimeTypes = {
          'pdf': 'application/pdf',
          'md': 'text/markdown',
          'txt': 'text/plain',
          'png': 'image/png',
          'jpg': 'image/jpeg',
          'jpeg': 'image/jpeg'
        };
        const contentType = mimeTypes[ext] || 'application/octet-stream';
        // Send the headers, then stream the file.
        res.writeHead(200, {
          'Content-Type': contentType,
          'Content-Length': stats.size.toString(),
          'Content-Disposition': `attachment; filename="${encodeURIComponent(firstFile)}"`,
          'Access-Control-Allow-Origin': origin,
          'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS',
          'Access-Control-Expose-Headers': 'Content-Disposition, Content-Length'
        });
        const readStream = createReadStream(filePath);
        // A read failure after the headers are sent cannot be reported as
        // JSON; drop the connection instead of leaving an unhandled 'error'
        // event that would terminate the process.
        readStream.on('error', (streamErr) => {
          console.error('[Local API] Error streaming input:', streamErr);
          res.destroy();
        });
        readStream.pipe(res);
        return; // the stream now owns the response
      } catch (err) {
        console.error('[Local API] Error reading input:', err);
        return jsonResponse(res, { error: 'Failed to read input files' }, 500, origin);
      }
    }
    // PDF download proxy
    if (pathname === '/api/pdf/download' && (req.method === 'GET' || req.method === 'HEAD')) {
      const pdfUrl = url.searchParams.get('url');
      return await handleProxyDownload(req, res, pdfUrl, origin);
    }
    // ===== Health check =====
    if (pathname === '/health') {
      return jsonResponse(res, {
        status: 'ok',
        timestamp: Date.now(),
        version: '1.0.0',
        services: {
          ocr: {
            mineru: { enabled: true, hasToken: !!process.env.MINERU_API_TOKEN },
            doc2x: { enabled: true, hasToken: !!process.env.DOC2X_API_TOKEN },
          },
          academic: {
            semanticscholar: { enabled: true, hasApiKey: !!process.env.SEMANTIC_SCHOLAR_API_KEY },
            pubmed: { enabled: true, hasApiKey: !!process.env.PUBMED_API_KEY },
            crossref: { enabled: true },
            openalex: { enabled: true },
            arxiv: { enabled: true },
          }
        }
      }, 200, origin);
    }
    // 404
    if (!res.headersSent) {
      jsonResponse(res, { error: 'Not Found' }, 404, origin);
    }
  } catch (error) {
    console.error('Server error:', error);
    if (!res.headersSent) {
      jsonResponse(res, { error: error.message || 'Internal Server Error' }, 500, origin);
    }
  }
});
// ==================== 启动服务器 ====================
// Socket timeout of 300000 ms (5 minutes), intended for large file transfers.
server.timeout = 300000;
// Keep-alive and header timeouts are both set to 120000 ms (2 minutes).
server.keepAliveTimeout = 120000;
server.headersTimeout = 120000;
// Start listening on PORT and print a startup banner that summarizes the
// port, the proxy URL and which credentials are present in the environment.
// NOTE: the banner prints the first six characters of the Zhipu, DashScope
// and Mistral keys; the MinerU/Doc2X tokens and the OSS settings are only
// reported as configured or not.
server.listen(PORT, () => {
console.log(`
╔═══════════════════════════════════════════════════════╗
║ Paper Burner Local Proxy Server ║
╠═══════════════════════════════════════════════════════╣
║ Port: ${PORT.toString().padEnd(47)}
║ URL: http://localhost:${PORT.toString().padEnd(30)}
╠═══════════════════════════════════════════════════════╣
║ LLM Providers: ║
║ Zhipu AI: ${(process.env.ZHIPU_API_KEY ? '✓ ' + process.env.ZHIPU_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)}
║ Aliyun: ${(process.env.DASHSCOPE_API_KEY ? '✓ ' + process.env.DASHSCOPE_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)}
║ ║
║ OCR Services: ║
║ MinerU Token: ${(process.env.MINERU_API_TOKEN ? '✓ Configured' : '✗ Not set').padEnd(36)}
║ Doc2X Token: ${(process.env.DOC2X_API_TOKEN ? '✓ Configured' : '✗ Not set').padEnd(36)}
║ Mistral Key: ${(process.env.MISTRAL_API_KEY ? '✓ ' + process.env.MISTRAL_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)}
║ ║
║ Academic Search: ║
║ Semantic Scholar, PubMed, CrossRef, ║
║ OpenAlex, arXiv ║
║ ║
║ OSS Upload: ║
║ Configured: ${((process.env.OSS_BUCKET || process.env.OSS_BUCKET_NAME) && process.env.OSS_ACCESS_KEY_ID ? '✓ Yes' : '✗ No').padEnd(36)}
╠═══════════════════════════════════════════════════════╣
║ 在 Paper Burner 中设置代理地址为: ║
║ http://localhost:${PORT.toString().padEnd(38)}
╚═══════════════════════════════════════════════════════╝
`);
});