/**
 * Paper Burner 本地代理服务器
 *
 * 功能完全等同于 Cloudflare Worker,用户可以本地快速部署使用
 *
 * 支持的服务:
 * 1. OCR 代理 (MinerU / Doc2X)
 * 2. 学术搜索代理 (Semantic Scholar / PubMed / CrossRef / OpenAlex / arXiv)
 * 3. PDF/ZIP 下载代理
 *
 * 使用方法:
 * 1. npm install
 * 2. 复制 .env.example 到 .env 并配置
 * 3. npm start
 *
 * 然后在 Paper Burner 前端设置代理地址为 http://localhost:3456
 */
|
||
|
||
import http from 'http';
|
||
import { URL, URLSearchParams } from 'url';
|
||
import fetch from 'node-fetch';
|
||
import { createReadStream, readFileSync, existsSync } from 'fs';
|
||
import { join, dirname } from 'path';
|
||
import { fileURLToPath } from 'url';
|
||
import { Readable } from 'stream';
|
||
import OSS from 'ali-oss';
|
||
|
||
// Express 应用(持久化 API)
|
||
import app from './app.js';
|
||
|
||
// 数据库初始化
|
||
import { initDatabase, prisma } from './db/client.js';
|
||
|
||
// Absolute directory of this module (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||
|
||
// ==================== Configuration loading ====================

// Load variables from a .env file (if present) and merge them into process.env.
// Non-empty values always override; empty values only fill unset variables.
function loadEnv() {
  const candidates = [
    join(__dirname, '.env'), // local-proxy directory config takes priority (later loads override earlier ones)
  ];

  for (const envPath of candidates) {
    if (!existsSync(envPath)) continue;
    console.log(`[Env] Loading variables from: ${envPath}`);
    const lines = readFileSync(envPath, 'utf-8').split('\n');
    for (const rawLine of lines) {
      const entry = rawLine.trim();
      // Skip blank lines and comments.
      if (!entry || entry.startsWith('#')) continue;
      const eq = entry.indexOf('=');
      if (eq === -1) continue;
      const key = entry.substring(0, eq).trim();
      // Strip one layer of surrounding quotes, if present.
      const value = entry.substring(eq + 1).trim().replace(/^["']|["']$/g, '');
      if (value !== '') {
        // Non-empty values always win (later files override earlier ones).
        process.env[key] = value;
      } else if (process.env[key] === undefined) {
        // Empty values only fill in variables that are not yet defined.
        process.env[key] = value;
      }
    }
  }
}
loadEnv();
|
||
|
||
// Port for the local proxy server (override with PORT in .env; default 3456).
const PORT = parseInt(process.env.PORT || '3456', 10);
// Upstream base URL for the MinerU OCR batch API (v4).
const MINERU_BASE_URL = 'https://mineru.net/api/v4';
// Upstream base URL for the Doc2X OCR API.
const DOC2X_BASE_URL = 'https://v2.doc2x.noedgeai.com';
|
||
|
||
// ==================== Database initialization ====================
// Tracks whether the PostgreSQL connection succeeded. The same flag is also
// published on globalThis.__prismaConnected for other modules to check.
let dbConnected = false;
initDatabase()
  .then(connected => {
    dbConnected = connected;
    if (connected) {
      console.log('[Database] PostgreSQL connected successfully');
      globalThis.__prismaConnected = true;
    } else {
      // Not fatal: the proxy keeps running without persistence.
      console.warn('[Database] PostgreSQL not configured or connection failed');
    }
  })
  .catch(err => {
    console.error('[Database] Initialization error:', err.message);
  });
|
||
|
||
// ==================== OSS configuration ====================
let ossClient = null;

// Build the Aliyun OSS client from environment variables, when fully configured.
function initOssClient() {
  const region = process.env.OSS_REGION;
  const accessKeyId = process.env.OSS_ACCESS_KEY_ID;
  const accessKeySecret = process.env.OSS_ACCESS_KEY_SECRET;
  const bucket = process.env.OSS_BUCKET || process.env.OSS_BUCKET_NAME;

  if (!(region && accessKeyId && accessKeySecret && bucket)) {
    console.warn('[OSS] OSS_REGION, OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, or OSS_BUCKET not found in environment.');
    return;
  }

  // Normalize the region to its "oss-" prefixed form (cn-beijing -> oss-cn-beijing).
  const normalizedRegion = region.startsWith('oss-') ? region : `oss-${region}`;

  ossClient = new OSS({
    region: normalizedRegion,
    accessKeyId,
    accessKeySecret,
    bucket,
    secure: true // force HTTPS
  });
  console.log(`[OSS] Configured with bucket: ${bucket} at region: ${normalizedRegion}`);
}
initOssClient();
|
||
|
||
// ==================== Utility functions ====================

// Send a JSON body with permissive CORS headers attached.
function jsonResponse(res, data, status = 200, origin = '*') {
  const corsHeaders = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': origin,
    'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key, X-Api-Key, X-MinerU-Key, X-Doc2X-Key, Authorization, x-goog-api-key, anthropic-version',
    'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges',
  };
  res.writeHead(status, corsHeaders);
  res.end(JSON.stringify(data));
}
|
||
|
||
// Answer a CORS preflight (OPTIONS) request with 204 No Content.
function handleCORS(res, origin = '*') {
  const preflightHeaders = {
    'Access-Control-Allow-Origin': origin,
    'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key, X-Api-Key, X-MinerU-Key, X-Doc2X-Key, Authorization, x-goog-api-key, anthropic-version',
    'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges',
    'Access-Control-Max-Age': '86400', // cache the preflight for a day
  };
  res.writeHead(204, preflightHeaders);
  res.end();
}
|
||
|
||
/**
 * Look up an OCR API token in the environment (pure backend-proxy mode;
 * tokens supplied by the frontend are never used).
 * @param {string} service - Service name ('MINERU' or 'DOC2X')
 * @returns {string|null} Normalized token, or null when not configured
 */
function getToken(service) {
  const envKey = service === 'MINERU' ? 'MINERU_API_TOKEN' : 'DOC2X_API_TOKEN';
  let token = process.env[envKey];

  if (!token) {
    console.warn(`[${service}] Token not found in environment variable ${envKey}`);
    return null;
  }

  // Accept values stored with a "Bearer " prefix and normalize them.
  token = token.replace(/^Bearer\s+/i, '').trim();
  // Log only an abbreviated preview so the full secret never hits the logs.
  const preview = token.length > 12
    ? `${token.substring(0, 6)}...${token.substring(token.length - 6)}`
    : token;
  console.log(`[${service}] Token from environment: ${preview}`);
  return token || null;
}
|
||
|
||
// Buffer an entire incoming request body into a single Buffer.
async function readBody(req) {
  return new Promise((resolve, reject) => {
    const pieces = [];
    req.on('data', piece => {
      pieces.push(piece);
    });
    req.on('end', () => {
      resolve(Buffer.concat(pieces));
    });
    req.on('error', reject);
  });
}
|
||
|
||
// Minimal multipart/form-data parser (no external dependencies).
// Returns an array of { name, filename, data } parts: file parts keep their
// raw Buffer in `data`, plain form fields are decoded to strings.
// NOTE(review): assumes CRLF line endings and unescaped quoted names, which
// matches what browsers send but is not a fully RFC 7578-compliant parser.
async function parseMultipart(req) {
  const contentType = req.headers['content-type'] || '';
  // The boundary may or may not be quoted in the Content-Type header.
  const boundaryMatch = contentType.match(/boundary=(?:"([^"]+)"|([^;]+))/);
  if (!boundaryMatch) throw new Error('No boundary found');

  const boundary = boundaryMatch[1] || boundaryMatch[2];
  const body = await readBody(req);
  const parts = [];

  const boundaryBuffer = Buffer.from(`--${boundary}`);
  // NOTE(review): unused — loop termination relies on indexOf failing instead.
  const endBoundary = Buffer.from(`--${boundary}--`);

  let start = body.indexOf(boundaryBuffer) + boundaryBuffer.length + 2; // skip \r\n

  while (start < body.length) {
    const nextBoundary = body.indexOf(boundaryBuffer, start);
    if (nextBoundary === -1) break;

    const partData = body.slice(start, nextBoundary - 2); // remove trailing \r\n
    // Part headers are separated from the content by a blank line.
    const headerEnd = partData.indexOf('\r\n\r\n');

    if (headerEnd !== -1) {
      const headerStr = partData.slice(0, headerEnd).toString();
      const content = partData.slice(headerEnd + 4);

      const nameMatch = headerStr.match(/name="([^"]+)"/);
      const filenameMatch = headerStr.match(/filename="([^"]+)"/);

      if (nameMatch) {
        parts.push({
          name: nameMatch[1],
          filename: filenameMatch ? filenameMatch[1] : null,
          // File uploads stay binary; simple form fields become strings.
          data: filenameMatch ? content : content.toString()
        });
      }
    }

    // Advance past this boundary line (and its trailing \r\n).
    start = nextBoundary + boundaryBuffer.length + 2;
  }

  return parts;
}
|
||
|
||
// ==================== MinerU handlers ====================

// Timestamped logger for the MinerU pipeline; objects are pretty-printed.
function logMinerU(level, message, data = null) {
  const timestamp = new Date().toISOString();
  const prefix = `[${timestamp}] [MinerU] [${level.toUpperCase()}]`;
  if (!data) {
    console.log(`${prefix} ${message}`);
    return;
  }
  const payload = typeof data === 'object' ? JSON.stringify(data, null, 2) : data;
  console.log(`${prefix} ${message}`, payload);
}
|
||
|
||
/**
 * Handle a MinerU OCR upload: parse the multipart form, request a pre-signed
 * upload URL from the MinerU batch API, PUT the file to that URL, and reply
 * with the batch id the client polls for results.
 *
 * Fixes vs. original: the batch request payload was built twice (once for the
 * debug log, once for the actual fetch) and could silently drift — it is now
 * built once and shared. Deprecated String#substr replaced with #slice.
 */
async function handleMinerUUpload(req, res, origin) {
  const startTime = Date.now();
  const requestId = `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;

  logMinerU('info', `[${requestId}] ====== 上传请求开始 ======`);

  try {
    // 1. Parse the multipart form.
    logMinerU('info', `[${requestId}] 步骤1: 解析 multipart 表单数据`);
    const parseStart = Date.now();
    const parts = await parseMultipart(req);
    const parseTime = Date.now() - parseStart;
    logMinerU('info', `[${requestId}] 表单解析完成`, { partsCount: parts.length, parseTimeMs: parseTime });

    const filePart = parts.find(p => p.name === 'file');

    if (!filePart || !filePart.filename) {
      logMinerU('error', `[${requestId}] 错误: 未找到文件`, { parts: parts.map(p => ({ name: p.name, filename: p.filename })) });
      return jsonResponse(res, { error: 'No file provided', requestId }, 400, origin);
    }

    logMinerU('info', `[${requestId}] 文件信息`, {
      filename: filePart.filename,
      size: filePart.data?.length || 0,
      sizeReadable: `${((filePart.data?.length || 0) / 1024).toFixed(2)} KB`
    });

    // 2. Resolve the API token (backend-configured only).
    logMinerU('info', `[${requestId}] 步骤2: 获取 MinerU API Token`);
    const token = getToken('MINERU');
    if (!token) {
      logMinerU('error', `[${requestId}] 错误: MinerU Token 未配置`);
      return jsonResponse(res, {
        error: 'MinerU API Token required. Configure MINERU_API_TOKEN in .env',
        requestId
      }, 401, origin);
    }
    logMinerU('info', `[${requestId}] Token 验证通过`);

    // 3. Read processing options from the form fields.
    logMinerU('info', `[${requestId}] 步骤3: 解析处理参数`);
    const getField = (name, defaultVal) => {
      const part = parts.find(p => p.name === name);
      return part ? part.data : defaultVal;
    };

    const params = {
      is_ocr: getField('is_ocr', 'true') !== 'false',
      enable_formula: getField('enable_formula', 'true') !== 'false',
      enable_table: getField('enable_table', 'true') !== 'false',
      language: getField('language', 'ch'),
      data_id: getField('data_id', null),
      page_ranges: getField('page_ranges', null)
    };
    logMinerU('info', `[${requestId}] 处理参数`, params);

    // 4. Request a pre-signed upload URL from MinerU.
    // Build the payload once so the debug log and the real request can never drift.
    const batchPayload = {
      enable_formula: params.enable_formula,
      enable_table: params.enable_table,
      language: params.language,
      files: [{
        name: filePart.filename,
        is_ocr: params.is_ocr,
        ...(params.data_id && { data_id: params.data_id }),
        ...(params.page_ranges && { page_ranges: params.page_ranges })
      }]
    };

    logMinerU('info', `[${requestId}] 步骤4: 向 MinerU API 申请上传链接`);
    logMinerU('debug', `[${requestId}] 请求详情`, {
      url: `${MINERU_BASE_URL}/file-urls/batch`,
      method: 'POST',
      body: batchPayload
    });

    const apiStartTime = Date.now();
    const uploadUrlResponse = await fetch(`${MINERU_BASE_URL}/file-urls/batch`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(batchPayload),
    });
    const apiTime = Date.now() - apiStartTime;
    logMinerU('info', `[${requestId}] MinerU API 响应`, { status: uploadUrlResponse.status, statusText: uploadUrlResponse.statusText, responseTimeMs: apiTime });

    if (!uploadUrlResponse.ok) {
      const errorText = await uploadUrlResponse.text();
      logMinerU('error', `[${requestId}] MinerU API 返回错误`, { status: uploadUrlResponse.status, error: errorText });
      throw new Error(`MinerU申请上传链接失败: ${errorText}`);
    }

    const uploadData = await uploadUrlResponse.json();
    logMinerU('debug', `[${requestId}] MinerU API 响应数据`, uploadData);

    if (uploadData.code !== 0) {
      logMinerU('error', `[${requestId}] MinerU API 业务错误`, { code: uploadData.code, msg: uploadData.msg });
      throw new Error(`MinerU返回错误: ${uploadData.msg}`);
    }

    const batchId = uploadData.data.batch_id;
    const ossUrl = uploadData.data.file_urls[0];
    logMinerU('info', `[${requestId}] 获取上传链接成功`, { batchId, ossUrl: ossUrl?.substring(0, 50) + '...' });

    // 5. PUT the raw file bytes to the pre-signed OSS URL.
    logMinerU('info', `[${requestId}] 步骤5: 上传文件到 OSS`);
    logMinerU('info', `[${requestId}] OSS 上传详情`, { url: ossUrl?.substring(0, 60) + '...', fileSize: filePart.data.length });

    const ossStartTime = Date.now();
    const ossResponse = await fetch(ossUrl, {
      method: 'PUT',
      body: filePart.data,
      headers: {
        'Content-Length': filePart.data.length.toString(),
      },
    });
    const ossTime = Date.now() - ossStartTime;
    logMinerU('info', `[${requestId}] OSS 上传完成`, { status: ossResponse.status, uploadTimeMs: ossTime });

    if (!ossResponse.ok) {
      logMinerU('error', `[${requestId}] OSS 上传失败`, { status: ossResponse.status, statusText: ossResponse.statusText });
      throw new Error(`OSS上传失败: ${ossResponse.status}`);
    }

    const totalTime = Date.now() - startTime;
    logMinerU('info', `[${requestId}] ====== 上传请求完成 ======`, {
      batchId,
      filename: filePart.filename,
      totalTimeMs: totalTime,
      phases: {
        parse: parseTime,
        mineruApi: apiTime,
        ossUpload: ossTime
      }
    });

    jsonResponse(res, {
      success: true,
      batch_id: batchId,
      file_name: filePart.filename,
      service: 'mineru',
      requestId,
      timing: { totalMs: totalTime }
    }, 200, origin);

  } catch (error) {
    const totalTime = Date.now() - startTime;
    logMinerU('error', `[${requestId}] ====== 上传请求失败 ======`, {
      error: error.message,
      stack: error.stack,
      totalTimeMs: totalTime
    });
    jsonResponse(res, { error: error.message, requestId }, 500, origin);
  }
}
|
||
|
||
// Query MinerU batch extraction results. The synthetic batchId '__health__'
// only verifies that a token is configured, without contacting the upstream.
async function handleMinerUResult(req, res, batchId, origin) {
  try {
    const token = getToken('MINERU');
    if (!token) {
      return jsonResponse(res, { error: 'MinerU API Token required' }, 401, origin);
    }

    if (batchId === '__health__') {
      return jsonResponse(res, { success: true, service: 'mineru', health: true, timestamp: Date.now() }, 200, origin);
    }

    const apiResponse = await fetch(`${MINERU_BASE_URL}/extract-results/batch/${batchId}`, {
      headers: {
        'Authorization': `Bearer ${token}`,
        'Accept': 'application/json',
      },
    });

    if (!apiResponse.ok) {
      throw new Error(`MinerU查询失败: ${apiResponse.statusText}`);
    }

    const payload = await apiResponse.json();
    if (payload.code !== 0) {
      throw new Error(`MinerU返回错误: ${payload.msg}`);
    }

    jsonResponse(res, { success: true, service: 'mineru', ...payload.data }, 200, origin);

  } catch (error) {
    console.error('[MinerU] Result error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
|
||
|
||
// ==================== Doc2X handlers ====================

// Upload a file to Doc2X: request a pre-signed upload slot, PUT the bytes to
// the returned OSS URL, then hand the job uid back to the client.
async function handleDoc2XUpload(req, res, origin) {
  try {
    const formParts = await parseMultipart(req);
    const filePart = formParts.find(part => part.name === 'file');

    if (!filePart || !filePart.filename) {
      return jsonResponse(res, { error: 'No file provided' }, 400, origin);
    }

    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }

    console.log(`[Doc2X] Uploading: ${filePart.filename}`);

    // Ask Doc2X for a pre-signed upload URL.
    const preuploadResponse = await fetch(`${DOC2X_BASE_URL}/api/v2/parse/preupload`, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${token}` },
    });

    if (!preuploadResponse.ok) {
      throw new Error(`Doc2X预上传失败: ${await preuploadResponse.text()}`);
    }

    const preuploadData = await preuploadResponse.json();
    if (preuploadData.code !== 'success') {
      throw new Error(`Doc2X返回错误: ${preuploadData.msg}`);
    }

    const { uid, url: uploadUrl } = preuploadData.data;

    // Push the raw file bytes to OSS.
    console.log(`[Doc2X] Uploading to OSS: ${filePart.data.length} bytes`);
    const ossResponse = await fetch(uploadUrl, {
      method: 'PUT',
      body: filePart.data,
      headers: {
        'Content-Length': filePart.data.length.toString(),
      },
    });

    if (!ossResponse.ok) {
      throw new Error(`Doc2X OSS上传失败: ${ossResponse.status}`);
    }

    jsonResponse(res, {
      success: true,
      uid,
      file_name: filePart.filename,
      service: 'doc2x'
    }, 200, origin);

  } catch (error) {
    console.error('[Doc2X] Upload error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
|
||
|
||
/**
 * Poll Doc2X parse status for a given uid. The synthetic uid '__health__'
 * only checks that a token is configured, without contacting the upstream.
 *
 * Fix vs. original: the uid is now URL-encoded before interpolation so
 * unusual characters cannot corrupt or inject into the query string.
 */
async function handleDoc2XStatus(req, res, uid, origin) {
  try {
    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }

    if (uid === '__health__') {
      return jsonResponse(res, { success: true, service: 'doc2x', health: true, timestamp: Date.now() }, 200, origin);
    }

    const response = await fetch(`${DOC2X_BASE_URL}/api/v2/parse/status?uid=${encodeURIComponent(uid)}`, {
      headers: { 'Authorization': `Bearer ${token}` },
    });

    if (!response.ok) {
      throw new Error(`Doc2X查询失败: ${response.statusText}`);
    }

    const data = await response.json();
    if (data.code !== 'success') {
      // Business-level failures surface as HTTP 200 so the client can inspect the code.
      return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin);
    }

    jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin);

  } catch (error) {
    console.error('[Doc2X] Status error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
|
||
|
||
// Trigger a Doc2X export/conversion for a previously parsed document.
async function handleDoc2XConvert(req, res, origin) {
  try {
    const rawBody = await readBody(req);
    const { uid, to = 'md', formula_mode = 'normal', filename, merge_cross_page_forms = false } = JSON.parse(rawBody.toString());

    if (!uid) {
      return jsonResponse(res, { error: 'uid is required' }, 400, origin);
    }

    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }

    // filename is only forwarded when present.
    const convertPayload = { uid, to, formula_mode, ...(filename && { filename }), merge_cross_page_forms };
    const response = await fetch(`${DOC2X_BASE_URL}/api/v2/convert/parse`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(convertPayload),
    });

    if (!response.ok) {
      throw new Error(`Doc2X转换失败: ${await response.text()}`);
    }

    const data = await response.json();
    if (data.code !== 'success') {
      return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin);
    }

    jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin);

  } catch (error) {
    console.error('[Doc2X] Convert error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
|
||
|
||
/**
 * Fetch the result of a Doc2X conversion job by uid.
 *
 * Fix vs. original: the uid is now URL-encoded before interpolation so
 * unusual characters cannot corrupt or inject into the query string.
 */
async function handleDoc2XConvertResult(req, res, uid, origin) {
  try {
    const token = getToken('DOC2X');
    if (!token) {
      return jsonResponse(res, { error: 'Doc2X API Token required' }, 401, origin);
    }

    const response = await fetch(`${DOC2X_BASE_URL}/api/v2/convert/parse/result?uid=${encodeURIComponent(uid)}`, {
      headers: { 'Authorization': `Bearer ${token}` },
    });

    if (!response.ok) {
      throw new Error(`Doc2X查询转换结果失败: ${response.statusText}`);
    }

    const data = await response.json();
    if (data.code !== 'success') {
      // Business-level failures surface as HTTP 200 so the client can inspect the code.
      return jsonResponse(res, { success: false, service: 'doc2x', error: data.code, message: data.msg }, 200, origin);
    }

    jsonResponse(res, { success: true, service: 'doc2x', ...data.data }, 200, origin);

  } catch (error) {
    console.error('[Doc2X] Convert result error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
|
||
|
||
// ==================== LLM & Mistral API proxy ====================

/**
 * Generic passthrough proxy for LLM / OCR providers.
 *
 * Auth resolution: when the provider's API key is configured server-side it
 * is injected using that provider's header convention (Anthropic x-api-key,
 * Gemini x-goog-api-key, otherwise Bearer). When no server-side key exists,
 * the client's own auth headers are forwarded untouched.
 *
 * The upstream response body (including SSE streams) is piped straight back.
 *
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 * @param {string} targetUrl - fully resolved upstream URL
 * @param {string} provider - provider id, e.g. 'openai', 'anthropic'
 * @param {string} origin - CORS origin to echo back
 */
async function handleLLMProxy(req, res, targetUrl, provider, origin) {
  try {
    const method = req.method;

    // Provider id -> environment variable holding its API key.
    const apiKeyEnvMap = {
      'aliyun': 'DASHSCOPE_API_KEY',
      'tongyi': 'DASHSCOPE_API_KEY',
      'zhipu': 'ZHIPU_API_KEY',
      'openai': 'OPENAI_API_KEY',
      'deepseek': 'DEEPSEEK_API_KEY',
      'anthropic': 'ANTHROPIC_API_KEY',
      'gemini': 'GOOGLE_API_KEY',
      'mistral': 'MISTRAL_API_KEY',
    };

    // Unknown providers fall back to the "<PROVIDER>_API_KEY" convention.
    const envKeyName = apiKeyEnvMap[provider] || `${provider.toUpperCase()}_API_KEY`;
    const apiKey = process.env[envKeyName];

    const headers = {
      'Content-Type': req.headers['content-type'] || 'application/json',
      'Accept': req.headers['accept'] || '*/*'
    };

    if (apiKey) {
      if (provider === 'anthropic') {
        headers['x-api-key'] = apiKey;
        headers['anthropic-version'] = req.headers['anthropic-version'] || '2023-06-01';
      } else if (provider === 'gemini') {
        headers['x-goog-api-key'] = apiKey;
      } else {
        // OpenAI-style auth (also used by Aliyun DashScope, Zhipu AI, etc.).
        headers['Authorization'] = `Bearer ${apiKey}`;
      }
    } else {
      // No server-side key configured: pass through the client's auth headers.
      if (req.headers['authorization']) headers['Authorization'] = req.headers['authorization'];
      if (req.headers['x-api-key']) headers['x-api-key'] = req.headers['x-api-key'];
      if (req.headers['x-goog-api-key']) headers['x-goog-api-key'] = req.headers['x-goog-api-key'];
    }

    const options = { method, headers, redirect: 'follow' };

    // Attach the body for non-GET/HEAD requests.
    if (method !== 'GET' && method !== 'HEAD') {
      const bodyBuffer = await readBody(req);
      if (bodyBuffer.length > 0) {
        options.body = bodyBuffer;
        // For FormData uploads (e.g. Mistral file upload) restore the original
        // Content-Type so the multipart boundary survives. This late mutation
        // still takes effect because options.headers references this object.
        if (headers['Content-Type'] && headers['Content-Type'].includes('multipart/form-data')) {
          headers['Content-Type'] = req.headers['content-type']; // keep the boundary
        }
      }
    }

    console.log(`[LLM Proxy] ${method} ${targetUrl} (Provider: ${provider})`);

    const response = await fetch(targetUrl, options);

    const contentType = response.headers.get('content-type') || '';

    const responseHeaders = {
      'Access-Control-Allow-Origin': origin,
      'Access-Control-Allow-Methods': 'GET, HEAD, POST, PUT, DELETE, OPTIONS',
      'Access-Control-Allow-Headers': '*',
      'Access-Control-Expose-Headers': '*',
    };

    if (contentType) responseHeaders['Content-Type'] = contentType;

    res.writeHead(response.status, responseHeaders);

    // Pipe the upstream body directly (works for file downloads and SSE streams).
    response.body.pipe(res);

  } catch (error) {
    console.error(`[LLM Proxy] Error:`, error.message);
    // Only emit a JSON error if the response headers have not gone out yet.
    if (!res.headersSent) {
      jsonResponse(res, { error: 'Proxy upstream error', message: error.message }, 503, origin);
    } else {
      res.end();
    }
  }
}
|
||
|
||
// ==================== Academic search proxies ====================

// Forward a request to the Semantic Scholar API, attaching an API key when available.
async function proxySemanticScholar(req, res, path, searchParams, origin) {
  try {
    const apiKey = req.headers['x-api-key'] || process.env.SEMANTIC_SCHOLAR_API_KEY;
    const targetUrl = `https://api.semanticscholar.org/${path}?${searchParams}`;

    console.log(`[Semantic Scholar] Proxying: ${targetUrl}`);

    const requestHeaders = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' };
    if (apiKey) {
      requestHeaders['x-api-key'] = apiKey;
    }

    const upstream = await fetch(targetUrl, { headers: requestHeaders });
    const payload = await upstream.json();
    jsonResponse(res, payload, upstream.status, origin);
  } catch (error) {
    console.error('[Semantic Scholar] Error:', error.message);
    jsonResponse(res, { error: 'Semantic Scholar upstream error', message: error.message }, 503, origin);
  }
}
|
||
|
||
// Forward a request to the NCBI E-utilities (PubMed) API.
async function proxyPubMed(req, res, path, searchParams, origin) {
  try {
    const apiKey = req.headers['x-api-key'] || process.env.PUBMED_API_KEY;
    const query = new URLSearchParams(searchParams);
    if (apiKey) {
      query.set('api_key', apiKey);
    }

    const targetUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/${path}?${query}`;

    console.log(`[PubMed] Proxying: ${targetUrl}`);

    const upstream = await fetch(targetUrl, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } });
    const upstreamType = upstream.headers.get('content-type') || '';
    const bodyText = await upstream.text();

    // E-utilities answers with XML or plain text depending on the endpoint.
    res.writeHead(upstream.status, {
      'Content-Type': upstreamType.includes('xml') ? 'application/xml' : 'text/plain',
      'Access-Control-Allow-Origin': origin,
    });
    res.end(bodyText);
  } catch (error) {
    console.error('[PubMed] Error:', error.message);
    jsonResponse(res, { error: 'PubMed upstream error', message: error.message }, 503, origin);
  }
}
|
||
|
||
// Forward a request to the CrossRef REST API (no API key required).
async function proxyCrossRef(req, res, path, searchParams, origin) {
  try {
    const targetUrl = `https://api.crossref.org/${path}?${searchParams}`;
    console.log(`[CrossRef] Proxying: ${targetUrl}`);

    const upstream = await fetch(targetUrl, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } });
    const payload = await upstream.json();
    jsonResponse(res, payload, upstream.status, origin);
  } catch (error) {
    console.error('[CrossRef] Error:', error.message);
    jsonResponse(res, { error: 'CrossRef upstream error', message: error.message }, 503, origin);
  }
}
|
||
|
||
// Forward a request to the OpenAlex API (no API key required).
async function proxyOpenAlex(req, res, path, searchParams, origin) {
  try {
    const targetUrl = `https://api.openalex.org/${path}?${searchParams}`;
    console.log(`[OpenAlex] Proxying: ${targetUrl}`);

    const upstream = await fetch(targetUrl, { headers: { 'User-Agent': 'PaperBurner-LocalProxy/1.0' } });
    const payload = await upstream.json();
    jsonResponse(res, payload, upstream.status, origin);
  } catch (error) {
    console.error('[OpenAlex] Error:', error.message);
    jsonResponse(res, { error: 'OpenAlex upstream error', message: error.message }, 503, origin);
  }
}
|
||
|
||
/**
 * Forward a query to the arXiv export API and relay the Atom XML response.
 *
 * Fix vs. original: the upstream URL used plain http://; the arXiv export
 * API is served over HTTPS, so the query (and any search terms in it) no
 * longer travels unencrypted.
 */
async function proxyArXiv(req, res, path, searchParams, origin) {
  try {
    const url = `https://export.arxiv.org/api/${path}?${searchParams}`;
    console.log(`[arXiv] Proxying: ${url}`);

    const response = await fetch(url);
    const text = await response.text();

    res.writeHead(response.status, {
      'Content-Type': 'application/xml',
      'Access-Control-Allow-Origin': origin,
    });
    res.end(text);
  } catch (error) {
    console.error('[arXiv] Error:', error.message);
    jsonResponse(res, { error: 'arXiv upstream error', message: error.message }, 503, origin);
  }
}
|
||
|
||
// ==================== OSS upload service ====================

// Accept a multipart file upload and store it in the configured Aliyun OSS bucket.
async function handleOssUpload(req, res, origin) {
  try {
    if (!ossClient) {
      return jsonResponse(res, { error: 'OSS is not configured on the server' }, 500, origin);
    }

    const formParts = await parseMultipart(req);
    const filePart = formParts.find(part => part.name === 'file');

    if (!filePart || !filePart.filename) {
      return jsonResponse(res, { error: 'No file provided' }, 400, origin);
    }

    console.log(`[OSS Upload] Uploading: ${filePart.filename}`);

    // Generate a collision-resistant object key, preserving the file extension.
    const ext = filePart.filename.split('.').pop() || 'pdf';
    const objectName = `chat-pdfs/${Date.now()}_${Math.random().toString(36).substring(2, 7)}.${ext}`;

    const result = await ossClient.put(objectName, Buffer.from(filePart.data));

    console.log(`[OSS Upload] Upload success: ${result.url}`);

    // Assumes the bucket is publicly readable (or that the plain URL is
    // sufficient for the downstream API). If signed access is needed:
    //   ossClient.signatureUrl(objectName, { expires: 3600 })
    // result.url is usually the public URL; some regions differ (-internal).
    let urlToReturn = result.url;
    if (urlToReturn && urlToReturn.startsWith('http://')) {
      // Normalize to HTTPS.
      urlToReturn = urlToReturn.replace('http://', 'https://');
    }

    jsonResponse(res, {
      success: true,
      url: urlToReturn,
      key: objectName, // returned so the caller can delete the object later
      file_name: filePart.filename
    }, 200, origin);

  } catch (error) {
    console.error('[OSS Upload] Error:', error.message);
    jsonResponse(res, { error: error.message }, 500, origin);
  }
}
|
||
|
||
// ==================== ZIP/PDF download proxy ====================

/**
 * Proxy a remote file download (MinerU/Doc2X result ZIPs, PDFs) so the
 * browser avoids CORS restrictions. Supports GET, HEAD, and Range requests.
 *
 * NOTE(review): the whole body is buffered in memory before being sent — a
 * deliberate reliability/memory trade-off per the original comments — so
 * very large files will be expensive.
 */
async function handleProxyDownload(req, res, downloadUrl, origin) {
  try {
    if (!downloadUrl) {
      return jsonResponse(res, { error: 'url parameter is required' }, 400, origin);
    }

    const method = req.method || 'GET';
    console.log(`[Proxy] ${method} ${downloadUrl}`);

    // Forward the client's Range header so partial downloads keep working.
    const headers = { 'User-Agent': 'PaperBurner-LocalProxy/1.0' };
    const rangeHeader = req.headers['range'];
    if (rangeHeader) {
      headers['Range'] = rangeHeader;
    }

    const response = await fetch(downloadUrl, { method, headers, redirect: 'follow' });

    // 206 Partial Content is still a success for Range requests.
    if (!response.ok && response.status !== 206) {
      return jsonResponse(res, { error: `Upstream fetch failed: ${response.status}` }, 502, origin);
    }

    const contentLength = response.headers.get('Content-Length');
    console.log(`[Proxy] ${method} response: ${response.status}, Content-Length: ${contentLength}`);

    const responseHeaders = {
      'Content-Type': response.headers.get('Content-Type') || 'application/octet-stream',
      'Access-Control-Allow-Origin': origin,
      'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS',
      'Access-Control-Allow-Headers': 'Content-Type, Range, X-Auth-Key',
      'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges',
      'Cache-Control': 'no-store',
    };

    // Mirror size/range metadata from the upstream response when present.
    if (contentLength) {
      responseHeaders['Content-Length'] = contentLength;
    }
    if (response.headers.get('Content-Range')) {
      responseHeaders['Content-Range'] = response.headers.get('Content-Range');
    }
    if (response.headers.get('Accept-Ranges')) {
      responseHeaders['Accept-Ranges'] = response.headers.get('Accept-Ranges');
    }

    // HEAD requests return headers only, no body.
    if (method === 'HEAD') {
      res.writeHead(response.status, responseHeaders);
      return res.end();
    }

    // For GET, read the entire body before sending: simpler and more
    // reliable than streaming, at the cost of memory.
    console.log(`[Proxy] Reading response body...`);
    const arrayBuffer = await response.arrayBuffer();
    const buffer = Buffer.from(arrayBuffer);
    console.log(`[Proxy] Got ${buffer.length} bytes, sending to client...`);

    // Overwrite with the actual byte count we are about to send.
    responseHeaders['Content-Length'] = buffer.length.toString();

    res.writeHead(response.status, responseHeaders);
    res.end(buffer);
    console.log(`[Proxy] Done sending ${buffer.length} bytes`);

  } catch (error) {
    console.error('[Proxy] Download error:', error.message);
    // Only emit a JSON error if the response headers have not gone out yet.
    if (!res.headersSent) {
      jsonResponse(res, { error: error.message }, 500, origin);
    } else {
      res.end();
    }
  }
}
|
||
|
||
// ==================== 主路由 ====================
|
||
|
||
/**
 * Main HTTP entry point: dispatches every incoming request to the matching
 * proxy or handler.
 *
 * NOTE: route order matters. More specific prefixes are checked first; the
 * broad `/v1/` fallback (legacy Mistral paths) must stay after the other
 * `/api/...` routes, and the persistent-API delegation to Express is the
 * last match before the 404.
 */
const server = http.createServer(async (req, res) => {
  const url = new URL(req.url, `http://${req.headers.host}`);
  const pathname = url.pathname;
  const searchParams = url.searchParams.toString();
  // Echo the caller's Origin for CORS; '*' for non-browser clients with no Origin header.
  const origin = req.headers.origin || '*';

  // CORS preflight — answered directly, never forwarded upstream.
  if (req.method === 'OPTIONS') {
    return handleCORS(res, origin);
  }

  try {
    // ===== OCR routes (compatible with the Cloudflare Worker) =====

    // MinerU
    if (pathname === '/mineru/upload' && req.method === 'POST') {
      return await handleMinerUUpload(req, res, origin);
    }
    if (pathname.startsWith('/mineru/result/') && req.method === 'GET') {
      const batchId = pathname.split('/mineru/result/')[1];
      return await handleMinerUResult(req, res, batchId, origin);
    }

    // Doc2X
    if (pathname === '/doc2x/upload' && req.method === 'POST') {
      return await handleDoc2XUpload(req, res, origin);
    }
    if (pathname.startsWith('/doc2x/status/') && req.method === 'GET') {
      const uid = pathname.split('/doc2x/status/')[1];
      return await handleDoc2XStatus(req, res, uid, origin);
    }
    if (pathname === '/doc2x/convert' && req.method === 'POST') {
      return await handleDoc2XConvert(req, res, origin);
    }
    if (pathname.startsWith('/doc2x/convert/result/') && req.method === 'GET') {
      const uid = pathname.split('/doc2x/convert/result/')[1];
      return await handleDoc2XConvertResult(req, res, uid, origin);
    }

    // ZIP proxy — forwards an upstream result archive given via ?url=...
    if ((pathname === '/mineru/zip' || pathname === '/doc2x/zip') && (req.method === 'GET' || req.method === 'HEAD')) {
      const zipUrl = url.searchParams.get('url');
      return await handleProxyDownload(req, res, zipUrl, origin);
    }

    // ===== LLM & OCR proxy =====

    // /api/llm/<provider>/<rest...> — maps a provider name to its upstream base URL.
    if (pathname.startsWith('/api/llm/')) {
      const pathParts = pathname.replace('/api/llm/', '').split('/');
      const provider = pathParts[0];
      const restPath = pathParts.slice(1).join('/');

      let targetBaseUrl = '';
      if (provider === 'openai') targetBaseUrl = 'https://api.openai.com';
      else if (provider === 'deepseek') targetBaseUrl = 'https://api.deepseek.com';
      else if (provider === 'anthropic') targetBaseUrl = 'https://api.anthropic.com';
      else if (provider === 'gemini') targetBaseUrl = 'https://generativelanguage.googleapis.com';
      else if (provider === 'mistral') targetBaseUrl = 'https://api.mistral.ai';
      else if (provider === 'aliyun' || provider === 'tongyi') targetBaseUrl = 'https://dashscope.aliyuncs.com/compatible-mode';
      else if (provider === 'zhipu') targetBaseUrl = 'https://open.bigmodel.cn/api/paas/v4';
      else return jsonResponse(res, { error: 'Unknown LLM provider' }, 400, origin);

      // Zhipu's base URL already ends in /v4, so a client-sent "v1/" prefix
      // must be stripped (startsWith guard means replace() hits the leading one).
      let adjustedPath = restPath;
      if (provider === 'zhipu' && adjustedPath.startsWith('v1/')) {
        adjustedPath = adjustedPath.replace('v1/', '');
      }

      let targetUrl = `${targetBaseUrl}/${adjustedPath}`;

      if (searchParams) targetUrl += `?${searchParams}`;

      return await handleLLMProxy(req, res, targetUrl, provider, origin);
    }

    if (pathname.startsWith('/api/mistral/')) {
      // Direct pass-through to the Mistral OCR endpoints.
      const restPath = pathname.replace('/api/mistral/', '');
      let targetUrl = `https://api.mistral.ai/${restPath}`;
      if (searchParams) targetUrl += `?${searchParams}`;

      return await handleLLMProxy(req, res, targetUrl, 'mistral', origin);
    }

    // Legacy fallback: older frontends call Mistral's official /v1/* paths
    // against this localhost proxy directly; forward them as-is.
    if (pathname.startsWith('/v1/')) {
      let targetUrl = `https://api.mistral.ai${pathname}`;
      if (searchParams) targetUrl += `?${searchParams}`;
      return await handleLLMProxy(req, res, targetUrl, 'mistral', origin);
    }

    // ===== OSS upload =====
    if (pathname === '/api/upload/oss' && req.method === 'POST') {
      return await handleOssUpload(req, res, origin);
    }

    // ===== Academic search routes (compatible with the Cloudflare Worker) =====

    if (pathname.startsWith('/api/semanticscholar/')) {
      const path = pathname.replace('/api/semanticscholar/', '');
      return await proxySemanticScholar(req, res, path, searchParams, origin);
    }

    if (pathname.startsWith('/api/pubmed/')) {
      const path = pathname.replace('/api/pubmed/', '');
      return await proxyPubMed(req, res, path, searchParams, origin);
    }

    if (pathname.startsWith('/api/crossref/')) {
      const path = pathname.replace('/api/crossref/', '');
      return await proxyCrossRef(req, res, path, searchParams, origin);
    }

    if (pathname.startsWith('/api/openalex/')) {
      const path = pathname.replace('/api/openalex/', '');
      return await proxyOpenAlex(req, res, path, searchParams, origin);
    }

    if (pathname.startsWith('/api/arxiv/')) {
      const path = pathname.replace('/api/arxiv/', '');
      return await proxyArXiv(req, res, path, searchParams, origin);
    }

    // ===== Local file-read endpoint =====
    // Streams the first supported file found in the sibling `input/` directory
    // (one level above this module's directory).
    if (pathname === '/api/local/read-first-input' && req.method === 'GET') {
      try {
        const inputDir = join(dirname(__dirname), 'input');
        if (!existsSync(inputDir)) {
          return jsonResponse(res, { error: 'Input directory not found' }, 404, origin);
        }

        const fs = await import('fs/promises');
        const files = await fs.readdir(inputDir);
        // Only accept the formats the frontend can handle (pdf, md, txt, images).
        const supportedExts = ['.pdf', '.md', '.txt', '.png', '.jpg', '.jpeg'];
        const validFiles = files.filter(f => supportedExts.some(ext => f.toLowerCase().endsWith(ext)));

        if (validFiles.length === 0) {
          return jsonResponse(res, { error: 'No valid files found in input directory' }, 404, origin);
        }

        // Take the first match (directory order as returned by readdir).
        const firstFile = validFiles[0];
        const filePath = join(inputDir, firstFile);
        const stats = await fs.stat(filePath);

        // Derive the Content-Type from the file extension.
        const ext = firstFile.toLowerCase().split('.').pop();
        const mimeTypes = {
          'pdf': 'application/pdf',
          'md': 'text/markdown',
          'txt': 'text/plain',
          'png': 'image/png',
          'jpg': 'image/jpeg',
          'jpeg': 'image/jpeg'
        };
        const contentType = mimeTypes[ext] || 'application/octet-stream';

        // Write headers, then hand the response over to a file stream.
        res.writeHead(200, {
          'Content-Type': contentType,
          'Content-Length': stats.size.toString(),
          'Content-Disposition': `attachment; filename="${encodeURIComponent(firstFile)}"`,
          'Access-Control-Allow-Origin': origin,
          'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS',
          'Access-Control-Expose-Headers': 'Content-Disposition, Content-Length'
        });

        const readStream = createReadStream(filePath);
        readStream.pipe(res);
        return; // The stream now owns the response.
      } catch (err) {
        console.error('[Local API] Error reading input:', err);
        return jsonResponse(res, { error: 'Failed to read input files' }, 500, origin);
      }
    }

    // PDF download proxy (?url=... forwarded upstream).
    if (pathname === '/api/pdf/download' && (req.method === 'GET' || req.method === 'HEAD')) {
      const pdfUrl = url.searchParams.get('url');
      return await handleProxyDownload(req, res, pdfUrl, origin);
    }

    // ===== Health check (both legacy and /api paths) =====
    // NOTE(review): `dbConnected` is a module-level flag — presumably set by the
    // database init code earlier in this file; confirm it is assigned before
    // the first request can arrive.
    if (pathname === '/health' || pathname === '/api/health') {
      return jsonResponse(res, {
        status: 'ok',
        timestamp: Date.now(),
        version: '1.0.0',
        database: dbConnected ? 'connected' : 'not_configured',
        services: {
          ocr: {
            mineru: { enabled: true, hasToken: !!process.env.MINERU_API_TOKEN },
            doc2x: { enabled: true, hasToken: !!process.env.DOC2X_API_TOKEN },
          },
          academic: {
            semanticscholar: { enabled: true, hasApiKey: !!process.env.SEMANTIC_SCHOLAR_API_KEY },
            pubmed: { enabled: true, hasApiKey: !!process.env.PUBMED_API_KEY },
            crossref: { enabled: true },
            openalex: { enabled: true },
            arxiv: { enabled: true },
          }
        }
      }, 200, origin);
    }

    // ===== Persistent API (delegated to the Express app) =====
    // Matches /api/documents, /api/user, /api/glossary, /api/chat,
    // /api/references, /api/prompt-pool.
    const persistentApiPrefixes = [
      '/api/documents',
      '/api/user',
      '/api/glossary',
      '/api/chat',
      '/api/references',
      '/api/prompt-pool'
    ];

    if (persistentApiPrefixes.some(prefix => pathname.startsWith(prefix))) {
      // An Express app is itself a (req, res) request listener.
      return app(req, res);
    }

    // No route matched — 404 (guarded in case a handler already wrote headers).
    if (!res.headersSent) {
      jsonResponse(res, { error: 'Not Found' }, 404, origin);
    }

  } catch (error) {
    console.error('Server error:', error);
    if (!res.headersSent) {
      jsonResponse(res, { error: error.message || 'Internal Server Error' }, 500, origin);
    }
  }
});
|
||
|
||
// ==================== 启动服务器 ====================
|
||
|
||
// Server timeouts, sized for large file transfers.
server.timeout = 300000;          // 5 min cap on an individual request/response
server.keepAliveTimeout = 120000; // keep idle keep-alive sockets for 2 min
// Must be STRICTLY greater than keepAliveTimeout: Node's headers timer is
// armed on a reused keep-alive socket, and if the two are equal the socket
// can be destroyed while the next request's headers are still arriving,
// causing intermittent ECONNRESET / 502 behind reverse proxies.
server.headersTimeout = 121000;
|
||
|
||
// Start listening and print the startup banner once the port is bound.
server.listen(PORT, async () => {
  // NOTE(review): this fixed 500 ms sleep presumably gives the async database
  // init (started elsewhere in this file) time to set `dbConnected` before
  // the banner reports its status — it does not guarantee completion; confirm
  // whether awaiting the init promise directly would be safer.
  await new Promise(resolve => setTimeout(resolve, 500));

  // Banner is a single runtime template literal; PORT/env values are padded
  // with padEnd so the box edges line up.
  console.log(`
╔═══════════════════════════════════════════════════════╗
║          Paper Burner Local Proxy Server              ║
╠═══════════════════════════════════════════════════════╣
║  Port: ${PORT.toString().padEnd(47)} ║
║  URL:  http://localhost:${PORT.toString().padEnd(30)} ║
╠═══════════════════════════════════════════════════════╣
║  Database: ${(dbConnected ? '✓ PostgreSQL Connected' : '✗ Not configured').padEnd(38)} ║
║  Auth:     ${(process.env.AUTH_CHECK_URL ? '✓ External' : '✓ Default (xueai.art)').padEnd(38)} ║
╠═══════════════════════════════════════════════════════╣
║  LLM Providers:                                       ║
║    Zhipu AI: ${(process.env.ZHIPU_API_KEY ? '✓ ' + process.env.ZHIPU_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)} ║
║    Aliyun:   ${(process.env.DASHSCOPE_API_KEY ? '✓ ' + process.env.DASHSCOPE_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)} ║
║                                                       ║
║  OCR Services:                                        ║
║    MinerU Token: ${(process.env.MINERU_API_TOKEN ? '✓ Configured' : '✗ Not set').padEnd(36)} ║
║    Doc2X Token:  ${(process.env.DOC2X_API_TOKEN ? '✓ Configured' : '✗ Not set').padEnd(36)} ║
║    Mistral Key:  ${(process.env.MISTRAL_API_KEY ? '✓ ' + process.env.MISTRAL_API_KEY.substring(0,6) + '***' : '✗ Not set').padEnd(36)} ║
║                                                       ║
║  Academic Search:                                     ║
║    Semantic Scholar, PubMed, CrossRef,                ║
║    OpenAlex, arXiv                                    ║
║                                                       ║
║  OSS Upload:                                          ║
║    Configured: ${((process.env.OSS_BUCKET || process.env.OSS_BUCKET_NAME) && process.env.OSS_ACCESS_KEY_ID ? '✓ Yes' : '✗ No').padEnd(36)} ║
╠═══════════════════════════════════════════════════════╣
║  Persistent API:                                      ║
║    /api/documents, /api/user, /api/glossary           ║
║    /api/chat, /api/references, /api/prompt-pool       ║
╠═══════════════════════════════════════════════════════╣
║  在 Paper Burner 中设置代理地址为:                    ║
║  http://localhost:${PORT.toString().padEnd(38)} ║
╚═══════════════════════════════════════════════════════╝
`);
});
|