paper-burner/local-proxy/routes/documents.js

472 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Document routes.
 * Reuses the logic from server/src/routes/document.js.
 */
import express from 'express';
import { prisma } from '../db/client.js';
// Router that every handler in this file is registered on.
const router = express.Router();
// Whitelist of status values accepted by the list endpoint's `status` filter.
const ALLOWED_STATUSES = ['PENDING', 'PROCESSING', 'COMPLETED', 'FAILED'];
// ==================== Document CRUD ====================
/**
 * GET / — list the current user's documents, newest first.
 *
 * Query parameters:
 *   page   - 1-based page number; missing, non-numeric or < 1 values become 1.
 *   limit  - page size; missing, zero or non-numeric values become 20, and the
 *            result is clamped to the range 1..100.
 *   status - optional filter, applied only when it is one of ALLOWED_STATUSES;
 *            any other value is silently ignored.
 *
 * Responds with { documents, pagination: { page, limit, total, totalPages } }.
 * Every document also carries the alias fields the frontend expects
 * (name, size, time, ocr, translation, ...) plus a few values lifted out of
 * its metadata. Any thrown error is handed to next().
 *
 * NOTE(review): assumes req.user is populated by upstream auth middleware,
 * which is not visible in this file.
 */
router.get('/', async (req, res, next) => {
  try {
    const { page = 1, limit = 20, status } = req.query;
    const where = {
      userId: req.user.id,
      ...(status && ALLOWED_STATUSES.includes(status) && { status })
    };

    // Always parse as base 10: without a radix, "0x10" would be read as hex 16.
    const pageNum = Math.max(Number.parseInt(page, 10) || 1, 1);
    const limitNum = Math.min(Math.max(Number.parseInt(limit, 10) || 20, 1), 100);

    // The page query and the total count do not depend on each other,
    // so issue them together instead of one after the other.
    const [documents, total] = await Promise.all([
      prisma.document.findMany({
        where,
        orderBy: { createdAt: 'desc' },
        skip: (pageNum - 1) * limitNum,
        take: limitNum,
        select: {
          id: true,
          fileName: true,
          fileSize: true,
          fileType: true,
          status: true,
          ocrProvider: true,
          ocrText: true,
          translationModel: true,
          translatedText: true,
          processingTime: true,
          createdAt: true,
          updatedAt: true,
          metadata: true,
          ocrMetadata: true,
          translationMetadata: true,
          summary: true,
          toc: true
        }
      }),
      prisma.document.count({ where })
    ]);

    // Field mapping: expose the names the frontend expects and pull the
    // nested values out of metadata.
    const mappedDocs = documents.map((doc) => {
      const meta = doc.metadata || {};
      return {
        ...doc,
        name: doc.fileName,
        size: doc.fileSize,
        time: doc.createdAt,
        ocr: doc.ocrText,
        translation: doc.translatedText,
        ocrEngine: doc.ocrProvider,
        translationModelName: doc.translationModel,
        // Values extracted from metadata
        ocrChunks: meta.ocrChunks || [],
        translatedChunks: meta.translatedChunks || [],
        images: meta.images || [],
        metadata: meta
      };
    });

    res.json({
      documents: mappedDocs,
      pagination: {
        page: pageNum,
        limit: limitNum,
        total,
        totalPages: Math.ceil(total / limitNum)
      }
    });
  } catch (error) {
    next(error);
  }
});
/**
 * GET /:id — return one document owned by the current user, together with
 * its annotations and semantic groups.
 *
 * Responds 404 { error: 'Document not found' } when no document with this id
 * belongs to the user. The payload is the stored record plus the alias
 * fields the frontend expects and a few values lifted out of metadata.
 * Any thrown error is handed to next().
 */
router.get('/:id', async (req, res, next) => {
  try {
    const found = await prisma.document.findFirst({
      where: { id: req.params.id, userId: req.user.id },
      include: { annotations: true, semanticGroups: true }
    });

    if (!found) {
      res.status(404).json({ error: 'Document not found' });
      return;
    }

    // metadata may be null on the stored record; normalise to an object.
    const extras = found.metadata || {};
    const {
      fileName,
      fileSize,
      createdAt,
      ocrText,
      translatedText,
      ocrProvider,
      translationModel
    } = found;

    // Frontend-facing aliases, followed by the values extracted from metadata.
    res.json({
      ...found,
      name: fileName,
      size: fileSize,
      time: createdAt,
      ocr: ocrText,
      translation: translatedText,
      ocrEngine: ocrProvider,
      translationModelName: translationModel,
      ocrChunks: extras.ocrChunks || [],
      translatedChunks: extras.translatedChunks || [],
      images: extras.images || [],
      metadata: extras
    });
  } catch (err) {
    next(err);
  }
});
/**
 * POST / — create a document record for the current user.
 *
 * The request body may use either the database field names (fileName,
 * fileSize, ocrText, ...) or the frontend aliases (name, size, ocr,
 * translation, ocrEngine, translationModelName). Body keys that are not
 * recognised are stored inside the `metadata` column; the keys of an object
 * `metadata` in the body are merged in last, so they win on conflict. A
 * client-supplied `id` is never used; the id produced by the create call
 * is returned instead.
 *
 * Responds 201 with the created record plus `name` and `size` aliases.
 * Any thrown error is handed to next().
 */
router.post('/', async (req, res, next) => {
  try {
    const body = { ...req.body };

    // `fileSize || size` alone would discard a legitimate size of 0 (an empty
    // file) when no `size` alias accompanies it, so restore it explicitly.
    let fileSize = body.fileSize || body.size;
    if ((fileSize === undefined || fileSize === null) && body.fileSize === 0) {
      fileSize = 0;
    }

    // Columns defined in the schema, resolved from either naming convention.
    const schemaFields = {
      fileName: body.fileName || body.name,
      fileSize,
      fileType: body.fileType,
      filePath: body.filePath,
      status: body.status || 'PENDING',
      ocrProvider: body.ocrProvider || body.ocrEngine,
      ocrText: body.ocrText || body.ocr,
      ocrMetadata: body.ocrMetadata,
      translationModel: body.translationModel || body.translationModelName,
      translatedText: body.translatedText || body.translation,
      translationMetadata: body.translationMetadata,
      summary: body.summary,
      toc: body.toc,
      processingTime: body.processingTime,
      errorMessage: body.errorMessage
    };

    // Keys that must NOT be copied into metadata: real columns, their
    // frontend aliases, server-managed fields, and `metadata` itself (it is
    // merged below rather than nested).
    // NOTE(review): `ocrSource` is listed here but is not a schema field
    // above, so it is dropped entirely — confirm this is intended.
    const knownFields = new Set([
      'fileName', 'name', 'fileSize', 'size', 'fileType', 'filePath', 'status',
      'ocrProvider', 'ocrText', 'ocr', 'ocrMetadata', 'ocrEngine', 'ocrSource',
      'translationModel', 'translatedText', 'translation', 'translationMetadata',
      'translationModelName', 'summary', 'toc', 'processingTime', 'errorMessage',
      'id', 'userId', 'createdAt', 'updatedAt',
      'metadata'
    ]);

    // Everything else is stored in metadata.
    const metadataFields = {};
    for (const [key, value] of Object.entries(body)) {
      if (!knownFields.has(key) && value !== undefined) {
        metadataFields[key] = value;
      }
    }

    // Merge the contents of an incoming metadata object. Arrays are excluded:
    // Object.assign would otherwise spill their elements in as "0", "1", ...
    const incomingMeta = body.metadata;
    if (incomingMeta && typeof incomingMeta === 'object' && !Array.isArray(incomingMeta)) {
      Object.assign(metadataFields, incomingMeta);
    }
    schemaFields.metadata = Object.keys(metadataFields).length > 0 ? metadataFields : null;

    // Drop undefined values so they are not passed to the create call.
    const cleanData = {};
    for (const [key, value] of Object.entries(schemaFields)) {
      if (value !== undefined) {
        cleanData[key] = value;
      }
    }

    const document = await prisma.document.create({
      data: {
        userId: req.user.id,
        ...cleanData
      }
    });

    // Add the aliases the frontend needs to the response.
    const responseData = {
      ...document,
      name: document.fileName,
      size: document.fileSize
    };
    res.status(201).json(responseData);
  } catch (error) {
    next(error);
  }
});
/**
 * PUT /:id — partially update a document owned by the current user.
 *
 * Body keys are handled as follows:
 *   - frontend aliases (name, size, ocr, translation, ocrEngine,
 *     translationModelName) are copied onto their database column unless the
 *     column itself is also present in the body;
 *   - known columns are written directly;
 *   - id / userId / createdAt / updatedAt are ignored (read-only);
 *   - `metadata` (plain object only) is shallow-merged into the stored
 *     metadata;
 *   - any other key not starting with "_" is stored inside metadata.
 *
 * Responds { success: true }, or 404 { error: 'Document not found' } when the
 * document does not exist or belongs to another user.
 * Any thrown error is handed to next().
 *
 * NOTE(review): the metadata merge is a read-modify-write and is not atomic;
 * two concurrent updates can overwrite each other's metadata changes.
 */
router.put('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    const body = { ...req.body };

    // Field mapping: frontend field name -> database column name.
    const fieldMapping = {
      name: 'fileName',
      size: 'fileSize',
      ocr: 'ocrText',
      translation: 'translatedText',
      ocrEngine: 'ocrProvider',
      translationModelName: 'translationModel'
    };
    for (const [frontendField, dbField] of Object.entries(fieldMapping)) {
      if (body[frontendField] !== undefined && body[dbField] === undefined) {
        body[dbField] = body[frontendField];
      }
    }

    const aliasFields = new Set(Object.keys(fieldMapping));
    const readOnlyFields = new Set(['id', 'userId', 'createdAt', 'updatedAt']);
    const knownFields = new Set([
      'fileName', 'fileSize', 'fileType', 'filePath', 'status',
      'ocrProvider', 'ocrText', 'ocrMetadata',
      'translationModel', 'translatedText', 'translationMetadata',
      'summary', 'toc', 'processingTime', 'errorMessage'
    ]);

    // Split the body into direct column updates and metadata updates.
    const updateData = {};
    const metadataUpdate = {};
    for (const [key, value] of Object.entries(body)) {
      if (readOnlyFields.has(key)) {
        continue; // skip read-only fields
      }
      if (aliasFields.has(key)) {
        // Already mapped onto its column above; storing it again in metadata
        // would duplicate the value (e.g. the full OCR text).
        continue;
      }
      if (knownFields.has(key)) {
        updateData[key] = value;
      } else if (key === 'metadata') {
        metadataUpdate.metadata = value;
      } else if (!key.startsWith('_')) {
        // Remaining fields go into metadata; "_"-prefixed keys are internal.
        metadataUpdate[key] = value;
      }
    }

    // Verify ownership up front so a missing or foreign document yields 404
    // instead of a misleading success. The stored metadata is fetched in the
    // same query because it is needed for the merge below.
    const existingDoc = await prisma.document.findFirst({
      where: { id, userId: req.user.id },
      select: { metadata: true }
    });
    if (!existingDoc) {
      return res.status(404).json({ error: 'Document not found' });
    }

    if (Object.keys(metadataUpdate).length > 0) {
      const newMeta = { ...(existingDoc.metadata || {}) };
      // Entries are applied in body order, so a later key overrides an earlier one.
      for (const [key, value] of Object.entries(metadataUpdate)) {
        if (key === 'metadata') {
          // Merge plain objects only; arrays and primitives are ignored
          // rather than being spread or nested into metadata.
          if (value && typeof value === 'object' && !Array.isArray(value)) {
            Object.assign(newMeta, value);
          }
        } else {
          newMeta[key] = value;
        }
      }
      updateData.metadata = newMeta;
    }

    // Still scoped by userId so the write itself can never touch another
    // user's row.
    await prisma.document.updateMany({
      where: {
        id,
        userId: req.user.id
      },
      data: updateData
    });
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});
/**
 * DELETE /:id — delete a document owned by the current user.
 *
 * The operation is idempotent: it responds { success: true } whether or not
 * a matching document existed. Any thrown error is handed to next().
 */
router.delete('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    // A single owner-scoped deleteMany replaces the previous find-then-delete
    // pair: it removes the window in which the row could vanish between the
    // two calls (making delete() throw) and saves one round trip.
    await prisma.document.deleteMany({
      where: {
        id,
        userId: req.user.id
      }
    });
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});
// ==================== Annotation management ====================
/**
 * POST /:id/annotations — create an annotation on a document owned by the
 * current user.
 *
 * The request body supplies the annotation's fields. `userId` and
 * `documentId` are always taken from the authenticated user and the URL,
 * never from the body.
 *
 * Responds 201 with the created annotation, or
 * 404 { error: 'Document not found' } when the document does not exist or
 * belongs to another user. Any thrown error is handed to next().
 */
router.post('/:id/annotations', async (req, res, next) => {
  try {
    const { id } = req.params;

    // Verify document ownership before attaching anything to it.
    const document = await prisma.document.findFirst({
      where: { id, userId: req.user.id },
      select: { id: true }
    });
    if (!document) {
      return res.status(404).json({ error: 'Document not found' });
    }

    const annotation = await prisma.annotation.create({
      data: {
        ...req.body,
        // Server-controlled fields come last so the body cannot override them.
        userId: req.user.id,
        documentId: id
      }
    });
    res.status(201).json(annotation);
  } catch (error) {
    next(error);
  }
});
/**
 * GET /:id/annotations — list all annotations the current user has on the
 * given document. Responds with an array (empty when there are none).
 * Any thrown error is handed to next().
 */
router.get('/:id/annotations', async (req, res, next) => {
  try {
    const filter = {
      documentId: req.params.id,
      userId: req.user.id
    };
    const rows = await prisma.annotation.findMany({ where: filter });
    res.json(rows);
  } catch (err) {
    next(err);
  }
});
/**
 * PUT /:documentId/annotations/:annotationId — update an annotation that
 * belongs to the current user and the given document.
 *
 * The request body supplies the fields to change. Identity and ownership
 * fields (id, userId, documentId) and timestamps (createdAt, updatedAt) in
 * the body are ignored.
 *
 * Responds { success: true }; an annotation that does not match the filter
 * is left untouched. Any thrown error is handed to next().
 */
router.put('/:documentId/annotations/:annotationId', async (req, res, next) => {
  try {
    const { documentId, annotationId } = req.params;

    // Passing req.body straight through would let a client reassign the
    // annotation to another user or document, so strip those fields first.
    const readOnlyFields = new Set(['id', 'userId', 'documentId', 'createdAt', 'updatedAt']);
    const changes = {};
    for (const [key, value] of Object.entries(req.body || {})) {
      if (readOnlyFields.has(key)) {
        continue; // skip read-only fields
      }
      changes[key] = value;
    }

    await prisma.annotation.updateMany({
      where: {
        id: annotationId,
        documentId,
        userId: req.user.id
      },
      data: changes
    });
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});
/**
 * DELETE /:documentId/annotations/:annotationId — remove an annotation that
 * matches the annotation id, the document id and the current user.
 * Always responds { success: true }, whether or not anything matched.
 * Any thrown error is handed to next().
 */
router.delete('/:documentId/annotations/:annotationId', async (req, res, next) => {
  try {
    const where = {
      id: req.params.annotationId,
      documentId: req.params.documentId,
      userId: req.user.id
    };
    await prisma.annotation.deleteMany({ where });
    res.json({ success: true });
  } catch (err) {
    next(err);
  }
});
// ==================== Semantic group data ====================
/**
 * POST /:id/semantic-groups — create or replace the semantic-group record of
 * a document owned by the current user.
 *
 * Body: { groups, version, source }.
 * Responds with the upserted record, or 404 { error: 'Document not found' }
 * when the document does not exist or belongs to another user.
 * Any thrown error is handed to next().
 */
router.post('/:id/semantic-groups', async (req, res, next) => {
  try {
    const documentId = req.params.id;
    const { groups, version, source } = req.body;
    const payload = { groups, version, source };

    // Ownership check: the document must belong to the caller.
    const owned = await prisma.document.findFirst({
      where: { id: documentId, userId: req.user.id }
    });
    if (!owned) {
      res.status(404).json({ error: 'Document not found' });
      return;
    }

    // The upsert is keyed on documentId.
    const saved = await prisma.semanticGroup.upsert({
      where: { documentId },
      update: payload,
      create: { documentId, ...payload }
    });
    res.json(saved);
  } catch (err) {
    next(err);
  }
});
/**
 * GET /:id/semantic-groups — return the semantic-group record of a document
 * owned by the current user.
 *
 * Responds 404 { error: 'Document not found' } when the document does not
 * exist or belongs to another user, and
 * 404 { error: 'Semantic groups not found' } when the document has no
 * semantic-group record. Any thrown error is handed to next().
 */
router.get('/:id/semantic-groups', async (req, res, next) => {
  try {
    const documentId = req.params.id;

    // Ownership check: the document must belong to the caller.
    const owned = await prisma.document.findFirst({
      where: { id: documentId, userId: req.user.id }
    });
    if (!owned) {
      res.status(404).json({ error: 'Document not found' });
      return;
    }

    const record = await prisma.semanticGroup.findUnique({
      where: { documentId }
    });
    if (!record) {
      res.status(404).json({ error: 'Semantic groups not found' });
      return;
    }

    res.json(record);
  } catch (err) {
    next(err);
  }
});
export default router;