472 lines
12 KiB
JavaScript
472 lines
12 KiB
JavaScript
/**
|
||
* 文档路由
|
||
* 复用 server/src/routes/document.js 的逻辑
|
||
*/
|
||
|
||
import express from 'express';
|
||
import { prisma } from '../db/client.js';
|
||
|
||
// Router that all document, annotation and semantic-group endpoints below are attached to.
const router = express.Router();

// Whitelist of status values accepted by the `status` query filter of the list endpoint.
const ALLOWED_STATUSES = [
  'PENDING',
  'PROCESSING',
  'COMPLETED',
  'FAILED'
];
|
||
|
||
// ==================== 文档 CRUD ====================
|
||
|
||
/**
 * GET / — list the authenticated user's documents, newest first.
 *
 * Query parameters:
 *   - page:   1-based page number. Missing, non-numeric, zero or negative
 *             values all resolve to 1.
 *   - limit:  page size. Missing, non-numeric or zero values resolve to 20;
 *             the result is clamped to the range 1..100.
 *   - status: optional filter; silently ignored unless it is one of
 *             ALLOWED_STATUSES.
 *
 * Responds with `{ documents, pagination: { page, limit, total, totalPages } }`.
 * Each document carries the stored columns plus the frontend aliases
 * (`name`, `size`, `time`, `ocr`, `translation`, `ocrEngine`,
 * `translationModelName`) and `ocrChunks` / `translatedChunks` / `images`
 * lifted out of `metadata`.
 *
 * Any thrown error is forwarded to the Express error handler through `next`.
 */
router.get('/', async (req, res, next) => {
  try {
    const { page = 1, limit = 20, status } = req.query;

    const where = {
      userId: req.user.id,
      ...(status && ALLOWED_STATUSES.includes(status) && { status })
    };

    // Explicit radix so input such as "0x10" is not interpreted as hexadecimal.
    const pageNum = Math.max(parseInt(page, 10) || 1, 1);
    const limitNum = Math.min(Math.max(parseInt(limit, 10) || 20, 1), 100);

    // The page query and the total count do not depend on each other,
    // so they are issued concurrently instead of one after the other.
    const [documents, total] = await Promise.all([
      prisma.document.findMany({
        where,
        orderBy: { createdAt: 'desc' },
        skip: (pageNum - 1) * limitNum,
        take: limitNum,
        select: {
          id: true,
          fileName: true,
          fileSize: true,
          fileType: true,
          status: true,
          ocrProvider: true,
          ocrText: true,
          translationModel: true,
          translatedText: true,
          processingTime: true,
          createdAt: true,
          updatedAt: true,
          metadata: true,
          ocrMetadata: true,
          translationMetadata: true,
          summary: true,
          toc: true
        }
      }),
      prisma.document.count({ where })
    ]);

    // Field mapping: expose the names the frontend expects and lift the
    // nested fields out of metadata.
    const mappedDocs = documents.map((doc) => {
      const meta = doc.metadata || {};
      return {
        ...doc,
        name: doc.fileName,
        size: doc.fileSize,
        time: doc.createdAt,
        ocr: doc.ocrText,
        translation: doc.translatedText,
        ocrEngine: doc.ocrProvider,
        translationModelName: doc.translationModel,
        // Fields extracted from metadata
        ocrChunks: meta.ocrChunks || [],
        translatedChunks: meta.translatedChunks || [],
        images: meta.images || [],
        metadata: meta
      };
    });

    res.json({
      documents: mappedDocs,
      pagination: {
        page: pageNum,
        limit: limitNum,
        total,
        totalPages: Math.ceil(total / limitNum)
      }
    });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
/**
 * GET /:id — fetch one document owned by the authenticated user, together
 * with its `annotations` and `semanticGroups` relations.
 *
 * Responds 404 `{ error: 'Document not found' }` when no document with that
 * id exists for the current user. Otherwise responds with the stored record
 * extended by the frontend alias fields and the chunk/image arrays taken
 * from `metadata`. Errors are passed to `next`.
 */
router.get('/:id', async (req, res, next) => {
  try {
    const record = await prisma.document.findFirst({
      where: {
        id: req.params.id,
        userId: req.user.id
      },
      include: {
        annotations: true,
        semanticGroups: true
      }
    });

    if (!record) {
      return res.status(404).json({ error: 'Document not found' });
    }

    // A record without metadata is treated as having an empty metadata object.
    const extra = record.metadata || {};

    // Aliases expected by the frontend, followed by the values lifted from metadata.
    const payload = {
      ...record,
      name: record.fileName,
      size: record.fileSize,
      time: record.createdAt,
      ocr: record.ocrText,
      translation: record.translatedText,
      ocrEngine: record.ocrProvider,
      translationModelName: record.translationModel,
      ocrChunks: extra.ocrChunks || [],
      translatedChunks: extra.translatedChunks || [],
      images: extra.images || [],
      metadata: extra
    };

    res.json(payload);
  } catch (err) {
    next(err);
  }
});
|
||
|
||
/**
 * POST / — create a document record for the authenticated user.
 *
 * The request body may use either the stored field names or the frontend
 * aliases (`name`, `size`, `ocr`, `ocrEngine`, `translation`,
 * `translationModelName`). Any body key that is not a recognised field is
 * stored inside `metadata`; the contents of a `metadata` object in the body
 * are merged into that same object rather than nested under it.
 * `status` defaults to 'PENDING'.
 *
 * Responds 201 with the created record plus `name` and `size` aliases.
 * Errors are passed to `next`.
 */
router.post('/', async (req, res, next) => {
  try {
    const payload = { ...req.body };

    // Frontend aliases: name -> fileName, size -> fileSize
    if (payload.name && !payload.fileName) {
      payload.fileName = payload.name;
    }
    if (payload.size && !payload.fileSize) {
      payload.fileSize = payload.size;
    }

    // Values that map directly onto stored fields.
    const columns = {
      fileName: payload.fileName || payload.name,
      fileSize: payload.fileSize || payload.size,
      fileType: payload.fileType,
      filePath: payload.filePath,
      status: payload.status || 'PENDING',
      ocrProvider: payload.ocrProvider || payload.ocrEngine,
      ocrText: payload.ocrText || payload.ocr,
      ocrMetadata: payload.ocrMetadata,
      translationModel: payload.translationModel || payload.translationModelName,
      translatedText: payload.translatedText || payload.translation,
      translationMetadata: payload.translationMetadata,
      summary: payload.summary,
      toc: payload.toc,
      processingTime: payload.processingTime,
      errorMessage: payload.errorMessage
    };

    // Keys that must NOT be copied into metadata: stored fields, their
    // aliases, record bookkeeping fields, and `metadata` itself (so it is
    // merged below instead of being nested).
    const handledKeys = [
      'fileName', 'name', 'fileSize', 'size', 'fileType', 'filePath', 'status',
      'ocrProvider', 'ocrText', 'ocr', 'ocrMetadata', 'ocrEngine', 'ocrSource',
      'translationModel', 'translatedText', 'translation', 'translationMetadata',
      'translationModelName', 'summary', 'toc', 'processingTime', 'errorMessage',
      'id', 'userId', 'createdAt', 'updatedAt',
      'metadata'
    ];

    // Everything else is kept under metadata.
    const extras = {};
    for (const [key, value] of Object.entries(payload)) {
      if (value === undefined || handledKeys.includes(key)) {
        continue;
      }
      extras[key] = value;
    }

    // Merge the contents of an incoming metadata object (not the field itself).
    if (payload.metadata && typeof payload.metadata === 'object') {
      Object.assign(extras, payload.metadata);
    }
    columns.metadata = Object.keys(extras).length > 0 ? extras : null;

    // Drop undefined values and never forward an `id`, so the database
    // generates its own identifier. The owner is always the current user.
    const data = { userId: req.user.id };
    for (const [key, value] of Object.entries(columns)) {
      if (key === 'id' || value === undefined) {
        continue;
      }
      data[key] = value;
    }

    const created = await prisma.document.create({ data });

    // Add the aliases the frontend reads from the response.
    res.status(201).json({
      ...created,
      name: created.fileName,
      size: created.fileSize
    });
  } catch (err) {
    next(err);
  }
});
|
||
|
||
/**
 * PUT /:id — update a document owned by the authenticated user.
 *
 * Body handling:
 *   - Frontend aliases (`name`, `size`, `ocr`, `translation`, `ocrEngine`,
 *     `translationModelName`) are copied onto their stored field when that
 *     field is not also present in the body. The alias keys themselves are
 *     then dropped, so the same value is not stored a second time inside
 *     `metadata` (this matches how the create endpoint treats aliases).
 *   - `id`, `userId`, `createdAt` and `updatedAt` are read-only and ignored.
 *   - Stored fields are written directly.
 *   - A `metadata` object in the body is shallow-merged into the existing
 *     metadata; any other key not starting with `_` is set as a metadata entry.
 *
 * The write is scoped by both id and owner via `updateMany`, so nothing is
 * changed when no such document exists for this user. The response is
 * `{ success: true }` in either case. Errors are passed to `next`.
 */
router.put('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    const body = { ...req.body };

    // Field mapping: frontend field name -> stored field name
    const fieldMapping = {
      name: 'fileName',
      size: 'fileSize',
      ocr: 'ocrText',
      translation: 'translatedText',
      ocrEngine: 'ocrProvider',
      translationModelName: 'translationModel'
    };
    const aliasFields = Object.keys(fieldMapping);

    // Apply the mapping; an explicitly supplied stored field wins over its alias.
    for (const [frontendField, dbField] of Object.entries(fieldMapping)) {
      if (body[frontendField] !== undefined && body[dbField] === undefined) {
        body[dbField] = body[frontendField];
      }
    }

    // Fields written directly; everything else goes into metadata.
    const knownFields = [
      'fileName', 'fileSize', 'fileType', 'filePath', 'status',
      'ocrProvider', 'ocrText', 'ocrMetadata',
      'translationModel', 'translatedText', 'translationMetadata',
      'summary', 'toc', 'processingTime', 'errorMessage'
    ];
    const readOnlyFields = ['id', 'userId', 'createdAt', 'updatedAt'];

    const updateData = {};
    const metadataUpdate = {};

    for (const [key, value] of Object.entries(body)) {
      if (readOnlyFields.includes(key)) {
        continue; // skip read-only fields
      }
      if (aliasFields.includes(key)) {
        // Already mapped onto its stored field above. Without this guard the
        // alias would fall through to the metadata branch and duplicate
        // (potentially large) text inside the metadata JSON.
        continue;
      }
      if (knownFields.includes(key)) {
        updateData[key] = value;
      } else if (key === 'metadata') {
        // Kept under its own key so it can be merged below.
        metadataUpdate['metadata'] = value;
      } else if (!key.startsWith('_')) {
        // Remaining fields go into metadata (keys starting with `_` are excluded).
        metadataUpdate[key] = value;
      }
    }

    // Metadata changes require reading the current metadata first so the
    // new entries can be merged into it rather than replacing it.
    if (Object.keys(metadataUpdate).length > 0) {
      const existingDoc = await prisma.document.findFirst({
        where: { id, userId: req.user.id },
        select: { metadata: true }
      });

      if (existingDoc) {
        const existingMeta = existingDoc.metadata || {};
        const newMeta = { ...existingMeta };

        for (const [key, value] of Object.entries(metadataUpdate)) {
          if (key === 'metadata' && typeof value === 'object') {
            Object.assign(newMeta, value);
          } else {
            newMeta[key] = value;
          }
        }

        updateData.metadata = newMeta;
      }
    }

    await prisma.document.updateMany({
      where: {
        id,
        userId: req.user.id
      },
      data: updateData
    });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
/**
 * DELETE /:id — delete a document owned by the authenticated user.
 *
 * The delete is a single statement filtered by both id and owner. This
 * replaces a look-up followed by a delete-by-id, which could throw if the
 * record disappeared between the two calls, and it saves one round trip.
 *
 * Responds `{ success: true }` whether or not a matching document existed.
 * Errors are passed to `next`.
 */
router.delete('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;

    // Scoping by userId keeps other users' documents untouched;
    // deleteMany simply removes zero rows when nothing matches.
    await prisma.document.deleteMany({
      where: {
        id,
        userId: req.user.id
      }
    });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
// ==================== 标注管理 ====================
|
||
|
||
/**
 * POST /:id/annotations — create an annotation on one of the authenticated
 * user's documents.
 *
 * The document must belong to the current user; otherwise the response is
 * 404 `{ error: 'Document not found' }`. The annotation is created from the
 * request body, with `userId` and `documentId` always taken from the session
 * and the URL respectively.
 *
 * Responds 201 with the created annotation. Errors are passed to `next`.
 */
router.post('/:id/annotations', async (req, res, next) => {
  try {
    const { id } = req.params;

    // Verify document ownership before attaching anything to it.
    const document = await prisma.document.findFirst({
      where: {
        id,
        userId: req.user.id
      },
      select: { id: true }
    });

    if (!document) {
      return res.status(404).json({ error: 'Document not found' });
    }

    const annotation = await prisma.annotation.create({
      data: {
        ...req.body,
        // Server-derived values come last so a `userId` or `documentId`
        // in the request body cannot override them.
        userId: req.user.id,
        documentId: id
      }
    });

    res.status(201).json(annotation);
  } catch (error) {
    next(error);
  }
});
|
||
|
||
/**
 * GET /:id/annotations — list the authenticated user's annotations for the
 * document identified by `:id`.
 *
 * Responds with the array returned by the query (empty when nothing
 * matches). Errors are passed to `next`.
 */
router.get('/:id/annotations', async (req, res, next) => {
  try {
    // Only annotations that belong to both this document and this user.
    const filter = {
      documentId: req.params.id,
      userId: req.user.id
    };

    const rows = await prisma.annotation.findMany({ where: filter });

    res.json(rows);
  } catch (err) {
    next(err);
  }
});
|
||
|
||
/**
 * PUT /:documentId/annotations/:annotationId — update one annotation.
 *
 * The update is filtered by annotation id, document id and owner, so it
 * only affects an annotation that belongs to the authenticated user on that
 * document. The request body supplies the new values, except that `id`,
 * `userId` and `documentId` are stripped so a client cannot re-key the
 * annotation or move it to another user or document.
 *
 * Responds `{ success: true }` whether or not a row matched. Errors are
 * passed to `next`.
 */
router.put('/:documentId/annotations/:annotationId', async (req, res, next) => {
  try {
    const { documentId, annotationId } = req.params;

    // Identity and ownership fields are fixed by the URL and the session.
    const protectedFields = ['id', 'userId', 'documentId'];
    const data = {};
    for (const [key, value] of Object.entries(req.body || {})) {
      if (!protectedFields.includes(key)) {
        data[key] = value;
      }
    }

    await prisma.annotation.updateMany({
      where: {
        id: annotationId,
        documentId,
        userId: req.user.id
      },
      data
    });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});
|
||
|
||
/**
 * DELETE /:documentId/annotations/:annotationId — delete one annotation.
 *
 * The delete is filtered by annotation id, document id and owner.
 * Responds `{ success: true }` whether or not a row matched. Errors are
 * passed to `next`.
 */
router.delete('/:documentId/annotations/:annotationId', async (req, res, next) => {
  try {
    // All three conditions must hold for a row to be removed.
    const target = {
      id: req.params.annotationId,
      documentId: req.params.documentId,
      userId: req.user.id
    };

    await prisma.annotation.deleteMany({ where: target });

    res.json({ success: true });
  } catch (err) {
    next(err);
  }
});
|
||
|
||
// ==================== 意群数据 ====================
|
||
|
||
/**
 * POST /:id/semantic-groups — create or replace the semantic-group data of
 * one of the authenticated user's documents.
 *
 * Reads `groups`, `version` and `source` from the request body. Responds
 * 404 `{ error: 'Document not found' }` when the document does not exist
 * for the current user; otherwise upserts the record keyed by `documentId`
 * and responds with it. Errors are passed to `next`.
 */
router.post('/:id/semantic-groups', async (req, res, next) => {
  try {
    const documentId = req.params.id;
    const { groups, version, source } = req.body;

    // Ownership check: the document must belong to the current user.
    const owned = await prisma.document.findFirst({
      where: {
        id: documentId,
        userId: req.user.id
      }
    });

    if (!owned) {
      return res.status(404).json({ error: 'Document not found' });
    }

    // The same three values are used whether the row is created or updated.
    const values = { groups, version, source };

    const saved = await prisma.semanticGroup.upsert({
      where: { documentId },
      update: values,
      create: { documentId, ...values }
    });

    res.json(saved);
  } catch (err) {
    next(err);
  }
});
|
||
|
||
/**
 * GET /:id/semantic-groups — fetch the semantic-group data of one of the
 * authenticated user's documents.
 *
 * Responds 404 `{ error: 'Document not found' }` when the document does not
 * exist for the current user, 404 `{ error: 'Semantic groups not found' }`
 * when the document has no semantic-group record, and the record otherwise.
 * Errors are passed to `next`.
 */
router.get('/:id/semantic-groups', async (req, res, next) => {
  try {
    const documentId = req.params.id;

    // Ownership check: the document must belong to the current user.
    const owned = await prisma.document.findFirst({
      where: {
        id: documentId,
        userId: req.user.id
      }
    });

    if (!owned) {
      return res.status(404).json({ error: 'Document not found' });
    }

    const stored = await prisma.semanticGroup.findUnique({
      where: { documentId }
    });

    if (!stored) {
      return res.status(404).json({ error: 'Semantic groups not found' });
    }

    res.json(stored);
  } catch (err) {
    next(err);
  }
});
|
||
|
||
export default router; |