// paper-burner/local-proxy/routes/documents.js

/**
 * 文档路由
 * 复用 server/src/routes/document.js 的逻辑
 */

import express from 'express';
import { prisma } from '../db/client.js';

// Router on which the document, annotation and semantic-group endpoints below are registered.
const router = express.Router();

// Whitelist of status values; the list endpoint only applies a `status` filter that appears here.
const ALLOWED_STATUSES = ['PENDING', 'PROCESSING', 'COMPLETED', 'FAILED'];

// ==================== 文档 CRUD ====================

// List documents.
//
// GET / — returns one page of the documents owned by req.user.id, newest first.
// (req.user is expected to be populated by upstream middleware that is not part
// of this file.)
//
// Query parameters:
//   page   - 1-based page number; missing or non-numeric values fall back to 1.
//   limit  - page size; falls back to 20 and is clamped to the range 1..100.
//   status - optional filter; applied only when it is one of ALLOWED_STATUSES,
//            otherwise silently ignored.
//
// Responds with { documents, pagination: { page, limit, total, totalPages } }.
// Any thrown error is forwarded to the Express error handler through next().
router.get('/', async (req, res, next) => {
  try {
    const { page = 1, limit = 20, status } = req.query;

    const where = {
      userId: req.user.id,
      ...(status && ALLOWED_STATUSES.includes(status) && { status })
    };

    // Always parse as base 10 so inputs such as "0x10" are not read as hex.
    const pageNum = Math.max(Number.parseInt(page, 10) || 1, 1);
    const limitNum = Math.min(Math.max(Number.parseInt(limit, 10) || 20, 1), 100);

    // The page query and the total count do not depend on each other, so run
    // them concurrently instead of one after the other.
    const [documents, total] = await Promise.all([
      prisma.document.findMany({
        where,
        orderBy: { createdAt: 'desc' },
        skip: (pageNum - 1) * limitNum,
        take: limitNum,
        select: {
          id: true,
          fileName: true,
          fileSize: true,
          fileType: true,
          status: true,
          ocrProvider: true,
          ocrText: true,
          translationModel: true,
          translatedText: true,
          processingTime: true,
          createdAt: true,
          updatedAt: true,
          metadata: true,
          ocrMetadata: true,
          translationMetadata: true,
          summary: true,
          toc: true
        }
      }),
      prisma.document.count({ where })
    ]);

    // Field mapping: expose the names the frontend expects and lift the nested
    // chunk/image arrays out of metadata (defaulting each to an empty array).
    const mappedDocs = documents.map(doc => {
      const meta = doc.metadata || {};
      return {
        ...doc,
        name: doc.fileName,
        size: doc.fileSize,
        time: doc.createdAt,
        ocr: doc.ocrText,
        translation: doc.translatedText,
        ocrEngine: doc.ocrProvider,
        translationModelName: doc.translationModel,
        // Fields extracted from metadata
        ocrChunks: meta.ocrChunks || [],
        translatedChunks: meta.translatedChunks || [],
        images: meta.images || [],
        metadata: meta
      };
    });

    res.json({
      documents: mappedDocs,
      pagination: {
        page: pageNum,
        limit: limitNum,
        total,
        totalPages: Math.ceil(total / limitNum)
      }
    });
  } catch (error) {
    next(error);
  }
});

// Fetch a single document.
//
// GET /:id — looks the document up by id, scoped to req.user.id, and includes its
// annotations and semanticGroups relations. Responds 404 with
// { error: 'Document not found' } when nothing matches; otherwise responds with
// the record plus the frontend-facing alias fields. Errors go to next().
router.get('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    const ownedFilter = { id, userId: req.user.id };
    const relations = { annotations: true, semanticGroups: true };

    const record = await prisma.document.findFirst({
      where: ownedFilter,
      include: relations
    });

    if (!record) {
      res.status(404).json({ error: 'Document not found' });
      return;
    }

    // Field mapping: add the names the frontend expects and lift the nested
    // chunk/image arrays out of metadata (each defaults to an empty array).
    const meta = record.metadata || {};
    const frontendFields = {
      name: record.fileName,
      size: record.fileSize,
      time: record.createdAt,
      ocr: record.ocrText,
      translation: record.translatedText,
      ocrEngine: record.ocrProvider,
      translationModelName: record.translationModel,
      ocrChunks: meta.ocrChunks || [],
      translatedChunks: meta.translatedChunks || [],
      images: meta.images || [],
      metadata: meta
    };

    res.json({ ...record, ...frontendFields });
  } catch (error) {
    next(error);
  }
});

// Create a document record.
//
// POST / — builds the row from the request body. Column values accept either the
// database field name or its frontend alias (for example name -> fileName,
// size -> fileSize). Body keys that are not in the reserved list are stored
// inside the metadata object, and the contents of a supplied `metadata` object
// are merged into it rather than nested. The row is always created for
// req.user.id. Responds 201 with the created record plus `name` and `size`
// aliases. Errors go to next().
router.post('/', async (req, res, next) => {
  try {
    const input = { ...req.body };

    // Fields defined in the schema; frontend aliases serve as fallbacks.
    const columns = {
      fileName: input.fileName || input.name,
      fileSize: input.fileSize || input.size,
      fileType: input.fileType,
      filePath: input.filePath,
      status: input.status || 'PENDING',
      ocrProvider: input.ocrProvider || input.ocrEngine,
      ocrText: input.ocrText || input.ocr,
      ocrMetadata: input.ocrMetadata,
      translationModel: input.translationModel || input.translationModelName,
      translatedText: input.translatedText || input.translation,
      translationMetadata: input.translationMetadata,
      summary: input.summary,
      toc: input.toc,
      processingTime: input.processingTime,
      errorMessage: input.errorMessage
    };

    // Keys that must never be copied into metadata: real columns, their
    // frontend aliases, server-managed fields, and `metadata` itself (its
    // contents are merged below instead of being saved as a nested object).
    const reservedKeys = new Set([
      'fileName', 'name', 'fileSize', 'size', 'fileType', 'filePath', 'status',
      'ocrProvider', 'ocrText', 'ocr', 'ocrMetadata', 'ocrEngine', 'ocrSource',
      'translationModel', 'translatedText', 'translation', 'translationMetadata',
      'translationModelName', 'summary', 'toc', 'processingTime', 'errorMessage',
      'id', 'userId', 'createdAt', 'updatedAt',
      'metadata'
    ]);

    // Everything else is kept in metadata.
    const extras = {};
    for (const [key, value] of Object.entries(input)) {
      if (value === undefined || reservedKeys.has(key)) {
        continue;
      }
      extras[key] = value;
    }

    const suppliedMetadata = input.metadata;
    if (suppliedMetadata && typeof suppliedMetadata === 'object') {
      Object.assign(extras, suppliedMetadata);
    }
    columns.metadata = Object.keys(extras).length === 0 ? null : extras;

    // Drop undefined columns. A client-supplied id never reaches this point
    // because `id` is not one of the columns collected above.
    const data = { userId: req.user.id };
    for (const [column, value] of Object.entries(columns)) {
      if (value !== undefined) {
        data[column] = value;
      }
    }

    const created = await prisma.document.create({ data });

    // Add the alias fields the frontend expects to the response.
    res.status(201).json({
      ...created,
      name: created.fileName,
      size: created.fileSize
    });
  } catch (error) {
    next(error);
  }
});

// Update a document.
//
// PUT /:id — applies the request body to the document with the given id, scoped
// to req.user.id.
//   * Frontend alias fields (name, size, ocr, translation, ocrEngine,
//     translationModelName) are copied to their database column when that
//     column was not supplied; the alias key itself is then discarded so the
//     same value is not also written into metadata.
//   * id, userId, createdAt and updatedAt are treated as read-only and skipped.
//   * Known columns are written directly.
//   * A `metadata` value is shallow-merged into the stored metadata when it is a
//     plain object; any other `metadata` value is ignored, matching the create
//     route.
//   * Any other key that does not start with "_" is stored inside metadata.
// Always responds with { success: true }, including when no row matched.
// Errors go to next().
router.put('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    const body = { ...req.body };

    // Field mapping: frontend field name -> database field name
    const fieldMapping = {
      name: 'fileName',
      size: 'fileSize',
      ocr: 'ocrText',
      translation: 'translatedText',
      ocrEngine: 'ocrProvider',
      translationModelName: 'translationModel'
    };

    // Apply the mapping without overriding an explicitly supplied column.
    for (const [frontendField, dbField] of Object.entries(fieldMapping)) {
      if (body[frontendField] !== undefined && body[dbField] === undefined) {
        body[dbField] = body[frontendField];
      }
    }

    // Columns that are written directly; everything else goes to metadata.
    const knownFields = new Set([
      'fileName', 'fileSize', 'fileType', 'filePath', 'status',
      'ocrProvider', 'ocrText', 'ocrMetadata',
      'translationModel', 'translatedText', 'translationMetadata',
      'summary', 'toc', 'processingTime', 'errorMessage'
    ]);

    // Read-only fields plus the alias keys already folded into columns above.
    // Without skipping the aliases, values such as the full OCR text would be
    // stored a second time inside metadata.
    const skippedFields = new Set([
      'id', 'userId', 'createdAt', 'updatedAt',
      ...Object.keys(fieldMapping)
    ]);

    const updateData = {};
    const metadataUpdate = {};

    for (const [key, value] of Object.entries(body)) {
      if (skippedFields.has(key)) {
        continue;
      }
      if (knownFields.has(key)) {
        updateData[key] = value;
      } else if (key === 'metadata') {
        // Only a plain object can be merged; null, arrays and primitives are ignored.
        if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
          metadataUpdate.metadata = value;
        }
      } else if (!key.startsWith('_')) {
        // Other fields go into metadata (keys starting with "_" are internal).
        metadataUpdate[key] = value;
      }
    }

    // Metadata is stored as one value, so read the current one and merge into it.
    // NOTE(review): this read-modify-write is not atomic; concurrent updates to
    // the same document can overwrite each other's metadata changes.
    if (Object.keys(metadataUpdate).length > 0) {
      const existingDoc = await prisma.document.findFirst({
        where: { id, userId: req.user.id },
        select: { metadata: true }
      });

      if (existingDoc) {
        const newMeta = { ...(existingDoc.metadata || {}) };

        // Entries are applied in request-body order, so later keys win.
        for (const [key, value] of Object.entries(metadataUpdate)) {
          if (key === 'metadata') {
            Object.assign(newMeta, value);
          } else {
            newMeta[key] = value;
          }
        }

        updateData.metadata = newMeta;
      }
    }

    await prisma.document.updateMany({
      where: {
        id,
        userId: req.user.id
      },
      data: updateData
    });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

// Delete a document.
//
// DELETE /:id — removes the document with the given id if it belongs to
// req.user.id. The call is idempotent: it responds with { success: true }
// whether or not a row was deleted. Errors go to next().
router.delete('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;

    // A single owner-scoped deleteMany replaces the previous find-then-delete
    // pair. It is a no-op when nothing matches, so a concurrent delete of the
    // same document can no longer make this request fail.
    await prisma.document.deleteMany({
      where: {
        id,
        userId: req.user.id
      }
    });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

// ==================== 标注管理 ====================

// Save an annotation.
//
// POST /:id/annotations — creates an annotation on the document with the given
// id. The document must belong to req.user.id; otherwise the response is 404
// with { error: 'Document not found' }. The request body supplies the
// annotation fields, but userId and documentId are always taken from the
// authenticated user and the URL. Responds 201 with the created annotation.
// Errors go to next().
router.post('/:id/annotations', async (req, res, next) => {
  try {
    const { id } = req.params;

    // Verify document ownership before attaching anything to it.
    const document = await prisma.document.findFirst({
      where: {
        id,
        userId: req.user.id
      },
      select: { id: true }
    });

    if (!document) {
      return res.status(404).json({ error: 'Document not found' });
    }

    const annotation = await prisma.annotation.create({
      data: {
        // Spread the body first so it can never override the owner or the
        // parent document set below.
        ...req.body,
        userId: req.user.id,
        documentId: id
      }
    });

    res.status(201).json(annotation);
  } catch (error) {
    next(error);
  }
});

// List all annotations of a document.
//
// GET /:id/annotations — responds with the annotations whose documentId is the
// URL id and whose userId is req.user.id. Errors go to next().
router.get('/:id/annotations', async (req, res, next) => {
  try {
    const documentId = req.params.id;
    const filter = { documentId, userId: req.user.id };

    const found = await prisma.annotation.findMany({ where: filter });

    res.json(found);
  } catch (error) {
    next(error);
  }
});

// Update an annotation.
//
// PUT /:documentId/annotations/:annotationId — applies the request body to the
// annotation that matches the annotation id, the document id and req.user.id.
// The identity fields id, userId and documentId are removed from the body
// first so a client cannot re-key an annotation or move it to another user or
// document. Always responds with { success: true }, including when no row
// matched. Errors go to next().
router.put('/:documentId/annotations/:annotationId', async (req, res, next) => {
  try {
    const { documentId, annotationId } = req.params;

    // Work on a copy so the incoming request body is left untouched.
    const changes = { ...req.body };
    for (const protectedField of ['id', 'userId', 'documentId']) {
      delete changes[protectedField];
    }

    await prisma.annotation.updateMany({
      where: {
        id: annotationId,
        documentId,
        userId: req.user.id
      },
      data: changes
    });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

// Delete an annotation.
//
// DELETE /:documentId/annotations/:annotationId — removes the annotation that
// matches the annotation id, the document id and req.user.id. Responds with
// { success: true } whether or not a row matched. Errors go to next().
router.delete('/:documentId/annotations/:annotationId', async (req, res, next) => {
  try {
    const { params } = req;
    const filter = {
      id: params.annotationId,
      documentId: params.documentId,
      userId: req.user.id
    };

    await prisma.annotation.deleteMany({ where: filter });

    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

// ==================== 意群数据 ====================

// Save semantic-group data.
//
// POST /:id/semantic-groups — stores { groups, version, source } from the
// request body for the document with the given id, creating the semanticGroup
// row or updating the existing one (upsert keyed on documentId). The document
// must belong to req.user.id; otherwise the response is 404 with
// { error: 'Document not found' }. Responds with the stored row.
// Errors go to next().
router.post('/:id/semantic-groups', async (req, res, next) => {
  try {
    const { id: documentId } = req.params;
    const { groups, version, source } = req.body;

    // Verify document ownership.
    const ownedDocument = await prisma.document.findFirst({
      where: { id: documentId, userId: req.user.id }
    });

    if (!ownedDocument) {
      res.status(404).json({ error: 'Document not found' });
      return;
    }

    const saved = await prisma.semanticGroup.upsert({
      where: { documentId },
      update: { groups, version, source },
      create: { documentId, groups, version, source }
    });

    res.json(saved);
  } catch (error) {
    next(error);
  }
});

// Fetch semantic-group data.
//
// GET /:id/semantic-groups — responds with the semanticGroup row of the document
// with the given id. Responds 404 with { error: 'Document not found' } when the
// document does not exist for req.user.id, and 404 with
// { error: 'Semantic groups not found' } when the document has no such row.
// Errors go to next().
router.get('/:id/semantic-groups', async (req, res, next) => {
  try {
    const { id: documentId } = req.params;

    // Verify document ownership.
    const ownedDocument = await prisma.document.findFirst({
      where: { id: documentId, userId: req.user.id }
    });

    if (!ownedDocument) {
      res.status(404).json({ error: 'Document not found' });
      return;
    }

    const stored = await prisma.semanticGroup.findUnique({
      where: { documentId }
    });

    if (!stored) {
      res.status(404).json({ error: 'Semantic groups not found' });
      return;
    }

    res.json(stored);
  } catch (error) {
    next(error);
  }
});

export default router;