feat: 实现文件附件功能，前端管理上传状态，后端解析文本、PDF、Word文件内容并注入聊天。

2026-03-03 18:02:45 +08:00 · 2026-03-03 18:02:45 +08:00 · ec16cc93ed
commit ec16cc93ed
parent 2d4e777056
6 changed files with 287 additions and 32 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,6 +15,7 @@ uploads
 .env
 .venv
 __pycache__
+.claude

 # Editor directories and files
 .vscode/*
--- a/server/api/chat_routes.py
+++ b/server/api/chat_routes.py
@ -34,6 +34,211 @@ upload_dir = Path("uploads")
 upload_dir.mkdir(exist_ok=True)


+def _extract_text_from_docmind(obj, depth: int = 0) -> str:
+    """
+    Recursively collect human-readable text from a DocMind JSON structure.
+
+    The ``text`` field returned by DashScopeParse is a JSON string whose
+    decoded value is a tree of dicts, lists and strings; this walks that tree
+    and joins every usable string with newlines.
+
+    Args:
+        obj: Any decoded JSON value (str, list, dict, or anything else).
+        depth: Current recursion depth; values above 15 yield an empty string.
+
+    Returns:
+        The extracted text, or "" when nothing usable was found.
+    """
+    # Stop descending once the tree is deeper than the allowed limit.
+    if depth > 15:
+        return ""
+
+    if isinstance(obj, str):
+        stripped = obj.strip()
+        # Very short strings and URL-like / data-URI strings are not body text.
+        noise_prefixes = ('http://', 'https://', 'data:', 'oss://')
+        if len(stripped) <= 3 or stripped.startswith(noise_prefixes):
+            return ""
+        return stripped
+
+    if isinstance(obj, list):
+        fragments = []
+        for element in obj:
+            piece = _extract_text_from_docmind(element, depth + 1)
+            if piece:
+                fragments.append(piece)
+        return "\n".join(fragments)
+
+    if isinstance(obj, dict):
+        # Text-bearing fields are visited first, in this fixed order.
+        text_keys = ['content', 'text', 'paragraph', 'caption', 'value', 'title']
+        # Pure metadata fields are never descended into.
+        metadata_keys = {'backlink', 'pos', 'index', 'style', 'font', 'color',
+                         'size', 'hash', 'id_', 'id', 'layouts', 'type', 'link'}
+
+        visit_order = [k for k in text_keys if k in obj]
+        visit_order.extend(
+            k for k in obj if k not in text_keys and k not in metadata_keys
+        )
+
+        fragments = []
+        for k in visit_order:
+            piece = _extract_text_from_docmind(obj[k], depth + 1)
+            if piece:
+                fragments.append(piece)
+        return "\n".join(fragments)
+
+    # Numbers, booleans, None and any other type carry no text.
+    return ""
+
+
+def _read_file_content(file_url: str):
+    """
+    Map an attachment URL to a local file and decide how its content is obtained.
+
+    Route 1 (local text extraction): plain-text formats are read directly and
+    returned, truncated to 8000 characters.
+    Route 2 (DashScopeParse): doc/docx/pdf files are not read here; the local
+    path is handed back so the caller can run the cloud parser asynchronously.
+
+    Args:
+        file_url: URL (or bare path) of an uploaded file. Only the path
+            component is used; it is interpreted relative to the working
+            directory and must resolve to a location inside ``upload_dir``.
+
+    Returns:
+        - str: the file text (route 1), or a bracketed notice when the file is
+          outside the upload directory, missing, or unreadable.
+        - tuple(Path, str): (local path, lowercase extension) for route 2.
+        - None: the file type is not supported for content extraction.
+    """
+    try:
+        from urllib.parse import urlparse
+        parsed = urlparse(file_url)
+        relative_path = parsed.path.lstrip('/')
+        local_path = Path(relative_path)
+
+        # Security: file_url is supplied by the client. Without this check a
+        # path such as "/../server/settings.json" would be opened and its
+        # content injected into the prompt. Resolve ".." segments and symlinks
+        # and require the result to sit under the upload directory. The check
+        # runs before the existence test so it does not reveal whether files
+        # outside the upload directory exist.
+        uploads_root = upload_dir.resolve()
+        try:
+            local_path.resolve().relative_to(uploads_root)
+        except ValueError:
+            print(f"[WARNING] 拒绝读取上传目录之外的文件: {file_url}")
+            return "[拒绝访问: 文件不在上传目录内]"
+
+        # is_file() also rejects directories, which exists() would let through.
+        if not local_path.is_file():
+            return f"[文件不存在: {local_path}]"
+
+        suffix = local_path.suffix.lower()
+
+        # Route 1: plain-text formats are read directly.
+        text_extensions = {'.txt', '.md', '.csv', '.json', '.xml',
+                           '.yaml', '.yml', '.log', '.py', '.js', '.ts', '.html', '.css'}
+        if suffix in text_extensions:
+            # errors='replace' keeps non-UTF-8 files readable instead of raising.
+            with open(local_path, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read()
+            max_len = 8000
+            if len(content) > max_len:
+                # len(content) is evaluated before the reassignment, so the
+                # notice reports the original, untruncated length.
+                content = content[:max_len] + f"\n\n[...文件内容过长，已截断，共 {len(content)} 字符]"
+            return content
+
+        # Route 2: doc/docx/pdf are parsed in the cloud by DashScopeParse.
+        dashscope_extensions = {'.doc', '.docx', '.pdf'}
+        if suffix in dashscope_extensions:
+            return (local_path, suffix)  # handled by the async caller
+
+        # Other formats (xlsx, pptx, ...) have no content extraction yet.
+        return None
+
+    except Exception as e:
+        # Best effort: a failed read becomes a notice, not a request failure.
+        print(f"[WARNING] 读取文件内容失败: {e}")
+        return f"[文件读取失败: {str(e)}]"
+
+
+
+async def _parse_with_dashscope(local_path: Path) -> str:
+    """
+    Route 2: parse a doc/docx/pdf file with Alibaba Cloud DashScopeParse.
+
+    The blocking SDK call runs in the event loop's default executor so the
+    event loop is not blocked while the document is uploaded and parsed.
+    Per the original author's note the service accepts only .doc/.docx/.pdf,
+    files up to 100MB and up to 1000 pages.
+
+    Args:
+        local_path: Path of the local file to parse.
+
+    Returns:
+        The extracted text. Failures (SDK not installed, empty result, any
+        parsing error) are returned as a bracketed notice string rather than
+        raised.
+    """
+    import asyncio
+
+    def _sync_parse():
+        try:
+            # Imported lazily so a missing SDK surfaces as an ImportError here.
+            from llama_index.readers.dashscope.base import DashScopeParse
+            from llama_index.readers.dashscope.utils import ResultType
+            import json
+
+            api_key = os.getenv("ALIYUN_API_KEY")
+            parser = DashScopeParse(
+                result_type=ResultType.DASHSCOPE_DOCMIND,
+                api_key=api_key,
+                num_workers=1,
+            )
+            print(f"[INFO] DashScopeParse: 开始解析 {local_path.name} ...")
+            documents = parser.load_data(file_path=[str(local_path)])
+
+            if not documents:
+                return f"[DashScopeParse: {local_path.name} 解析结果为空]"
+
+            texts = []
+            for doc in documents:
+                try:
+                    # doc.text is expected to be a JSON string holding the
+                    # DocMind tree; walk it to pull out the readable text.
+                    content = json.loads(doc.text)
+                    extracted = _extract_text_from_docmind(content)
+                    texts.append(extracted if extracted else doc.text[:6000])
+                except Exception:
+                    # Not JSON (or unexpected shape): fall back to the raw
+                    # text, capped at 6000 characters.
+                    texts.append(doc.text[:6000] if doc.text else "")
+
+            result = "\n\n".join(t for t in texts if t)
+            print(f"[INFO] DashScopeParse: {local_path.name} 解析完成，提取 {len(result)} 字符")
+            return result or f"[DashScopeParse: {local_path.name} 未能提取到文本内容]"
+
+        except ImportError:
+            return "[错误：DashScopeParse 未安装，请运行: pip install llama-index-core llama-index-readers-dashscope]"
+        except Exception as e:
+            print(f"[ERROR] DashScopeParse 解析失败: {e}")
+            return f"[DashScopeParse 解析失败: {str(e)}]"
+
+    # Inside a coroutine the running loop is the right handle;
+    # get_running_loop() never creates a loop implicitly.
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(None, _sync_parse)
+
+
+async def _inject_files_into_messages(messages: list, files: list) -> list:
+    """
+    Append the content of attached files to the last user message.
+
+    - Text files (route 1): the text returned by ``_read_file_content`` is
+      embedded directly.
+    - doc/docx/pdf (route 2): the file is parsed with ``_parse_with_dashscope``
+      and the resulting text is embedded.
+    - Anything else: only the file name and extension are reported.
+
+    Args:
+        messages: Chat messages; dict entries with ``role == 'user'`` are
+            candidates for injection. The input list is not mutated.
+        files: Attachment URLs.
+
+    Returns:
+        ``messages`` itself when ``files`` is empty, otherwise a shallow copy
+        in which the last user message carries the attachment text. If no user
+        message exists the copy is returned unchanged.
+    """
+    if not files:
+        return messages
+
+    # Imported once here rather than on every loop iteration.
+    from urllib.parse import urlparse
+
+    file_context_parts = []
+    for file_url in files:
+        filename = urlparse(file_url).path.split('/')[-1]
+        suffix = Path(filename).suffix.lower()
+
+        result = _read_file_content(file_url)
+
+        if isinstance(result, str):
+            # Route 1: text content (or a notice string) is embedded as-is.
+            file_context_parts.append(
+                f"--- 附件文件内容（{filename}）---\n{result}\n--- 附件结束 ---"
+            )
+        elif isinstance(result, tuple):
+            # Route 2: doc/docx/pdf goes through DashScopeParse.
+            local_path, _ = result
+            print(f"[INFO] 路线二：调用 DashScopeParse 解析 {filename}")
+            parsed_text = await _parse_with_dashscope(local_path)
+            file_context_parts.append(
+                f"--- 附件文件内容（{filename}，阿里云文档智能解析）---\n"
+                f"{parsed_text}\n--- 附件结束 ---"
+            )
+        else:
+            # Unsupported format: only tell the model which file was uploaded.
+            file_context_parts.append(
+                f"[用户上传了一个文件: {filename}，类型: {suffix}，暂不支持自动读取内容，请告知用户。]"
+            )
+
+    file_context_text = "\n\n" + "\n\n".join(file_context_parts)
+
+    # Work on a copy so the caller's list and message dicts are not modified.
+    messages = list(messages)
+    for i in range(len(messages) - 1, -1, -1):
+        msg = messages[i]
+        if isinstance(msg, dict) and msg.get('role') == 'user':
+            content = msg.get('content')
+            if content is None:
+                # A missing or null content would otherwise drop every
+                # attachment silently; treat it as an empty string.
+                content = ''
+            if isinstance(content, str):
+                messages[i] = dict(msg, content=content + file_context_text)
+            elif isinstance(content, list):
+                # Multi-part content: extend the first text part, or add one.
+                new_content = list(content)
+                appended = False
+                for j, item in enumerate(new_content):
+                    if isinstance(item, dict) and item.get('type') == 'text':
+                        # .get() tolerates a text part without a 'text' key.
+                        existing_text = item.get('text') or ''
+                        new_content[j] = dict(item, text=existing_text + file_context_text)
+                        appended = True
+                        break
+                if not appended:
+                    new_content.append({'type': 'text', 'text': file_context_text})
+                messages[i] = dict(msg, content=new_content)
+            # Only the last user message receives the attachments.
+            break
+
+    return messages
+
+
 async def chat_endpoint_handler(body: dict):
    """
    聊天接口处理器 - 与阿里云百炼API兼容的接口
@ -56,24 +261,33 @@ async def chat_endpoint_handler(body: dict):
            stream = body.get('stream', True)
            temperature = body.get('temperature', 0.7)
            max_tokens = body.get('max_tokens', 2000)
+
+            # 处理 files 附件：将文件内容注入到最后一条 user 消息中
+            files = body.get('files', [])
+            if files:
+                messages = await _inject_files_into_messages(messages, files)
+                # 调试：打印注入后最后一条 user 消息的内容（截断显示 500 字）
+                for msg in reversed(messages):
+                    if isinstance(msg, dict) and msg.get('role') == 'user':
+                        content_preview = str(msg.get('content', ''))[:500]
+                        print(f"[DEBUG] 注入文件后 user 消息内容预览: {content_preview}")
+                        break
        else:
            # 否则是前端简化格式 (来自chat函数)
-            # 需要将其转换为OpenAI兼容格式
            message_text = body.get('message', '')

            # 检查message是否已经是格式化的列表（带图片的情况）
            if isinstance(message_text, list):
                user_content = message_text
            else:
-                # 如果是字符串，转换为标准格式
                user_content = [{"type": "text", "text": message_text}]

            messages = [
-                {"role": "system", "content": body.get('systemPrompt', '你是一个支持视觉理解的助手。')},
+                {"role": "system", "content": body.get('systemPrompt', '你是一个智能助手，可以分析用户发送的文本和文件内容。')},
                {"role": "user", "content": user_content}
            ]
            model = body.get('model', 'qwen-plus')
-            stream = body.get('stream', False)  # 默认为非流式
+            stream = body.get('stream', False)
            temperature = body.get('temperature', 0.7)
            max_tokens = body.get('maxTokens', 2000)

@ -602,13 +816,41 @@ async def delete_conversation_handler(conversation_id: str):
 async def upload_file_handler(file: UploadFile = File(...)):
    """文件上传处理器"""
    try:
-        # 检查文件类型
-        allowed_types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp', 'text/plain', 'application/pdf']
-        if file.content_type not in allowed_types:
-            raise HTTPException(status_code=400, detail=f"不支持的文件类型: {file.content_type}")
+        # 允许的 MIME 类型（宽松策略）
+        allowed_types = {
+            # 图片
+            'image/jpeg', 'image/png', 'image/gif', 'image/webp', 'image/bmp', 'image/svg+xml',
+            # 文本类
+            'text/plain', 'text/csv', 'text/markdown', 'text/html', 'text/xml',
+            'application/json', 'application/xml',
+            # PDF
+            'application/pdf',
+            # Office 文档
+            'application/msword',
+            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            'application/vnd.ms-excel',
+            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            'application/vnd.ms-powerpoint',
+            'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+        }
+
+        # 允许的扩展名（兜底：MIME 类型可能被浏览器误判）
+        allowed_extensions = {
+            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp',
+            '.txt', '.md', '.csv', '.json', '.xml', '.yaml', '.yml', '.log',
+            '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
+            '.py', '.js', '.ts', '.html', '.css'
+        }
+
+        file_extension = Path(file.filename).suffix.lower()
+
+        if file.content_type not in allowed_types and file_extension not in allowed_extensions:
+            raise HTTPException(
+                status_code=400,
+                detail=f"不支持的文件类型: {file.content_type}（{file_extension}）"
+            )

        # 生成唯一文件名
-        file_extension = Path(file.filename).suffix.lower()
        unique_filename = f"{int(datetime.utcnow().timestamp())}_{generate_unique_id()}{file_extension}"
        file_path = upload_dir / unique_filename

--- a/server/requirements.txt
+++ b/server/requirements.txt
@ -5,4 +5,7 @@ python-multipart==0.0.18
 python-dotenv==1.0.1
 aiofiles==24.1.0
 pydantic==2.9.2
-typing-extensions==4.12.2
+typing-extensions==4.12.2
+# 路线二：阿里云文档智能解析（doc/docx/pdf）
+llama-index-core>=0.10.0
+llama-index-readers-dashscope>=0.1.0
--- a/src/components/chat/ChatMain.vue
+++ b/src/components/chat/ChatMain.vue
@ -118,7 +118,7 @@ function handlePin() {
 // 发送消息 - 使用真实 API
 async function handleSend(text: string, attachments: Attachment[]) {
  // 检查是否还有正在上传的附件
-  const uploadingAttachments = attachments.filter(a => a.uploading);
+  const uploadingAttachments = attachments.filter((a) => a.uploading);
  if (uploadingAttachments.length > 0) {
    // 等待所有上传完成
    const uploads = uploadingAttachments.map(async (attachment) => {
@ -126,7 +126,9 @@ async function handleSend(text: string, attachments: Attachment[]) {
      // 但更简单的方法是等待一小段时间，让上传有机会完成
      return new Promise<void>((resolve) => {
        const checkUpload = () => {
-          const stillUploading = attachments.some(a => a.id === attachment.id && a.uploading);
+          const stillUploading = attachments.some(
+            (a) => a.id === attachment.id && a.uploading,
+          );
          if (!stillUploading) {
            resolve();
          } else {
@ -140,7 +142,7 @@ async function handleSend(text: string, attachments: Attachment[]) {
    try {
      await Promise.all(uploads);
    } catch (error) {
-      console.error('等待上传完成时发生错误:', error);
+      console.error("等待上传完成时发生错误:", error);
    }
  }

@ -174,13 +176,19 @@ async function handleSend(text: string, attachments: Attachment[]) {
    // 提取图片URL用于发送给API
    const imageUrls = attachments
      .filter((a) => a.type === "image")
-      .map(a => a.url);
+      .map((a) => a.url);
+
+    // 提取非图片文件URL（txt, pdf, docx 等）
+    const fileUrls = attachments
+      .filter((a) => a.type === "file")
+      .map((a) => a.url);

    const stream = chatApi.streamChat(
      {
        message: text,
        conversationId: currentConversation.value?.id || "",
-        images: imageUrls, // 添加图片URL
+        images: imageUrls,
+        files: fileUrls, // 传递文件 URL，后端会读取内容
        model: settings.value.defaultModel,
        stream: true,
      },
--- a/src/services/api.ts
+++ b/src/services/api.ts
@ -34,7 +34,7 @@ export interface ChatRequest {
  conversationId?: string;
  message: string;
  images?: string[];
-  files?: string[];
+  files?: string[]; // 非图片附件 URL 列表
  model?: string;
  temperature?: number;
  maxTokens?: number;
@ -93,14 +93,12 @@ class ChatApi {
    let userContent;
    if (request.images && request.images.length > 0) {
      // 如果有图片，则构建内容数组（针对阿里云DashScope API的格式）
-      userContent = [
-        { type: "text", text: request.message }
-      ];
+      userContent = [{ type: "text", text: request.message }];
      // 添加图片URL到内容中（阿里云格式）
-      request.images.forEach(imageUrl => {
+      request.images.forEach((imageUrl) => {
        userContent.push({
          type: "image_url",
-          image_url: imageUrl  // 注意：阿里云格式不需要嵌套对象
+          image_url: imageUrl, // 注意：阿里云格式不需要嵌套对象
        });
      });
    } else {
@ -110,17 +108,21 @@ class ChatApi {

    // 将前端简化的请求翻译为 OpenAI 兼容的规范请求体
    const openAiRequest = {
-      model: request.model || "qwen-plus",  // 可能需要指定支持视觉的模型
+      model: request.model || "qwen-plus", // 可能需要指定支持视觉的模型
      messages: [
-        { role: "system", content: request.systemPrompt || "你是一个支持视觉理解的助手。" },
+        {
+          role: "system",
+          content: request.systemPrompt || "你是一个智能助手，可以分析用户发送的文字，文件或图片内容，并进行回答。",
+        },
        {
          role: "user",
-          content: userContent
-        }
+          content: userContent,
+        },
      ],
      stream: true,
      temperature: request.temperature,
-      max_tokens: request.maxTokens
+      max_tokens: request.maxTokens,
+      files: request.files || [],  // 传递文件 URL 列表给后端
    };

    const response = await fetch(
@ -190,14 +192,12 @@ class ChatApi {
    let userContent;
    if (request.images && request.images.length > 0) {
      // 如果有图片，则构建内容数组
-      userContent = [
-        { type: "text", text: request.message }
-      ];
+      userContent = [{ type: "text", text: request.message }];
      // 添加图片URL到内容中
-      request.images.forEach(imageUrl => {
+      request.images.forEach((imageUrl) => {
        userContent.push({
          type: "image_url",
-          image_url: { url: imageUrl }
+          image_url: { url: imageUrl },
        });
      });
    } else {
@ -207,7 +207,7 @@ class ChatApi {

    const requestBody = {
      ...request,
-      message: userContent
+      message: userContent,
    };

    const response = await fetch(`${this.baseUrl}${API_ENDPOINTS.CHAT}`, {
--- a/src/types/chat.ts
+++ b/src/types/chat.ts
@ -26,6 +26,7 @@ export interface Attachment {
  size?: number;
  mimeType?: string;
  thumbnail?: string;
+  uploading?: boolean; // 标记附件是否正在上传中
 }

 // 推荐选项