# Keys whose values are treated as text-bearing; they are visited first, in
# this order, before any other key of a DocMind node.
_DOCMIND_TEXT_KEYS = ('content', 'text', 'paragraph', 'caption', 'value', 'title')
# Metadata-only keys that are never descended into.
_DOCMIND_SKIP_KEYS = frozenset({
    'backlink', 'pos', 'index', 'style', 'font', 'color',
    'size', 'hash', 'id_', 'id', 'layouts', 'type', 'link',
})
# Strings starting with one of these are links / inline data, not body text.
_DOCMIND_NON_TEXT_PREFIXES = ('http://', 'https://', 'data:', 'oss://')
# Recursion guard for pathological or very deep trees.
_DOCMIND_MAX_DEPTH = 15

# Route 1: extensions that are read locally as UTF-8 text.
_TEXT_EXTENSIONS = frozenset({
    '.txt', '.md', '.csv', '.json', '.xml',
    '.yaml', '.yml', '.log', '.py', '.js', '.ts', '.html', '.css',
})
# Route 2: extensions handed to DashScopeParse.
_DASHSCOPE_EXTENSIONS = frozenset({'.doc', '.docx', '.pdf'})
# Maximum number of characters of a text file embedded into the prompt.
_MAX_INLINE_CHARS = 8000


def _extract_text_from_docmind(obj, depth: int = 0) -> str:
    """Recursively collect the readable text of a decoded DocMind JSON tree.

    Per the original author's note, the ``text`` field returned by
    DashScopeParse is a JSON string whose decoded form is a tree of dicts,
    lists and strings.

    Args:
        obj: A node of the decoded tree (str, list, dict or anything else).
        depth: Current recursion depth; nodes deeper than 15 yield ``""``.

    Returns:
        The extracted text fragments joined with newlines, or ``""`` when the
        node contributes nothing. Strings of three characters or fewer and
        strings that look like URLs / inline data are dropped.
    """
    if depth > _DOCMIND_MAX_DEPTH:
        return ""

    if isinstance(obj, str):
        stripped = obj.strip()
        if len(stripped) > 3 and not stripped.startswith(_DOCMIND_NON_TEXT_PREFIXES):
            return stripped
        return ""

    if isinstance(obj, list):
        children = obj
    elif isinstance(obj, dict):
        # Text-bearing keys first (fixed order), then every remaining key
        # that is not known metadata, in the dict's own order.
        children = [obj[key] for key in _DOCMIND_TEXT_KEYS if key in obj]
        children.extend(
            value
            for key, value in obj.items()
            if key not in _DOCMIND_TEXT_KEYS and key not in _DOCMIND_SKIP_KEYS
        )
    else:
        return ""

    fragments = (_extract_text_from_docmind(child, depth + 1) for child in children)
    return "\n".join(fragment for fragment in fragments if fragment)


def _read_file_content(file_url: str, base_dir: "Path | str | None" = None):
    """Map an uploaded-file URL to local content or to a parse request.

    The URL path (leading slashes stripped) is interpreted relative to the
    current working directory, exactly as before. New: the resolved path must
    lie inside the upload root, otherwise it is reported as missing. The URL
    comes from the request body, so without this check a client could make
    the server read any text-type file (``/uploads/../config.yaml``,
    ``/server/api/chat_routes.py``, ...).

    Args:
        file_url: URL (or bare path) of a previously uploaded file.
        base_dir: Upload root that readable files must live under. Defaults
            to ``uploads``, the directory this module creates for uploads.

    Returns:
        - ``str``: the file text (truncated to 8000 characters plus a notice),
          or a bracketed message when the file is missing / unreadable.
        - ``tuple[Path, str]``: ``(local_path, suffix)`` for .doc/.docx/.pdf,
          to be parsed asynchronously with DashScopeParse.
        - ``None``: the file exists but its type is not supported.
    """
    from urllib.parse import urlparse

    # Deliberately broad: this helper is best-effort and must hand the caller
    # a message instead of raising (bad URL, non-string input, I/O error...).
    try:
        local_path = Path(urlparse(file_url).path.lstrip('/'))
        upload_root = Path("uploads" if base_dir is None else base_dir).resolve()
        resolved = local_path.resolve()

        # Containment check after resolving ".." segments and symlinks.
        try:
            resolved.relative_to(upload_root)
        except ValueError:
            print(f"[WARNING] 拒绝读取上传目录之外的文件: {local_path}")
            # Same message as a missing file so existence is not disclosed.
            return f"[文件不存在: {local_path}]"

        if not resolved.is_file():
            return f"[文件不存在: {local_path}]"

        suffix = local_path.suffix.lower()

        # Route 1: plain-text formats are read directly.
        if suffix in _TEXT_EXTENSIONS:
            with open(resolved, 'r', encoding='utf-8', errors='replace') as f:
                content = f.read()
            total_chars = len(content)
            if total_chars > _MAX_INLINE_CHARS:
                content = (
                    content[:_MAX_INLINE_CHARS]
                    + f"\n\n[...文件内容过长,已截断,共 {total_chars} 字符]"
                )
            return content

        # Route 2: doc/docx/pdf are parsed in the cloud by the async caller.
        if suffix in _DASHSCOPE_EXTENSIONS:
            return (local_path, suffix)

        # Everything else (xlsx, pptx, ...) is not readable yet.
        return None

    except Exception as e:
        print(f"[WARNING] 读取文件内容失败: {e}")
        return f"[文件读取失败: {str(e)}]"


async def _parse_with_dashscope(local_path: Path) -> str:
    """Parse a .doc/.docx/.pdf file with Aliyun DashScopeParse.

    The blocking parser call runs in the event loop's default executor so the
    loop is not blocked. The original docstring states the service limits as
    <=100 MB and <=1000 pages; nothing here enforces them.

    Args:
        local_path: Path of the document to parse.

    Returns:
        The extracted text, or a bracketed diagnostic message when the
        dependency is missing, parsing fails, or nothing could be extracted.
        This coroutine never raises for parser errors.
    """
    import asyncio

    def _sync_parse() -> str:
        try:
            from llama_index.readers.dashscope.base import DashScopeParse
            from llama_index.readers.dashscope.utils import ResultType
            import json

            api_key = os.getenv("ALIYUN_API_KEY")
            parser = DashScopeParse(
                result_type=ResultType.DASHSCOPE_DOCMIND,
                api_key=api_key,
                num_workers=1,
            )
            print(f"[INFO] DashScopeParse: 开始解析 {local_path.name} ...")
            documents = parser.load_data(file_path=[str(local_path)])

            if not documents:
                return f"[DashScopeParse: {local_path.name} 解析结果为空]"

            texts = []
            for doc in documents:
                try:
                    content = json.loads(doc.text)
                except (ValueError, TypeError):
                    # Not JSON (or no text at all): fall back to the raw text.
                    texts.append(doc.text[:6000] if doc.text else "")
                    continue
                extracted = _extract_text_from_docmind(content)
                texts.append(extracted if extracted else doc.text[:6000])

            result = "\n\n".join(t for t in texts if t)
            print(f"[INFO] DashScopeParse: {local_path.name} 解析完成,提取 {len(result)} 字符")
            return result or f"[DashScopeParse: {local_path.name} 未能提取到文本内容]"

        except ImportError:
            return "[错误:DashScopeParse 未安装,请运行: pip install llama-index-core llama-index-readers-dashscope]"
        except Exception as e:
            # Boundary of a third-party/network call: report, do not crash.
            print(f"[ERROR] DashScopeParse 解析失败: {e}")
            return f"[DashScopeParse 解析失败: {str(e)}]"

    # get_running_loop() is the supported call inside a coroutine;
    # get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _sync_parse)


def _append_text_to_parts(parts: list, extra: str) -> list:
    """Return a copy of a multi-part content list with *extra* appended.

    *extra* is appended to the first ``{'type': 'text'}`` part; when there is
    none, a new text part is added at the end.
    """
    new_parts = list(parts)
    for idx, item in enumerate(new_parts):
        if isinstance(item, dict) and item.get('type') == 'text':
            existing = item.get('text')
            # A text part without a usable 'text' value is treated as empty.
            prefix = existing if isinstance(existing, str) else ''
            new_parts[idx] = dict(item, text=prefix + extra)
            return new_parts
    new_parts.append({'type': 'text', 'text': extra})
    return new_parts


async def _inject_files_into_messages(messages: list, files: list) -> list:
    """Append the content of attached files to the last user message.

    - Text files (route 1): content is read locally and embedded.
    - doc/docx/pdf (route 2): parsed with DashScopeParse, then embedded.
    - Other types: only the file name and extension are mentioned.

    Args:
        messages: Chat messages (dicts with ``role`` / ``content``). The list
            and its dicts are not mutated; modified entries are copies.
        files: File URLs taken from the request body. Entries that are not
            non-empty strings, or that cannot be parsed as URLs, are ignored.

    Returns:
        ``messages`` itself when there is nothing to inject, otherwise a new
        list. If no user message exists, a new user message carrying the
        attachment text is appended instead of dropping it silently.
    """
    from urllib.parse import urlparse

    if not files:
        return messages
    # `files` is client-supplied JSON: tolerate a single URL string and
    # ignore shapes that cannot be a list of URLs.
    if isinstance(files, str):
        files = [files]
    elif not isinstance(files, (list, tuple)):
        return messages

    file_context_parts = []
    for file_url in files:
        if not isinstance(file_url, str) or not file_url:
            continue
        try:
            filename = urlparse(file_url).path.split('/')[-1]
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): skip this entry.
            continue
        suffix = Path(filename).suffix.lower()

        result = _read_file_content(file_url)

        if isinstance(result, str):
            # Route 1: text content (or a diagnostic message) embedded as is.
            file_context_parts.append(
                f"--- 附件文件内容({filename})---\n{result}\n--- 附件结束 ---"
            )
        elif isinstance(result, tuple):
            # Route 2: doc/docx/pdf go through DashScopeParse.
            local_path, _ = result
            print(f"[INFO] 路线二:调用 DashScopeParse 解析 {filename}")
            parsed_text = await _parse_with_dashscope(local_path)
            file_context_parts.append(
                f"--- 附件文件内容({filename},阿里云文档智能解析)---\n"
                f"{parsed_text}\n--- 附件结束 ---"
            )
        else:
            # Unsupported type: only tell the model that the file exists.
            file_context_parts.append(
                f"[用户上传了一个文件: {filename},类型: {suffix},暂不支持自动读取内容,请告知用户。]"
            )

    if not file_context_parts:
        return messages

    file_context_text = "\n\n" + "\n\n".join(file_context_parts)

    messages = list(messages)  # shallow copy so the caller's list is untouched
    for i in reversed(range(len(messages))):
        msg = messages[i]
        if not (isinstance(msg, dict) and msg.get('role') == 'user'):
            continue
        content = msg.get('content', '')
        if content is None:
            content = ''
        if isinstance(content, str):
            messages[i] = dict(msg, content=content + file_context_text)
        elif isinstance(content, list):
            messages[i] = dict(
                msg, content=_append_text_to_parts(content, file_context_text)
            )
        # Any other content shape is left unchanged, as before.
        return messages

    # No user message at all: keep the attachments instead of dropping them.
    messages.append({'role': 'user', 'content': file_context_text.lstrip('\n')})
    return messages