feat: 实现文件附件功能,前端管理上传状态,后端解析文本、PDF、Word文件内容并注入聊天。

This commit is contained in:
肖应宇 2026-03-03 18:02:45 +08:00 committed by SuperManTouX
parent 2d4e777056
commit ec16cc93ed
6 changed files with 287 additions and 32 deletions

1
.gitignore vendored
View File

@ -15,6 +15,7 @@ uploads
.env
.venv
__pycache__
.claude
# Editor directories and files
.vscode/*

View File

@ -34,6 +34,211 @@ upload_dir = Path("uploads")
upload_dir.mkdir(exist_ok=True)
def _extract_text_from_docmind(obj, depth: int = 0) -> str:
"""
递归提取 DocMind JSON 结构中的可读文本
DashScopeParse 返回的 text 字段是 JSON 字符串内部为文档智能解析的树形结构
"""
if depth > 15:
return ""
if isinstance(obj, str):
s = obj.strip()
# 过滤极短、URL、base64 等非正文字符串
if len(s) > 3 and not s.startswith(('http://', 'https://', 'data:', 'oss://')):
return s
return ""
if isinstance(obj, list):
parts = [_extract_text_from_docmind(item, depth + 1) for item in obj]
return "\n".join(p for p in parts if p)
if isinstance(obj, dict):
# 优先处理文本相关字段
priority_keys = ['content', 'text', 'paragraph', 'caption', 'value', 'title']
# 跳过纯元数据字段
skip_keys = {'backlink', 'pos', 'index', 'style', 'font', 'color',
'size', 'hash', 'id_', 'id', 'layouts', 'type', 'link'}
parts = []
for key in priority_keys:
if key in obj:
t = _extract_text_from_docmind(obj[key], depth + 1)
if t:
parts.append(t)
for key, val in obj.items():
if key not in priority_keys and key not in skip_keys:
t = _extract_text_from_docmind(val, depth + 1)
if t:
parts.append(t)
return "\n".join(parts)
return ""
def _read_file_content(file_url: str):
"""
路线一本地文本提取对纯文本格式文件直接读取内容注入消息
路线二DashScopeParse doc/docx/pdf 文件返回 (local_path, suffix) 供异步调用
返回值
- str文本内容路线一成功
- tuple(Path, str)(本地路径, 扩展名)需异步调用 DashScopeParse路线二
- None不支持的文件类型
"""
try:
from urllib.parse import urlparse
parsed = urlparse(file_url)
relative_path = parsed.path.lstrip('/')
local_path = Path(relative_path)
if not local_path.exists():
return f"[文件不存在: {local_path}]"
suffix = local_path.suffix.lower()
# 路线一:纯文本格式直接读取
text_extensions = {'.txt', '.md', '.csv', '.json', '.xml',
'.yaml', '.yml', '.log', '.py', '.js', '.ts', '.html', '.css'}
if suffix in text_extensions:
with open(local_path, 'r', encoding='utf-8', errors='replace') as f:
content = f.read()
max_len = 8000
if len(content) > max_len:
content = content[:max_len] + f"\n\n[...文件内容过长,已截断,共 {len(content)} 字符]"
return content
# 路线二doc/docx/pdf 使用 DashScopeParse 云端解析
dashscope_extensions = {'.doc', '.docx', '.pdf'}
if suffix in dashscope_extensions:
return (local_path, suffix) # 交给异步函数处理
# 其余格式xlsx、pptx 等)暂不支持内容读取
return None
except Exception as e:
print(f"[WARNING] 读取文件内容失败: {e}")
return f"[文件读取失败: {str(e)}]"
async def _parse_with_dashscope(local_path: Path) -> str:
"""
路线二DashScopeParse使用阿里云文档智能解析 doc/docx/pdf 文件
在线程池中运行避免阻塞 FastAPI 事件循环
仅支持 .doc/.docx/.pdf文件大小 100MB页数 1000
"""
import asyncio
def _sync_parse():
try:
from llama_index.readers.dashscope.base import DashScopeParse
from llama_index.readers.dashscope.utils import ResultType
import json
api_key = os.getenv("ALIYUN_API_KEY")
parser = DashScopeParse(
result_type=ResultType.DASHSCOPE_DOCMIND,
api_key=api_key,
num_workers=1,
)
print(f"[INFO] DashScopeParse: 开始解析 {local_path.name} ...")
documents = parser.load_data(file_path=[str(local_path)])
if not documents:
return f"[DashScopeParse: {local_path.name} 解析结果为空]"
texts = []
for doc in documents:
try:
content = json.loads(doc.text)
extracted = _extract_text_from_docmind(content)
texts.append(extracted if extracted else doc.text[:6000])
except Exception:
texts.append(doc.text[:6000] if doc.text else "")
result = "\n\n".join(t for t in texts if t)
print(f"[INFO] DashScopeParse: {local_path.name} 解析完成,提取 {len(result)} 字符")
return result or f"[DashScopeParse: {local_path.name} 未能提取到文本内容]"
except ImportError:
return "[错误DashScopeParse 未安装,请运行: pip install llama-index-core llama-index-readers-dashscope]"
except Exception as e:
print(f"[ERROR] DashScopeParse 解析失败: {e}")
return f"[DashScopeParse 解析失败: {str(e)}]"
loop = asyncio.get_event_loop()
return await loop.run_in_executor(None, _sync_parse)
async def _inject_files_into_messages(messages: list, files: list) -> list:
"""
将文件内容异步注入到消息列表中
- 文本类文件路线一读取内容并追加到最后一条 user 消息中
- doc/docx/pdf路线二调用 DashScopeParse 云端解析后注入
- 其他二进制文件仅告知 AI 文件名和类型
"""
if not files:
return messages
file_context_parts = []
for file_url in files:
from urllib.parse import urlparse
parsed = urlparse(file_url)
filename = parsed.path.split('/')[-1]
suffix = Path(filename).suffix.lower()
result = _read_file_content(file_url)
if isinstance(result, str):
# 路线一:文本内容,直接嵌入
file_context_parts.append(
f"--- 附件文件内容({filename}---\n{result}\n--- 附件结束 ---"
)
elif isinstance(result, tuple):
# 路线二doc/docx/pdf → 调用 DashScopeParse
local_path, _ = result
print(f"[INFO] 路线二:调用 DashScopeParse 解析 {filename}")
parsed_text = await _parse_with_dashscope(local_path)
file_context_parts.append(
f"--- 附件文件内容({filename},阿里云文档智能解析)---\n"
f"{parsed_text}\n--- 附件结束 ---"
)
else:
# 其他不支持的格式:仅告知文件信息
file_context_parts.append(
f"[用户上传了一个文件: {filename},类型: {suffix},暂不支持自动读取内容,请告知用户。]"
)
if not file_context_parts:
return messages
file_context_text = "\n\n" + "\n\n".join(file_context_parts)
# 把文件内容追加到最后一条 user 消息
messages = list(messages) # 复制,避免修改原始列表
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
if isinstance(msg, dict) and msg.get('role') == 'user':
content = msg.get('content', '')
if isinstance(content, str):
messages[i] = dict(msg, content=content + file_context_text)
elif isinstance(content, list):
# 找到现有的 text 项,追加内容
new_content = list(content)
appended = False
for j, item in enumerate(new_content):
if isinstance(item, dict) and item.get('type') == 'text':
new_content[j] = dict(item, text=item['text'] + file_context_text)
appended = True
break
if not appended:
new_content.append({'type': 'text', 'text': file_context_text})
messages[i] = dict(msg, content=new_content)
break
return messages
async def chat_endpoint_handler(body: dict):
"""
聊天接口处理器 - 与阿里云百炼API兼容的接口
@ -56,24 +261,33 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', True)
temperature = body.get('temperature', 0.7)
max_tokens = body.get('max_tokens', 2000)
# 处理 files 附件:将文件内容注入到最后一条 user 消息中
files = body.get('files', [])
if files:
messages = await _inject_files_into_messages(messages, files)
# 调试:打印注入后最后一条 user 消息的内容(截断显示 500 字)
for msg in reversed(messages):
if isinstance(msg, dict) and msg.get('role') == 'user':
content_preview = str(msg.get('content', ''))[:500]
print(f"[DEBUG] 注入文件后 user 消息内容预览: {content_preview}")
break
else:
# 否则是前端简化格式 (来自chat函数)
# 需要将其转换为OpenAI兼容格式
message_text = body.get('message', '')
# 检查message是否已经是格式化的列表带图片的情况
if isinstance(message_text, list):
user_content = message_text
else:
# 如果是字符串,转换为标准格式
user_content = [{"type": "text", "text": message_text}]
messages = [
{"role": "system", "content": body.get('systemPrompt', '你是一个支持视觉理解的助手')},
{"role": "system", "content": body.get('systemPrompt', '你是一个智能助手,可以分析用户发送的文本和文件内容')},
{"role": "user", "content": user_content}
]
model = body.get('model', 'qwen-plus')
stream = body.get('stream', False) # 默认为非流式
stream = body.get('stream', False)
temperature = body.get('temperature', 0.7)
max_tokens = body.get('maxTokens', 2000)
@ -602,13 +816,41 @@ async def delete_conversation_handler(conversation_id: str):
async def upload_file_handler(file: UploadFile = File(...)):
"""文件上传处理器"""
try:
# 检查文件类型
allowed_types = ['image/jpeg', 'image/png', 'image/gif', 'image/webp', 'text/plain', 'application/pdf']
if file.content_type not in allowed_types:
raise HTTPException(status_code=400, detail=f"不支持的文件类型: {file.content_type}")
# 允许的 MIME 类型(宽松策略)
allowed_types = {
# 图片
'image/jpeg', 'image/png', 'image/gif', 'image/webp', 'image/bmp', 'image/svg+xml',
# 文本类
'text/plain', 'text/csv', 'text/markdown', 'text/html', 'text/xml',
'application/json', 'application/xml',
# PDF
'application/pdf',
# Office 文档
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.ms-powerpoint',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
}
# 允许的扩展名兜底MIME 类型可能被浏览器误判)
allowed_extensions = {
'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp',
'.txt', '.md', '.csv', '.json', '.xml', '.yaml', '.yml', '.log',
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
'.py', '.js', '.ts', '.html', '.css'
}
file_extension = Path(file.filename).suffix.lower()
if file.content_type not in allowed_types and file_extension not in allowed_extensions:
raise HTTPException(
status_code=400,
detail=f"不支持的文件类型: {file.content_type}{file_extension}"
)
# 生成唯一文件名
file_extension = Path(file.filename).suffix.lower()
unique_filename = f"{int(datetime.utcnow().timestamp())}_{generate_unique_id()}{file_extension}"
file_path = upload_dir / unique_filename

View File

@ -5,4 +5,7 @@ python-multipart==0.0.18
python-dotenv==1.0.1
aiofiles==24.1.0
pydantic==2.9.2
typing-extensions==4.12.2
typing-extensions==4.12.2
# 路线二阿里云文档智能解析doc/docx/pdf
llama-index-core>=0.10.0
llama-index-readers-dashscope>=0.1.0

View File

@ -118,7 +118,7 @@ function handlePin() {
// - 使 API
async function handleSend(text: string, attachments: Attachment[]) {
//
const uploadingAttachments = attachments.filter(a => a.uploading);
const uploadingAttachments = attachments.filter((a) => a.uploading);
if (uploadingAttachments.length > 0) {
//
const uploads = uploadingAttachments.map(async (attachment) => {
@ -126,7 +126,9 @@ async function handleSend(text: string, attachments: Attachment[]) {
//
return new Promise<void>((resolve) => {
const checkUpload = () => {
const stillUploading = attachments.some(a => a.id === attachment.id && a.uploading);
const stillUploading = attachments.some(
(a) => a.id === attachment.id && a.uploading,
);
if (!stillUploading) {
resolve();
} else {
@ -140,7 +142,7 @@ async function handleSend(text: string, attachments: Attachment[]) {
try {
await Promise.all(uploads);
} catch (error) {
console.error('等待上传完成时发生错误:', error);
console.error("等待上传完成时发生错误:", error);
}
}
@ -174,13 +176,19 @@ async function handleSend(text: string, attachments: Attachment[]) {
// URLAPI
const imageUrls = attachments
.filter((a) => a.type === "image")
.map(a => a.url);
.map((a) => a.url);
// URLtxt, pdf, docx
const fileUrls = attachments
.filter((a) => a.type === "file")
.map((a) => a.url);
const stream = chatApi.streamChat(
{
message: text,
conversationId: currentConversation.value?.id || "",
images: imageUrls, // URL
images: imageUrls,
files: fileUrls, // URL
model: settings.value.defaultModel,
stream: true,
},

View File

@ -34,7 +34,7 @@ export interface ChatRequest {
conversationId?: string;
message: string;
images?: string[];
files?: string[];
files?: string[]; // 非图片附件 URL 列表
model?: string;
temperature?: number;
maxTokens?: number;
@ -93,14 +93,12 @@ class ChatApi {
let userContent;
if (request.images && request.images.length > 0) {
// 如果有图片则构建内容数组针对阿里云DashScope API的格式
userContent = [
{ type: "text", text: request.message }
];
userContent = [{ type: "text", text: request.message }];
// 添加图片URL到内容中阿里云格式
request.images.forEach(imageUrl => {
request.images.forEach((imageUrl) => {
userContent.push({
type: "image_url",
image_url: imageUrl // 注意:阿里云格式不需要嵌套对象
image_url: imageUrl, // 注意:阿里云格式不需要嵌套对象
});
});
} else {
@ -110,17 +108,21 @@ class ChatApi {
// 将前端简化的请求翻译为 OpenAI 兼容的规范请求体
const openAiRequest = {
model: request.model || "qwen-plus", // 可能需要指定支持视觉的模型
model: request.model || "qwen-plus", // 可能需要指定支持视觉的模型
messages: [
{ role: "system", content: request.systemPrompt || "你是一个支持视觉理解的助手。" },
{
role: "system",
content: request.systemPrompt || "你是一个智能助手,可以分析用户发送的文字,文件或图片内容,并进行回答。",
},
{
role: "user",
content: userContent
}
content: userContent,
},
],
stream: true,
temperature: request.temperature,
max_tokens: request.maxTokens
max_tokens: request.maxTokens,
files: request.files || [], // 传递文件 URL 列表给后端
};
const response = await fetch(
@ -190,14 +192,12 @@ class ChatApi {
let userContent;
if (request.images && request.images.length > 0) {
// 如果有图片,则构建内容数组
userContent = [
{ type: "text", text: request.message }
];
userContent = [{ type: "text", text: request.message }];
// 添加图片URL到内容中
request.images.forEach(imageUrl => {
request.images.forEach((imageUrl) => {
userContent.push({
type: "image_url",
image_url: { url: imageUrl }
image_url: { url: imageUrl },
});
});
} else {
@ -207,7 +207,7 @@ class ChatApi {
const requestBody = {
...request,
message: userContent
message: userContent,
};
const response = await fetch(`${this.baseUrl}${API_ENDPOINTS.CHAT}`, {

View File

@ -26,6 +26,7 @@ export interface Attachment {
size?: number;
mimeType?: string;
thumbnail?: string;
uploading?: boolean; // 标记附件是否正在上传中
}
// 推荐选项