feat: 深度思考、联网搜索功能完成

This commit is contained in:
肖应宇 2026-03-04 11:33:12 +08:00 committed by SuperManTouX
parent 68e6bad3ca
commit 4ba245706d
7 changed files with 118 additions and 53 deletions

View File

@ -24,6 +24,7 @@ from utils.helpers import (
format_api_response, format_api_response,
extract_delta_content extract_delta_content
) )
from utils.logger import log_error, log_exception, log_info
# 模拟数据库 - 实际应用中应使用持久化存储 # 模拟数据库 - 实际应用中应使用持久化存储
@ -261,6 +262,11 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', True) stream = body.get('stream', True)
temperature = body.get('temperature', 0.7) temperature = body.get('temperature', 0.7)
max_tokens = body.get('max_tokens', 2000) max_tokens = body.get('max_tokens', 2000)
deepSearch = body.get('deepSearch', False)
webSearch = body.get('webSearch', False)
deepThinking = body.get('deepThinking', False)
log_info(f"POST /api/chat-ui/chat | 模型: {model} | 流式: {stream} | 联网搜索: {webSearch} | 深度搜索: {deepSearch} | 深度思考: {deepThinking}")
# 处理 files 附件:将文件内容注入到最后一条 user 消息中 # 处理 files 附件:将文件内容注入到最后一条 user 消息中
files = body.get('files', []) files = body.get('files', [])
@ -290,6 +296,9 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', False) stream = body.get('stream', False)
temperature = body.get('temperature', 0.7) temperature = body.get('temperature', 0.7)
max_tokens = body.get('maxTokens', 2000) max_tokens = body.get('maxTokens', 2000)
deepSearch = body.get('deepSearch', False)
webSearch = body.get('webSearch', False)
deepThinking = body.get('deepThinking', False)
# 检查是否包含图像内容如果是多模态请求使用MultiModalConversation # 检查是否包含图像内容如果是多模态请求使用MultiModalConversation
has_images = any( has_images = any(
@ -303,6 +312,21 @@ async def chat_endpoint_handler(body: dict):
# 使用多模态API处理图像 # 使用多模态API处理图像
return await multimodal_chat_handler(messages, model, stream, temperature, max_tokens) return await multimodal_chat_handler(messages, model, stream, temperature, max_tokens)
else: else:
# 构建 DashScope 额外参数
dashscope_kwargs = {}
if deepSearch:
dashscope_kwargs["enable_search"] = True
dashscope_kwargs["search_options"] = {"search_strategy": "max"}
# 只有特定的思考模型版本支持部分高级 agent但目前我们保持使用基础模型 + max 策略
elif webSearch:
dashscope_kwargs["enable_search"] = True
dashscope_kwargs["search_options"] = {"search_strategy": "turbo"}
if deepThinking:
dashscope_kwargs["enable_thinking"] = True
dashscope_kwargs["result_format"] = "message" # enable_thinking 必须配合 result_format=message
dashscope_kwargs["incremental_output"] = True # 流式模式下 enable_thinking 还必须配合 incremental_output=True
# 使用常规聊天API # 使用常规聊天API
if stream: if stream:
# 流式响应 # 流式响应
@ -313,15 +337,15 @@ async def chat_endpoint_handler(body: dict):
messages=messages, messages=messages,
stream=True, stream=True,
max_tokens=max_tokens, max_tokens=max_tokens,
temperature=temperature temperature=temperature,
**dashscope_kwargs
) )
full_content = "" # 用于累计完整内容 full_content = "" # 用于累计完整内容
full_reasoning_content = "" # 用于累计完整思考内容
for idx, response in enumerate(responses): for idx, response in enumerate(responses):
if response.status_code == 200: if response.status_code == 200:
# 检查响应是否包含预期的内容
# DashScope API的响应结构可能是 output.choices 或 output.text
content = None content = None
# 尝试从 output.choices 获取内容 # 尝试从 output.choices 获取内容
@ -330,18 +354,34 @@ async def chat_endpoint_handler(body: dict):
hasattr(response.output, 'choices') and hasattr(response.output, 'choices') and
response.output.choices is not None and response.output.choices is not None and
len(response.output.choices) > 0 and len(response.output.choices) > 0 and
'message' in response.output.choices[0] and 'message' in response.output.choices[0]):
'content' in response.output.choices[0]['message']):
content = response.output.choices[0]['message']['content'] msg_dict = response.output.choices[0]['message']
# incremental_output=True 时,每个 chunk 的 content/reasoning_content 已是增量片段
# 直接使用,无需与 full_* 做对比
content = msg_dict.get('content') or ''
reasoning_content = msg_dict.get('reasoning_content') or ''
# 只有当内容发生变化时才发送增量 delta_str = ""
if len(content) > len(full_content):
delta_content = extract_delta_content(content, full_content)
full_content = content
if delta_content.strip(): # 只有当有非空白新内容时才发送 # 处理思考过程片段
# 构建 SSE 数据块 if reasoning_content:
if not full_reasoning_content:
# 第一个思考片段,加标题前缀
delta_str += "> **💭 深度思考过程:**\n> \n> "
full_reasoning_content += reasoning_content
# markdown 引用块内换行需加 >
delta_str += reasoning_content.replace("\n", "\n> ")
# 处理正式回复片段
if content:
if not full_content and full_reasoning_content:
# 思考结束后首个正式回复,加分隔线
delta_str += "\n\n---\n\n"
full_content += content
delta_str += content
if delta_str:
data = { data = {
"id": f"chatcmpl-{generate_unique_id()}", "id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
@ -350,7 +390,7 @@ async def chat_endpoint_handler(body: dict):
"choices": [ "choices": [
{ {
"index": 0, "index": 0,
"delta": {"content": delta_content}, "delta": {"content": delta_str},
"finish_reason": None "finish_reason": None
} }
] ]
@ -387,7 +427,8 @@ async def chat_endpoint_handler(body: dict):
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
else: else:
# 错误处理 # 错误处理:写入 logger方便排查
log_error(f"DashScope API 返回错误: chunk status={response.status_code}, code={response.code}, msg={response.message}")
error_data = { error_data = {
"error": { "error": {
"message": f"API Error: {response.code} - {response.message}", "message": f"API Error: {response.code} - {response.message}",
@ -416,6 +457,7 @@ async def chat_endpoint_handler(body: dict):
yield f"data: {json.dumps(finish_data, ensure_ascii=False)}\n\n" yield f"data: {json.dumps(finish_data, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n" yield "data: [DONE]\n\n"
except Exception as e: except Exception as e:
log_exception(f"流式生成器异常: {e}")
error_data = { error_data = {
"error": { "error": {
"message": str(e), "message": str(e),
@ -432,7 +474,8 @@ async def chat_endpoint_handler(body: dict):
messages=messages, messages=messages,
stream=False, stream=False,
max_tokens=max_tokens, max_tokens=max_tokens,
temperature=temperature temperature=temperature,
**dashscope_kwargs
) )
if response.status_code == 200: if response.status_code == 200:
@ -446,10 +489,14 @@ async def chat_endpoint_handler(body: dict):
hasattr(response.output, 'choices') and hasattr(response.output, 'choices') and
response.output.choices is not None and response.output.choices is not None and
len(response.output.choices) > 0 and len(response.output.choices) > 0 and
'message' in response.output.choices[0] and 'message' in response.output.choices[0]):
'content' in response.output.choices[0]['message']):
content = response.output.choices[0]['message']['content'] msg_dict = response.output.choices[0]['message']
content = msg_dict.get('content', '')
rc = msg_dict.get('reasoning_content', '')
if rc:
rc_formatted = rc.replace('\n', '\n> ')
content = f"> **💭 深度思考过程:**\n> \n> {rc_formatted}\n\n---\n\n{content}"
# 否则尝试从 output.text 获取内容DashScope特定格式 # 否则尝试从 output.text 获取内容DashScope特定格式
elif (hasattr(response, 'output') and elif (hasattr(response, 'output') and
response.output and response.output and

View File

@ -4,7 +4,7 @@
""" """
import os import os
import json import json
from datetime import datetime from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
import dashscope import dashscope
from dotenv import load_dotenv from dotenv import load_dotenv
@ -28,6 +28,9 @@ from api.chat_routes import (
) )
from models.chat_models import ChatRequest, ModelInfo from models.chat_models import ChatRequest, ModelInfo
from utils.helpers import log_request, log_response from utils.helpers import log_request, log_response
from utils.logger import get_logger
logger = get_logger()
# 加载环境变量 # 加载环境变量
@ -46,30 +49,32 @@ app = FastAPI(title="AI Chat API Server (Python)", version="2.0.0")
@app.middleware("http") @app.middleware("http")
async def logging_middleware(request: Request, call_next): async def logging_middleware(request: Request, call_next):
"""中间件:记录请求日志""" """中间件:记录请求日志并美化输出"""
start_time = datetime.utcnow() start_time = datetime.now(timezone.utc)
client_ip = request.client.host if request.client else 'unknown'
# 记录请求信息 # 请求日志
log_request(request.method, request.url.path, request.client.host if request.client else 'unknown') logger.info(f"{request.method} {request.url.path} | IP: {client_ip}")
response = await call_next(request) response = await call_next(request)
# 计算处理时间 process_time = (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
process_time = (datetime.utcnow() - start_time).total_seconds() * 1000 status_emoji = "" if response.status_code < 400 else ""
# 记录响应信息 # 响应日志(包含端点、状态码、耗时)
logger.info(f"{status_emoji} {request.method} {request.url.path} | 状态: {response.status_code} | 耗时: {process_time:.0f}ms")
# 记录结构化日志(写入日志文件)
log_response(response.status_code, process_time) log_response(response.status_code, process_time)
# 在响应头中添加处理时间
response.headers["X-Process-Time"] = f"{process_time:.2f}ms" response.headers["X-Process-Time"] = f"{process_time:.2f}ms"
return response return response
@app.get("/health") @app.get("/health")
async def health_check(): async def health_check():
"""健康检查端点""" """健康检查端点"""
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()} return {"status": "healthy", "timestamp": datetime.now(timezone.utc).isoformat()}
@app.post("/api/chat-ui/chat") @app.post("/api/chat-ui/chat")

View File

@ -24,6 +24,10 @@ class ChatRequest(BaseModel):
stream: bool = True stream: bool = True
temperature: Optional[float] = 0.7 temperature: Optional[float] = 0.7
max_tokens: Optional[int] = 2000 max_tokens: Optional[int] = 2000
files: Optional[List[str]] = None
deepSearch: Optional[bool] = False
webSearch: Optional[bool] = False
deepThinking: Optional[bool] = False
class ModelInfo(BaseModel): class ModelInfo(BaseModel):

View File

@ -35,9 +35,9 @@ class LoggerSetup:
# 创建日志目录 # 创建日志目录
self.log_dir.mkdir(exist_ok=True) self.log_dir.mkdir(exist_ok=True)
# 设置日志格式 # 设置日志格式(去掉 funcName:lineno保持人类可读性
self.formatter = logging.Formatter( self.formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s' '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
) )
# 创建logger实例 # 创建logger实例
@ -166,14 +166,12 @@ def log_structured(level: str, message: str, **details):
**details: 额外的结构化数据 **details: 额外的结构化数据
""" """
logger = get_logger() logger = get_logger()
structured_log = { # 为了开发时的可读性,不再使用单行 JSON 打印全结构
"timestamp": datetime.now().isoformat(), # 转换为更易读的格式
"level": level.upper(), detail_str = ", ".join(f"{k}={v}" for k, v in details.items() if v)
"message": message, formatted_msg = f"[{message}] {detail_str}"
"details": details
}
getattr(logger, level.lower())(json.dumps(structured_log, ensure_ascii=False)) getattr(logger, level.lower())(formatted_msg)
def log_request_info(method: str, path: str, client_ip: str = "unknown", def log_request_info(method: str, path: str, client_ip: str = "unknown",

View File

@ -116,7 +116,8 @@ function handlePin() {
} }
// - 使 API // - 使 API
async function handleSend(text: string, attachments: Attachment[]) { async function handleSend(text: string, attachments: Attachment[], options?: { deepSearch?: boolean; webSearch?: boolean; deepThinking?: boolean }) {
console.log("handleSend", text, attachments, options);
// //
const uploadingAttachments = attachments.filter((a) => a.uploading); const uploadingAttachments = attachments.filter((a) => a.uploading);
if (uploadingAttachments.length > 0) { if (uploadingAttachments.length > 0) {
@ -191,6 +192,9 @@ async function handleSend(text: string, attachments: Attachment[]) {
files: fileUrls, // URL files: fileUrls, // URL
model: settings.value.defaultModel, model: settings.value.defaultModel,
stream: true, stream: true,
deepSearch: options?.deepSearch,
webSearch: options?.webSearch,
deepThinking: options?.deepThinking,
}, },
abortController.value.signal, abortController.value.signal,
); );

View File

@ -123,6 +123,10 @@ class ChatApi {
temperature: request.temperature, temperature: request.temperature,
max_tokens: request.maxTokens, max_tokens: request.maxTokens,
files: request.files || [], // 传递文件 URL 列表给后端 files: request.files || [], // 传递文件 URL 列表给后端
// 扩展参数传递给我们的 Python 后端进行特殊处理
deepSearch: request.deepSearch,
webSearch: request.webSearch,
deepThinking: request.deepThinking,
}; };
const response = await fetch( const response = await fetch(

View File

@ -12,6 +12,9 @@ cleanup() {
exit exit
} }
# 清除旧进程
lsof -i :8000 -t | xargs -r kill -9; lsof -i :5173 -t | xargs -r kill -9;
# 捕获退出信号 # 捕获退出信号
trap cleanup SIGINT SIGTERM EXIT trap cleanup SIGINT SIGTERM EXIT