feat: 深度思考、联网搜索功能完成

This commit is contained in:
肖应宇 2026-03-04 11:33:12 +08:00 committed by SuperManTouX
parent 68e6bad3ca
commit 4ba245706d
7 changed files with 118 additions and 53 deletions

View File

@ -24,6 +24,7 @@ from utils.helpers import (
format_api_response,
extract_delta_content
)
from utils.logger import log_error, log_exception, log_info
# 模拟数据库 - 实际应用中应使用持久化存储
@ -261,6 +262,11 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', True)
temperature = body.get('temperature', 0.7)
max_tokens = body.get('max_tokens', 2000)
deepSearch = body.get('deepSearch', False)
webSearch = body.get('webSearch', False)
deepThinking = body.get('deepThinking', False)
log_info(f"POST /api/chat-ui/chat | 模型: {model} | 流式: {stream} | 联网搜索: {webSearch} | 深度搜索: {deepSearch} | 深度思考: {deepThinking}")
# 处理 files 附件:将文件内容注入到最后一条 user 消息中
files = body.get('files', [])
@ -290,6 +296,9 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', False)
temperature = body.get('temperature', 0.7)
max_tokens = body.get('maxTokens', 2000)
deepSearch = body.get('deepSearch', False)
webSearch = body.get('webSearch', False)
deepThinking = body.get('deepThinking', False)
# 检查是否包含图像内容如果是多模态请求使用MultiModalConversation
has_images = any(
@ -303,6 +312,21 @@ async def chat_endpoint_handler(body: dict):
# 使用多模态API处理图像
return await multimodal_chat_handler(messages, model, stream, temperature, max_tokens)
else:
# 构建 DashScope 额外参数
dashscope_kwargs = {}
if deepSearch:
dashscope_kwargs["enable_search"] = True
dashscope_kwargs["search_options"] = {"search_strategy": "max"}
# 注:仅特定思考模型版本支持部分高级 agent 能力;目前保持使用基础模型 + max 搜索策略
elif webSearch:
dashscope_kwargs["enable_search"] = True
dashscope_kwargs["search_options"] = {"search_strategy": "turbo"}
if deepThinking:
dashscope_kwargs["enable_thinking"] = True
dashscope_kwargs["result_format"] = "message" # enable_thinking 必须配合 result_format=message
dashscope_kwargs["incremental_output"] = True # 流式模式下 enable_thinking 还必须配合 incremental_output=True
# 使用常规聊天API
if stream:
# 流式响应
@ -313,15 +337,15 @@ async def chat_endpoint_handler(body: dict):
messages=messages,
stream=True,
max_tokens=max_tokens,
temperature=temperature
temperature=temperature,
**dashscope_kwargs
)
full_content = "" # 用于累计完整内容
full_reasoning_content = "" # 用于累计完整思考内容
for idx, response in enumerate(responses):
if response.status_code == 200:
# 检查响应是否包含预期的内容
# DashScope API的响应结构可能是 output.choices 或 output.text
content = None
# 尝试从 output.choices 获取内容
@ -330,33 +354,49 @@ async def chat_endpoint_handler(body: dict):
hasattr(response.output, 'choices') and
response.output.choices is not None and
len(response.output.choices) > 0 and
'message' in response.output.choices[0] and
'content' in response.output.choices[0]['message']):
'message' in response.output.choices[0]):
content = response.output.choices[0]['message']['content']
msg_dict = response.output.choices[0]['message']
# incremental_output=True 时,每个 chunk 的 content/reasoning_content 已是增量片段
# 直接使用,无需与 full_* 做对比
content = msg_dict.get('content') or ''
reasoning_content = msg_dict.get('reasoning_content') or ''
# 只有当内容发生变化时才发送增量
if len(content) > len(full_content):
delta_content = extract_delta_content(content, full_content)
full_content = content
delta_str = ""
if delta_content.strip(): # 只有当有非空白新内容时才发送
# 构建 SSE 数据块
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"content": delta_content},
"finish_reason": None
}
]
}
# 处理思考过程片段
if reasoning_content:
if not full_reasoning_content:
# 第一个思考片段,加标题前缀
delta_str += "> **💭 深度思考过程:**\n> \n> "
full_reasoning_content += reasoning_content
# markdown 引用块内换行需加 >
delta_str += reasoning_content.replace("\n", "\n> ")
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
# 处理正式回复片段
if content:
if not full_content and full_reasoning_content:
# 思考结束后首个正式回复,加分隔线
delta_str += "\n\n---\n\n"
full_content += content
delta_str += content
if delta_str:
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"content": delta_str},
"finish_reason": None
}
]
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
# 否则尝试从 output.text 获取内容DashScope特定格式
elif (hasattr(response, 'output') and
response.output and
@ -387,7 +427,8 @@ async def chat_endpoint_handler(body: dict):
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
else:
# 错误处理
# 错误处理:写入 logger方便排查
log_error(f"DashScope API 返回错误: chunk status={response.status_code}, code={response.code}, msg={response.message}")
error_data = {
"error": {
"message": f"API Error: {response.code} - {response.message}",
@ -416,6 +457,7 @@ async def chat_endpoint_handler(body: dict):
yield f"data: {json.dumps(finish_data, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
log_exception(f"流式生成器异常: {e}")
error_data = {
"error": {
"message": str(e),
@ -432,7 +474,8 @@ async def chat_endpoint_handler(body: dict):
messages=messages,
stream=False,
max_tokens=max_tokens,
temperature=temperature
temperature=temperature,
**dashscope_kwargs
)
if response.status_code == 200:
@ -446,10 +489,14 @@ async def chat_endpoint_handler(body: dict):
hasattr(response.output, 'choices') and
response.output.choices is not None and
len(response.output.choices) > 0 and
'message' in response.output.choices[0] and
'content' in response.output.choices[0]['message']):
'message' in response.output.choices[0]):
content = response.output.choices[0]['message']['content']
msg_dict = response.output.choices[0]['message']
content = msg_dict.get('content', '')
rc = msg_dict.get('reasoning_content', '')
if rc:
rc_formatted = rc.replace('\n', '\n> ')
content = f"> **💭 深度思考过程:**\n> \n> {rc_formatted}\n\n---\n\n{content}"
# 否则尝试从 output.text 获取内容DashScope特定格式
elif (hasattr(response, 'output') and
response.output and

View File

@ -4,7 +4,7 @@
"""
import os
import json
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
import dashscope
from dotenv import load_dotenv
@ -28,6 +28,9 @@ from api.chat_routes import (
)
from models.chat_models import ChatRequest, ModelInfo
from utils.helpers import log_request, log_response
from utils.logger import get_logger
logger = get_logger()
# 加载环境变量
@ -46,30 +49,32 @@ app = FastAPI(title="AI Chat API Server (Python)", version="2.0.0")
@app.middleware("http")
async def logging_middleware(request: Request, call_next):
"""中间件:记录请求日志"""
start_time = datetime.utcnow()
"""中间件:记录请求日志并美化输出"""
start_time = datetime.now(timezone.utc)
client_ip = request.client.host if request.client else 'unknown'
# 记录请求信息
log_request(request.method, request.url.path, request.client.host if request.client else 'unknown')
# 请求日志
logger.info(f"{request.method} {request.url.path} | IP: {client_ip}")
response = await call_next(request)
# 计算处理时间
process_time = (datetime.utcnow() - start_time).total_seconds() * 1000
process_time = (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
status_emoji = "" if response.status_code < 400 else ""
# 记录响应信息
# 响应日志(包含端点、状态码、耗时)
logger.info(f"{status_emoji} {request.method} {request.url.path} | 状态: {response.status_code} | 耗时: {process_time:.0f}ms")
# 记录结构化日志(写入日志文件)
log_response(response.status_code, process_time)
# 在响应头中添加处理时间
response.headers["X-Process-Time"] = f"{process_time:.2f}ms"
return response
@app.get("/health")
async def health_check():
"""健康检查端点"""
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
return {"status": "healthy", "timestamp": datetime.now(timezone.utc).isoformat()}
@app.post("/api/chat-ui/chat")

View File

@ -24,6 +24,10 @@ class ChatRequest(BaseModel):
stream: bool = True
temperature: Optional[float] = 0.7
max_tokens: Optional[int] = 2000
files: Optional[List[str]] = None
deepSearch: Optional[bool] = False
webSearch: Optional[bool] = False
deepThinking: Optional[bool] = False
class ModelInfo(BaseModel):

View File

@ -35,9 +35,9 @@ class LoggerSetup:
# 创建日志目录
self.log_dir.mkdir(exist_ok=True)
# 设置日志格式
# 设置日志格式(去掉 funcName:lineno保持人类可读性
self.formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s'
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# 创建logger实例
@ -166,14 +166,12 @@ def log_structured(level: str, message: str, **details):
**details: 额外的结构化数据
"""
logger = get_logger()
structured_log = {
"timestamp": datetime.now().isoformat(),
"level": level.upper(),
"message": message,
"details": details
}
getattr(logger, level.lower())(json.dumps(structured_log, ensure_ascii=False))
# 为了开发时的可读性,不再使用单行 JSON 打印全结构
# 转换为更易读的格式
detail_str = ", ".join(f"{k}={v}" for k, v in details.items() if v)
formatted_msg = f"[{message}] {detail_str}"
getattr(logger, level.lower())(formatted_msg)
def log_request_info(method: str, path: str, client_ip: str = "unknown",

View File

@ -116,7 +116,8 @@ function handlePin() {
}
// - 使 API
async function handleSend(text: string, attachments: Attachment[]) {
async function handleSend(text: string, attachments: Attachment[], options?: { deepSearch?: boolean; webSearch?: boolean; deepThinking?: boolean }) {
console.log("handleSend", text, attachments, options);
//
const uploadingAttachments = attachments.filter((a) => a.uploading);
if (uploadingAttachments.length > 0) {
@ -191,6 +192,9 @@ async function handleSend(text: string, attachments: Attachment[]) {
files: fileUrls, // URL
model: settings.value.defaultModel,
stream: true,
deepSearch: options?.deepSearch,
webSearch: options?.webSearch,
deepThinking: options?.deepThinking,
},
abortController.value.signal,
);

View File

@ -123,6 +123,10 @@ class ChatApi {
temperature: request.temperature,
max_tokens: request.maxTokens,
files: request.files || [], // 传递文件 URL 列表给后端
// 扩展参数传递给我们的 Python 后端进行特殊处理
deepSearch: request.deepSearch,
webSearch: request.webSearch,
deepThinking: request.deepThinking,
};
const response = await fetch(

View File

@ -12,6 +12,9 @@ cleanup() {
exit
}
# 清除占用 8000(后端)/ 5173(前端)端口的旧进程,避免端口占用导致启动失败
lsof -i :8000 -t | xargs -r kill -9; lsof -i :5173 -t | xargs -r kill -9;
# 捕获退出信号
trap cleanup SIGINT SIGTERM EXIT