feat: complete deep thinking and web search features

parent 68e6bad3ca
commit 4ba245706d

@@ -24,6 +24,7 @@ from utils.helpers import (
     format_api_response,
     extract_delta_content
 )
+from utils.logger import log_error, log_exception, log_info


 # Mock database - a real application should use persistent storage

@@ -261,6 +262,11 @@ async def chat_endpoint_handler(body: dict):
     stream = body.get('stream', True)
     temperature = body.get('temperature', 0.7)
     max_tokens = body.get('max_tokens', 2000)
+    deepSearch = body.get('deepSearch', False)
+    webSearch = body.get('webSearch', False)
+    deepThinking = body.get('deepThinking', False)
+
+    log_info(f"POST /api/chat-ui/chat | 模型: {model} | 流式: {stream} | 联网搜索: {webSearch} | 深度搜索: {deepSearch} | 深度思考: {deepThinking}")

     # Handle file attachments: inject the file contents into the last user message
     files = body.get('files', [])

@@ -290,6 +296,9 @@ async def chat_endpoint_handler(body: dict):
     stream = body.get('stream', False)
     temperature = body.get('temperature', 0.7)
     max_tokens = body.get('maxTokens', 2000)
+    deepSearch = body.get('deepSearch', False)
+    webSearch = body.get('webSearch', False)
+    deepThinking = body.get('deepThinking', False)

     # Check for image content; if this is a multimodal request, use MultiModalConversation
     has_images = any(

@@ -303,6 +312,21 @@ async def chat_endpoint_handler(body: dict):
         # Handle images via the multimodal API
         return await multimodal_chat_handler(messages, model, stream, temperature, max_tokens)
     else:
+        # Build extra DashScope parameters
+        dashscope_kwargs = {}
+        if deepSearch:
+            dashscope_kwargs["enable_search"] = True
+            dashscope_kwargs["search_options"] = {"search_strategy": "max"}
+            # Only specific thinking-model versions support some advanced agents; for now keep the base model + "max" strategy
+        elif webSearch:
+            dashscope_kwargs["enable_search"] = True
+            dashscope_kwargs["search_options"] = {"search_strategy": "turbo"}
+
+        if deepThinking:
+            dashscope_kwargs["enable_thinking"] = True
+            dashscope_kwargs["result_format"] = "message"  # enable_thinking must be paired with result_format=message
+            dashscope_kwargs["incremental_output"] = True  # in streaming mode, enable_thinking also requires incremental_output=True
+
         # Use the regular chat API
         if stream:
             # Streaming response

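For context, a minimal, hedged sketch of how the assembled kwargs reach DashScope, assuming the regular (non-multimodal) path calls `dashscope.Generation.call`; the model name, prompt, and API-key handling below are placeholders, not taken from this commit:

```python
import os
import dashscope

dashscope.api_key = os.getenv("DASHSCOPE_API_KEY", "")

# mirrors the dashscope_kwargs assembled above (deepSearch + deepThinking case)
dashscope_kwargs = {
    "enable_search": True,                          # web/deep search both switch this on
    "search_options": {"search_strategy": "max"},   # "max" for deepSearch, "turbo" for webSearch
    "enable_thinking": True,                        # deepThinking
    "result_format": "message",                     # required together with enable_thinking
    "incremental_output": True,                     # required when streaming with enable_thinking
}

responses = dashscope.Generation.call(
    model="qwen-plus",                              # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    max_tokens=2000,
    temperature=0.7,
    **dashscope_kwargs,
)

for chunk in responses:
    if chunk.status_code == 200:
        msg = chunk.output.choices[0]["message"]
        # with incremental_output=True both fields arrive as incremental fragments
        print(msg.get("reasoning_content") or "", msg.get("content") or "", sep="", end="")
```
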
@@ -313,15 +337,15 @@ async def chat_endpoint_handler(body: dict):
                 messages=messages,
                 stream=True,
                 max_tokens=max_tokens,
-                temperature=temperature
+                temperature=temperature,
+                **dashscope_kwargs
             )

             full_content = ""  # accumulates the full reply
+            full_reasoning_content = ""  # accumulates the full reasoning content

             for idx, response in enumerate(responses):
                 if response.status_code == 200:
-                    # Check whether the response contains the expected content
-                    # The DashScope response structure may be output.choices or output.text
                     content = None

                     # Try to get content from output.choices

@@ -330,18 +354,34 @@ async def chat_endpoint_handler(body: dict):
                         hasattr(response.output, 'choices') and
                         response.output.choices is not None and
                         len(response.output.choices) > 0 and
-                        'message' in response.output.choices[0] and
-                        'content' in response.output.choices[0]['message']):
+                        'message' in response.output.choices[0]):

-                        content = response.output.choices[0]['message']['content']
+                        msg_dict = response.output.choices[0]['message']
+                        # With incremental_output=True, each chunk's content/reasoning_content is already an incremental fragment
+                        # Use it directly; no need to diff against full_*
+                        content = msg_dict.get('content') or ''
+                        reasoning_content = msg_dict.get('reasoning_content') or ''

-                        # Only send a delta when the content has changed
-                        if len(content) > len(full_content):
-                            delta_content = extract_delta_content(content, full_content)
-                            full_content = content
+                        delta_str = ""

-                            if delta_content.strip():  # only send when there is non-blank new content
-                                # Build the SSE data chunk
+                        # Handle reasoning fragments
+                        if reasoning_content:
+                            if not full_reasoning_content:
+                                # First reasoning fragment: prepend the heading
+                                delta_str += "> **💭 深度思考过程:**\n> \n> "
+                            full_reasoning_content += reasoning_content
+                            # Newlines inside a markdown blockquote need a leading ">"
+                            delta_str += reasoning_content.replace("\n", "\n> ")
+
+                        # Handle answer fragments
+                        if content:
+                            if not full_content and full_reasoning_content:
+                                # First answer fragment after reasoning ends: add a divider
+                                delta_str += "\n\n---\n\n"
+                            full_content += content
+                            delta_str += content
+
+                        if delta_str:
                             data = {
                                 "id": f"chatcmpl-{generate_unique_id()}",
                                 "object": "chat.completion.chunk",

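As an aside, the blockquote formatting applied above can be captured in a small standalone helper; this is an illustrative sketch (the function name is hypothetical), and the transformation mirrors the diff: a heading on the first reasoning fragment, `> ` continuations on newlines, and a `---` divider before the first answer token.

```python
def format_delta(content: str, reasoning: str, full_content: str, full_reasoning: str) -> str:
    """Build the markdown delta for one streamed chunk (hypothetical helper)."""
    delta = ""
    if reasoning:
        if not full_reasoning:
            # first reasoning fragment gets the blockquote heading
            delta += "> **💭 深度思考过程:**\n> \n> "
        # keep continuation lines inside the markdown blockquote
        delta += reasoning.replace("\n", "\n> ")
    if content:
        if not full_content and full_reasoning:
            # divider between the thinking block and the actual answer
            delta += "\n\n---\n\n"
        delta += content
    return delta

# a chunk carrying only reasoning, then a later chunk carrying the first answer token
print(format_delta("", "Check the dates first.\nThen compare.", "", ""))
print(format_delta("The answer is 42.", "", "", "Check the dates first.\nThen compare."))
```
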
@@ -350,7 +390,7 @@ async def chat_endpoint_handler(body: dict):
                                 "choices": [
                                     {
                                         "index": 0,
-                                        "delta": {"content": delta_content},
+                                        "delta": {"content": delta_str},
                                         "finish_reason": None
                                     }
                                 ]

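For reference, a hedged sketch of what one of these OpenAI-style `chat.completion.chunk` frames looks like on the wire and how a client could decode it (the id and delta values are made up):

```python
import json

# one SSE frame, as yielded by the streaming branch above
frame = 'data: {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}\n\n'

line = frame.strip()
if line.startswith("data: ") and line != "data: [DONE]":
    chunk = json.loads(line[len("data: "):])
    piece = chunk["choices"][0]["delta"].get("content", "")
    print(piece)  # -> Hello
```
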
@@ -387,7 +427,8 @@ async def chat_endpoint_handler(body: dict):

                            yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                 else:
-                    # Error handling
+                    # Error handling: write to the logger to make debugging easier
+                    log_error(f"DashScope API 返回错误: chunk status={response.status_code}, code={response.code}, msg={response.message}")
                     error_data = {
                         "error": {
                             "message": f"API Error: {response.code} - {response.message}",

@@ -416,6 +457,7 @@ async def chat_endpoint_handler(body: dict):
                 yield f"data: {json.dumps(finish_data, ensure_ascii=False)}\n\n"
                 yield "data: [DONE]\n\n"
             except Exception as e:
+                log_exception(f"流式生成器异常: {e}")
                 error_data = {
                     "error": {
                         "message": str(e),

@@ -432,7 +474,8 @@ async def chat_endpoint_handler(body: dict):
                 messages=messages,
                 stream=False,
                 max_tokens=max_tokens,
-                temperature=temperature
+                temperature=temperature,
+                **dashscope_kwargs
             )

             if response.status_code == 200:

@@ -446,10 +489,14 @@ async def chat_endpoint_handler(body: dict):
                     hasattr(response.output, 'choices') and
                     response.output.choices is not None and
                     len(response.output.choices) > 0 and
-                    'message' in response.output.choices[0] and
-                    'content' in response.output.choices[0]['message']):
+                    'message' in response.output.choices[0]):

-                    content = response.output.choices[0]['message']['content']
+                    msg_dict = response.output.choices[0]['message']
+                    content = msg_dict.get('content', '')
+                    rc = msg_dict.get('reasoning_content', '')
+                    if rc:
+                        rc_formatted = rc.replace('\n', '\n> ')
+                        content = f"> **💭 深度思考过程:**\n> \n> {rc_formatted}\n\n---\n\n{content}"
                 # Otherwise, try to get the content from output.text (DashScope-specific format)
                 elif (hasattr(response, 'output') and
                       response.output and

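To make the non-streaming formatting concrete, a tiny sketch of the markdown the branch above produces when `reasoning_content` is present (sample strings only):

```python
reasoning_content = "Restate the question.\nCheck the relevant dates."
content = "The final answer is 42."

rc_formatted = reasoning_content.replace('\n', '\n> ')
combined = f"> **💭 深度思考过程:**\n> \n> {rc_formatted}\n\n---\n\n{content}"
print(combined)
```
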
@@ -4,7 +4,7 @@
 """
 import os
 import json
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 import dashscope
 from dotenv import load_dotenv

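The widened datetime import supports the move away from `datetime.utcnow()`, which returns a naive timestamp and is deprecated as of Python 3.12; `datetime.now(timezone.utc)` gives the same instant with explicit UTC tzinfo. A quick illustration:

```python
from datetime import datetime, timezone

naive = datetime.utcnow()            # no tzinfo, deprecated since Python 3.12
aware = datetime.now(timezone.utc)   # same instant, carries tzinfo=UTC

print(naive.tzinfo)        # None
print(aware.isoformat())   # e.g. 2024-01-01T12:00:00.000000+00:00
```
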
@@ -28,6 +28,9 @@ from api.chat_routes import (
 )
 from models.chat_models import ChatRequest, ModelInfo
 from utils.helpers import log_request, log_response
+from utils.logger import get_logger
+
+logger = get_logger()


 # Load environment variables

@@ -46,30 +49,32 @@ app = FastAPI(title="AI Chat API Server (Python)", version="2.0.0")

 @app.middleware("http")
 async def logging_middleware(request: Request, call_next):
-    """Middleware: log requests"""
-    start_time = datetime.utcnow()
+    """Middleware: log requests with prettified output"""
+    start_time = datetime.now(timezone.utc)
+    client_ip = request.client.host if request.client else 'unknown'

-    # Log the request
-    log_request(request.method, request.url.path, request.client.host if request.client else 'unknown')
+    # Request log
+    logger.info(f"→ {request.method} {request.url.path} | IP: {client_ip}")

     response = await call_next(request)

-    # Compute the processing time
-    process_time = (datetime.utcnow() - start_time).total_seconds() * 1000
+    process_time = (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
+    status_emoji = "✅" if response.status_code < 400 else "❌"

-    # Log the response
+    # Response log (endpoint, status code, elapsed time)
+    logger.info(f"{status_emoji} {request.method} {request.url.path} | 状态: {response.status_code} | 耗时: {process_time:.0f}ms")
+
+    # Structured log (written to the log file)
     log_response(response.status_code, process_time)

-    # Add the processing time to the response headers
     response.headers["X-Process-Time"] = f"{process_time:.2f}ms"

     return response


 @app.get("/health")
 async def health_check():
     """Health check endpoint"""
-    return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
+    return {"status": "healthy", "timestamp": datetime.now(timezone.utc).isoformat()}


 @app.post("/api/chat-ui/chat")

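A self-contained sketch of the timing/logging middleware pattern used above, rebuilt on a throwaway FastAPI app so it can run without the project's own modules (the route and app here are illustrative):

```python
from datetime import datetime, timezone

from fastapi import FastAPI, Request
from fastapi.testclient import TestClient

app = FastAPI()

@app.middleware("http")
async def timing_middleware(request: Request, call_next):
    # time the downstream handler and expose the duration as a response header
    start = datetime.now(timezone.utc)
    response = await call_next(request)
    elapsed_ms = (datetime.now(timezone.utc) - start).total_seconds() * 1000
    response.headers["X-Process-Time"] = f"{elapsed_ms:.2f}ms"
    return response

@app.get("/health")
async def health():
    return {"status": "healthy"}

client = TestClient(app)
resp = client.get("/health")
print(resp.status_code, resp.headers["X-Process-Time"])
```
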
@@ -24,6 +24,10 @@ class ChatRequest(BaseModel):
     stream: bool = True
     temperature: Optional[float] = 0.7
     max_tokens: Optional[int] = 2000
+    files: Optional[List[str]] = None
+    deepSearch: Optional[bool] = False
+    webSearch: Optional[bool] = False
+    deepThinking: Optional[bool] = False


 class ModelInfo(BaseModel):

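A minimal sketch of the extended request model and its defaults; the new fields mirror the diff, but the class is re-declared here (with a simplified `messages` field) rather than imported from `models.chat_models`:

```python
from typing import List, Optional
from pydantic import BaseModel

class ChatRequest(BaseModel):
    messages: list                       # simplified; the real model is richer
    model: str = "qwen-plus"             # placeholder default
    stream: bool = True
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 2000
    files: Optional[List[str]] = None
    deepSearch: Optional[bool] = False
    webSearch: Optional[bool] = False
    deepThinking: Optional[bool] = False

req = ChatRequest(messages=[{"role": "user", "content": "hi"}], deepThinking=True)
print(req.deepSearch, req.webSearch, req.deepThinking)  # False False True
```
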
@@ -35,9 +35,9 @@ class LoggerSetup:
         # Create the log directory
         self.log_dir.mkdir(exist_ok=True)

-        # Set the log format
+        # Set the log format (drop funcName:lineno to keep it human readable)
         self.formatter = logging.Formatter(
-            '%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s'
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
         )

         # Create the logger instance

@@ -166,14 +166,12 @@ def log_structured(level: str, message: str, **details):
         **details: extra structured data
     """
     logger = get_logger()
-    structured_log = {
-        "timestamp": datetime.now().isoformat(),
-        "level": level.upper(),
-        "message": message,
-        "details": details
-    }
+    # For readability during development, stop printing the full structure as single-line JSON
+    # Convert to a more readable format
+    detail_str = ", ".join(f"{k}={v}" for k, v in details.items() if v)
+    formatted_msg = f"[{message}] {detail_str}"

-    getattr(logger, level.lower())(json.dumps(structured_log, ensure_ascii=False))
+    getattr(logger, level.lower())(formatted_msg)


 def log_request_info(method: str, path: str, client_ip: str = "unknown",

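The rewritten `log_structured` flattens `details` into `key=value` pairs instead of a single-line JSON blob; a quick sketch of the resulting message (sample values only):

```python
message = "chat request"
details = {"model": "qwen-plus", "stream": True, "client_ip": ""}

# falsy values (such as the empty client_ip) are dropped, matching the code above
detail_str = ", ".join(f"{k}={v}" for k, v in details.items() if v)
formatted_msg = f"[{message}] {detail_str}"
print(formatted_msg)  # [chat request] model=qwen-plus, stream=True
```
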
@@ -116,7 +116,8 @@ function handlePin() {
 }

 // Send a message - uses the real API
-async function handleSend(text: string, attachments: Attachment[]) {
+async function handleSend(text: string, attachments: Attachment[], options?: { deepSearch?: boolean; webSearch?: boolean; deepThinking?: boolean }) {
+  console.log("handleSend", text, attachments, options);
   // Check whether any attachments are still uploading
   const uploadingAttachments = attachments.filter((a) => a.uploading);
   if (uploadingAttachments.length > 0) {

@@ -191,6 +192,9 @@ async function handleSend(text: string, attachments: Attachment[]) {
       files: fileUrls, // pass file URLs; the backend reads their contents
       model: settings.value.defaultModel,
       stream: true,
+      deepSearch: options?.deepSearch,
+      webSearch: options?.webSearch,
+      deepThinking: options?.deepThinking,
     },
     abortController.value.signal,
   );

@@ -123,6 +123,10 @@ class ChatApi {
       temperature: request.temperature,
       max_tokens: request.maxTokens,
       files: request.files || [], // pass the list of file URLs to the backend
+      // extended params forwarded to our Python backend for special handling
+      deepSearch: request.deepSearch,
+      webSearch: request.webSearch,
+      deepThinking: request.deepThinking,
     };

     const response = await fetch(

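Finally, a hedged end-to-end sketch of the payload the frontend now sends and how a client can read the SSE stream back; the host, port, and model name are placeholders:

```python
import json
import requests

payload = {
    "messages": [{"role": "user", "content": "Explain the new feature"}],
    "model": "qwen-plus",      # placeholder model name
    "stream": True,
    "files": [],
    "deepSearch": False,
    "webSearch": True,
    "deepThinking": True,
}

# host/port assumed for illustration only
with requests.post("http://localhost:8000/api/chat-ui/chat", json=payload, stream=True) as resp:
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or not raw.startswith("data: "):
            continue
        if raw == "data: [DONE]":
            break
        chunk = json.loads(raw[len("data: "):])
        print(chunk["choices"][0]["delta"].get("content", ""), end="")
```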