feat: 深度思考、联网搜索功能完成

This commit is contained in:
肖应宇 2026-03-04 11:33:12 +08:00 committed by SuperManTouX
parent 68e6bad3ca
commit 4ba245706d
7 changed files with 118 additions and 53 deletions

View File

@ -24,6 +24,7 @@ from utils.helpers import (
format_api_response,
extract_delta_content
)
from utils.logger import log_error, log_exception, log_info
# 模拟数据库 - 实际应用中应使用持久化存储
@ -261,6 +262,11 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', True)
temperature = body.get('temperature', 0.7)
max_tokens = body.get('max_tokens', 2000)
deepSearch = body.get('deepSearch', False)
webSearch = body.get('webSearch', False)
deepThinking = body.get('deepThinking', False)
log_info(f"POST /api/chat-ui/chat | 模型: {model} | 流式: {stream} | 联网搜索: {webSearch} | 深度搜索: {deepSearch} | 深度思考: {deepThinking}")
# 处理 files 附件:将文件内容注入到最后一条 user 消息中
files = body.get('files', [])
@ -290,6 +296,9 @@ async def chat_endpoint_handler(body: dict):
stream = body.get('stream', False)
temperature = body.get('temperature', 0.7)
max_tokens = body.get('maxTokens', 2000)
deepSearch = body.get('deepSearch', False)
webSearch = body.get('webSearch', False)
deepThinking = body.get('deepThinking', False)
# 检查是否包含图像内容如果是多模态请求使用MultiModalConversation
has_images = any(
@ -303,6 +312,21 @@ async def chat_endpoint_handler(body: dict):
# 使用多模态API处理图像
return await multimodal_chat_handler(messages, model, stream, temperature, max_tokens)
else:
# 构建 DashScope 额外参数
dashscope_kwargs = {}
if deepSearch:
dashscope_kwargs["enable_search"] = True
dashscope_kwargs["search_options"] = {"search_strategy": "max"}
# 注:仅特定思考模型版本支持部分高级 agent 能力;目前保持使用基础模型 + max 搜索策略
elif webSearch:
dashscope_kwargs["enable_search"] = True
dashscope_kwargs["search_options"] = {"search_strategy": "turbo"}
if deepThinking:
dashscope_kwargs["enable_thinking"] = True
dashscope_kwargs["result_format"] = "message" # enable_thinking 必须配合 result_format=message
dashscope_kwargs["incremental_output"] = True # 流式模式下 enable_thinking 还必须配合 incremental_output=True
# 使用常规聊天API
if stream:
# 流式响应
@ -313,15 +337,15 @@ async def chat_endpoint_handler(body: dict):
messages=messages,
stream=True,
max_tokens=max_tokens,
temperature=temperature
temperature=temperature,
**dashscope_kwargs
)
full_content = "" # 用于累计完整内容
full_reasoning_content = "" # 用于累计完整思考内容
for idx, response in enumerate(responses):
if response.status_code == 200:
# 检查响应是否包含预期的内容
# DashScope API的响应结构可能是 output.choices 或 output.text
content = None
# 尝试从 output.choices 获取内容
@ -330,33 +354,49 @@ async def chat_endpoint_handler(body: dict):
hasattr(response.output, 'choices') and
response.output.choices is not None and
len(response.output.choices) > 0 and
'message' in response.output.choices[0] and
'content' in response.output.choices[0]['message']):
'message' in response.output.choices[0]):
content = response.output.choices[0]['message']['content']
msg_dict = response.output.choices[0]['message']
# incremental_output=True 时,每个 chunk 的 content/reasoning_content 已是增量片段
# 直接使用,无需与 full_* 做对比
content = msg_dict.get('content') or ''
reasoning_content = msg_dict.get('reasoning_content') or ''
# 只有当内容发生变化时才发送增量
if len(content) > len(full_content):
delta_content = extract_delta_content(content, full_content)
full_content = content
delta_str = ""
if delta_content.strip(): # 只有当有非空白新内容时才发送
# 构建 SSE 数据块
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"content": delta_content},
"finish_reason": None
}
]
}
# 处理思考过程片段
if reasoning_content:
if not full_reasoning_content:
# 第一个思考片段,加标题前缀
delta_str += "> **💭 深度思考过程:**\n> \n> "
full_reasoning_content += reasoning_content
# markdown 引用块内换行需加 >
delta_str += reasoning_content.replace("\n", "\n> ")
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
# 处理正式回复片段
if content:
if not full_content and full_reasoning_content:
# 思考结束后首个正式回复,加分隔线
delta_str += "\n\n---\n\n"
full_content += content
delta_str += content
if delta_str:
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"content": delta_str},
"finish_reason": None
}
]
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
# 否则尝试从 output.text 获取内容DashScope特定格式
elif (hasattr(response, 'output') and
response.output and
@ -387,7 +427,8 @@ async def chat_endpoint_handler(body: dict):
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
else:
# 错误处理
# 错误处理:写入 logger方便排查
log_error(f"DashScope API 返回错误: chunk status={response.status_code}, code={response.code}, msg={response.message}")
error_data = {
"error": {
"message": f"API Error: {response.code} - {response.message}",
@ -416,6 +457,7 @@ async def chat_endpoint_handler(body: dict):
yield f"data: {json.dumps(finish_data, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
log_exception(f"流式生成器异常: {e}")
error_data = {
"error": {
"message": str(e),
@ -432,7 +474,8 @@ async def chat_endpoint_handler(body: dict):
messages=messages,
stream=False,
max_tokens=max_tokens,
temperature=temperature
temperature=temperature,
**dashscope_kwargs
)
if response.status_code == 200:
@ -446,10 +489,14 @@ async def chat_endpoint_handler(body: dict):
hasattr(response.output, 'choices') and
response.output.choices is not None and
len(response.output.choices) > 0 and
'message' in response.output.choices[0] and
'content' in response.output.choices[0]['message']):
'message' in response.output.choices[0]):
content = response.output.choices[0]['message']['content']
msg_dict = response.output.choices[0]['message']
content = msg_dict.get('content', '')
rc = msg_dict.get('reasoning_content', '')
if rc:
rc_formatted = rc.replace('\n', '\n> ')
content = f"> **💭 深度思考过程:**\n> \n> {rc_formatted}\n\n---\n\n{content}"
# 否则尝试从 output.text 获取内容DashScope特定格式
elif (hasattr(response, 'output') and
response.output and

View File

@ -4,7 +4,7 @@
"""
import os
import json
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
import dashscope
from dotenv import load_dotenv
@ -28,6 +28,9 @@ from api.chat_routes import (
)
from models.chat_models import ChatRequest, ModelInfo
from utils.helpers import log_request, log_response
from utils.logger import get_logger
logger = get_logger()
# 加载环境变量
@ -46,30 +49,32 @@ app = FastAPI(title="AI Chat API Server (Python)", version="2.0.0")
@app.middleware("http")
async def logging_middleware(request: Request, call_next):
"""中间件:记录请求日志"""
start_time = datetime.utcnow()
"""中间件:记录请求日志并美化输出"""
start_time = datetime.now(timezone.utc)
client_ip = request.client.host if request.client else 'unknown'
# 记录请求信息
log_request(request.method, request.url.path, request.client.host if request.client else 'unknown')
# 请求日志
logger.info(f"{request.method} {request.url.path} | IP: {client_ip}")
response = await call_next(request)
# 计算处理时间
process_time = (datetime.utcnow() - start_time).total_seconds() * 1000
process_time = (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
status_emoji = "" if response.status_code < 400 else ""
# 记录响应信息
# 响应日志(包含端点、状态码、耗时)
logger.info(f"{status_emoji} {request.method} {request.url.path} | 状态: {response.status_code} | 耗时: {process_time:.0f}ms")
# 记录结构化日志(写入日志文件)
log_response(response.status_code, process_time)
# 在响应头中添加处理时间
response.headers["X-Process-Time"] = f"{process_time:.2f}ms"
return response
@app.get("/health")
async def health_check():
"""健康检查端点"""
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
return {"status": "healthy", "timestamp": datetime.now(timezone.utc).isoformat()}
@app.post("/api/chat-ui/chat")

View File

@ -24,6 +24,10 @@ class ChatRequest(BaseModel):
stream: bool = True
temperature: Optional[float] = 0.7
max_tokens: Optional[int] = 2000
files: Optional[List[str]] = None
deepSearch: Optional[bool] = False
webSearch: Optional[bool] = False
deepThinking: Optional[bool] = False
class ModelInfo(BaseModel):

View File

@ -35,9 +35,9 @@ class LoggerSetup:
# 创建日志目录
self.log_dir.mkdir(exist_ok=True)
# 设置日志格式
# 设置日志格式(去掉 funcName:lineno保持人类可读性
self.formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s'
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# 创建logger实例
@ -166,14 +166,12 @@ def log_structured(level: str, message: str, **details):
**details: 额外的结构化数据
"""
logger = get_logger()
structured_log = {
"timestamp": datetime.now().isoformat(),
"level": level.upper(),
"message": message,
"details": details
}
getattr(logger, level.lower())(json.dumps(structured_log, ensure_ascii=False))
# 为了开发时的可读性,不再使用单行 JSON 打印全结构
# 转换为更易读的格式
detail_str = ", ".join(f"{k}={v}" for k, v in details.items() if v)
formatted_msg = f"[{message}] {detail_str}"
getattr(logger, level.lower())(formatted_msg)
def log_request_info(method: str, path: str, client_ip: str = "unknown",

View File

@ -116,7 +116,8 @@ function handlePin() {
}
// - 使 API
async function handleSend(text: string, attachments: Attachment[]) {
async function handleSend(text: string, attachments: Attachment[], options?: { deepSearch?: boolean; webSearch?: boolean; deepThinking?: boolean }) {
console.log("handleSend", text, attachments, options);
//
const uploadingAttachments = attachments.filter((a) => a.uploading);
if (uploadingAttachments.length > 0) {
@ -191,6 +192,9 @@ async function handleSend(text: string, attachments: Attachment[]) {
files: fileUrls, // URL
model: settings.value.defaultModel,
stream: true,
deepSearch: options?.deepSearch,
webSearch: options?.webSearch,
deepThinking: options?.deepThinking,
},
abortController.value.signal,
);

View File

@ -123,6 +123,10 @@ class ChatApi {
temperature: request.temperature,
max_tokens: request.maxTokens,
files: request.files || [], // 传递文件 URL 列表给后端
// 扩展参数传递给我们的 Python 后端进行特殊处理
deepSearch: request.deepSearch,
webSearch: request.webSearch,
deepThinking: request.deepThinking,
};
const response = await fetch(

View File

@ -12,6 +12,9 @@ cleanup() {
exit
}
# 清除占用 8000(后端)/ 5173(前端)端口的旧进程,避免端口占用导致启动失败
lsof -i :8000 -t | xargs -r kill -9; lsof -i :5173 -t | xargs -r kill -9;
# 捕获退出信号
trap cleanup SIGINT SIGTERM EXIT