From 5e81c903cfdbe8a7db80eec241313efa080c280e Mon Sep 17 00:00:00 2001
From: SuperManTouX <93423476+SuperManTouX@users.noreply.github.com>
Date: Fri, 6 Mar 2026 15:43:05 +0800
Subject: [PATCH] feat: add DeepSeek support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
docs/CHANGELOG-2026-03-06.md | 199 +++++++++++
server/adapters/__init__.py | 16 +
server/adapters/base.py | 126 +++++++
server/adapters/dashscope_adapter.py | 434 ++++++++++++++++++++++++
server/adapters/glm_adapter.py | 482 +++++++++++++++++++++++++++
server/adapters/openai_adapter.py | 304 +++++++++++++++++
server/adapters/registry.py | 120 +++++++
server/api/openai_gateway.py | 119 +++++++
server/config.py | 59 ++++
server/main.py | 119 +++++--
src/components/chat/ChatMain.vue | 58 +++-
src/services/api.ts | 35 +-
src/stores/settings.ts | 14 +
13 files changed, 2057 insertions(+), 28 deletions(-)
create mode 100644 docs/CHANGELOG-2026-03-06.md
create mode 100644 server/adapters/__init__.py
create mode 100644 server/adapters/base.py
create mode 100644 server/adapters/dashscope_adapter.py
create mode 100644 server/adapters/glm_adapter.py
create mode 100644 server/adapters/openai_adapter.py
create mode 100644 server/adapters/registry.py
create mode 100644 server/api/openai_gateway.py
create mode 100644 server/config.py
diff --git a/docs/CHANGELOG-2026-03-06.md b/docs/CHANGELOG-2026-03-06.md
new file mode 100644
index 0000000..b8b1c83
--- /dev/null
+++ b/docs/CHANGELOG-2026-03-06.md
@@ -0,0 +1,199 @@
+# Development Log - 2026-03-06
+
+## Fix: deep-thinking content not displayed
+
+### Problem
+With deep thinking enabled, the backend log confirmed the feature was active, but the frontend showed no deep-thinking content.
+
+### Root cause
+The `streamChat` method in `src/services/api.ts` only handled plain `content` and completely ignored the `reasoning_content` (deep-thinking content) returned by the backend.
+
+### Solution
+
+#### 1. Frontend `src/services/api.ts`
+
+Add a `StreamChunk` interface to distinguish the two content types:
+
+```typescript
+// Stream chunk type
+export interface StreamChunk {
+ type: "content" | "reasoning";
+ text: string;
+}
+```
+
+Update `streamChat` to handle both kinds of content:
+
+```typescript
+const delta = data.choices?.[0]?.delta;
+
+// Handle deep-thinking content (reasoning_content)
+const reasoningContent = delta?.reasoning_content;
+if (reasoningContent) {
+ yield { type: "reasoning", text: reasoningContent };
+}
+
+// Handle regular content
+const content = delta?.content;
+if (content) {
+ yield { type: "content", text: content };
+}
+```
+
+#### 2. Frontend `src/components/chat/ChatMain.vue`
+
+Update the streaming logic to wrap `reasoning` chunks in `<think>` tags:
+
+```typescript
+let isInReasoning = false;
+
+for await (const chunk of stream) {
+ if (chunk.type === "reasoning") {
+ if (!isInReasoning) {
+ isInReasoning = true;
+      fullText += "<think>\n";
+ }
+ fullText += chunk.text;
+ } else {
+ if (isInReasoning) {
+ isInReasoning = false;
+      fullText += "</think>\n\n";
+ }
+ fullText += chunk.text;
+ }
+}
+
+// If the stream ends while still inside a thinking block, close it
+if (isInReasoning) {
+  fullText += "</think>\n";
+}
+```
+
+The `<think>` tag is recognized by the `markstream-vue` library and rendered by the `ThinkingNode` component.
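+
+For illustration (an assumed example following the convention above, not verbatim model output), the text assembled for one streamed answer might look like:
+
+```markdown
+<think>
+Comparing the two options step by step...
+</think>
+
+The answer is option A, because...
+```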
+
+---
+
+## Feature: auto-switch models for image/file attachments
+
+### Requirement
+When the user uploads images or files (PDF, DOCX, etc.), the backend must force the multimodal `glm-4.6v` model, regardless of which model the frontend selected.
+
+### Solution
+
+#### Backend `server/adapters/glm_adapter.py`
+
+**Update `_build_messages`**:
+
+Change the return value from `(messages, has_vision)` to `(messages, has_vision, has_files)`:
+
+```python
+def _build_messages(
+ self, request: ChatCompletionRequest
+) -> tuple[List[Dict], bool, bool]:
+ """
+    Build GLM-format messages
+    Returns: (message list, has images, has file attachments)
+ """
+ messages = []
+ has_vision = False
+    has_files = bool(request.files)  # check for file attachments
+    # ... process messages ...
+ return messages, has_vision, has_files
+```
+
+**Update `_resolve_model`**:
+
+```python
+def _resolve_model(self, model: str, has_vision: bool, has_files: bool = False) -> str:
+    """Resolve the model actually used"""
+    model_lower = model.lower()
+    # Force glm-4.6v (multimodal) when images or file attachments are present
+    if (has_vision or has_files) and model_lower not in VISION_MODELS:
+        logger.info(
+            f"[GLM] Image or file attachment detected, forcing model switch: {model} -> glm-4.6v"
+ )
+ return "glm-4.6v"
+ return model
+```
+
+**Update the call in `chat`**:
+
+```python
+glm_messages, has_vision, has_files = self._build_messages(request)
+actual_model = self._resolve_model(request.model, has_vision, has_files)
+```
+
+---
+
+## Feature: DeepSeek deep-thinking support
+
+### Requirement
+Add deep-thinking support for DeepSeek's `deepseek-reasoner` model, enabled via the `extra_body` parameter.
+
+### Solution
+
+#### Backend `server/adapters/openai_adapter.py`
+
+**Add a list of models that support deep thinking**:
+
+```python
+# DeepSeek models that support deep thinking
+DEEPSEEK_THINKING_MODELS = {"deepseek-reasoner"}
+```
+
+**Update `chat`**:
+
+```python
+# DeepSeek deep-thinking support
+extra_body = None
+if self._provider_type == "deepseek" and request.deep_thinking:
+ if self._supports_thinking(request.model):
+ extra_body = {"thinking": {"type": "enabled"}}
+ kwargs["extra_body"] = extra_body
+        logger.info(f"[{provider_name}] Deep thinking enabled: extra_body = {extra_body}")
+```
+
+**Add `_supports_thinking`**:
+
+```python
+def _supports_thinking(self, model: str) -> bool:
+    """Check whether the model supports deep thinking"""
+ return model.lower() in DEEPSEEK_THINKING_MODELS
+```
+
+**Update `_stream_chat` and `_sync_chat`** (see the sketch after this list):
+
+- Add an `extra_body` parameter
+- Extend the `reasoning_content` logging
+
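+As an end-to-end sketch of what this amounts to (assuming the OpenAI Python SDK pointed at the DeepSeek endpoint; the API key is a placeholder):
+
+```python
+from openai import OpenAI
+
+client = OpenAI(api_key="sk-...", base_url="https://api.deepseek.com/v1")
+
+kwargs = {
+    "model": "deepseek-reasoner",
+    "messages": [{"role": "user", "content": "Which is larger, 9.11 or 9.8?"}],
+    "stream": True,
+    # Deep thinking is passed through to the provider via extra_body
+    "extra_body": {"thinking": {"type": "enabled"}},
+}
+
+for chunk in client.chat.completions.create(**kwargs):
+    if not chunk.choices:
+        continue
+    delta = chunk.choices[0].delta
+    # reasoning_content arrives on the delta alongside regular content
+    reasoning = getattr(delta, "reasoning_content", None)
+    if reasoning:
+        print(reasoning, end="")
+    if delta.content:
+        print(delta.content, end="")
+```
+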
+---
+
+## Files touched
+
+| File | Change |
+|------|--------|
+| `src/services/api.ts` | Add the `StreamChunk` interface; update `streamChat` |
+| `src/components/chat/ChatMain.vue` | Update the streaming loop to handle `reasoning` chunks |
+| `server/adapters/glm_adapter.py` | Update `_build_messages` and `_resolve_model` |
+| `server/adapters/openai_adapter.py` | Add DeepSeek deep-thinking support |
+
+---
+
+## Suggested tests
+
+1. **GLM deep thinking**:
+   - Select a model that supports deep thinking (e.g. glm-4.6v)
+   - Turn on the deep-thinking switch
+   - Send a question and confirm the frontend renders a deep-thinking block
+
+2. **DeepSeek deep thinking** (a sample request follows this list):
+   - Select the `deepseek-reasoner` model
+   - Turn on the deep-thinking switch
+   - Send a question and confirm the backend log shows `extra_body = {'thinking': {'type': 'enabled'}}`
+   - Confirm the frontend renders a deep-thinking block
+
+3. **Automatic model switch**:
+   - Select a non-multimodal model (e.g. glm-4-flash)
+   - Upload an image or a PDF file
+   - Confirm the backend log shows the switch to glm-4.6v
+   - Confirm the multimodal content is handled correctly
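+
+For test 2, a request like the following works from the command line (assuming the default port 8000 from `main.py`; the camelCase `deepThinking` field is the one `ChatCompletionRequest.from_dict` reads):
+
+```bash
+curl -N -X POST http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"deepseek-reasoner","stream":true,"deepThinking":true,"messages":[{"role":"user","content":"Which is larger, 9.11 or 9.8?"}]}'
+```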
\ No newline at end of file
diff --git a/server/adapters/__init__.py b/server/adapters/__init__.py
new file mode 100644
index 0000000..295fb14
--- /dev/null
+++ b/server/adapters/__init__.py
@@ -0,0 +1,16 @@
+"""
+LLM platform adapter module
+"""
+
+from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
+from .registry import get_adapter, get_all_adapters, get_provider_from_model, register_adapter
+
+__all__ = [
+ "BaseAdapter",
+ "ChatCompletionRequest",
+ "ModelInfo",
+ "get_adapter",
+ "get_all_adapters",
+ "get_provider_from_model",
+ "register_adapter",
+]
\ No newline at end of file
diff --git a/server/adapters/base.py b/server/adapters/base.py
new file mode 100644
index 0000000..e18da2f
--- /dev/null
+++ b/server/adapters/base.py
@@ -0,0 +1,126 @@
+"""
+Adapter base class definitions
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+
+
+@dataclass
+class ModelInfo:
+    """Model information"""
+
+ id: str
+ name: str
+ description: str
+ max_tokens: int = 4096
+ provider: str = "unknown"
+
+ def to_dict(self) -> Dict[str, Any]:
+ return {
+ "id": self.id,
+ "name": self.name,
+ "description": self.description,
+ "maxTokens": self.max_tokens,
+ "provider": self.provider,
+ }
+
+
+@dataclass
+class ChatCompletionRequest:
+    """OpenAI-style chat request"""
+
+ model: str
+ messages: List[Dict[str, Any]]
+ stream: bool = True
+ temperature: float = 0.7
+ max_tokens: int = 2000
+ files: Optional[List[str]] = None
+ deep_search: bool = False
+ web_search: bool = False
+ deep_thinking: bool = False
+    # Raw request body (extra fields preserved)
+ extra: Dict[str, Any] = field(default_factory=dict)
+
+ @classmethod
+ def from_dict(cls, data: Dict[str, Any]) -> "ChatCompletionRequest":
+        """Create a request object from a dict"""
+        # Extract the known fields
+ known_fields = {
+ "model",
+ "messages",
+ "stream",
+ "temperature",
+ "max_tokens",
+ "files",
+ "deepSearch",
+ "webSearch",
+ "deepThinking",
+ }
+ extra = {k: v for k, v in data.items() if k not in known_fields}
+
+ return cls(
+ model=data.get("model", "glm-4-flash"),
+ messages=data.get("messages", []),
+ stream=data.get("stream", True),
+ temperature=data.get("temperature", 0.7),
+ max_tokens=data.get("max_tokens", data.get("maxTokens", 2000)),
+ files=data.get("files"),
+ deep_search=data.get("deepSearch", False),
+ web_search=data.get("webSearch", False),
+ deep_thinking=data.get("deepThinking", False),
+ extra=extra,
+ )
+
+
+class BaseAdapter(ABC):
+ """
+    Base class for LLM platform adapters
+    Every platform adapter must subclass this and implement the abstract methods
+ """
+
+ @property
+ @abstractmethod
+ def provider_name(self) -> str:
+        """Return the platform name (e.g. 'glm', 'dashscope', 'openai')"""
+ pass
+
+ @abstractmethod
+ async def chat(self, request: ChatCompletionRequest):
+ """
+        Handle a chat request
+
+        Args:
+            request: OpenAI-style chat request
+
+        Returns:
+            StreamingResponse for streaming requests,
+            JSONResponse or dict for non-streaming requests
+ """
+ pass
+
+ @abstractmethod
+ def list_models(self) -> List[ModelInfo]:
+ """
+        Return the models supported by this platform
+
+        Returns:
+            List of ModelInfo objects
+ """
+ pass
+
+ def is_available(self) -> bool:
+ """
+        Check whether this adapter is usable (API key configured).
+        Default implementation always returns True; subclasses override it to check their keys.
+ """
+ return True
+
+ def get_models_response(self) -> Dict[str, Any]:
+        """Return an OpenAI-style model list response"""
+ models = self.list_models()
+ return {
+ "object": "list",
+ "data": [m.to_dict() for m in models],
+ }
\ No newline at end of file
diff --git a/server/adapters/dashscope_adapter.py b/server/adapters/dashscope_adapter.py
new file mode 100644
index 0000000..5df24ce
--- /dev/null
+++ b/server/adapters/dashscope_adapter.py
@@ -0,0 +1,434 @@
+"""
+Aliyun Bailian DashScope adapter
+Refactored from api/chat_routes.py
+"""
+
+import json
+import os
+from typing import Dict, List
+
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
+from utils.logger import get_logger
+
+logger = get_logger()
+
+# DashScope model catalog
+DASHSCOPE_MODELS = [
+    ModelInfo(
+        id="qwen-max",
+        name="Qwen Max",
+        description="Most capable model",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-plus",
+        name="Qwen Plus",
+        description="Balanced capability and cost",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-turbo",
+        name="Qwen Turbo",
+        description="Faster and cheaper",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-vl-max",
+        name="Qwen-VL Max",
+        description="Multimodal model with vision understanding",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-vl-plus",
+        name="Qwen-VL Plus",
+        description="Multimodal model with vision understanding",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+]
+
+
+class DashScopeAdapter(BaseAdapter):
+    """Aliyun DashScope platform adapter"""
+
+ @property
+ def provider_name(self) -> str:
+ return "dashscope"
+
+ def is_available(self) -> bool:
+        """Check whether an API key is configured"""
+ return bool(os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"))
+
+ def _get_api_key(self) -> str:
+        """Fetch the API key"""
+ return os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY", "")
+
+ def list_models(self) -> List[ModelInfo]:
+ return DASHSCOPE_MODELS
+
+ async def chat(self, request: ChatCompletionRequest):
+ """
+        Handle a DashScope chat request
+        Supports streaming/non-streaming and multimodal input
+        """
+        # Log request parameters
+        logger.info(f"[DashScope] Request parameters:")
+ logger.info(f" - model: {request.model}")
+ logger.info(f" - stream: {request.stream}")
+ logger.info(f" - temperature: {request.temperature}")
+ logger.info(f" - max_tokens: {request.max_tokens}")
+ logger.info(f" - files: {request.files}")
+ logger.info(f" - messages: {json.dumps(request.messages, ensure_ascii=False, indent=2)}")
+
+        # Detect multimodal content
+ has_multimodal = self._has_multimodal_content(request)
+ logger.info(f" - has_multimodal: {has_multimodal}")
+
+ if has_multimodal:
+ return await self._multimodal_chat(request)
+ else:
+ return await self._text_chat(request)
+
+ def _has_multimodal_content(self, request: ChatCompletionRequest) -> bool:
+        """Check for multimodal content"""
+ for msg in request.messages:
+ content = msg.get("content", "")
+ if isinstance(content, list):
+ for item in content:
+ if isinstance(item, dict) and item.get("type") == "image_url":
+ return True
+ return bool(request.files)
+
+ async def _text_chat(self, request: ChatCompletionRequest):
+        """Plain-text chat"""
+ import dashscope
+ from dashscope import Generation
+
+ dashscope.api_key = self._get_api_key()
+
+        # Convert the message format
+ messages = self._build_text_messages(request)
+
+ if request.stream:
+ return self._stream_text_chat(messages, request)
+ else:
+ return self._sync_text_chat(messages, request)
+
+ def _build_text_messages(self, request: ChatCompletionRequest) -> List[Dict]:
+        """Build text-only messages"""
+ messages = []
+ for msg in request.messages:
+ role = msg.get("role", "user")
+ content = msg.get("content", "")
+ if isinstance(content, str) and content.strip():
+ messages.append({"role": role, "content": content})
+ elif isinstance(content, list):
+ text = ""
+ for item in content:
+ if isinstance(item, dict) and item.get("type") == "text":
+ text += item.get("text", "")
+ if text.strip():
+ messages.append({"role": role, "content": text})
+ return messages
+
+ def _stream_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
+        """Streaming text chat"""
+        logger.info(f"[DashScope] Starting streaming text response...")
+
+ def generator():
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ from dashscope import Generation
+
+ full_content = ""
+ chunk_count = 0
+ responses = Generation.call(
+ model=request.model,
+ messages=messages,
+ stream=True,
+ temperature=request.temperature,
+ max_tokens=request.max_tokens,
+ result_format="message",
+ )
+
+ for resp in responses:
+ if resp.status_code == 200:
+ chunk_count += 1
+ content = resp.output.choices[0].message.content
+ if content:
+ full_content += content
+ data = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": request.model,
+ "choices": [
+ {
+ "index": 0,
+ "delta": {"content": content},
+ "finish_reason": None,
+ }
+ ],
+ }
+ yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+
+ finish = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": request.model,
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+ }
+ yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
+ yield "data: [DONE]\n\n"
+
+            # Log the streaming result
+            logger.info(f"[DashScope] Streaming text response finished:")
+            logger.info(f"  - chunks: {chunk_count}")
+            logger.info(f"  - content_length: {len(full_content)} chars")
+            logger.info(f"  - content_preview: {full_content[:200]}..." if len(full_content) > 200 else f"  - content: {full_content}")
+
+ return StreamingResponse(generator(), media_type="text/event-stream")
+
+ def _sync_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
+        """Non-streaming text chat"""
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ from dashscope import Generation
+
+ resp = Generation.call(
+ model=request.model,
+ messages=messages,
+ stream=False,
+ temperature=request.temperature,
+ max_tokens=request.max_tokens,
+ result_format="message",
+ )
+
+ if resp.status_code == 200:
+ content = resp.output.choices[0].message.content
+ response = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion",
+ "created": get_current_timestamp(),
+ "model": request.model,
+ "choices": [
+ {
+ "index": 0,
+ "message": {"role": "assistant", "content": content},
+ "finish_reason": "stop",
+ }
+ ],
+ }
+
+ if hasattr(resp, "usage") and resp.usage:
+ response["usage"] = {
+ "prompt_tokens": resp.usage.input_tokens,
+ "completion_tokens": resp.usage.output_tokens,
+ "total_tokens": resp.usage.total_tokens,
+ }
+
+            # Log the result
+            logger.info(f"[DashScope] Response:")
+            logger.info(f"  - content_length: {len(content)} chars")
+            logger.info(f"  - content_preview: {content[:200]}..." if len(content) > 200 else f"  - content: {content}")
+ if hasattr(resp, "usage") and resp.usage:
+ logger.info(f" - usage: {response['usage']}")
+
+ return JSONResponse(content=response)
+
+        logger.error(f"[DashScope] Request failed: {resp.code} - {resp.message}")
+ return JSONResponse(
+ status_code=500,
+ content={"error": f"DashScope Error: {resp.code} - {resp.message}"},
+ )
+
+ async def _multimodal_chat(self, request: ChatCompletionRequest):
+        """Multimodal chat"""
+ import dashscope
+ from dashscope import MultiModalConversation
+
+ dashscope.api_key = self._get_api_key()
+
+        # Convert the message format
+ messages = self._build_multimodal_messages(request)
+
+        # Pick a multimodal model
+ model = request.model
+ if "qwen-" in model and "vl" not in model:
+ model = model.replace("qwen-", "qwen-vl-")
+
+ if request.stream:
+ return self._stream_multimodal_chat(messages, model, request)
+ else:
+ return self._sync_multimodal_chat(messages, model, request)
+
+ def _build_multimodal_messages(self, request: ChatCompletionRequest) -> List[Dict]:
+        """Build multimodal messages"""
+ messages = []
+
+ for msg in request.messages:
+ role = msg.get("role", "user")
+ content = msg.get("content", "")
+
+ if isinstance(content, str):
+ if content.strip():
+ messages.append({"role": role, "content": [{"text": content}]})
+ elif isinstance(content, list):
+ ds_content = []
+ for item in content:
+ if isinstance(item, dict):
+ if item.get("type") == "text":
+ ds_content.append({"text": item.get("text", "")})
+ elif item.get("type") == "image_url":
+ img_url = self._extract_image_url(item)
+ if img_url:
+ ds_content.append({"image": img_url})
+
+ if ds_content:
+ messages.append({"role": role, "content": ds_content})
+
+ return messages
+
+ def _extract_image_url(self, item: Dict) -> str:
+        """Extract and normalize the image URL"""
+ img_val = item.get("image_url", "")
+ if isinstance(img_val, str):
+ img_url = img_val
+ elif isinstance(img_val, dict):
+ img_url = img_val.get("url", "")
+ else:
+ img_url = ""
+
+        # Convert http URLs to file:// form (for local files)
+ if img_url.startswith(("http://", "https://")):
+ from urllib.parse import urlparse
+
+ parsed = urlparse(img_url)
+ if "localhost" in parsed.netloc or "127.0.0.1" in parsed.netloc:
+ path_parts = parsed.path.split("/")
+ try:
+ uploads_idx = path_parts.index("uploads")
+ img_url = f"file://{'/'.join(path_parts[uploads_idx:])}"
+ except ValueError:
+ pass
+ elif not img_url.startswith("file://"):
+ img_url = f"file://{img_url}"
+
+ return img_url
+
+ def _stream_multimodal_chat(
+ self, messages: List[Dict], model: str, request: ChatCompletionRequest
+ ):
+        """Streaming multimodal chat"""
+
+ def generator():
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ from dashscope import MultiModalConversation
+
+ responses = MultiModalConversation.call(
+ model=model,
+ messages=messages,
+ stream=True,
+ max_tokens=request.max_tokens,
+ temperature=request.temperature,
+ )
+
+ full_content = ""
+ for resp in responses:
+ if resp.status_code == 200:
+ try:
+ content_items = resp.output.choices[0]["message"]["content"]
+ text = ""
+ for item in content_items:
+ if isinstance(item, dict) and "text" in item:
+ text += item["text"]
+
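+                        # DashScope streams the cumulative text so far; emit only the new suffix as this chunk's delta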
+ if len(text) > len(full_content):
+ delta = text[len(full_content) :]
+ full_content = text
+
+ data = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [
+ {
+ "index": 0,
+ "delta": {"content": delta},
+ "finish_reason": None,
+ }
+ ],
+ }
+ yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+ except (KeyError, IndexError, TypeError):
+ pass
+
+ finish = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+ }
+ yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
+ yield "data: [DONE]\n\n"
+
+ return StreamingResponse(generator(), media_type="text/event-stream")
+
+ def _sync_multimodal_chat(
+ self, messages: List[Dict], model: str, request: ChatCompletionRequest
+ ):
+        """Non-streaming multimodal chat"""
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ from dashscope import MultiModalConversation
+
+ resp = MultiModalConversation.call(
+ model=model,
+ messages=messages,
+ stream=False,
+ max_tokens=request.max_tokens,
+ temperature=request.temperature,
+ )
+
+ if resp.status_code == 200:
+ try:
+ content_items = resp.output.choices[0]["message"]["content"]
+ text = ""
+ for item in content_items:
+ if isinstance(item, dict) and "text" in item:
+ text += item["text"]
+
+ response = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [
+ {
+ "index": 0,
+ "message": {"role": "assistant", "content": text},
+ "finish_reason": "stop",
+ }
+ ],
+ }
+ return JSONResponse(content=response)
+ except (KeyError, IndexError, TypeError) as e:
+ return JSONResponse(
+ status_code=500,
+ content={"error": f"Parse error: {str(e)}"},
+ )
+
+ return JSONResponse(
+ status_code=500,
+ content={"error": f"DashScope Error: {resp.code} - {resp.message}"},
+ )
\ No newline at end of file
diff --git a/server/adapters/glm_adapter.py b/server/adapters/glm_adapter.py
new file mode 100644
index 0000000..77711b5
--- /dev/null
+++ b/server/adapters/glm_adapter.py
@@ -0,0 +1,482 @@
+"""
+Zhipu GLM adapter
+Refactored from utils/glm_adapter.py
+Uses the zai-sdk: the full flow is already built and working on it, and switching to the OpenAI SDK would cost a lot of debugging time.
+"""
+
+import json
+import os
+from typing import Dict, List, Optional
+
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
+from utils.logger import get_logger
+
+logger = get_logger()
+
+# GLM model catalog
+GLM_MODELS = [
+ ModelInfo(
+ id="glm-4.6v",
+        name="GLM-4.6V (recommended)",
+        description="Latest flagship model: text / images / documents / deep thinking",
+ max_tokens=128000,
+ provider="ZhipuAI",
+ ),
+ ModelInfo(
+ id="glm-4-flash",
+ name="GLM-4 Flash",
+        description="Cost-effective text model",
+ max_tokens=128000,
+ provider="ZhipuAI",
+ ),
+ ModelInfo(
+ id="glm-4v-plus-0111",
+ name="GLM-4V Plus",
+        description="Native multimodal: images + PDF/DOCX",
+ max_tokens=128000,
+ provider="ZhipuAI",
+ ),
+ ModelInfo(
+ id="glm-z1-flash",
+ name="GLM-Z1 Flash",
+        description="Deep-thinking reasoning model",
+ max_tokens=128000,
+ provider="ZhipuAI",
+ ),
+]
+
+# Vision models (used for auto-switching)
+VISION_MODELS = {"glm-4v", "glm-4v-plus", "glm-4v-plus-0111", "glm-4.6v"}
+
+# Models that support deep thinking
+THINKING_MODELS = {"glm-z1-flash", "glm-z1-air", "glm-4.6v", "glm-4.6"}
+
+
+class GLMAdapter(BaseAdapter):
+    """Zhipu GLM platform adapter"""
+
+ _client = None
+
+ @property
+ def provider_name(self) -> str:
+ return "glm"
+
+ def is_available(self) -> bool:
+        """Check whether an API key is configured"""
+ return bool(os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"))
+
+ def _get_client(self):
+        """Get the GLM client (lazy-loaded)"""
+ if self._client is None:
+ from zhipuai import ZhipuAI
+
+ api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
+ self._client = ZhipuAI(api_key=api_key)
+ return self._client
+
+ def list_models(self) -> List[ModelInfo]:
+ return GLM_MODELS
+
+ async def chat(self, request: ChatCompletionRequest):
+ """
+        Handle a GLM chat request
+        Supports streaming/non-streaming, images, documents, web search, deep thinking
+        """
+        client = self._get_client()
+
+        # Build the messages
+        glm_messages, has_vision, has_files = self._build_messages(request)
+        actual_model = self._resolve_model(request.model, has_vision, has_files)
+
+        # Debug: log the raw request parameters
+        logger.info(f"[GLM] Raw request parameters:")
+ logger.info(
+ f" - request.deep_thinking: {request.deep_thinking} (type: {type(request.deep_thinking)})"
+ )
+ logger.info(f" - request.web_search: {request.web_search}")
+ logger.info(f" - request.deep_search: {request.deep_search}")
+ logger.info(f" - actual_model: {actual_model}")
+ logger.info(f" - supports_thinking: {self._supports_thinking(actual_model)}")
+
+        # Build extra parameters
+        extra_kwargs = {}
+        web_search = self._get_web_search_mode(request)
+
+        if web_search:
+            extra_kwargs["tools"] = [self._build_web_search_tool(web_search)]
+            extra_kwargs["tool_choice"] = "auto"
+
+        # Deep thinking: opt-in (enabled when True, disabled when False)
+        # Note: only certain models support deep thinking (e.g. glm-z1-flash)
+        thinking_enabled = request.deep_thinking and self._supports_thinking(
+            actual_model
+        )
+        logger.info(
+            f"[GLM] Deep-thinking check: {request.deep_thinking} and {self._supports_thinking(actual_model)} = {thinking_enabled}"
+        )
+
+        if thinking_enabled:
+            extra_kwargs["thinking"] = {"type": "enabled"}
+            logger.info(
+                f"[GLM] Deep thinking enabled: extra_kwargs['thinking'] = {extra_kwargs['thinking']}"
+            )
+
+        if extra_kwargs:
+            logger.info(
+                f"[GLM] Final extra_kwargs: {json.dumps(extra_kwargs, ensure_ascii=False)}"
+            )
+
+ if request.stream:
+ return self._stream_chat(
+ client, glm_messages, actual_model, request, extra_kwargs
+ )
+ else:
+ return self._sync_chat(
+ client, glm_messages, actual_model, request, extra_kwargs
+ )
+
+ def _build_messages(
+ self, request: ChatCompletionRequest
+ ) -> tuple[List[Dict], bool, bool]:
+ """
+        Build GLM-format messages
+        Returns: (message list, has images, has file attachments)
+        """
+        messages = []
+        has_vision = False
+        has_files = bool(request.files)  # check for file attachments
+
+ for msg in request.messages:
+ role = msg.get("role", "user")
+ content = msg.get("content", "")
+
+ if isinstance(content, str):
+                # Plain text
+ if content.strip():
+ messages.append({"role": role, "content": content})
+ elif isinstance(content, list):
+                # Multimodal content
+ glm_content = []
+ for item in content:
+ if isinstance(item, dict):
+ item_type = item.get("type", "")
+ if item_type == "text":
+ text = item.get("text", "")
+ if text:
+ glm_content.append({"type": "text", "text": text})
+ elif item_type == "image_url":
+ img_url = self._extract_image_url(item)
+ if img_url:
+ glm_content.append(
+ {"type": "image_url", "image_url": {"url": img_url}}
+ )
+ has_vision = True
+
+ if glm_content:
+ messages.append({"role": role, "content": glm_content})
+
+        # Handle file attachments
+ if request.files:
+ file_content = self._build_file_content(request.files)
+ if messages and messages[-1]["role"] == "user":
+                # Append to the last user message
+ if isinstance(messages[-1]["content"], list):
+ messages[-1]["content"].extend(file_content)
+ else:
+ messages[-1]["content"] = [
+ {"type": "text", "text": messages[-1]["content"]},
+ *file_content,
+ ]
+ else:
+ messages.append({"role": "user", "content": file_content})
+
+ return messages, has_vision, has_files
+
+ def _extract_image_url(self, item: Dict) -> Optional[str]:
+        """Extract the image URL"""
+ img_val = item.get("image_url", "")
+ if isinstance(img_val, str):
+ return img_val
+ elif isinstance(img_val, dict):
+ return img_val.get("url", "")
+ return None
+
+ def _build_file_content(self, files: List[str]) -> List[Dict]:
+        """Build file-content entries"""
+ content = []
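+        # Only http(s) URLs are forwarded; other entries are silently skipped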
+ for file_url in files:
+ if file_url.startswith(("http://", "https://")):
+ content.append({"type": "file_url", "file_url": {"url": file_url}})
+ return content
+
+ def _resolve_model(
+ self, model: str, has_vision: bool, has_files: bool = False
+ ) -> str:
+        """Resolve the model actually used"""
+        model_lower = model.lower()
+        # Force glm-4.6v (multimodal) when images or file attachments are present
+        if (has_vision or has_files) and model_lower not in VISION_MODELS:
+            logger.info(
+                f"[GLM] Image or file attachment detected, forcing model switch: {model} -> glm-4.6v"
+ )
+ return "glm-4.6v"
+ return model
+
+ def _supports_thinking(self, model: str) -> bool:
+        """Check whether the model supports deep thinking"""
+ return model.lower() in THINKING_MODELS
+
+ def _get_web_search_mode(self, request: ChatCompletionRequest) -> str:
+        """Determine the web-search mode"""
+ if request.deep_search:
+ return "deep"
+ elif request.web_search:
+ return "simple"
+ return ""
+
+ def _build_web_search_tool(self, mode: str) -> Dict:
+        """Build the web-search tool definition"""
+        from datetime import datetime
+
+        today = datetime.now().strftime("%Y-%m-%d")
+
+ if mode == "deep":
+            # Deep search: return search result details
+ return {
+ "type": "web_search",
+ "web_search": {
+ "enable": True,
+ "search_engine": "search_pro",
+ "search_result": True,
+                    "search_prompt": f"You are a helpful assistant. Concisely summarize the key information from the web search results {{search_result}}, ranked by importance, citing source dates. Today's date is {today}.",
+ "count": 5,
+ "search_recency_filter": "noLimit",
+ "content_size": "high",
+ },
+ }
+ else:
+            # Simple search
+ return {
+ "type": "web_search",
+ "web_search": {
+ "enable": True,
+ "search_engine": "search_pro",
+ "search_result": True,
+ "count": 5,
+ },
+ }
+
+ def _stream_chat(
+ self, client, messages, model, request, extra_kwargs
+ ) -> StreamingResponse:
+        """Streaming chat"""
+        logger.info(f"[GLM] Starting streaming response...")
+
+        # Extract the deep-thinking config
+ thinking_config = extra_kwargs.get("thinking")
+ tools_config = extra_kwargs.get("tools")
+
+ def generator():
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ full_content = ""
+
+            # Build the API call parameters
+            api_params = {
+                "model": model,
+                "messages": messages,
+                "stream": True,
+                "temperature": request.temperature,
+                "max_tokens": request.max_tokens,
+            }
+
+            # Deep thinking: passed via extra_body
+            if thinking_config:
+                api_params["extra_body"] = {"thinking": thinking_config}
+
+            # Web search: passed via the tools parameter
+            if tools_config:
+                api_params["tools"] = tools_config
+                api_params["tool_choice"] = "auto"
+
+            # Log the request parameters
+            logger.info(f"[GLM] API call parameters:")
+ logger.info(f" - model: {model}")
+ logger.info(f" - stream: True")
+ logger.info(f" - temperature: {request.temperature}")
+ logger.info(f" - max_tokens: {request.max_tokens}")
+ if thinking_config:
+ logger.info(f" - extra_body: {{'thinking': {thinking_config}}}")
+ if tools_config:
+ logger.info(
+ f" - tools: {json.dumps(tools_config, ensure_ascii=False)}"
+ )
+ logger.info(f" - tool_choice: auto")
+ logger.info(
+ f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
+ )
+
+ chunk_count = 0
+ resp = client.chat.completions.create(**api_params)
+
+ for chunk in resp:
+ chunk_count += 1
+
+                # Check that the delta exists
+ if not hasattr(chunk.choices[0], "delta"):
+ continue
+
+ delta = chunk.choices[0].delta
+
+                # Handle deep-thinking content (reasoning_content)
+ reasoning_content = getattr(delta, "reasoning_content", None)
+ if reasoning_content:
+ data = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [
+ {
+ "index": 0,
+ "delta": {"reasoning_content": reasoning_content},
+ "finish_reason": None,
+ }
+ ],
+ }
+ yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+ continue
+
+                # Handle regular content
+ content = getattr(delta, "content", None)
+ if content:
+ full_content += content
+ data = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [
+ {
+ "index": 0,
+ "delta": {"content": content},
+ "finish_reason": None,
+ }
+ ],
+ }
+ yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+
+            # Finish marker
+ finish = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+ }
+ yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
+ yield "data: [DONE]\n\n"
+
+            # Log the streaming result
+            logger.info(f"[GLM] Streaming response finished:")
+            logger.info(f"  - chunks: {chunk_count}")
+            logger.info(f"  - content_length: {len(full_content)} chars")
+ logger.info(
+ f" - content_preview: {full_content[:200]}..."
+ if len(full_content) > 200
+ else f" - content: {full_content}"
+ )
+
+ return StreamingResponse(generator(), media_type="text/event-stream")
+
+ def _sync_chat(
+ self, client, messages, model, request, extra_kwargs
+ ) -> JSONResponse:
+        """Non-streaming chat"""
+        from utils.helpers import generate_unique_id, get_current_timestamp
+
+        # Extract the deep-thinking config
+ thinking_config = extra_kwargs.get("thinking")
+ tools_config = extra_kwargs.get("tools")
+
+        # Build the API call parameters
+        api_params = {
+            "model": model,
+            "messages": messages,
+            "stream": False,
+            "temperature": request.temperature,
+            "max_tokens": request.max_tokens,
+        }
+
+        # Deep thinking: passed via extra_body
+        if thinking_config:
+            api_params["extra_body"] = {"thinking": thinking_config}
+
+        # Web search: passed via the tools parameter
+        if tools_config:
+            api_params["tools"] = tools_config
+            api_params["tool_choice"] = "auto"
+
+        # Log the request parameters
+        logger.info(f"[GLM] API call parameters:")
+ logger.info(f" - model: {model}")
+ logger.info(f" - stream: {request.stream}")
+ logger.info(f" - temperature: {request.temperature}")
+ logger.info(f" - max_tokens: {request.max_tokens}")
+ if thinking_config:
+ logger.info(f" - extra_body: {{'thinking': {thinking_config}}}")
+ if tools_config:
+ logger.info(f" - tools: {json.dumps(tools_config, ensure_ascii=False)}")
+ logger.info(f" - tool_choice: auto")
+ logger.info(
+ f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
+ )
+
+ resp = client.chat.completions.create(**api_params)
+
+ message = resp.choices[0].message
+ content = message.content or ""
+
+        # Build the response
+        response_message = {"role": "assistant", "content": content}
+
+        # Handle deep-thinking content
+ reasoning_content = getattr(message, "reasoning_content", None)
+ if reasoning_content:
+ response_message["reasoning_content"] = reasoning_content
+
+ response = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion",
+ "created": get_current_timestamp(),
+ "model": model,
+ "choices": [
+ {
+ "index": 0,
+ "message": response_message,
+ "finish_reason": "stop",
+ }
+ ],
+ }
+
+ if hasattr(resp, "usage") and resp.usage:
+ response["usage"] = {
+ "prompt_tokens": resp.usage.prompt_tokens,
+ "completion_tokens": resp.usage.completion_tokens,
+ "total_tokens": resp.usage.total_tokens,
+ }
+
+        # Log the result
+        logger.info(f"[GLM] Response:")
+        logger.info(f"  - content_length: {len(content)} chars")
+ logger.info(
+ f" - content_preview: {content[:200]}..."
+ if len(content) > 200
+ else f" - content: {content}"
+ )
+ if hasattr(resp, "usage") and resp.usage:
+ logger.info(f" - usage: {response['usage']}")
+
+ return JSONResponse(content=response)
diff --git a/server/adapters/openai_adapter.py b/server/adapters/openai_adapter.py
new file mode 100644
index 0000000..d1a4dcf
--- /dev/null
+++ b/server/adapters/openai_adapter.py
@@ -0,0 +1,304 @@
+"""
+OpenAI adapter
+Supports OpenAI and OpenAI-compatible APIs (e.g. DeepSeek)
+"""
+
+import json
+import os
+from typing import Dict, List, Optional
+
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
+from utils.logger import get_logger
+
+logger = get_logger()
+
+# OpenAI model catalog
+OPENAI_MODELS = [
+ ModelInfo(
+ id="gpt-4o",
+ name="GPT-4o",
+        description="Latest flagship multimodal model",
+ max_tokens=128000,
+ provider="OpenAI",
+ ),
+ ModelInfo(
+ id="gpt-4o-mini",
+ name="GPT-4o Mini",
+        description="Cost-effective multimodal model",
+ max_tokens=128000,
+ provider="OpenAI",
+ ),
+ ModelInfo(
+ id="gpt-4-turbo",
+ name="GPT-4 Turbo",
+        description="Enhanced GPT-4",
+ max_tokens=128000,
+ provider="OpenAI",
+ ),
+ ModelInfo(
+ id="gpt-3.5-turbo",
+ name="GPT-3.5 Turbo",
+        description="Fast and economical choice",
+ max_tokens=16385,
+ provider="OpenAI",
+ ),
+]
+
+# DeepSeek model catalog
+DEEPSEEK_MODELS = [
+ ModelInfo(
+ id="deepseek-chat",
+ name="Deepseek Chat",
+        description="DeepSeek chat model",
+ max_tokens=64000,
+ provider="Deepseek",
+ ),
+ ModelInfo(
+ id="deepseek-reasoner",
+ name="Deepseek Reasoner",
+        description="DeepSeek reasoning model (supports deep thinking)",
+ max_tokens=64000,
+ provider="Deepseek",
+ ),
+]
+
+# DeepSeek models that support deep thinking
+DEEPSEEK_THINKING_MODELS = {"deepseek-reasoner"}
+
+
+class OpenAIAdapter(BaseAdapter):
+    """OpenAI platform adapter"""
+
+ _client = None
+    _provider_type: str = "openai"  # "openai" or "deepseek"
+
+ def __init__(self, provider_type: str = "openai"):
+ self._provider_type = provider_type
+
+ @property
+ def provider_name(self) -> str:
+ return self._provider_type
+
+ def is_available(self) -> bool:
+        """Check whether an API key is configured"""
+ if self._provider_type == "deepseek":
+ return bool(os.getenv("DEEPSEEK_API_KEY"))
+ return bool(os.getenv("OPENAI_API_KEY"))
+
+ def _get_client(self):
+        """Get the OpenAI client (lazy-loaded)"""
+ if self._client is None:
+ from openai import OpenAI
+
+ if self._provider_type == "deepseek":
+ api_key = os.getenv("DEEPSEEK_API_KEY", "")
+ base_url = os.getenv(
+ "DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"
+ )
+ else:
+ api_key = os.getenv("OPENAI_API_KEY", "")
+                base_url = os.getenv("OPENAI_BASE_URL")  # optional custom endpoint
+
+ kwargs = {"api_key": api_key}
+ if base_url:
+ kwargs["base_url"] = base_url
+
+ self._client = OpenAI(**kwargs)
+ return self._client
+
+ def list_models(self) -> List[ModelInfo]:
+ if self._provider_type == "deepseek":
+ return DEEPSEEK_MODELS
+ return OPENAI_MODELS
+
+ async def chat(self, request: ChatCompletionRequest):
+ """
+        Handle an OpenAI chat request
+        Uses the OpenAI SDK directly; supports streaming and non-streaming
+        """
+        client = self._get_client()
+
+        # Log request parameters
+        provider_name = self._provider_type.upper()
+        logger.info(f"[{provider_name}] Request parameters:")
+ logger.info(f" - model: {request.model}")
+ logger.info(f" - stream: {request.stream}")
+ logger.info(f" - temperature: {request.temperature}")
+ logger.info(f" - max_tokens: {request.max_tokens}")
+ logger.info(f" - provider_type: {self._provider_type}")
+ if self._provider_type == "deepseek":
+ logger.info(f" - deep_thinking: {request.deep_thinking}")
+
+        # Build the messages
+ messages = self._build_messages(request)
+ logger.info(f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}")
+
+        # Build the request kwargs
+ kwargs = {
+ "model": request.model,
+ "messages": messages,
+ "temperature": request.temperature,
+ "max_tokens": request.max_tokens,
+ "stream": request.stream,
+ }
+
+        # DeepSeek deep-thinking support
+ extra_body = None
+ if self._provider_type == "deepseek" and request.deep_thinking:
+ if self._supports_thinking(request.model):
+ extra_body = {"thinking": {"type": "enabled"}}
+ kwargs["extra_body"] = extra_body
+                logger.info(f"[{provider_name}] Deep thinking enabled: extra_body = {extra_body}")
+
+ if request.stream:
+ return self._stream_chat(client, kwargs, extra_body)
+ else:
+ return self._sync_chat(client, kwargs, extra_body)
+
+ def _supports_thinking(self, model: str) -> bool:
+        """Check whether the model supports deep thinking"""
+ return model.lower() in DEEPSEEK_THINKING_MODELS
+
+ def _build_messages(self, request: ChatCompletionRequest) -> List[Dict]:
+        """Build OpenAI-format messages"""
+ messages = []
+
+ for msg in request.messages:
+ role = msg.get("role", "user")
+ content = msg.get("content", "")
+
+            # OpenAI accepts the standard format directly
+ if isinstance(content, str):
+ if content.strip():
+ messages.append({"role": role, "content": content})
+ elif isinstance(content, list):
+                # Multimodal content
+ openai_content = []
+ for item in content:
+ if isinstance(item, dict):
+ openai_content.append(item)
+ if openai_content:
+ messages.append({"role": role, "content": openai_content})
+
+ return messages
+
+ def _stream_chat(self, client, kwargs: Dict, extra_body: Optional[Dict] = None) -> StreamingResponse:
+        """Streaming chat"""
+        provider_name = self._provider_type.upper()
+        logger.info(f"[{provider_name}] Starting streaming response...")
+
+ def generator():
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ resp = client.chat.completions.create(**kwargs)
+
+ full_content = ""
+ full_reasoning = ""
+ chunk_count = 0
+ for chunk in resp:
+ if chunk.choices:
+ chunk_count += 1
+ delta = chunk.choices[0].delta
+
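+                    # Collect regular and reasoning deltas into a single OpenAI-style chunk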
+ delta_content = {}
+ if hasattr(delta, "content") and delta.content:
+ delta_content["content"] = delta.content
+ full_content += delta.content
+ if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+ delta_content["reasoning_content"] = delta.reasoning_content
+ full_reasoning += delta.reasoning_content
+
+ if delta_content:
+ data = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": kwargs["model"],
+ "choices": [
+ {
+ "index": 0,
+ "delta": delta_content,
+ "finish_reason": None,
+ }
+ ],
+ }
+ yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+
+ finish = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion.chunk",
+ "created": get_current_timestamp(),
+ "model": kwargs["model"],
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+ }
+ yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
+ yield "data: [DONE]\n\n"
+
+            # Log the streaming result
+            logger.info(f"[{provider_name}] Streaming response finished:")
+            logger.info(f"  - chunks: {chunk_count}")
+            logger.info(f"  - content_length: {len(full_content)} chars")
+            if full_reasoning:
+                logger.info(f"  - reasoning_length: {len(full_reasoning)} chars")
+            logger.info(f"  - content_preview: {full_content[:200]}..." if len(full_content) > 200 else f"  - content: {full_content}")
+
+ return StreamingResponse(generator(), media_type="text/event-stream")
+
+ def _sync_chat(self, client, kwargs: Dict, extra_body: Optional[Dict] = None) -> JSONResponse:
+        """Non-streaming chat"""
+ from utils.helpers import generate_unique_id, get_current_timestamp
+
+ resp = client.chat.completions.create(**kwargs)
+
+ message = resp.choices[0].message
+ content = message.content or ""
+ response = {
+ "id": f"chatcmpl-{generate_unique_id()}",
+ "object": "chat.completion",
+ "created": get_current_timestamp(),
+ "model": kwargs["model"],
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": message.role,
+ "content": content,
+ },
+ "finish_reason": resp.choices[0].finish_reason,
+ }
+ ],
+ }
+
+        # Attach reasoning content (if any)
+ if hasattr(message, "reasoning_content") and message.reasoning_content:
+ response["choices"][0]["message"]["reasoning_content"] = (
+ message.reasoning_content
+ )
+
+ if resp.usage:
+ response["usage"] = {
+ "prompt_tokens": resp.usage.prompt_tokens,
+ "completion_tokens": resp.usage.completion_tokens,
+ "total_tokens": resp.usage.total_tokens,
+ }
+
+        # Log the result
+        provider_name = self._provider_type.upper()
+        logger.info(f"[{provider_name}] Response:")
+        logger.info(f"  - content_length: {len(content)} chars")
+        if hasattr(message, "reasoning_content") and message.reasoning_content:
+            logger.info(f"  - reasoning_length: {len(message.reasoning_content)} chars")
+        logger.info(f"  - content_preview: {content[:200]}..." if len(content) > 200 else f"  - content: {content}")
+ if resp.usage:
+ logger.info(f" - usage: {response['usage']}")
+
+ return JSONResponse(content=response)
+
+
+class DeepseekAdapter(OpenAIAdapter):
+    """DeepSeek platform adapter (extends the OpenAI adapter)"""
+
+ def __init__(self):
+ super().__init__(provider_type="deepseek")
\ No newline at end of file
diff --git a/server/adapters/registry.py b/server/adapters/registry.py
new file mode 100644
index 0000000..a44dd1a
--- /dev/null
+++ b/server/adapters/registry.py
@@ -0,0 +1,120 @@
+"""
+Adapter registry
+Routes to the matching platform adapter based on the model name
+"""
+
+import os
+from typing import Dict, Optional, Type
+
+from .base import BaseAdapter
+
+# Map of model prefixes to platform names
+MODEL_PREFIX_MAP = {
+    # Zhipu GLM
+    "glm-": "glm",
+    # Aliyun DashScope (Qwen family)
+    "qwen-": "dashscope",
+    # OpenAI
+    "gpt-": "openai",
+    "o1-": "openai",
+    "o3-": "openai",
+    # DeepSeek
+    "deepseek-": "deepseek",
+}
+
+# Registered adapter instances
+_adapters: Dict[str, BaseAdapter] = {}
+
+# Registered adapter classes
+_adapter_classes: Dict[str, Type[BaseAdapter]] = {}
+
+
+def register_adapter(name: str, adapter_class: Type[BaseAdapter]):
+ """
+    Register an adapter class
+
+    Args:
+        name: platform name (e.g. 'glm', 'dashscope', 'openai')
+        adapter_class: the adapter class
+ """
+ _adapter_classes[name] = adapter_class
+
+
+def get_provider_from_model(model: str) -> str:
+ """
+    Determine the platform from a model name
+
+    Args:
+        model: model ID (e.g. 'glm-4-flash', 'qwen-turbo', 'gpt-4')
+
+    Returns:
+        Platform name (e.g. 'glm', 'dashscope', 'openai')
+ """
+ model_lower = model.lower()
+
+    # Exact matches first
+ exact_matches = {
+        # Exact GLM model names
+        "glm-4": "glm",
+        "glm-4v": "glm",
+        # DeepSeek
+ "deepseek-chat": "deepseek",
+ "deepseek-reasoner": "deepseek",
+ }
+ if model_lower in exact_matches:
+ return exact_matches[model_lower]
+
+    # Prefix matching
+ for prefix, provider in MODEL_PREFIX_MAP.items():
+ if model_lower.startswith(prefix):
+ return provider
+
+    # Fall back to the DEFAULT_PROVIDER env var, else GLM
+ return os.getenv("DEFAULT_PROVIDER", "glm")
+
+
+def get_adapter(provider: str) -> Optional[BaseAdapter]:
+ """
+    Get an adapter instance (lazy-loaded)
+
+    Args:
+        provider: platform name
+
+    Returns:
+        The adapter instance, or None if the platform is not registered
+ """
+ if provider in _adapters:
+ return _adapters[provider]
+
+    # Lazy-load: instantiate on first use
+ if provider in _adapter_classes:
+ adapter_class = _adapter_classes[provider]
+ adapter = adapter_class()
+ _adapters[provider] = adapter
+ return adapter
+
+ return None
+
+
+def get_all_adapters() -> Dict[str, BaseAdapter]:
+ """
+    Get all registered adapter instances
+ """
+ result = {}
+ for name, adapter_class in _adapter_classes.items():
+ if name not in _adapters:
+ _adapters[name] = adapter_class()
+ result[name] = _adapters[name]
+ return result
+
+
+def get_available_providers() -> list:
+ """
+    Get all available platforms
+ """
+ providers = []
+ for name, adapter_class in _adapter_classes.items():
+ adapter = get_adapter(name)
+ if adapter and adapter.is_available():
+ providers.append(name)
+ return providers
diff --git a/server/api/openai_gateway.py b/server/api/openai_gateway.py
new file mode 100644
index 0000000..9afa60e
--- /dev/null
+++ b/server/api/openai_gateway.py
@@ -0,0 +1,119 @@
+"""
+OpenAI-compatible API gateway
+Provides unified /v1/chat/completions and /v1/models endpoints
+"""
+
+from typing import Any, Dict
+
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import JSONResponse
+
+from adapters import get_adapter, get_provider_from_model
+from adapters.base import ChatCompletionRequest
+from utils.logger import get_logger
+
+logger = get_logger()
+
+router = APIRouter(tags=["OpenAI Compatible API"])
+
+
+@router.post("/chat/completions")
+async def chat_completions(request: Request):
+ """
+    OpenAI-compatible chat completions endpoint
+
+    Routes to a platform adapter based on the request's model field:
+    - glm-* → Zhipu GLM
+    - qwen-* → Aliyun DashScope
+    - gpt-* / o1-* / o3-* → OpenAI
+    - deepseek-* → DeepSeek
+ """
+ try:
+ body = await request.json()
+ except Exception:
+ raise HTTPException(status_code=400, detail="Invalid JSON body")
+
+    # Build the request object
+ chat_request = ChatCompletionRequest.from_dict(body)
+ model = chat_request.model
+
+    # Determine the platform from the model name
+ provider = get_provider_from_model(model)
+ logger.info(f"[Gateway] model={model} → provider={provider}")
+
+    # Get the platform's adapter
+ adapter = get_adapter(provider)
+ if adapter is None:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Unsupported model: {model} (provider: {provider})",
+ )
+
+    # Check that the adapter is usable
+ if not adapter.is_available():
+ raise HTTPException(
+ status_code=503,
+ detail=f"Provider '{provider}' is not available (API key not configured)",
+ )
+
+    # Delegate to the adapter
+ return await adapter.chat(chat_request)
+
+
+@router.get("/models")
+async def list_models():
+ """
+    Return the model list across all available platforms
+
+    Aggregates models from every platform with a configured API key
+ """
+ from adapters import get_all_adapters
+
+ all_models = []
+
+ for provider, adapter in get_all_adapters().items():
+ if adapter.is_available():
+ models = adapter.list_models()
+ all_models.extend([m.to_dict() for m in models])
+
+ return {
+ "object": "list",
+ "data": all_models,
+ }
+
+
+@router.get("/models/{model_id}")
+async def get_model(model_id: str):
+ """
+    Get information about a specific model
+ """
+ from adapters import get_all_adapters
+
+ for provider, adapter in get_all_adapters().items():
+ if adapter.is_available():
+ for model in adapter.list_models():
+ if model.id == model_id:
+ return {
+ "object": "model",
+ "id": model.id,
+ "owned_by": model.provider,
+ "data": model.to_dict(),
+ }
+
+ raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
+
+
+# Adapter registration at startup
+def init_adapters():
+    """Register all adapters"""
+ from adapters import register_adapter
+ from adapters.dashscope_adapter import DashScopeAdapter
+ from adapters.glm_adapter import GLMAdapter
+ from adapters.openai_adapter import DeepseekAdapter, OpenAIAdapter
+
+ register_adapter("glm", GLMAdapter)
+ register_adapter("dashscope", DashScopeAdapter)
+ register_adapter("openai", OpenAIAdapter)
+ register_adapter("deepseek", DeepseekAdapter)
+
+ logger.info("[Gateway] Adapters registered: glm, dashscope, openai, deepseek")
\ No newline at end of file
diff --git a/server/config.py b/server/config.py
new file mode 100644
index 0000000..1beb173
--- /dev/null
+++ b/server/config.py
@@ -0,0 +1,59 @@
+"""
+Unified configuration management
+"""
+
+import os
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+
+@dataclass
+class ProviderConfig:
+    """Platform configuration"""
+
+ api_key: Optional[str] = None
+ base_url: Optional[str] = None
+ enabled: bool = True
+
+
+# Platform configuration map
+PROVIDERS: Dict[str, ProviderConfig] = {
+ "glm": ProviderConfig(
+ api_key=os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"),
+ enabled=True,
+ ),
+ "dashscope": ProviderConfig(
+ api_key=os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"),
+ enabled=True,
+ ),
+ "openai": ProviderConfig(
+ api_key=os.getenv("OPENAI_API_KEY"),
+ base_url=os.getenv("OPENAI_BASE_URL"),
+ enabled=True,
+ ),
+ "deepseek": ProviderConfig(
+ api_key=os.getenv("DEEPSEEK_API_KEY"),
+ base_url=os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"),
+ enabled=True,
+ ),
+}
+
+
+def get_provider_config(provider: str) -> Optional[ProviderConfig]:
+    """Get a platform's configuration"""
+ return PROVIDERS.get(provider)
+
+
+def is_provider_available(provider: str) -> bool:
+    """Check whether a platform is usable (API key configured)"""
+ config = PROVIDERS.get(provider)
+ return config is not None and bool(config.api_key) and config.enabled
+
+
+def get_available_providers() -> list:
+    """List all available platforms"""
+ return [name for name, config in PROVIDERS.items() if is_provider_available(name)]
+
+
+# Default platform
+DEFAULT_PROVIDER = os.getenv("DEFAULT_PROVIDER", "glm")
\ No newline at end of file
diff --git a/server/main.py b/server/main.py
index e56880c..4d53ea6 100644
--- a/server/main.py
+++ b/server/main.py
@@ -3,12 +3,18 @@ AI Chat API Server — main entry point (pure infrastructure layer)
 Responsibilities:
 - Inject runtime dependencies (venv site-packages)
-- Read the LLM_BACKEND env var and load that platform's module dynamically
-- Register FastAPI routes and middleware
+- Serve the OpenAI-compatible API gateway (/v1/*) with multi-platform routing
+- Keep the backward-compatible /api/chat-ui/* routes

-Platform code locations (main.py contains no platform logic):
-- DashScope → api/chat_routes.py
-- Zhipu GLM-4.6V → api/chat_routes_glm.py + utils/glm_adapter.py
+Platform adapter locations:
+- adapters/glm_adapter.py → Zhipu GLM
+- adapters/dashscope_adapter.py → Aliyun DashScope
+- adapters/openai_adapter.py → OpenAI / DeepSeek
+
+API endpoints:
+- POST /v1/chat/completions → OpenAI-compatible gateway (auto-routes by model)
+- GET /v1/models → list of all available models
+- POST /api/chat-ui/chat → legacy chat endpoint (kept for compatibility)
"""
import os
@@ -62,12 +68,21 @@ from api.chat_routes import (delete_conversation_handler,
save_conversation_handler, serve_upload_handler,
stop_generation_handler, upload_file_handler)
+# ── OpenAI-compatible gateway initialization ──────────────────────────
+from api.openai_gateway import init_adapters, router as openai_router
+
+init_adapters()
+
 # ── FastAPI app ──────────────────────────────────────────────────────
app = FastAPI(
- title=f"AI Chat API(LLM_BACKEND={LLM_BACKEND})",
- version="3.0.0",
+ title="AI Chat API Gateway",
+ version="4.0.0",
+    description="Unified OpenAI-compatible API gateway with multi-platform model support",
)
+# Register the OpenAI-compatible routes
+app.include_router(openai_router, prefix="/v1")
+
@app.middleware("http")
async def logging_middleware(request: Request, call_next):
@@ -90,27 +105,72 @@ async def logging_middleware(request: Request, call_next):
@app.get("/health")
async def health_check():
+ from config import get_available_providers
+
return {
"status": "healthy",
- "backend": LLM_BACKEND,
+ "version": "4.0.0",
+ "default_backend": LLM_BACKEND,
+ "available_providers": get_available_providers(),
+ "endpoints": {
+ "openai_compatible": "/v1/chat/completions",
+ "legacy": "/api/chat-ui/chat",
+ "models": "/v1/models",
+ },
"timestamp": datetime.now(timezone.utc).isoformat(),
}
@app.post("/api/chat-ui/chat")
async def chat_endpoint(request: Request):
-    """Chat endpoint (auto-routes to the current platform)"""
-    return await _platform.chat_handler(await request.json())
+    """Chat endpoint (auto-routes to the platform for the given model)"""
+ from adapters import get_adapter, get_provider_from_model
+ from adapters.base import ChatCompletionRequest
+
+ try:
+ body = await request.json()
+ except Exception:
+ return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
+
+    # Build the request object
+ chat_request = ChatCompletionRequest.from_dict(body)
+ model = chat_request.model
+
+    # Determine the platform from the model name
+ provider = get_provider_from_model(model)
+ logger.info(f"[Legacy API] model={model} → provider={provider}")
+
+    # Get the platform's adapter
+ adapter = get_adapter(provider)
+ if adapter is None:
+ return JSONResponse(
+ {"error": f"Unsupported model: {model} (provider: {provider})"},
+ status_code=400,
+ )
+
+    # Check that the adapter is usable
+ if not adapter.is_available():
+ return JSONResponse(
+ {"error": f"Provider '{provider}' is not available (API key not configured)"},
+ status_code=503,
+ )
+
+    # Delegate to the adapter
+ return await adapter.chat(chat_request)
@app.get("/api/chat-ui/models")
async def get_models():
-    """Model list (returned by the current platform)"""
-    result = _platform.models_handler()
-    # Supports both sync and async return values
-    if hasattr(result, "__await__"):
-        return await result
-    return result
+    """Model list (aggregated across all available platforms)"""
+ from adapters import get_all_adapters
+
+ all_models = []
+ for provider, adapter in get_all_adapters().items():
+ if adapter.is_available():
+ models = adapter.list_models()
+ all_models.extend([m.to_dict() for m in models])
+
+ return {"object": "list", "data": all_models}
 # ── Generic routes (platform-agnostic) ──────────────────────────────
@@ -161,10 +221,25 @@ if __name__ == "__main__":
import uvicorn
port = int(os.getenv("PORT", 8000))
-    print("=" * 55)
-    print(f" AI Chat Server v3.0 starting...")
-    print(f" Backend platform : {LLM_BACKEND.upper()} [LLM_BACKEND={LLM_BACKEND}]")
-    print(f" Listening port   : {port}")
-    print(f" To switch        : set LLM_BACKEND=glm|dashscope in .env and restart")
-    print("=" * 55)
+
+    # Fetch the available platforms
+ from config import get_available_providers
+
+ available = get_available_providers()
+
+ print("=" * 60)
+ print(" AI Chat API Gateway v4.0")
+ print("=" * 60)
+    print(f" OpenAI-compatible endpoint : http://localhost:{port}/v1/chat/completions")
+    print(f" Model list                 : http://localhost:{port}/v1/models")
+    print("-" * 60)
+    print(f" Available platforms : {', '.join(available) or 'none (configure an API key)'}")
+    print(f" Default platform    : {LLM_BACKEND} (backward-compatible mode)")
+    print("-" * 60)
+    print(" Usage:")
+ print(" curl -X POST http://localhost:8000/v1/chat/completions \\")
+ print(' -H "Content-Type: application/json" \\')
+ print(' -d \'{"model":"glm-4-flash","messages":[{"role":"user","content":"hi"}]}\'')
+ print("=" * 60)
+
uvicorn.run(app, host="0.0.0.0", port=port)
diff --git a/src/components/chat/ChatMain.vue b/src/components/chat/ChatMain.vue
index 88d7bce..549053b 100644
--- a/src/components/chat/ChatMain.vue
+++ b/src/components/chat/ChatMain.vue
@@ -223,14 +223,40 @@ async function handleSend(
);
let fullText = "";
+ let reasoningText = "";
+ let isInReasoning = false;
isTyping.value = false;
for await (const chunk of stream) {
if (abortController.value?.signal.aborted) break;
- fullText += chunk;
+
+    if (chunk.type === "reasoning") {
+      // Deep-thinking content
+      if (!isInReasoning) {
+        // Start a thinking block
+        reasoningText = "";
+        isInReasoning = true;
+        fullText += "<think>\n";
+      }
+      reasoningText += chunk.text;
+      fullText += chunk.text;
+    } else {
+      // Regular content
+      if (isInReasoning) {
+        // Close the thinking block
+        isInReasoning = false;
+        fullText += "</think>\n\n";
+      }
+      fullText += chunk.text;
+    }
chatStore.updateMessageContent(aiMessage.id, fullText);
}
+  // If the stream ends while still inside a thinking block, close it
+  if (isInReasoning) {
+    fullText += "</think>\n";
+  }
+
if (!abortController.value?.signal.aborted) {
chatStore.updateMessage(aiMessage.id, {
isStreaming: false,
@@ -320,13 +346,39 @@ async function handleRetry(messageId: string) {
);
let fullText = "";
+ let reasoningText = "";
+ let isInReasoning = false;
for await (const chunk of stream) {
if (abortController.value?.signal.aborted) break;
- fullText += chunk;
+
+    if (chunk.type === "reasoning") {
+      // Deep-thinking content
+      if (!isInReasoning) {
+        // Start a thinking block
+        reasoningText = "";
+        isInReasoning = true;
+        fullText += "<think>\n";
+      }
+      reasoningText += chunk.text;
+      fullText += chunk.text;
+    } else {
+      // Regular content
+      if (isInReasoning) {
+        // Close the thinking block
+        isInReasoning = false;
+        fullText += "</think>\n\n";
+      }
+      fullText += chunk.text;
+    }
chatStore.updateMessageContent(messageId, fullText);
}
+  // If the stream ends while still inside a thinking block, close it
+  if (isInReasoning) {
+    fullText += "</think>\n";
+  }
+
if (!abortController.value?.signal.aborted) {
chatStore.updateMessage(messageId, {
isStreaming: false,
@@ -419,4 +471,4 @@ watch(
min-width: 1000px;
}
}
-
+
\ No newline at end of file
diff --git a/src/services/api.ts b/src/services/api.ts
index 236cc66..90182d9 100644
--- a/src/services/api.ts
+++ b/src/services/api.ts
@@ -76,6 +76,12 @@ export interface UploadResult {
mimeType?: string;
}
+// Stream chunk type
+export interface StreamChunk {
+ type: "content" | "reasoning";
+ text: string;
+}
+
// API 调用类
class ChatApi {
private baseUrl: string;
@@ -90,7 +96,7 @@ class ChatApi {
async *streamChat(
request: ChatRequest,
signal?: AbortSignal,
-  ): AsyncGenerator<string> {
+  ): AsyncGenerator<StreamChunk> {
     // Build the message array, accounting for any images
let userContent;
if (request.images && request.images.length > 0) {
@@ -185,9 +191,18 @@ class ChatApi {
break;
}
- const content = data.choices?.[0]?.delta?.content;
+ const delta = data.choices?.[0]?.delta;
+
+          // Handle deep-thinking content (reasoning_content)
+ const reasoningContent = delta?.reasoning_content;
+ if (reasoningContent) {
+ yield { type: "reasoning", text: reasoningContent };
+ }
+
+          // Handle regular content
+ const content = delta?.content;
if (content) {
- yield content;
+ yield { type: "content", text: content };
}
} catch (e) {
             console.warn("JSON parse error", e, line);
@@ -271,6 +286,20 @@ class ChatApi {
// maxTokens: 8192,
// provider: "Zhipu",
// },
+ {
+ id: "deepseek-chat",
+ name: "DeepSeek Chat",
+      description: "DeepSeek chat model",
+ maxTokens: 8192,
+ provider: "DeepSeek",
+ },
+ {
+ id: "deepseek-reasoner",
+ name: "DeepSeek Reasoner",
+      description: "DeepSeek deep-thinking model",
+ maxTokens: 8192,
+ provider: "DeepSeek",
+ },
];
}
diff --git a/src/stores/settings.ts b/src/stores/settings.ts
index 6589022..54376b0 100644
--- a/src/stores/settings.ts
+++ b/src/stores/settings.ts
@@ -61,6 +61,20 @@ export const useSettingsStore = defineStore("settings", () => {
maxTokens: 8192,
provider: "Zhipu",
},
+ {
+ id: "deepseek-chat",
+ name: "DeepSeek Chat",
+      description: "DeepSeek chat model",
+ maxTokens: 8192,
+ provider: "DeepSeek",
+ },
+ {
+ id: "deepseek-reasoner",
+ name: "DeepSeek Reasoner",
+      description: "DeepSeek deep-thinking model",
+ maxTokens: 8192,
+ provider: "DeepSeek",
+ },
];
   // State