feat: add DeepSeek support

SuperManTouX 2026-03-06 15:43:05 +08:00
parent 214d2dd80a
commit 5e81c903cf
13 changed files with 2057 additions and 28 deletions

View File

@ -0,0 +1,199 @@
# Dev Log - 2026-03-06
## Fix: deep-thinking content not displayed
### Problem
With deep thinking enabled, the backend log confirmed the feature was active, but the frontend never displayed the deep-thinking content.
### Root cause
The `streamChat` method in the frontend's `src/services/api.ts` only handled plain `content` and completely ignored the `reasoning_content` (deep-thinking content) returned by the backend.
### Solution
#### 1. Frontend `src/services/api.ts`
Add a `StreamChunk` interface to distinguish the content types:
```typescript
// Type of a streamed response chunk
export interface StreamChunk {
type: "content" | "reasoning";
text: string;
}
```
Update the `streamChat` method to handle both kinds of content:
```typescript
const delta = data.choices?.[0]?.delta;
// Handle deep-thinking content (reasoning_content)
const reasoningContent = delta?.reasoning_content;
if (reasoningContent) {
yield { type: "reasoning", text: reasoningContent };
}
// Handle regular content
const content = delta?.content;
if (content) {
yield { type: "content", text: content };
}
```
#### 2. Frontend `src/components/chat/ChatMain.vue`
Update the streaming loop to wrap `reasoning` chunks in `<think>` tags:
```typescript
let isInReasoning = false;
for await (const chunk of stream) {
if (chunk.type === "reasoning") {
if (!isInReasoning) {
isInReasoning = true;
fullText += "<think>\n";
}
fullText += chunk.text;
} else {
if (isInReasoning) {
isInReasoning = false;
fullText += "\n</think>\n";
}
fullText += chunk.text;
}
}
// If the stream ends while still inside a thinking block, close it
if (isInReasoning) {
fullText += "\n</think>";
}
```
The `<think>` tags are recognized by the `markstream-vue` library and rendered by its `ThinkingNode` component.
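With deep thinking enabled, the assembled `fullText` therefore takes this shape (placeholder text for illustration):
```
<think>
...streamed reasoning_content...
</think>
...streamed content...
```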
---
## Feature: auto-switch model for image/file attachments
### Requirement
When the user uploads an image or a file (PDF, DOCX, etc.), the backend should force the multimodal `glm-4.6v` model, regardless of which model the frontend selected.
### Solution
#### Backend `server/adapters/glm_adapter.py`
**Update the `_build_messages` method**
Change the return value from `(messages, has_vision)` to `(messages, has_vision, has_files)`:
```python
def _build_messages(
self, request: ChatCompletionRequest
) -> tuple[List[Dict], bool, bool]:
"""
    Build GLM-format messages.
    Returns: (message list, has images, has file attachments)
"""
messages = []
has_vision = False
    has_files = bool(request.files)  # check for file attachments
    # ... process messages ...
return messages, has_vision, has_files
```
**Update the `_resolve_model` method**
```python
def _resolve_model(self, model: str, has_vision: bool, has_files: bool = False) -> str:
"""解析实际使用的模型"""
model_lower = model.lower()
    # Force glm-4.6v (multimodal) when images or file attachments are present
if (has_vision or has_files) and model_lower not in VISION_MODELS:
logger.info(
f"[GLM] 检测到图片或文件附件,强制切换模型: {model} -> glm-4.6v"
)
return "glm-4.6v"
return model
```
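With this change, a request that selects, say, `glm-4-flash` but attaches a PDF resolves to `glm-4.6v`, while a request already targeting a model in `VISION_MODELS` passes through unchanged.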
**Update the call in the `chat` method**
```python
glm_messages, has_vision, has_files = self._build_messages(request)
actual_model = self._resolve_model(request.model, has_vision, has_files)
```
---
## Feature: DeepSeek deep-thinking support
### Requirement
Add deep-thinking support for DeepSeek's `deepseek-reasoner` model, enabled via the `extra_body` parameter.
### Solution
#### Backend `server/adapters/openai_adapter.py`
**Add the set of models that support deep thinking**
```python
# DeepSeek models that support deep thinking
DEEPSEEK_THINKING_MODELS = {"deepseek-reasoner"}
```
**Update the `chat` method**
```python
# DeepSeek deep-thinking support
extra_body = None
if self._provider_type == "deepseek" and request.deep_thinking:
if self._supports_thinking(request.model):
extra_body = {"thinking": {"type": "enabled"}}
kwargs["extra_body"] = extra_body
logger.info(f"[{provider_name}] 深度思考已启用: extra_body = {extra_body}")
```
**Add a `_supports_thinking` method**
```python
def _supports_thinking(self, model: str) -> bool:
"""检查模型是否支持深度思考"""
return model.lower() in DEEPSEEK_THINKING_MODELS
```
**Update the `_stream_chat` and `_sync_chat` methods**
- Accept an `extra_body` parameter
- Enhance the `reasoning_content` log output (see the sketch below)
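The per-chunk delta handling both methods share, condensed from the full adapter later in this commit; `split_delta` here is a standalone illustration, not a function that exists in the adapter:
```python
from typing import Any, Dict, Optional

def split_delta(delta: Any) -> Dict[str, str]:
    """Collect content and reasoning_content from one streamed delta."""
    out: Dict[str, str] = {}
    content: Optional[str] = getattr(delta, "content", None)
    if content:
        out["content"] = content  # regular answer text
    reasoning: Optional[str] = getattr(delta, "reasoning_content", None)
    if reasoning:
        out["reasoning_content"] = reasoning  # deep-thinking text, logged separately
    return out
```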
---
## Files touched
| File | Change |
|------|--------|
| `src/services/api.ts` | Added the `StreamChunk` interface; updated the `streamChat` method |
| `src/components/chat/ChatMain.vue` | Updated the streaming loop to handle `reasoning` chunks |
| `server/adapters/glm_adapter.py` | Updated the `_build_messages` and `_resolve_model` methods |
| `server/adapters/openai_adapter.py` | Added DeepSeek deep-thinking support |
---
## Suggested tests
1. **GLM deep thinking**
   - Select a model that supports deep thinking (e.g. glm-4.6v)
   - Turn on the deep-thinking switch
   - Send a question and confirm the frontend shows a deep-thinking block
2. **DeepSeek deep thinking** (see the sample request below)
   - Select the `deepseek-reasoner` model
   - Turn on the deep-thinking switch
   - Send a question and confirm the backend log shows `extra_body = {'thinking': {'type': 'enabled'}}`
   - Confirm the frontend shows a deep-thinking block
3. **Automatic model switching**
   - Select a non-multimodal model (e.g. glm-4-flash)
   - Upload an image or a PDF
   - Confirm the backend log shows the switch to glm-4.6v
   - Confirm the multimodal content is handled correctly
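To drive the DeepSeek test from a script rather than the UI, something along these lines should work against the gateway (a sketch assuming the server runs on localhost:8000 as in `main.py` and that the `requests` package is available; the camelCase `deepThinking` flag is the field `ChatCompletionRequest.from_dict` reads):
```python
import json
import requests  # assumed available; any HTTP client works

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "deepseek-reasoner",
        "messages": [{"role": "user", "content": "Why is the sky blue?"}],
        "stream": True,
        "deepThinking": True,
    },
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    # The gateway streams Server-Sent Events: "data: {json}" lines, then "data: [DONE]"
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    delta = json.loads(payload)["choices"][0]["delta"]
    if delta.get("reasoning_content"):
        print("[think]", delta["reasoning_content"])  # deep-thinking stream
    elif delta.get("content"):
        print(delta["content"], end="")  # answer stream
```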

View File

@ -0,0 +1,16 @@
"""
LLM platform adapter module
"""
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
from .registry import get_adapter, get_all_adapters, get_provider_from_model, register_adapter
__all__ = [
"BaseAdapter",
"ChatCompletionRequest",
"ModelInfo",
"get_adapter",
"get_all_adapters",
"get_provider_from_model",
"register_adapter",
]

server/adapters/base.py (new file, 126 additions)
View File

@ -0,0 +1,126 @@
"""
Adapter base class definitions
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
@dataclass
class ModelInfo:
"""模型信息"""
id: str
name: str
description: str
max_tokens: int = 4096
provider: str = "unknown"
def to_dict(self) -> Dict[str, Any]:
return {
"id": self.id,
"name": self.name,
"description": self.description,
"maxTokens": self.max_tokens,
"provider": self.provider,
}
@dataclass
class ChatCompletionRequest:
"""OpenAI 格式的聊天请求"""
model: str
messages: List[Dict[str, Any]]
stream: bool = True
temperature: float = 0.7
max_tokens: int = 2000
files: Optional[List[str]] = None
deep_search: bool = False
web_search: bool = False
deep_thinking: bool = False
    # Raw request body (extra fields preserved)
extra: Dict[str, Any] = field(default_factory=dict)
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "ChatCompletionRequest":
"""从字典创建请求对象"""
# 提取已知字段
known_fields = {
"model",
"messages",
"stream",
"temperature",
"max_tokens",
"files",
"deepSearch",
"webSearch",
"deepThinking",
}
extra = {k: v for k, v in data.items() if k not in known_fields}
return cls(
model=data.get("model", "glm-4-flash"),
messages=data.get("messages", []),
stream=data.get("stream", True),
temperature=data.get("temperature", 0.7),
max_tokens=data.get("max_tokens", data.get("maxTokens", 2000)),
files=data.get("files"),
deep_search=data.get("deepSearch", False),
web_search=data.get("webSearch", False),
deep_thinking=data.get("deepThinking", False),
extra=extra,
)
class BaseAdapter(ABC):
"""
    Base class for LLM platform adapters.
    All platform adapters must subclass this and implement the abstract methods.
"""
@property
@abstractmethod
def provider_name(self) -> str:
"""返回平台名称(如 'glm', 'dashscope', 'openai'"""
pass
@abstractmethod
async def chat(self, request: ChatCompletionRequest):
"""
        Handle a chat request.
        Args:
            request: chat request in OpenAI format
        Returns:
            a StreamingResponse when streaming,
            a JSONResponse or dict when non-streaming
"""
pass
@abstractmethod
def list_models(self) -> List[ModelInfo]:
"""
        Return the models supported by this platform.
        Returns:
            a list of ModelInfo objects
"""
pass
def is_available(self) -> bool:
"""
        Check whether this adapter is usable (API key configured).
        Subclasses typically check an environment variable; this default always returns True.
"""
return True
def get_models_response(self) -> Dict[str, Any]:
"""返回 OpenAI 格式的模型列表响应"""
models = self.list_models()
return {
"object": "list",
"data": [m.to_dict() for m in models],
}

View File

@ -0,0 +1,434 @@
"""
Aliyun Bailian (DashScope) adapter
Refactored from api/chat_routes.py
"""
import json
import os
from typing import Dict, List
from fastapi.responses import JSONResponse, StreamingResponse
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
from utils.logger import get_logger
logger = get_logger()
# Bailian model catalog
DASHSCOPE_MODELS = [
ModelInfo(
id="qwen-max",
name="通义千问 Max",
description="最强大的模型",
max_tokens=8192,
provider="Aliyun",
),
ModelInfo(
id="qwen-plus",
name="通义千问 Plus",
description="能力均衡",
max_tokens=8192,
provider="Aliyun",
),
ModelInfo(
id="qwen-turbo",
name="通义千问 Turbo",
description="速度更快、成本更低",
max_tokens=8192,
provider="Aliyun",
),
ModelInfo(
id="qwen-vl-max",
name="通义万相 VL-Max",
description="支持视觉理解的多模态模型",
max_tokens=8192,
provider="Aliyun",
),
ModelInfo(
id="qwen-vl-plus",
name="通义万相 VL-Plus",
description="支持视觉理解的多模态模型",
max_tokens=8192,
provider="Aliyun",
),
]
class DashScopeAdapter(BaseAdapter):
"""阿里云百炼 DashScope 平台适配器"""
@property
def provider_name(self) -> str:
return "dashscope"
def is_available(self) -> bool:
"""检查 API Key 是否配置"""
return bool(os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"))
def _get_api_key(self) -> str:
"""获取 API Key"""
return os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY", "")
def list_models(self) -> List[ModelInfo]:
return DASHSCOPE_MODELS
async def chat(self, request: ChatCompletionRequest):
"""
        Handle a DashScope chat request.
        Supports streaming/non-streaming and multimodal input.
"""
        # Log request parameters
        logger.info("[DashScope] Request parameters:")
logger.info(f" - model: {request.model}")
logger.info(f" - stream: {request.stream}")
logger.info(f" - temperature: {request.temperature}")
logger.info(f" - max_tokens: {request.max_tokens}")
logger.info(f" - files: {request.files}")
logger.info(f" - messages: {json.dumps(request.messages, ensure_ascii=False, indent=2)}")
        # Detect multimodal content
has_multimodal = self._has_multimodal_content(request)
logger.info(f" - has_multimodal: {has_multimodal}")
if has_multimodal:
return await self._multimodal_chat(request)
else:
return await self._text_chat(request)
def _has_multimodal_content(self, request: ChatCompletionRequest) -> bool:
"""检查是否包含多模态内容"""
for msg in request.messages:
content = msg.get("content", "")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "image_url":
return True
return bool(request.files)
async def _text_chat(self, request: ChatCompletionRequest):
"""纯文本聊天"""
import dashscope
from dashscope import Generation
dashscope.api_key = self._get_api_key()
        # Convert the message format
messages = self._build_text_messages(request)
if request.stream:
return self._stream_text_chat(messages, request)
else:
return self._sync_text_chat(messages, request)
def _build_text_messages(self, request: ChatCompletionRequest) -> List[Dict]:
"""构建文本消息"""
messages = []
for msg in request.messages:
role = msg.get("role", "user")
content = msg.get("content", "")
if isinstance(content, str) and content.strip():
messages.append({"role": role, "content": content})
elif isinstance(content, list):
text = ""
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
text += item.get("text", "")
if text.strip():
messages.append({"role": role, "content": text})
return messages
def _stream_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
"""流式文本聊天"""
logger.info(f"[DashScope] 开始流式文本响应...")
def generator():
from utils.helpers import generate_unique_id, get_current_timestamp
from dashscope import Generation
full_content = ""
chunk_count = 0
responses = Generation.call(
model=request.model,
messages=messages,
stream=True,
temperature=request.temperature,
max_tokens=request.max_tokens,
result_format="message",
)
for resp in responses:
if resp.status_code == 200:
chunk_count += 1
content = resp.output.choices[0].message.content
if content:
full_content += content
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": request.model,
"choices": [
{
"index": 0,
"delta": {"content": content},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
finish = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": request.model,
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}
yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
            # Log the streaming result
            logger.info("[DashScope] Streaming text response finished:")
logger.info(f" - chunks: {chunk_count}")
logger.info(f" - content_length: {len(full_content)} 字符")
logger.info(f" - content_preview: {full_content[:200]}..." if len(full_content) > 200 else f" - content: {full_content}")
return StreamingResponse(generator(), media_type="text/event-stream")
def _sync_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
"""非流式文本聊天"""
from utils.helpers import generate_unique_id, get_current_timestamp
from dashscope import Generation
resp = Generation.call(
model=request.model,
messages=messages,
stream=False,
temperature=request.temperature,
max_tokens=request.max_tokens,
result_format="message",
)
if resp.status_code == 200:
content = resp.output.choices[0].message.content
response = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion",
"created": get_current_timestamp(),
"model": request.model,
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": content},
"finish_reason": "stop",
}
],
}
if hasattr(resp, "usage") and resp.usage:
response["usage"] = {
"prompt_tokens": resp.usage.input_tokens,
"completion_tokens": resp.usage.output_tokens,
"total_tokens": resp.usage.total_tokens,
}
            # Log the response
            logger.info("[DashScope] Response:")
            logger.info(f" - content_length: {len(content)} characters")
logger.info(f" - content_preview: {content[:200]}..." if len(content) > 200 else f" - content: {content}")
if hasattr(resp, "usage") and resp.usage:
logger.info(f" - usage: {response['usage']}")
return JSONResponse(content=response)
logger.error(f"[DashScope] 请求失败: {resp.code} - {resp.message}")
return JSONResponse(
status_code=500,
content={"error": f"DashScope Error: {resp.code} - {resp.message}"},
)
async def _multimodal_chat(self, request: ChatCompletionRequest):
"""多模态聊天"""
import dashscope
from dashscope import MultiModalConversation
dashscope.api_key = self._get_api_key()
        # Convert the message format
messages = self._build_multimodal_messages(request)
        # Pick a multimodal model
model = request.model
if "qwen-" in model and "vl" not in model:
model = model.replace("qwen-", "qwen-vl-")
if request.stream:
return self._stream_multimodal_chat(messages, model, request)
else:
return self._sync_multimodal_chat(messages, model, request)
def _build_multimodal_messages(self, request: ChatCompletionRequest) -> List[Dict]:
"""构建多模态消息"""
messages = []
for msg in request.messages:
role = msg.get("role", "user")
content = msg.get("content", "")
if isinstance(content, str):
if content.strip():
messages.append({"role": role, "content": [{"text": content}]})
elif isinstance(content, list):
ds_content = []
for item in content:
if isinstance(item, dict):
if item.get("type") == "text":
ds_content.append({"text": item.get("text", "")})
elif item.get("type") == "image_url":
img_url = self._extract_image_url(item)
if img_url:
ds_content.append({"image": img_url})
if ds_content:
messages.append({"role": role, "content": ds_content})
return messages
def _extract_image_url(self, item: Dict) -> str:
"""提取并转换图片 URL"""
img_val = item.get("image_url", "")
if isinstance(img_val, str):
img_url = img_val
elif isinstance(img_val, dict):
img_url = img_val.get("url", "")
else:
img_url = ""
        # Convert http URLs to file:// form (for local files)
if img_url.startswith(("http://", "https://")):
from urllib.parse import urlparse
parsed = urlparse(img_url)
if "localhost" in parsed.netloc or "127.0.0.1" in parsed.netloc:
path_parts = parsed.path.split("/")
try:
uploads_idx = path_parts.index("uploads")
img_url = f"file://{'/'.join(path_parts[uploads_idx:])}"
except ValueError:
pass
elif not img_url.startswith("file://"):
img_url = f"file://{img_url}"
return img_url
def _stream_multimodal_chat(
self, messages: List[Dict], model: str, request: ChatCompletionRequest
):
"""流式多模态聊天"""
def generator():
from utils.helpers import generate_unique_id, get_current_timestamp
from dashscope import MultiModalConversation
responses = MultiModalConversation.call(
model=model,
messages=messages,
stream=True,
max_tokens=request.max_tokens,
temperature=request.temperature,
)
full_content = ""
for resp in responses:
if resp.status_code == 200:
try:
content_items = resp.output.choices[0]["message"]["content"]
text = ""
for item in content_items:
if isinstance(item, dict) and "text" in item:
text += item["text"]
if len(text) > len(full_content):
delta = text[len(full_content) :]
full_content = text
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"content": delta},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
except (KeyError, IndexError, TypeError):
pass
finish = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}
yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
return StreamingResponse(generator(), media_type="text/event-stream")
def _sync_multimodal_chat(
self, messages: List[Dict], model: str, request: ChatCompletionRequest
):
"""非流式多模态聊天"""
from utils.helpers import generate_unique_id, get_current_timestamp
from dashscope import MultiModalConversation
resp = MultiModalConversation.call(
model=model,
messages=messages,
stream=False,
max_tokens=request.max_tokens,
temperature=request.temperature,
)
if resp.status_code == 200:
try:
content_items = resp.output.choices[0]["message"]["content"]
text = ""
for item in content_items:
if isinstance(item, dict) and "text" in item:
text += item["text"]
response = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": text},
"finish_reason": "stop",
}
],
}
return JSONResponse(content=response)
except (KeyError, IndexError, TypeError) as e:
return JSONResponse(
status_code=500,
content={"error": f"Parse error: {str(e)}"},
)
return JSONResponse(
status_code=500,
content={"error": f"DashScope Error: {resp.code} - {resp.message}"},
)

View File

@ -0,0 +1,482 @@
"""
Zhipu GLM adapter
Refactored from utils/glm_adapter.py
Uses zai-sdk: the full flow was already built on it, and switching to the OpenAI SDK would require a lot of re-debugging.
"""
import json
import os
from typing import Dict, List, Optional
from fastapi.responses import JSONResponse, StreamingResponse
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
from utils.logger import get_logger
logger = get_logger()
# GLM model catalog
GLM_MODELS = [
ModelInfo(
id="glm-4.6v",
name="GLM-4.6V(推荐)",
description="最新旗舰模型,支持文本/图像/文档/深度思考",
max_tokens=128000,
provider="ZhipuAI",
),
ModelInfo(
id="glm-4-flash",
name="GLM-4 Flash",
description="高性价比文本模型",
max_tokens=128000,
provider="ZhipuAI",
),
ModelInfo(
id="glm-4v-plus-0111",
name="GLM-4V Plus",
description="图像 + PDF/DOCX 原生多模态",
max_tokens=128000,
provider="ZhipuAI",
),
ModelInfo(
id="glm-z1-flash",
name="GLM-Z1 Flash",
description="深度思考推理模型",
max_tokens=128000,
provider="ZhipuAI",
),
]
# Vision models (used for auto-switching)
VISION_MODELS = {"glm-4v", "glm-4v-plus", "glm-4v-plus-0111", "glm-4.6v"}
# Models that support deep thinking
THINKING_MODELS = {"glm-z1-flash", "glm-z1-air", "glm-4.6v", "glm-4.6"}
class GLMAdapter(BaseAdapter):
"""智谱 GLM 平台适配器"""
_client = None
@property
def provider_name(self) -> str:
return "glm"
def is_available(self) -> bool:
"""检查 API Key 是否配置"""
return bool(os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"))
def _get_client(self):
"""获取 GLM 客户端(懒加载)"""
if self._client is None:
from zhipuai import ZhipuAI
api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
self._client = ZhipuAI(api_key=api_key)
return self._client
def list_models(self) -> List[ModelInfo]:
return GLM_MODELS
async def chat(self, request: ChatCompletionRequest):
"""
        Handle a GLM chat request.
        Supports streaming/non-streaming, images, documents, web search, and deep thinking.
"""
client = self._get_client()
        # Build messages
glm_messages, has_vision, has_files = self._build_messages(request)
actual_model = self._resolve_model(request.model, has_vision, has_files)
        # Debug: log the raw request parameters
        logger.info("[GLM] Raw request parameters:")
logger.info(
f" - request.deep_thinking: {request.deep_thinking} (type: {type(request.deep_thinking)})"
)
logger.info(f" - request.web_search: {request.web_search}")
logger.info(f" - request.deep_search: {request.deep_search}")
logger.info(f" - actual_model: {actual_model}")
logger.info(f" - supports_thinking: {self._supports_thinking(actual_model)}")
        # Build extra parameters
extra_kwargs = {}
web_search = self._get_web_search_mode(request)
if web_search:
extra_kwargs["tools"] = [self._build_web_search_tool(web_search)]
extra_kwargs["tool_choice"] = "auto"
        # Deep thinking is opt-in: enabled when True, disabled when False
        # Note: only certain models support deep thinking (e.g. glm-z1-flash)
thinking_enabled = request.deep_thinking and self._supports_thinking(
actual_model
)
logger.info(
f"[GLM] 深度思考判断: {request.deep_thinking} and {self._supports_thinking(actual_model)} = {thinking_enabled}"
)
if thinking_enabled:
extra_kwargs["thinking"] = {"type": "enabled"}
logger.info(
f"[GLM] 深度思考已启用: extra_kwargs['thinking'] = {extra_kwargs['thinking']}"
)
if extra_kwargs:
logger.info(
f"[GLM] 最终 extra_kwargs: {json.dumps(extra_kwargs, ensure_ascii=False)}"
)
if request.stream:
return self._stream_chat(
client, glm_messages, actual_model, request, extra_kwargs
)
else:
return self._sync_chat(
client, glm_messages, actual_model, request, extra_kwargs
)
def _build_messages(
self, request: ChatCompletionRequest
) -> tuple[List[Dict], bool, bool]:
"""
        Build GLM-format messages.
        Returns: (message list, has images, has file attachments)
"""
messages = []
has_vision = False
        has_files = bool(request.files)  # check for file attachments
for msg in request.messages:
role = msg.get("role", "user")
content = msg.get("content", "")
if isinstance(content, str):
                # plain text
if content.strip():
messages.append({"role": role, "content": content})
elif isinstance(content, list):
                # multimodal content
glm_content = []
for item in content:
if isinstance(item, dict):
item_type = item.get("type", "")
if item_type == "text":
text = item.get("text", "")
if text:
glm_content.append({"type": "text", "text": text})
elif item_type == "image_url":
img_url = self._extract_image_url(item)
if img_url:
glm_content.append(
{"type": "image_url", "image_url": {"url": img_url}}
)
has_vision = True
if glm_content:
messages.append({"role": role, "content": glm_content})
        # Handle file attachments
if request.files:
file_content = self._build_file_content(request.files)
if messages and messages[-1]["role"] == "user":
                # Append to the last user message
if isinstance(messages[-1]["content"], list):
messages[-1]["content"].extend(file_content)
else:
messages[-1]["content"] = [
{"type": "text", "text": messages[-1]["content"]},
*file_content,
]
else:
messages.append({"role": "user", "content": file_content})
return messages, has_vision, has_files
def _extract_image_url(self, item: Dict) -> Optional[str]:
"""提取图片 URL"""
img_val = item.get("image_url", "")
if isinstance(img_val, str):
return img_val
elif isinstance(img_val, dict):
return img_val.get("url", "")
return None
def _build_file_content(self, files: List[str]) -> List[Dict]:
"""构建文件内容"""
content = []
for file_url in files:
if file_url.startswith(("http://", "https://")):
content.append({"type": "file_url", "file_url": {"url": file_url}})
return content
def _resolve_model(
self, model: str, has_vision: bool, has_files: bool = False
) -> str:
"""解析实际使用的模型"""
model_lower = model.lower()
        # Force glm-4.6v (multimodal) when images or file attachments are present
if (has_vision or has_files) and model_lower not in VISION_MODELS:
logger.info(
f"[GLM] 检测到图片或文件附件,强制切换模型: {model} -> glm-4.6v"
)
return "glm-4.6v"
return model
def _supports_thinking(self, model: str) -> bool:
"""检查模型是否支持深度思考"""
return model.lower() in THINKING_MODELS
def _get_web_search_mode(self, request: ChatCompletionRequest) -> str:
"""获取联网搜索模式"""
if request.deep_search:
return "deep"
elif request.web_search:
return "simple"
return ""
def _build_web_search_tool(self, mode: str) -> Dict:
"""构建联网搜索工具"""
from datetime import datetime
today = datetime.now().strftime("%Y年%m月%d")
if mode == "deep":
            # Deep search: include search-result details
return {
"type": "web_search",
"web_search": {
"enable": True,
"search_engine": "search_pro",
"search_result": True,
"search_prompt": f"你是一位智能助手。请用简洁的语言总结网络搜索{{search_result}}中的关键信息,按重要性排序并引用来源日期。今天的日期是{today}",
"count": 5,
"search_recency_filter": "noLimit",
"content_size": "high",
},
}
else:
            # Simple search
return {
"type": "web_search",
"web_search": {
"enable": True,
"search_engine": "search_pro",
"search_result": True,
"count": 5,
},
}
def _stream_chat(
self, client, messages, model, request, extra_kwargs
) -> StreamingResponse:
"""流式聊天"""
logger.info(f"[GLM] 开始流式响应...")
# 提取深度思考配置
thinking_config = extra_kwargs.get("thinking")
tools_config = extra_kwargs.get("tools")
def generator():
from utils.helpers import generate_unique_id, get_current_timestamp
full_content = ""
            # Build the API call parameters
api_params = {
"model": model,
"messages": messages,
"stream": True,
"temperature": request.temperature,
"max_tokens": request.max_tokens,
}
            # Deep thinking: passed via extra_body
if thinking_config:
api_params["extra_body"] = {"thinking": thinking_config}
            # Web search: passed via the tools parameter
if tools_config:
api_params["tools"] = tools_config
api_params["tool_choice"] = "auto"
            # Log request parameters
            logger.info("[GLM] API call parameters:")
logger.info(f" - model: {model}")
logger.info(f" - stream: True")
logger.info(f" - temperature: {request.temperature}")
logger.info(f" - max_tokens: {request.max_tokens}")
if thinking_config:
logger.info(f" - extra_body: {{'thinking': {thinking_config}}}")
if tools_config:
logger.info(
f" - tools: {json.dumps(tools_config, ensure_ascii=False)}"
)
logger.info(f" - tool_choice: auto")
logger.info(
f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
)
chunk_count = 0
resp = client.chat.completions.create(**api_params)
for chunk in resp:
chunk_count += 1
                # Skip chunks without a delta
if not hasattr(chunk.choices[0], "delta"):
continue
delta = chunk.choices[0].delta
                # Handle deep-thinking content (reasoning_content)
reasoning_content = getattr(delta, "reasoning_content", None)
if reasoning_content:
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"reasoning_content": reasoning_content},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
continue
                # Handle regular content
content = getattr(delta, "content", None)
if content:
full_content += content
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"delta": {"content": content},
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
            # Finish marker
finish = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": model,
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}
yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
            # Log the streaming result
            logger.info("[GLM] Streaming response finished:")
logger.info(f" - chunks: {chunk_count}")
logger.info(f" - content_length: {len(full_content)} 字符")
logger.info(
f" - content_preview: {full_content[:200]}..."
if len(full_content) > 200
else f" - content: {full_content}"
)
return StreamingResponse(generator(), media_type="text/event-stream")
def _sync_chat(
self, client, messages, model, request, extra_kwargs
) -> JSONResponse:
"""非流式聊天"""
from utils.helpers import generate_unique_id, get_current_timestamp
        # Extract the deep-thinking config
thinking_config = extra_kwargs.get("thinking")
tools_config = extra_kwargs.get("tools")
        # Build the API call parameters
api_params = {
"model": model,
"messages": messages,
"stream": False,
"temperature": request.temperature,
"max_tokens": request.max_tokens,
}
        # Deep thinking: passed via extra_body
if thinking_config:
api_params["extra_body"] = {"thinking": thinking_config}
        # Web search: passed via the tools parameter
if tools_config:
api_params["tools"] = tools_config
api_params["tool_choice"] = "auto"
        # Log request parameters
        logger.info("[GLM] API call parameters:")
logger.info(f" - model: {model}")
logger.info(f" - stream: {request.stream}")
logger.info(f" - temperature: {request.temperature}")
logger.info(f" - max_tokens: {request.max_tokens}")
if thinking_config:
logger.info(f" - extra_body: {{'thinking': {thinking_config}}}")
if tools_config:
logger.info(f" - tools: {json.dumps(tools_config, ensure_ascii=False)}")
logger.info(f" - tool_choice: auto")
logger.info(
f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
)
resp = client.chat.completions.create(**api_params)
message = resp.choices[0].message
content = message.content or ""
        # Build the response
response_message = {"role": "assistant", "content": content}
        # Handle deep-thinking content
reasoning_content = getattr(message, "reasoning_content", None)
if reasoning_content:
response_message["reasoning_content"] = reasoning_content
response = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion",
"created": get_current_timestamp(),
"model": model,
"choices": [
{
"index": 0,
"message": response_message,
"finish_reason": "stop",
}
],
}
if hasattr(resp, "usage") and resp.usage:
response["usage"] = {
"prompt_tokens": resp.usage.prompt_tokens,
"completion_tokens": resp.usage.completion_tokens,
"total_tokens": resp.usage.total_tokens,
}
        # Log the response
        logger.info("[GLM] Response:")
        logger.info(f" - content_length: {len(content)} characters")
logger.info(
f" - content_preview: {content[:200]}..."
if len(content) > 200
else f" - content: {content}"
)
if hasattr(resp, "usage") and resp.usage:
logger.info(f" - usage: {response['usage']}")
return JSONResponse(content=response)

View File

@ -0,0 +1,304 @@
"""
OpenAI adapter
Supports OpenAI and compatible APIs (e.g. DeepSeek)
"""
import json
import os
from typing import Dict, List, Optional
from fastapi.responses import JSONResponse, StreamingResponse
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
from utils.logger import get_logger
logger = get_logger()
# OpenAI model catalog
OPENAI_MODELS = [
ModelInfo(
id="gpt-4o",
name="GPT-4o",
description="最新旗舰多模态模型",
max_tokens=128000,
provider="OpenAI",
),
ModelInfo(
id="gpt-4o-mini",
name="GPT-4o Mini",
description="高性价比多模态模型",
max_tokens=128000,
provider="OpenAI",
),
ModelInfo(
id="gpt-4-turbo",
name="GPT-4 Turbo",
description="GPT-4 增强版",
max_tokens=128000,
provider="OpenAI",
),
ModelInfo(
id="gpt-3.5-turbo",
name="GPT-3.5 Turbo",
description="快速经济的选择",
max_tokens=16385,
provider="OpenAI",
),
]
# DeepSeek model catalog
DEEPSEEK_MODELS = [
ModelInfo(
id="deepseek-chat",
name="Deepseek Chat",
description="Deepseek 对话模型",
max_tokens=64000,
provider="Deepseek",
),
ModelInfo(
id="deepseek-reasoner",
name="Deepseek Reasoner",
description="Deepseek 推理模型(支持深度思考)",
max_tokens=64000,
provider="Deepseek",
),
]
# DeepSeek models that support deep thinking
DEEPSEEK_THINKING_MODELS = {"deepseek-reasoner"}
class OpenAIAdapter(BaseAdapter):
"""OpenAI 平台适配器"""
_client = None
_provider_type: str = "openai" # openai 或 deepseek
def __init__(self, provider_type: str = "openai"):
self._provider_type = provider_type
@property
def provider_name(self) -> str:
return self._provider_type
def is_available(self) -> bool:
"""检查 API Key 是否配置"""
if self._provider_type == "deepseek":
return bool(os.getenv("DEEPSEEK_API_KEY"))
return bool(os.getenv("OPENAI_API_KEY"))
def _get_client(self):
"""获取 OpenAI 客户端(懒加载)"""
if self._client is None:
from openai import OpenAI
if self._provider_type == "deepseek":
api_key = os.getenv("DEEPSEEK_API_KEY", "")
base_url = os.getenv(
"DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"
)
else:
api_key = os.getenv("OPENAI_API_KEY", "")
base_url = os.getenv("OPENAI_BASE_URL") # 可选自定义端点
kwargs = {"api_key": api_key}
if base_url:
kwargs["base_url"] = base_url
self._client = OpenAI(**kwargs)
return self._client
def list_models(self) -> List[ModelInfo]:
if self._provider_type == "deepseek":
return DEEPSEEK_MODELS
return OPENAI_MODELS
async def chat(self, request: ChatCompletionRequest):
"""
        Handle an OpenAI chat request.
        Calls the OpenAI SDK directly; supports streaming and non-streaming.
"""
client = self._get_client()
        # Log request parameters
provider_name = self._provider_type.upper()
logger.info(f"[{provider_name}] 请求参数:")
logger.info(f" - model: {request.model}")
logger.info(f" - stream: {request.stream}")
logger.info(f" - temperature: {request.temperature}")
logger.info(f" - max_tokens: {request.max_tokens}")
logger.info(f" - provider_type: {self._provider_type}")
if self._provider_type == "deepseek":
logger.info(f" - deep_thinking: {request.deep_thinking}")
        # Build messages
messages = self._build_messages(request)
logger.info(f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}")
        # Build request parameters
kwargs = {
"model": request.model,
"messages": messages,
"temperature": request.temperature,
"max_tokens": request.max_tokens,
"stream": request.stream,
}
        # DeepSeek deep-thinking support
extra_body = None
if self._provider_type == "deepseek" and request.deep_thinking:
if self._supports_thinking(request.model):
extra_body = {"thinking": {"type": "enabled"}}
kwargs["extra_body"] = extra_body
logger.info(f"[{provider_name}] 深度思考已启用: extra_body = {extra_body}")
if request.stream:
return self._stream_chat(client, kwargs, extra_body)
else:
return self._sync_chat(client, kwargs, extra_body)
def _supports_thinking(self, model: str) -> bool:
"""检查模型是否支持深度思考"""
return model.lower() in DEEPSEEK_THINKING_MODELS
def _build_messages(self, request: ChatCompletionRequest) -> List[Dict]:
"""构建 OpenAI 格式消息"""
messages = []
for msg in request.messages:
role = msg.get("role", "user")
content = msg.get("content", "")
            # OpenAI accepts the standard format directly
if isinstance(content, str):
if content.strip():
messages.append({"role": role, "content": content})
elif isinstance(content, list):
                # multimodal content
openai_content = []
for item in content:
if isinstance(item, dict):
openai_content.append(item)
if openai_content:
messages.append({"role": role, "content": openai_content})
return messages
def _stream_chat(self, client, kwargs: Dict, extra_body: Optional[Dict] = None) -> StreamingResponse:
"""流式聊天"""
provider_name = self._provider_type.upper()
logger.info(f"[{provider_name}] 开始流式响应...")
def generator():
from utils.helpers import generate_unique_id, get_current_timestamp
resp = client.chat.completions.create(**kwargs)
full_content = ""
full_reasoning = ""
chunk_count = 0
for chunk in resp:
if chunk.choices:
chunk_count += 1
delta = chunk.choices[0].delta
delta_content = {}
if hasattr(delta, "content") and delta.content:
delta_content["content"] = delta.content
full_content += delta.content
if hasattr(delta, "reasoning_content") and delta.reasoning_content:
delta_content["reasoning_content"] = delta.reasoning_content
full_reasoning += delta.reasoning_content
if delta_content:
data = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": kwargs["model"],
"choices": [
{
"index": 0,
"delta": delta_content,
"finish_reason": None,
}
],
}
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
finish = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion.chunk",
"created": get_current_timestamp(),
"model": kwargs["model"],
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}
yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
            # Log the streaming result
            logger.info(f"[{provider_name}] Streaming response finished:")
logger.info(f" - chunks: {chunk_count}")
logger.info(f" - content_length: {len(full_content)} 字符")
if full_reasoning:
logger.info(f" - reasoning_length: {len(full_reasoning)} 字符")
logger.info(f" - content_preview: {full_content[:200]}..." if len(full_content) > 200 else f" - content: {full_content}")
return StreamingResponse(generator(), media_type="text/event-stream")
def _sync_chat(self, client, kwargs: Dict, extra_body: Optional[Dict] = None) -> JSONResponse:
"""非流式聊天"""
from utils.helpers import generate_unique_id, get_current_timestamp
resp = client.chat.completions.create(**kwargs)
message = resp.choices[0].message
content = message.content or ""
response = {
"id": f"chatcmpl-{generate_unique_id()}",
"object": "chat.completion",
"created": get_current_timestamp(),
"model": kwargs["model"],
"choices": [
{
"index": 0,
"message": {
"role": message.role,
"content": content,
},
"finish_reason": resp.choices[0].finish_reason,
}
],
}
        # Attach reasoning content (if any)
if hasattr(message, "reasoning_content") and message.reasoning_content:
response["choices"][0]["message"]["reasoning_content"] = (
message.reasoning_content
)
if resp.usage:
response["usage"] = {
"prompt_tokens": resp.usage.prompt_tokens,
"completion_tokens": resp.usage.completion_tokens,
"total_tokens": resp.usage.total_tokens,
}
        # Log the response
provider_name = self._provider_type.upper()
logger.info(f"[{provider_name}] 响应结果:")
logger.info(f" - content_length: {len(content)} 字符")
if hasattr(message, "reasoning_content") and message.reasoning_content:
logger.info(f" - reasoning_length: {len(message.reasoning_content)} 字符")
logger.info(f" - content_preview: {content[:200]}..." if len(content) > 200 else f" - content: {content}")
if resp.usage:
logger.info(f" - usage: {response['usage']}")
return JSONResponse(content=response)
class DeepseekAdapter(OpenAIAdapter):
"""Deepseek 平台适配器(继承 OpenAI 适配器)"""
def __init__(self):
super().__init__(provider_type="deepseek")

server/adapters/registry.py (new file, 120 additions)
View File

@ -0,0 +1,120 @@
"""
Adapter registry
Routes model names to the matching platform adapter
"""
import os
from typing import Dict, Optional, Type
from .base import BaseAdapter
# Model-prefix → platform-name map
MODEL_PREFIX_MAP = {
    # Zhipu GLM
    "glm-": "glm",
    # Aliyun Bailian (Qwen family)
    "qwen-": "dashscope",
    # OpenAI
    "gpt-": "openai",
    "o1-": "openai",
    "o3-": "openai",
    # DeepSeek
    "deepseek-": "deepseek",
}
# Registered adapter instances
_adapters: Dict[str, BaseAdapter] = {}
# Registered adapter classes
_adapter_classes: Dict[str, Type[BaseAdapter]] = {}
def register_adapter(name: str, adapter_class: Type[BaseAdapter]):
"""
    Register an adapter class.
    Args:
        name: platform name, e.g. 'glm', 'dashscope', 'openai'
        adapter_class: the adapter class
"""
_adapter_classes[name] = adapter_class
def get_provider_from_model(model: str) -> str:
"""
    Determine the platform a model belongs to from its name.
    Args:
        model: model ID, e.g. 'glm-4-flash', 'qwen-turbo', 'gpt-4'
    Returns:
        platform name, e.g. 'glm', 'dashscope', 'openai'
"""
model_lower = model.lower()
    # Exact matches first
    exact_matches = {
        # exact GLM model names
        "glm-4": "glm",
        "glm-4v": "glm",
        # DeepSeek
"deepseek-chat": "deepseek",
"deepseek-reasoner": "deepseek",
}
if model_lower in exact_matches:
return exact_matches[model_lower]
    # Prefix match
for prefix, provider in MODEL_PREFIX_MAP.items():
if model_lower.startswith(prefix):
return provider
    # Fall back to the DEFAULT_PROVIDER env var, else GLM
return os.getenv("DEFAULT_PROVIDER", "glm")
def get_adapter(provider: str) -> Optional[BaseAdapter]:
"""
    Get an adapter instance (lazy-loaded).
    Args:
        provider: platform name
    Returns:
        the adapter instance, or None if the platform is not registered
"""
if provider in _adapters:
return _adapters[provider]
    # Lazy-load: instantiate on first use
if provider in _adapter_classes:
adapter_class = _adapter_classes[provider]
adapter = adapter_class()
_adapters[provider] = adapter
return adapter
return None
def get_all_adapters() -> Dict[str, BaseAdapter]:
"""
    Get all registered adapter instances.
"""
result = {}
for name, adapter_class in _adapter_classes.items():
if name not in _adapters:
_adapters[name] = adapter_class()
result[name] = _adapters[name]
return result
def get_available_providers() -> list:
"""
    Get all available platforms.
"""
providers = []
for name, adapter_class in _adapter_classes.items():
adapter = get_adapter(name)
if adapter and adapter.is_available():
providers.append(name)
return providers

View File

@ -0,0 +1,119 @@
"""
OpenAI-compatible API gateway
Provides the unified /v1/chat/completions and /v1/models endpoints
"""
from typing import Any, Dict
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import JSONResponse
from adapters import get_adapter, get_provider_from_model
from adapters.base import ChatCompletionRequest
from utils.logger import get_logger
logger = get_logger()
router = APIRouter(tags=["OpenAI Compatible API"])
@router.post("/chat/completions")
async def chat_completions(request: Request):
"""
    OpenAI-compatible chat-completions endpoint.
    Routes to the matching platform adapter based on the request's model field:
    - glm-* → Zhipu GLM
    - qwen-* → Aliyun Bailian
    - gpt-* / o1-* / o3-* → OpenAI
    - deepseek-* → DeepSeek
"""
try:
body = await request.json()
except Exception:
raise HTTPException(status_code=400, detail="Invalid JSON body")
    # Create the request object
chat_request = ChatCompletionRequest.from_dict(body)
model = chat_request.model
    # Determine the platform from the model name
provider = get_provider_from_model(model)
logger.info(f"[Gateway] model={model} → provider={provider}")
    # Get the platform's adapter
adapter = get_adapter(provider)
if adapter is None:
raise HTTPException(
status_code=400,
detail=f"Unsupported model: {model} (provider: {provider})",
)
    # Check adapter availability
if not adapter.is_available():
raise HTTPException(
status_code=503,
detail=f"Provider '{provider}' is not available (API key not configured)",
)
    # Delegate to the adapter
return await adapter.chat(chat_request)
@router.get("/models")
async def list_models():
"""
    Return the model list across all available platforms.
    Aggregates models from every platform with a configured API key.
"""
from adapters import get_all_adapters
all_models = []
for provider, adapter in get_all_adapters().items():
if adapter.is_available():
models = adapter.list_models()
all_models.extend([m.to_dict() for m in models])
return {
"object": "list",
"data": all_models,
}
@router.get("/models/{model_id}")
async def get_model(model_id: str):
"""
    Get information about a specific model.
"""
from adapters import get_all_adapters
for provider, adapter in get_all_adapters().items():
if adapter.is_available():
for model in adapter.list_models():
if model.id == model_id:
return {
"object": "model",
"id": model.id,
"owned_by": model.provider,
"data": model.to_dict(),
}
raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
# Register adapters at startup
def init_adapters():
    """Register all adapters."""
from adapters import register_adapter
from adapters.dashscope_adapter import DashScopeAdapter
from adapters.glm_adapter import GLMAdapter
from adapters.openai_adapter import DeepseekAdapter, OpenAIAdapter
register_adapter("glm", GLMAdapter)
register_adapter("dashscope", DashScopeAdapter)
register_adapter("openai", OpenAIAdapter)
register_adapter("deepseek", DeepseekAdapter)
logger.info("[Gateway] Adapters registered: glm, dashscope, openai, deepseek")

server/config.py (new file, 59 additions)
View File

@ -0,0 +1,59 @@
"""
Unified configuration management
"""
import os
from dataclasses import dataclass
from typing import Dict, Optional
@dataclass
class ProviderConfig:
"""平台配置"""
api_key: Optional[str] = None
base_url: Optional[str] = None
enabled: bool = True
# Platform configuration map
PROVIDERS: Dict[str, ProviderConfig] = {
"glm": ProviderConfig(
api_key=os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"),
enabled=True,
),
"dashscope": ProviderConfig(
api_key=os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"),
enabled=True,
),
"openai": ProviderConfig(
api_key=os.getenv("OPENAI_API_KEY"),
base_url=os.getenv("OPENAI_BASE_URL"),
enabled=True,
),
"deepseek": ProviderConfig(
api_key=os.getenv("DEEPSEEK_API_KEY"),
base_url=os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"),
enabled=True,
),
}
def get_provider_config(provider: str) -> Optional[ProviderConfig]:
"""获取平台配置"""
return PROVIDERS.get(provider)
def is_provider_available(provider: str) -> bool:
"""检查平台是否可用(已配置 API Key"""
config = PROVIDERS.get(provider)
return config is not None and bool(config.api_key) and config.enabled
def get_available_providers() -> list:
"""获取所有可用的平台列表"""
return [name for name, config in PROVIDERS.items() if is_provider_available(name)]
# Default platform
DEFAULT_PROVIDER = os.getenv("DEFAULT_PROVIDER", "glm")

View File

@ -3,12 +3,18 @ AI Chat API Server — main entry (pure infrastructure layer)
 Responsibilities:
 - Inject runtime dependencies (venv site-packages)
-- Read the LLM_BACKEND env var and dynamically load that platform's module
-- Register FastAPI routes and middleware
-Platform code locations (main.py contains no platform logic):
-- Bailian DashScope → api/chat_routes.py
-- Zhipu GLM-4.6V → api/chat_routes_glm.py + utils/glm_adapter.py
+- Serve the OpenAI-compatible API gateway (/v1/*) with multi-platform routing
+- Keep the backward-compatible /api/chat-ui/* routes
+Platform adapter locations:
+- adapters/glm_adapter.py → Zhipu GLM
+- adapters/dashscope_adapter.py → Aliyun Bailian
+- adapters/openai_adapter.py → OpenAI / DeepSeek
+API endpoints:
+- POST /v1/chat/completions → OpenAI-compatible gateway (auto-routes by model)
+- GET /v1/models → list of all available models
+- POST /api/chat-ui/chat → legacy chat endpoint (kept for compatibility)
 """
 import os
@ -62,12 +68,21 @ from api.chat_routes import (delete_conversation_handler,
                              save_conversation_handler, serve_upload_handler,
                              stop_generation_handler, upload_file_handler)
+# ── OpenAI-compatible gateway initialization ─────────────────────────
+from api.openai_gateway import init_adapters, router as openai_router
+init_adapters()
 # ── FastAPI app ──────────────────────────────────────────────────────
 app = FastAPI(
-    title=f"AI Chat API(LLM_BACKEND={LLM_BACKEND})",
-    version="3.0.0",
+    title="AI Chat API Gateway",
+    version="4.0.0",
+    description="Unified OpenAI-compatible API gateway with multi-platform model support",
 )
+# Register the OpenAI-compatible routes
+app.include_router(openai_router, prefix="/v1")
 @app.middleware("http")
 async def logging_middleware(request: Request, call_next):
@ -90,27 +105,72 @ async def logging_middleware(request: Request, call_next):
 @app.get("/health")
 async def health_check():
+    from config import get_available_providers
     return {
         "status": "healthy",
-        "backend": LLM_BACKEND,
+        "version": "4.0.0",
+        "default_backend": LLM_BACKEND,
+        "available_providers": get_available_providers(),
+        "endpoints": {
+            "openai_compatible": "/v1/chat/completions",
+            "legacy": "/api/chat-ui/chat",
+            "models": "/v1/models",
+        },
         "timestamp": datetime.now(timezone.utc).isoformat(),
     }
@app.post("/api/chat-ui/chat") @app.post("/api/chat-ui/chat")
async def chat_endpoint(request: Request): async def chat_endpoint(request: Request):
"""聊天接口(自动路由到当前平台)""" """聊天接口(根据 model 自动路由到对应平台)"""
return await _platform.chat_handler(await request.json()) from adapters import get_adapter, get_provider_from_model
from adapters.base import ChatCompletionRequest
try:
body = await request.json()
except Exception:
return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
# 创建请求对象
chat_request = ChatCompletionRequest.from_dict(body)
model = chat_request.model
# 根据模型名称确定平台
provider = get_provider_from_model(model)
logger.info(f"[Legacy API] model={model} → provider={provider}")
# 获取对应平台的适配器
adapter = get_adapter(provider)
if adapter is None:
return JSONResponse(
{"error": f"Unsupported model: {model} (provider: {provider})"},
status_code=400,
)
# 检查适配器是否可用
if not adapter.is_available():
return JSONResponse(
{"error": f"Provider '{provider}' is not available (API key not configured)"},
status_code=503,
)
# 调用适配器处理请求
return await adapter.chat(chat_request)
@app.get("/api/chat-ui/models") @app.get("/api/chat-ui/models")
async def get_models(): async def get_models():
"""模型列表(由当前平台返回)""" """模型列表(聚合所有可用平台的模型)"""
result = _platform.models_handler() from adapters import get_all_adapters
# 支持同步和异步两种返回
if hasattr(result, "__await__"): all_models = []
return await result for provider, adapter in get_all_adapters().items():
return result if adapter.is_available():
models = adapter.list_models()
all_models.extend([m.to_dict() for m in models])
return {"object": "list", "data": all_models}
# ── 通用路由(与平台无关)──────────────────────────────────────────── # ── 通用路由(与平台无关)────────────────────────────────────────────
@ -161,10 +221,25 @ if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", 8000))
-    print("=" * 55)
-    print(f" AI Chat Server v3.0 starting...")
-    print(f" Backend platform : {LLM_BACKEND.upper()} [LLM_BACKEND={LLM_BACKEND}]")
-    print(f" Listening port   : {port}")
-    print(f" Switch platform  : set LLM_BACKEND=glm|dashscope in .env and restart")
-    print("=" * 55)
+    # Collect the available platforms
+    from config import get_available_providers
+    available = get_available_providers()
+    print("=" * 60)
+    print(" AI Chat API Gateway v4.0")
+    print("=" * 60)
+    print(f" OpenAI-compatible endpoint: http://localhost:{port}/v1/chat/completions")
+    print(f" Model list                : http://localhost:{port}/v1/models")
+    print("-" * 60)
+    print(f" Available platforms : {', '.join(available) or 'none (configure an API key)'}")
+    print(f" Default platform    : {LLM_BACKEND} (backward-compatible mode)")
+    print("-" * 60)
+    print(" Usage:")
+    print(" curl -X POST http://localhost:8000/v1/chat/completions \\")
+    print('   -H "Content-Type: application/json" \\')
+    print('   -d \'{"model":"glm-4-flash","messages":[{"role":"user","content":"hi"}]}\'')
+    print("=" * 60)
     uvicorn.run(app, host="0.0.0.0", port=port)

View File

@ -223,14 +223,40 @ async function handleSend(
   );
   let fullText = "";
+  let reasoningText = "";
+  let isInReasoning = false;
   isTyping.value = false;
   for await (const chunk of stream) {
     if (abortController.value?.signal.aborted) break;
-    fullText += chunk;
+    if (chunk.type === "reasoning") {
+      // Deep-thinking content
+      if (!isInReasoning) {
+        // Entering a thinking block: open a <think> tag
+        reasoningText = "";
+        isInReasoning = true;
+        fullText += "<think>\n";
+      }
+      reasoningText += chunk.text;
+      fullText += chunk.text;
+    } else {
+      // Regular content
+      if (isInReasoning) {
+        // Leaving the thinking block: close the tag
+        isInReasoning = false;
+        fullText += "\n</think>\n";
+      }
+      fullText += chunk.text;
+    }
     chatStore.updateMessageContent(aiMessage.id, fullText);
   }
+  // If the stream ended while still inside a thinking block, close it
+  if (isInReasoning) {
+    fullText += "\n</think>";
+  }
   if (!abortController.value?.signal.aborted) {
     chatStore.updateMessage(aiMessage.id, {
       isStreaming: false,
@ -320,13 +346,39 @ async function handleRetry(messageId: string) {
   );
   let fullText = "";
+  let reasoningText = "";
+  let isInReasoning = false;
   for await (const chunk of stream) {
     if (abortController.value?.signal.aborted) break;
-    fullText += chunk;
+    if (chunk.type === "reasoning") {
+      // Deep-thinking content
+      if (!isInReasoning) {
+        // Entering a thinking block: open a <think> tag
+        reasoningText = "";
+        isInReasoning = true;
+        fullText += "<think>\n";
+      }
+      reasoningText += chunk.text;
+      fullText += chunk.text;
+    } else {
+      // Regular content
+      if (isInReasoning) {
+        // Leaving the thinking block: close the tag
+        isInReasoning = false;
+        fullText += "\n</think>\n";
+      }
+      fullText += chunk.text;
+    }
     chatStore.updateMessageContent(messageId, fullText);
   }
+  // If the stream ended while still inside a thinking block, close it
+  if (isInReasoning) {
+    fullText += "\n</think>";
+  }
   if (!abortController.value?.signal.aborted) {
     chatStore.updateMessage(messageId, {
       isStreaming: false,

View File

@ -76,6 +76,12 @ export interface UploadResult {
   mimeType?: string;
 }
+// Type of a streamed response chunk
+export interface StreamChunk {
+  type: "content" | "reasoning";
+  text: string;
+}
 // API client class
 class ChatApi {
   private baseUrl: string;
@ -90,7 +96,7 @ class ChatApi {
   async *streamChat(
     request: ChatRequest,
     signal?: AbortSignal,
-  ): AsyncGenerator<string> {
+  ): AsyncGenerator<StreamChunk> {
     // Build the message array, accounting for images
     let userContent;
     if (request.images && request.images.length > 0) {
@ -185,9 +191,18 @ class ChatApi {
             break;
           }
-          const content = data.choices?.[0]?.delta?.content;
+          const delta = data.choices?.[0]?.delta;
+          // Handle deep-thinking content (reasoning_content)
+          const reasoningContent = delta?.reasoning_content;
+          if (reasoningContent) {
+            yield { type: "reasoning", text: reasoningContent };
+          }
+          // Handle regular content
+          const content = delta?.content;
           if (content) {
-            yield content;
+            yield { type: "content", text: content };
           }
         } catch (e) {
           console.warn("JSON parse error", e, line);
@ -271,6 +286,20 @ class ChatApi {
       //   maxTokens: 8192,
       //   provider: "Zhipu",
       // },
+      {
+        id: "deepseek-chat",
+        name: "DeepSeek Chat",
+        description: "DeepSeek chat model",
+        maxTokens: 8192,
+        provider: "DeepSeek",
+      },
+      {
+        id: "deepseek-reasoner",
+        name: "DeepSeek Reasoner",
+        description: "DeepSeek deep-thinking model",
+        maxTokens: 8192,
+        provider: "DeepSeek",
+      },
     ];
   }

View File

@ -61,6 +61,20 @ export const useSettingsStore = defineStore("settings", () => {
       maxTokens: 8192,
       provider: "Zhipu",
     },
+    {
+      id: "deepseek-chat",
+      name: "DeepSeek Chat",
+      description: "DeepSeek chat model",
+      maxTokens: 8192,
+      provider: "DeepSeek",
+    },
+    {
+      id: "deepseek-reasoner",
+      name: "DeepSeek Reasoner",
+      description: "DeepSeek deep-thinking model",
+      maxTokens: 8192,
+      provider: "DeepSeek",
+    },
   ];
   // State