From 5e81c903cfdbe8a7db80eec241313efa080c280e Mon Sep 17 00:00:00 2001
From: SuperManTouX <93423476+SuperManTouX@users.noreply.github.com>
Date: Fri, 6 Mar 2026 15:43:05 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0deepseek=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/CHANGELOG-2026-03-06.md         | 199 +++++++++++
 server/adapters/__init__.py          |  16 +
 server/adapters/base.py              | 126 +++++++
 server/adapters/dashscope_adapter.py | 434 ++++++++++++++++++++++++
 server/adapters/glm_adapter.py       | 482 +++++++++++++++++++++++++++
 server/adapters/openai_adapter.py    | 304 +++++++++++++++++
 server/adapters/registry.py          | 120 +++++++
 server/api/openai_gateway.py         | 119 +++++++
 server/config.py                     |  59 ++++
 server/main.py                       | 119 +++++--
 src/components/chat/ChatMain.vue     |  58 +++-
 src/services/api.ts                  |  35 +-
 src/stores/settings.ts               |  14 +
 13 files changed, 2057 insertions(+), 28 deletions(-)
 create mode 100644 docs/CHANGELOG-2026-03-06.md
 create mode 100644 server/adapters/__init__.py
 create mode 100644 server/adapters/base.py
 create mode 100644 server/adapters/dashscope_adapter.py
 create mode 100644 server/adapters/glm_adapter.py
 create mode 100644 server/adapters/openai_adapter.py
 create mode 100644 server/adapters/registry.py
 create mode 100644 server/api/openai_gateway.py
 create mode 100644 server/config.py

diff --git a/docs/CHANGELOG-2026-03-06.md b/docs/CHANGELOG-2026-03-06.md
new file mode 100644
index 0000000..b8b1c83
--- /dev/null
+++ b/docs/CHANGELOG-2026-03-06.md
@@ -0,0 +1,199 @@
+# 开发日志 - 2026-03-06
+
+## 修复:深度思考内容不显示
+
+### 问题描述
+启用深度思考后,后端日志显示已启用深度思考功能,但前端没有显示深度思考内容。
+
+### 根本原因
+前端 `src/services/api.ts` 中的 `streamChat` 方法只处理了普通 `content`,完全忽略了后端返回的 `reasoning_content`(深度思考内容)。
+
+### 解决方案
+
+#### 1. 前端 `src/services/api.ts`
+
+添加 `StreamChunk` 接口,区分内容类型:
+
+```typescript
+// 流式响应块类型
+export interface StreamChunk {
+  type: "content" | "reasoning";
+  text: string;
+}
+```
+
+修改 `streamChat` 方法同时处理两种内容:
+
+```typescript
+const delta = data.choices?.[0]?.delta;
+
+// 处理深度思考内容(reasoning_content)
+const reasoningContent = delta?.reasoning_content;
+if (reasoningContent) {
+  yield { type: "reasoning", text: reasoningContent };
+}
+
+// 处理普通内容
+const content = delta?.content;
+if (content) {
+  yield { type: "content", text: content };
+}
+```
+
+#### 2. 前端 `src/components/chat/ChatMain.vue`
+
+修改流式处理逻辑,将 `reasoning` 类型内容包装成 `<think>` 标签:
+
+```typescript
+let isInReasoning = false;
+
+for await (const chunk of stream) {
+  if (chunk.type === "reasoning") {
+    if (!isInReasoning) {
+      isInReasoning = true;
+      fullText += "<think>\n";
+    }
+    fullText += chunk.text;
+  } else {
+    if (isInReasoning) {
+      isInReasoning = false;
+      fullText += "</think>\n\n";
+    }
+    fullText += chunk.text;
+  }
+}
+
+// 如果最后还在深度思考块中,关闭它
+if (isInReasoning) {
+  fullText += "</think>\n";
+}
+```
+
+`<think>` 标签会被 `markstream-vue` 库识别,并由 `ThinkingNode` 组件渲染。
+
+---
+
+## 功能:图片/文件附件自动切换模型
+
+### 需求
+当用户上传图片或文件(PDF、DOCX等)时,无论前端选择了什么模型,后端都应强制使用 `glm-4.6v` 模型(支持多模态)。
+
+### 解决方案
+
+#### 后端 `server/adapters/glm_adapter.py`
+
+**修改 `_build_messages` 方法**:
+
+返回值从 `(messages, has_vision)` 改为 `(messages, has_vision, has_files)`:
+
+```python
+def _build_messages(
+    self, request: ChatCompletionRequest
+) -> tuple[List[Dict], bool, bool]:
+    """
+    构建 GLM 格式的消息
+    返回:(消息列表, 是否包含图片, 是否包含文件附件)
+    """
+    messages = []
+    has_vision = False
+    has_files = bool(request.files)  # 检查是否有文件附件
+    # ... 处理消息 ...
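+    # (省略的处理逻辑与下文 glm_adapter.py 的完整 diff 一致:字符串
+    #  content 直接透传;列表 content 逐项转换 text / image_url,
+    #  命中 image_url 时置 has_vision = True;request.files 以
+    #  file_url 形式追加到最后一条用户消息)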
+ return messages, has_vision, has_files +``` + +**修改 `_resolve_model` 方法**: + +```python +def _resolve_model(self, model: str, has_vision: bool, has_files: bool = False) -> str: + """解析实际使用的模型""" + model_lower = model.lower() + # 如果有图片或文件附件,强制使用 glm-4.6v(支持多模态) + if (has_vision or has_files) and model_lower not in VISION_MODELS: + logger.info( + f"[GLM] 检测到图片或文件附件,强制切换模型: {model} -> glm-4.6v" + ) + return "glm-4.6v" + return model +``` + +**修改 `chat` 方法调用**: + +```python +glm_messages, has_vision, has_files = self._build_messages(request) +actual_model = self._resolve_model(request.model, has_vision, has_files) +``` + +--- + +## 功能:DeepSeek 深度思考支持 + +### 需求 +为 DeepSeek 的 `deepseek-reasoner` 模型添加深度思考支持,通过 `extra_body` 参数启用。 + +### 解决方案 + +#### 后端 `server/adapters/openai_adapter.py` + +**添加支持深度思考的模型列表**: + +```python +# DeepSeek 支持深度思考的模型 +DEEPSEEK_THINKING_MODELS = {"deepseek-reasoner"} +``` + +**修改 `chat` 方法**: + +```python +# DeepSeek 深度思考支持 +extra_body = None +if self._provider_type == "deepseek" and request.deep_thinking: + if self._supports_thinking(request.model): + extra_body = {"thinking": {"type": "enabled"}} + kwargs["extra_body"] = extra_body + logger.info(f"[{provider_name}] 深度思考已启用: extra_body = {extra_body}") +``` + +**添加 `_supports_thinking` 方法**: + +```python +def _supports_thinking(self, model: str) -> bool: + """检查模型是否支持深度思考""" + return model.lower() in DEEPSEEK_THINKING_MODELS +``` + +**修改 `_stream_chat` 和 `_sync_chat` 方法**: + +- 添加 `extra_body` 参数 +- 增强 `reasoning_content` 日志输出 + +--- + +## 涉及文件 + +| 文件 | 修改类型 | +|------|----------| +| `src/services/api.ts` | 新增 `StreamChunk` 接口,修改 `streamChat` 方法 | +| `src/components/chat/ChatMain.vue` | 修改流式处理逻辑,支持 `reasoning` 类型 | +| `server/adapters/glm_adapter.py` | 修改 `_build_messages` 和 `_resolve_model` 方法 | +| `server/adapters/openai_adapter.py` | 添加 DeepSeek 深度思考支持 | + +--- + +## 测试建议 + +1. **GLM 深度思考测试**: + - 选择支持深度思考的模型(如 glm-4.6v) + - 开启深度思考开关 + - 发送问题,确认前端显示深度思考内容块 + +2. **DeepSeek 深度思考测试**: + - 选择 `deepseek-reasoner` 模型 + - 开启深度思考开关 + - 发送问题,确认后端日志显示 `extra_body = {'thinking': {'type': 'enabled'}}` + - 确认前端显示深度思考内容块 + +3. 
**模型自动切换测试**: + - 选择非多模态模型(如 glm-4-flash) + - 上传图片或 PDF 文件 + - 确认后端日志显示模型切换为 glm-4.6v + - 确认多模态内容正确处理 \ No newline at end of file diff --git a/server/adapters/__init__.py b/server/adapters/__init__.py new file mode 100644 index 0000000..295fb14 --- /dev/null +++ b/server/adapters/__init__.py @@ -0,0 +1,16 @@ +""" +LLM 平台适配器模块 +""" + +from .base import BaseAdapter, ChatCompletionRequest, ModelInfo +from .registry import get_adapter, get_all_adapters, get_provider_from_model, register_adapter + +__all__ = [ + "BaseAdapter", + "ChatCompletionRequest", + "ModelInfo", + "get_adapter", + "get_all_adapters", + "get_provider_from_model", + "register_adapter", +] \ No newline at end of file diff --git a/server/adapters/base.py b/server/adapters/base.py new file mode 100644 index 0000000..e18da2f --- /dev/null +++ b/server/adapters/base.py @@ -0,0 +1,126 @@ +""" +适配器基类定义 +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Union + + +@dataclass +class ModelInfo: + """模型信息""" + + id: str + name: str + description: str + max_tokens: int = 4096 + provider: str = "unknown" + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "name": self.name, + "description": self.description, + "maxTokens": self.max_tokens, + "provider": self.provider, + } + + +@dataclass +class ChatCompletionRequest: + """OpenAI 格式的聊天请求""" + + model: str + messages: List[Dict[str, Any]] + stream: bool = True + temperature: float = 0.7 + max_tokens: int = 2000 + files: Optional[List[str]] = None + deep_search: bool = False + web_search: bool = False + deep_thinking: bool = False + # 原始请求体(保留额外字段) + extra: Dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ChatCompletionRequest": + """从字典创建请求对象""" + # 提取已知字段 + known_fields = { + "model", + "messages", + "stream", + "temperature", + "max_tokens", + "files", + "deepSearch", + "webSearch", + "deepThinking", + } + extra = {k: v for k, v in data.items() if k not in known_fields} + + return cls( + model=data.get("model", "glm-4-flash"), + messages=data.get("messages", []), + stream=data.get("stream", True), + temperature=data.get("temperature", 0.7), + max_tokens=data.get("max_tokens", data.get("maxTokens", 2000)), + files=data.get("files"), + deep_search=data.get("deepSearch", False), + web_search=data.get("webSearch", False), + deep_thinking=data.get("deepThinking", False), + extra=extra, + ) + + +class BaseAdapter(ABC): + """ + LLM 平台适配器基类 + 所有平台适配器需继承此类并实现抽象方法 + """ + + @property + @abstractmethod + def provider_name(self) -> str: + """返回平台名称(如 'glm', 'dashscope', 'openai')""" + pass + + @abstractmethod + async def chat(self, request: ChatCompletionRequest): + """ + 处理聊天请求 + + Args: + request: OpenAI 格式的聊天请求 + + Returns: + 流式响应返回 StreamingResponse + 非流式返回 JSONResponse 或 dict + """ + pass + + @abstractmethod + def list_models(self) -> List[ModelInfo]: + """ + 返回该平台支持的模型列表 + + Returns: + ModelInfo 对象列表 + """ + pass + + def is_available(self) -> bool: + """ + 检查该适配器是否可用(API Key 是否配置) + 默认实现:检查环境变量中的 API Key + """ + return True + + def get_models_response(self) -> Dict[str, Any]: + """返回 OpenAI 格式的模型列表响应""" + models = self.list_models() + return { + "object": "list", + "data": [m.to_dict() for m in models], + } \ No newline at end of file diff --git a/server/adapters/dashscope_adapter.py b/server/adapters/dashscope_adapter.py new file mode 100644 index 0000000..5df24ce --- /dev/null +++ b/server/adapters/dashscope_adapter.py @@ -0,0 
+1,434 @@
+"""
+阿里云百炼 DashScope 适配器
+基于 api/chat_routes.py 重构
+"""
+
+import json
+import os
+from typing import Dict, List
+
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
+from utils.logger import get_logger
+
+logger = get_logger()
+
+# 百炼模型配置
+DASHSCOPE_MODELS = [
+    ModelInfo(
+        id="qwen-max",
+        name="通义千问 Max",
+        description="最强大的模型",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-plus",
+        name="通义千问 Plus",
+        description="能力均衡",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-turbo",
+        name="通义千问 Turbo",
+        description="速度更快、成本更低",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-vl-max",
+        name="通义千问 VL-Max",
+        description="支持视觉理解的多模态模型",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+    ModelInfo(
+        id="qwen-vl-plus",
+        name="通义千问 VL-Plus",
+        description="支持视觉理解的多模态模型",
+        max_tokens=8192,
+        provider="Aliyun",
+    ),
+]
+
+
+class DashScopeAdapter(BaseAdapter):
+    """阿里云百炼 DashScope 平台适配器"""
+
+    @property
+    def provider_name(self) -> str:
+        return "dashscope"
+
+    def is_available(self) -> bool:
+        """检查 API Key 是否配置"""
+        return bool(os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"))
+
+    def _get_api_key(self) -> str:
+        """获取 API Key"""
+        return os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY", "")
+
+    def list_models(self) -> List[ModelInfo]:
+        return DASHSCOPE_MODELS
+
+    async def chat(self, request: ChatCompletionRequest):
+        """
+        处理 DashScope 聊天请求
+        支持流式/非流式、多模态
+        """
+        # 打印请求参数
+        logger.info(f"[DashScope] 请求参数:")
+        logger.info(f"  - model: {request.model}")
+        logger.info(f"  - stream: {request.stream}")
+        logger.info(f"  - temperature: {request.temperature}")
+        logger.info(f"  - max_tokens: {request.max_tokens}")
+        logger.info(f"  - files: {request.files}")
+        logger.info(f"  - messages: {json.dumps(request.messages, ensure_ascii=False, indent=2)}")
+
+        # 检测是否包含多模态内容
+        has_multimodal = self._has_multimodal_content(request)
+        logger.info(f"  - has_multimodal: {has_multimodal}")
+
+        if has_multimodal:
+            return await self._multimodal_chat(request)
+        else:
+            return await self._text_chat(request)
+
+    def _has_multimodal_content(self, request: ChatCompletionRequest) -> bool:
+        """检查是否包含多模态内容"""
+        for msg in request.messages:
+            content = msg.get("content", "")
+            if isinstance(content, list):
+                for item in content:
+                    if isinstance(item, dict) and item.get("type") == "image_url":
+                        return True
+        return bool(request.files)
+
+    async def _text_chat(self, request: ChatCompletionRequest):
+        """纯文本聊天"""
+        import dashscope
+        from dashscope import Generation
+
+        dashscope.api_key = self._get_api_key()
+
+        # 转换消息格式
+        messages = self._build_text_messages(request)
+
+        if request.stream:
+            return self._stream_text_chat(messages, request)
+        else:
+            return self._sync_text_chat(messages, request)
+
+    def _build_text_messages(self, request: ChatCompletionRequest) -> List[Dict]:
+        """构建文本消息"""
+        messages = []
+        for msg in request.messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+            if isinstance(content, str) and content.strip():
+                messages.append({"role": role, "content": content})
+            elif isinstance(content, list):
+                text = ""
+                for item in content:
+                    if isinstance(item, dict) and item.get("type") == "text":
+                        text += item.get("text", "")
+                if text.strip():
+                    messages.append({"role": role, "content": text})
+        return messages
+
+    def _stream_text_chat(self, messages: List[Dict], request: ChatCompletionRequest):
+        """流式文本聊天"""
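+        # 思路:以 stream=True 调用 DashScope Generation,把每个增量重新
+        # 封装成 OpenAI 兼容的 chat.completion.chunk,按 SSE 约定以
+        # "data: {JSON}\n\n" 逐条下发;结束时先发 finish_reason="stop"
+        # 的空 delta,再发 "data: [DONE]\n\n" 终止标记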
+ logger.info(f"[DashScope] 开始流式文本响应...") + + def generator(): + from utils.helpers import generate_unique_id, get_current_timestamp + + from dashscope import Generation + + full_content = "" + chunk_count = 0 + responses = Generation.call( + model=request.model, + messages=messages, + stream=True, + temperature=request.temperature, + max_tokens=request.max_tokens, + result_format="message", + ) + + for resp in responses: + if resp.status_code == 200: + chunk_count += 1 + content = resp.output.choices[0].message.content + if content: + full_content += content + data = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": request.model, + "choices": [ + { + "index": 0, + "delta": {"content": content}, + "finish_reason": None, + } + ], + } + yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" + + finish = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": request.model, + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + } + yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n" + yield "data: [DONE]\n\n" + + # 打印流式响应结果 + logger.info(f"[DashScope] 流式文本响应完成:") + logger.info(f" - chunks: {chunk_count}") + logger.info(f" - content_length: {len(full_content)} 字符") + logger.info(f" - content_preview: {full_content[:200]}..." if len(full_content) > 200 else f" - content: {full_content}") + + return StreamingResponse(generator(), media_type="text/event-stream") + + def _sync_text_chat(self, messages: List[Dict], request: ChatCompletionRequest): + """非流式文本聊天""" + from utils.helpers import generate_unique_id, get_current_timestamp + + from dashscope import Generation + + resp = Generation.call( + model=request.model, + messages=messages, + stream=False, + temperature=request.temperature, + max_tokens=request.max_tokens, + result_format="message", + ) + + if resp.status_code == 200: + content = resp.output.choices[0].message.content + response = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion", + "created": get_current_timestamp(), + "model": request.model, + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": content}, + "finish_reason": "stop", + } + ], + } + + if hasattr(resp, "usage") and resp.usage: + response["usage"] = { + "prompt_tokens": resp.usage.input_tokens, + "completion_tokens": resp.usage.output_tokens, + "total_tokens": resp.usage.total_tokens, + } + + # 打印响应结果 + logger.info(f"[DashScope] 响应结果:") + logger.info(f" - content_length: {len(content)} 字符") + logger.info(f" - content_preview: {content[:200]}..." 
if len(content) > 200 else f" - content: {content}") + if hasattr(resp, "usage") and resp.usage: + logger.info(f" - usage: {response['usage']}") + + return JSONResponse(content=response) + + logger.error(f"[DashScope] 请求失败: {resp.code} - {resp.message}") + return JSONResponse( + status_code=500, + content={"error": f"DashScope Error: {resp.code} - {resp.message}"}, + ) + + async def _multimodal_chat(self, request: ChatCompletionRequest): + """多模态聊天""" + import dashscope + from dashscope import MultiModalConversation + + dashscope.api_key = self._get_api_key() + + # 转换消息格式 + messages = self._build_multimodal_messages(request) + + # 选择多模态模型 + model = request.model + if "qwen-" in model and "vl" not in model: + model = model.replace("qwen-", "qwen-vl-") + + if request.stream: + return self._stream_multimodal_chat(messages, model, request) + else: + return self._sync_multimodal_chat(messages, model, request) + + def _build_multimodal_messages(self, request: ChatCompletionRequest) -> List[Dict]: + """构建多模态消息""" + messages = [] + + for msg in request.messages: + role = msg.get("role", "user") + content = msg.get("content", "") + + if isinstance(content, str): + if content.strip(): + messages.append({"role": role, "content": [{"text": content}]}) + elif isinstance(content, list): + ds_content = [] + for item in content: + if isinstance(item, dict): + if item.get("type") == "text": + ds_content.append({"text": item.get("text", "")}) + elif item.get("type") == "image_url": + img_url = self._extract_image_url(item) + if img_url: + ds_content.append({"image": img_url}) + + if ds_content: + messages.append({"role": role, "content": ds_content}) + + return messages + + def _extract_image_url(self, item: Dict) -> str: + """提取并转换图片 URL""" + img_val = item.get("image_url", "") + if isinstance(img_val, str): + img_url = img_val + elif isinstance(img_val, dict): + img_url = img_val.get("url", "") + else: + img_url = "" + + # 转换 http URL 为 file:// 格式(如果是本地文件) + if img_url.startswith(("http://", "https://")): + from urllib.parse import urlparse + + parsed = urlparse(img_url) + if "localhost" in parsed.netloc or "127.0.0.1" in parsed.netloc: + path_parts = parsed.path.split("/") + try: + uploads_idx = path_parts.index("uploads") + img_url = f"file://{'/'.join(path_parts[uploads_idx:])}" + except ValueError: + pass + elif not img_url.startswith("file://"): + img_url = f"file://{img_url}" + + return img_url + + def _stream_multimodal_chat( + self, messages: List[Dict], model: str, request: ChatCompletionRequest + ): + """流式多模态聊天""" + + def generator(): + from utils.helpers import generate_unique_id, get_current_timestamp + + from dashscope import MultiModalConversation + + responses = MultiModalConversation.call( + model=model, + messages=messages, + stream=True, + max_tokens=request.max_tokens, + temperature=request.temperature, + ) + + full_content = "" + for resp in responses: + if resp.status_code == 200: + try: + content_items = resp.output.choices[0]["message"]["content"] + text = "" + for item in content_items: + if isinstance(item, dict) and "text" in item: + text += item["text"] + + if len(text) > len(full_content): + delta = text[len(full_content) :] + full_content = text + + data = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": model, + "choices": [ + { + "index": 0, + "delta": {"content": delta}, + "finish_reason": None, + } + ], + } + yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" + except (KeyError, 
IndexError, TypeError): + pass + + finish = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": model, + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + } + yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(generator(), media_type="text/event-stream") + + def _sync_multimodal_chat( + self, messages: List[Dict], model: str, request: ChatCompletionRequest + ): + """非流式多模态聊天""" + from utils.helpers import generate_unique_id, get_current_timestamp + + from dashscope import MultiModalConversation + + resp = MultiModalConversation.call( + model=model, + messages=messages, + stream=False, + max_tokens=request.max_tokens, + temperature=request.temperature, + ) + + if resp.status_code == 200: + try: + content_items = resp.output.choices[0]["message"]["content"] + text = "" + for item in content_items: + if isinstance(item, dict) and "text" in item: + text += item["text"] + + response = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion", + "created": get_current_timestamp(), + "model": model, + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": text}, + "finish_reason": "stop", + } + ], + } + return JSONResponse(content=response) + except (KeyError, IndexError, TypeError) as e: + return JSONResponse( + status_code=500, + content={"error": f"Parse error: {str(e)}"}, + ) + + return JSONResponse( + status_code=500, + content={"error": f"DashScope Error: {resp.code} - {resp.message}"}, + ) \ No newline at end of file diff --git a/server/adapters/glm_adapter.py b/server/adapters/glm_adapter.py new file mode 100644 index 0000000..77711b5 --- /dev/null +++ b/server/adapters/glm_adapter.py @@ -0,0 +1,482 @@ +""" +智谱 GLM 适配器 +基于 utils/glm_adapter.py 重构 +使用zai-sdk。因为已经完成这一部分的整套逻辑,如果更换OpenAI-SDK会花很多时间调试。 +""" + +import json +import os +from typing import Dict, List, Optional + +from fastapi.responses import JSONResponse, StreamingResponse + +from .base import BaseAdapter, ChatCompletionRequest, ModelInfo +from utils.logger import get_logger + +logger = get_logger() + +# GLM 模型配置 +GLM_MODELS = [ + ModelInfo( + id="glm-4.6v", + name="GLM-4.6V(推荐)", + description="最新旗舰模型,支持文本/图像/文档/深度思考", + max_tokens=128000, + provider="ZhipuAI", + ), + ModelInfo( + id="glm-4-flash", + name="GLM-4 Flash", + description="高性价比文本模型", + max_tokens=128000, + provider="ZhipuAI", + ), + ModelInfo( + id="glm-4v-plus-0111", + name="GLM-4V Plus", + description="图像 + PDF/DOCX 原生多模态", + max_tokens=128000, + provider="ZhipuAI", + ), + ModelInfo( + id="glm-z1-flash", + name="GLM-Z1 Flash", + description="深度思考推理模型", + max_tokens=128000, + provider="ZhipuAI", + ), +] + +# 视觉模型列表(用于自动切换) +VISION_MODELS = {"glm-4v", "glm-4v-plus", "glm-4v-plus-0111", "glm-4.6v"} + +# 支持深度思考的模型 +THINKING_MODELS = {"glm-z1-flash", "glm-z1-air", "glm-4.6v", "glm-4.6"} + + +class GLMAdapter(BaseAdapter): + """智谱 GLM 平台适配器""" + + _client = None + + @property + def provider_name(self) -> str: + return "glm" + + def is_available(self) -> bool: + """检查 API Key 是否配置""" + return bool(os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")) + + def _get_client(self): + """获取 GLM 客户端(懒加载)""" + if self._client is None: + from zhipuai import ZhipuAI + + api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY") + self._client = ZhipuAI(api_key=api_key) + return self._client + + def list_models(self) -> List[ModelInfo]: + return GLM_MODELS + + async def 
chat(self, request: ChatCompletionRequest): + """ + 处理 GLM 聊天请求 + 支持流式/非流式、图像、文档、联网搜索、深度思考 + """ + client = self._get_client() + + # 构建消息 + glm_messages, has_vision, has_files = self._build_messages(request) + actual_model = self._resolve_model(request.model, has_vision, has_files) + + # 调试:打印原始请求参数 + logger.info(f"[GLM] 原始请求参数:") + logger.info( + f" - request.deep_thinking: {request.deep_thinking} (type: {type(request.deep_thinking)})" + ) + logger.info(f" - request.web_search: {request.web_search}") + logger.info(f" - request.deep_search: {request.deep_search}") + logger.info(f" - actual_model: {actual_model}") + logger.info(f" - supports_thinking: {self._supports_thinking(actual_model)}") + + # 构建额外参数 + extra_kwargs = {} + web_search = self._get_web_search_mode(request) + + if web_search: + extra_kwargs["tools"] = [self._build_web_search_tool(web_search)] + extra_kwargs["tool_choice"] = "auto" + + # 深度思考:正向选择(True 时启用,False 时禁用) + # 注意:只有特定模型支持深度思考(如 glm-z1-flash) + thinking_enabled = request.deep_thinking and self._supports_thinking( + actual_model + ) + logger.info( + f"[GLM] 深度思考判断: {request.deep_thinking} and {self._supports_thinking(actual_model)} = {thinking_enabled}" + ) + + if thinking_enabled: + extra_kwargs["thinking"] = {"type": "enabled"} + logger.info( + f"[GLM] 深度思考已启用: extra_kwargs['thinking'] = {extra_kwargs['thinking']}" + ) + + if extra_kwargs: + logger.info( + f"[GLM] 最终 extra_kwargs: {json.dumps(extra_kwargs, ensure_ascii=False)}" + ) + + if request.stream: + return self._stream_chat( + client, glm_messages, actual_model, request, extra_kwargs + ) + else: + return self._sync_chat( + client, glm_messages, actual_model, request, extra_kwargs + ) + + def _build_messages( + self, request: ChatCompletionRequest + ) -> tuple[List[Dict], bool, bool]: + """ + 构建 GLM 格式的消息 + 返回:(消息列表, 是否包含图片, 是否包含文件附件) + """ + messages = [] + has_vision = False + has_files = bool(request.files) # 检查是否有文件附件 + + for msg in request.messages: + role = msg.get("role", "user") + content = msg.get("content", "") + + if isinstance(content, str): + # 纯文本 + if content.strip(): + messages.append({"role": role, "content": content}) + elif isinstance(content, list): + # 多模态内容 + glm_content = [] + for item in content: + if isinstance(item, dict): + item_type = item.get("type", "") + if item_type == "text": + text = item.get("text", "") + if text: + glm_content.append({"type": "text", "text": text}) + elif item_type == "image_url": + img_url = self._extract_image_url(item) + if img_url: + glm_content.append( + {"type": "image_url", "image_url": {"url": img_url}} + ) + has_vision = True + + if glm_content: + messages.append({"role": role, "content": glm_content}) + + # 处理文件附件 + if request.files: + file_content = self._build_file_content(request.files) + if messages and messages[-1]["role"] == "user": + # 追加到最后一个用户消息 + if isinstance(messages[-1]["content"], list): + messages[-1]["content"].extend(file_content) + else: + messages[-1]["content"] = [ + {"type": "text", "text": messages[-1]["content"]}, + *file_content, + ] + else: + messages.append({"role": "user", "content": file_content}) + + return messages, has_vision, has_files + + def _extract_image_url(self, item: Dict) -> Optional[str]: + """提取图片 URL""" + img_val = item.get("image_url", "") + if isinstance(img_val, str): + return img_val + elif isinstance(img_val, dict): + return img_val.get("url", "") + return None + + def _build_file_content(self, files: List[str]) -> List[Dict]: + """构建文件内容""" + content = [] + for file_url in files: + if 
file_url.startswith(("http://", "https://")): + content.append({"type": "file_url", "file_url": {"url": file_url}}) + return content + + def _resolve_model( + self, model: str, has_vision: bool, has_files: bool = False + ) -> str: + """解析实际使用的模型""" + model_lower = model.lower() + # 如果有图片或文件附件,强制使用 glm-4.6v(支持多模态) + if (has_vision or has_files) and model_lower not in VISION_MODELS: + logger.info( + f"[GLM] 检测到图片或文件附件,强制切换模型: {model} -> glm-4.6v" + ) + return "glm-4.6v" + return model + + def _supports_thinking(self, model: str) -> bool: + """检查模型是否支持深度思考""" + return model.lower() in THINKING_MODELS + + def _get_web_search_mode(self, request: ChatCompletionRequest) -> str: + """获取联网搜索模式""" + if request.deep_search: + return "deep" + elif request.web_search: + return "simple" + return "" + + def _build_web_search_tool(self, mode: str) -> Dict: + """构建联网搜索工具""" + from datetime import datetime + + today = datetime.now().strftime("%Y年%m月%d日") + + if mode == "deep": + # 深度搜索:返回搜索结果详情 + return { + "type": "web_search", + "web_search": { + "enable": True, + "search_engine": "search_pro", + "search_result": True, + "search_prompt": f"你是一位智能助手。请用简洁的语言总结网络搜索{{search_result}}中的关键信息,按重要性排序并引用来源日期。今天的日期是{today}。", + "count": 5, + "search_recency_filter": "noLimit", + "content_size": "high", + }, + } + else: + # 简单搜索 + return { + "type": "web_search", + "web_search": { + "enable": True, + "search_engine": "search_pro", + "search_result": True, + "count": 5, + }, + } + + def _stream_chat( + self, client, messages, model, request, extra_kwargs + ) -> StreamingResponse: + """流式聊天""" + logger.info(f"[GLM] 开始流式响应...") + + # 提取深度思考配置 + thinking_config = extra_kwargs.get("thinking") + tools_config = extra_kwargs.get("tools") + + def generator(): + from utils.helpers import generate_unique_id, get_current_timestamp + + full_content = "" + + # 构建 API 调用参数 + api_params = { + "model": model, + "messages": messages, + "stream": True, + "temperature": request.temperature, + "max_tokens": request.max_tokens, + } + + # 深度思考:使用 extra_body 传递 + if thinking_config: + api_params["extra_body"] = {"thinking": thinking_config} + + # 联网搜索:使用 tools 参数 + if tools_config: + api_params["tools"] = tools_config + api_params["tool_choice"] = "auto" + + # 打印请求参数 + logger.info(f"[GLM] API 调用参数:") + logger.info(f" - model: {model}") + logger.info(f" - stream: True") + logger.info(f" - temperature: {request.temperature}") + logger.info(f" - max_tokens: {request.max_tokens}") + if thinking_config: + logger.info(f" - extra_body: {{'thinking': {thinking_config}}}") + if tools_config: + logger.info( + f" - tools: {json.dumps(tools_config, ensure_ascii=False)}" + ) + logger.info(f" - tool_choice: auto") + logger.info( + f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}" + ) + + chunk_count = 0 + resp = client.chat.completions.create(**api_params) + + for chunk in resp: + chunk_count += 1 + + # 检查 delta 是否存在 + if not hasattr(chunk.choices[0], "delta"): + continue + + delta = chunk.choices[0].delta + + # 处理深度思考内容(reasoning_content) + reasoning_content = getattr(delta, "reasoning_content", None) + if reasoning_content: + data = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": model, + "choices": [ + { + "index": 0, + "delta": {"reasoning_content": reasoning_content}, + "finish_reason": None, + } + ], + } + yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" + continue + + # 处理普通内容 + content = getattr(delta, "content", None) + if content: + 
full_content += content + data = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": model, + "choices": [ + { + "index": 0, + "delta": {"content": content}, + "finish_reason": None, + } + ], + } + yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" + + # 结束标记 + finish = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": model, + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + } + yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n" + yield "data: [DONE]\n\n" + + # 打印流式响应结果 + logger.info(f"[GLM] 流式响应完成:") + logger.info(f" - chunks: {chunk_count}") + logger.info(f" - content_length: {len(full_content)} 字符") + logger.info( + f" - content_preview: {full_content[:200]}..." + if len(full_content) > 200 + else f" - content: {full_content}" + ) + + return StreamingResponse(generator(), media_type="text/event-stream") + + def _sync_chat( + self, client, messages, model, request, extra_kwargs + ) -> JSONResponse: + """非流式聊天""" + from utils.helpers import generate_unique_id, get_current_timestamp + + # 提取深度思考配置 + thinking_config = extra_kwargs.get("thinking") + tools_config = extra_kwargs.get("tools") + + # 构建 API 调用参数 + api_params = { + "model": model, + "messages": messages, + "stream": False, + "temperature": request.temperature, + "max_tokens": request.max_tokens, + } + + # 深度思考:使用 extra_body 传递 + if thinking_config: + api_params["extra_body"] = {"thinking": thinking_config} + + # 联网搜索:使用 tools 参数 + if tools_config: + api_params["tools"] = tools_config + api_params["tool_choice"] = "auto" + + # 打印请求参数 + logger.info(f"[GLM] API 调用参数:") + logger.info(f" - model: {model}") + logger.info(f" - stream: {request.stream}") + logger.info(f" - temperature: {request.temperature}") + logger.info(f" - max_tokens: {request.max_tokens}") + if thinking_config: + logger.info(f" - extra_body: {{'thinking': {thinking_config}}}") + if tools_config: + logger.info(f" - tools: {json.dumps(tools_config, ensure_ascii=False)}") + logger.info(f" - tool_choice: auto") + logger.info( + f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}" + ) + + resp = client.chat.completions.create(**api_params) + + message = resp.choices[0].message + content = message.content or "" + + # 构建响应 + response_message = {"role": "assistant", "content": content} + + # 处理深度思考内容 + reasoning_content = getattr(message, "reasoning_content", None) + if reasoning_content: + response_message["reasoning_content"] = reasoning_content + + response = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion", + "created": get_current_timestamp(), + "model": model, + "choices": [ + { + "index": 0, + "message": response_message, + "finish_reason": "stop", + } + ], + } + + if hasattr(resp, "usage") and resp.usage: + response["usage"] = { + "prompt_tokens": resp.usage.prompt_tokens, + "completion_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } + + # 打印响应结果 + logger.info(f"[GLM] 响应结果:") + logger.info(f" - content_length: {len(content)} 字符") + logger.info( + f" - content_preview: {content[:200]}..." 
+ if len(content) > 200 + else f" - content: {content}" + ) + if hasattr(resp, "usage") and resp.usage: + logger.info(f" - usage: {response['usage']}") + + return JSONResponse(content=response) diff --git a/server/adapters/openai_adapter.py b/server/adapters/openai_adapter.py new file mode 100644 index 0000000..d1a4dcf --- /dev/null +++ b/server/adapters/openai_adapter.py @@ -0,0 +1,304 @@ +""" +OpenAI 适配器 +支持 OpenAI 及兼容 API(如 Deepseek) +""" + +import json +import os +from typing import Dict, List, Optional + +from fastapi.responses import JSONResponse, StreamingResponse + +from .base import BaseAdapter, ChatCompletionRequest, ModelInfo +from utils.logger import get_logger + +logger = get_logger() + +# OpenAI 模型配置 +OPENAI_MODELS = [ + ModelInfo( + id="gpt-4o", + name="GPT-4o", + description="最新旗舰多模态模型", + max_tokens=128000, + provider="OpenAI", + ), + ModelInfo( + id="gpt-4o-mini", + name="GPT-4o Mini", + description="高性价比多模态模型", + max_tokens=128000, + provider="OpenAI", + ), + ModelInfo( + id="gpt-4-turbo", + name="GPT-4 Turbo", + description="GPT-4 增强版", + max_tokens=128000, + provider="OpenAI", + ), + ModelInfo( + id="gpt-3.5-turbo", + name="GPT-3.5 Turbo", + description="快速经济的选择", + max_tokens=16385, + provider="OpenAI", + ), +] + +# Deepseek 模型配置 +DEEPSEEK_MODELS = [ + ModelInfo( + id="deepseek-chat", + name="Deepseek Chat", + description="Deepseek 对话模型", + max_tokens=64000, + provider="Deepseek", + ), + ModelInfo( + id="deepseek-reasoner", + name="Deepseek Reasoner", + description="Deepseek 推理模型(支持深度思考)", + max_tokens=64000, + provider="Deepseek", + ), +] + +# DeepSeek 支持深度思考的模型 +DEEPSEEK_THINKING_MODELS = {"deepseek-reasoner"} + + +class OpenAIAdapter(BaseAdapter): + """OpenAI 平台适配器""" + + _client = None + _provider_type: str = "openai" # openai 或 deepseek + + def __init__(self, provider_type: str = "openai"): + self._provider_type = provider_type + + @property + def provider_name(self) -> str: + return self._provider_type + + def is_available(self) -> bool: + """检查 API Key 是否配置""" + if self._provider_type == "deepseek": + return bool(os.getenv("DEEPSEEK_API_KEY")) + return bool(os.getenv("OPENAI_API_KEY")) + + def _get_client(self): + """获取 OpenAI 客户端(懒加载)""" + if self._client is None: + from openai import OpenAI + + if self._provider_type == "deepseek": + api_key = os.getenv("DEEPSEEK_API_KEY", "") + base_url = os.getenv( + "DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1" + ) + else: + api_key = os.getenv("OPENAI_API_KEY", "") + base_url = os.getenv("OPENAI_BASE_URL") # 可选自定义端点 + + kwargs = {"api_key": api_key} + if base_url: + kwargs["base_url"] = base_url + + self._client = OpenAI(**kwargs) + return self._client + + def list_models(self) -> List[ModelInfo]: + if self._provider_type == "deepseek": + return DEEPSEEK_MODELS + return OPENAI_MODELS + + async def chat(self, request: ChatCompletionRequest): + """ + 处理 OpenAI 聊天请求 + 直接使用 OpenAI SDK,支持流式/非流式 + """ + client = self._get_client() + + # 打印请求参数 + provider_name = self._provider_type.upper() + logger.info(f"[{provider_name}] 请求参数:") + logger.info(f" - model: {request.model}") + logger.info(f" - stream: {request.stream}") + logger.info(f" - temperature: {request.temperature}") + logger.info(f" - max_tokens: {request.max_tokens}") + logger.info(f" - provider_type: {self._provider_type}") + if self._provider_type == "deepseek": + logger.info(f" - deep_thinking: {request.deep_thinking}") + + # 构建消息 + messages = self._build_messages(request) + logger.info(f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}") + + # 
构建请求参数 + kwargs = { + "model": request.model, + "messages": messages, + "temperature": request.temperature, + "max_tokens": request.max_tokens, + "stream": request.stream, + } + + # DeepSeek 深度思考支持 + extra_body = None + if self._provider_type == "deepseek" and request.deep_thinking: + if self._supports_thinking(request.model): + extra_body = {"thinking": {"type": "enabled"}} + kwargs["extra_body"] = extra_body + logger.info(f"[{provider_name}] 深度思考已启用: extra_body = {extra_body}") + + if request.stream: + return self._stream_chat(client, kwargs, extra_body) + else: + return self._sync_chat(client, kwargs, extra_body) + + def _supports_thinking(self, model: str) -> bool: + """检查模型是否支持深度思考""" + return model.lower() in DEEPSEEK_THINKING_MODELS + + def _build_messages(self, request: ChatCompletionRequest) -> List[Dict]: + """构建 OpenAI 格式消息""" + messages = [] + + for msg in request.messages: + role = msg.get("role", "user") + content = msg.get("content", "") + + # OpenAI 直接支持标准格式 + if isinstance(content, str): + if content.strip(): + messages.append({"role": role, "content": content}) + elif isinstance(content, list): + # 多模态内容 + openai_content = [] + for item in content: + if isinstance(item, dict): + openai_content.append(item) + if openai_content: + messages.append({"role": role, "content": openai_content}) + + return messages + + def _stream_chat(self, client, kwargs: Dict, extra_body: Optional[Dict] = None) -> StreamingResponse: + """流式聊天""" + provider_name = self._provider_type.upper() + logger.info(f"[{provider_name}] 开始流式响应...") + + def generator(): + from utils.helpers import generate_unique_id, get_current_timestamp + + resp = client.chat.completions.create(**kwargs) + + full_content = "" + full_reasoning = "" + chunk_count = 0 + for chunk in resp: + if chunk.choices: + chunk_count += 1 + delta = chunk.choices[0].delta + + delta_content = {} + if hasattr(delta, "content") and delta.content: + delta_content["content"] = delta.content + full_content += delta.content + if hasattr(delta, "reasoning_content") and delta.reasoning_content: + delta_content["reasoning_content"] = delta.reasoning_content + full_reasoning += delta.reasoning_content + + if delta_content: + data = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": kwargs["model"], + "choices": [ + { + "index": 0, + "delta": delta_content, + "finish_reason": None, + } + ], + } + yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" + + finish = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion.chunk", + "created": get_current_timestamp(), + "model": kwargs["model"], + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + } + yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n" + yield "data: [DONE]\n\n" + + # 打印流式响应结果 + logger.info(f"[{provider_name}] 流式响应完成:") + logger.info(f" - chunks: {chunk_count}") + logger.info(f" - content_length: {len(full_content)} 字符") + if full_reasoning: + logger.info(f" - reasoning_length: {len(full_reasoning)} 字符") + logger.info(f" - content_preview: {full_content[:200]}..." 
if len(full_content) > 200 else f" - content: {full_content}") + + return StreamingResponse(generator(), media_type="text/event-stream") + + def _sync_chat(self, client, kwargs: Dict, extra_body: Optional[Dict] = None) -> JSONResponse: + """非流式聊天""" + from utils.helpers import generate_unique_id, get_current_timestamp + + resp = client.chat.completions.create(**kwargs) + + message = resp.choices[0].message + content = message.content or "" + response = { + "id": f"chatcmpl-{generate_unique_id()}", + "object": "chat.completion", + "created": get_current_timestamp(), + "model": kwargs["model"], + "choices": [ + { + "index": 0, + "message": { + "role": message.role, + "content": content, + }, + "finish_reason": resp.choices[0].finish_reason, + } + ], + } + + # 添加推理内容(如有) + if hasattr(message, "reasoning_content") and message.reasoning_content: + response["choices"][0]["message"]["reasoning_content"] = ( + message.reasoning_content + ) + + if resp.usage: + response["usage"] = { + "prompt_tokens": resp.usage.prompt_tokens, + "completion_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } + + # 打印响应结果 + provider_name = self._provider_type.upper() + logger.info(f"[{provider_name}] 响应结果:") + logger.info(f" - content_length: {len(content)} 字符") + if hasattr(message, "reasoning_content") and message.reasoning_content: + logger.info(f" - reasoning_length: {len(message.reasoning_content)} 字符") + logger.info(f" - content_preview: {content[:200]}..." if len(content) > 200 else f" - content: {content}") + if resp.usage: + logger.info(f" - usage: {response['usage']}") + + return JSONResponse(content=response) + + +class DeepseekAdapter(OpenAIAdapter): + """Deepseek 平台适配器(继承 OpenAI 适配器)""" + + def __init__(self): + super().__init__(provider_type="deepseek") \ No newline at end of file diff --git a/server/adapters/registry.py b/server/adapters/registry.py new file mode 100644 index 0000000..a44dd1a --- /dev/null +++ b/server/adapters/registry.py @@ -0,0 +1,120 @@ +""" +适配器注册表 +根据模型名称路由到对应的平台适配器 +""" + +import os +from typing import Dict, Optional, Type + +from .base import BaseAdapter + +# 模型前缀到平台名称的映射 +MODEL_PREFIX_MAP = { + # 智谱 GLM + "glm-": "glm", + # 阿里云百炼(Qwen 系列) + "qwen-": "dashscope", + # OpenAI + "gpt-": "openai", + "o1-": "openai", + "o3-": "openai", + # Deepseek + "deepseek-": "deepseek", +} + +# 已注册的适配器实例 +_adapters: Dict[str, BaseAdapter] = {} + +# 已注册的适配器类 +_adapter_classes: Dict[str, Type[BaseAdapter]] = {} + + +def register_adapter(name: str, adapter_class: Type[BaseAdapter]): + """ + 注册适配器类 + + Args: + name: 平台名称(如 'glm', 'dashscope', 'openai') + adapter_class: 适配器类 + """ + _adapter_classes[name] = adapter_class + + +def get_provider_from_model(model: str) -> str: + """ + 根据模型名称判断所属平台 + + Args: + model: 模型 ID(如 'glm-4-flash', 'qwen-turbo', 'gpt-4') + + Returns: + 平台名称(如 'glm', 'dashscope', 'openai') + """ + model_lower = model.lower() + + # 优先精确匹配 + exact_matches = { + # GLM 精确模型名 + "glm-4": "glm", + "glm-4v": "glm", + # Deepseek + "deepseek-chat": "deepseek", + "deepseek-reasoner": "deepseek", + } + if model_lower in exact_matches: + return exact_matches[model_lower] + + # 前缀匹配 + for prefix, provider in MODEL_PREFIX_MAP.items(): + if model_lower.startswith(prefix): + return provider + + # 默认使用环境变量或 GLM + return os.getenv("DEFAULT_PROVIDER", "glm") + + +def get_adapter(provider: str) -> Optional[BaseAdapter]: + """ + 获取适配器实例(懒加载) + + Args: + provider: 平台名称 + + Returns: + 适配器实例,如果平台未注册则返回 None + """ + if provider in _adapters: + return _adapters[provider] + 
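+    # 缓存命中时直接复用同一实例;适配器内部懒加载的 _client
+    # (GLM/OpenAI)也随实例一起复用,避免重复构造 SDK 客户端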
+ # 懒加载:首次使用时实例化 + if provider in _adapter_classes: + adapter_class = _adapter_classes[provider] + adapter = adapter_class() + _adapters[provider] = adapter + return adapter + + return None + + +def get_all_adapters() -> Dict[str, BaseAdapter]: + """ + 获取所有已注册的适配器实例 + """ + result = {} + for name, adapter_class in _adapter_classes.items(): + if name not in _adapters: + _adapters[name] = adapter_class() + result[name] = _adapters[name] + return result + + +def get_available_providers() -> list: + """ + 获取所有可用的平台列表 + """ + providers = [] + for name, adapter_class in _adapter_classes.items(): + adapter = get_adapter(name) + if adapter and adapter.is_available(): + providers.append(name) + return providers diff --git a/server/api/openai_gateway.py b/server/api/openai_gateway.py new file mode 100644 index 0000000..9afa60e --- /dev/null +++ b/server/api/openai_gateway.py @@ -0,0 +1,119 @@ +""" +OpenAI 兼容 API 网关 +提供统一的 /v1/chat/completions 和 /v1/models 端点 +""" + +from typing import Any, Dict + +from fastapi import APIRouter, HTTPException, Request +from fastapi.responses import JSONResponse + +from adapters import get_adapter, get_provider_from_model +from adapters.base import ChatCompletionRequest +from utils.logger import get_logger + +logger = get_logger() + +router = APIRouter(tags=["OpenAI Compatible API"]) + + +@router.post("/chat/completions") +async def chat_completions(request: Request): + """ + OpenAI 兼容的聊天补全接口 + + 根据请求中的 model 字段自动路由到对应的平台适配器: + - glm-* → 智谱 GLM + - qwen-* → 阿里云百炼 + - gpt-* / o1-* / o3-* → OpenAI + - deepseek-* → Deepseek + """ + try: + body = await request.json() + except Exception: + raise HTTPException(status_code=400, detail="Invalid JSON body") + + # 创建请求对象 + chat_request = ChatCompletionRequest.from_dict(body) + model = chat_request.model + + # 根据模型名称确定平台 + provider = get_provider_from_model(model) + logger.info(f"[Gateway] model={model} → provider={provider}") + + # 获取对应平台的适配器 + adapter = get_adapter(provider) + if adapter is None: + raise HTTPException( + status_code=400, + detail=f"Unsupported model: {model} (provider: {provider})", + ) + + # 检查适配器是否可用 + if not adapter.is_available(): + raise HTTPException( + status_code=503, + detail=f"Provider '{provider}' is not available (API key not configured)", + ) + + # 调用适配器处理请求 + return await adapter.chat(chat_request) + + +@router.get("/models") +async def list_models(): + """ + 返回所有可用平台的模型列表 + + 聚合所有已配置 API Key 的平台模型 + """ + from adapters import get_all_adapters + + all_models = [] + + for provider, adapter in get_all_adapters().items(): + if adapter.is_available(): + models = adapter.list_models() + all_models.extend([m.to_dict() for m in models]) + + return { + "object": "list", + "data": all_models, + } + + +@router.get("/models/{model_id}") +async def get_model(model_id: str): + """ + 获取特定模型信息 + """ + from adapters import get_all_adapters + + for provider, adapter in get_all_adapters().items(): + if adapter.is_available(): + for model in adapter.list_models(): + if model.id == model_id: + return { + "object": "model", + "id": model.id, + "owned_by": model.provider, + "data": model.to_dict(), + } + + raise HTTPException(status_code=404, detail=f"Model not found: {model_id}") + + +# 初始化时注册适配器 +def init_adapters(): + """注册所有适配器""" + from adapters import register_adapter + from adapters.dashscope_adapter import DashScopeAdapter + from adapters.glm_adapter import GLMAdapter + from adapters.openai_adapter import DeepseekAdapter, OpenAIAdapter + + register_adapter("glm", GLMAdapter) + register_adapter("dashscope", 
DashScopeAdapter) + register_adapter("openai", OpenAIAdapter) + register_adapter("deepseek", DeepseekAdapter) + + logger.info("[Gateway] Adapters registered: glm, dashscope, openai, deepseek") \ No newline at end of file diff --git a/server/config.py b/server/config.py new file mode 100644 index 0000000..1beb173 --- /dev/null +++ b/server/config.py @@ -0,0 +1,59 @@ +""" +统一配置管理 +""" + +import os +from dataclasses import dataclass +from typing import Dict, Optional + + +@dataclass +class ProviderConfig: + """平台配置""" + + api_key: Optional[str] = None + base_url: Optional[str] = None + enabled: bool = True + + +# 平台配置映射 +PROVIDERS: Dict[str, ProviderConfig] = { + "glm": ProviderConfig( + api_key=os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"), + enabled=True, + ), + "dashscope": ProviderConfig( + api_key=os.getenv("ALIYUN_API_KEY") or os.getenv("DASHSCOPE_API_KEY"), + enabled=True, + ), + "openai": ProviderConfig( + api_key=os.getenv("OPENAI_API_KEY"), + base_url=os.getenv("OPENAI_BASE_URL"), + enabled=True, + ), + "deepseek": ProviderConfig( + api_key=os.getenv("DEEPSEEK_API_KEY"), + base_url=os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"), + enabled=True, + ), +} + + +def get_provider_config(provider: str) -> Optional[ProviderConfig]: + """获取平台配置""" + return PROVIDERS.get(provider) + + +def is_provider_available(provider: str) -> bool: + """检查平台是否可用(已配置 API Key)""" + config = PROVIDERS.get(provider) + return config is not None and bool(config.api_key) and config.enabled + + +def get_available_providers() -> list: + """获取所有可用的平台列表""" + return [name for name, config in PROVIDERS.items() if is_provider_available(name)] + + +# 默认平台 +DEFAULT_PROVIDER = os.getenv("DEFAULT_PROVIDER", "glm") \ No newline at end of file diff --git a/server/main.py b/server/main.py index e56880c..4d53ea6 100644 --- a/server/main.py +++ b/server/main.py @@ -3,12 +3,18 @@ AI Chat API Server — 主入口(纯基础设施层) 职责: - 注入运行时依赖(venv site-packages) - - 读取 LLM_BACKEND 环境变量,动态加载对应平台模块 - - 注册 FastAPI 路由和中间件 + - 支持 OpenAI 兼容 API 网关(/v1/*)和多平台路由 + - 保留向后兼容的 /api/chat-ui/* 路由 -平台代码位置(main.py 中不包含任何平台逻辑): - - 百炼 DashScope → api/chat_routes.py - - 智谱 GLM-4.6V → api/chat_routes_glm.py + utils/glm_adapter.py +平台适配器位置: + - adapters/glm_adapter.py → 智谱 GLM + - adapters/dashscope_adapter.py → 阿里云百炼 + - adapters/openai_adapter.py → OpenAI / Deepseek + +API 端点: + - POST /v1/chat/completions → OpenAI 兼容网关(根据 model 自动路由) + - GET /v1/models → 所有可用模型列表 + - POST /api/chat-ui/chat → 传统聊天接口(保持兼容) """ import os @@ -62,12 +68,21 @@ from api.chat_routes import (delete_conversation_handler, save_conversation_handler, serve_upload_handler, stop_generation_handler, upload_file_handler) +# ── OpenAI 兼容网关初始化 ─────────────────────────────────────────────── +from api.openai_gateway import init_adapters, router as openai_router + +init_adapters() + # ── FastAPI 应用 ────────────────────────────────────────────────────── app = FastAPI( - title=f"AI Chat API(LLM_BACKEND={LLM_BACKEND})", - version="3.0.0", + title="AI Chat API Gateway", + version="4.0.0", + description="统一 OpenAI 兼容 API 网关,支持多平台模型", ) +# 注册 OpenAI 兼容路由 +app.include_router(openai_router, prefix="/v1") + @app.middleware("http") async def logging_middleware(request: Request, call_next): @@ -90,27 +105,72 @@ async def logging_middleware(request: Request, call_next): @app.get("/health") async def health_check(): + from config import get_available_providers + return { "status": "healthy", - "backend": LLM_BACKEND, + "version": "4.0.0", + "default_backend": LLM_BACKEND, + 
"available_providers": get_available_providers(), + "endpoints": { + "openai_compatible": "/v1/chat/completions", + "legacy": "/api/chat-ui/chat", + "models": "/v1/models", + }, "timestamp": datetime.now(timezone.utc).isoformat(), } @app.post("/api/chat-ui/chat") async def chat_endpoint(request: Request): - """聊天接口(自动路由到当前平台)""" - return await _platform.chat_handler(await request.json()) + """聊天接口(根据 model 自动路由到对应平台)""" + from adapters import get_adapter, get_provider_from_model + from adapters.base import ChatCompletionRequest + + try: + body = await request.json() + except Exception: + return JSONResponse({"error": "Invalid JSON body"}, status_code=400) + + # 创建请求对象 + chat_request = ChatCompletionRequest.from_dict(body) + model = chat_request.model + + # 根据模型名称确定平台 + provider = get_provider_from_model(model) + logger.info(f"[Legacy API] model={model} → provider={provider}") + + # 获取对应平台的适配器 + adapter = get_adapter(provider) + if adapter is None: + return JSONResponse( + {"error": f"Unsupported model: {model} (provider: {provider})"}, + status_code=400, + ) + + # 检查适配器是否可用 + if not adapter.is_available(): + return JSONResponse( + {"error": f"Provider '{provider}' is not available (API key not configured)"}, + status_code=503, + ) + + # 调用适配器处理请求 + return await adapter.chat(chat_request) @app.get("/api/chat-ui/models") async def get_models(): - """模型列表(由当前平台返回)""" - result = _platform.models_handler() - # 支持同步和异步两种返回 - if hasattr(result, "__await__"): - return await result - return result + """模型列表(聚合所有可用平台的模型)""" + from adapters import get_all_adapters + + all_models = [] + for provider, adapter in get_all_adapters().items(): + if adapter.is_available(): + models = adapter.list_models() + all_models.extend([m.to_dict() for m in models]) + + return {"object": "list", "data": all_models} # ── 通用路由(与平台无关)──────────────────────────────────────────── @@ -161,10 +221,25 @@ if __name__ == "__main__": import uvicorn port = int(os.getenv("PORT", 8000)) - print("=" * 55) - print(f" AI Chat Server v3.0 启动中...") - print(f" 后端平台 : {LLM_BACKEND.upper()} [LLM_BACKEND={LLM_BACKEND}]") - print(f" 监听端口 : {port}") - print(f" 切换平台 : 修改 .env 中 LLM_BACKEND=glm|dashscope,重启") - print("=" * 55) + + # 获取可用平台 + from config import get_available_providers + + available = get_available_providers() + + print("=" * 60) + print(" AI Chat API Gateway v4.0") + print("=" * 60) + print(f" OpenAI 兼容端点: http://localhost:{port}/v1/chat/completions") + print(f" 模型列表 : http://localhost:{port}/v1/models") + print("-" * 60) + print(f" 可用平台 : {', '.join(available) or '无(请配置 API Key)'}") + print(f" 默认平台 : {LLM_BACKEND} (向后兼容模式)") + print("-" * 60) + print(" 使用方法:") + print(" curl -X POST http://localhost:8000/v1/chat/completions \\") + print(' -H "Content-Type: application/json" \\') + print(' -d \'{"model":"glm-4-flash","messages":[{"role":"user","content":"hi"}]}\'') + print("=" * 60) + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/src/components/chat/ChatMain.vue b/src/components/chat/ChatMain.vue index 88d7bce..549053b 100644 --- a/src/components/chat/ChatMain.vue +++ b/src/components/chat/ChatMain.vue @@ -223,14 +223,40 @@ async function handleSend( ); let fullText = ""; + let reasoningText = ""; + let isInReasoning = false; isTyping.value = false; for await (const chunk of stream) { if (abortController.value?.signal.aborted) break; - fullText += chunk; + + if (chunk.type === "reasoning") { + // 深度思考内容 + if (!isInReasoning) { + // 开始深度思考块 + reasoningText = ""; + isInReasoning = true; + fullText += "\n"; + } + 
diff --git a/src/components/chat/ChatMain.vue b/src/components/chat/ChatMain.vue
index 88d7bce..549053b 100644
--- a/src/components/chat/ChatMain.vue
+++ b/src/components/chat/ChatMain.vue
@@ -223,14 +223,40 @@ async function handleSend(
   );
 
   let fullText = "";
+  let reasoningText = "";
+  let isInReasoning = false;
   isTyping.value = false;
 
   for await (const chunk of stream) {
     if (abortController.value?.signal.aborted) break;
-    fullText += chunk;
+
+    if (chunk.type === "reasoning") {
+      // 深度思考内容
+      if (!isInReasoning) {
+        // 开始深度思考块
+        reasoningText = "";
+        isInReasoning = true;
+        fullText += "<think>\n";
+      }
+      reasoningText += chunk.text;
+      fullText += chunk.text;
+    } else {
+      // 普通内容
+      if (isInReasoning) {
+        // 结束深度思考块
+        isInReasoning = false;
+        fullText += "</think>\n\n";
+      }
+      fullText += chunk.text;
+    }
     chatStore.updateMessageContent(aiMessage.id, fullText);
   }
 
+  // 如果最后还在深度思考块中,关闭它
+  if (isInReasoning) {
+    fullText += "</think>\n";
+  }
+
   if (!abortController.value?.signal.aborted) {
     chatStore.updateMessage(aiMessage.id, {
       isStreaming: false,
@@ -320,13 +346,39 @@ async function handleRetry(messageId: string) {
   );
 
   let fullText = "";
+  let reasoningText = "";
+  let isInReasoning = false;
 
   for await (const chunk of stream) {
     if (abortController.value?.signal.aborted) break;
-    fullText += chunk;
+
+    if (chunk.type === "reasoning") {
+      // 深度思考内容
+      if (!isInReasoning) {
+        // 开始深度思考块
+        reasoningText = "";
+        isInReasoning = true;
+        fullText += "<think>\n";
+      }
+      reasoningText += chunk.text;
+      fullText += chunk.text;
+    } else {
+      // 普通内容
+      if (isInReasoning) {
+        // 结束深度思考块
+        isInReasoning = false;
+        fullText += "</think>\n\n";
+      }
+      fullText += chunk.text;
+    }
     chatStore.updateMessageContent(messageId, fullText);
   }
 
+  // 如果最后还在深度思考块中,关闭它
+  if (isInReasoning) {
+    fullText += "</think>\n";
+  }
+
   if (!abortController.value?.signal.aborted) {
     chatStore.updateMessage(messageId, {
       isStreaming: false,
@@ -419,4 +471,4 @@ watch(
     min-width: 1000px;
   }
 }
-</style>
+</style>
\ No newline at end of file
diff --git a/src/services/api.ts b/src/services/api.ts
index 236cc66..90182d9 100644
--- a/src/services/api.ts
+++ b/src/services/api.ts
@@ -76,6 +76,12 @@ export interface UploadResult {
   mimeType?: string;
 }
 
+// 流式响应块类型
+export interface StreamChunk {
+  type: "content" | "reasoning";
+  text: string;
+}
+
 // API 调用类
 class ChatApi {
   private baseUrl: string;
@@ -90,7 +96,7 @@ class ChatApi {
   async *streamChat(
     request: ChatRequest,
     signal?: AbortSignal,
-  ): AsyncGenerator<string> {
+  ): AsyncGenerator<StreamChunk> {
     // 构建消息数组,考虑是否包含图片
     let userContent;
     if (request.images && request.images.length > 0) {
@@ -185,9 +191,18 @@ class ChatApi {
             break;
           }
 
-          const content = data.choices?.[0]?.delta?.content;
+          const delta = data.choices?.[0]?.delta;
+
+          // 处理深度思考内容(reasoning_content)
+          const reasoningContent = delta?.reasoning_content;
+          if (reasoningContent) {
+            yield { type: "reasoning", text: reasoningContent };
+          }
+
+          // 处理普通内容
+          const content = delta?.content;
           if (content) {
-            yield content;
+            yield { type: "content", text: content };
           }
         } catch (e) {
           console.warn("JSON解析错误", e, line);
@@ -271,6 +286,20 @@ class ChatApi {
       //   maxTokens: 8192,
       //   provider: "Zhipu",
       // },
+      {
+        id: "deepseek-chat",
+        name: "DeepSeek Chat",
+        description: "DeepSeek 对话模型",
+        maxTokens: 8192,
+        provider: "DeepSeek",
+      },
+      {
+        id: "deepseek-reasoner",
+        name: "DeepSeek Reasoner",
+        description: "DeepSeek 深度思考模型",
+        maxTokens: 8192,
+        provider: "DeepSeek",
+      },
     ];
   }
 
diff --git a/src/stores/settings.ts b/src/stores/settings.ts
index 6589022..54376b0 100644
--- a/src/stores/settings.ts
+++ b/src/stores/settings.ts
@@ -61,6 +61,20 @@ export const useSettingsStore = defineStore("settings", () => {
       maxTokens: 8192,
       provider: "Zhipu",
     },
+    {
+      id: "deepseek-chat",
+      name: "DeepSeek Chat",
+      description: "DeepSeek 对话模型",
+      maxTokens: 8192,
+      provider: "DeepSeek",
+    },
+    {
+      id: "deepseek-reasoner",
+      name: "DeepSeek Reasoner",
+      description: "DeepSeek 深度思考模型",
+      maxTokens: 8192,
+      provider: "DeepSeek",
+    },
   ];
 
   // 状态