"""
|
||
智谱 GLM 适配器
|
||
基于 utils/glm_adapter.py 重构
|
||
使用zai-sdk。因为已经完成这一部分的整套逻辑,如果更换OpenAI-SDK会花很多时间调试。
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
from typing import Dict, List, Optional
|
||
|
||
from fastapi.responses import JSONResponse, StreamingResponse
|
||
|
||
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
|
||
from utils.logger import get_logger
|
||
|
||
logger = get_logger()

# GLM model catalog
GLM_MODELS = [
    ModelInfo(
        id="glm-4.6v",
        name="GLM-4.6V(推荐)",
        description="最新旗舰模型,支持文本/图像/文档/深度思考",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=True,
        supports_web_search=True,
        supports_vision=True,
        supports_files=True,
    ),
    ModelInfo(
        id="glm-4-flash",
        name="GLM-4 Flash",
        description="高性价比文本模型",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=False,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
    ModelInfo(
        id="glm-4v-plus-0111",
        name="GLM-4V Plus",
        description="图像 + PDF/DOCX 原生多模态",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=False,
        supports_web_search=True,
        supports_vision=True,
        supports_files=True,
    ),
    ModelInfo(
        id="glm-z1-flash",
        name="GLM-Z1 Flash",
        description="深度思考推理模型",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=True,
        supports_web_search=False,
        supports_vision=False,
        supports_files=False,
    ),
]

# Vision-capable models (used for automatic model switching)
VISION_MODELS = {"glm-4v", "glm-4v-plus", "glm-4v-plus-0111", "glm-4.6v"}

# Models that support deep thinking
THINKING_MODELS = {"glm-z1-flash", "glm-z1-air", "glm-4.6v", "glm-4.6"}
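# Illustrative routing behavior driven by the sets above (see the GLMAdapter
# helpers further down; calls shown on an adapter instance):
#   adapter._resolve_model("glm-4-flash", has_vision=True)  -> "glm-4.6v"  # not in VISION_MODELS, switched
#   adapter._resolve_model("glm-4.6v", has_vision=True)     -> "glm-4.6v"  # already vision-capable
#   adapter._supports_thinking("glm-z1-flash")               -> True
#   adapter._supports_thinking("glm-4-flash")                -> False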


class GLMAdapter(BaseAdapter):
    """ZhipuAI GLM platform adapter."""

    _client = None

    @property
    def provider_name(self) -> str:
        return "glm"

    def is_available(self) -> bool:
        """Check whether an API key is configured."""
        return bool(os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"))

    def _get_client(self):
        """Get the GLM client (lazily initialized)."""
        if self._client is None:
            from zhipuai import ZhipuAI

            api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
            self._client = ZhipuAI(api_key=api_key)
        return self._client

    def list_models(self) -> List[ModelInfo]:
        return GLM_MODELS
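    # Key lookup order used above (illustrative shell setup):
    #   export ZHIPU_API_KEY="..."   # preferred
    #   export GLM_API_KEY="..."     # fallback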

    async def chat(self, request: ChatCompletionRequest):
        """
        Handle a GLM chat request.

        Supports streaming and non-streaming modes, images, documents,
        web search, and deep thinking.
        """
        client = self._get_client()

        # Build the messages
        glm_messages, has_vision, has_files = self._build_messages(request)
        actual_model = self._resolve_model(request.model, has_vision, has_files)

        # Debug: log the incoming request parameters
        logger.info("[GLM] Incoming request parameters:")
        logger.info(
            f" - request.deep_thinking: {request.deep_thinking} (type: {type(request.deep_thinking)})"
        )
        logger.info(f" - request.web_search: {request.web_search}")
        logger.info(f" - request.deep_search: {request.deep_search}")
        logger.info(f" - actual_model: {actual_model}")
        logger.info(f" - supports_thinking: {self._supports_thinking(actual_model)}")

        # Build extra parameters
        extra_kwargs = {}
        web_search = self._get_web_search_mode(request)

        if web_search:
            extra_kwargs["tools"] = [self._build_web_search_tool(web_search)]
            extra_kwargs["tool_choice"] = "auto"

        # Deep thinking is opt-in: enabled when True, disabled when False.
        # Note: only specific models support deep thinking (e.g. glm-z1-flash).
        thinking_enabled = request.deep_thinking and self._supports_thinking(
            actual_model
        )
        logger.info(
            f"[GLM] Deep-thinking decision: {request.deep_thinking} and {self._supports_thinking(actual_model)} = {thinking_enabled}"
        )

        if thinking_enabled:
            extra_kwargs["thinking"] = {"type": "enabled"}
            logger.info(
                f"[GLM] Deep thinking enabled: extra_kwargs['thinking'] = {extra_kwargs['thinking']}"
            )

        if extra_kwargs:
            logger.info(
                f"[GLM] Final extra_kwargs: {json.dumps(extra_kwargs, ensure_ascii=False)}"
            )

        if request.stream:
            return self._stream_chat(
                client, glm_messages, actual_model, request, extra_kwargs
            )
        else:
            return self._sync_chat(
                client, glm_messages, actual_model, request, extra_kwargs
            )
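    # Illustrative mapping of request flags to extra_kwargs in chat() above
    # (field names follow ChatCompletionRequest as used in this module):
    #   deep_thinking=True, model="glm-z1-flash" -> {"thinking": {"type": "enabled"}}
    #   web_search=True                          -> {"tools": [simple web_search tool], "tool_choice": "auto"}
    #   deep_search=True                         -> same as web_search, but with the "deep" prompt,
    #                                               recency filter, and content_size settings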

    def _build_messages(
        self, request: ChatCompletionRequest
    ) -> tuple[List[Dict], bool, bool]:
        """
        Build messages in GLM format.

        Returns: (message list, whether images are present, whether file attachments are present)
        """
        messages = []
        has_vision = False
        has_files = bool(request.files)  # Whether there are file attachments

        for msg in request.messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")

            if isinstance(content, str):
                # Plain text
                if content.strip():
                    messages.append({"role": role, "content": content})
            elif isinstance(content, list):
                # Multimodal content
                glm_content = []
                for item in content:
                    if isinstance(item, dict):
                        item_type = item.get("type", "")
                        if item_type == "text":
                            text = item.get("text", "")
                            if text:
                                glm_content.append({"type": "text", "text": text})
                        elif item_type == "image_url":
                            img_url = self._extract_image_url(item)
                            if img_url:
                                glm_content.append(
                                    {"type": "image_url", "image_url": {"url": img_url}}
                                )
                                has_vision = True

                if glm_content:
                    messages.append({"role": role, "content": glm_content})

        # Handle file attachments
        if request.files:
            file_content = self._build_file_content(request.files)
            if messages and messages[-1]["role"] == "user":
                # Append to the last user message
                if isinstance(messages[-1]["content"], list):
                    messages[-1]["content"].extend(file_content)
                else:
                    messages[-1]["content"] = [
                        {"type": "text", "text": messages[-1]["content"]},
                        *file_content,
                    ]
            else:
                messages.append({"role": "user", "content": file_content})

        return messages, has_vision, has_files
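    # Illustrative example of what _build_messages() above produces for an
    # OpenAI-style multimodal message (shapes taken from the code, values made up):
    #   in:  {"role": "user", "content": [
    #            {"type": "text", "text": "What is in this image?"},
    #            {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}}]}
    #   out: ([{"role": "user", "content": [
    #            {"type": "text", "text": "What is in this image?"},
    #            {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}}]}],
    #         True,   # has_vision
    #         False)  # has_files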

    def _extract_image_url(self, item: Dict) -> Optional[str]:
        """Extract the image URL (accepts both a plain string and a {"url": ...} dict)."""
        img_val = item.get("image_url", "")
        if isinstance(img_val, str):
            return img_val
        elif isinstance(img_val, dict):
            return img_val.get("url", "")
        return None

    def _build_file_content(self, files: List[str]) -> List[Dict]:
        """Build file content entries (only http(s) URLs are passed through)."""
        content = []
        for file_url in files:
            if file_url.startswith(("http://", "https://")):
                content.append({"type": "file_url", "file_url": {"url": file_url}})
        return content

    def _resolve_model(
        self, model: str, has_vision: bool, has_files: bool = False
    ) -> str:
        """Resolve the model that will actually be used."""
        model_lower = model.lower()
        # If there are images or file attachments, force glm-4.6v (multimodal-capable)
        if (has_vision or has_files) and model_lower not in VISION_MODELS:
            logger.info(
                f"[GLM] Image or file attachment detected, switching model: {model} -> glm-4.6v"
            )
            return "glm-4.6v"
        return model

    def _supports_thinking(self, model: str) -> bool:
        """Check whether the model supports deep thinking."""
        return model.lower() in THINKING_MODELS

    def _get_web_search_mode(self, request: ChatCompletionRequest) -> str:
        """Get the web-search mode: 'deep', 'simple', or '' (disabled)."""
        if request.deep_search:
            return "deep"
        elif request.web_search:
            return "simple"
        return ""

    def _build_web_search_tool(self, mode: str) -> Dict:
        """Build the web-search tool definition."""
        from datetime import datetime

        today = datetime.now().strftime("%Y年%m月%d日")

        if mode == "deep":
            # Deep search: return search result details
            return {
                "type": "web_search",
                "web_search": {
                    "enable": True,
                    "search_engine": "search_pro",
                    "search_result": True,
                    "search_prompt": f"你是一位智能助手。请用简洁的语言总结网络搜索{{search_result}}中的关键信息,按重要性排序并引用来源日期。今天的日期是{today}。",
                    "count": 5,
                    "search_recency_filter": "noLimit",
                    "content_size": "high",
                },
            }
        else:
            # Simple search
            return {
                "type": "web_search",
                "web_search": {
                    "enable": True,
                    "search_engine": "search_pro",
                    "search_result": True,
                    "count": 5,
                },
            }
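    # Illustrative "simple" payload as passed to the SDK by chat() above
    # (values mirror the dict returned here; the {search_result} placeholder in the
    # deep-search prompt is presumably substituted on the ZhipuAI service side):
    #   tools=[{"type": "web_search",
    #           "web_search": {"enable": True, "search_engine": "search_pro",
    #                          "search_result": True, "count": 5}}],
    #   tool_choice="auto"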

    def _stream_chat(
        self, client, messages, model, request, extra_kwargs
    ) -> StreamingResponse:
        """Streaming chat."""
        logger.info("[GLM] Starting streaming response...")

        # Extract the deep-thinking configuration
        thinking_config = extra_kwargs.get("thinking")
        tools_config = extra_kwargs.get("tools")

        def generator():
            from utils.helpers import generate_unique_id, get_current_timestamp

            full_content = ""

            # Build the API call parameters
            api_params = {
                "model": model,
                "messages": messages,
                "stream": True,
                "temperature": request.temperature,
                "max_tokens": request.max_tokens,
            }

            # Deep thinking: passed via extra_body
            if thinking_config:
                api_params["extra_body"] = {"thinking": thinking_config}

            # Web search: passed via the tools parameter
            if tools_config:
                api_params["tools"] = tools_config
                api_params["tool_choice"] = "auto"

            # Log the request parameters
            logger.info("[GLM] API call parameters:")
            logger.info(f" - model: {model}")
            logger.info(" - stream: True")
            logger.info(f" - temperature: {request.temperature}")
            logger.info(f" - max_tokens: {request.max_tokens}")
            if thinking_config:
                logger.info(f" - extra_body: {{'thinking': {thinking_config}}}")
            if tools_config:
                logger.info(
                    f" - tools: {json.dumps(tools_config, ensure_ascii=False)}"
                )
                logger.info(" - tool_choice: auto")
            logger.info(
                f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
            )

            chunk_count = 0
            resp = client.chat.completions.create(**api_params)

            for chunk in resp:
                chunk_count += 1

                # Skip chunks that carry no delta
                if not chunk.choices or not hasattr(chunk.choices[0], "delta"):
                    continue

                delta = chunk.choices[0].delta

                # Handle deep-thinking content (reasoning_content)
                reasoning_content = getattr(delta, "reasoning_content", None)
                if reasoning_content:
                    data = {
                        "id": f"chatcmpl-{generate_unique_id()}",
                        "object": "chat.completion.chunk",
                        "created": get_current_timestamp(),
                        "model": model,
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"reasoning_content": reasoning_content},
                                "finish_reason": None,
                            }
                        ],
                    }
                    yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                    continue

                # Handle regular content
                content = getattr(delta, "content", None)
                if content:
                    full_content += content
                    data = {
                        "id": f"chatcmpl-{generate_unique_id()}",
                        "object": "chat.completion.chunk",
                        "created": get_current_timestamp(),
                        "model": model,
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"content": content},
                                "finish_reason": None,
                            }
                        ],
                    }
                    yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"

            # Finish marker
            finish = {
                "id": f"chatcmpl-{generate_unique_id()}",
                "object": "chat.completion.chunk",
                "created": get_current_timestamp(),
                "model": model,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
            }
            yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"

            # Log the streaming result
            logger.info("[GLM] Streaming response finished:")
            logger.info(f" - chunks: {chunk_count}")
            logger.info(f" - content_length: {len(full_content)} chars")
            logger.info(
                f" - content_preview: {full_content[:200]}..."
                if len(full_content) > 200
                else f" - content: {full_content}"
            )

        return StreamingResponse(generator(), media_type="text/event-stream")
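    # Illustrative SSE frames emitted by _stream_chat() above (one JSON chunk per
    # "data:" line; reasoning_content frames appear only when deep thinking is on):
    #   data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", ...,
    #          "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}
    #   data: {"id": "chatcmpl-...", ..., "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
    #   data: [DONE]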

    def _sync_chat(
        self, client, messages, model, request, extra_kwargs
    ) -> JSONResponse:
        """Non-streaming chat."""
        from utils.helpers import generate_unique_id, get_current_timestamp

        # Extract the deep-thinking configuration
        thinking_config = extra_kwargs.get("thinking")
        tools_config = extra_kwargs.get("tools")

        # Build the API call parameters
        api_params = {
            "model": model,
            "messages": messages,
            "stream": False,
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
        }

        # Deep thinking: passed via extra_body
        if thinking_config:
            api_params["extra_body"] = {"thinking": thinking_config}

        # Web search: passed via the tools parameter
        if tools_config:
            api_params["tools"] = tools_config
            api_params["tool_choice"] = "auto"

        # Log the request parameters
        logger.info("[GLM] API call parameters:")
        logger.info(f" - model: {model}")
        logger.info(f" - stream: {request.stream}")
        logger.info(f" - temperature: {request.temperature}")
        logger.info(f" - max_tokens: {request.max_tokens}")
        if thinking_config:
            logger.info(f" - extra_body: {{'thinking': {thinking_config}}}")
        if tools_config:
            logger.info(f" - tools: {json.dumps(tools_config, ensure_ascii=False)}")
            logger.info(" - tool_choice: auto")
        logger.info(
            f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
        )

        resp = client.chat.completions.create(**api_params)

        message = resp.choices[0].message
        content = message.content or ""

        # Build the response
        response_message = {"role": "assistant", "content": content}

        # Handle deep-thinking content
        reasoning_content = getattr(message, "reasoning_content", None)
        if reasoning_content:
            response_message["reasoning_content"] = reasoning_content

        response = {
            "id": f"chatcmpl-{generate_unique_id()}",
            "object": "chat.completion",
            "created": get_current_timestamp(),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": response_message,
                    "finish_reason": "stop",
                }
            ],
        }

        if hasattr(resp, "usage") and resp.usage:
            response["usage"] = {
                "prompt_tokens": resp.usage.prompt_tokens,
                "completion_tokens": resp.usage.completion_tokens,
                "total_tokens": resp.usage.total_tokens,
            }

        # Log the response
        logger.info("[GLM] Response:")
        logger.info(f" - content_length: {len(content)} chars")
        logger.info(
            f" - content_preview: {content[:200]}..."
            if len(content) > 200
            else f" - content: {content}"
        )
        if hasattr(resp, "usage") and resp.usage:
            logger.info(f" - usage: {response['usage']}")

        return JSONResponse(content=response)
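

# Minimal usage sketch (illustrative only; assumes ZHIPU_API_KEY is set and that
# ChatCompletionRequest exposes the fields used throughout this module):
#
#   adapter = GLMAdapter()
#   if adapter.is_available():
#       request = ChatCompletionRequest(
#           model="glm-4.6v",
#           messages=[{"role": "user", "content": "Hello"}],
#           stream=False,
#           temperature=0.7,
#           max_tokens=1024,
#           deep_thinking=False,
#           web_search=False,
#           deep_search=False,
#           files=[],
#       )
#       response = await adapter.chat(request)  # inside an async context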