ai-chat-ui/server/adapters/glm_adapter.py

"""
智谱 GLM 适配器
基于 utils/glm_adapter.py 重构
使用zai-sdk。因为已经完成这一部分的整套逻辑如果更换OpenAI-SDK会花很多时间调试。
"""
import json
import os
from typing import Dict, List, Optional
from fastapi.responses import JSONResponse, StreamingResponse
from .base import BaseAdapter, ChatCompletionRequest, ModelInfo
from .plugins import get_web_search_mode, build_glm_search_tool
from core import get_logger
logger = get_logger()
# GLM model catalog
GLM_MODELS = [
    ModelInfo(
        id="glm-5",
        name="GLM-5",
        description="SOTA coding and long-horizon agent capability",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=True,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
    ModelInfo(
        id="glm-4.6v",
        name="GLM-4.6V (recommended)",
        description="Latest flagship model; supports text/images/documents/deep thinking",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=True,
        supports_web_search=False,
        supports_vision=True,
        supports_files=True,
    ),
    ModelInfo(
        id="glm-4-flash",
        name="GLM-4 Flash",
        description="Cost-effective text model",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=False,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
    ModelInfo(
        id="glm-4v-plus-0111",
        name="GLM-4V Plus",
        description="Native multimodal: images + PDF/DOCX",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=False,
        supports_web_search=False,
        supports_vision=True,
        supports_files=True,
    ),
    ModelInfo(
        id="glm-z1-flash",
        name="GLM-Z1 Flash",
        description="Deep-thinking reasoning model; deep thinking on by default",
        max_tokens=128000,
        provider="ZhipuAI",
        supports_thinking=True,
        supports_web_search=True,
        supports_vision=False,
        supports_files=False,
    ),
]
# Vision-capable models (used for automatic model switching)
VISION_MODELS = {"glm-4v", "glm-4v-plus", "glm-4v-plus-0111", "glm-4.6v"}
# Models that support deep thinking
THINKING_MODELS = {"glm-z1-flash", "glm-z1-air", "glm-4.6v", "glm-4.6"}
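

# Illustrative helper (hypothetical; not used by the adapter itself): callers
# could resolve declared capabilities from the catalog instead of re-deriving
# them from the VISION_MODELS / THINKING_MODELS sets.
def find_model_info(model_id: str) -> Optional[ModelInfo]:
    """Return the GLM_MODELS entry whose id matches, or None if unknown."""
    for info in GLM_MODELS:
        if info.id == model_id.lower():
            return info
    return None
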
class GLMAdapter(BaseAdapter):
    """Zhipu GLM platform adapter."""

    _client = None

    @property
    def provider_name(self) -> str:
        return "glm"

    def is_available(self) -> bool:
        """Check whether an API key is configured."""
        return bool(os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY"))

    def _get_client(self):
        """Get the GLM client (lazily initialized)."""
        if self._client is None:
            from zhipuai import ZhipuAI

            api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
            self._client = ZhipuAI(api_key=api_key)
        return self._client

    def list_models(self) -> List[ModelInfo]:
        return GLM_MODELS

    async def chat(self, request: ChatCompletionRequest):
        """
        Handle a GLM chat request.
        Supports streaming/non-streaming, images, documents, web search,
        and deep thinking.
        """
        client = self._get_client()
        # Build messages
        glm_messages, has_vision, has_files = self._build_messages(request)
        actual_model = self._resolve_model(request.model, has_vision, has_files)
        # Debug: log the raw request parameters
        logger.info("[GLM] Raw request parameters:")
        logger.info(
            f"  - request.deep_thinking: {request.deep_thinking} (type: {type(request.deep_thinking)})"
        )
        logger.info(f"  - request.web_search: {request.web_search}")
        logger.info(f"  - request.deep_search: {request.deep_search}")
        logger.info(f"  - actual_model: {actual_model}")
        logger.info(f"  - supports_thinking: {self._supports_thinking(actual_model)}")
        # Build extra parameters
        extra_kwargs = {}
        web_search_mode = get_web_search_mode(request)
        if web_search_mode:
            extra_kwargs["tools"] = [build_glm_search_tool(web_search_mode)]
            extra_kwargs["tool_choice"] = "auto"
        # Deep thinking is opt-in (enabled when True, disabled when False).
        # Note: only certain models support deep thinking (e.g. glm-z1-flash).
        thinking_enabled = request.deep_thinking and self._supports_thinking(
            actual_model
        )
        logger.info(
            f"[GLM] Deep-thinking decision: {request.deep_thinking} and {self._supports_thinking(actual_model)} = {thinking_enabled}"
        )
        if thinking_enabled:
            extra_kwargs["thinking"] = {"type": "enabled"}
            logger.info(
                f"[GLM] Deep thinking enabled: extra_kwargs['thinking'] = {extra_kwargs['thinking']}"
            )
        else:
            extra_kwargs["thinking"] = {"type": "disabled"}
            logger.info(
                f"[GLM] Deep thinking disabled: extra_kwargs['thinking'] = {extra_kwargs['thinking']}"
            )
        if extra_kwargs:
            logger.info(
                f"[GLM] Final extra_kwargs: {json.dumps(extra_kwargs, ensure_ascii=False)}"
            )
        if request.stream:
            return self._stream_chat(
                client, glm_messages, actual_model, request, extra_kwargs
            )
        else:
            return self._sync_chat(
                client, glm_messages, actual_model, request, extra_kwargs
            )
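
    # Example (illustrative): with web search on and deep thinking requested
    # on a supporting model, extra_kwargs ends up roughly as
    #   {"tools": [<tool from build_glm_search_tool(...)>],
    #    "tool_choice": "auto",
    #    "thinking": {"type": "enabled"}}
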
    def _build_messages(
        self, request: ChatCompletionRequest
    ) -> tuple[List[Dict], bool, bool]:
        """
        Build messages in GLM format.
        Returns: (message list, contains images, contains file attachments)
        """
        messages = []
        has_vision = False
        has_files = bool(request.files)  # whether file attachments are present
        for msg in request.messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if isinstance(content, str):
                # Plain text
                if content.strip():
                    messages.append({"role": role, "content": content})
            elif isinstance(content, list):
                # Multimodal content
                glm_content = []
                for item in content:
                    if isinstance(item, dict):
                        item_type = item.get("type", "")
                        if item_type == "text":
                            text = item.get("text", "")
                            if text:
                                glm_content.append({"type": "text", "text": text})
                        elif item_type == "image_url":
                            img_url = self._extract_image_url(item)
                            if img_url:
                                glm_content.append(
                                    {"type": "image_url", "image_url": {"url": img_url}}
                                )
                                has_vision = True
                if glm_content:
                    messages.append({"role": role, "content": glm_content})
        # Handle file attachments
        if request.files:
            file_content = self._build_file_content(request.files)
            if messages and messages[-1]["role"] == "user":
                # Append to the last user message
                if isinstance(messages[-1]["content"], list):
                    messages[-1]["content"].extend(file_content)
                else:
                    messages[-1]["content"] = [
                        {"type": "text", "text": messages[-1]["content"]},
                        *file_content,
                    ]
            else:
                messages.append({"role": "user", "content": file_content})
        return messages, has_vision, has_files
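
    # Example (illustrative; the URL is hypothetical): an incoming message
    #   {"role": "user", "content": [
    #       {"type": "text", "text": "Describe this"},
    #       {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}]}
    # passes through with the same shape and flips has_vision to True, which
    # _resolve_model() then uses to force a vision-capable model.
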
    def _extract_image_url(self, item: Dict) -> Optional[str]:
        """Extract an image URL from a content item."""
        img_val = item.get("image_url", "")
        if isinstance(img_val, str):
            return img_val
        elif isinstance(img_val, dict):
            return img_val.get("url", "")
        return None

    def _build_file_content(self, files: List[str]) -> List[Dict]:
        """Build file content parts."""
        content = []
        for file_url in files:
            if file_url.startswith(("http://", "https://")):
                content.append({"type": "file_url", "file_url": {"url": file_url}})
        return content
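
    # Note: entries that are not http(s) URLs (e.g. local paths or data: URIs)
    # are silently dropped above; presumably files are uploaded elsewhere and
    # arrive here as public URLs.
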
    def _resolve_model(
        self, model: str, has_vision: bool, has_files: bool = False
    ) -> str:
        """Resolve the model that will actually be used."""
        model_lower = model.lower()
        # If images or file attachments are present, force glm-4.6v (multimodal)
        if (has_vision or has_files) and model_lower not in VISION_MODELS:
            logger.info(
                f"[GLM] Images or file attachments detected, forcing model switch: {model} -> glm-4.6v"
            )
            return "glm-4.6v"
        return model

    def _supports_thinking(self, model: str) -> bool:
        """Check whether the model supports deep thinking."""
        return model.lower() in THINKING_MODELS

    def _stream_chat(
        self, client, messages, model, request, extra_kwargs
    ) -> StreamingResponse:
        """Streaming chat."""
        logger.info("[GLM] Starting streaming response...")
        # Extract deep-thinking and tool configuration
        thinking_config = extra_kwargs.get("thinking")
        tools_config = extra_kwargs.get("tools")

        def generator():
            from utils.helpers import generate_unique_id, get_current_timestamp

            full_content = ""
            # Build API call parameters
            api_params = {
                "model": model,
                "messages": messages,
                "stream": True,
                "temperature": request.temperature,
                "max_tokens": request.max_tokens,
            }
            # Deep thinking: passed via extra_body
            if thinking_config:
                api_params["extra_body"] = {"thinking": thinking_config}
            # Web search: passed via the tools parameter
            if tools_config:
                api_params["tools"] = tools_config
                api_params["tool_choice"] = "auto"
            # Log the request parameters
            logger.info("[GLM] API call parameters:")
            logger.info(f"  - model: {model}")
            logger.info("  - stream: True")
            logger.info(f"  - temperature: {request.temperature}")
            logger.info(f"  - max_tokens: {request.max_tokens}")
            if thinking_config:
                logger.info(f"  - extra_body: {{'thinking': {thinking_config}}}")
            if tools_config:
                logger.info(
                    f"  - tools: {json.dumps(tools_config, ensure_ascii=False)}"
                )
                logger.info("  - tool_choice: auto")
            logger.info(
                f"  - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
            )
            chunk_count = 0
            resp = client.chat.completions.create(**api_params)
            for chunk in resp:
                chunk_count += 1
                # Make sure delta exists
                if not hasattr(chunk.choices[0], "delta"):
                    continue
                delta = chunk.choices[0].delta
                # Handle deep-thinking content (reasoning_content)
                reasoning_content = getattr(delta, "reasoning_content", None)
                if reasoning_content:
                    data = {
                        "id": f"chatcmpl-{generate_unique_id()}",
                        "object": "chat.completion.chunk",
                        "created": get_current_timestamp(),
                        "model": model,
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"reasoning_content": reasoning_content},
                                "finish_reason": None,
                            }
                        ],
                    }
                    yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
                    continue
                # Handle regular content
                content = getattr(delta, "content", None)
                if content:
                    full_content += content
                    data = {
                        "id": f"chatcmpl-{generate_unique_id()}",
                        "object": "chat.completion.chunk",
                        "created": get_current_timestamp(),
                        "model": model,
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"content": content},
                                "finish_reason": None,
                            }
                        ],
                    }
                    yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
            # Final marker
            finish = {
                "id": f"chatcmpl-{generate_unique_id()}",
                "object": "chat.completion.chunk",
                "created": get_current_timestamp(),
                "model": model,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
            }
            yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"
            # Log the streaming result
            logger.info("[GLM] Streaming response complete:")
            logger.info(f"  - chunks: {chunk_count}")
            logger.info(f"  - content_length: {len(full_content)} chars")
            logger.info(
                f"  - content_preview: {full_content[:200]}..."
                if len(full_content) > 200
                else f"  - content: {full_content}"
            )

        return StreamingResponse(generator(), media_type="text/event-stream")
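
    # The generator above emits OpenAI-style SSE frames. A minimal client-side
    # consumption sketch (illustrative; the route and httpx usage are
    # assumptions, not part of this file):
    #
    #   with httpx.stream("POST", url, json=payload) as r:
    #       for line in r.iter_lines():
    #           if line.startswith("data: ") and line != "data: [DONE]":
    #               delta = json.loads(line[6:])["choices"][0]["delta"]
    #               print(delta.get("reasoning_content") or delta.get("content") or "", end="")
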
    def _sync_chat(
        self, client, messages, model, request, extra_kwargs
    ) -> JSONResponse:
        """Non-streaming chat."""
        from utils.helpers import generate_unique_id, get_current_timestamp

        # Extract deep-thinking and tool configuration
        thinking_config = extra_kwargs.get("thinking")
        tools_config = extra_kwargs.get("tools")
        # Build API call parameters
        api_params = {
            "model": model,
            "messages": messages,
            "stream": False,
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
        }
        # Deep thinking: passed via extra_body
        if thinking_config:
            api_params["extra_body"] = {"thinking": thinking_config}
        # Web search: passed via the tools parameter
        if tools_config:
            api_params["tools"] = tools_config
            api_params["tool_choice"] = "auto"
        # Log the request parameters
        logger.info("[GLM] API call parameters:")
        logger.info(f"  - model: {model}")
        logger.info(f"  - stream: {request.stream}")
        logger.info(f"  - temperature: {request.temperature}")
        logger.info(f"  - max_tokens: {request.max_tokens}")
        if thinking_config:
            logger.info(f"  - extra_body: {{'thinking': {thinking_config}}}")
        if tools_config:
            logger.info(f"  - tools: {json.dumps(tools_config, ensure_ascii=False)}")
            logger.info("  - tool_choice: auto")
        logger.info(
            f"  - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}"
        )
        resp = client.chat.completions.create(**api_params)
        message = resp.choices[0].message
        content = message.content or ""
        # Build the response
        response_message = {"role": "assistant", "content": content}
        # Handle deep-thinking content
        reasoning_content = getattr(message, "reasoning_content", None)
        if reasoning_content:
            response_message["reasoning_content"] = reasoning_content
        response = {
            "id": f"chatcmpl-{generate_unique_id()}",
            "object": "chat.completion",
            "created": get_current_timestamp(),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": response_message,
                    "finish_reason": "stop",
                }
            ],
        }
        if hasattr(resp, "usage") and resp.usage:
            response["usage"] = {
                "prompt_tokens": resp.usage.prompt_tokens,
                "completion_tokens": resp.usage.completion_tokens,
                "total_tokens": resp.usage.total_tokens,
            }
        # Log the response result
        logger.info("[GLM] Response result:")
        logger.info(f"  - content_length: {len(content)} chars")
        logger.info(
            f"  - content_preview: {content[:200]}..."
            if len(content) > 200
            else f"  - content: {content}"
        )
        if hasattr(resp, "usage") and resp.usage:
            logger.info(f"  - usage: {response['usage']}")
        return JSONResponse(content=response)
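

# --- Minimal usage sketch (illustrative) -------------------------------------
# The ChatCompletionRequest fields below (model, messages, stream) are assumed
# from how the adapter reads them above; the real constructor lives in .base.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        adapter = GLMAdapter()
        if not adapter.is_available():  # needs ZHIPU_API_KEY or GLM_API_KEY
            print("No API key configured")
            return
        request = ChatCompletionRequest(
            model="glm-4-flash",
            messages=[{"role": "user", "content": "Hello"}],
            stream=False,
        )
        response = await adapter.chat(request)  # JSONResponse from _sync_chat
        print(response.body.decode("utf-8"))

    asyncio.run(_demo())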