""" 智谱 GLM 适配器 基于 utils/glm_adapter.py 重构 使用zai-sdk。因为已经完成这一部分的整套逻辑,如果更换OpenAI-SDK会花很多时间调试。 """ import json import os from typing import Dict, List, Optional from fastapi.responses import JSONResponse, StreamingResponse from .base import BaseAdapter, ChatCompletionRequest, ModelInfo from .plugins import get_web_search_mode, build_glm_search_tool from core import get_logger logger = get_logger() # GLM 模型配置 GLM_MODELS = [ ModelInfo( id="glm-5", name="GLM-5", description="Coding与长程Agent能力SOTA", max_tokens=128000, provider="ZhipuAI", supports_thinking=True, supports_web_search=True, supports_vision=False, supports_files=False, ), ModelInfo( id="glm-4.6v", name="GLM-4.6V(推荐)", description="最新旗舰模型,支持文本/图像/文档/深度思考", max_tokens=128000, provider="ZhipuAI", supports_thinking=True, supports_web_search=False, supports_vision=True, supports_files=True, ), ModelInfo( id="glm-4-flash", name="GLM-4 Flash", description="高性价比文本模型", max_tokens=128000, provider="ZhipuAI", supports_thinking=False, supports_web_search=True, supports_vision=False, supports_files=False, ), ModelInfo( id="glm-4v-plus-0111", name="GLM-4V Plus", description="图像 + PDF/DOCX 原生多模态", max_tokens=128000, provider="ZhipuAI", supports_thinking=False, supports_web_search=False, supports_vision=True, supports_files=True, ), ModelInfo( id="glm-z1-flash", name="GLM-Z1 Flash", description="深度思考推理模型,默认开启深度思考", max_tokens=128000, provider="ZhipuAI", supports_thinking=True, supports_web_search=True, supports_vision=False, supports_files=False, ), ] # 视觉模型列表(用于自动切换) VISION_MODELS = {"glm-4v", "glm-4v-plus", "glm-4v-plus-0111", "glm-4.6v"} # 支持深度思考的模型 THINKING_MODELS = {"glm-z1-flash", "glm-z1-air", "glm-4.6v", "glm-4.6"} class GLMAdapter(BaseAdapter): """智谱 GLM 平台适配器""" _client = None @property def provider_name(self) -> str: return "glm" def is_available(self) -> bool: """检查 API Key 是否配置""" return bool(os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")) def _get_client(self): """获取 GLM 客户端(懒加载)""" if self._client is None: from zhipuai import ZhipuAI api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY") self._client = ZhipuAI(api_key=api_key) return self._client def list_models(self) -> List[ModelInfo]: return GLM_MODELS async def chat(self, request: ChatCompletionRequest): """ 处理 GLM 聊天请求 支持流式/非流式、图像、文档、联网搜索、深度思考 """ client = self._get_client() # 构建消息 glm_messages, has_vision, has_files = self._build_messages(request) actual_model = self._resolve_model(request.model, has_vision, has_files) # 调试:打印原始请求参数 logger.info(f"[GLM] 原始请求参数:") logger.info( f" - request.deep_thinking: {request.deep_thinking} (type: {type(request.deep_thinking)})" ) logger.info(f" - request.web_search: {request.web_search}") logger.info(f" - request.deep_search: {request.deep_search}") logger.info(f" - actual_model: {actual_model}") logger.info(f" - supports_thinking: {self._supports_thinking(actual_model)}") # 构建额外参数 extra_kwargs = {} web_search_mode = get_web_search_mode(request) if web_search_mode: extra_kwargs["tools"] = [build_glm_search_tool(web_search_mode)] extra_kwargs["tool_choice"] = "auto" # 深度思考:正向选择(True 时启用,False 时禁用) # 注意:只有特定模型支持深度思考(如 glm-z1-flash) thinking_enabled = request.deep_thinking and self._supports_thinking( actual_model ) logger.info( f"[GLM] 深度思考判断: {request.deep_thinking} and {self._supports_thinking(actual_model)} = {thinking_enabled}" ) if thinking_enabled: extra_kwargs["thinking"] = {"type": "enabled"} logger.info( f"[GLM] 深度思考已启用: extra_kwargs['thinking'] = {extra_kwargs['thinking']}" ) else: extra_kwargs["thinking"] = {"type": "disabled"} logger.info( f"[GLM] 深度思考已禁用: extra_kwargs['thinking'] = {extra_kwargs['thinking']}" ) if extra_kwargs: logger.info( f"[GLM] 最终 extra_kwargs: {json.dumps(extra_kwargs, ensure_ascii=False)}" ) if request.stream: return self._stream_chat( client, glm_messages, actual_model, request, extra_kwargs ) else: return self._sync_chat( client, glm_messages, actual_model, request, extra_kwargs ) def _build_messages( self, request: ChatCompletionRequest ) -> tuple[List[Dict], bool, bool]: """ 构建 GLM 格式的消息 返回:(消息列表, 是否包含图片, 是否包含文件附件) """ messages = [] has_vision = False has_files = bool(request.files) # 检查是否有文件附件 for msg in request.messages: role = msg.get("role", "user") content = msg.get("content", "") if isinstance(content, str): # 纯文本 if content.strip(): messages.append({"role": role, "content": content}) elif isinstance(content, list): # 多模态内容 glm_content = [] for item in content: if isinstance(item, dict): item_type = item.get("type", "") if item_type == "text": text = item.get("text", "") if text: glm_content.append({"type": "text", "text": text}) elif item_type == "image_url": img_url = self._extract_image_url(item) if img_url: glm_content.append( {"type": "image_url", "image_url": {"url": img_url}} ) has_vision = True if glm_content: messages.append({"role": role, "content": glm_content}) # 处理文件附件 if request.files: file_content = self._build_file_content(request.files) if messages and messages[-1]["role"] == "user": # 追加到最后一个用户消息 if isinstance(messages[-1]["content"], list): messages[-1]["content"].extend(file_content) else: messages[-1]["content"] = [ {"type": "text", "text": messages[-1]["content"]}, *file_content, ] else: messages.append({"role": "user", "content": file_content}) return messages, has_vision, has_files def _extract_image_url(self, item: Dict) -> Optional[str]: """提取图片 URL""" img_val = item.get("image_url", "") if isinstance(img_val, str): return img_val elif isinstance(img_val, dict): return img_val.get("url", "") return None def _build_file_content(self, files: List[str]) -> List[Dict]: """构建文件内容""" content = [] for file_url in files: if file_url.startswith(("http://", "https://")): content.append({"type": "file_url", "file_url": {"url": file_url}}) return content def _resolve_model( self, model: str, has_vision: bool, has_files: bool = False ) -> str: """解析实际使用的模型""" model_lower = model.lower() # 如果有图片或文件附件,强制使用 glm-4.6v(支持多模态) if (has_vision or has_files) and model_lower not in VISION_MODELS: logger.info( f"[GLM] 检测到图片或文件附件,强制切换模型: {model} -> glm-4.6v" ) return "glm-4.6v" return model def _supports_thinking(self, model: str) -> bool: """检查模型是否支持深度思考""" return model.lower() in THINKING_MODELS def _stream_chat( self, client, messages, model, request, extra_kwargs ) -> StreamingResponse: """流式聊天""" logger.info(f"[GLM] 开始流式响应...") # 提取深度思考配置 thinking_config = extra_kwargs.get("thinking") tools_config = extra_kwargs.get("tools") def generator(): from utils.helpers import generate_unique_id, get_current_timestamp full_content = "" # 构建 API 调用参数 api_params = { "model": model, "messages": messages, "stream": True, "temperature": request.temperature, "max_tokens": request.max_tokens, } # 深度思考:使用 extra_body 传递 if thinking_config: api_params["extra_body"] = {"thinking": thinking_config} # 联网搜索:使用 tools 参数 if tools_config: api_params["tools"] = tools_config api_params["tool_choice"] = "auto" # 打印请求参数 logger.info(f"[GLM] API 调用参数:") logger.info(f" - model: {model}") logger.info(f" - stream: True") logger.info(f" - temperature: {request.temperature}") logger.info(f" - max_tokens: {request.max_tokens}") if thinking_config: logger.info(f" - extra_body: {{'thinking': {thinking_config}}}") if tools_config: logger.info( f" - tools: {json.dumps(tools_config, ensure_ascii=False)}" ) logger.info(f" - tool_choice: auto") logger.info( f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}" ) chunk_count = 0 resp = client.chat.completions.create(**api_params) for chunk in resp: chunk_count += 1 # 检查 delta 是否存在 if not hasattr(chunk.choices[0], "delta"): continue delta = chunk.choices[0].delta # 处理深度思考内容(reasoning_content) reasoning_content = getattr(delta, "reasoning_content", None) if reasoning_content: data = { "id": f"chatcmpl-{generate_unique_id()}", "object": "chat.completion.chunk", "created": get_current_timestamp(), "model": model, "choices": [ { "index": 0, "delta": {"reasoning_content": reasoning_content}, "finish_reason": None, } ], } yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" continue # 处理普通内容 content = getattr(delta, "content", None) if content: full_content += content data = { "id": f"chatcmpl-{generate_unique_id()}", "object": "chat.completion.chunk", "created": get_current_timestamp(), "model": model, "choices": [ { "index": 0, "delta": {"content": content}, "finish_reason": None, } ], } yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n" # 结束标记 finish = { "id": f"chatcmpl-{generate_unique_id()}", "object": "chat.completion.chunk", "created": get_current_timestamp(), "model": model, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], } yield f"data: {json.dumps(finish, ensure_ascii=False)}\n\n" yield "data: [DONE]\n\n" # 打印流式响应结果 logger.info(f"[GLM] 流式响应完成:") logger.info(f" - chunks: {chunk_count}") logger.info(f" - content_length: {len(full_content)} 字符") logger.info( f" - content_preview: {full_content[:200]}..." if len(full_content) > 200 else f" - content: {full_content}" ) return StreamingResponse(generator(), media_type="text/event-stream") def _sync_chat( self, client, messages, model, request, extra_kwargs ) -> JSONResponse: """非流式聊天""" from utils.helpers import generate_unique_id, get_current_timestamp # 提取深度思考配置 thinking_config = extra_kwargs.get("thinking") tools_config = extra_kwargs.get("tools") # 构建 API 调用参数 api_params = { "model": model, "messages": messages, "stream": False, "temperature": request.temperature, "max_tokens": request.max_tokens, } # 深度思考:使用 extra_body 传递 if thinking_config: api_params["extra_body"] = {"thinking": thinking_config} # 联网搜索:使用 tools 参数 if tools_config: api_params["tools"] = tools_config api_params["tool_choice"] = "auto" # 打印请求参数 logger.info(f"[GLM] API 调用参数:") logger.info(f" - model: {model}") logger.info(f" - stream: {request.stream}") logger.info(f" - temperature: {request.temperature}") logger.info(f" - max_tokens: {request.max_tokens}") if thinking_config: logger.info(f" - extra_body: {{'thinking': {thinking_config}}}") if tools_config: logger.info(f" - tools: {json.dumps(tools_config, ensure_ascii=False)}") logger.info(f" - tool_choice: auto") logger.info( f" - messages: {json.dumps(messages, ensure_ascii=False, indent=2)}" ) resp = client.chat.completions.create(**api_params) message = resp.choices[0].message content = message.content or "" # 构建响应 response_message = {"role": "assistant", "content": content} # 处理深度思考内容 reasoning_content = getattr(message, "reasoning_content", None) if reasoning_content: response_message["reasoning_content"] = reasoning_content response = { "id": f"chatcmpl-{generate_unique_id()}", "object": "chat.completion", "created": get_current_timestamp(), "model": model, "choices": [ { "index": 0, "message": response_message, "finish_reason": "stop", } ], } if hasattr(resp, "usage") and resp.usage: response["usage"] = { "prompt_tokens": resp.usage.prompt_tokens, "completion_tokens": resp.usage.completion_tokens, "total_tokens": resp.usage.total_tokens, } # 打印响应结果 logger.info(f"[GLM] 响应结果:") logger.info(f" - content_length: {len(content)} 字符") logger.info( f" - content_preview: {content[:200]}..." if len(content) > 200 else f" - content: {content}" ) if hasattr(resp, "usage") and resp.usage: logger.info(f" - usage: {response['usage']}") return JSONResponse(content=response)