"""
GLM-4.6V platform route handlers (zai-sdk).

All Zhipu GLM-specific logic lives in this file; main.py is unaware of any
platform details.
"""

import json
import os
import sys
from pathlib import Path

from fastapi import HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse, StreamingResponse

from utils.helpers import generate_unique_id, get_current_timestamp
from utils.logger import log_info


def init() -> None:
    """
    Initialise the GLM backend by verifying that an API key is configured.

    Called by main.py at startup (when LLM_BACKEND=glm).

    Raises:
        ValueError: if neither ZHIPU_API_KEY nor GLM_API_KEY is set to a
            non-empty value.
    """
    # GLM_API_KEY is accepted as a fallback name for the same credential.
    api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
    if not api_key:
        raise ValueError(
            "GLM 模式需要设置环境变量 ZHIPU_API_KEY(在 https://open.bigmodel.cn 申请)"
        )
    # NOTE: the message names ZHIPU_API_KEY even when the key actually came
    # from GLM_API_KEY; the text is kept unchanged for log compatibility.
    log_info("[GLM] 初始化完成,ZHIPU_API_KEY 已配置")


async def chat_handler(body: dict):
    """
    Handle a GLM chat request, streaming or non-streaming.

    Recognised keys in ``body``:
        messages: chat messages; when missing or empty, a system + user pair
            is built from ``message`` and ``systemPrompt``.
        model: model id (default ``"glm-4.6v"``).
        stream: whether to stream the reply (default ``True``).
        temperature: sampling temperature (default ``0.7``).
        max_tokens / maxTokens: completion token limit (default ``2000``).
        webSearch / deepSearch: either one enables web search.
        deepThinking: enables deep-thinking mode.
        files: attachments; forwarded as ``None`` when empty.

    Returns:
        A ``StreamingResponse`` (``text/event-stream``) when ``stream`` is
        truthy, otherwise a ``JSONResponse`` shaped like a
        ``chat.completion`` object.

    Raises:
        HTTPException: 400 when ``body`` is not a dict.
    """
    # Imported lazily, as in the original, so the adapter is only loaded when
    # a GLM chat request is actually served.
    from utils.glm_adapter import glm_chat_sync, glm_stream_generator

    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="请求体必须是 JSON 对象")

    # `or []` also covers an explicit JSON null, which `.get(key, [])` would
    # pass through as None and crash the len() calls in the log line below.
    messages = body.get("messages") or []
    files = body.get("files") or []
    model = body.get("model", "glm-4.6v")
    stream = body.get("stream", True)
    temperature = body.get("temperature", 0.7)
    max_tokens = body.get("max_tokens", body.get("maxTokens", 2000))
    web_search = body.get("webSearch", False) or body.get("deepSearch", False)
    deep_think = body.get("deepThinking", False)

    # Support the simplified front-end format (no `messages` structure).
    if not messages:
        msg_text = body.get("message", "")
        sys_prompt = body.get("systemPrompt", "你是一个智能助手。")
        # A list is taken to be ready-made content parts and used as-is;
        # anything else is wrapped in a single text part.
        user_content = (
            msg_text
            if isinstance(msg_text, list)
            else [{"type": "text", "text": msg_text}]
        )
        messages = [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_content},
        ]

    log_info(
        f"[GLM] model={model} stream={stream} web_search={web_search} "
        f"thinking={deep_think} files={len(files)} msgs={len(messages)}"
    )

    adapter_kwargs = {
        "messages": messages,
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "files": files or None,
        "web_search": web_search,
        "deep_thinking": deep_think,
    }

    if stream:
        return StreamingResponse(
            glm_stream_generator(**adapter_kwargs),
            media_type="text/event-stream",
        )

    # glm_chat_sync is a blocking call; run it in the thread pool so the
    # event loop keeps serving other requests while waiting on the upstream.
    result = await run_in_threadpool(glm_chat_sync, **adapter_kwargs)

    resp = {
        "id": f"chatcmpl-{generate_unique_id()}",
        "object": "chat.completion",
        "created": get_current_timestamp(),
        "model": result["model"],
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": result["content"]},
                "finish_reason": "stop",
            }
        ],
    }
    # Usage is optional in the adapter result; include it only when present.
    if result.get("usage"):
        resp["usage"] = result["usage"]
    return JSONResponse(content=resp)


def models_handler() -> dict:
    """Return the static list of available GLM models."""
    return {
        "data": [
            {
                "id": "glm-4.6v",
                "name": "GLM-4.6V(推荐)",
                "description": "最新旗舰模型,支持文本/图像/文档/深度思考",
                "maxTokens": 128000,
                "provider": "ZhipuAI",
            },
            {
                "id": "glm-4-flash",
                "name": "GLM-4 Flash",
                "description": "高性价比文本模型(0.2元/千token)",
                "maxTokens": 128000,
                "provider": "ZhipuAI",
            },
            {
                "id": "glm-4v-plus-0111",
                "name": "GLM-4V Plus",
                "description": "图像 + PDF/DOCX 原生多模态",
                "maxTokens": 128000,
                "provider": "ZhipuAI",
            },
            {
                "id": "glm-z1-flash",
                "name": "GLM-Z1 Flash",
                "description": "深度思考推理模型",
                "maxTokens": 128000,
                "provider": "ZhipuAI",
            },
        ],
        "object": "list",
    }