ai-chat-ui/server/api/chat_routes_glm.py

151 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
GLM-4.6V 平台路由处理器zai-sdk
所有智谱 GLM 相关逻辑均集中在此文件main.py 无感知任何平台细节。
"""
import json
import os
import sys
from pathlib import Path
from fastapi import HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from utils.helpers import generate_unique_id, get_current_timestamp
from utils.logger import log_info
def init():
    """
    Initialize the GLM backend: verify that an API key is configured.

    Called by main.py at startup (when LLM_BACKEND=glm).

    Raises:
        ValueError: if neither ZHIPU_API_KEY nor GLM_API_KEY is set.
    """
    # GLM_API_KEY is accepted as a fallback alias for ZHIPU_API_KEY.
    api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
    if not api_key:
        raise ValueError(
            "GLM 模式需要设置环境变量 ZHIPU_API_KEY在 https://open.bigmodel.cn 申请)"
        )
    # Plain string literal: the previous f-string contained no placeholders.
    log_info("[GLM] 初始化完成ZHIPU_API_KEY 已配置")
async def chat_handler(body: dict):
    """
    GLM chat handler (interface-compatible with the Bailian chat_endpoint_handler).

    Adapts automatically between streaming and non-streaming requests and
    forwards images, document attachments, web search and deep-thinking flags
    to the GLM adapter.
    """
    from utils.glm_adapter import glm_chat_sync, glm_stream_generator

    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="请求体必须是 JSON 对象")

    messages = body.get("messages", [])
    model = body.get("model", "glm-4.6v")
    stream = body.get("stream", True)
    temperature = body.get("temperature", 0.7)
    # Accept both snake_case and camelCase token-limit keys from the client.
    max_tokens = body.get("max_tokens", body.get("maxTokens", 2000))

    # Search mode precedence: deep search > simple search > no search.
    web_search = (
        "deep"
        if body.get("deepSearch", False)
        else ("simple" if body.get("webSearch", False) else False)
    )
    deep_think = body.get("deepThinking", False)
    files = body.get("files", [])

    # Accept the simplified front-end payload (no explicit messages list).
    if not messages:
        raw_message = body.get("message", "")
        system_prompt = body.get("systemPrompt", "你是一个智能助手。")
        user_content = (
            raw_message
            if isinstance(raw_message, list)
            else [{"type": "text", "text": raw_message}]
        )
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ]

    log_info(
        f"[GLM] model={model} stream={stream} web_search={web_search} "
        f"thinking={deep_think} files={len(files)} msgs={len(messages)}"
    )

    # Both adapter entry points take the same keyword arguments.
    adapter_kwargs = dict(
        messages=messages,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        files=files or None,
        web_search=web_search,
        deep_thinking=deep_think,
    )

    if stream:
        return StreamingResponse(
            glm_stream_generator(**adapter_kwargs),
            media_type="text/event-stream",
        )

    result = glm_chat_sync(**adapter_kwargs)
    payload = {
        "id": f"chatcmpl-{generate_unique_id()}",
        "object": "chat.completion",
        "created": get_current_timestamp(),
        "model": result["model"],
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": result["content"]},
                "finish_reason": "stop",
            }
        ],
    }
    usage = result.get("usage")
    if usage:
        payload["usage"] = usage
    return JSONResponse(content=payload)
def models_handler():
    """Return the list of GLM models exposed to the front end."""
    # (id, display name, description) — every entry shares the same
    # maxTokens and provider values, filled in below.
    model_specs = [
        ("glm-4.6v", "GLM-4.6V(推荐)", "最新旗舰模型,支持文本/图像/文档/深度思考"),
        ("glm-4-flash", "GLM-4 Flash", "高性价比文本模型0.2元/千token"),
        ("glm-4v-plus-0111", "GLM-4V Plus", "图像 + PDF/DOCX 原生多模态"),
        ("glm-z1-flash", "GLM-Z1 Flash", "深度思考推理模型"),
    ]
    return {
        "data": [
            {
                "id": model_id,
                "name": display_name,
                "description": description,
                "maxTokens": 128000,
                "provider": "ZhipuAI",
            }
            for model_id, display_name, description in model_specs
        ],
        "object": "list",
    }