"""
GLM-4.6V 平台路由处理器(zai-sdk)

所有智谱 GLM 相关逻辑均集中在此文件,main.py 无感知任何平台细节。
"""
import json
import os
import sys
from pathlib import Path

from fastapi import HTTPException
from fastapi.responses import JSONResponse, StreamingResponse

from utils.helpers import generate_unique_id, get_current_timestamp
from utils.logger import log_info
def init():
    """Initialize the GLM backend: verify that an API key is configured.

    Called by main.py at startup (when LLM_BACKEND=glm). Accepts either
    ZHIPU_API_KEY or GLM_API_KEY as the credential source.

    Raises:
        ValueError: if neither ZHIPU_API_KEY nor GLM_API_KEY is set.
    """
    api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
    if not api_key:
        raise ValueError(
            "GLM 模式需要设置环境变量 ZHIPU_API_KEY(在 https://open.bigmodel.cn 申请)"
        )
    # Plain string literal: the original used an f-string with no
    # placeholders (ruff F541); the logged text is unchanged.
    log_info("[GLM] 初始化完成,ZHIPU_API_KEY 已配置")
async def chat_handler(body: dict):
    """GLM chat handler — drop-in compatible with Bailian's chat_endpoint_handler.

    Dispatches to streaming or non-streaming mode based on the request body
    and supports image/document attachments, web search and deep thinking.
    """
    from utils.glm_adapter import glm_chat_sync, glm_stream_generator

    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="请求体必须是 JSON 对象")

    msgs = body.get("messages", [])
    wants_stream = body.get("stream", True)
    attachments = body.get("files", [])
    search_enabled = body.get("webSearch", False) or body.get("deepSearch", False)
    thinking_enabled = body.get("deepThinking", False)

    # Fallback for the simplified front-end format (no `messages` array):
    # wrap the bare message/systemPrompt fields into a two-entry chat.
    if not msgs:
        raw = body.get("message", "")
        user_content = raw if isinstance(raw, list) else [{"type": "text", "text": raw}]
        msgs = [
            {"role": "system", "content": body.get("systemPrompt", "你是一个智能助手。")},
            {"role": "user", "content": user_content},
        ]

    # Keyword arguments shared by the streaming and synchronous adapters.
    call_kwargs = dict(
        messages=msgs,
        model=body.get("model", "glm-4.6v"),
        temperature=body.get("temperature", 0.7),
        max_tokens=body.get("max_tokens", body.get("maxTokens", 2000)),
        files=attachments or None,
        web_search=search_enabled,
        deep_thinking=thinking_enabled,
    )

    log_info(
        f"[GLM] model={call_kwargs['model']} stream={wants_stream} "
        f"web_search={search_enabled} thinking={thinking_enabled} "
        f"files={len(attachments)} msgs={len(msgs)}"
    )

    if wants_stream:
        return StreamingResponse(
            glm_stream_generator(**call_kwargs),
            media_type="text/event-stream",
        )

    result = glm_chat_sync(**call_kwargs)
    payload = {
        "id": f"chatcmpl-{generate_unique_id()}",
        "object": "chat.completion",
        "created": get_current_timestamp(),
        "model": result["model"],
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": result["content"]},
                "finish_reason": "stop",
            }
        ],
    }
    # Usage accounting is optional in the adapter's reply; attach it only
    # when present so the response shape matches the original handler.
    if result.get("usage"):
        payload["usage"] = result["usage"]
    return JSONResponse(content=payload)
def models_handler():
    """Return the catalogue of GLM models exposed by this backend."""
    # (id, display name, description) — maxTokens and provider are
    # identical for every entry, so they are filled in below.
    catalogue = [
        ("glm-4.6v", "GLM-4.6V(推荐)", "最新旗舰模型,支持文本/图像/文档/深度思考"),
        ("glm-4-flash", "GLM-4 Flash", "高性价比文本模型(0.2元/千token)"),
        ("glm-4v-plus-0111", "GLM-4V Plus", "图像 + PDF/DOCX 原生多模态"),
        ("glm-z1-flash", "GLM-Z1 Flash", "深度思考推理模型"),
    ]
    return {
        "data": [
            {
                "id": model_id,
                "name": display_name,
                "description": blurb,
                "maxTokens": 128000,
                "provider": "ZhipuAI",
            }
            for model_id, display_name, blurb in catalogue
        ],
        "object": "list",
    }