diff --git a/server/utils/glm_adapter.py b/server/utils/glm_adapter.py
index 529cfc5..cbfa796 100644
--- a/server/utils/glm_adapter.py
+++ b/server/utils/glm_adapter.py
@@ -34,7 +34,7 @@ def get_client():
         from zai import ZhipuAiClient
     except ImportError:
         raise ImportError("GLM 模式需要安装 zai-sdk:.venv/bin/pip install zai-sdk")
-    api_key = os.getenv("ZHIPU_API_KEY") or os.getenv("GLM_API_KEY")
+    api_key = (os.getenv("ZHIPU_API_KEY") or "").strip() or (os.getenv("GLM_API_KEY") or "").strip()
     if not api_key:
         raise ValueError("GLM 模式需要设置环境变量 ZHIPU_API_KEY")
     _client = ZhipuAiClient(api_key=api_key)
@@ -43,15 +43,15 @@
 
 # ── 模型映射 ──────────────────────────────────────────────────────────
-DEFAULT_TEXT_MODEL = "glm-4.6v"  # glm-4.6v 文本+视觉统一模型
-DEFAULT_VISION_MODEL = "glm-4.6v"
+DEFAULT_TEXT_MODEL = "glm-4.5-air"  # glm-4.5-air unified text model (replaces glm-4.6v)
+DEFAULT_VISION_MODEL = "glm-4.5-air"
 
 MODEL_MAP = {
-    "qwen-max": "glm-4.6v",
-    "qwen-plus": "glm-4.6v",
-    "qwen-turbo": "glm-4.6v",
-    "qwen-vl-max": "glm-4.6v",
-    "qwen-vl-plus": "glm-4.6v",
+    "qwen-max": "glm-4.5-air",
+    "qwen-plus": "glm-4.5-air",
+    "qwen-turbo": "glm-4.5-air",
+    "qwen-vl-max": "glm-4.5-air",
+    "qwen-vl-plus": "glm-4.5-air",
 }
 
@@ -195,7 +195,7 @@ def build_glm_messages(messages: list, files: list | None = None) -> tuple[list,
 
 # ── 哨兵对象 ─────────────────────────────────────────────────────────
 _SENTINEL = object()
-
+# ── 流式调用 ────────────────────────────────────────────────────────
 async def glm_stream_generator(
     messages: list,
     model: str,
@@ -221,11 +221,11 @@ async def glm_stream_generator(
     extra_kwargs: dict = {}
     if web_search:
         extra_kwargs["tools"] = [
-            {"type": "web_search", "web_search": {"search_result": True}}
+            {"type": "web_search", "web_search": {"enable": True, "search_result": True}}
         ]
-    if deep_thinking:
-        extra_kwargs["thinking"] = {"type": "enabled"}
-
+    if not deep_thinking:
+        # Zhipu enables thinking by default, so only send an override to disable it.
+        extra_kwargs["thinking"] = {"type": "disabled"}
     print(f"[GLM] 
流式请求:model={actual_model} vision={has_vision} "
           f"web_search={web_search} thinking={deep_thinking}")
@@ -334,9 +334,13 @@ def glm_chat_sync(
     extra_kwargs: dict = {}
     if web_search:
-        extra_kwargs["tools"] = [
-            {"type": "web_search", "web_search": {"search_result": True}}
-        ]
+        extra_kwargs["tools"] = [{
+            "type": "web_search",
+            "web_search": {
+                "enable": True,
+                "search_result": True
+            }
+        }]
 
     if deep_thinking:
         extra_kwargs["thinking"] = {"type": "enabled"}
 
diff --git a/server/utils/test_glm_search.py b/server/utils/test_glm_search.py
new file mode 100644
index 0000000..d6f8729
--- /dev/null
+++ b/server/utils/test_glm_search.py
@@ -0,0 +1,31 @@
+import os
+import sys
+import asyncio
+from pathlib import Path
+
+# Add project root (server/) to sys.path so `from utils.glm_adapter import ...` resolves
+root_dir = Path(__file__).parent.parent
+sys.path.insert(0, str(root_dir))
+
+from utils.glm_adapter import glm_stream_generator, _ensure_venv, glm_chat_sync
+
+# Set API key from .env if needed
+from dotenv import load_dotenv
+load_dotenv()
+
+async def test_stream():
+    msgs = [{"role": "user", "content": "今天北京天气怎样?"}]
+    print("Testing stream...")
+    async for chunk in glm_stream_generator(msgs, "glm-4.5-air", 0.7, 1024, web_search=True):
+        print(chunk, end="")
+
+def test_sync():
+    msgs = [{"role": "user", "content": "今天几号?武汉天气怎样?"}]
+    print("Testing sync...")
+    res = glm_chat_sync(msgs, "glm-4.5-air", 0.7, 1024, web_search=True)
+    print(res)
+
+if __name__ == "__main__":
+    _ensure_venv()
+    # test_sync()
+    asyncio.run(test_stream())