diff --git a/server/utils/glm_adapter.py b/server/utils/glm_adapter.py
index 22612bb..529cfc5 100644
--- a/server/utils/glm_adapter.py
+++ b/server/utils/glm_adapter.py
@@ -196,7 +196,6 @@ def build_glm_messages(messages: list, files: list | None = None) -> tuple[list,
 
 _SENTINEL = object()
 
-# ── 流式 SSE 生成器 ───────────────────────────────────────────────────
 async def glm_stream_generator(
     messages: list,
     model: str,
@@ -255,6 +254,9 @@ async def glm_stream_generator(
 
     loop = asyncio.get_running_loop()
 
+    full_reasoning = ""  # 累计思考内容(用于判断是否首次)
+    full_content = ""  # 累计正式回答(用于判断是否首次)
+
     while True:
         item = await loop.run_in_executor(None, chunk_queue.get)
 
@@ -268,17 +270,40 @@
         try:
             delta = item.choices[0].delta
-            text = getattr(delta, "content", "") or ""
-            if not text:
+            reasoning = getattr(delta, "reasoning_content", "") or ""
+            text = getattr(delta, "content", "") or ""
+
+            delta_str = ""
+
+            # ── 思考过程(reasoning_content)────────────────────────
+            if reasoning:
+                if not full_reasoning:
+                    # 首个思考片段:加 Markdown 引用块标题
+                    delta_str += "> **💭 深度思考过程:**\n> \n> "
+                full_reasoning += reasoning
+                # 引用块内换行需在每行前加 `> `
+                delta_str += reasoning.replace("\n", "\n> ")
+
+            # ── 正式回答(content)──────────────────────────────────
+            if text:
+                if not full_content and full_reasoning:
+                    # 思考结束后首次出现正式回答:加分隔线
+                    delta_str += "\n\n---\n\n"
+                full_content += text
+                delta_str += text
+
+            if not delta_str:
                 continue
+
+
             data = {
                 "id": f"chatcmpl-{generate_unique_id()}",
                 "object": "chat.completion.chunk",
                 "created": get_current_timestamp(),
                 "model": actual_model,
-                "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}],
+                "choices": [{"index": 0, "delta": {"content": delta_str}, "finish_reason": None}],
             }
             yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
+
         except Exception as e:
             print(f"[GLM] chunk 解析异常:{e}")
 
 
@@ -293,6 +318,7 @@
 
     yield "data: [DONE]\n\n"
 
 
+# ── 非流式调用 ────────────────────────────────────────────────────────
 def glm_chat_sync(
     messages: list,