# Instruction handed to the model when no language can be inferred locally.
_FALLBACK_MEMORY_LANGUAGE = "same as the user's latest message"

# Explicit locale hint written out in the text, e.g. "zh-CN" or "en-US".
_LOCALE_HINT_RE = re.compile(r"\b([a-z]{2}-[A-Z]{2})\b")

# Han ideographs are shared by Chinese and Japanese, so they are counted
# separately from kana and attributed to one of the two languages afterwards.
_HAN_RE = re.compile(r"[\u4e00-\u9fff]")
_KANA_RE = re.compile(r"[\u3040-\u30ff]")
_LATIN_RE = re.compile(r"[A-Za-z]")

# Scripts that map to a single language tag for the purposes of this heuristic.
_OTHER_SCRIPT_RES = {
    "ko-KR": re.compile(r"[\uac00-\ud7af]"),
    "ru-RU": re.compile(r"[\u0400-\u04FF]"),
    "ar": re.compile(r"[\u0600-\u06FF]"),
    "hi-IN": re.compile(r"[\u0900-\u097F]"),
    "th-TH": re.compile(r"[\u0E00-\u0E7F]"),
    "he-IL": re.compile(r"[\u0590-\u05FF]"),
    "el-GR": re.compile(r"[\u0370-\u03FF]"),
}

# Kana must make up at least this share of CJK characters for Han characters
# to be read as Japanese kanji rather than Chinese.
_MIN_KANA_SHARE = 0.2

# A script must reach this share of (script + Latin) letters to win; below it
# the text is treated as Latin-script with incidental foreign characters.
_MIN_SCRIPT_SHARE = 0.2

_USER_ROLES = frozenset({"human", "user"})


def _message_field(message: Any, name: str) -> Any:
    """Read ``name`` from a message given either as a mapping or as an object."""
    if isinstance(message, dict):
        return message.get(name)
    return getattr(message, name, None)


def _user_text(messages: list[Any]) -> str:
    """Join the text of all user-authored messages, one message per line.

    NOTE(review): assumes LangChain-style messages (``type == "human"``,
    ``content`` as ``str`` or a list of text blocks) or role/content dicts;
    confirm against the callers. Unrecognised shapes contribute nothing.
    """
    chunks: list[str] = []
    for message in messages or []:
        role = _message_field(message, "type") or _message_field(message, "role")
        if not isinstance(role, str) or role not in _USER_ROLES:
            continue
        content = _message_field(message, "content")
        if isinstance(content, str):
            chunks.append(content)
        elif isinstance(content, list):
            for part in content:
                if isinstance(part, str):
                    chunks.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    chunks.append(part["text"])
    return "\n".join(chunks)


def _dominant_script_language(text: str) -> str:
    """Return the language tag of the dominant non-Latin script, or ``""``.

    An empty string means no script is prominent enough to decide, either
    because the text has no characters from a known script or because they
    are outnumbered by Latin letters beyond ``_MIN_SCRIPT_SHARE``.
    """
    han = len(_HAN_RE.findall(text))
    kana = len(_KANA_RE.findall(text))

    if kana and kana >= _MIN_KANA_SHARE * (kana + han):
        # Kana never appears in Chinese prose, so a meaningful amount of it
        # marks the accompanying Han characters as Japanese kanji.
        counts = {"zh-Hans": 0, "ja-JP": kana + han}
    else:
        # NOTE(review): the Han range cannot tell Simplified from Traditional
        # Chinese; "zh-Hans" is kept because existing callers/tests expect it.
        counts = {"zh-Hans": han, "ja-JP": kana}
    for lang, pattern in _OTHER_SCRIPT_RES.items():
        counts[lang] = len(pattern.findall(text))

    best_lang, best_count = max(counts.items(), key=lambda item: item[1])
    if best_count == 0:
        return ""

    # A lone symbol (e.g. a Greek letter in a maths question) must not decide
    # the language of an otherwise Latin-script conversation.
    latin = len(_LATIN_RE.findall(text))
    if best_count < _MIN_SCRIPT_SHARE * (best_count + latin):
        return ""
    return best_lang


def _infer_preferred_memory_language(messages: list[Any]) -> str:
    """Guess the language in which thread memory should be written.

    The result is interpolated into the memory-update prompt, so it is either
    a language tag (``"zh-Hans"``, ``"ja-JP"``, ``"en-US"`` ...) or a plain
    English instruction for the model.

    Resolution order:

    1. An explicit ``xx-XX`` locale token found in the text.
    2. The dominant non-Latin script of the text.
    3. The instruction ``"same as the user's latest message"``.

    Only user-authored messages are inspected when they can be identified, so
    that assistant output (for example a requested translation) does not
    override the user's own language. When no user text is found, the whole
    formatted conversation is used instead.

    Args:
        messages: Conversation messages of the thread.

    Returns:
        A language tag, or the fallback instruction when nothing can be
        inferred.
    """
    text = _user_text(messages)
    if not text.strip():
        # Unknown message shape or no user turns: use the formatted conversation.
        text = format_conversation_for_update(messages)
    if not text.strip():
        return _FALLBACK_MEMORY_LANGUAGE

    # If the user explicitly provides a locale hint, prefer it.
    locale_match = _LOCALE_HINT_RE.search(text)
    if locale_match:
        return locale_match.group(1)

    # Latin-script (or undecidable) text: let the model mirror the user.
    return _dominant_script_language(text) or _FALLBACK_MEMORY_LANGUAGE