feat:写入跟用户相同的语言的记忆
This commit is contained in:
parent
31daed1887
commit
03aa9dd8f8
@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from deerflow.agents.memory.prompt import _coerce_confidence, _count_tokens, format_conversation_for_update
|
||||
@ -19,6 +20,8 @@ Conversation:
|
||||
{conversation}
|
||||
</conversation>
|
||||
|
||||
Preferred memory language: {preferred_language}
|
||||
|
||||
Return JSON only with this schema:
|
||||
{{
|
||||
"user": {{
|
||||
@ -44,6 +47,7 @@ Rules:
|
||||
- Keep only stable and useful user profile facts.
|
||||
- Do not store sensitive personal data (phone/email/address/password/token/id/bank).
|
||||
- Deduplicate and keep high-confidence facts.
|
||||
- Write all human-readable text fields (`summary`, `content`, and similar prose) in the preferred memory language.
|
||||
- Return valid JSON only.
|
||||
"""
|
||||
|
||||
@ -64,6 +68,37 @@ def create_empty_thread_memory() -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _infer_preferred_memory_language(messages: list[Any]) -> str:
    """Infer the language that human-readable memory text should be written in.

    The conversation is rendered with ``format_conversation_for_update`` and
    inspected in three steps:

    1. An explicit locale tag of the form ``xx-YY`` (e.g. ``en-US``) found
       anywhere in the rendered text wins.
    2. Otherwise the writing system with the most characters decides
       (Han, kana, Hangul, Cyrillic, Arabic, Devanagari, Thai, Hebrew, Greek).
    3. Otherwise (empty or Latin-only text) a generic instruction is returned
       so the model mirrors the user's latest message.

    Args:
        messages: Conversation messages, passed unchanged to
            ``format_conversation_for_update``.

    Returns:
        A language tag such as ``"zh-Hans"`` or ``"ja-JP"``, or the literal
        instruction ``"same as the user's latest message"``.
    """
    fallback = "same as the user's latest message"

    conversation = format_conversation_for_update(messages)
    if not conversation.strip():
        return fallback

    # If user explicitly provides locale hints, prefer them.
    locale_match = re.search(r"\b([a-z]{2}-[A-Z]{2})\b", conversation)
    if locale_match:
        return locale_match.group(1)

    # Script-based heuristic (dynamic, not hard-coded to two languages).
    # Insertion order matters: max() keeps the first entry on a tie.
    script_patterns = {
        "zh-Hans": r"[\u4e00-\u9fff]",
        "ja-JP": r"[\u3040-\u30ff]",
        "ko-KR": r"[\uac00-\ud7af]",
        "ru-RU": r"[\u0400-\u04FF]",
        "ar": r"[\u0600-\u06FF]",
        "hi-IN": r"[\u0900-\u097F]",
        "th-TH": r"[\u0E00-\u0E7F]",
        "he-IL": r"[\u0590-\u05FF]",
        "el-GR": r"[\u0370-\u03FF]",
    }
    counts = {lang: len(re.findall(pattern, conversation)) for lang, pattern in script_patterns.items()}

    # Japanese mixes Han characters (kanji) with kana, so counting Han only
    # for Chinese mislabels kanji-heavy Japanese as "zh-Hans". When kana make
    # up a substantial share of the CJK text (at least one kana per two Han
    # characters -- a heuristic threshold), credit the Han characters to
    # Japanese. A stray kana inside mostly-Chinese text stays below the
    # threshold and does not flip the result.
    kana_count = counts["ja-JP"]
    han_count = counts["zh-Hans"]
    if kana_count and kana_count * 2 >= han_count:
        counts["ja-JP"] = kana_count + han_count
        counts["zh-Hans"] = 0

    best_lang, best_count = max(counts.items(), key=lambda item: item[1])
    if best_count > 0:
        return best_lang

    # Latin-script fallback: ask model to keep same language as the user's latest message.
    return fallback
|
||||
|
||||
|
||||
def format_thread_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
|
||||
if not memory_data:
|
||||
return ""
|
||||
@ -130,4 +165,5 @@ def build_thread_memory_prompt(existing_memory: dict[str, Any], messages: list[A
|
||||
return THREAD_MEMORY_UPDATE_PROMPT.format(
|
||||
existing_memory=json.dumps(existing_memory, ensure_ascii=False, indent=2),
|
||||
conversation=format_conversation_for_update(messages),
|
||||
preferred_language=_infer_preferred_memory_language(messages),
|
||||
)
|
||||
|
||||
@ -34,3 +34,20 @@ def test_build_thread_memory_prompt_does_not_raise_format_key_error():
|
||||
)
|
||||
assert "Current per-thread memory" in prompt
|
||||
assert '"user"' in prompt
|
||||
assert "Preferred memory language: same as the user's latest message" in prompt
|
||||
|
||||
|
||||
def test_build_thread_memory_prompt_prefers_chinese_for_chinese_conversation():
    """A Chinese-only conversation makes the prompt request ``zh-Hans``."""
    empty_memory = {"user": {}, "history": {}, "facts": []}
    chinese_turns = [HumanMessage(content="我叫小明,我更喜欢中文交流。")]

    rendered = build_thread_memory_prompt(empty_memory, chinese_turns)

    assert "Preferred memory language: zh-Hans" in rendered
|
||||
|
||||
|
||||
def test_build_thread_memory_prompt_prefers_japanese_for_japanese_conversation():
    """A Japanese conversation makes the prompt request ``ja-JP``."""
    empty_memory = {"user": {}, "history": {}, "facts": []}
    japanese_turns = [HumanMessage(content="私は日本語で会話したいです。")]

    rendered = build_thread_memory_prompt(empty_memory, japanese_turns)

    assert "Preferred memory language: ja-JP" in rendered
|
||||
|
||||
Loading…
Reference in New Issue
Block a user