112 lines
3.7 KiB
Python
112 lines
3.7 KiB
Python
"""Prompt and formatting helpers for per-thread memory."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
from typing import Any
|
|
|
|
from deerflow.agents.memory.prompt import format_conversation_for_update, format_memory_for_injection
|
|
|
|
# Template for the per-thread memory update request.
#
# It is filled with ``str.format`` and expects exactly three fields:
# ``existing_memory``, ``conversation`` and ``preferred_language``.
# Every literal brace of the JSON schema is doubled (``{{`` / ``}}``) so that
# ``str.format`` emits it as a single brace instead of treating it as a field.
# The literal must contain only prompt text: any stray line placed between the
# triple quotes is sent to the model verbatim.
THREAD_MEMORY_UPDATE_PROMPT = """You are a user profile memory system.

Current per-thread memory:
<existing_memory>
{existing_memory}
</existing_memory>

Conversation:
<conversation>
{conversation}
</conversation>

Preferred memory language: {preferred_language}

Return JSON only with this schema:
{{
"user": {{
"workContext": {{"summary": string, "updatedAt": string}},
"personalContext": {{"summary": string, "updatedAt": string}},
"topOfMind": {{"summary": string, "updatedAt": string}}
}},
"history": {{
"recentMonths": {{"summary": string, "updatedAt": string}},
"earlierContext": {{"summary": string, "updatedAt": string}},
"longTermBackground": {{"summary": string, "updatedAt": string}}
}},
"facts": [
{{
"content": string,
"category": "tech_stack"|"preference"|"personal"|"context"|"goal",
"confidence": number
}}
]
}}

Rules:
- Keep only stable and useful user profile facts.
- Do not store sensitive personal data (phone/email/address/password/token/id/bank).
- Deduplicate and keep high-confidence facts.
- Write all human-readable text fields (`summary`, `content`, and similar prose) in the preferred memory language.
- Return valid JSON only.
"""
|
|
|
|
|
|
def create_empty_thread_memory() -> dict[str, Any]:
    """Build a blank per-thread memory document.

    Returns:
        A new dict with a ``user`` and a ``history`` section, each holding
        three entries of the form ``{"summary": "", "updatedAt": ""}``, plus
        an empty ``facts`` list. Every call creates fresh nested objects, so
        the result can be mutated without affecting other callers.
    """
    user_sections = ("workContext", "personalContext", "topOfMind")
    history_sections = ("recentMonths", "earlierContext", "longTermBackground")

    memory: dict[str, Any] = {}
    # The entry literal sits inside the comprehension so that each section
    # gets its own dict rather than a shared one.
    memory["user"] = {name: {"summary": "", "updatedAt": ""} for name in user_sections}
    memory["history"] = {name: {"summary": "", "updatedAt": ""} for name in history_sections}
    memory["facts"] = []
    return memory
|
|
|
|
|
|
def _infer_preferred_memory_language(messages: list[Any]) -> str:
    """Guess the language in which memory text should be written.

    The conversation is rendered with ``format_conversation_for_update`` and
    inspected in three steps:

    1. If the text contains a locale tag shaped like ``xx-YY`` (for example
       ``en-US``), the first such tag is returned.
    2. Otherwise characters are counted per non-Latin script and the tag of
       the most frequent script is returned. Han characters are credited to
       Japanese instead of Chinese when the text also contains a meaningful
       share of kana.
    3. Otherwise (empty conversation, or no character from the listed
       scripts) an instruction string is returned that asks the model to
       follow the user's latest message.

    Args:
        messages: Conversation messages, passed unchanged to
            ``format_conversation_for_update``.

    Returns:
        A locale-like tag such as ``"ja-JP"``, or the literal instruction
        ``"same as the user's latest message"``.
    """
    fallback = "same as the user's latest message"

    conversation = format_conversation_for_update(messages)
    if not conversation.strip():
        return fallback

    # If user explicitly provides locale hints, prefer them.
    locale_match = re.search(r"\b([a-z]{2}-[A-Z]{2})\b", conversation)
    if locale_match:
        return locale_match.group(1)

    # Script-based heuristic (dynamic, not hard-coded to two languages).
    script_patterns = {
        "zh-Hans": r"[\u4e00-\u9fff]",
        "ja-JP": r"[\u3040-\u30ff]",
        "ko-KR": r"[\uac00-\ud7af]",
        "ru-RU": r"[\u0400-\u04FF]",
        "ar": r"[\u0600-\u06FF]",
        "hi-IN": r"[\u0900-\u097F]",
        "th-TH": r"[\u0E00-\u0E7F]",
        "he-IL": r"[\u0590-\u05FF]",
        "el-GR": r"[\u0370-\u03FF]",
    }
    counts = {lang: len(re.findall(pattern, conversation)) for lang, pattern in script_patterns.items()}

    # Han characters are shared by Chinese and Japanese, while kana occur only
    # in Japanese. Counting Han for Chinese alone mislabels kanji-heavy
    # Japanese text, so once kana make up at least a fifth of the kana + Han
    # characters the Han count is credited to Japanese. The threshold keeps a
    # stray kana in otherwise Chinese text from flipping the result.
    han_count = counts["zh-Hans"]
    kana_count = counts["ja-JP"]
    if han_count and kana_count * 4 >= han_count:
        counts["ja-JP"] = kana_count + han_count
        counts["zh-Hans"] = 0

    best_lang, best_count = max(counts.items(), key=lambda item: item[1])
    if best_count > 0:
        return best_lang

    # Latin-script fallback: ask model to keep same language as the user's latest message.
    return fallback
|
|
|
|
|
|
def format_thread_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
    """Format per-thread memory by delegating to ``format_memory_for_injection``.

    Args:
        memory_data: Memory document to format.
        max_tokens: Forwarded as the ``max_tokens`` keyword argument of
            ``format_memory_for_injection``; defaults to 2000.

    Returns:
        Whatever ``format_memory_for_injection`` returns for these arguments.
    """
    rendered = format_memory_for_injection(memory_data, max_tokens=max_tokens)
    return rendered
|
|
|
|
|
|
def build_thread_memory_prompt(existing_memory: dict[str, Any], messages: list[Any]) -> str:
    """Fill ``THREAD_MEMORY_UPDATE_PROMPT`` for one thread.

    Args:
        existing_memory: Current memory document; embedded as JSON with a
            two-space indent and non-ASCII characters left unescaped.
        messages: Conversation messages; rendered with
            ``format_conversation_for_update`` and also used to infer the
            preferred memory language.

    Returns:
        The prompt text with all three template fields substituted.
    """
    # The three values are computed in the order the template fields are named.
    memory_json = json.dumps(existing_memory, ensure_ascii=False, indent=2)
    conversation_text = format_conversation_for_update(messages)
    language_hint = _infer_preferred_memory_language(messages)

    fields = {
        "existing_memory": memory_json,
        "conversation": conversation_text,
        "preferred_language": language_hint,
    }
    return THREAD_MEMORY_UPDATE_PROMPT.format(**fields)
|