"""Prompt and formatting helpers for per-thread memory.""" from __future__ import annotations import json import re from typing import Any from deerflow.agents.memory.prompt import format_conversation_for_update, format_memory_for_injection THREAD_MEMORY_UPDATE_PROMPT = """You are a user profile memory system. Current per-thread memory: {existing_memory} Conversation: {conversation} Preferred memory language: {preferred_language} Return JSON only with this schema: {{ "user": {{ "workContext": {{"summary": string, "updatedAt": string}}, "personalContext": {{"summary": string, "updatedAt": string}}, "topOfMind": {{"summary": string, "updatedAt": string}} }}, "history": {{ "recentMonths": {{"summary": string, "updatedAt": string}}, "earlierContext": {{"summary": string, "updatedAt": string}}, "longTermBackground": {{"summary": string, "updatedAt": string}} }}, "facts": [ {{ "content": string, "category": "tech_stack"|"preference"|"personal"|"context"|"goal", "confidence": number }} ] }} Rules: - Keep only stable and useful user profile facts. - Do not store sensitive personal data (phone/email/address/password/token/id/bank). - Deduplicate and keep high-confidence facts. - Write all human-readable text fields (`summary`, `content`, and similar prose) in the preferred memory language. - Return valid JSON only. """ def create_empty_thread_memory() -> dict[str, Any]: return { "user": { "workContext": {"summary": "", "updatedAt": ""}, "personalContext": {"summary": "", "updatedAt": ""}, "topOfMind": {"summary": "", "updatedAt": ""}, }, "history": { "recentMonths": {"summary": "", "updatedAt": ""}, "earlierContext": {"summary": "", "updatedAt": ""}, "longTermBackground": {"summary": "", "updatedAt": ""}, }, "facts": [], } def _infer_preferred_memory_language(messages: list[Any]) -> str: conversation = format_conversation_for_update(messages) if not conversation.strip(): return "same as the user's latest message" # If user explicitly provides locale hints, prefer them. locale_match = re.search(r"\b([a-z]{2}-[A-Z]{2})\b", conversation) if locale_match: return locale_match.group(1) # Script-based heuristic (dynamic, not hard-coded to two languages). script_patterns = { "zh-Hans": r"[\u4e00-\u9fff]", "ja-JP": r"[\u3040-\u30ff]", "ko-KR": r"[\uac00-\ud7af]", "ru-RU": r"[\u0400-\u04FF]", "ar": r"[\u0600-\u06FF]", "hi-IN": r"[\u0900-\u097F]", "th-TH": r"[\u0E00-\u0E7F]", "he-IL": r"[\u0590-\u05FF]", "el-GR": r"[\u0370-\u03FF]", } counts = {lang: len(re.findall(pattern, conversation)) for lang, pattern in script_patterns.items()} best_lang, best_count = max(counts.items(), key=lambda item: item[1]) if best_count > 0: return best_lang # Latin-script fallback: ask model to keep same language as the user's latest message. return "same as the user's latest message" def format_thread_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str: return format_memory_for_injection(memory_data, max_tokens=max_tokens) def build_thread_memory_prompt(existing_memory: dict[str, Any], messages: list[Any]) -> str: return THREAD_MEMORY_UPDATE_PROMPT.format( existing_memory=json.dumps(existing_memory, ensure_ascii=False, indent=2), conversation=format_conversation_for_update(messages), preferred_language=_infer_preferred_memory_language(messages), )