feat: 数据结构向df的memory.json对齐

This commit is contained in:
肖应宇 2026-05-08 11:10:26 +08:00
parent 7db468aa6f
commit 31daed1887
6 changed files with 140 additions and 161 deletions

View File

@ -407,8 +407,8 @@ def _get_memory_context(agent_name: str | None = None) -> str:
if thread_memory is not None:
thread_content = format_thread_memory_for_injection(
{
"profile": thread_memory.get("profile", {}),
"preferences": thread_memory.get("preferences", {}),
"user": thread_memory.get("user", {}),
"history": thread_memory.get("history", {}),
"facts": thread_memory.get("facts", []),
},
max_tokens=thread_config.max_injection_tokens,

View File

@ -21,18 +21,15 @@ Conversation:
Return JSON only with this schema:
{{
"profile": {{
"name": string|null,
"role": string|null,
"expertise": string[],
"language": "zh-CN"|"en-US"|null,
"context": string|null
"user": {{
"workContext": {{"summary": string, "updatedAt": string}},
"personalContext": {{"summary": string, "updatedAt": string}},
"topOfMind": {{"summary": string, "updatedAt": string}}
}},
"preferences": {{
"tone": "casual"|"formal"|"technical"|"friendly"|null,
"verbosity": "concise"|"detailed"|null,
"codeStyle": string|null,
"other": string|null
"history": {{
"recentMonths": {{"summary": string, "updatedAt": string}},
"earlierContext": {{"summary": string, "updatedAt": string}},
"longTermBackground": {{"summary": string, "updatedAt": string}}
}},
"facts": [
{{
@ -53,8 +50,16 @@ Rules:
def create_empty_thread_memory() -> dict[str, Any]:
    """Return a blank per-thread memory document.

    The layout has two groups of summary sections plus a fact list:

    * ``user``: ``workContext``, ``personalContext``, ``topOfMind``
    * ``history``: ``recentMonths``, ``earlierContext``, ``longTermBackground``
    * ``facts``: an empty list

    Every summary section is ``{"summary": "", "updatedAt": ""}``. A new
    structure is built on each call, so callers may mutate the result freely.
    """
    return {
        "user": {
            "workContext": {"summary": "", "updatedAt": ""},
            "personalContext": {"summary": "", "updatedAt": ""},
            "topOfMind": {"summary": "", "updatedAt": ""},
        },
        "history": {
            "recentMonths": {"summary": "", "updatedAt": ""},
            "earlierContext": {"summary": "", "updatedAt": ""},
            "longTermBackground": {"summary": "", "updatedAt": ""},
        },
        "facts": [],
    }
@ -63,32 +68,31 @@ def format_thread_memory_for_injection(memory_data: dict[str, Any], max_tokens:
if not memory_data:
return ""
profile = memory_data.get("profile") or {}
preferences = memory_data.get("preferences") or {}
user = memory_data.get("user") or {}
history = memory_data.get("history") or {}
facts = memory_data.get("facts") or []
profile_lines: list[str] = []
for key, label in (("name", "Name"), ("role", "Role"), ("language", "Language"), ("context", "Context")):
value = profile.get(key)
if isinstance(value, str) and value.strip():
profile_lines.append(f"- {label}: {value.strip()}")
expertise = profile.get("expertise")
if isinstance(expertise, list):
cleaned = [str(item).strip() for item in expertise if str(item).strip()]
if cleaned:
profile_lines.append(f"- Expertise: {', '.join(cleaned)}")
user_lines: list[str] = []
for key, label in (("workContext", "Work Context"), ("personalContext", "Personal Context"), ("topOfMind", "Top Of Mind")):
section = user.get(key) if isinstance(user, dict) else None
if isinstance(section, dict):
summary = section.get("summary")
if isinstance(summary, str) and summary.strip():
user_lines.append(f"- {label}: {summary.strip()}")
pref_lines: list[str] = []
for key, label in (("tone", "Tone"), ("verbosity", "Verbosity"), ("codeStyle", "Code Style"), ("other", "Other")):
value = preferences.get(key)
if isinstance(value, str) and value.strip():
pref_lines.append(f"- {label}: {value.strip()}")
history_lines: list[str] = []
for key, label in (("recentMonths", "Recent Months"), ("earlierContext", "Earlier Context"), ("longTermBackground", "Long-Term Background")):
section = history.get(key) if isinstance(history, dict) else None
if isinstance(section, dict):
summary = section.get("summary")
if isinstance(summary, str) and summary.strip():
history_lines.append(f"- {label}: {summary.strip()}")
sections: list[str] = []
if profile_lines:
sections.append("Profile:\n" + "\n".join(profile_lines))
if pref_lines:
sections.append("Preferences:\n" + "\n".join(pref_lines))
if user_lines:
sections.append("User:\n" + "\n".join(user_lines))
if history_lines:
sections.append("History:\n" + "\n".join(history_lines))
# Facts are lowest priority: include by confidence/recency and trim by token budget.
ranked_facts = sorted(

View File

@ -8,7 +8,7 @@ import logging
import re
import sqlite3
import threading
from datetime import datetime
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
@ -40,13 +40,13 @@ def _memory_to_markdown(data: dict[str, Any]) -> str:
return (
"# Thread Memory\n\n"
f"Owner ID: {owner_text}\n\n"
"## Profile\n"
"## User\n"
"```json\n"
f"{json.dumps(data.get('profile', {}), ensure_ascii=False, indent=2)}\n"
f"{json.dumps(data.get('user', {}), ensure_ascii=False, indent=2)}\n"
"```\n\n"
"## Preferences\n"
"## History\n"
"```json\n"
f"{json.dumps(data.get('preferences', {}), ensure_ascii=False, indent=2)}\n"
f"{json.dumps(data.get('history', {}), ensure_ascii=False, indent=2)}\n"
"```\n\n"
"## Facts\n"
"```json\n"
@ -66,16 +66,16 @@ def _memory_from_markdown(markdown: str) -> dict[str, Any]:
blocks = _JSON_FENCE_RE.findall(markdown)
if len(blocks) >= 1:
try:
profile = json.loads(blocks[0])
if isinstance(profile, dict):
parsed["profile"] = profile
user = json.loads(blocks[0])
if isinstance(user, dict):
parsed["user"] = user
except Exception:
pass
if len(blocks) >= 2:
try:
preferences = json.loads(blocks[1])
if isinstance(preferences, dict):
parsed["preferences"] = preferences
history = json.loads(blocks[1])
if isinstance(history, dict):
parsed["history"] = history
except Exception:
pass
if len(blocks) >= 3:
@ -90,26 +90,19 @@ def _memory_from_markdown(markdown: str) -> dict[str, Any]:
def _row_to_memory(row: tuple[Any, ...]) -> dict[str, Any]:
    """Convert a ``thread_memory`` row into the thread-memory dict.

    Args:
        row: Columns in the order used by the storage ``load`` queries:
            ``(thread_id, owner_id, memory_md, memory_version, last_updated)``.

    Returns:
        A dict with ``threadId``, ``ownerId``, ``user``, ``history``,
        ``facts``, ``memoryVersion`` (int) and ``lastUpdated`` (str).
    """
    memory_md = row[2]
    # A non-string payload is decoded as an empty document.
    decoded = _memory_from_markdown(memory_md if isinstance(memory_md, str) else "")
    defaults = create_empty_thread_memory()
    owner_id = decoded.get("ownerId")
    return {
        "threadId": row[0],
        # The owner parsed from the markdown wins; the owner_id column is the fallback.
        "ownerId": row[1] if owner_id is None else owner_id,
        "user": decoded.get("user", defaults["user"]),
        "history": decoded.get("history", defaults["history"]),
        "facts": decoded.get("facts", []),
        "memoryVersion": int(row[3]),
        "lastUpdated": str(row[4]),
    }
@ -128,71 +121,41 @@ class SqliteThreadMemoryStorage(ThreadMemoryStorage):
thread_id TEXT PRIMARY KEY,
owner_id TEXT NULL,
memory_md TEXT NOT NULL DEFAULT '',
profile TEXT NOT NULL DEFAULT '{}',
preferences TEXT NOT NULL DEFAULT '{}',
facts TEXT NOT NULL DEFAULT '[]',
memory_version INTEGER NOT NULL DEFAULT 0,
last_updated TEXT NOT NULL DEFAULT (datetime('now'))
)
"""
)
columns = {r[1] for r in self._conn.execute("PRAGMA table_info(thread_memory)").fetchall()}
if "memory_md" not in columns:
self._conn.execute("ALTER TABLE thread_memory ADD COLUMN memory_md TEXT NOT NULL DEFAULT ''")
self._conn.execute("CREATE INDEX IF NOT EXISTS idx_thread_memory_owner_id ON thread_memory(owner_id)")
self._conn.commit()
def load(self, thread_id: str) -> dict[str, Any] | None:
    """Load the stored memory for ``thread_id``.

    Args:
        thread_id: Primary key of the ``thread_memory`` row.

    Returns:
        The decoded memory dict produced by ``_row_to_memory``, or ``None``
        when no row exists for the thread.
    """
    # The connection is only touched while holding the instance lock.
    with self._lock:
        row = self._conn.execute(
            "SELECT thread_id, owner_id, memory_md, memory_version, last_updated "
            "FROM thread_memory WHERE thread_id = ?",
            (thread_id,),
        ).fetchone()
    if row is None:
        return None
    return _row_to_memory(row)
def save(self, thread_id: str, data: dict[str, Any], expected_version: int | None = None) -> bool:
now = datetime.utcnow().isoformat() + "Z"
now = datetime.now(UTC).isoformat().replace("+00:00", "Z")
owner_id = data.get("ownerId")
if expected_version is None:
expected_version = 0
with self._lock:
cur = self._conn.execute(
"""
INSERT INTO thread_memory (thread_id, owner_id, memory_md, profile, preferences, facts, memory_version, last_updated)
VALUES (?, ?, ?, ?, ?, ?, 0, ?)
INSERT INTO thread_memory (thread_id, owner_id, memory_md, memory_version, last_updated)
VALUES (?, ?, ?, 0, ?)
ON CONFLICT(thread_id) DO NOTHING
""",
(
thread_id,
owner_id,
_memory_to_markdown(data),
json.dumps(data.get("profile", {}), ensure_ascii=False),
json.dumps(data.get("preferences", {}), ensure_ascii=False),
json.dumps(data.get("facts", []), ensure_ascii=False),
now,
),
)
@ -203,15 +166,12 @@ class SqliteThreadMemoryStorage(ThreadMemoryStorage):
cur = self._conn.execute(
"""
UPDATE thread_memory
SET owner_id = ?, memory_md = ?, profile = ?, preferences = ?, facts = ?, memory_version = memory_version + 1, last_updated = ?
SET owner_id = ?, memory_md = ?, memory_version = memory_version + 1, last_updated = ?
WHERE thread_id = ? AND memory_version = ?
""",
(
owner_id,
_memory_to_markdown(data),
json.dumps(data.get("profile", {}), ensure_ascii=False),
json.dumps(data.get("preferences", {}), ensure_ascii=False),
json.dumps(data.get("facts", []), ensure_ascii=False),
now,
thread_id,
expected_version,
@ -239,31 +199,18 @@ class MysqlThreadMemoryStorage(ThreadMemoryStorage):
thread_id VARCHAR(64) PRIMARY KEY,
owner_id VARCHAR(64) NULL,
memory_md LONGTEXT NOT NULL,
profile JSON NOT NULL,
preferences JSON NOT NULL,
facts JSON NOT NULL,
memory_version INT NOT NULL DEFAULT 0,
last_updated TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX idx_owner_id (owner_id)
)
"""
)
cur.execute(
"""
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = 'thread_memory' AND COLUMN_NAME = 'memory_md'
"""
)
has_memory_md = cur.fetchone()[0] > 0
if not has_memory_md:
cur.execute("ALTER TABLE thread_memory ADD COLUMN memory_md LONGTEXT NOT NULL")
self._conn.commit()
def load(self, thread_id: str) -> dict[str, Any] | None:
with self._conn.cursor() as cur:
cur.execute(
"SELECT thread_id, owner_id, memory_md, profile, preferences, facts, memory_version, last_updated FROM thread_memory WHERE thread_id = %s",
"SELECT thread_id, owner_id, memory_md, memory_version, last_updated FROM thread_memory WHERE thread_id = %s",
(thread_id,),
)
row = cur.fetchone()
@ -276,17 +223,14 @@ class MysqlThreadMemoryStorage(ThreadMemoryStorage):
with self._conn.cursor() as cur:
cur.execute(
"""
INSERT INTO thread_memory (thread_id, owner_id, memory_md, profile, preferences, facts, memory_version)
VALUES (%s, %s, %s, %s, %s, %s, 0)
INSERT INTO thread_memory (thread_id, owner_id, memory_md, memory_version)
VALUES (%s, %s, %s, 0)
ON DUPLICATE KEY UPDATE thread_id = thread_id
""",
(
thread_id,
owner_id,
_memory_to_markdown(data),
json.dumps(data.get("profile", {}), ensure_ascii=False),
json.dumps(data.get("preferences", {}), ensure_ascii=False),
json.dumps(data.get("facts", []), ensure_ascii=False),
),
)
if cur.rowcount == 1:
@ -295,15 +239,12 @@ class MysqlThreadMemoryStorage(ThreadMemoryStorage):
cur.execute(
"""
UPDATE thread_memory
SET owner_id = %s, memory_md = %s, profile = %s, preferences = %s, facts = %s, memory_version = memory_version + 1
SET owner_id = %s, memory_md = %s, memory_version = memory_version + 1
WHERE thread_id = %s AND memory_version = %s
""",
(
owner_id,
_memory_to_markdown(data),
json.dumps(data.get("profile", {}), ensure_ascii=False),
json.dumps(data.get("preferences", {}), ensure_ascii=False),
json.dumps(data.get("facts", []), ensure_ascii=False),
thread_id,
expected_version,
),

View File

@ -6,7 +6,7 @@ import json
import logging
import re
import uuid
from datetime import datetime
from datetime import UTC, datetime
from typing import Any
from deerflow.agents.memory.updater import _extract_text
@ -51,23 +51,32 @@ class ThreadMemoryUpdater:
return None
return text
profile = data.get("profile", {})
preferences = data.get("preferences", {})
user = data.get("user", {})
history = data.get("history", {})
facts = data.get("facts", [])
cleaned = create_empty_thread_memory()
cleaned["profile"]["name"] = safe_text(profile.get("name"))
cleaned["profile"]["role"] = safe_text(profile.get("role"))
cleaned["profile"]["language"] = safe_text(profile.get("language"))
cleaned["profile"]["context"] = safe_text(profile.get("context"))
expertise = profile.get("expertise")
if isinstance(expertise, list):
cleaned["profile"]["expertise"] = [x for x in (safe_text(item) for item in expertise) if x]
def copy_summary_section(target_parent: dict[str, Any], target_key: str, source_parent: Any):
    """Copy one summary section from ``source_parent`` into ``target_parent``.

    Nothing is copied unless ``source_parent`` is a dict whose ``target_key``
    entry is also a dict. ``summary`` and ``updatedAt`` are each passed through
    ``safe_text`` and only written when the result is truthy. When a summary is
    present but no timestamp came with it, ``updatedAt`` is set to the current
    UTC time in ISO format with a ``Z`` suffix.
    """
    incoming = source_parent.get(target_key) if isinstance(source_parent, dict) else None
    if not isinstance(incoming, dict):
        return

    text = safe_text(incoming.get("summary"))
    stamp = safe_text(incoming.get("updatedAt"))

    if text:
        target_parent[target_key]["summary"] = text

    if stamp:
        target_parent[target_key]["updatedAt"] = stamp
    elif text:
        # The summary arrived without a timestamp, so stamp it now.
        target_parent[target_key]["updatedAt"] = datetime.now(UTC).isoformat().replace("+00:00", "Z")
cleaned["preferences"]["tone"] = safe_text(preferences.get("tone"))
cleaned["preferences"]["verbosity"] = safe_text(preferences.get("verbosity"))
cleaned["preferences"]["codeStyle"] = safe_text(preferences.get("codeStyle"))
cleaned["preferences"]["other"] = safe_text(preferences.get("other"))
copy_summary_section(cleaned["user"], "workContext", user)
copy_summary_section(cleaned["user"], "personalContext", user)
copy_summary_section(cleaned["user"], "topOfMind", user)
copy_summary_section(cleaned["history"], "recentMonths", history)
copy_summary_section(cleaned["history"], "earlierContext", history)
copy_summary_section(cleaned["history"], "longTermBackground", history)
seen: set[str] = set()
for fact in facts if isinstance(facts, list) else []:
@ -87,7 +96,7 @@ class ThreadMemoryUpdater:
"content": content,
"category": str(fact.get("category", "context")).strip() or "context",
"confidence": max(0.0, min(1.0, confidence)),
"createdAt": datetime.utcnow().isoformat() + "Z",
"createdAt": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
"source": thread_id,
}
)
@ -101,8 +110,8 @@ class ThreadMemoryUpdater:
storage = get_thread_memory_storage()
current = storage.load(thread_id)
base_memory = create_empty_thread_memory() if current is None else {
"profile": current.get("profile", {}),
"preferences": current.get("preferences", {}),
"user": current.get("user", {}),
"history": current.get("history", {}),
"facts": current.get("facts", []),
}
prompt = build_thread_memory_prompt(base_memory, messages)

View File

@ -6,8 +6,16 @@ from deerflow.agents.memory.thread_prompt import build_thread_memory_prompt, for
def test_thread_memory_injection_keeps_profile_and_preferences_under_small_budget(monkeypatch):
monkeypatch.setattr("deerflow.agents.memory.thread_prompt._count_tokens", lambda text, encoding_name="cl100k_base": len(text))
memory = {
"profile": {"name": "Alice", "role": "Engineer", "expertise": ["Python", "React"], "language": "en-US", "context": "Building APIs"},
"preferences": {"tone": "technical", "verbosity": "concise", "codeStyle": "typed-first", "other": "tests first"},
"user": {
"workContext": {"summary": "Building APIs", "updatedAt": "2026-05-08T00:00:00Z"},
"personalContext": {"summary": "Engineer using Python and React", "updatedAt": "2026-05-08T00:00:00Z"},
"topOfMind": {"summary": "Improving thread memory", "updatedAt": "2026-05-08T00:00:00Z"},
},
"history": {
"recentMonths": {"summary": "Shipped memory features", "updatedAt": "2026-05-08T00:00:00Z"},
"earlierContext": {"summary": "Started from TS projects", "updatedAt": "2026-05-08T00:00:00Z"},
"longTermBackground": {"summary": "Frontend developer", "updatedAt": "2026-05-08T00:00:00Z"},
},
"facts": [
{"content": "Fact one that might be trimmed", "category": "context", "confidence": 0.9},
{"content": "Fact two that might be trimmed", "category": "context", "confidence": 0.8},
@ -15,14 +23,14 @@ def test_thread_memory_injection_keeps_profile_and_preferences_under_small_budge
}
result = format_thread_memory_for_injection(memory, max_tokens=140)
assert "Profile:" in result
assert "Preferences:" in result
assert "User:" in result
assert "History:" in result
def test_build_thread_memory_prompt_does_not_raise_format_key_error():
    """Building the prompt from a minimal memory dict succeeds and mentions ``"user"``."""
    prompt = build_thread_memory_prompt(
        {"user": {}, "history": {}, "facts": []},
        [HumanMessage(content="My name is Alice.")],
    )
    assert "Current per-thread memory" in prompt
    assert '"user"' in prompt

View File

@ -1,11 +1,21 @@
import json
from deerflow.agents.memory.thread_storage import SqliteThreadMemoryStorage
def _payload():
    """Return a thread-memory fixture in the ``user``/``history``/``facts`` layout.

    ``history.earlierContext`` is left blank on purpose so the fixture contains
    both populated and empty summary sections.
    """
    return {
        "ownerId": None,
        "user": {
            "workContext": {"summary": "Frontend engineer", "updatedAt": "2026-05-08T00:00:00Z"},
            "personalContext": {"summary": "Prefers Chinese", "updatedAt": "2026-05-08T00:00:00Z"},
            "topOfMind": {"summary": "Thread memory migration", "updatedAt": "2026-05-08T00:00:00Z"},
        },
        "history": {
            "recentMonths": {"summary": "Worked on memory features", "updatedAt": "2026-05-08T00:00:00Z"},
            "earlierContext": {"summary": "", "updatedAt": ""},
            "longTermBackground": {"summary": "Builds web products", "updatedAt": "2026-05-08T00:00:00Z"},
        },
        "facts": [],
    }
@ -39,28 +49,35 @@ def test_sqlite_thread_memory_saves_markdown_payload(tmp_path):
row = storage._conn.execute("SELECT memory_md FROM thread_memory WHERE thread_id = ?", (thread_id,)).fetchone()
assert row is not None
assert isinstance(row[0], str)
assert "## Profile" in row[0]
assert "## Preferences" in row[0]
assert "## User" in row[0]
assert "## History" in row[0]
assert "## Facts" in row[0]
def test_sqlite_thread_memory_loads_legacy_json_row(tmp_path):
db_path = tmp_path / "legacy-thread-memory.db"
def test_sqlite_thread_memory_loads_markdown_row(tmp_path):
db_path = tmp_path / "thread-memory.db"
storage = SqliteThreadMemoryStorage(str(db_path))
thread_id = "thread-legacy"
thread_id = "thread-load"
payload = _payload()
with storage._lock:
storage._conn.execute(
"""
INSERT INTO thread_memory (thread_id, owner_id, memory_md, profile, preferences, facts, memory_version, last_updated)
VALUES (?, ?, '', ?, ?, ?, 0, datetime('now'))
INSERT INTO thread_memory (thread_id, owner_id, memory_md, memory_version, last_updated)
VALUES (?, ?, ?, 0, datetime('now'))
""",
(
thread_id,
"owner-1",
'{"name":"Alice","role":null,"expertise":[],"language":null,"context":null}',
'{"tone":null,"verbosity":null,"codeStyle":null,"other":null}',
"[]",
(
"# Thread Memory\n\n"
"Owner ID: owner-1\n\n"
"## User\n```json\n"
+ json.dumps(payload["user"], ensure_ascii=False, indent=2)
+ "\n```\n\n## History\n```json\n"
+ json.dumps(payload["history"], ensure_ascii=False, indent=2)
+ "\n```\n\n## Facts\n```json\n[]\n```"
),
),
)
storage._conn.commit()
@ -68,4 +85,4 @@ def test_sqlite_thread_memory_loads_legacy_json_row(tmp_path):
loaded = storage.load(thread_id)
assert loaded is not None
assert loaded["ownerId"] == "owner-1"
assert loaded["profile"]["name"] == "Alice"
assert loaded["user"]["workContext"]["summary"] == "Frontend engineer"