# backend/packages/harness/deerflow/agents/memory/json_utils.py
# Shared module created by the refactor that moved these helpers out of
# thread_summary.py so that thread_updater.py parses model output the same way.
"""JSON extraction helpers for LLM-generated memory payloads."""

from __future__ import annotations

import json
import re
from typing import Any

# Sentinel meaning "no JSON could be decoded". A plain ``None`` cannot be used
# because ``null`` is itself a valid decoded JSON value.
_UNDECODED: Any = object()

# Widest ``{ ... }`` span in a text: first opening brace to last closing brace.
_OBJECT_SPAN = re.compile(r"\{.*\}", flags=re.DOTALL)

# Characters that may legitimately follow a closing string quote in JSON:
#   - after a key string:   ":"
#   - after a value string: "," "}" "]"
# The empty string stands for "end of input".
_STRING_TERMINATORS = frozenset({":", ",", "}", "]", ""})


def strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence.

    Leading and trailing whitespace is always stripped. If the stripped text
    starts with three backticks, its first line (the opening fence, including
    any language tag on it) is dropped; the last line is dropped as well when
    it consists solely of three backticks. Text that does not start with a
    fence is returned stripped but otherwise unchanged.
    """
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    lines = cleaned.split("\n")
    # The closing fence may be missing (e.g. truncated model output); in that
    # case only the opening fence line is removed.
    body = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
    return "\n".join(body).strip()


def escape_inner_quotes_in_json_strings(text: str) -> str:
    """Heuristically repair unescaped inner double quotes inside JSON strings.

    If a quote appears while inside a string but the next non-space character
    is not a valid string terminator (comma, object/array close, key colon, or
    end of input), it is treated as content and escaped. Text outside strings
    and already-escaped characters are copied through unchanged.

    Args:
        text: JSON-like text that may contain bare ``"`` inside string values.

    Returns:
        The text with inner quotes backslash-escaped; identical to *text* when
        nothing needed repairing.
    """
    out: list[str] = []
    in_string = False
    escape = False
    n = len(text)
    for i, ch in enumerate(text):
        if not in_string:
            out.append(ch)
            if ch == '"':
                in_string = True
            continue

        if escape:
            # Character following a backslash: copy verbatim, even a quote.
            out.append(ch)
            escape = False
            continue

        if ch == "\\":
            out.append(ch)
            escape = True
            continue

        if ch == '"':
            # Look past whitespace to decide whether this quote ends the string.
            j = i + 1
            while j < n and text[j].isspace():
                j += 1
            next_char = text[j] if j < n else ""
            if next_char in _STRING_TERMINATORS:
                out.append(ch)
                in_string = False
            else:
                out.append('\\"')
            continue

        out.append(ch)

    return "".join(out)


def _decode_with_repair(text: str) -> Any:
    """Decode *text* as JSON, retrying once after quote repair.

    Returns the decoded value, or ``_UNDECODED`` when neither the text nor its
    repaired form is a complete JSON document.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    repaired = escape_inner_quotes_in_json_strings(text)
    if repaired == text:
        # Repair changed nothing, so a second parse would fail identically.
        return _UNDECODED
    try:
        return json.loads(repaired)
    except json.JSONDecodeError:
        return _UNDECODED


def _decode_leading_object(candidate: str) -> Any:
    """Decode the JSON value at the start of *candidate*, ignoring what follows.

    Tries the candidate as-is and then its quote-repaired form. Returns the
    decoded value, or ``_UNDECODED`` when no value can be decoded at offset 0.
    """
    decoder = json.JSONDecoder()
    repaired = escape_inner_quotes_in_json_strings(candidate)
    attempts = (candidate,) if repaired == candidate else (candidate, repaired)
    for attempt in attempts:
        try:
            value, _end = decoder.raw_decode(attempt)
        except json.JSONDecodeError:
            continue
        return value
    return _UNDECODED


def extract_json_object(text: str) -> dict[str, Any] | None:
    """Extract a JSON object from LLM output, tolerating common formatting noise.

    Stages, in order (the first that decodes anything decides the result):

    1. Strip a Markdown code fence and decode the whole text, retrying once
       with inner-quote repair.
    2. Decode the widest ``{ ... }`` span (first ``{`` to last ``}``), again
       with one repair retry.
    3. Decode only the object that starts at the first ``{``, ignoring trailing
       text. This covers replies where a later ``}`` in surrounding prose, or a
       second object, makes the widest span unparsable.

    Args:
        text: Raw model output.

    Returns:
        The decoded object, or ``None`` when nothing could be decoded or the
        decoded value is not a JSON object (for example a list or a string).
    """
    cleaned = strip_code_fence(text)
    value = _decode_with_repair(cleaned)
    if value is _UNDECODED:
        match = _OBJECT_SPAN.search(cleaned)
        if match is None:
            return None
        candidate = match.group(0)
        value = _decode_with_repair(candidate)
        if value is _UNDECODED:
            value = _decode_leading_object(candidate)
    # The sentinel is not a dict, so a total failure also yields None here.
    return value if isinstance(value, dict) else None
import strip_code_fence as _strip_code_fence from deerflow.agents.memory.thread_prompt import create_empty_thread_memory from deerflow.agents.memory.thread_storage import get_thread_memory_storage from deerflow.agents.memory.thread_updater import ThreadMemoryUpdater @@ -74,106 +78,6 @@ def _get_summary_model(): config = get_thread_memory_config() return create_chat_model(name=config.model_name, thinking_enabled=False, stream_usage=False) - -def _strip_code_fence(text: str) -> str: - cleaned = text.strip() - if not cleaned.startswith("```"): - return cleaned - lines = cleaned.split("\n") - return "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]).strip() - - -def _extract_json_object(text: str) -> dict[str, Any] | None: - cleaned = _strip_code_fence(text) - try: - parsed = json.loads(cleaned) - return parsed if isinstance(parsed, dict) else None - except json.JSONDecodeError: - repaired = _escape_inner_quotes_in_json_strings(cleaned) - if repaired != cleaned: - try: - parsed = json.loads(repaired) - if isinstance(parsed, dict): - logger.warning("THREAD_SUMMARY_DEBUG parse_repaired mode=full_text") - return parsed - except json.JSONDecodeError: - pass - - match = re.search(r"\{.*\}", cleaned, flags=re.DOTALL) - if not match: - return None - try: - parsed = json.loads(match.group(0)) - return parsed if isinstance(parsed, dict) else None - except json.JSONDecodeError: - candidate = match.group(0) - repaired = _escape_inner_quotes_in_json_strings(candidate) - if repaired != candidate: - try: - parsed = json.loads(repaired) - if isinstance(parsed, dict): - logger.warning("THREAD_SUMMARY_DEBUG parse_repaired mode=regex_object") - return parsed - except json.JSONDecodeError: - return None - return None - - -def _escape_inner_quotes_in_json_strings(text: str) -> str: - """Heuristically repair unescaped inner double quotes inside JSON strings. 
- - If a quote appears while inside a string but the next non-space character is - not a valid string terminator (comma, object/array close, or key colon), it is - treated as content and escaped. - """ - out: list[str] = [] - in_string = False - escape = False - n = len(text) - i = 0 - while i < n: - ch = text[i] - if not in_string: - out.append(ch) - if ch == '"': - in_string = True - i += 1 - continue - - if escape: - out.append(ch) - escape = False - i += 1 - continue - - if ch == "\\": - out.append(ch) - escape = True - i += 1 - continue - - if ch == '"': - j = i + 1 - while j < n and text[j].isspace(): - j += 1 - next_char = text[j] if j < n else "" - # Valid JSON string terminators in context: - # - key string: : - # - value string: , } ] - if next_char in {":", ",", "}", "]", ""}: - out.append(ch) - in_string = False - else: - out.append('\\"') - i += 1 - continue - - out.append(ch) - i += 1 - - return "".join(out) - - def _merge_summary_patch(base: dict[str, Any], patch: dict[str, Any]) -> dict[str, Any]: merged = {"ownerId": base.get("ownerId"), **create_empty_thread_memory()} merged["user"] = dict(base.get("user", {})) if isinstance(base.get("user"), dict) else merged["user"] diff --git a/backend/packages/harness/deerflow/agents/memory/thread_updater.py b/backend/packages/harness/deerflow/agents/memory/thread_updater.py index f2361652..51fbd814 100644 --- a/backend/packages/harness/deerflow/agents/memory/thread_updater.py +++ b/backend/packages/harness/deerflow/agents/memory/thread_updater.py @@ -9,6 +9,7 @@ import uuid from datetime import UTC, datetime from typing import Any +from deerflow.agents.memory.json_utils import extract_json_object from deerflow.agents.memory.updater import _extract_text from deerflow.agents.memory.thread_prompt import build_thread_memory_prompt, create_empty_thread_memory from deerflow.agents.memory.thread_storage import get_thread_memory_storage @@ -128,10 +129,9 @@ class ThreadMemoryUpdater: try: response = 
self._get_model().invoke(prompt) response_text = _extract_text(response.content).strip() - if response_text.startswith("```"): - lines = response_text.split("\n") - response_text = "\n".join(lines[1:-1] if lines[-1] == "```" else lines[1:]) - parsed = json.loads(response_text) + parsed = extract_json_object(response_text) + if not isinstance(parsed, dict): + raise json.JSONDecodeError("No valid JSON object found", response_text, 0) cleaned = self._scrub_sensitive(parsed, thread_id) expected_version = 0 if current is None else int(current.get("memoryVersion", 0))