feat(backend): 统一"发送附件"语义为上传+提及

This commit is contained in:
肖应宇 2026-04-16 09:43:04 +08:00
parent dae911af70
commit e5f89c3d37
8 changed files with 208 additions and 13 deletions

View File

@ -266,7 +266,9 @@ You: "Deploying to staging..." [proceed]
**File Management:** **File Management:**
- Uploaded files are automatically listed in the <uploaded_files> section before each request - Uploaded files are automatically listed in the <uploaded_files> section before each request
- Use `read_file` tool to read uploaded files using their paths from the list - Mentioned files are listed in the <mentioned_files> section when references are present
- Treat "files the user sent" as the conversation-level union of uploaded + mentioned files (deduplicated by file path)
- Use `read_file` tool to read listed files using their paths from the file-context sections
- For PDF, PPT, Excel, and Word files, converted Markdown versions (*.md) are available alongside originals - For PDF, PPT, Excel, and Word files, converted Markdown versions (*.md) are available alongside originals
- All temporary work happens in `/mnt/user-data/workspace` - All temporary work happens in `/mnt/user-data/workspace`
- Final deliverables must be copied to `/mnt/user-data/outputs` and presented using `present_files` tool - Final deliverables must be copied to `/mnt/user-data/outputs` and presented using `present_files` tool

View File

@ -343,11 +343,15 @@ def format_conversation_for_update(messages: list[Any]) -> str:
text_parts.append(text_val) text_parts.append(text_val)
content = " ".join(text_parts) if text_parts else str(content) content = " ".join(text_parts) if text_parts else str(content)
# Strip uploaded_files tags from human messages to avoid persisting # Strip file-context tags from human messages to avoid persisting
# ephemeral file path info into long-term memory. Skip the turn entirely # ephemeral file path info into long-term memory. Skip the turn entirely
# when nothing remains after stripping (upload-only message). # when nothing remains after stripping (file-context-only message).
if role == "human": if role == "human":
content = re.sub(r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", "", str(content)).strip() content = re.sub(
r"<(?:uploaded_files|mentioned_files|sent_files_semantics)>[\s\S]*?</(?:uploaded_files|mentioned_files|sent_files_semantics)>\n*",
"",
str(content),
).strip()
if not content: if not content:
continue continue

View File

@ -213,6 +213,7 @@ _UPLOAD_SENTENCE_RE = re.compile(
r"|/mnt/user-data/uploads/" r"|/mnt/user-data/uploads/"
r"|<uploaded_files>" r"|<uploaded_files>"
r"|<mentioned_files>" r"|<mentioned_files>"
r"|<sent_files_semantics>"
r")[^.!?]*[.!?]?\s*", r")[^.!?]*[.!?]?\s*",
re.IGNORECASE, re.IGNORECASE,
) )

View File

@ -15,7 +15,7 @@ from deerflow.config.memory_config import get_memory_config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_UPLOAD_BLOCK_RE = re.compile( _UPLOAD_BLOCK_RE = re.compile(
r"<(?:uploaded_files|mentioned_files)>[\s\S]*?</(?:uploaded_files|mentioned_files)>\n*", r"<(?:uploaded_files|mentioned_files|sent_files_semantics)>[\s\S]*?</(?:uploaded_files|mentioned_files|sent_files_semantics)>\n*",
re.IGNORECASE, re.IGNORECASE,
) )
_CORRECTION_PATTERNS = ( _CORRECTION_PATTERNS = (

View File

@ -145,6 +145,72 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
return "\n".join(lines) return "\n".join(lines)
def _merge_sent_files(self, uploaded_files: list[dict], mention_files: list[dict]) -> list[dict]:
"""Build conversation-level sent-files view (uploads mentions, deduped by path)."""
merged: dict[str, dict] = {}
def _upsert(file: dict, source: str) -> None:
path = file.get("path") or ""
if not path:
return
entry = merged.get(path)
if entry is None:
entry = {
"filename": file.get("filename") or Path(path).name,
"path": path,
"size": int(file.get("size") or 0),
"sent_sources": set(),
}
merged[path] = entry
entry["sent_sources"].add(source)
entry["size"] = max(entry["size"], int(file.get("size") or 0))
if source == "mention" and file.get("ref_source"):
entry["ref_source"] = file["ref_source"]
for file in uploaded_files:
_upsert(file, "upload")
for file in mention_files:
_upsert(file, "mention")
ordered = sorted(
merged.values(),
key=lambda f: (str(f.get("filename", "")).lower(), str(f.get("path", "")).lower()),
)
for file in ordered:
sources = file.get("sent_sources") or set()
if "upload" in sources and "mention" in sources:
file["sent_source_label"] = "upload+mention"
elif "upload" in sources:
file["sent_source_label"] = "upload"
else:
file["sent_source_label"] = "mention"
return ordered
def _create_sent_files_summary(self, sent_files: list[dict]) -> str:
"""Create policy block describing unified 'sent files' semantics."""
lines = [
"<sent_files_semantics>",
"Conversation attachment semantics:",
"- Treat uploaded files and mentioned files as one unified concept of files the user has sent.",
"- For questions like 'what files did I send' or 'how many files did I send', use the conversation-level union of uploaded + mentioned files.",
"- Count unique files by path (deduplicated).",
"",
"Conversation-level sent files (deduplicated):",
]
if sent_files:
for file in sent_files:
size_kb = file["size"] / 1024
size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
lines.append(
f"- {file['filename']} ({size_str}, source: {file['sent_source_label']})"
)
lines.append(f" Path: {file['path']}")
else:
lines.append("- (none)")
lines.append("</sent_files_semantics>")
return "\n".join(lines)
def _mentioned_files_from_kwargs(self, message: HumanMessage) -> list[dict]: def _mentioned_files_from_kwargs(self, message: HumanMessage) -> list[dict]:
"""Extract mention references from additional_kwargs.files. """Extract mention references from additional_kwargs.files.
@ -186,7 +252,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
return references return references
def _create_mentions_message(self, mention_files: list[dict]) -> str: def _create_mentions_message(self, mention_files: list[dict]) -> str:
lines = ["<mentioned_files>", "The following files were referenced in this message:", ""] lines = ["<mentioned_files>", "The following files were referenced by the user in this conversation:", ""]
for file in mention_files: for file in mention_files:
size_kb = file["size"] / 1024 size_kb = file["size"] / 1024
size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB" size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
@ -199,6 +265,21 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
lines.append("</mentioned_files>") lines.append("</mentioned_files>")
return "\n".join(lines) return "\n".join(lines)
def _mentioned_files_from_messages(self, messages: list) -> list[dict]:
"""Extract mention references across conversation messages."""
references: list[dict] = []
seen: set[tuple[str, str]] = set()
for message in messages:
if not isinstance(message, HumanMessage):
continue
for file in self._mentioned_files_from_kwargs(message):
key = (file["filename"], file["path"])
if key in seen:
continue
seen.add(key)
references.append(file)
return references
def _files_from_kwargs(self, message: HumanMessage, uploads_dir: Path | None = None) -> list[dict] | None: def _files_from_kwargs(self, message: HumanMessage, uploads_dir: Path | None = None) -> list[dict] | None:
"""Extract file info from message additional_kwargs.files. """Extract file info from message additional_kwargs.files.
@ -282,7 +363,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
# Get newly uploaded files from the current message's additional_kwargs.files # Get newly uploaded files from the current message's additional_kwargs.files
new_files = self._files_from_kwargs(last_message, uploads_dir) or [] new_files = self._files_from_kwargs(last_message, uploads_dir) or []
mention_files = self._mentioned_files_from_kwargs(last_message) mention_files = self._mentioned_files_from_messages(messages)
# Collect historical files from the uploads directory (all except the new ones) # Collect historical files from the uploads directory (all except the new ones)
new_filenames = {f["filename"] for f in new_files} new_filenames = {f["filename"] for f in new_files}
@ -311,13 +392,18 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
file["outline"] = outline file["outline"] = outline
file["outline_preview"] = preview file["outline_preview"] = preview
if not new_files and not historical_files and not mention_files: sent_files = self._merge_sent_files(new_files + historical_files, mention_files)
if not new_files and not historical_files and not mention_files and not sent_files:
return None return None
logger.debug(f"New files: {[f['filename'] for f in new_files]}, historical: {[f['filename'] for f in historical_files]}") logger.debug(f"New files: {[f['filename'] for f in new_files]}, historical: {[f['filename'] for f in historical_files]}")
# Create context message(s) and prepend to the last human message content. # Create context message(s) and prepend to the last human message content.
message_parts = [self._create_files_message(new_files, historical_files)] message_parts = [
self._create_files_message(new_files, historical_files),
self._create_sent_files_summary(sent_files),
]
if mention_files: if mention_files:
message_parts.append(self._create_mentions_message(mention_files)) message_parts.append(self._create_mentions_message(mention_files))
files_message = "\n\n".join(message_parts) files_message = "\n\n".join(message_parts)

View File

@ -510,6 +510,22 @@ class TestFormatConversationForUpdate:
assert "raw user text" in result assert "raw user text" in result
assert "structured text" in result assert "structured text" in result
def test_strips_uploaded_mentioned_and_sent_semantics_tags(self):
    """All three file-context blocks are removed; the user's own text is kept."""
    injected_blocks = [
        "<uploaded_files>\nfile list\n</uploaded_files>\n",
        "<sent_files_semantics>\nsummary\n</sent_files_semantics>\n",
        "<mentioned_files>\nmentions\n</mentioned_files>\n",
    ]
    human_turn = MagicMock()
    human_turn.type = "human"
    human_turn.content = "".join(injected_blocks) + "actual question"

    formatted = format_conversation_for_update([human_turn])

    assert "actual question" in formatted
    # No tag name may survive, whether as an opening or a closing tag.
    for tag_name in ("uploaded_files", "mentioned_files", "sent_files_semantics"):
        assert tag_name not in formatted
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# update_memory - structured LLM response handling # update_memory - structured LLM response handling

View File

@ -278,10 +278,91 @@ class TestBeforeAgent:
assert result is not None assert result is not None
content = result["messages"][-1].content content = result["messages"][-1].content
assert "<mentioned_files>" in content assert "<mentioned_files>" in content
assert "referenced in this message" in content assert "referenced by the user in this conversation" in content
assert "/mnt/user-data/uploads/saten-ruiko.jpg" in content assert "/mnt/user-data/uploads/saten-ruiko.jpg" in content
assert "Do not re-upload them." in content assert "Do not re-upload them." in content
def test_injects_sent_files_semantics_for_upload_and_mention_union(self, tmp_path):
    """An upload plus a separate mention both appear in the sent-files block."""
    middleware = _middleware(tmp_path)
    upload_root = _uploads_dir(tmp_path)
    (upload_root / "uploaded.txt").write_bytes(b"u")

    uploaded_entry = {
        "filename": "uploaded.txt",
        "size": 1,
        "path": "/mnt/user-data/uploads/uploaded.txt",
    }
    mentioned_entry = {
        "filename": "mentioned.jpg",
        "size": 0,
        "path": "/mnt/user-data/outputs/mentioned.jpg",
        "ref_kind": "mention",
        "ref_source": "artifact",
    }
    question = _human("how many files did I send?", files=[uploaded_entry, mentioned_entry])

    outcome = middleware.before_agent(self._state(question), _runtime())

    assert outcome is not None
    injected = outcome["messages"][-1].content
    expected_fragments = (
        "<sent_files_semantics>",
        "union of uploaded + mentioned files",
        "uploaded.txt (0.0 KB, source: upload)",
        "mentioned.jpg (0.0 KB, source: mention)",
    )
    for fragment in expected_fragments:
        assert fragment in injected
def test_sent_files_union_dedupes_same_file_path_and_marks_both(self, tmp_path):
    """A path that is both uploaded and mentioned is listed once as upload+mention."""
    middleware = _middleware(tmp_path)
    upload_root = _uploads_dir(tmp_path)
    (upload_root / "same.txt").write_bytes(b"x")

    as_upload = {
        "filename": "same.txt",
        "size": 1,
        "path": "/mnt/user-data/uploads/same.txt",
    }
    as_mention = {
        "filename": "same.txt",
        "size": 1,
        "path": "/mnt/user-data/uploads/same.txt",
        "ref_kind": "mention",
        "ref_source": "upload",
    }
    request = _human("count files", files=[as_upload, as_mention])

    outcome = middleware.before_agent(self._state(request), _runtime())

    assert outcome is not None
    injected = outcome["messages"][-1].content
    merged_line = "same.txt (0.0 KB, source: upload+mention)"
    assert merged_line in injected
    # Exactly one occurrence proves the two references were collapsed.
    assert injected.count(merged_line) == 1
def test_historical_mentions_are_included_for_follow_up_count_question(self, tmp_path):
    """A mention from an earlier human turn is still reported on a later turn."""
    middleware = _middleware(tmp_path)
    earlier_mention = {
        "filename": "history.png",
        "size": 0,
        "path": "/mnt/user-data/outputs/history.png",
        "ref_kind": "mention",
        "ref_source": "artifact",
    }
    earlier_turn = _human("analyse this", files=[earlier_mention])
    # The follow-up question itself carries no files.
    follow_up = _human("我总共发送了多少个附件?")

    outcome = middleware.before_agent(self._state(earlier_turn, follow_up), _runtime())

    assert outcome is not None
    injected = outcome["messages"][-1].content
    for fragment in ("<mentioned_files>", "history.png", "source: mention"):
        assert fragment in injected
def test_mentioned_files_do_not_enter_uploaded_files_state(self, tmp_path): def test_mentioned_files_do_not_enter_uploaded_files_state(self, tmp_path):
mw = _middleware(tmp_path) mw = _middleware(tmp_path)
msg = _human( msg = _human(

View File

@ -337,12 +337,17 @@ export interface FileInMessage {
} }
/** /**
* Strip <uploaded_files> tag from message content. * Strip internal file-context tags from message content.
* Returns the content with the tag removed. * Returns the content with these tags removed:
* - <uploaded_files>...</uploaded_files>
* - <mentioned_files>...</mentioned_files>
* - <sent_files_semantics>...</sent_files_semantics>
*/ */
export function stripUploadedFilesTag(content: string): string { export function stripUploadedFilesTag(content: string): string {
return content return content
.replace(/<uploaded_files>[\s\S]*?<\/uploaded_files>/g, "") .replace(/<uploaded_files>[\s\S]*?<\/uploaded_files>/g, "")
.replace(/<mentioned_files>[\s\S]*?<\/mentioned_files>/g, "")
.replace(/<sent_files_semantics>[\s\S]*?<\/sent_files_semantics>/g, "")
.trim(); .trim();
} }