fix(uploads): 对当前回合提及的文件优先处理指代

This commit is contained in:
肖应宇 2026-04-16 14:57:02 +08:00
parent ad709767ea
commit e3063d94c4
2 changed files with 65 additions and 4 deletions

View File

@ -187,17 +187,49 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
file["sent_source_label"] = "mention"
return ordered
def _create_sent_files_summary(self, sent_files: list[dict]) -> str:
def _create_sent_files_summary(
self,
sent_files: list[dict],
current_turn_mentions: list[dict] | None = None,
) -> str:
"""Create policy block describing unified 'sent files' semantics."""
current_turn_mentions = current_turn_mentions or []
lines = [
"<sent_files_semantics>",
"Conversation attachment semantics:",
"- Treat uploaded files and mentioned files as one unified concept of files the user has sent.",
"- For questions like 'what files did I send' or 'how many files did I send', use the conversation-level union of uploaded + mentioned files.",
"- Count unique files by path (deduplicated).",
"",
"Conversation-level sent files (deduplicated):",
]
if current_turn_mentions:
lines.extend(
[
"- Current-turn mention priority: if the user says deictic references like 'this image/file' (e.g. '这张图', '这个文件'), bind to files mentioned in the current message first.",
"- Only ask for clarification when the current message itself mentions multiple files.",
"",
"Current message mentioned files (highest priority for deictic references):",
]
)
for file in current_turn_mentions:
size_kb = file["size"] / 1024
size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
lines.append(
f"- {file['filename']} ({size_str}, source: mention)"
)
lines.append(f" Path: {file['path']}")
lines.extend(
[
"",
"Conversation-level sent files (deduplicated):",
]
)
else:
lines.extend(
[
"",
"Conversation-level sent files (deduplicated):",
]
)
if sent_files:
for file in sent_files:
size_kb = file["size"] / 1024
@ -364,6 +396,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
# Get newly uploaded files from the current message's additional_kwargs.files
new_files = self._files_from_kwargs(last_message, uploads_dir) or []
mention_files = self._mentioned_files_from_messages(messages)
current_turn_mentions = self._mentioned_files_from_kwargs(last_message)
# Collect historical files from the uploads directory (all except the new ones)
new_filenames = {f["filename"] for f in new_files}
@ -402,7 +435,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
# Create context message(s) and prepend to the last human message content.
message_parts = [
self._create_files_message(new_files, historical_files),
self._create_sent_files_summary(sent_files),
self._create_sent_files_summary(sent_files, current_turn_mentions),
]
if mention_files:
message_parts.append(self._create_mentions_message(mention_files))

View File

@ -363,6 +363,34 @@ class TestBeforeAgent:
assert "history.png" in content
assert "source: mention" in content
def test_current_turn_mention_priority_is_injected_for_deictic_reference(self, tmp_path):
    """Check that a current-message mention yields the deictic-priority block.

    Two unrelated files are written to the uploads directory first, then a
    human message carrying a single ``ref_kind="mention"`` file entry is passed
    through ``before_agent``. The content of the last returned message must
    contain the priority rule text, the section header, and the mentioned
    file's summary line.
    """
    middleware = _middleware(tmp_path)
    uploads_dir = _uploads_dir(tmp_path)

    # Pre-existing files on disk that are not mentioned in the current message.
    for name, payload in (("old-a.jpg", b"a"), ("old-b.jpg", b"b")):
        (uploads_dir / name).write_bytes(payload)

    mentioned_file = {
        "filename": "target.jpg",
        "size": 0,
        "path": "/mnt/user-data/uploads/target.jpg",
        "status": "uploaded",
        "ref_kind": "mention",
        "ref_source": "upload",
    }
    current = _human("念出这张图片的文件名", files=[mentioned_file])

    result = middleware.before_agent(self._state(current), _runtime())
    assert result is not None

    content = result["messages"][-1].content
    expected_fragments = (
        "Current-turn mention priority",
        "this image/file",
        "Current message mentioned files (highest priority for deictic references):",
        "target.jpg (0.0 KB, source: mention)",
    )
    for fragment in expected_fragments:
        assert fragment in content
def test_mentioned_files_do_not_enter_uploaded_files_state(self, tmp_path):
mw = _middleware(tmp_path)
msg = _human(