fix(uploads): 对当前回合提及的文件优先处理指代
This commit is contained in:
parent
ad709767ea
commit
e3063d94c4
|
|
@ -187,17 +187,49 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||||
file["sent_source_label"] = "mention"
|
file["sent_source_label"] = "mention"
|
||||||
return ordered
|
return ordered
|
||||||
|
|
||||||
def _create_sent_files_summary(self, sent_files: list[dict]) -> str:
|
def _create_sent_files_summary(
|
||||||
|
self,
|
||||||
|
sent_files: list[dict],
|
||||||
|
current_turn_mentions: list[dict] | None = None,
|
||||||
|
) -> str:
|
||||||
"""Create policy block describing unified 'sent files' semantics."""
|
"""Create policy block describing unified 'sent files' semantics."""
|
||||||
|
current_turn_mentions = current_turn_mentions or []
|
||||||
lines = [
|
lines = [
|
||||||
"<sent_files_semantics>",
|
"<sent_files_semantics>",
|
||||||
"Conversation attachment semantics:",
|
"Conversation attachment semantics:",
|
||||||
"- Treat uploaded files and mentioned files as one unified concept of files the user has sent.",
|
"- Treat uploaded files and mentioned files as one unified concept of files the user has sent.",
|
||||||
"- For questions like 'what files did I send' or 'how many files did I send', use the conversation-level union of uploaded + mentioned files.",
|
"- For questions like 'what files did I send' or 'how many files did I send', use the conversation-level union of uploaded + mentioned files.",
|
||||||
"- Count unique files by path (deduplicated).",
|
"- Count unique files by path (deduplicated).",
|
||||||
"",
|
|
||||||
"Conversation-level sent files (deduplicated):",
|
|
||||||
]
|
]
|
||||||
|
if current_turn_mentions:
|
||||||
|
lines.extend(
|
||||||
|
[
|
||||||
|
"- Current-turn mention priority: if the user says deictic references like 'this image/file' (e.g. '这张图', '这个文件'), bind to files mentioned in the current message first.",
|
||||||
|
"- Only ask for clarification when the current message itself mentions multiple files.",
|
||||||
|
"",
|
||||||
|
"Current message mentioned files (highest priority for deictic references):",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
for file in current_turn_mentions:
|
||||||
|
size_kb = file["size"] / 1024
|
||||||
|
size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
|
||||||
|
lines.append(
|
||||||
|
f"- {file['filename']} ({size_str}, source: mention)"
|
||||||
|
)
|
||||||
|
lines.append(f" Path: {file['path']}")
|
||||||
|
lines.extend(
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"Conversation-level sent files (deduplicated):",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
lines.extend(
|
||||||
|
[
|
||||||
|
"",
|
||||||
|
"Conversation-level sent files (deduplicated):",
|
||||||
|
]
|
||||||
|
)
|
||||||
if sent_files:
|
if sent_files:
|
||||||
for file in sent_files:
|
for file in sent_files:
|
||||||
size_kb = file["size"] / 1024
|
size_kb = file["size"] / 1024
|
||||||
|
|
@ -364,6 +396,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||||
# Get newly uploaded files from the current message's additional_kwargs.files
|
# Get newly uploaded files from the current message's additional_kwargs.files
|
||||||
new_files = self._files_from_kwargs(last_message, uploads_dir) or []
|
new_files = self._files_from_kwargs(last_message, uploads_dir) or []
|
||||||
mention_files = self._mentioned_files_from_messages(messages)
|
mention_files = self._mentioned_files_from_messages(messages)
|
||||||
|
current_turn_mentions = self._mentioned_files_from_kwargs(last_message)
|
||||||
|
|
||||||
# Collect historical files from the uploads directory (all except the new ones)
|
# Collect historical files from the uploads directory (all except the new ones)
|
||||||
new_filenames = {f["filename"] for f in new_files}
|
new_filenames = {f["filename"] for f in new_files}
|
||||||
|
|
@ -402,7 +435,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||||
# Create context message(s) and prepend to the last human message content.
|
# Create context message(s) and prepend to the last human message content.
|
||||||
message_parts = [
|
message_parts = [
|
||||||
self._create_files_message(new_files, historical_files),
|
self._create_files_message(new_files, historical_files),
|
||||||
self._create_sent_files_summary(sent_files),
|
self._create_sent_files_summary(sent_files, current_turn_mentions),
|
||||||
]
|
]
|
||||||
if mention_files:
|
if mention_files:
|
||||||
message_parts.append(self._create_mentions_message(mention_files))
|
message_parts.append(self._create_mentions_message(mention_files))
|
||||||
|
|
|
||||||
|
|
@ -363,6 +363,34 @@ class TestBeforeAgent:
|
||||||
assert "history.png" in content
|
assert "history.png" in content
|
||||||
assert "source: mention" in content
|
assert "source: mention" in content
|
||||||
|
|
||||||
|
def test_current_turn_mention_priority_is_injected_for_deictic_reference(self, tmp_path):
    """Check that a file mentioned in the current message gets the priority block.

    The message uses a deictic phrase and carries one mentioned file; the
    content produced by ``before_agent`` must contain the current-turn
    priority rule, its header line, and the mentioned file's entry.
    """
    middleware = _middleware(tmp_path)
    uploads = _uploads_dir(tmp_path)
    # Two unrelated files already sit in the uploads directory
    # (presumably to act as competing candidates -- confirm intent).
    for existing_name, payload in (("old-a.jpg", b"a"), ("old-b.jpg", b"b")):
        (uploads / existing_name).write_bytes(payload)

    mentioned_file = {
        "filename": "target.jpg",
        "size": 0,
        "path": "/mnt/user-data/uploads/target.jpg",
        "status": "uploaded",
        "ref_kind": "mention",
        "ref_source": "upload",
    }
    message = _human("念出这张图片的文件名", files=[mentioned_file])

    outcome = middleware.before_agent(self._state(message), _runtime())

    assert outcome is not None
    rendered = outcome["messages"][-1].content
    expected_fragments = (
        "Current-turn mention priority",
        "this image/file",
        "Current message mentioned files (highest priority for deictic references):",
        "target.jpg (0.0 KB, source: mention)",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||||
|
|
||||||
def test_mentioned_files_do_not_enter_uploaded_files_state(self, tmp_path):
|
def test_mentioned_files_do_not_enter_uploaded_files_state(self, tmp_path):
|
||||||
mw = _middleware(tmp_path)
|
mw = _middleware(tmp_path)
|
||||||
msg = _human(
|
msg = _human(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue