From 33705637ea7662a5cbf8c161199e3ea7e7cd864f Mon Sep 17 00:00:00 2001 From: MT-Mint <798521692@qq.com> Date: Fri, 17 Apr 2026 13:50:44 +0800 Subject: [PATCH] =?UTF-8?q?fix(artifacts):=20=E4=BF=AE=E5=A4=8D=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=90=8D=E5=B7=AE=E5=BC=82=E5=AF=BC=E8=87=B4=20Artifa?= =?UTF-8?q?ct=20not=20found?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/gateway/routers/artifacts.py | 40 ++++++++++++++++++- .../tools/builtins/present_file_tool.py | 5 +++ backend/tests/test_artifacts_router.py | 13 ++++++ .../test_present_file_tool_core_logic.py | 15 +++++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/backend/app/gateway/routers/artifacts.py b/backend/app/gateway/routers/artifacts.py index ccbd2f10..0ec64154 100644 --- a/backend/app/gateway/routers/artifacts.py +++ b/backend/app/gateway/routers/artifacts.py @@ -1,5 +1,7 @@ import logging import mimetypes +import re +import unicodedata import zipfile from pathlib import Path from urllib.parse import quote @@ -19,6 +21,9 @@ ACTIVE_CONTENT_MIME_TYPES = { "image/svg+xml", } +_DASH_VARIANTS_RE = re.compile(r"\s*[-\u2010\u2011\u2012\u2013\u2014\u2212]\s*") +_WHITESPACE_RE = re.compile(r"\s+") + def _build_content_disposition(disposition_type: str, filename: str) -> str: """Build an RFC 5987 encoded Content-Disposition header value.""" @@ -32,6 +37,31 @@ def _build_attachment_headers(filename: str, extra_headers: dict[str, str] | Non return headers +def _canonicalize_filename_for_lookup(filename: str) -> str: + """Canonical form used for conservative compatibility lookup.""" + normalized = unicodedata.normalize("NFKC", filename).strip() + normalized = _DASH_VARIANTS_RE.sub("-", normalized) + normalized = _WHITESPACE_RE.sub(" ", normalized) + return normalized + + +def _find_compat_filename_match(missing_path: Path) -> Path | None: + """Find a same-directory file whose canonicalized name uniquely matches.""" + parent = missing_path.parent + if not parent.is_dir(): + return None + + target_name = _canonicalize_filename_for_lookup(missing_path.name) + matches: list[Path] = [] + for candidate in parent.iterdir(): + if not candidate.is_file(): + continue + if _canonicalize_filename_for_lookup(candidate.name) == target_name: + matches.append(candidate) + + return matches[0] if len(matches) == 1 else None + + def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool: """Check if file is text by examining content for null bytes.""" try: @@ -157,7 +187,15 @@ async def get_artifact(thread_id: str, path: str, request: Request, download: bo logger.info(f"Resolving artifact path: thread_id={thread_id}, requested_path={path}, actual_path={actual_path}") if not actual_path.exists(): - raise HTTPException(status_code=404, detail=f"Artifact not found: {path}") + compat_path = _find_compat_filename_match(actual_path) + if compat_path is None: + raise HTTPException(status_code=404, detail=f"Artifact not found: {path}") + logger.info( + "Artifact compatibility fallback applied: requested_path=%s, resolved_path=%s", + actual_path, + compat_path, + ) + actual_path = compat_path if not actual_path.is_file(): raise HTTPException(status_code=400, detail=f"Path is not a file: {path}") diff --git a/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py b/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py index 1e0c7610..3bc1666a 100644 --- a/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py +++ b/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py @@ -56,6 +56,11 @@ def _normalize_presented_filepath( except ValueError as exc: raise ValueError(f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}") from exc + if not actual_path.exists(): + raise ValueError(f"File does not exist: {filepath}") + if not actual_path.is_file(): + raise ValueError(f"Path is not a file: {filepath}") + return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}" diff --git a/backend/tests/test_artifacts_router.py b/backend/tests/test_artifacts_router.py index 093dc85f..cbb86af5 100644 --- a/backend/tests/test_artifacts_router.py +++ b/backend/tests/test_artifacts_router.py @@ -117,3 +117,16 @@ def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inli assert bytes(response.body) == binary_content assert response.media_type == "application/pdf" assert response.headers.get("content-disposition", "").startswith("inline;") + + +def test_get_artifact_compat_fallback_for_dash_spacing(tmp_path, monkeypatch) -> None: + artifact_path = tmp_path / "xhs-note-唯-疲劳端茶.md" + artifact_path.write_text("ok", encoding="utf-8") + requested_path = tmp_path / "xhs-note-唯 - 疲劳端茶.md" + + monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: requested_path) + + response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/xhs-note-唯 - 疲劳端茶.md", _make_request())) + + assert bytes(response.body).decode("utf-8") == "ok" + assert response.media_type == "text/markdown" diff --git a/backend/tests/test_present_file_tool_core_logic.py b/backend/tests/test_present_file_tool_core_logic.py index 3068ca50..02d068e6 100644 --- a/backend/tests/test_present_file_tool_core_logic.py +++ b/backend/tests/test_present_file_tool_core_logic.py @@ -66,3 +66,18 @@ def test_present_files_rejects_paths_outside_outputs(tmp_path): assert "artifacts" not in result.update assert result.update["messages"][0].content == f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}" + + +def test_present_files_rejects_nonexistent_file_in_outputs(tmp_path): + outputs_dir = tmp_path / "threads" / "thread-1" / "user-data" / "outputs" + outputs_dir.mkdir(parents=True) + missing_path = outputs_dir / "missing.md" + + result = present_file_tool_module.present_file_tool.func( + runtime=_make_runtime(str(outputs_dir)), + filepaths=[str(missing_path)], + tool_call_id="tc-4", + ) + + assert "artifacts" not in result.update + assert result.update["messages"][0].content == f"Error: File does not exist: {missing_path}"