fix(artifacts): 修复文件名差异导致 Artifact not found
This commit is contained in:
parent
c667faad65
commit
33705637ea
|
|
@ -1,5 +1,7 @@
|
|||
import logging
|
||||
import mimetypes
|
||||
import re
|
||||
import unicodedata
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from urllib.parse import quote
|
||||
|
|
@ -19,6 +21,9 @@ ACTIVE_CONTENT_MIME_TYPES = {
|
|||
"image/svg+xml",
|
||||
}
|
||||
|
||||
# Matches an ASCII hyphen-minus or a Unicode dash/minus variant (U+2010 hyphen,
# U+2011 non-breaking hyphen, U+2012 figure dash, U+2013 en dash, U+2014 em
# dash, U+2212 minus sign), together with any whitespace on either side.
_DASH_VARIANTS_RE = re.compile(r"\s*[-\u2010\u2011\u2012\u2013\u2014\u2212]\s*")
|
||||
# Matches a run of one or more whitespace characters.
_WHITESPACE_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _build_content_disposition(disposition_type: str, filename: str) -> str:
|
||||
"""Build an RFC 5987 encoded Content-Disposition header value."""
|
||||
|
|
@ -32,6 +37,31 @@ def _build_attachment_headers(filename: str, extra_headers: dict[str, str] | Non
|
|||
return headers
|
||||
|
||||
|
||||
def _canonicalize_filename_for_lookup(filename: str) -> str:
    """Return the canonical spelling of *filename* used for compatibility lookup.

    The name is NFKC-normalized and stripped of surrounding whitespace, every
    dash variant (with any whitespace around it) becomes a single ``-``, and
    each remaining whitespace run becomes one space.
    """
    nfkc_name = unicodedata.normalize("NFKC", filename).strip()
    # Dashes are unified before whitespace is collapsed, so the padding around
    # a dash is dropped entirely instead of being reduced to a single space.
    return _WHITESPACE_RE.sub(" ", _DASH_VARIANTS_RE.sub("-", nfkc_name))
|
||||
|
||||
|
||||
def _find_compat_filename_match(missing_path: Path) -> Path | None:
    """Find a same-directory file whose canonicalized name uniquely matches.

    Args:
        missing_path: Path whose final component could not be found as-is.

    Returns:
        The single regular file in ``missing_path.parent`` whose canonicalized
        name equals the canonicalized name of ``missing_path``. ``None`` when
        there is no such file, when more than one file matches (ambiguous), or
        when the parent directory cannot be listed.
    """
    target_name = _canonicalize_filename_for_lookup(missing_path.name)
    match: Path | None = None

    try:
        # Listing directly (instead of checking is_dir() first) avoids a
        # check-then-use race: a missing, non-directory, or unreadable parent
        # raises an OSError subclass, which is handled below.
        for candidate in missing_path.parent.iterdir():
            if not candidate.is_file():
                continue
            if _canonicalize_filename_for_lookup(candidate.name) != target_name:
                continue
            if match is not None:
                # A second hit makes the lookup ambiguous; stop scanning.
                return None
            match = candidate
    except OSError:
        # This is a best-effort fallback: a directory that cannot be read
        # means "no match", not an unexpected failure for the caller.
        return None

    return match
|
||||
|
||||
|
||||
def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool:
|
||||
"""Check if file is text by examining content for null bytes."""
|
||||
try:
|
||||
|
|
@ -157,7 +187,15 @@ async def get_artifact(thread_id: str, path: str, request: Request, download: bo
|
|||
logger.info(f"Resolving artifact path: thread_id={thread_id}, requested_path={path}, actual_path={actual_path}")
|
||||
|
||||
if not actual_path.exists():
|
||||
raise HTTPException(status_code=404, detail=f"Artifact not found: {path}")
|
||||
compat_path = _find_compat_filename_match(actual_path)
|
||||
if compat_path is None:
|
||||
raise HTTPException(status_code=404, detail=f"Artifact not found: {path}")
|
||||
logger.info(
|
||||
"Artifact compatibility fallback applied: requested_path=%s, resolved_path=%s",
|
||||
actual_path,
|
||||
compat_path,
|
||||
)
|
||||
actual_path = compat_path
|
||||
|
||||
if not actual_path.is_file():
|
||||
raise HTTPException(status_code=400, detail=f"Path is not a file: {path}")
|
||||
|
|
|
|||
|
|
@ -56,6 +56,11 @@ def _normalize_presented_filepath(
|
|||
except ValueError as exc:
|
||||
raise ValueError(f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}") from exc
|
||||
|
||||
if not actual_path.exists():
|
||||
raise ValueError(f"File does not exist: {filepath}")
|
||||
if not actual_path.is_file():
|
||||
raise ValueError(f"Path is not a file: {filepath}")
|
||||
|
||||
return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -117,3 +117,16 @@ def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inli
|
|||
assert bytes(response.body) == binary_content
|
||||
assert response.media_type == "application/pdf"
|
||||
assert response.headers.get("content-disposition", "").startswith("inline;")
|
||||
|
||||
|
||||
def test_get_artifact_compat_fallback_for_dash_spacing(tmp_path, monkeypatch) -> None:
    """A request spelled with spaces around the dash is served from the file stored without them."""
    requested_path = tmp_path / "xhs-note-唯 - 疲劳端茶.md"
    stored_file = tmp_path / "xhs-note-唯-疲劳端茶.md"
    stored_file.write_text("ok", encoding="utf-8")

    # Make the router resolve every virtual path to the (non-existent) requested spelling.
    monkeypatch.setattr(
        artifacts_router,
        "resolve_thread_virtual_path",
        lambda _thread_id, _path: requested_path,
    )

    pending_response = artifacts_router.get_artifact(
        "thread-1",
        "mnt/user-data/outputs/xhs-note-唯 - 疲劳端茶.md",
        _make_request(),
    )
    response = asyncio.run(pending_response)

    assert bytes(response.body).decode("utf-8") == "ok"
    assert response.media_type == "text/markdown"
|
||||
|
|
|
|||
|
|
@ -66,3 +66,18 @@ def test_present_files_rejects_paths_outside_outputs(tmp_path):
|
|||
|
||||
assert "artifacts" not in result.update
|
||||
assert result.update["messages"][0].content == f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
|
||||
|
||||
|
||||
def test_present_files_rejects_nonexistent_file_in_outputs(tmp_path):
    """Presenting a path inside outputs that was never created reports an error and adds no artifacts."""
    outputs_root = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    outputs_root.mkdir(parents=True)
    # The directory exists, but this file is deliberately never written.
    absent_file = outputs_root / "missing.md"

    tool_result = present_file_tool_module.present_file_tool.func(
        runtime=_make_runtime(str(outputs_root)),
        filepaths=[str(absent_file)],
        tool_call_id="tc-4",
    )

    assert "artifacts" not in tool_result.update
    first_message = tool_result.update["messages"][0]
    assert first_message.content == f"Error: File does not exist: {absent_file}"
|
||||
|
|
|
|||
Loading…
Reference in New Issue