fix(artifacts): 修复文件名差异导致 Artifact not found
This commit is contained in:
parent
c667faad65
commit
33705637ea
|
|
@ -1,5 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
@ -19,6 +21,9 @@ ACTIVE_CONTENT_MIME_TYPES = {
|
||||||
"image/svg+xml",
|
"image/svg+xml",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Matches a single dash-like character -- ASCII hyphen-minus, U+2010 through
# U+2014, or U+2212 -- together with any whitespace on either side of it.
_DASH_VARIANTS_RE = re.compile(r"\s*[-\u2010\u2011\u2012\u2013\u2014\u2212]\s*")
|
||||||
|
# Matches a run of one or more consecutive whitespace characters.
_WHITESPACE_RE = re.compile(r"\s+")
|
||||||
|
|
||||||
|
|
||||||
def _build_content_disposition(disposition_type: str, filename: str) -> str:
|
def _build_content_disposition(disposition_type: str, filename: str) -> str:
|
||||||
"""Build an RFC 5987 encoded Content-Disposition header value."""
|
"""Build an RFC 5987 encoded Content-Disposition header value."""
|
||||||
|
|
@ -32,6 +37,31 @@ def _build_attachment_headers(filename: str, extra_headers: dict[str, str] | Non
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
|
|
||||||
|
def _canonicalize_filename_for_lookup(filename: str) -> str:
    """Return the canonical spelling of *filename* used for compatibility lookup.

    The name is NFKC-normalized and stripped of leading/trailing whitespace,
    then every match of ``_DASH_VARIANTS_RE`` is replaced with a plain ``-``
    and every match of ``_WHITESPACE_RE`` is replaced with a single space.
    """
    folded = unicodedata.normalize("NFKC", filename)
    trimmed = folded.strip()
    # Dash replacement runs first so whitespace around a dash is absorbed
    # before the remaining whitespace runs are collapsed.
    dashed = _DASH_VARIANTS_RE.sub("-", trimmed)
    return _WHITESPACE_RE.sub(" ", dashed)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_compat_filename_match(missing_path: Path) -> Path | None:
    """Locate the one sibling file whose canonical name equals that of *missing_path*.

    Only regular files directly inside ``missing_path.parent`` are considered.

    Returns:
        The matching path when exactly one sibling matches; ``None`` when the
        parent is not a directory, nothing matches, or several files match.
    """
    directory = missing_path.parent
    if not directory.is_dir():
        return None

    wanted = _canonicalize_filename_for_lookup(missing_path.name)
    hits: list[Path] = [
        entry
        for entry in directory.iterdir()
        if entry.is_file() and _canonicalize_filename_for_lookup(entry.name) == wanted
    ]

    # An ambiguous result (more than one hit) is treated the same as no hit.
    if len(hits) != 1:
        return None
    return hits[0]
|
||||||
|
|
||||||
|
|
||||||
def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool:
|
def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool:
|
||||||
"""Check if file is text by examining content for null bytes."""
|
"""Check if file is text by examining content for null bytes."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -157,7 +187,15 @@ async def get_artifact(thread_id: str, path: str, request: Request, download: bo
|
||||||
logger.info(f"Resolving artifact path: thread_id={thread_id}, requested_path={path}, actual_path={actual_path}")
|
logger.info(f"Resolving artifact path: thread_id={thread_id}, requested_path={path}, actual_path={actual_path}")
|
||||||
|
|
||||||
if not actual_path.exists():
|
if not actual_path.exists():
|
||||||
raise HTTPException(status_code=404, detail=f"Artifact not found: {path}")
|
compat_path = _find_compat_filename_match(actual_path)
|
||||||
|
if compat_path is None:
|
||||||
|
raise HTTPException(status_code=404, detail=f"Artifact not found: {path}")
|
||||||
|
logger.info(
|
||||||
|
"Artifact compatibility fallback applied: requested_path=%s, resolved_path=%s",
|
||||||
|
actual_path,
|
||||||
|
compat_path,
|
||||||
|
)
|
||||||
|
actual_path = compat_path
|
||||||
|
|
||||||
if not actual_path.is_file():
|
if not actual_path.is_file():
|
||||||
raise HTTPException(status_code=400, detail=f"Path is not a file: {path}")
|
raise HTTPException(status_code=400, detail=f"Path is not a file: {path}")
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,11 @@ def _normalize_presented_filepath(
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
raise ValueError(f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}") from exc
|
raise ValueError(f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}") from exc
|
||||||
|
|
||||||
|
if not actual_path.exists():
|
||||||
|
raise ValueError(f"File does not exist: {filepath}")
|
||||||
|
if not actual_path.is_file():
|
||||||
|
raise ValueError(f"Path is not a file: {filepath}")
|
||||||
|
|
||||||
return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
|
return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -117,3 +117,16 @@ def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inli
|
||||||
assert bytes(response.body) == binary_content
|
assert bytes(response.body) == binary_content
|
||||||
assert response.media_type == "application/pdf"
|
assert response.media_type == "application/pdf"
|
||||||
assert response.headers.get("content-disposition", "").startswith("inline;")
|
assert response.headers.get("content-disposition", "").startswith("inline;")
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_artifact_compat_fallback_for_dash_spacing(tmp_path, monkeypatch) -> None:
    """A request whose name has spaces around the dash is served from the compact-dash file."""
    on_disk = tmp_path / "xhs-note-唯-疲劳端茶.md"
    on_disk.write_text("ok", encoding="utf-8")
    # Same name but with spaces around the dash; this path is never created.
    spaced_request = tmp_path / "xhs-note-唯 - 疲劳端茶.md"

    def _resolve_to_spaced(_thread_id, _path):
        return spaced_request

    monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", _resolve_to_spaced)

    pending = artifacts_router.get_artifact(
        "thread-1",
        "mnt/user-data/outputs/xhs-note-唯 - 疲劳端茶.md",
        _make_request(),
    )
    response = asyncio.run(pending)

    served_text = bytes(response.body).decode("utf-8")
    assert served_text == "ok"
    assert response.media_type == "text/markdown"
|
||||||
|
|
|
||||||
|
|
@ -66,3 +66,18 @@ def test_present_files_rejects_paths_outside_outputs(tmp_path):
|
||||||
|
|
||||||
assert "artifacts" not in result.update
|
assert "artifacts" not in result.update
|
||||||
assert result.update["messages"][0].content == f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
|
assert result.update["messages"][0].content == f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_present_files_rejects_nonexistent_file_in_outputs(tmp_path):
    """Presenting a never-created path inside outputs yields an error message and no artifacts."""
    outputs_root = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    outputs_root.mkdir(parents=True)
    # Build the path but never create the file.
    missing_path = outputs_root / "missing.md"

    call_kwargs = {
        "runtime": _make_runtime(str(outputs_root)),
        "filepaths": [str(missing_path)],
        "tool_call_id": "tc-4",
    }
    result = present_file_tool_module.present_file_tool.func(**call_kwargs)

    assert "artifacts" not in result.update
    first_message = result.update["messages"][0]
    assert first_message.content == f"Error: File does not exist: {missing_path}"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue