fix(backend): 修复二进制产物误判文本导致 PDF 返回异常

This commit is contained in:
肖应宇 2026-04-11 16:22:57 +08:00 committed by Titan
parent e742fbc521
commit 6e99f8cb37
2 changed files with 21 additions and 1 deletion

View File

@ -176,6 +176,11 @@ async def get_artifact(thread_id: str, path: str, request: Request, download: bo
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
if is_text_file_by_content(actual_path):
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
try:
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
except UnicodeDecodeError:
# Some binary formats (e.g. certain PDFs) may not contain NUL bytes in
# the sampled chunk and be misclassified as text. Fall back to binary.
logger.debug("Artifact looked like text but is not valid UTF-8: %s", actual_path, exc_info=True)
return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": _build_content_disposition("inline", actual_path.name)})

View File

@ -102,3 +102,18 @@ def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path
assert response.status_code == 200
assert response.text == "hello"
assert response.headers.get("content-disposition", "").startswith("attachment;")
def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inline(tmp_path, monkeypatch) -> None:
    """Check that a NUL-free, non-UTF-8 PDF artifact comes back as raw bytes, inline.

    The artifact written to ``tmp_path`` contains no NUL bytes but is not
    valid UTF-8. The path resolver on ``artifacts_router`` is patched to
    return that file, ``get_artifact`` is invoked, and the response must
    carry the exact bytes, the ``application/pdf`` media type, and an
    ``inline`` Content-Disposition header.
    """
    # No NUL bytes, but invalid UTF-8 to simulate binary content misdetected as text.
    payload = b"%PDF-1.7\n\xff\xfe\xfa\n%%EOF"
    pdf_file = tmp_path / "slides.pdf"
    pdf_file.write_bytes(payload)

    def _resolve_to_pdf(_thread_id, _path):
        # Stand-in resolver: every virtual path maps to the temp PDF.
        return pdf_file

    monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", _resolve_to_pdf)

    pending = artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/slides.pdf", _make_request())
    response = asyncio.run(pending)

    assert bytes(response.body) == payload
    assert response.media_type == "application/pdf"
    disposition = response.headers.get("content-disposition", "")
    assert disposition.startswith("inline;")