From 6e99f8cb376a7eaddd8fcd48ef1d430de0b2fbcf Mon Sep 17 00:00:00 2001 From: MT-Mint <798521692@qq.com> Date: Sat, 11 Apr 2026 16:22:57 +0800 Subject: [PATCH] =?UTF-8?q?fix(backend):=20=E4=BF=AE=E5=A4=8D=E4=BA=8C?= =?UTF-8?q?=E8=BF=9B=E5=88=B6=E4=BA=A7=E7=89=A9=E8=AF=AF=E5=88=A4=E6=96=87?= =?UTF-8?q?=E6=9C=AC=E5=AF=BC=E8=87=B4=20PDF=20=E8=BF=94=E5=9B=9E=E5=BC=82?= =?UTF-8?q?=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/gateway/routers/artifacts.py | 7 ++++++- backend/tests/test_artifacts_router.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/backend/app/gateway/routers/artifacts.py b/backend/app/gateway/routers/artifacts.py index a58fd5c0..ccbd2f10 100644 --- a/backend/app/gateway/routers/artifacts.py +++ b/backend/app/gateway/routers/artifacts.py @@ -176,6 +176,11 @@ async def get_artifact(thread_id: str, path: str, request: Request, download: bo return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) if is_text_file_by_content(actual_path): - return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) + try: + return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) + except UnicodeDecodeError: + # Some binary formats (e.g. certain PDFs) may not contain NUL bytes in + # the sampled chunk and be misclassified as text. Fall back to binary. + logger.debug("Artifact looked like text but is not valid UTF-8: %s", actual_path, exc_info=True) return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": _build_content_disposition("inline", actual_path.name)}) diff --git a/backend/tests/test_artifacts_router.py b/backend/tests/test_artifacts_router.py index 9a30ff44..093dc85f 100644 --- a/backend/tests/test_artifacts_router.py +++ b/backend/tests/test_artifacts_router.py @@ -102,3 +102,18 @@ def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path assert response.status_code == 200 assert response.text == "hello" assert response.headers.get("content-disposition", "").startswith("attachment;") + + +def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inline(tmp_path, monkeypatch) -> None: + artifact_path = tmp_path / "slides.pdf" + # No NUL bytes, but invalid UTF-8 to simulate binary content misdetected as text. + binary_content = b"%PDF-1.7\n\xff\xfe\xfa\n%%EOF" + artifact_path.write_bytes(binary_content) + + monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path) + + response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/slides.pdf", _make_request())) + + assert bytes(response.body) == binary_content + assert response.media_type == "application/pdf" + assert response.headers.get("content-disposition", "").startswith("inline;")