fix(backend): 修复二进制产物误判文本导致 PDF 返回异常
This commit is contained in:
parent
e742fbc521
commit
6e99f8cb37
|
|
@ -176,6 +176,11 @@ async def get_artifact(thread_id: str, path: str, request: Request, download: bo
|
|||
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
|
||||
|
||||
if is_text_file_by_content(actual_path):
|
||||
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
|
||||
try:
|
||||
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
|
||||
except UnicodeDecodeError:
|
||||
# Some binary formats (e.g. certain PDFs) may not contain NUL bytes in
|
||||
# the sampled chunk and be misclassified as text. Fall back to binary.
|
||||
logger.debug("Artifact looked like text but is not valid UTF-8: %s", actual_path, exc_info=True)
|
||||
|
||||
return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": _build_content_disposition("inline", actual_path.name)})
|
||||
|
|
|
|||
|
|
@ -102,3 +102,18 @@ def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path
|
|||
assert response.status_code == 200
|
||||
assert response.text == "hello"
|
||||
assert response.headers.get("content-disposition", "").startswith("attachment;")
|
||||
|
||||
|
||||
def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inline(tmp_path, monkeypatch) -> None:
|
||||
artifact_path = tmp_path / "slides.pdf"
|
||||
# No NUL bytes, but invalid UTF-8 to simulate binary content misdetected as text.
|
||||
binary_content = b"%PDF-1.7\n\xff\xfe\xfa\n%%EOF"
|
||||
artifact_path.write_bytes(binary_content)
|
||||
|
||||
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
|
||||
|
||||
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/slides.pdf", _make_request()))
|
||||
|
||||
assert bytes(response.body) == binary_content
|
||||
assert response.media_type == "application/pdf"
|
||||
assert response.headers.get("content-disposition", "").startswith("inline;")
|
||||
|
|
|
|||
Loading…
Reference in New Issue