deerflow2/backend/tests/test_artifacts_router.py

173 lines
7.3 KiB
Python

import asyncio
import zipfile
from pathlib import Path
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from starlette.requests import Request
from starlette.responses import FileResponse
import app.gateway.routers.artifacts as artifacts_router
ACTIVE_ARTIFACT_CASES = [
("poc.html", "<html><body><script>alert('xss')</script></body></html>"),
("page.xhtml", '<?xml version="1.0"?><html xmlns="http://www.w3.org/1999/xhtml"><body>hello</body></html>'),
("image.svg", '<svg xmlns="http://www.w3.org/2000/svg"><script>alert("xss")</script></svg>'),
]
def _make_request(query_string: bytes = b"") -> Request:
return Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": query_string})
def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "note.txt"
text = "Curly quotes: \u201cutf8\u201d"
artifact_path.write_text(text, encoding="utf-8")
original_read_text = Path.read_text
def read_text_with_gbk_default(self, *args, **kwargs):
kwargs.setdefault("encoding", "gbk")
return original_read_text(self, *args, **kwargs)
monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default)
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
request = _make_request()
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request))
assert bytes(response.body).decode("utf-8") == text
assert response.media_type == "text/plain"
@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES)
def test_get_artifact_forces_download_for_active_content(tmp_path, monkeypatch, filename: str, content: str) -> None:
artifact_path = tmp_path / filename
artifact_path.write_text(content, encoding="utf-8")
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/{filename}", _make_request()))
assert isinstance(response, FileResponse)
assert response.headers.get("content-disposition", "").startswith("attachment;")
@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES)
def test_get_artifact_forces_download_for_active_content_in_skill_archive(tmp_path, monkeypatch, filename: str, content: str) -> None:
skill_path = tmp_path / "sample.skill"
with zipfile.ZipFile(skill_path, "w") as zip_ref:
zip_ref.writestr(filename, content)
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path)
response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/sample.skill/{filename}", _make_request()))
assert response.headers.get("content-disposition", "").startswith("attachment;")
assert bytes(response.body) == content.encode("utf-8")
def test_get_artifact_download_false_does_not_force_attachment(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "note.txt"
artifact_path.write_text("hello", encoding="utf-8")
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
app = FastAPI()
app.include_router(artifacts_router.router)
with TestClient(app) as client:
response = client.get("/api/threads/thread-1/artifacts/mnt/user-data/outputs/note.txt?download=false")
assert response.status_code == 200
assert response.text == "hello"
assert "content-disposition" not in response.headers
def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path, monkeypatch) -> None:
skill_path = tmp_path / "sample.skill"
with zipfile.ZipFile(skill_path, "w") as zip_ref:
zip_ref.writestr("notes.txt", "hello")
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path)
app = FastAPI()
app.include_router(artifacts_router.router)
with TestClient(app) as client:
response = client.get("/api/threads/thread-1/artifacts/mnt/user-data/outputs/sample.skill/notes.txt?download=true")
assert response.status_code == 200
assert response.text == "hello"
assert response.headers.get("content-disposition", "").startswith("attachment;")
def test_get_artifact_pdf_with_no_null_bytes_and_non_utf8_content_is_served_inline(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "slides.pdf"
# No NUL bytes, but invalid UTF-8 to simulate binary content misdetected as text.
binary_content = b"%PDF-1.7\n\xff\xfe\xfa\n%%EOF"
artifact_path.write_bytes(binary_content)
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/slides.pdf", _make_request()))
assert bytes(response.body) == binary_content
assert response.media_type == "application/pdf"
assert response.headers.get("content-disposition", "").startswith("inline;")
def test_get_artifact_compat_fallback_for_dash_spacing(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "xhs-note-唯-疲劳端茶.md"
artifact_path.write_text("ok", encoding="utf-8")
requested_path = tmp_path / "xhs-note-唯 - 疲劳端茶.md"
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: requested_path)
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/xhs-note-唯 - 疲劳端茶.md", _make_request()))
assert bytes(response.body).decode("utf-8") == "ok"
assert response.media_type == "text/markdown"
def test_list_reference_files_returns_outputs_and_uploads(tmp_path, monkeypatch) -> None:
outputs_dir = tmp_path / "outputs"
uploads_dir = tmp_path / "uploads"
outputs_dir.mkdir()
uploads_dir.mkdir()
(outputs_dir / "notes.md").write_text("hello", encoding="utf-8")
(outputs_dir / "figures").mkdir()
(outputs_dir / "figures" / "plot.png").write_bytes(b"png")
(uploads_dir / "dataset.csv").write_text("a,b\n1,2\n", encoding="utf-8")
(uploads_dir / "skill").mkdir()
(uploads_dir / "skill" / "internal.txt").write_text("hidden", encoding="utf-8")
class _FakePaths:
def sandbox_outputs_dir(self, _thread_id: str) -> Path:
return outputs_dir
def sandbox_uploads_dir(self, _thread_id: str) -> Path:
return uploads_dir
monkeypatch.setattr(artifacts_router, "get_paths", lambda: _FakePaths())
app = FastAPI()
app.include_router(artifacts_router.router)
with TestClient(app) as client:
response = client.get("/api/threads/thread-1/artifacts/list")
assert response.status_code == 200
payload = response.json()
assert payload["count"] == 3
by_path = {item["virtual_path"]: item for item in payload["files"]}
assert "/mnt/user-data/outputs/notes.md" in by_path
assert "/mnt/user-data/outputs/figures/plot.png" in by_path
assert "/mnt/user-data/uploads/dataset.csv" in by_path
assert "/mnt/user-data/uploads/skill/internal.txt" not in by_path
assert by_path["/mnt/user-data/outputs/notes.md"]["source"] == "artifact"
assert by_path["/mnt/user-data/uploads/dataset.csv"]["source"] == "upload"