"""Tests for JinaClient async crawl method.""" import logging from unittest.mock import MagicMock import httpx import pytest import deerflow.community.jina_ai.jina_client as jina_client_module from deerflow.community.jina_ai.jina_client import JinaClient from deerflow.community.jina_ai.tools import web_fetch_tool @pytest.fixture def jina_client(): return JinaClient() @pytest.mark.anyio async def test_crawl_success(jina_client, monkeypatch): """Test successful crawl returns response text.""" async def mock_post(self, url, **kwargs): return httpx.Response(200, text="Hello", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) result = await jina_client.crawl("https://example.com") assert result == "Hello" @pytest.mark.anyio async def test_crawl_non_200_status(jina_client, monkeypatch): """Test that non-200 status returns error message.""" async def mock_post(self, url, **kwargs): return httpx.Response(429, text="Rate limited", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) result = await jina_client.crawl("https://example.com") assert result.startswith("Error:") assert "429" in result @pytest.mark.anyio async def test_crawl_empty_response(jina_client, monkeypatch): """Test that empty response returns error message.""" async def mock_post(self, url, **kwargs): return httpx.Response(200, text="", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) result = await jina_client.crawl("https://example.com") assert result.startswith("Error:") assert "empty" in result.lower() @pytest.mark.anyio async def test_crawl_whitespace_only_response(jina_client, monkeypatch): """Test that whitespace-only response returns error message.""" async def mock_post(self, url, **kwargs): return httpx.Response(200, text=" \n ", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) result = await jina_client.crawl("https://example.com") assert result.startswith("Error:") assert "empty" in result.lower() @pytest.mark.anyio async def test_crawl_network_error(jina_client, monkeypatch): """Test that network errors are handled gracefully.""" async def mock_post(self, url, **kwargs): raise httpx.ConnectError("Connection refused") monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) result = await jina_client.crawl("https://example.com") assert result.startswith("Error:") assert "failed" in result.lower() @pytest.mark.anyio async def test_crawl_passes_headers(jina_client, monkeypatch): """Test that correct headers are sent.""" captured_headers = {} async def mock_post(self, url, **kwargs): captured_headers.update(kwargs.get("headers", {})) return httpx.Response(200, text="ok", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) await jina_client.crawl("https://example.com", return_format="markdown", timeout=30) assert captured_headers["X-Return-Format"] == "markdown" assert captured_headers["X-Timeout"] == "30" @pytest.mark.anyio async def test_crawl_includes_api_key_when_set(jina_client, monkeypatch): """Test that Authorization header is set when JINA_API_KEY is available.""" captured_headers = {} async def mock_post(self, url, **kwargs): captured_headers.update(kwargs.get("headers", {})) return httpx.Response(200, text="ok", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) monkeypatch.setenv("JINA_API_KEY", "test-key-123") await jina_client.crawl("https://example.com") assert captured_headers["Authorization"] == "Bearer test-key-123" @pytest.mark.anyio async def test_crawl_warns_once_when_api_key_missing(jina_client, monkeypatch, caplog): """Test that the missing API key warning is logged only once.""" jina_client_module._api_key_warned = False async def mock_post(self, url, **kwargs): return httpx.Response(200, text="ok", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) monkeypatch.delenv("JINA_API_KEY", raising=False) with caplog.at_level(logging.WARNING, logger="deerflow.community.jina_ai.jina_client"): await jina_client.crawl("https://example.com") await jina_client.crawl("https://example.com") warning_count = sum(1 for record in caplog.records if "Jina API key is not set" in record.message) assert warning_count == 1 @pytest.mark.anyio async def test_crawl_no_auth_header_without_api_key(jina_client, monkeypatch): """Test that no Authorization header is set when JINA_API_KEY is not available.""" jina_client_module._api_key_warned = False captured_headers = {} async def mock_post(self, url, **kwargs): captured_headers.update(kwargs.get("headers", {})) return httpx.Response(200, text="ok", request=httpx.Request("POST", url)) monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) monkeypatch.delenv("JINA_API_KEY", raising=False) await jina_client.crawl("https://example.com") assert "Authorization" not in captured_headers @pytest.mark.anyio async def test_web_fetch_tool_returns_error_on_crawl_failure(monkeypatch): """Test that web_fetch_tool short-circuits and returns the error string when crawl fails.""" async def mock_crawl(self, url, **kwargs): return "Error: Jina API returned status 429: Rate limited" mock_config = MagicMock() mock_config.get_tool_config.return_value = None monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: mock_config) monkeypatch.setattr(JinaClient, "crawl", mock_crawl) result = await web_fetch_tool.ainvoke("https://example.com") assert result.startswith("Error:") assert "429" in result @pytest.mark.anyio async def test_web_fetch_tool_returns_markdown_on_success(monkeypatch): """Test that web_fetch_tool returns extracted markdown on successful crawl.""" async def mock_crawl(self, url, **kwargs): return "

Hello world

" mock_config = MagicMock() mock_config.get_tool_config.return_value = None monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: mock_config) monkeypatch.setattr(JinaClient, "crawl", mock_crawl) result = await web_fetch_tool.ainvoke("https://example.com") assert "Hello world" in result assert not result.startswith("Error:")