From 14cb4b3c3338e22173d93b1ee4d365d785b9e206 Mon Sep 17 00:00:00 2001
From: Titan
Date: Tue, 14 Apr 2026 12:49:36 +0800
Subject: [PATCH] feat(billing): add question extraction and include in
 reserve payload

---
 .../agents/middlewares/billing_middleware.py |  9 +++++
 backend/tests/test_billing_middleware.py     | 35 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/backend/packages/harness/deerflow/agents/middlewares/billing_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/billing_middleware.py
index c2896b77..779fd58e 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/billing_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/billing_middleware.py
@@ -113,6 +113,7 @@ def _reserve_payload(request: ModelRequest) -> tuple[dict[str, Any], str | None,
 
     estimated_input_tokens = _estimate_input_tokens(request.messages)
     estimated_output_tokens = _resolve_estimated_output_tokens(request, model_key)
+    question = _extract_latest_question(request.messages)
 
     call_id = run_id or str(uuid4())
     if not run_id:
@@ -141,6 +142,7 @@ def _reserve_payload(request: ModelRequest) -> tuple[dict[str, Any], str | None,
         "sessionId": session_id,
         "callId": call_id,
         "modelName": model_name,
+        "question": question,
         "frozenType": cfg.frozen_type,
         "estimatedInputTokens": estimated_input_tokens,
         "estimatedOutputTokens": estimated_output_tokens,
@@ -504,6 +506,13 @@ def _extract_latest_user_text(messages: list[Any]) -> str:
     return ""
 
 
+def _extract_latest_question(messages: list[Any]) -> str:
+    question = _extract_latest_user_text(messages)
+    if isinstance(question, str) and len(question) > 27:
+        return question[:27] + "。。。"
+    return question
+
+
 def _extract_usage(request: ModelRequest, response: ModelCallResult | None) -> dict[str, int] | None:
     if response is None:
         usage = None
diff --git a/backend/tests/test_billing_middleware.py b/backend/tests/test_billing_middleware.py
index 193117ee..3d577eef 100644
--- a/backend/tests/test_billing_middleware.py
+++ b/backend/tests/test_billing_middleware.py
@@ -75,6 +75,7 @@ async def test_awrap_model_call_uses_estimated_tokens_and_finalizes(monkeypatch)
     reserve_payload = seen_payloads[0][2]
     assert reserve_payload["callId"] == "run-1"
     assert reserve_payload["frozenType"] == 1
+    assert reserve_payload["question"] == "hello world"
     assert reserve_payload["estimatedInputTokens"] == len("hello world")
     assert reserve_payload["estimatedOutputTokens"] == 4096
     assert "frozenAmount" not in reserve_payload
@@ -239,3 +240,37 @@ async def test_awrap_model_call_uses_worker_config_fallback_run_id(monkeypatch):
     assert isinstance(result, AIMessage)
     reserve_payload = seen_payloads[0][2]
     assert reserve_payload["callId"] == "run-from-worker"
+
+
+@pytest.mark.anyio
+async def test_awrap_model_call_truncates_question_like_token_usage_middleware(monkeypatch):
+    from langchain_core.runnables.config import var_child_runnable_config
+
+    from deerflow.agents.middlewares import billing_middleware as bm
+
+    monkeypatch.setattr(bm, "get_app_config", lambda: _fake_app_config())
+
+    seen_payloads = []
+
+    async def fake_post(url, headers, payload, timeout_seconds):
+        seen_payloads.append((url, headers, payload, timeout_seconds))
+        if url.endswith("/frozen"):
+            return {"status": 1000, "message": "ok", "data": {"frozenId": "frozen-123"}}
+        return {"status": 1000, "message": "ok", "data": {}}
+
+    monkeypatch.setattr(bm, "_post_async", fake_post)
+
+    middleware = BillingMiddleware()
+    long_question = "abcdefghijklmnopqrstuvwxyz1234567890"
+    request = _request_with_latest_user_text(long_question)
+    handler = AsyncMock(return_value=AIMessage(content="ok", usage_metadata={"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}))
+
+    token = var_child_runnable_config.set({"run_id": "run-question-truncate"})
+    try:
+        result = await middleware.awrap_model_call(request, handler)
+    finally:
+        var_child_runnable_config.reset(token)
+
+    assert isinstance(result, AIMessage)
+    reserve_payload = seen_payloads[0][2]
+    assert reserve_payload["question"] == "abcdefghijklmnopqrstuvwxyz1。。。"