feat(billing): add question extraction and include in reserve payload
This commit is contained in:
parent
369f3af384
commit
14cb4b3c33
|
|
@@ -113,6 +113,7 @@ def _reserve_payload(request: ModelRequest) -> tuple[dict[str, Any], str | None,
|
||||||
|
|
||||||
estimated_input_tokens = _estimate_input_tokens(request.messages)
|
estimated_input_tokens = _estimate_input_tokens(request.messages)
|
||||||
estimated_output_tokens = _resolve_estimated_output_tokens(request, model_key)
|
estimated_output_tokens = _resolve_estimated_output_tokens(request, model_key)
|
||||||
|
question = _extract_latest_question(request.messages)
|
||||||
|
|
||||||
call_id = run_id or str(uuid4())
|
call_id = run_id or str(uuid4())
|
||||||
if not run_id:
|
if not run_id:
|
||||||
|
|
@@ -141,6 +142,7 @@ def _reserve_payload(request: ModelRequest) -> tuple[dict[str, Any], str | None,
|
||||||
"sessionId": session_id,
|
"sessionId": session_id,
|
||||||
"callId": call_id,
|
"callId": call_id,
|
||||||
"modelName": model_name,
|
"modelName": model_name,
|
||||||
|
"question": question,
|
||||||
"frozenType": cfg.frozen_type,
|
"frozenType": cfg.frozen_type,
|
||||||
"estimatedInputTokens": estimated_input_tokens,
|
"estimatedInputTokens": estimated_input_tokens,
|
||||||
"estimatedOutputTokens": estimated_output_tokens,
|
"estimatedOutputTokens": estimated_output_tokens,
|
||||||
|
|
@@ -504,6 +506,13 @@ def _extract_latest_user_text(messages: list[Any]) -> str:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_latest_question(messages: list[Any], *, max_chars: int = 27) -> str:
    """Return the latest user question, shortened for the billing reserve payload.

    Args:
        messages: Conversation messages from the model request; forwarded
            unchanged to ``_extract_latest_user_text``.
        max_chars: Maximum number of characters kept before truncation.
            Defaults to 27 to preserve the original hard-coded behavior
            (matches the token-usage middleware's truncation — TODO confirm).

    Returns:
        The latest user text, or its first ``max_chars`` characters followed
        by a ``。。。`` (CJK ellipsis) suffix when it is longer than that.
    """
    question = _extract_latest_user_text(messages)
    # Defensive isinstance guard: the helper is annotated to return str, but
    # messages are list[Any], so verify before slicing.
    if isinstance(question, str) and len(question) > max_chars:
        return question[:max_chars] + "。。。"
    return question
|
||||||
|
|
||||||
|
|
||||||
def _extract_usage(request: ModelRequest, response: ModelCallResult | None) -> dict[str, int] | None:
|
def _extract_usage(request: ModelRequest, response: ModelCallResult | None) -> dict[str, int] | None:
|
||||||
if response is None:
|
if response is None:
|
||||||
usage = None
|
usage = None
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,7 @@ async def test_awrap_model_call_uses_estimated_tokens_and_finalizes(monkeypatch)
|
||||||
reserve_payload = seen_payloads[0][2]
|
reserve_payload = seen_payloads[0][2]
|
||||||
assert reserve_payload["callId"] == "run-1"
|
assert reserve_payload["callId"] == "run-1"
|
||||||
assert reserve_payload["frozenType"] == 1
|
assert reserve_payload["frozenType"] == 1
|
||||||
|
assert reserve_payload["question"] == "hello world"
|
||||||
assert reserve_payload["estimatedInputTokens"] == len("hello world")
|
assert reserve_payload["estimatedInputTokens"] == len("hello world")
|
||||||
assert reserve_payload["estimatedOutputTokens"] == 4096
|
assert reserve_payload["estimatedOutputTokens"] == 4096
|
||||||
assert "frozenAmount" not in reserve_payload
|
assert "frozenAmount" not in reserve_payload
|
||||||
|
|
@@ -239,3 +240,37 @@ async def test_awrap_model_call_uses_worker_config_fallback_run_id(monkeypatch):
|
||||||
assert isinstance(result, AIMessage)
|
assert isinstance(result, AIMessage)
|
||||||
reserve_payload = seen_payloads[0][2]
|
reserve_payload = seen_payloads[0][2]
|
||||||
assert reserve_payload["callId"] == "run-from-worker"
|
assert reserve_payload["callId"] == "run-from-worker"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.anyio
async def test_awrap_model_call_truncates_question_like_token_usage_middleware(monkeypatch):
    """Reserve payload's question is cut to 27 characters plus a ``。。。`` suffix."""
    from langchain_core.runnables.config import var_child_runnable_config

    from deerflow.agents.middlewares import billing_middleware as bm

    monkeypatch.setattr(bm, "get_app_config", lambda: _fake_app_config())

    captured = []

    async def stub_post(url, headers, payload, timeout_seconds):
        # Record every outgoing billing call so payloads can be inspected later.
        captured.append((url, headers, payload, timeout_seconds))
        if url.endswith("/frozen"):
            return {"status": 1000, "message": "ok", "data": {"frozenId": "frozen-123"}}
        return {"status": 1000, "message": "ok", "data": {}}

    monkeypatch.setattr(bm, "_post_async", stub_post)

    middleware = BillingMiddleware()
    long_question = "abcdefghijklmnopqrstuvwxyz1234567890"
    request = _request_with_latest_user_text(long_question)
    handler = AsyncMock(
        return_value=AIMessage(
            content="ok",
            usage_metadata={"input_tokens": 1, "output_tokens": 2, "total_tokens": 3},
        )
    )

    # Pin the run id via the runnable-config contextvar so the payload is
    # deterministic; always reset to avoid leaking into other tests.
    token = var_child_runnable_config.set({"run_id": "run-question-truncate"})
    try:
        result = await middleware.awrap_model_call(request, handler)
    finally:
        var_child_runnable_config.reset(token)

    assert isinstance(result, AIMessage)
    reserve_payload = captured[0][2]
    # First 27 characters of the question, then the CJK ellipsis.
    assert reserve_payload["question"] == "abcdefghijklmnopqrstuvwxyz1。。。"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue