feat(config): add when_thinking_disabled support for model configs (#1970)
* feat(config): add when_thinking_disabled support for model configs

  Allow users to explicitly configure which parameters are sent to the model when thinking is disabled, via a new `when_thinking_disabled` field in the model config. This mirrors the existing `when_thinking_enabled` pattern and, when set, takes full precedence over the hardcoded disable behavior. Backwards compatible — existing configs work unchanged.

  Closes #1675

* fix(config): address copilot review — gate when_thinking_disabled independently

  - Switch the truthiness check to `is not None` so that empty-dict overrides work
  - Restructure the disable path so that `when_thinking_disabled` is gated independently of `has_thinking_settings`, allowing it to work without `when_thinking_enabled`
  - Update the test to reflect the new behavior
This commit is contained in:
parent
35f141fc48
commit
194bab4691
|
|
@ -27,6 +27,10 @@ class ModelConfig(BaseModel):
|
||||||
default_factory=lambda: None,
|
default_factory=lambda: None,
|
||||||
description="Extra settings to be passed to the model when thinking is enabled",
|
description="Extra settings to be passed to the model when thinking is enabled",
|
||||||
)
|
)
|
||||||
|
when_thinking_disabled: dict | None = Field(
|
||||||
|
default_factory=lambda: None,
|
||||||
|
description="Extra settings to be passed to the model when thinking is disabled",
|
||||||
|
)
|
||||||
supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs")
|
supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs")
|
||||||
thinking: dict | None = Field(
|
thinking: dict | None = Field(
|
||||||
default_factory=lambda: None,
|
default_factory=lambda: None,
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,7 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, *
|
||||||
"supports_thinking",
|
"supports_thinking",
|
||||||
"supports_reasoning_effort",
|
"supports_reasoning_effort",
|
||||||
"when_thinking_enabled",
|
"when_thinking_enabled",
|
||||||
|
"when_thinking_disabled",
|
||||||
"thinking",
|
"thinking",
|
||||||
"supports_vision",
|
"supports_vision",
|
||||||
},
|
},
|
||||||
|
|
@ -72,21 +73,24 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, *
|
||||||
raise ValueError(f"Model {name} does not support thinking. Set `supports_thinking` to true in the `config.yaml` to enable thinking.") from None
|
raise ValueError(f"Model {name} does not support thinking. Set `supports_thinking` to true in the `config.yaml` to enable thinking.") from None
|
||||||
if effective_wte:
|
if effective_wte:
|
||||||
model_settings_from_config.update(effective_wte)
|
model_settings_from_config.update(effective_wte)
|
||||||
if not thinking_enabled and has_thinking_settings:
|
if not thinking_enabled:
|
||||||
if effective_wte.get("extra_body", {}).get("thinking", {}).get("type"):
|
if model_config.when_thinking_disabled is not None:
|
||||||
|
# User-provided disable settings take full precedence
|
||||||
|
model_settings_from_config.update(model_config.when_thinking_disabled)
|
||||||
|
elif has_thinking_settings and effective_wte.get("extra_body", {}).get("thinking", {}).get("type"):
|
||||||
# OpenAI-compatible gateway: thinking is nested under extra_body
|
# OpenAI-compatible gateway: thinking is nested under extra_body
|
||||||
model_settings_from_config["extra_body"] = _deep_merge_dicts(
|
model_settings_from_config["extra_body"] = _deep_merge_dicts(
|
||||||
model_settings_from_config.get("extra_body"),
|
model_settings_from_config.get("extra_body"),
|
||||||
{"thinking": {"type": "disabled"}},
|
{"thinking": {"type": "disabled"}},
|
||||||
)
|
)
|
||||||
model_settings_from_config["reasoning_effort"] = "minimal"
|
model_settings_from_config["reasoning_effort"] = "minimal"
|
||||||
elif disable_chat_template_kwargs := _vllm_disable_chat_template_kwargs(effective_wte.get("extra_body", {}).get("chat_template_kwargs") or {}):
|
elif has_thinking_settings and (disable_chat_template_kwargs := _vllm_disable_chat_template_kwargs(effective_wte.get("extra_body", {}).get("chat_template_kwargs") or {})):
|
||||||
# vLLM uses chat template kwargs to switch thinking on/off.
|
# vLLM uses chat template kwargs to switch thinking on/off.
|
||||||
model_settings_from_config["extra_body"] = _deep_merge_dicts(
|
model_settings_from_config["extra_body"] = _deep_merge_dicts(
|
||||||
model_settings_from_config.get("extra_body"),
|
model_settings_from_config.get("extra_body"),
|
||||||
{"chat_template_kwargs": disable_chat_template_kwargs},
|
{"chat_template_kwargs": disable_chat_template_kwargs},
|
||||||
)
|
)
|
||||||
elif effective_wte.get("thinking", {}).get("type"):
|
elif has_thinking_settings and effective_wte.get("thinking", {}).get("type"):
|
||||||
# Native langchain_anthropic: thinking is a direct constructor parameter
|
# Native langchain_anthropic: thinking is a direct constructor parameter
|
||||||
model_settings_from_config["thinking"] = {"type": "disabled"}
|
model_settings_from_config["thinking"] = {"type": "disabled"}
|
||||||
if not model_config.supports_reasoning_effort:
|
if not model_config.supports_reasoning_effort:
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ def _make_model(
|
||||||
supports_thinking: bool = False,
|
supports_thinking: bool = False,
|
||||||
supports_reasoning_effort: bool = False,
|
supports_reasoning_effort: bool = False,
|
||||||
when_thinking_enabled: dict | None = None,
|
when_thinking_enabled: dict | None = None,
|
||||||
|
when_thinking_disabled: dict | None = None,
|
||||||
thinking: dict | None = None,
|
thinking: dict | None = None,
|
||||||
max_tokens: int | None = None,
|
max_tokens: int | None = None,
|
||||||
) -> ModelConfig:
|
) -> ModelConfig:
|
||||||
|
|
@ -43,6 +44,7 @@ def _make_model(
|
||||||
supports_thinking=supports_thinking,
|
supports_thinking=supports_thinking,
|
||||||
supports_reasoning_effort=supports_reasoning_effort,
|
supports_reasoning_effort=supports_reasoning_effort,
|
||||||
when_thinking_enabled=when_thinking_enabled,
|
when_thinking_enabled=when_thinking_enabled,
|
||||||
|
when_thinking_disabled=when_thinking_disabled,
|
||||||
thinking=thinking,
|
thinking=thinking,
|
||||||
supports_vision=False,
|
supports_vision=False,
|
||||||
)
|
)
|
||||||
|
|
@ -244,6 +246,136 @@ def test_thinking_disabled_no_when_thinking_enabled_does_nothing(monkeypatch):
|
||||||
assert captured.get("reasoning_effort") is None
|
assert captured.get("reasoning_effort") is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# when_thinking_disabled config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_when_thinking_disabled_takes_precedence_over_hardcoded_disable(monkeypatch):
|
||||||
|
"""When when_thinking_disabled is set, it takes full precedence over the
|
||||||
|
hardcoded disable logic (extra_body.thinking.type=disabled etc.)."""
|
||||||
|
wte = {"extra_body": {"thinking": {"type": "enabled", "budget_tokens": 10000}}}
|
||||||
|
wtd = {"extra_body": {"thinking": {"type": "disabled"}}, "reasoning_effort": "low"}
|
||||||
|
cfg = _make_app_config(
|
||||||
|
[
|
||||||
|
_make_model(
|
||||||
|
"custom-disable",
|
||||||
|
supports_thinking=True,
|
||||||
|
supports_reasoning_effort=True,
|
||||||
|
when_thinking_enabled=wte,
|
||||||
|
when_thinking_disabled=wtd,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
_patch_factory(monkeypatch, cfg)
|
||||||
|
|
||||||
|
captured: dict = {}
|
||||||
|
|
||||||
|
class CapturingModel(FakeChatModel):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
captured.update(kwargs)
|
||||||
|
BaseChatModel.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
|
||||||
|
|
||||||
|
factory_module.create_chat_model(name="custom-disable", thinking_enabled=False)
|
||||||
|
|
||||||
|
assert captured.get("extra_body") == {"thinking": {"type": "disabled"}}
|
||||||
|
# User overrode the hardcoded "minimal" with "low"
|
||||||
|
assert captured.get("reasoning_effort") == "low"
|
||||||
|
|
||||||
|
|
||||||
|
def test_when_thinking_disabled_not_used_when_thinking_enabled(monkeypatch):
|
||||||
|
"""when_thinking_disabled must have no effect when thinking_enabled=True."""
|
||||||
|
wte = {"extra_body": {"thinking": {"type": "enabled"}}}
|
||||||
|
wtd = {"extra_body": {"thinking": {"type": "disabled"}}}
|
||||||
|
cfg = _make_app_config(
|
||||||
|
[
|
||||||
|
_make_model(
|
||||||
|
"wtd-ignored",
|
||||||
|
supports_thinking=True,
|
||||||
|
when_thinking_enabled=wte,
|
||||||
|
when_thinking_disabled=wtd,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
_patch_factory(monkeypatch, cfg)
|
||||||
|
|
||||||
|
captured: dict = {}
|
||||||
|
|
||||||
|
class CapturingModel(FakeChatModel):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
captured.update(kwargs)
|
||||||
|
BaseChatModel.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
|
||||||
|
|
||||||
|
factory_module.create_chat_model(name="wtd-ignored", thinking_enabled=True)
|
||||||
|
|
||||||
|
# when_thinking_enabled should apply, NOT when_thinking_disabled
|
||||||
|
assert captured.get("extra_body") == {"thinking": {"type": "enabled"}}
|
||||||
|
|
||||||
|
|
||||||
|
def test_when_thinking_disabled_without_when_thinking_enabled_still_applies(monkeypatch):
|
||||||
|
"""when_thinking_disabled alone (no when_thinking_enabled) should still apply its settings."""
|
||||||
|
cfg = _make_app_config(
|
||||||
|
[
|
||||||
|
_make_model(
|
||||||
|
"wtd-only",
|
||||||
|
supports_thinking=True,
|
||||||
|
supports_reasoning_effort=True,
|
||||||
|
when_thinking_disabled={"reasoning_effort": "low"},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
_patch_factory(monkeypatch, cfg)
|
||||||
|
|
||||||
|
captured: dict = {}
|
||||||
|
|
||||||
|
class CapturingModel(FakeChatModel):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
captured.update(kwargs)
|
||||||
|
BaseChatModel.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
|
||||||
|
|
||||||
|
factory_module.create_chat_model(name="wtd-only", thinking_enabled=False)
|
||||||
|
|
||||||
|
# when_thinking_disabled is now gated independently of has_thinking_settings
|
||||||
|
assert captured.get("reasoning_effort") == "low"
|
||||||
|
|
||||||
|
|
||||||
|
def test_when_thinking_disabled_excluded_from_model_dump(monkeypatch):
|
||||||
|
"""when_thinking_disabled must not leak into the model constructor kwargs."""
|
||||||
|
wte = {"extra_body": {"thinking": {"type": "enabled"}}}
|
||||||
|
wtd = {"extra_body": {"thinking": {"type": "disabled"}}}
|
||||||
|
cfg = _make_app_config(
|
||||||
|
[
|
||||||
|
_make_model(
|
||||||
|
"no-leak-wtd",
|
||||||
|
supports_thinking=True,
|
||||||
|
when_thinking_enabled=wte,
|
||||||
|
when_thinking_disabled=wtd,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
_patch_factory(monkeypatch, cfg)
|
||||||
|
|
||||||
|
captured: dict = {}
|
||||||
|
|
||||||
|
class CapturingModel(FakeChatModel):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
captured.update(kwargs)
|
||||||
|
BaseChatModel.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
|
||||||
|
|
||||||
|
factory_module.create_chat_model(name="no-leak-wtd", thinking_enabled=True)
|
||||||
|
|
||||||
|
# when_thinking_disabled value must NOT appear as a raw key
|
||||||
|
assert "when_thinking_disabled" not in captured
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# reasoning_effort stripping
|
# reasoning_effort stripping
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Bump this number when the config schema changes.
|
# Bump this number when the config schema changes.
|
||||||
# Run `make config-upgrade` to merge new fields into your local config.yaml.
|
# Run `make config-upgrade` to merge new fields into your local config.yaml.
|
||||||
config_version: 5
|
config_version: 6
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Logging
|
# Logging
|
||||||
|
|
@ -50,6 +50,10 @@ models:
|
||||||
# extra_body:
|
# extra_body:
|
||||||
# thinking:
|
# thinking:
|
||||||
# type: enabled
|
# type: enabled
|
||||||
|
# when_thinking_disabled:
|
||||||
|
# extra_body:
|
||||||
|
# thinking:
|
||||||
|
# type: disabled
|
||||||
|
|
||||||
# Example: OpenAI model
|
# Example: OpenAI model
|
||||||
# - name: gpt-4
|
# - name: gpt-4
|
||||||
|
|
@ -88,6 +92,9 @@ models:
|
||||||
# when_thinking_enabled:
|
# when_thinking_enabled:
|
||||||
# thinking:
|
# thinking:
|
||||||
# type: enabled
|
# type: enabled
|
||||||
|
# when_thinking_disabled:
|
||||||
|
# thinking:
|
||||||
|
# type: disabled
|
||||||
|
|
||||||
# Example: Google Gemini model (native SDK, no thinking support)
|
# Example: Google Gemini model (native SDK, no thinking support)
|
||||||
# - name: gemini-2.5-pro
|
# - name: gemini-2.5-pro
|
||||||
|
|
@ -120,6 +127,10 @@ models:
|
||||||
# extra_body:
|
# extra_body:
|
||||||
# thinking:
|
# thinking:
|
||||||
# type: enabled
|
# type: enabled
|
||||||
|
# when_thinking_disabled:
|
||||||
|
# extra_body:
|
||||||
|
# thinking:
|
||||||
|
# type: disabled
|
||||||
|
|
||||||
# Example: DeepSeek model (with thinking support)
|
# Example: DeepSeek model (with thinking support)
|
||||||
# - name: deepseek-v3
|
# - name: deepseek-v3
|
||||||
|
|
@ -136,6 +147,10 @@ models:
|
||||||
# extra_body:
|
# extra_body:
|
||||||
# thinking:
|
# thinking:
|
||||||
# type: enabled
|
# type: enabled
|
||||||
|
# when_thinking_disabled:
|
||||||
|
# extra_body:
|
||||||
|
# thinking:
|
||||||
|
# type: disabled
|
||||||
|
|
||||||
# Example: Kimi K2.5 model
|
# Example: Kimi K2.5 model
|
||||||
# - name: kimi-k2.5
|
# - name: kimi-k2.5
|
||||||
|
|
@ -153,6 +168,10 @@ models:
|
||||||
# extra_body:
|
# extra_body:
|
||||||
# thinking:
|
# thinking:
|
||||||
# type: enabled
|
# type: enabled
|
||||||
|
# when_thinking_disabled:
|
||||||
|
# extra_body:
|
||||||
|
# thinking:
|
||||||
|
# type: disabled
|
||||||
|
|
||||||
# Example: Novita AI (OpenAI-compatible)
|
# Example: Novita AI (OpenAI-compatible)
|
||||||
# Novita provides an OpenAI-compatible API with competitive pricing
|
# Novita provides an OpenAI-compatible API with competitive pricing
|
||||||
|
|
@ -173,6 +192,10 @@ models:
|
||||||
# extra_body:
|
# extra_body:
|
||||||
# thinking:
|
# thinking:
|
||||||
# type: enabled
|
# type: enabled
|
||||||
|
# when_thinking_disabled:
|
||||||
|
# extra_body:
|
||||||
|
# thinking:
|
||||||
|
# type: disabled
|
||||||
|
|
||||||
# Example: MiniMax (OpenAI-compatible) - International Edition
|
# Example: MiniMax (OpenAI-compatible) - International Edition
|
||||||
# MiniMax provides high-performance models with 204K context window
|
# MiniMax provides high-performance models with 204K context window
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue