Skip to content
22 changes: 19 additions & 3 deletions src/core/services/litellm_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@

logger = logging.getLogger(__name__)

logger = logging.getLogger(__name__)


def create_openrouter_llm(
model: str = "openai/gpt-oss-120b",
Expand All @@ -55,12 +57,18 @@ def create_openrouter_llm(
When caching is enabled, system messages are automatically transformed
to include cache_control markers for 90% cost reduction on cache hits.

Provider Selection:
- Anthropic models (anthropic/*) automatically use provider="Anthropic"
for best performance, regardless of the provider parameter
- Other models use the specified provider or default routing

Args:
model: Model identifier (e.g., "openai/gpt-oss-120b", "anthropic/claude-haiku-4.5")
api_key: OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
temperature: Sampling temperature (0.0-1.0)
max_tokens: Maximum tokens to generate
provider: Specific provider to use (e.g., "Cerebras", "Anthropic")
provider: Specific provider to use (e.g., "Cerebras", "DeepInfra/FP8").
Ignored for Anthropic models, which always use "Anthropic" provider.
user_id: User identifier for cache optimization (sticky routing)
enable_caching: Enable prompt caching. If None (default), enabled for all models.
OpenRouter/LiteLLM gracefully handles models that don't support caching.
Expand All @@ -82,10 +90,18 @@ def create_openrouter_llm(
},
}

# Auto-select Anthropic provider for Anthropic models (better performance)
# Override any default provider if this is an Anthropic model
if model.startswith("anthropic/"):
effective_provider = "Anthropic"
logger.debug("Auto-selected Anthropic provider for model %s (better performance)", model)
else:
effective_provider = provider

# Provider routing (e.g., {"order": ["DeepInfra/FP8"]})
# Use "order" not "only" - OpenRouter requires exact routing field name
if provider:
model_kwargs["provider"] = {"order": [provider]}
if effective_provider:
model_kwargs["provider"] = {"order": [effective_provider]}

# User ID for sticky cache routing
if user_id:
Expand Down
168 changes: 168 additions & 0 deletions tests/test_core/test_litellm_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,182 @@
This separation allows fast unit tests for wrapper logic while ensuring real
LLM integration is thoroughly tested. The FakeListChatModel is NOT used to
test LLM responses or behavior - only wrapper mechanics.

Additionally, this file includes tests for OpenRouter LLM creation, particularly
the provider auto-selection behavior for Anthropic models.
"""

import pytest
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_litellm import ChatLiteLLM

from src.core.services.litellm_llm import CachingLLMWrapper, create_openrouter_llm

# ============================================================================
# Provider Selection Tests
# ============================================================================


class TestCreateOpenRouterLLMProviderSelection:
    """Check which provider routing create_openrouter_llm puts in model_kwargs.

    Anthropic models (``anthropic/*``) are expected to be routed to the
    "Anthropic" provider no matter what ``provider`` argument is given;
    every other model is expected to keep the caller-supplied provider.
    """

    @staticmethod
    def _model_kwargs(**factory_args):
        """Create an LLM with the shared dummy API key and return its model_kwargs.

        Only the keyword arguments given here are forwarded, so tests that
        rely on the factory defaults simply omit them.
        """
        wrapped = create_openrouter_llm(api_key="test-key", **factory_args)
        # The factory result exposes the underlying chat model as ``.llm``.
        return wrapped.llm.model_kwargs

    def test_anthropic_model_uses_anthropic_provider(self) -> None:
        """An Anthropic model with no explicit provider is routed to Anthropic."""
        expected_routing = {"order": ["Anthropic"]}
        kwargs = self._model_kwargs(model="anthropic/claude-haiku-4.5")
        assert kwargs["provider"] == expected_routing

    def test_anthropic_model_overrides_default_provider(self) -> None:
        """An explicit provider is discarded when the model is an Anthropic one."""
        expected_routing = {"order": ["Anthropic"]}
        kwargs = self._model_kwargs(
            model="anthropic/claude-sonnet-4.5",
            provider="DeepInfra/FP8",  # expected to be ignored for Anthropic models
        )
        # The routing must name Anthropic, not the provider passed above.
        assert kwargs["provider"] == expected_routing

    def test_anthropic_model_with_different_version(self) -> None:
        """Another Anthropic model version is routed to Anthropic as well."""
        expected_routing = {"order": ["Anthropic"]}
        kwargs = self._model_kwargs(
            model="anthropic/claude-opus-4",
            provider="SomeOtherProvider",
        )
        assert kwargs["provider"] == expected_routing

    def test_non_anthropic_model_uses_specified_provider(self) -> None:
        """A non-Anthropic model keeps the provider the caller asked for."""
        expected_routing = {"order": ["Cerebras"]}
        kwargs = self._model_kwargs(
            model="openai/gpt-oss-120b",
            provider="Cerebras",
        )
        assert kwargs["provider"] == expected_routing

    def test_non_anthropic_model_with_deepinfra_provider(self) -> None:
        """A non-Anthropic model is routed to DeepInfra/FP8 when requested."""
        expected_routing = {"order": ["DeepInfra/FP8"]}
        kwargs = self._model_kwargs(
            model="openai/gpt-oss-120b",
            provider="DeepInfra/FP8",
        )
        assert kwargs["provider"] == expected_routing

    def test_non_anthropic_model_without_provider(self) -> None:
        """With provider=None, a non-Anthropic model gets no provider entry."""
        kwargs = self._model_kwargs(
            model="openai/gpt-oss-120b",
            provider=None,
        )
        assert "provider" not in kwargs

    def test_default_model_with_default_provider(self) -> None:
        """Factory defaults for model and provider result in Cerebras routing."""
        expected_routing = {"order": ["Cerebras"]}
        # No model/provider passed: relies on the factory's own defaults
        # (model="openai/gpt-oss-120b" and provider="Cerebras").
        kwargs = self._model_kwargs()
        assert kwargs["provider"] == expected_routing


class TestCreateOpenRouterLLMConfiguration:
    """Check that general settings passed to create_openrouter_llm reach the inner model."""

    @staticmethod
    def _inner_model(**factory_args):
        """Create an LLM with the shared dummy API key and return the wrapped model."""
        wrapped = create_openrouter_llm(api_key="test-key", **factory_args)
        # The factory result exposes the underlying chat model as ``.llm``.
        return wrapped.llm

    def test_model_prefix(self) -> None:
        """The model name handed to LiteLLM starts with ``openrouter/``."""
        inner = self._inner_model(model="anthropic/claude-haiku-4.5")
        model_name = inner.model
        assert model_name.startswith("openrouter/")

    def test_temperature_configuration(self) -> None:
        """The temperature argument is stored on the inner model."""
        inner = self._inner_model(
            model="openai/gpt-oss-120b",
            temperature=0.5,
        )
        assert inner.temperature == 0.5

    def test_max_tokens_configuration(self) -> None:
        """The max_tokens argument is stored on the inner model."""
        inner = self._inner_model(
            model="openai/gpt-oss-120b",
            max_tokens=1000,
        )
        assert inner.max_tokens == 1000

    def test_user_id_for_sticky_routing(self) -> None:
        """The user_id argument is forwarded as the ``user`` model kwarg."""
        inner = self._inner_model(
            model="anthropic/claude-haiku-4.5",
            user_id="test-user-123",
        )
        assert inner.model_kwargs["user"] == "test-user-123"

    def test_extra_headers_for_openrouter(self) -> None:
        """The OpenRouter attribution headers are present with the expected values."""
        expected_headers = {
            "HTTP-Referer": "https://osc.earth/osa",
            "X-Title": "Open Science Assistant",
        }
        inner = self._inner_model(model="openai/gpt-oss-120b")
        sent_headers = inner.model_kwargs["extra_headers"]
        # Presence first, then values, so a missing header is reported as such.
        for header_name in expected_headers:
            assert header_name in sent_headers
        for header_name, header_value in expected_headers.items():
            assert sent_headers[header_name] == header_value

    def test_streaming_enabled_by_default(self) -> None:
        """Streaming is switched on when the caller does not say otherwise."""
        inner = self._inner_model(model="openai/gpt-oss-120b")
        assert inner.streaming is True


class TestCreateOpenRouterLLMCachingWrapper:
    """Check when create_openrouter_llm wraps its result in CachingLLMWrapper."""

    def test_returns_caching_wrapper(self) -> None:
        """The factory hands back a CachingLLMWrapper instance."""
        result = create_openrouter_llm(
            api_key="test-key",
            model="anthropic/claude-haiku-4.5",
        )
        # The returned object is the caching wrapper, not the bare model.
        is_wrapped = isinstance(result, CachingLLMWrapper)
        assert is_wrapped

    def test_caching_enabled_by_default(self) -> None:
        """Leaving enable_caching unset still yields the caching wrapper."""
        result = create_openrouter_llm(
            api_key="test-key",
            model="anthropic/claude-haiku-4.5",
        )
        # enable_caching is not passed, so the factory default applies.
        is_wrapped = isinstance(result, CachingLLMWrapper)
        assert is_wrapped

    def test_caching_can_be_disabled(self) -> None:
        """enable_caching=False returns the bare ChatLiteLLM without a wrapper."""
        result = create_openrouter_llm(
            api_key="test-key",
            model="anthropic/claude-haiku-4.5",
            enable_caching=False,
        )
        # With caching off there must be no wrapper around the model ...
        is_wrapped = isinstance(result, CachingLLMWrapper)
        assert not is_wrapped
        # ... and the result is the LiteLLM chat model itself.
        assert isinstance(result, ChatLiteLLM)


# Test tool for tool binding tests
@tool
Expand Down