Auto-select the Anthropic provider for anthropic/* models on OpenRouter

create_openrouter_llm() now derives an "effective provider" before building
the OpenRouter routing entry:

  * a model identifier that starts with "anthropic/" always routes with
    {"order": ["Anthropic"]}; the `provider` argument is ignored and a debug
    message is logged;
  * every other model keeps the caller-supplied `provider`;
  * the "provider" routing entry is only added when the effective provider
    is truthy.

The test module gains three classes that exercise create_openrouter_llm()
without network access: provider selection, general configuration
(model prefix, temperature, max_tokens, user id, extra headers, streaming)
and the caching wrapper.

Review notes for this revision of the patch:

  * Dropped the hunk that added a second, identical
    `logger = logging.getLogger(__name__)` right after the existing one.
    The new-file offsets of the two remaining hunks in litellm_llm.py are
    shifted by -2 accordingly.
  * test_caching_enabled_by_default was a copy of
    test_returns_caching_wrapper (same call, same assertion). It now uses a
    non-Anthropic model so it covers the documented "enabled for all models"
    default instead of repeating the previous test.
  * The `index` lines are carried over unchanged; their post-image blob ids
    describe the earlier revision of this patch and are stale.
  * NOTE(review): indentation, blank lines and docstring line wraps were
    reconstructed from a whitespace-collapsed copy of the patch. Confirm the
    context lines against the target tree if the patch does not apply cleanly.

diff --git a/src/core/services/litellm_llm.py b/src/core/services/litellm_llm.py
index c5e6aac..6269720 100644
--- a/src/core/services/litellm_llm.py
+++ b/src/core/services/litellm_llm.py
@@ -55,12 +55,18 @@ def create_openrouter_llm(
     When caching is enabled, system messages are automatically transformed
     to include cache_control markers for 90% cost reduction on cache hits.
 
+    Provider Selection:
+    - Anthropic models (anthropic/*) automatically use provider="Anthropic"
+      for best performance, regardless of the provider parameter
+    - Other models use the specified provider or default routing
+
     Args:
         model: Model identifier (e.g., "openai/gpt-oss-120b", "anthropic/claude-haiku-4.5")
         api_key: OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
         temperature: Sampling temperature (0.0-1.0)
         max_tokens: Maximum tokens to generate
-        provider: Specific provider to use (e.g., "Cerebras", "Anthropic")
+        provider: Specific provider to use (e.g., "Cerebras", "DeepInfra/FP8").
+            Ignored for Anthropic models, which always use "Anthropic" provider.
         user_id: User identifier for cache optimization (sticky routing)
         enable_caching: Enable prompt caching. If None (default), enabled for all models.
             OpenRouter/LiteLLM gracefully handles models that don't support caching.
@@ -82,10 +88,18 @@ def create_openrouter_llm(
         },
     }
 
+    # Auto-select Anthropic provider for Anthropic models (better performance)
+    # Override any default provider if this is an Anthropic model
+    if model.startswith("anthropic/"):
+        effective_provider = "Anthropic"
+        logger.debug("Auto-selected Anthropic provider for model %s (better performance)", model)
+    else:
+        effective_provider = provider
+
     # Provider routing (e.g., {"order": ["DeepInfra/FP8"]})
     # Use "order" not "only" - OpenRouter requires exact routing field name
-    if provider:
-        model_kwargs["provider"] = {"order": [provider]}
+    if effective_provider:
+        model_kwargs["provider"] = {"order": [effective_provider]}
 
     # User ID for sticky cache routing
     if user_id:
diff --git a/tests/test_core/test_litellm_llm.py b/tests/test_core/test_litellm_llm.py
index 0732fd6..db32d56 100644
--- a/tests/test_core/test_litellm_llm.py
+++ b/tests/test_core/test_litellm_llm.py
@@ -25,14 +25,182 @@
 This separation allows fast unit tests for wrapper logic while ensuring
 real LLM integration is thoroughly tested. The FakeListChatModel is NOT used
 to test LLM responses or behavior - only wrapper mechanics.
+
+Additionally, this file includes tests for OpenRouter LLM creation, particularly
+the provider auto-selection behavior for Anthropic models.
 """
 
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_core.tools import tool
+from langchain_litellm import ChatLiteLLM
 
 from src.core.services.litellm_llm import CachingLLMWrapper, create_openrouter_llm
 
+# ============================================================================
+# Provider Selection Tests
+# ============================================================================
+
+
+class TestCreateOpenRouterLLMProviderSelection:
+    """Tests for provider auto-selection in create_openrouter_llm."""
+
+    def test_anthropic_model_uses_anthropic_provider(self) -> None:
+        """Anthropic models should auto-select Anthropic provider."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-haiku-4.5",
+            api_key="test-key",
+        )
+        # Access the wrapped LLM's model_kwargs
+        assert llm.llm.model_kwargs["provider"] == {"order": ["Anthropic"]}
+
+    def test_anthropic_model_overrides_default_provider(self) -> None:
+        """Anthropic models should override any specified provider."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-sonnet-4.5",
+            api_key="test-key",
+            provider="DeepInfra/FP8",  # Should be ignored for Anthropic models
+        )
+        # Should use Anthropic provider, not the specified one
+        assert llm.llm.model_kwargs["provider"] == {"order": ["Anthropic"]}
+
+    def test_anthropic_model_with_different_version(self) -> None:
+        """All Anthropic model versions should auto-select Anthropic provider."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-opus-4",
+            api_key="test-key",
+            provider="SomeOtherProvider",
+        )
+        assert llm.llm.model_kwargs["provider"] == {"order": ["Anthropic"]}
+
+    def test_non_anthropic_model_uses_specified_provider(self) -> None:
+        """Non-Anthropic models should use the specified provider."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+            provider="Cerebras",
+        )
+        assert llm.llm.model_kwargs["provider"] == {"order": ["Cerebras"]}
+
+    def test_non_anthropic_model_with_deepinfra_provider(self) -> None:
+        """Non-Anthropic models should use DeepInfra provider when specified."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+            provider="DeepInfra/FP8",
+        )
+        assert llm.llm.model_kwargs["provider"] == {"order": ["DeepInfra/FP8"]}
+
+    def test_non_anthropic_model_without_provider(self) -> None:
+        """Non-Anthropic models with no provider should have no provider key."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+            provider=None,
+        )
+        assert "provider" not in llm.llm.model_kwargs
+
+    def test_default_model_with_default_provider(self) -> None:
+        """Default model with default provider should use the specified provider."""
+        llm = create_openrouter_llm(
+            api_key="test-key",
+            # Uses default model="openai/gpt-oss-120b" and provider="Cerebras"
+        )
+        assert llm.llm.model_kwargs["provider"] == {"order": ["Cerebras"]}
+
+
+class TestCreateOpenRouterLLMConfiguration:
+    """Tests for general LLM configuration options."""
+
+    def test_model_prefix(self) -> None:
+        """LLM should use openrouter/ prefix for LiteLLM."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-haiku-4.5",
+            api_key="test-key",
+        )
+        # LiteLLM should receive the model with openrouter/ prefix
+        assert llm.llm.model.startswith("openrouter/")
+
+    def test_temperature_configuration(self) -> None:
+        """LLM should respect temperature parameter."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+            temperature=0.5,
+        )
+        assert llm.llm.temperature == 0.5
+
+    def test_max_tokens_configuration(self) -> None:
+        """LLM should respect max_tokens parameter."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+            max_tokens=1000,
+        )
+        assert llm.llm.max_tokens == 1000
+
+    def test_user_id_for_sticky_routing(self) -> None:
+        """LLM should include user ID for cache optimization."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-haiku-4.5",
+            api_key="test-key",
+            user_id="test-user-123",
+        )
+        assert llm.llm.model_kwargs["user"] == "test-user-123"
+
+    def test_extra_headers_for_openrouter(self) -> None:
+        """LLM should include required OpenRouter headers."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+        )
+        headers = llm.llm.model_kwargs["extra_headers"]
+        assert "HTTP-Referer" in headers
+        assert "X-Title" in headers
+        assert headers["HTTP-Referer"] == "https://osc.earth/osa"
+        assert headers["X-Title"] == "Open Science Assistant"
+
+    def test_streaming_enabled_by_default(self) -> None:
+        """LLM should have streaming enabled for LangGraph events."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+        )
+        assert llm.llm.streaming is True
+
+
+class TestCreateOpenRouterLLMCachingWrapper:
+    """Tests for caching wrapper integration."""
+
+    def test_returns_caching_wrapper(self) -> None:
+        """create_openrouter_llm should return a CachingLLMWrapper."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-haiku-4.5",
+            api_key="test-key",
+        )
+        # Should be wrapped for caching
+        assert isinstance(llm, CachingLLMWrapper)
+
+    def test_caching_enabled_by_default(self) -> None:
+        """Caching should be on by default, even for non-Anthropic models."""
+        llm = create_openrouter_llm(
+            model="openai/gpt-oss-120b",
+            api_key="test-key",
+        )
+        # enable_caching is left at its default, so the result is still wrapped
+        assert isinstance(llm, CachingLLMWrapper)
+
+    def test_caching_can_be_disabled(self) -> None:
+        """Caching should be disableable via parameter."""
+        llm = create_openrouter_llm(
+            model="anthropic/claude-haiku-4.5",
+            api_key="test-key",
+            enable_caching=False,
+        )
+        # Should NOT be wrapped when disabled
+        assert not isinstance(llm, CachingLLMWrapper)
+        assert isinstance(llm, ChatLiteLLM)
+
 
 # Test tool for tool binding tests
 @tool