GPT-5, GPT-5-mini support

Improvements to model name resolution
Improved instructions for multi-step workflows when continuation is available
Improved instructions for chat tool
Improved preferred model resolution, moved code from registry -> each provider
Updated tests
This commit is contained in:
Fahad
2025-08-08 08:51:34 +05:00
parent 9a4791cb06
commit 1a8ec2e12f
30 changed files with 792 additions and 483 deletions

View File

@@ -48,7 +48,8 @@ class TestAliasTargetRestrictions:
"""Test that restriction policy allows alias when target model is allowed.
This is the correct user-friendly behavior - if you allow 'o4-mini',
you should be able to use its alias 'mini' as well.
you should be able to use its aliases 'o4mini' and 'o4-mini'.
Note: 'mini' is now an alias for 'gpt-5-mini', not 'o4-mini'.
"""
# Clear cached restriction service
import utils.model_restrictions
@@ -57,15 +58,16 @@ class TestAliasTargetRestrictions:
provider = OpenAIModelProvider(api_key="test-key")
# Both target and alias should be allowed
# Both target and its actual aliases should be allowed
assert provider.validate_model_name("o4-mini")
assert provider.validate_model_name("mini")
assert provider.validate_model_name("o4mini")
@patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "mini"}) # Allow alias only
def test_restriction_policy_allows_only_alias_when_alias_specified(self):
"""Test that restriction policy allows only the alias when just alias is specified.
If you restrict to 'mini', only the alias should work, not the direct target.
If you restrict to 'mini' (which is an alias for gpt-5-mini),
only the alias should work, not other models.
This is the correct restrictive behavior.
"""
# Clear cached restriction service
@@ -77,7 +79,9 @@ class TestAliasTargetRestrictions:
# Only the alias should be allowed
assert provider.validate_model_name("mini")
# Direct target should NOT be allowed
# Direct target for this alias should NOT be allowed (mini -> gpt-5-mini)
assert not provider.validate_model_name("gpt-5-mini")
# Other models should NOT be allowed
assert not provider.validate_model_name("o4-mini")
@patch.dict(os.environ, {"GOOGLE_ALLOWED_MODELS": "gemini-2.5-flash"}) # Allow target
@@ -127,12 +131,15 @@ class TestAliasTargetRestrictions:
# The warning should include both aliases and targets in known models
warning_message = str(warning_calls[0])
assert "mini" in warning_message # alias should be in known models
assert "o4-mini" in warning_message # target should be in known models
assert "o4mini" in warning_message or "o4-mini" in warning_message # aliases should be in known models
@patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "mini,o4-mini"}) # Allow both alias and target
@patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "mini,gpt-5-mini,o4-mini,o4mini"}) # Allow different models
def test_both_alias_and_target_allowed_when_both_specified(self):
"""Test that both alias and target work when both are explicitly allowed."""
"""Test that both alias and target work when both are explicitly allowed.
mini -> gpt-5-mini
o4mini -> o4-mini
"""
# Clear cached restriction service
import utils.model_restrictions
@@ -140,9 +147,11 @@ class TestAliasTargetRestrictions:
provider = OpenAIModelProvider(api_key="test-key")
# Both should be allowed
assert provider.validate_model_name("mini")
assert provider.validate_model_name("o4-mini")
# All should be allowed since we explicitly allowed them
assert provider.validate_model_name("mini") # alias for gpt-5-mini
assert provider.validate_model_name("gpt-5-mini") # target
assert provider.validate_model_name("o4-mini") # target
assert provider.validate_model_name("o4mini") # alias for o4-mini
def test_alias_target_policy_regression_prevention(self):
"""Regression test to ensure aliases and targets are both validated properly.

View File

@@ -95,8 +95,8 @@ class TestAutoModeComprehensive:
},
{
"EXTENDED_REASONING": "o3", # O3 for deep reasoning
"FAST_RESPONSE": "o4-mini", # O4-mini for speed
"BALANCED": "o4-mini", # O4-mini as balanced
"FAST_RESPONSE": "gpt-5", # Prefer gpt-5 for speed
"BALANCED": "gpt-5", # Prefer gpt-5 for balanced
},
),
# Only X.AI API available
@@ -113,7 +113,7 @@ class TestAutoModeComprehensive:
"BALANCED": "grok-3", # GROK-3 as balanced
},
),
# Both Gemini and OpenAI available - should prefer based on tool category
# Both Gemini and OpenAI available - Google comes first in priority
(
{
"GEMINI_API_KEY": "real-key",
@@ -122,12 +122,12 @@ class TestAutoModeComprehensive:
"OPENROUTER_API_KEY": None,
},
{
"EXTENDED_REASONING": "o3", # Prefer O3 for deep reasoning
"FAST_RESPONSE": "o4-mini", # Prefer O4-mini for speed
"BALANCED": "o4-mini", # Prefer OpenAI for balanced
"EXTENDED_REASONING": "gemini-2.5-pro", # Gemini comes first in priority
"FAST_RESPONSE": "gemini-2.5-flash", # Prefer flash for speed
"BALANCED": "gemini-2.5-flash", # Prefer flash for balanced
},
),
# All native APIs available - should prefer based on tool category
# All native APIs available - Google still comes first
(
{
"GEMINI_API_KEY": "real-key",
@@ -136,9 +136,9 @@ class TestAutoModeComprehensive:
"OPENROUTER_API_KEY": None,
},
{
"EXTENDED_REASONING": "o3", # Prefer O3 for deep reasoning
"FAST_RESPONSE": "o4-mini", # Prefer O4-mini for speed
"BALANCED": "o4-mini", # Prefer OpenAI for balanced
"EXTENDED_REASONING": "gemini-2.5-pro", # Gemini comes first in priority
"FAST_RESPONSE": "gemini-2.5-flash", # Prefer flash for speed
"BALANCED": "gemini-2.5-flash", # Prefer flash for balanced
},
),
],

View File

@@ -97,10 +97,10 @@ class TestAutoModeProviderSelection:
fast_response = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
# Should select appropriate OpenAI models
assert extended_reasoning in ["o3", "o3-mini", "o4-mini"] # Any available OpenAI model for reasoning
assert fast_response in ["o4-mini", "o3-mini"] # Prefer faster models
assert balanced in ["o4-mini", "o3-mini"] # Balanced selection
# Should select appropriate OpenAI models based on new preference order
assert extended_reasoning == "o3" # O3 for extended reasoning
assert fast_response == "gpt-5" # gpt-5 comes first in fast response preference
assert balanced == "gpt-5" # gpt-5 for balanced
finally:
# Restore original environment
@@ -138,11 +138,11 @@ class TestAutoModeProviderSelection:
)
fast_response = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
# Should prefer OpenAI for reasoning (based on fallback logic)
assert extended_reasoning == "o3" # Should prefer O3 for extended reasoning
# Should prefer Gemini now (based on new provider priority: Gemini before OpenAI)
assert extended_reasoning == "gemini-2.5-pro" # Gemini has higher priority now
# Should prefer OpenAI for fast response
assert fast_response == "o4-mini" # Should prefer O4-mini for fast response
# Should prefer Gemini for fast response
assert fast_response == "gemini-2.5-flash" # Gemini has higher priority now
finally:
# Restore original environment
@@ -318,7 +318,7 @@ class TestAutoModeProviderSelection:
test_cases = [
("flash", ProviderType.GOOGLE, "gemini-2.5-flash"),
("pro", ProviderType.GOOGLE, "gemini-2.5-pro"),
("mini", ProviderType.OPENAI, "o4-mini"),
("mini", ProviderType.OPENAI, "gpt-5-mini"), # "mini" now resolves to gpt-5-mini
("o3mini", ProviderType.OPENAI, "o3-mini"),
("grok", ProviderType.XAI, "grok-3"),
("grokfast", ProviderType.XAI, "grok-3-fast"),

View File

@@ -132,8 +132,11 @@ class TestBuggyBehaviorPrevention:
assert not provider.validate_model_name("o3-pro") # Not in allowed list
assert not provider.validate_model_name("o3") # Not in allowed list
# This should be ALLOWED because it resolves to o4-mini which is in the allowed list
assert provider.validate_model_name("mini") # Resolves to o4-mini, which IS allowed
# "mini" now resolves to gpt-5-mini, not o4-mini, so it should be blocked
assert not provider.validate_model_name("mini") # Resolves to gpt-5-mini, which is NOT allowed
# But o4mini (the actual alias for o4-mini) should work
assert provider.validate_model_name("o4mini") # Resolves to o4-mini, which IS allowed
# Verify our list_all_known_models includes the restricted models
all_known = provider.list_all_known_models()

View File

@@ -113,7 +113,7 @@ class TestDIALProvider:
# Test temperature constraint
assert capabilities.temperature_constraint.min_temp == 0.0
assert capabilities.temperature_constraint.max_temp == 2.0
assert capabilities.temperature_constraint.default_temp == 0.7
assert capabilities.temperature_constraint.default_temp == 0.3
@patch.dict(os.environ, {"DIAL_ALLOWED_MODELS": ""}, clear=False)
@patch("utils.model_restrictions._restriction_service", None)

View File

@@ -37,14 +37,14 @@ class TestIntelligentFallback:
@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
def test_prefers_openai_o3_mini_when_available(self):
"""Test that o4-mini is preferred when OpenAI API key is available"""
"""Test that gpt-5 is preferred when OpenAI API key is available (based on new preference order)"""
# Register only OpenAI provider for this test
from providers.openai_provider import OpenAIModelProvider
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
assert fallback_model == "o4-mini"
assert fallback_model == "gpt-5" # Based on new preference order: gpt-5 before o4-mini
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
def test_prefers_gemini_flash_when_openai_unavailable(self):
@@ -68,7 +68,7 @@ class TestIntelligentFallback:
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
assert fallback_model == "o4-mini" # OpenAI has priority
assert fallback_model == "gemini-2.5-flash" # Gemini has priority now (based on new PROVIDER_PRIORITY_ORDER)
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": ""}, clear=False)
def test_fallback_when_no_keys_available(self):
@@ -147,8 +147,8 @@ class TestIntelligentFallback:
history, tokens = build_conversation_history(context, model_context=None)
# Verify that ModelContext was called with o4-mini (the intelligent fallback)
mock_context_class.assert_called_once_with("o4-mini")
# Verify that ModelContext was called with gpt-5 (the intelligent fallback based on new preference order)
mock_context_class.assert_called_once_with("gpt-5")
def test_auto_mode_with_gemini_only(self):
"""Test auto mode behavior when only Gemini API key is available"""

View File

@@ -635,6 +635,13 @@ class TestAutoModeWithRestrictions:
mock_openai.list_models = openai_list_models
mock_openai.list_all_known_models.return_value = ["o3", "o3-mini", "o4-mini"]
# Add get_preferred_model method to mock to match new implementation
def get_preferred_model(category, allowed_models):
# Simple preference logic for testing - just return first allowed model
return allowed_models[0] if allowed_models else None
mock_openai.get_preferred_model = get_preferred_model
def get_provider_side_effect(provider_type):
if provider_type == ProviderType.OPENAI:
return mock_openai
@@ -685,8 +692,9 @@ class TestAutoModeWithRestrictions:
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
# The fallback will depend on how get_available_models handles aliases
# For now, we accept either behavior and document it
assert model in ["o4-mini", "gemini-2.5-flash"]
# When "mini" is allowed, it's returned as the allowed model
# "mini" is now an alias for gpt-5-mini, but the list shows "mini" itself
assert model in ["mini", "gpt-5-mini", "o4-mini", "gemini-2.5-flash"]
finally:
# Restore original registry state
registry = ModelProviderRegistry()

View File

@@ -230,7 +230,7 @@ class TestO3TemperatureParameterFixSimple:
assert temp_constraint.validate(0.5) is False
# Test regular model constraints - use gpt-4.1 which is supported
gpt41_capabilities = provider.get_capabilities("gpt-4.1-2025-04-14")
gpt41_capabilities = provider.get_capabilities("gpt-4.1")
assert gpt41_capabilities.temperature_constraint is not None
# Regular models should allow a range

View File

@@ -48,12 +48,17 @@ class TestOpenAIProvider:
assert provider.validate_model_name("o3-pro") is True
assert provider.validate_model_name("o4-mini") is True
assert provider.validate_model_name("o4-mini") is True
assert provider.validate_model_name("gpt-5") is True
assert provider.validate_model_name("gpt-5-mini") is True
# Test valid aliases
assert provider.validate_model_name("mini") is True
assert provider.validate_model_name("o3mini") is True
assert provider.validate_model_name("o4mini") is True
assert provider.validate_model_name("o4mini") is True
assert provider.validate_model_name("gpt5") is True
assert provider.validate_model_name("gpt5-mini") is True
assert provider.validate_model_name("gpt5mini") is True
# Test invalid model
assert provider.validate_model_name("invalid-model") is False
@@ -65,17 +70,22 @@ class TestOpenAIProvider:
provider = OpenAIModelProvider("test-key")
# Test shorthand resolution
assert provider._resolve_model_name("mini") == "o4-mini"
assert provider._resolve_model_name("mini") == "gpt-5-mini" # "mini" now resolves to gpt-5-mini
assert provider._resolve_model_name("o3mini") == "o3-mini"
assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("gpt5") == "gpt-5"
assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
# Test full name passthrough
assert provider._resolve_model_name("o3") == "o3"
assert provider._resolve_model_name("o3-mini") == "o3-mini"
assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10"
assert provider._resolve_model_name("o3-pro") == "o3-pro"
assert provider._resolve_model_name("o4-mini") == "o4-mini"
assert provider._resolve_model_name("o4-mini") == "o4-mini"
assert provider._resolve_model_name("gpt-5") == "gpt-5"
assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
def test_get_capabilities_o3(self):
"""Test getting model capabilities for O3."""
@@ -99,11 +109,43 @@ class TestOpenAIProvider:
provider = OpenAIModelProvider("test-key")
capabilities = provider.get_capabilities("mini")
assert capabilities.model_name == "o4-mini" # Capabilities should show resolved model name
assert capabilities.friendly_name == "OpenAI (O4-mini)"
assert capabilities.context_window == 200_000
assert capabilities.model_name == "gpt-5-mini" # "mini" now resolves to gpt-5-mini
assert capabilities.friendly_name == "OpenAI (GPT-5-mini)"
assert capabilities.context_window == 400_000
assert capabilities.provider == ProviderType.OPENAI
def test_get_capabilities_gpt5(self):
"""Test getting model capabilities for GPT-5."""
provider = OpenAIModelProvider("test-key")
capabilities = provider.get_capabilities("gpt-5")
assert capabilities.model_name == "gpt-5"
assert capabilities.friendly_name == "OpenAI (GPT-5)"
assert capabilities.context_window == 400_000
assert capabilities.max_output_tokens == 128_000
assert capabilities.provider == ProviderType.OPENAI
assert capabilities.supports_extended_thinking is True
assert capabilities.supports_system_prompts is True
assert capabilities.supports_streaming is True
assert capabilities.supports_function_calling is True
assert capabilities.supports_temperature is True
def test_get_capabilities_gpt5_mini(self):
"""Test getting model capabilities for GPT-5-mini."""
provider = OpenAIModelProvider("test-key")
capabilities = provider.get_capabilities("gpt-5-mini")
assert capabilities.model_name == "gpt-5-mini"
assert capabilities.friendly_name == "OpenAI (GPT-5-mini)"
assert capabilities.context_window == 400_000
assert capabilities.max_output_tokens == 128_000
assert capabilities.provider == ProviderType.OPENAI
assert capabilities.supports_extended_thinking is True
assert capabilities.supports_system_prompts is True
assert capabilities.supports_streaming is True
assert capabilities.supports_function_calling is True
assert capabilities.supports_temperature is True
@patch("providers.openai_compatible.OpenAI")
def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class):
"""Test that generate_content resolves aliases before making API calls.
@@ -132,21 +174,19 @@ class TestOpenAIProvider:
provider = OpenAIModelProvider("test-key")
# Call generate_content with alias 'gpt4.1' (resolves to gpt-4.1-2025-04-14, supports temperature)
# Call generate_content with alias 'gpt4.1' (resolves to gpt-4.1, supports temperature)
result = provider.generate_content(
prompt="Test prompt",
model_name="gpt4.1",
temperature=1.0, # This should be resolved to "gpt-4.1-2025-04-14"
temperature=1.0, # This should be resolved to "gpt-4.1"
)
# Verify the API was called with the RESOLVED model name
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
# CRITICAL ASSERTION: The API should receive "gpt-4.1-2025-04-14", not "gpt4.1"
assert (
call_kwargs["model"] == "gpt-4.1-2025-04-14"
), f"Expected 'gpt-4.1-2025-04-14' but API received '{call_kwargs['model']}'"
# CRITICAL ASSERTION: The API should receive "gpt-4.1", not "gpt4.1"
assert call_kwargs["model"] == "gpt-4.1", f"Expected 'gpt-4.1' but API received '{call_kwargs['model']}'"
# Verify other parameters (gpt-4.1 supports temperature unlike O3/O4 models)
assert call_kwargs["temperature"] == 1.0
@@ -156,7 +196,7 @@ class TestOpenAIProvider:
# Verify response
assert result.content == "Test response"
assert result.model_name == "gpt-4.1-2025-04-14" # Should be the resolved name
assert result.model_name == "gpt-4.1" # Should be the resolved name
@patch("providers.openai_compatible.OpenAI")
def test_generate_content_other_aliases(self, mock_openai_class):
@@ -213,14 +253,22 @@ class TestOpenAIProvider:
assert call_kwargs["model"] == "o3-mini" # Should be unchanged
def test_supports_thinking_mode(self):
"""Test thinking mode support (currently False for all OpenAI models)."""
"""Test thinking mode support."""
provider = OpenAIModelProvider("test-key")
# All OpenAI models currently don't support thinking mode
# GPT-5 models support thinking mode (reasoning tokens)
assert provider.supports_thinking_mode("gpt-5") is True
assert provider.supports_thinking_mode("gpt-5-mini") is True
assert provider.supports_thinking_mode("gpt5") is True # Test with alias
assert provider.supports_thinking_mode("gpt5mini") is True # Test with alias
# O3/O4 models don't support thinking mode
assert provider.supports_thinking_mode("o3") is False
assert provider.supports_thinking_mode("o3-mini") is False
assert provider.supports_thinking_mode("o4-mini") is False
assert provider.supports_thinking_mode("mini") is False # Test with alias too
assert (
provider.supports_thinking_mode("mini") is True
) # "mini" now resolves to gpt-5-mini which supports thinking
@patch("providers.openai_compatible.OpenAI")
def test_o3_pro_routes_to_responses_endpoint(self, mock_openai_class):
@@ -234,7 +282,7 @@ class TestOpenAIProvider:
mock_response.output.content = [MagicMock()]
mock_response.output.content[0].type = "output_text"
mock_response.output.content[0].text = "4"
mock_response.model = "o3-pro-2025-06-10"
mock_response.model = "o3-pro"
mock_response.id = "test-id"
mock_response.created_at = 1234567890
mock_response.usage = MagicMock()
@@ -252,13 +300,13 @@ class TestOpenAIProvider:
# Verify responses.create was called
mock_client.responses.create.assert_called_once()
call_args = mock_client.responses.create.call_args[1]
assert call_args["model"] == "o3-pro-2025-06-10"
assert call_args["model"] == "o3-pro"
assert call_args["input"][0]["role"] == "user"
assert "What is 2 + 2?" in call_args["input"][0]["content"][0]["text"]
# Verify the response
assert result.content == "4"
assert result.model_name == "o3-pro-2025-06-10"
assert result.model_name == "o3-pro"
assert result.metadata["endpoint"] == "responses"
@patch("providers.openai_compatible.OpenAI")

View File

@@ -3,6 +3,7 @@ Test per-tool model default selection functionality
"""
import json
import os
from unittest.mock import MagicMock, patch
import pytest
@@ -73,154 +74,194 @@ class TestToolModelCategories:
class TestModelSelection:
"""Test model selection based on tool categories."""
def teardown_method(self):
"""Clean up after each test to prevent state pollution."""
ModelProviderRegistry.clear_cache()
# Unregister all providers
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
def test_extended_reasoning_with_openai(self):
"""Test EXTENDED_REASONING prefers o3 when OpenAI is available."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# Mock OpenAI models available
mock_get_available.return_value = {
"o3": ProviderType.OPENAI,
"o3-mini": ProviderType.OPENAI,
"o4-mini": ProviderType.OPENAI,
}
"""Test EXTENDED_REASONING with OpenAI provider."""
# Setup with only OpenAI provider
ModelProviderRegistry.clear_cache()
# First unregister all providers to ensure isolation
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}, clear=False):
from providers.openai_provider import OpenAIModelProvider
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
# OpenAI prefers o3 for extended reasoning
assert model == "o3"
def test_extended_reasoning_with_gemini_only(self):
"""Test EXTENDED_REASONING prefers pro when only Gemini is available."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# Mock only Gemini models available
mock_get_available.return_value = {
"gemini-2.5-pro": ProviderType.GOOGLE,
"gemini-2.5-flash": ProviderType.GOOGLE,
}
# Clear cache and unregister all providers first
ModelProviderRegistry.clear_cache()
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
# Register only Gemini provider
with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}, clear=False):
from providers.gemini import GeminiModelProvider
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
# Should find the pro model for extended reasoning
assert "pro" in model or model == "gemini-2.5-pro"
# Gemini should return one of its models for extended reasoning
# The default behavior may return flash when pro is not explicitly preferred
assert model in ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash"]
def test_fast_response_with_openai(self):
"""Test FAST_RESPONSE prefers o4-mini when OpenAI is available."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# Mock OpenAI models available
mock_get_available.return_value = {
"o3": ProviderType.OPENAI,
"o3-mini": ProviderType.OPENAI,
"o4-mini": ProviderType.OPENAI,
}
"""Test FAST_RESPONSE with OpenAI provider."""
# Setup with only OpenAI provider
ModelProviderRegistry.clear_cache()
# First unregister all providers to ensure isolation
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}, clear=False):
from providers.openai_provider import OpenAIModelProvider
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
assert model == "o4-mini"
# OpenAI now prefers gpt-5 for fast response (based on our new preference order)
assert model == "gpt-5"
def test_fast_response_with_gemini_only(self):
"""Test FAST_RESPONSE prefers flash when only Gemini is available."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# Mock only Gemini models available
mock_get_available.return_value = {
"gemini-2.5-pro": ProviderType.GOOGLE,
"gemini-2.5-flash": ProviderType.GOOGLE,
}
# Clear cache and unregister all providers first
ModelProviderRegistry.clear_cache()
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
# Register only Gemini provider
with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}, clear=False):
from providers.gemini import GeminiModelProvider
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
# Should find the flash model for fast response
assert "flash" in model or model == "gemini-2.5-flash"
# Gemini should return one of its models for fast response
assert model in ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-2.5-pro"]
def test_balanced_category_fallback(self):
"""Test BALANCED category uses existing logic."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# Mock OpenAI models available
mock_get_available.return_value = {
"o3": ProviderType.OPENAI,
"o3-mini": ProviderType.OPENAI,
"o4-mini": ProviderType.OPENAI,
}
# Setup with only OpenAI provider
ModelProviderRegistry.clear_cache()
# First unregister all providers to ensure isolation
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}, clear=False):
from providers.openai_provider import OpenAIModelProvider
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
assert model == "o4-mini" # Balanced prefers o4-mini when OpenAI available
# OpenAI prefers gpt-5 for balanced (based on our new preference order)
assert model == "gpt-5"
def test_no_category_uses_balanced_logic(self):
"""Test that no category specified uses balanced logic."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# Mock only Gemini models available
mock_get_available.return_value = {
"gemini-2.5-pro": ProviderType.GOOGLE,
"gemini-2.5-flash": ProviderType.GOOGLE,
}
# Setup with only Gemini provider
with patch.dict(os.environ, {"GEMINI_API_KEY": "test-key"}, clear=False):
from providers.gemini import GeminiModelProvider
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model()
# Should pick a reasonable default, preferring flash for balanced use
assert "flash" in model or model == "gemini-2.5-flash"
# Should pick flash for balanced use
assert model == "gemini-2.5-flash"
class TestFlexibleModelSelection:
"""Test that model selection handles various naming scenarios."""
def test_fallback_handles_mixed_model_names(self):
"""Test that fallback selection works with mix of full names and shorthands."""
# Test with mix of full names and shorthands
"""Test that fallback selection works with different providers."""
# Test with different provider configurations
test_cases = [
# Case 1: Mix of OpenAI shorthands and full names
# Case 1: OpenAI provider for extended reasoning
{
"available": {"o3": ProviderType.OPENAI, "o4-mini": ProviderType.OPENAI},
"env": {"OPENAI_API_KEY": "test-key"},
"provider_type": ProviderType.OPENAI,
"category": ToolModelCategory.EXTENDED_REASONING,
"expected": "o3",
},
# Case 2: Mix of Gemini shorthands and full names
# Case 2: Gemini provider for fast response
{
"available": {
"gemini-2.5-flash": ProviderType.GOOGLE,
"gemini-2.5-pro": ProviderType.GOOGLE,
},
"env": {"GEMINI_API_KEY": "test-key"},
"provider_type": ProviderType.GOOGLE,
"category": ToolModelCategory.FAST_RESPONSE,
"expected_contains": "flash",
"expected": "gemini-2.5-flash",
},
# Case 3: Only shorthands available
# Case 3: OpenAI provider for fast response
{
"available": {"o4-mini": ProviderType.OPENAI, "o3-mini": ProviderType.OPENAI},
"env": {"OPENAI_API_KEY": "test-key"},
"provider_type": ProviderType.OPENAI,
"category": ToolModelCategory.FAST_RESPONSE,
"expected": "o4-mini",
"expected": "gpt-5", # Based on new preference order
},
]
for case in test_cases:
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
mock_get_available.return_value = case["available"]
# Clear registry for clean test
ModelProviderRegistry.clear_cache()
# First unregister all providers to ensure isolation
for provider_type in list(ProviderType):
ModelProviderRegistry.unregister_provider(provider_type)
with patch.dict(os.environ, case["env"], clear=False):
# Register the appropriate provider
if case["provider_type"] == ProviderType.OPENAI:
from providers.openai_provider import OpenAIModelProvider
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
elif case["provider_type"] == ProviderType.GOOGLE:
from providers.gemini import GeminiModelProvider
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
model = ModelProviderRegistry.get_preferred_fallback_model(case["category"])
if "expected" in case:
assert model == case["expected"], f"Failed for case: {case}"
elif "expected_contains" in case:
assert (
case["expected_contains"] in model
), f"Expected '{case['expected_contains']}' in '{model}' for case: {case}"
assert model == case["expected"], f"Failed for case: {case}, got {model}"
class TestCustomProviderFallback:
"""Test fallback to custom/openrouter providers."""
@patch.object(ModelProviderRegistry, "_find_extended_thinking_model")
def test_extended_reasoning_custom_fallback(self, mock_find_thinking):
"""Test EXTENDED_REASONING falls back to custom thinking model."""
with patch.object(ModelProviderRegistry, "get_available_models") as mock_get_available:
# No native models available, but OpenRouter is available
mock_get_available.return_value = {"openrouter-model": ProviderType.OPENROUTER}
mock_find_thinking.return_value = "custom/thinking-model"
def test_extended_reasoning_custom_fallback(self):
"""Test EXTENDED_REASONING with custom provider."""
# Setup with custom provider
ModelProviderRegistry.clear_cache()
with patch.dict(os.environ, {"CUSTOM_API_URL": "http://localhost:11434", "CUSTOM_API_KEY": ""}, clear=False):
from providers.custom import CustomProvider
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
assert model == "custom/thinking-model"
mock_find_thinking.assert_called_once()
ModelProviderRegistry.register_provider(ProviderType.CUSTOM, CustomProvider)
@patch.object(ModelProviderRegistry, "_find_extended_thinking_model")
def test_extended_reasoning_final_fallback(self, mock_find_thinking):
"""Test EXTENDED_REASONING falls back to pro when no custom found."""
with patch.object(ModelProviderRegistry, "get_provider") as mock_get_provider:
# No providers available
mock_get_provider.return_value = None
mock_find_thinking.return_value = None
provider = ModelProviderRegistry.get_provider(ProviderType.CUSTOM)
if provider:
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
# Should get a model from custom provider
assert model is not None
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
assert model == "gemini-2.5-pro"
def test_extended_reasoning_final_fallback(self):
"""Test EXTENDED_REASONING falls back to default when no providers."""
# Clear all providers
ModelProviderRegistry.clear_cache()
for provider_type in list(
ModelProviderRegistry._instance._providers.keys() if ModelProviderRegistry._instance else []
):
ModelProviderRegistry.unregister_provider(provider_type)
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
# Should fall back to hardcoded default
assert model == "gemini-2.5-flash"
class TestAutoModeErrorMessages:
@@ -266,42 +307,45 @@ class TestAutoModeErrorMessages:
class TestProviderHelperMethods:
"""Test the helper methods for finding models from custom/openrouter."""
def test_find_extended_thinking_model_custom(self):
"""Test finding thinking model from custom provider."""
with patch.object(ModelProviderRegistry, "get_provider") as mock_get_provider:
def test_extended_reasoning_with_custom_provider(self):
"""Test extended reasoning model selection with custom provider."""
# Setup with custom provider
with patch.dict(os.environ, {"CUSTOM_API_URL": "http://localhost:11434", "CUSTOM_API_KEY": ""}, clear=False):
from providers.custom import CustomProvider
# Mock custom provider with thinking model
mock_custom = MagicMock(spec=CustomProvider)
mock_custom.model_registry = {
"model1": {"supports_extended_thinking": False},
"model2": {"supports_extended_thinking": True},
"model3": {"supports_extended_thinking": False},
}
mock_get_provider.side_effect = lambda ptype: mock_custom if ptype == ProviderType.CUSTOM else None
ModelProviderRegistry.register_provider(ProviderType.CUSTOM, CustomProvider)
model = ModelProviderRegistry._find_extended_thinking_model()
assert model == "model2"
provider = ModelProviderRegistry.get_provider(ProviderType.CUSTOM)
if provider:
# Custom provider should return a model for extended reasoning
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
assert model is not None
def test_find_extended_thinking_model_openrouter(self):
"""Test finding thinking model from openrouter."""
with patch.object(ModelProviderRegistry, "get_provider") as mock_get_provider:
# Mock openrouter provider
mock_openrouter = MagicMock()
mock_openrouter.validate_model_name.side_effect = lambda m: m == "anthropic/claude-sonnet-4"
mock_get_provider.side_effect = lambda ptype: mock_openrouter if ptype == ProviderType.OPENROUTER else None
def test_extended_reasoning_with_openrouter(self):
"""Test extended reasoning model selection with OpenRouter."""
# Setup with OpenRouter provider
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}, clear=False):
from providers.openrouter import OpenRouterProvider
model = ModelProviderRegistry._find_extended_thinking_model()
assert model == "anthropic/claude-sonnet-4"
ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)
def test_find_extended_thinking_model_none_found(self):
"""Test when no thinking model is found."""
with patch.object(ModelProviderRegistry, "get_provider") as mock_get_provider:
# No providers available
mock_get_provider.return_value = None
# OpenRouter should provide a model for extended reasoning
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
# Should return first available OpenRouter model
assert model is not None
model = ModelProviderRegistry._find_extended_thinking_model()
assert model is None
def test_fallback_when_no_providers_available(self):
"""Test fallback when no providers are available."""
# Clear all providers
ModelProviderRegistry.clear_cache()
for provider_type in list(
ModelProviderRegistry._instance._providers.keys() if ModelProviderRegistry._instance else []
):
ModelProviderRegistry.unregister_provider(provider_type)
# Should return hardcoded fallback
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
assert model == "gemini-2.5-flash"
class TestEffectiveAutoMode:

View File

@@ -126,7 +126,7 @@ class TestProviderUTF8Encoding(unittest.TestCase):
mock_response.usage = Mock()
mock_response.usage.input_tokens = 50
mock_response.usage.output_tokens = 25
mock_response.model = "o3-pro-2025-06-10"
mock_response.model = "o3-pro"
mock_response.id = "test-id"
mock_response.created_at = 1234567890
@@ -141,7 +141,7 @@ class TestProviderUTF8Encoding(unittest.TestCase):
with patch("logging.info") as mock_logging:
response = provider.generate_content(
prompt="Analyze this Python code for issues",
model_name="o3-pro-2025-06-10",
model_name="o3-pro",
system_prompt="You are a code review expert.",
)
@@ -351,7 +351,7 @@ class TestLocaleModelIntegration(unittest.TestCase):
def test_model_name_resolution_utf8(self):
"""Test model name resolution with UTF-8."""
provider = OpenAIModelProvider(api_key="test")
model_names = ["gpt-4", "gemini-2.5-flash", "claude-3-opus", "o3-pro-2025-06-10"]
model_names = ["gpt-4", "gemini-2.5-flash", "claude-3-opus", "o3-pro"]
for model_name in model_names:
resolved = provider._resolve_model_name(model_name)
self.assertIsInstance(resolved, str)

View File

@@ -47,22 +47,23 @@ class TestSupportedModelsAliases:
assert isinstance(config.aliases, list), f"{model_name} aliases must be a list"
# Test specific aliases
assert "mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases
# "mini" is now an alias for gpt-5-mini, not o4-mini
assert "mini" in provider.SUPPORTED_MODELS["gpt-5-mini"].aliases
assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases
assert "o4-mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases
assert "o3mini" in provider.SUPPORTED_MODELS["o3-mini"].aliases
assert "o3-pro" in provider.SUPPORTED_MODELS["o3-pro-2025-06-10"].aliases
assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases
assert "gpt4.1" in provider.SUPPORTED_MODELS["gpt-4.1-2025-04-14"].aliases
assert "o3-pro" in provider.SUPPORTED_MODELS["o3-pro"].aliases
assert "gpt4.1" in provider.SUPPORTED_MODELS["gpt-4.1"].aliases
# Test alias resolution
assert provider._resolve_model_name("mini") == "o4-mini"
assert provider._resolve_model_name("mini") == "gpt-5-mini" # mini -> gpt-5-mini now
assert provider._resolve_model_name("o3mini") == "o3-mini"
assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10"
assert provider._resolve_model_name("o3-pro") == "o3-pro" # o3-pro is already the base model name
assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1-2025-04-14"
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
# Test case insensitive resolution
assert provider._resolve_model_name("Mini") == "o4-mini"
assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
assert provider._resolve_model_name("O3MINI") == "o3-mini"
def test_xai_provider_aliases(self):

View File

@@ -88,7 +88,7 @@ class TestXAIProvider:
# Test temperature range
assert capabilities.temperature_constraint.min_temp == 0.0
assert capabilities.temperature_constraint.max_temp == 2.0
assert capabilities.temperature_constraint.default_temp == 0.7
assert capabilities.temperature_constraint.default_temp == 0.3
def test_get_capabilities_grok3_fast(self):
"""Test getting model capabilities for GROK-3 Fast."""