refactor: moved temperature method from base provider to model capabilities

refactor: model listing cleanup, moved logic to model_capabilities.py
docs: added AGENTS.md for onboarding Codex
Fahad
2025-10-02 10:25:41 +04:00
parent f461cb4519
commit 6d237d0970
14 changed files with 460 additions and 512 deletions
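The "model listing cleanup" above replaces the old list_all_known_models() helper with a single alias-aware list_models() call, as the updated tests below show. Here is a minimal sketch of what such a listing helper could look like if the logic lives in model_capabilities.py; the function name, the capability-map shape, and the module placement are assumptions for illustration, and only the keyword arguments mirror the calls in the diff.

# Hypothetical sketch of an alias-aware listing helper (not taken from this commit).
from typing import Union


def list_model_names(
    capabilities: dict[str, Union[str, dict]],
    include_aliases: bool = True,
    lowercase: bool = False,
    unique: bool = False,
) -> list[str]:
    """Return model names from a capability map.

    String values are treated as aliases pointing at canonical targets;
    dict values describe canonical models directly.
    """
    names: list[str] = []
    for key, value in capabilities.items():
        if isinstance(value, str):  # alias entry: key -> target name
            if include_aliases:
                names.append(key)
            names.append(value)  # always expose the canonical target
        else:  # canonical entry with capability details
            names.append(key)
    if lowercase:
        names = [name.lower() for name in names]
    if unique:
        names = list(dict.fromkeys(names))  # dedupe while preserving order
    return names


# Example mirroring the MODEL_CAPABILITIES map used by the mocked provider below:
# list_model_names({"mini": "o4-mini", "o4-mini": {"context_window": 200000}},
#                  include_aliases=True, lowercase=True, unique=True)
# -> ["mini", "o4-mini"]

Deduplicating with dict.fromkeys keeps first-occurrence order, so aliases remain listed alongside their canonical targets.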


@@ -1,12 +1,9 @@
 """
-Tests that demonstrate the OLD BUGGY BEHAVIOR is now FIXED.
+Regression scenarios ensuring alias-aware model listings stay correct.

-These tests verify that scenarios which would have incorrectly passed
-before our fix now behave correctly. Each test documents the specific
-bug that was fixed and what the old vs new behavior should be.
-
-IMPORTANT: These tests PASS with our fix, but would have FAILED to catch
-bugs with the old code (before list_all_known_models was implemented).
+Each test captures behavior that previously regressed so we can guard it
+permanently. The focus is confirming aliases and their canonical targets
+remain visible to the restriction service and related validation logic.
 """

 import os
@@ -21,42 +18,34 @@ from utils.model_restrictions import ModelRestrictionService
 class TestBuggyBehaviorPrevention:
-    """
-    These tests prove that our fix prevents the HIGH-severity regression
-    that was identified by the O3 precommit analysis.
-
-    OLD BUG: list_models() only returned alias keys, not targets
-    FIX: list_all_known_models() returns both aliases AND targets
-    """
+    """Regression tests for alias-aware restriction validation."""

-    def test_old_bug_would_miss_target_restrictions(self):
-        """
-        OLD BUG: If restriction was set on target model (e.g., 'o4-mini'),
-        validation would incorrectly warn it's not recognized because
-        list_models() only returned aliases ['mini', 'o3mini'].
-
-        NEW BEHAVIOR: list_all_known_models() includes targets, so 'o4-mini'
-        is recognized as valid and no warning is generated.
-        """
+    def test_alias_listing_includes_targets_for_restriction_validation(self):
+        """Alias-aware lists expose both aliases and canonical targets."""
         provider = OpenAIModelProvider(api_key="test-key")

-        # This is what the old broken list_models() would return - aliases only
-        old_broken_list = ["mini", "o3mini"]  # Missing 'o4-mini', 'o3-mini' targets
+        # Baseline alias-only list captured for regression documentation
+        alias_only_snapshot = ["mini", "o3mini"]  # Missing 'o4-mini', 'o3-mini' targets

-        # This is what our fixed list_all_known_models() returns
-        new_fixed_list = provider.list_all_known_models()
+        # Canonical listing with aliases and targets
+        comprehensive_list = provider.list_models(
+            respect_restrictions=False,
+            include_aliases=True,
+            lowercase=True,
+            unique=True,
+        )

-        # Verify the fix: new method includes both aliases AND targets
-        assert "mini" in new_fixed_list  # alias
-        assert "o4-mini" in new_fixed_list  # target - THIS WAS MISSING IN OLD CODE
-        assert "o3mini" in new_fixed_list  # alias
-        assert "o3-mini" in new_fixed_list  # target - THIS WAS MISSING IN OLD CODE
+        # Comprehensive listing should contain aliases and their targets
+        assert "mini" in comprehensive_list
+        assert "o4-mini" in comprehensive_list
+        assert "o3mini" in comprehensive_list
+        assert "o3-mini" in comprehensive_list

-        # Prove the old behavior was broken
-        assert "o4-mini" not in old_broken_list  # Old code didn't include targets
-        assert "o3-mini" not in old_broken_list  # Old code didn't include targets
+        # Legacy alias-only snapshots exclude targets
+        assert "o4-mini" not in alias_only_snapshot
+        assert "o3-mini" not in alias_only_snapshot

-        # This target validation would have FAILED with old code
+        # This scenario previously failed when targets were omitted
         service = ModelRestrictionService()
         service.restrictions = {ProviderType.OPENAI: {"o4-mini"}}  # Restrict to target
@@ -64,24 +53,19 @@ class TestBuggyBehaviorPrevention:
             provider_instances = {ProviderType.OPENAI: provider}
             service.validate_against_known_models(provider_instances)

-            # NEW BEHAVIOR: No warnings because o4-mini is now in list_all_known_models
+            # No warnings expected because alias-aware list includes the target
             target_warnings = [
                 call
                 for call in mock_logger.warning.call_args_list
                 if "o4-mini" in str(call) and "not a recognized" in str(call)
             ]
-            assert len(target_warnings) == 0, "o4-mini should be recognized with our fix"
+            assert len(target_warnings) == 0, "o4-mini should be recognized as a valid target"

-    def test_old_bug_would_incorrectly_warn_about_valid_targets(self):
-        """
-        OLD BUG: Admins setting restrictions on target models would get
-        false warnings that their restriction models are "not recognized".
-
-        NEW BEHAVIOR: Target models are properly recognized.
-        """
+    def test_target_models_are_recognized_during_validation(self):
+        """Target model restrictions should not trigger false warnings."""
         # Test with Gemini provider too
         provider = GeminiModelProvider(api_key="test-key")
-        all_known = provider.list_all_known_models()
+        all_known = provider.list_models(respect_restrictions=False, include_aliases=True, lowercase=True, unique=True)

         # Verify both aliases and targets are included
         assert "flash" in all_known  # alias
@@ -108,13 +92,8 @@ class TestBuggyBehaviorPrevention:
             assert "gemini-2.5-flash" not in warning or "not a recognized" not in warning
             assert "gemini-2.5-pro" not in warning or "not a recognized" not in warning

-    def test_old_bug_policy_bypass_prevention(self):
-        """
-        OLD BUG: Policy enforcement was incomplete because validation
-        didn't know about target models. This could allow policy bypasses.
-
-        NEW BEHAVIOR: Complete validation against all known model names.
-        """
+    def test_policy_enforcement_remains_comprehensive(self):
+        """Policy validation must account for both aliases and targets."""
         provider = OpenAIModelProvider(api_key="test-key")

         # Simulate a scenario where admin wants to restrict specific targets
@@ -138,64 +117,85 @@ class TestBuggyBehaviorPrevention:
         # But o4mini (the actual alias for o4-mini) should work
         assert provider.validate_model_name("o4mini")  # Resolves to o4-mini, which IS allowed

-        # Verify our list_all_known_models includes the restricted models
-        all_known = provider.list_all_known_models()
+        # Verify our alias-aware list includes the restricted models
+        all_known = provider.list_models(
+            respect_restrictions=False,
+            include_aliases=True,
+            lowercase=True,
+            unique=True,
+        )
         assert "o3-mini" in all_known  # Should be known (and allowed)
         assert "o4-mini" in all_known  # Should be known (and allowed)
         assert "o3-pro" in all_known  # Should be known (but blocked)
         assert "mini" in all_known  # Should be known (and allowed since it resolves to o4-mini)

-    def test_demonstration_of_old_vs_new_interface(self):
-        """
-        Direct comparison of old vs new interface to document the fix.
-        """
+    def test_alias_aware_listing_extends_canonical_view(self):
+        """Alias-aware list should be a superset of restriction-filtered names."""
         provider = OpenAIModelProvider(api_key="test-key")

-        # OLD interface (still exists for backward compatibility)
-        old_style_models = provider.list_models(respect_restrictions=False)
+        baseline_models = provider.list_models(respect_restrictions=False)

-        # NEW interface (our fix)
-        new_comprehensive_models = provider.list_all_known_models()
+        alias_aware_models = provider.list_models(
+            respect_restrictions=False,
+            include_aliases=True,
+            lowercase=True,
+            unique=True,
+        )

-        # The new interface should be a superset of the old one
-        for model in old_style_models:
+        # Alias-aware variant should contain everything from the baseline
+        for model in baseline_models:
             assert model.lower() in [
-                m.lower() for m in new_comprehensive_models
-            ], f"New interface missing model {model} from old interface"
+                m.lower() for m in alias_aware_models
+            ], f"Alias-aware listing missing baseline model {model}"

-        # The new interface should include target models that old one might miss
-        targets_that_should_exist = ["o4-mini", "o3-mini"]
-        for target in targets_that_should_exist:
-            assert target in new_comprehensive_models, f"New interface should include target model {target}"
+        # Alias-aware variant should include canonical targets as well
+        for target in ("o4-mini", "o3-mini"):
+            assert target in alias_aware_models, f"Alias-aware listing should include target model {target}"

-    def test_old_validation_interface_still_works(self):
-        """
-        Verify our fix doesn't break existing validation workflows.
-        """
+    def test_restriction_validation_uses_alias_aware_variant(self):
+        """Validation should request the alias-aware lowercased, deduped list."""
         service = ModelRestrictionService()

-        # Create a mock provider that simulates the old behavior
-        old_style_provider = MagicMock()
-        old_style_provider.MODEL_CAPABILITIES = {
+        # Simulate a provider that only returns aliases when asked for models
+        alias_only_provider = MagicMock()
+        alias_only_provider.MODEL_CAPABILITIES = {
             "mini": "o4-mini",
             "o3mini": "o3-mini",
             "o4-mini": {"context_window": 200000},
             "o3-mini": {"context_window": 200000},
         }

-        # OLD BROKEN: This would only return aliases
-        old_style_provider.list_models.return_value = ["mini", "o3mini"]
-        # NEW FIXED: This includes both aliases and targets
-        old_style_provider.list_all_known_models.return_value = ["mini", "o3mini", "o4-mini", "o3-mini"]
+        # Simulate alias-only vs. alias-aware behavior using a side effect
+        def list_models_side_effect(**kwargs):
+            respect_restrictions = kwargs.get("respect_restrictions", True)
+            include_aliases = kwargs.get("include_aliases", True)
+            lowercase = kwargs.get("lowercase", False)
+            unique = kwargs.get("unique", False)
+            if respect_restrictions and include_aliases and not lowercase and not unique:
+                return ["mini", "o3mini"]
+            if not respect_restrictions and include_aliases and lowercase and unique:
+                return ["mini", "o3mini", "o4-mini", "o3-mini"]
+            raise AssertionError(f"Unexpected list_models call: {kwargs}")
+
+        alias_only_provider.list_models.side_effect = list_models_side_effect

-        # Test that validation now uses the comprehensive method
         service.restrictions = {ProviderType.OPENAI: {"o4-mini"}}  # Restrict to target

         with patch("utils.model_restrictions.logger") as mock_logger:
-            provider_instances = {ProviderType.OPENAI: old_style_provider}
+            provider_instances = {ProviderType.OPENAI: alias_only_provider}
             service.validate_against_known_models(provider_instances)

-            # Verify the new method was called, not the old one
-            old_style_provider.list_all_known_models.assert_called_once()
+            # Verify the alias-aware variant was used
+            alias_only_provider.list_models.assert_called_with(
+                respect_restrictions=False,
+                include_aliases=True,
+                lowercase=True,
+                unique=True,
+            )

             # Should not warn about o4-mini being unrecognized
             target_warnings = [
@@ -205,17 +205,17 @@ class TestBuggyBehaviorPrevention:
             ]
             assert len(target_warnings) == 0

-    def test_regression_proof_comprehensive_coverage(self):
-        """
-        Comprehensive test to prove our fix covers all provider types.
-        """
+    def test_alias_listing_covers_targets_for_all_providers(self):
+        """Alias-aware listings should expose targets across providers."""
         providers_to_test = [
             (OpenAIModelProvider(api_key="test-key"), "mini", "o4-mini"),
             (GeminiModelProvider(api_key="test-key"), "flash", "gemini-2.5-flash"),
         ]

         for provider, alias, target in providers_to_test:
-            all_known = provider.list_all_known_models()
+            all_known = provider.list_models(
+                respect_restrictions=False, include_aliases=True, lowercase=True, unique=True
+            )

             # Every provider should include both aliases and targets
             assert alias in all_known, f"{provider.__class__.__name__} missing alias {alias}"
@@ -226,13 +226,7 @@ class TestBuggyBehaviorPrevention:
     @patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini,invalid-model"})
     def test_validation_correctly_identifies_invalid_models(self):
-        """
-        Test that validation still catches truly invalid models while
-        properly recognizing valid target models.
-
-        This proves our fix works: o4-mini appears in the "Known models" list
-        because list_all_known_models() now includes target models.
-        """
+        """Validation should flag invalid models while listing valid targets."""
         # Clear cached restriction service
         import utils.model_restrictions
@@ -245,7 +239,6 @@ class TestBuggyBehaviorPrevention:
             provider_instances = {ProviderType.OPENAI: provider}
             service.validate_against_known_models(provider_instances)

-            # Should warn about 'invalid-model' (truly invalid)
            invalid_warnings = [
                 call
                 for call in mock_logger.warning.call_args_list
@@ -253,39 +246,37 @@ class TestBuggyBehaviorPrevention:
             ]
             assert len(invalid_warnings) > 0, "Should warn about truly invalid models"

-            # The warning should mention o4-mini in the "Known models" list (proving our fix works)
+            # The warning should mention o4-mini in the known models list
             warning_text = str(mock_logger.warning.call_args_list[0])
             assert "Known models:" in warning_text, "Warning should include known models list"
-            assert "o4-mini" in warning_text, "o4-mini should appear in known models (proves our fix works)"
-            assert "o3-mini" in warning_text, "o3-mini should appear in known models (proves our fix works)"
+            assert "o4-mini" in warning_text, "o4-mini should appear in known models"
+            assert "o3-mini" in warning_text, "o3-mini should appear in known models"

             # But the warning should be specifically about invalid-model
             assert "'invalid-model'" in warning_text, "Warning should specifically mention invalid-model"

-    def test_custom_provider_also_implements_fix(self):
-        """
-        Verify that custom provider also implements the comprehensive interface.
-        """
+    def test_custom_provider_alias_listing(self):
+        """Custom provider should expose alias-aware listings as well."""
         from providers.custom import CustomProvider

         # This might fail if no URL is set, but that's expected
         try:
             provider = CustomProvider(base_url="http://test.com/v1")
-            all_known = provider.list_all_known_models()
+            all_known = provider.list_models(
+                respect_restrictions=False, include_aliases=True, lowercase=True, unique=True
+            )

             # Should return a list (might be empty if registry not loaded)
             assert isinstance(all_known, list)
         except ValueError:
             # Expected if no base_url configured, skip this test
             pytest.skip("Custom provider requires URL configuration")

-    def test_openrouter_provider_also_implements_fix(self):
-        """
-        Verify that OpenRouter provider also implements the comprehensive interface.
-        """
+    def test_openrouter_provider_alias_listing(self):
+        """OpenRouter provider should expose alias-aware listings."""
         from providers.openrouter import OpenRouterProvider

         provider = OpenRouterProvider(api_key="test-key")
-        all_known = provider.list_all_known_models()
+        all_known = provider.list_models(respect_restrictions=False, include_aliases=True, lowercase=True, unique=True)

         # Should return a list with both aliases and targets
         assert isinstance(all_known, list)
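For readers following the restriction-service assertions above, here is a minimal sketch of the validation loop they imply. Only the list_models keyword arguments and the warning wording ("not a recognized", "Known models:") come from the tests; the class name, constructor, and logging setup are assumptions, not the project's actual implementation.

# Hypothetical sketch of the validation behavior the tests assert against.
import logging

logger = logging.getLogger(__name__)


class ModelRestrictionServiceSketch:
    def __init__(self, restrictions):
        # restrictions maps a provider type to the set of allowed model names,
        # e.g. {ProviderType.OPENAI: {"o4-mini"}}
        self.restrictions = restrictions

    def validate_against_known_models(self, provider_instances):
        for provider_type, allowed in self.restrictions.items():
            provider = provider_instances.get(provider_type)
            if provider is None:
                continue
            # Ask for the unrestricted, alias-aware, lowercased, deduped listing
            known = provider.list_models(
                respect_restrictions=False,
                include_aliases=True,
                lowercase=True,
                unique=True,
            )
            for name in allowed:
                if name.lower() not in known:
                    logger.warning(
                        "Model '%s' is not a recognized model for %s. Known models: %s",
                        name,
                        provider_type,
                        ", ".join(sorted(known)),
                    )

Because the listing includes canonical targets as well as aliases, a restriction set on a target such as "o4-mini" no longer produces a false "not a recognized" warning, which is exactly what the regression tests above check.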