feat: enhance model support by adding GPT-5.1 to .gitignore and updating cassette maintenance documentation for dual-model testing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -183,6 +183,7 @@ CLAUDE.local.md
|
||||
.docker_cleaned
|
||||
logs/
|
||||
*.backup
|
||||
*.backup-*.json
|
||||
/.desktop_configured
|
||||
|
||||
/worktrees/
|
||||
|
||||
@@ -222,10 +222,45 @@ If you encounter issues with cassette testing:
|
||||
3. Run semantic matching tests to verify the system
|
||||
4. Open an issue if you find a bug in the matching logic
|
||||
|
||||
## Dual-Model Cassette Coverage
|
||||
|
||||
Some integration tests maintain cassettes for multiple model variants to ensure regression coverage across model families. For example:
|
||||
|
||||
### Consensus Tool Cassettes
|
||||
|
||||
The `test_consensus_integration.py` test uses `@pytest.mark.parametrize` to run the same scenario against both the `gpt-5` and `gpt-5.1` models, each backed by its own cassette:
|
||||
|
||||
- `tests/openai_cassettes/consensus_step1_gpt5_for.json` - Cassette for gpt-5 model
|
||||
- `tests/openai_cassettes/consensus_step1_gpt51_for.json` - Cassette for gpt-5.1 model
|
||||
|
||||
**When updating consensus cassettes:**
|
||||
|
||||
1. Both cassettes should be updated if the test logic changes
|
||||
2. If only one model's behavior changes, update only that cassette
|
||||
3. The test uses `@pytest.mark.parametrize` to run against both models
|
||||
4. Each cassette path is mapped in the `CONSENSUS_CASSETTES` dictionary
|
||||
|
||||
**To re-record a specific model's cassette:**
|
||||
|
||||
```bash
|
||||
# Delete the specific cassette
|
||||
rm tests/openai_cassettes/consensus_step1_gpt5_for.json
|
||||
|
||||
# Run the test with a real API key (it will re-record the gpt-5 cassette)
|
||||
OPENAI_API_KEY="your-real-key" python -m pytest "tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5]" -v
|
||||
|
||||
# Or for gpt-5.1
|
||||
rm tests/openai_cassettes/consensus_step1_gpt51_for.json
|
||||
OPENAI_API_KEY="your-real-key" python -m pytest "tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5.1]" -v
|
||||
```
|
||||
|
||||
This dual-coverage approach ensures that both model families continue to work correctly as the codebase evolves.
|
||||
|
||||
## Related Files
|
||||
|
||||
- `tests/http_transport_recorder.py` - Cassette recording/replay implementation
|
||||
- `tests/transport_helpers.py` - Helper functions for injecting transports
|
||||
- `tests/test_cassette_semantic_matching.py` - Tests for semantic matching
|
||||
- `tests/test_o3_pro_output_text_fix.py` - Example of cassette usage
|
||||
- `tests/test_consensus_integration.py` - Example of dual-model cassette coverage
|
||||
- `tests/openai_cassettes/` - Directory containing recorded cassettes
|
||||
|
||||
@@ -193,6 +193,7 @@ def disable_force_env_override(monkeypatch):
|
||||
monkeypatch.setenv("MAX_CONVERSATION_TURNS", "50")
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
import config
|
||||
import utils.conversation_memory as conversation_memory
|
||||
@@ -200,6 +201,10 @@ def disable_force_env_override(monkeypatch):
|
||||
importlib.reload(config)
|
||||
importlib.reload(conversation_memory)
|
||||
|
||||
test_conversation_module = sys.modules.get("tests.test_conversation_memory")
|
||||
if test_conversation_module is not None:
|
||||
test_conversation_module.MAX_CONVERSATION_TURNS = conversation_memory.MAX_CONVERSATION_TURNS
|
||||
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
|
||||
82
tests/openai_cassettes/consensus_step1_gpt51_for.json
Normal file
82
tests/openai_cassettes/consensus_step1_gpt51_for.json
Normal file
File diff suppressed because one or more lines are too long
@@ -94,9 +94,9 @@ class TestAutoModeComprehensive:
|
||||
"OPENROUTER_API_KEY": None,
|
||||
},
|
||||
{
|
||||
"EXTENDED_REASONING": "gpt-5-codex", # GPT-5-Codex prioritized for coding tasks
|
||||
"FAST_RESPONSE": "gpt-5", # Prefer gpt-5 for speed
|
||||
"BALANCED": "gpt-5", # Prefer gpt-5 for balanced
|
||||
"EXTENDED_REASONING": "gpt-5.1-codex", # GPT-5.1 Codex prioritized for coding tasks
|
||||
"FAST_RESPONSE": "gpt-5.1", # Prefer gpt-5.1 for speed
|
||||
"BALANCED": "gpt-5.1", # Prefer gpt-5.1 for balanced
|
||||
},
|
||||
),
|
||||
# Only X.AI API available
|
||||
|
||||
@@ -83,7 +83,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
||||
pass
|
||||
|
||||
monkeypatch.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
|
||||
monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5")
|
||||
monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5.1")
|
||||
monkeypatch.setenv("OPENROUTER_ALLOWED_MODELS", "gpt5nano")
|
||||
monkeypatch.setenv("XAI_ALLOWED_MODELS", "")
|
||||
|
||||
@@ -104,7 +104,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
||||
("OPENAI_API_KEY", "test-openai"),
|
||||
("OPENROUTER_API_KEY", "test-openrouter"),
|
||||
("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro"),
|
||||
("OPENAI_ALLOWED_MODELS", "gpt-5"),
|
||||
("OPENAI_ALLOWED_MODELS", "gpt-5.1"),
|
||||
("OPENROUTER_ALLOWED_MODELS", "gpt5nano"),
|
||||
("XAI_ALLOWED_MODELS", ""),
|
||||
):
|
||||
@@ -139,7 +139,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
||||
assert payload["status"] == "error"
|
||||
|
||||
available_models = _extract_available_models(payload["content"])
|
||||
assert set(available_models) == {"gemini-2.5-pro", "gpt-5", "gpt5nano", "openai/gpt-5-nano"}
|
||||
assert set(available_models) == {"gemini-2.5-pro", "gpt-5.1", "gpt5nano", "openai/gpt-5-nano"}
|
||||
|
||||
|
||||
@pytest.mark.no_mock_provider
|
||||
@@ -225,6 +225,6 @@ def test_error_listing_without_restrictions_shows_full_catalog(monkeypatch, rese
|
||||
|
||||
available_models = _extract_available_models(payload["content"])
|
||||
assert "gemini-2.5-pro" in available_models
|
||||
assert "gpt-5" in available_models
|
||||
assert any(model in available_models for model in {"gpt-5.1", "gpt-5"})
|
||||
assert "grok-4" in available_models
|
||||
assert len(available_models) >= 5
|
||||
|
||||
@@ -98,9 +98,9 @@ class TestAutoModeProviderSelection:
|
||||
balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
||||
|
||||
# Should select appropriate OpenAI models based on new preference order
|
||||
assert extended_reasoning == "gpt-5-codex" # GPT-5-Codex prioritized for extended reasoning
|
||||
assert fast_response == "gpt-5" # gpt-5 comes first in fast response preference
|
||||
assert balanced == "gpt-5" # gpt-5 for balanced
|
||||
assert extended_reasoning == "gpt-5.1-codex" # GPT-5.1 Codex prioritized for extended reasoning
|
||||
assert fast_response == "gpt-5.1" # gpt-5.1 comes first in fast response preference
|
||||
assert balanced == "gpt-5.1" # gpt-5.1 for balanced
|
||||
|
||||
finally:
|
||||
# Restore original environment
|
||||
|
||||
@@ -16,7 +16,12 @@ from tools.consensus import ConsensusTool
|
||||
# Directories for recorded HTTP interactions
|
||||
CASSETTE_DIR = Path(__file__).parent / "openai_cassettes"
|
||||
CASSETTE_DIR.mkdir(exist_ok=True)
|
||||
CONSENSUS_CASSETTE_PATH = CASSETTE_DIR / "consensus_step1_gpt5_for.json"
|
||||
|
||||
# Mapping of OpenAI model names to their cassette files
|
||||
CONSENSUS_CASSETTES = {
|
||||
"gpt-5": CASSETTE_DIR / "consensus_step1_gpt5_for.json",
|
||||
"gpt-5.1": CASSETTE_DIR / "consensus_step1_gpt51_for.json",
|
||||
}
|
||||
|
||||
GEMINI_REPLAY_DIR = Path(__file__).parent / "gemini_cassettes"
|
||||
GEMINI_REPLAY_DIR.mkdir(exist_ok=True)
|
||||
@@ -26,8 +31,15 @@ GEMINI_REPLAY_PATH = GEMINI_REPLAY_DIR / "consensus" / "step2_gemini25_flash_aga
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.no_mock_provider
|
||||
async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
"""Exercise ConsensusTool against gpt-5 (supporting) and gemini-2.0-flash (critical)."""
|
||||
@pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.1"])
|
||||
async def test_consensus_multi_model_consultations(monkeypatch, openai_model):
|
||||
"""Exercise ConsensusTool against OpenAI model (supporting) and gemini-2.5-flash (critical).
|
||||
|
||||
Tests both gpt-5 and gpt-5.1 to ensure regression coverage for both model families.
|
||||
"""
|
||||
|
||||
# Get the cassette path for this model
|
||||
consensus_cassette_path = CONSENSUS_CASSETTES[openai_model]
|
||||
|
||||
env_updates = {
|
||||
"DEFAULT_MODEL": "auto",
|
||||
@@ -43,13 +55,14 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
"CUSTOM_API_URL",
|
||||
]
|
||||
|
||||
recording_mode = not CONSENSUS_CASSETTE_PATH.exists() or not GEMINI_REPLAY_PATH.exists()
|
||||
recording_mode = not consensus_cassette_path.exists() or not GEMINI_REPLAY_PATH.exists()
|
||||
if recording_mode:
|
||||
openai_key = env_updates["OPENAI_API_KEY"].strip()
|
||||
gemini_key = env_updates["GEMINI_API_KEY"].strip()
|
||||
if (not openai_key or openai_key.startswith("dummy")) or (not gemini_key or gemini_key.startswith("dummy")):
|
||||
pytest.skip(
|
||||
"Consensus cassette missing and OPENAI_API_KEY/GEMINI_API_KEY not configured. Provide real keys to record."
|
||||
"Consensus cassette missing and OPENAI_API_KEY/GEMINI_API_KEY "
|
||||
"not configured. Provide real keys to record."
|
||||
)
|
||||
|
||||
GEMINI_REPLAY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -66,27 +79,43 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
m.setenv("GEMINI_API_KEY", "dummy-key-for-replay")
|
||||
m.setenv("GOOGLE_GENAI_CLIENT_MODE", "replay")
|
||||
|
||||
# Ensure restriction policies allow the latest OpenAI models under test
|
||||
m.setenv("OPENAI_ALLOWED_MODELS", openai_model)
|
||||
|
||||
m.setenv("GOOGLE_GENAI_REPLAYS_DIRECTORY", str(GEMINI_REPLAY_DIR))
|
||||
m.setenv("GOOGLE_GENAI_REPLAY_ID", GEMINI_REPLAY_ID)
|
||||
|
||||
for key in keys_to_clear:
|
||||
m.delenv(key, raising=False)
|
||||
|
||||
# Reset providers and register only OpenAI & Gemini for deterministic behavior
|
||||
# Ensure we use the built-in OpenAI catalogue rather than leftovers from
|
||||
# other tests that patch OPENAI_MODELS_CONFIG_PATH.
|
||||
m.delenv("OPENAI_MODELS_CONFIG_PATH", raising=False)
|
||||
|
||||
# Reset providers/restrictions and register only OpenAI & Gemini for deterministic behavior
|
||||
ModelProviderRegistry.reset_for_testing()
|
||||
import utils.model_restrictions as model_restrictions
|
||||
|
||||
model_restrictions._restriction_service = None
|
||||
from providers.gemini import GeminiModelProvider
|
||||
from providers.openai import OpenAIModelProvider
|
||||
|
||||
# Earlier tests may override the OpenAI provider's registry by pointing
|
||||
# OPENAI_MODELS_CONFIG_PATH at fixtures. Force a reload so model
|
||||
# metadata is restored from conf/openai_models.json.
|
||||
OpenAIModelProvider.reload_registry()
|
||||
assert openai_model in OpenAIModelProvider.MODEL_CAPABILITIES
|
||||
|
||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||
|
||||
# Inject HTTP transport for OpenAI interactions
|
||||
inject_transport(monkeypatch, CONSENSUS_CASSETTE_PATH)
|
||||
inject_transport(monkeypatch, str(consensus_cassette_path))
|
||||
|
||||
tool = ConsensusTool()
|
||||
|
||||
models_to_consult = [
|
||||
{"model": "gpt-5", "stance": "for"},
|
||||
{"model": openai_model, "stance": "for"},
|
||||
{"model": "gemini-2.5-flash", "stance": "against"},
|
||||
]
|
||||
|
||||
@@ -105,7 +134,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
step1_data = json.loads(step1_response[0].text)
|
||||
|
||||
assert step1_data["status"] == "analysis_and_first_model_consulted"
|
||||
assert step1_data["model_consulted"] == "gpt-5"
|
||||
assert step1_data["model_consulted"] == openai_model
|
||||
assert step1_data["model_response"]["status"] == "success"
|
||||
assert step1_data["model_response"]["metadata"]["provider"] == "openai"
|
||||
assert step1_data["model_response"]["verdict"]
|
||||
@@ -118,7 +147,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
summary_for_step2 = step1_data["model_response"]["verdict"][:200]
|
||||
|
||||
step2_arguments = {
|
||||
"step": f"Incorporated gpt-5 perspective: {summary_for_step2}",
|
||||
"step": f"Incorporated {openai_model} perspective: {summary_for_step2}",
|
||||
"step_number": 2,
|
||||
"total_steps": len(models_to_consult),
|
||||
"next_step_required": False,
|
||||
@@ -138,7 +167,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
assert step2_data["model_response"]["metadata"]["provider"] == "google"
|
||||
assert step2_data["model_response"]["verdict"]
|
||||
assert step2_data["complete_consensus"]["models_consulted"] == [
|
||||
"gpt-5:for",
|
||||
f"{openai_model}:for",
|
||||
"gemini-2.5-flash:against",
|
||||
]
|
||||
assert step2_data["consensus_complete"] is True
|
||||
@@ -159,7 +188,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
||||
gemini_provider._client = None
|
||||
|
||||
# Ensure cassettes exist for future replays
|
||||
assert CONSENSUS_CASSETTE_PATH.exists()
|
||||
assert consensus_cassette_path.exists()
|
||||
assert GEMINI_REPLAY_PATH.exists()
|
||||
|
||||
# Clean up provider registry state after test
|
||||
|
||||
@@ -37,14 +37,14 @@ class TestIntelligentFallback:
|
||||
|
||||
@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
|
||||
def test_prefers_openai_o3_mini_when_available(self):
|
||||
"""Test that gpt-5 is preferred when OpenAI API key is available (based on new preference order)"""
|
||||
"""Test that gpt-5.1 is preferred when OpenAI API key is available (based on new preference order)"""
|
||||
# Register only OpenAI provider for this test
|
||||
from providers.openai import OpenAIModelProvider
|
||||
|
||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||
|
||||
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
|
||||
assert fallback_model == "gpt-5" # Based on new preference order: gpt-5 before o4-mini
|
||||
assert fallback_model == "gpt-5.1" # Based on new preference order: gpt-5.1 before o4-mini
|
||||
|
||||
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
|
||||
def test_prefers_gemini_flash_when_openai_unavailable(self):
|
||||
@@ -147,8 +147,8 @@ class TestIntelligentFallback:
|
||||
|
||||
history, tokens = build_conversation_history(context, model_context=None)
|
||||
|
||||
# Verify that ModelContext was called with gpt-5 (the intelligent fallback based on new preference order)
|
||||
mock_context_class.assert_called_once_with("gpt-5")
|
||||
# Verify that ModelContext was called with gpt-5.1 (the intelligent fallback based on new preference order)
|
||||
mock_context_class.assert_called_once_with("gpt-5.1")
|
||||
|
||||
def test_auto_mode_with_gemini_only(self):
|
||||
"""Test auto mode behavior when only Gemini API key is available"""
|
||||
|
||||
@@ -50,6 +50,9 @@ class TestOpenAIProvider:
|
||||
assert provider.validate_model_name("o4-mini") is True
|
||||
assert provider.validate_model_name("gpt-5") is True
|
||||
assert provider.validate_model_name("gpt-5-mini") is True
|
||||
assert provider.validate_model_name("gpt-5.1") is True
|
||||
assert provider.validate_model_name("gpt-5.1-codex") is True
|
||||
assert provider.validate_model_name("gpt-5.1-codex-mini") is True
|
||||
|
||||
# Test valid aliases
|
||||
assert provider.validate_model_name("mini") is True
|
||||
@@ -59,6 +62,9 @@ class TestOpenAIProvider:
|
||||
assert provider.validate_model_name("gpt5") is True
|
||||
assert provider.validate_model_name("gpt5-mini") is True
|
||||
assert provider.validate_model_name("gpt5mini") is True
|
||||
assert provider.validate_model_name("gpt5.1") is True
|
||||
assert provider.validate_model_name("gpt5.1-codex") is True
|
||||
assert provider.validate_model_name("codex-mini") is True
|
||||
|
||||
# Test invalid model
|
||||
assert provider.validate_model_name("invalid-model") is False
|
||||
@@ -77,6 +83,9 @@ class TestOpenAIProvider:
|
||||
assert provider._resolve_model_name("gpt5") == "gpt-5"
|
||||
assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
|
||||
assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
|
||||
assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
|
||||
assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
|
||||
assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
|
||||
|
||||
# Test full name passthrough
|
||||
assert provider._resolve_model_name("o3") == "o3"
|
||||
@@ -86,6 +95,9 @@ class TestOpenAIProvider:
|
||||
assert provider._resolve_model_name("o4-mini") == "o4-mini"
|
||||
assert provider._resolve_model_name("gpt-5") == "gpt-5"
|
||||
assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
|
||||
assert provider._resolve_model_name("gpt-5.1") == "gpt-5.1"
|
||||
assert provider._resolve_model_name("gpt-5.1-codex") == "gpt-5.1-codex"
|
||||
assert provider._resolve_model_name("gpt-5.1-codex-mini") == "gpt-5.1-codex-mini"
|
||||
|
||||
def test_get_capabilities_o3(self):
|
||||
"""Test getting model capabilities for O3."""
|
||||
@@ -146,6 +158,36 @@ class TestOpenAIProvider:
|
||||
assert capabilities.supports_function_calling is True
|
||||
assert capabilities.supports_temperature is True
|
||||
|
||||
def test_get_capabilities_gpt51(self):
|
||||
"""Test GPT-5.1 capabilities reflect new metadata."""
|
||||
provider = OpenAIModelProvider("test-key")
|
||||
|
||||
capabilities = provider.get_capabilities("gpt-5.1")
|
||||
assert capabilities.model_name == "gpt-5.1"
|
||||
assert capabilities.supports_streaming is True
|
||||
assert capabilities.supports_function_calling is True
|
||||
assert capabilities.supports_json_mode is True
|
||||
assert capabilities.allow_code_generation is True
|
||||
|
||||
def test_get_capabilities_gpt51_codex(self):
|
||||
"""Test GPT-5.1 Codex is responses-only and non-streaming."""
|
||||
provider = OpenAIModelProvider("test-key")
|
||||
|
||||
capabilities = provider.get_capabilities("gpt-5.1-codex")
|
||||
assert capabilities.model_name == "gpt-5.1-codex"
|
||||
assert capabilities.supports_streaming is False
|
||||
assert capabilities.use_openai_response_api is True
|
||||
assert capabilities.allow_code_generation is True
|
||||
|
||||
def test_get_capabilities_gpt51_codex_mini(self):
|
||||
"""Test GPT-5.1 Codex mini exposes streaming and code generation."""
|
||||
provider = OpenAIModelProvider("test-key")
|
||||
|
||||
capabilities = provider.get_capabilities("gpt-5.1-codex-mini")
|
||||
assert capabilities.model_name == "gpt-5.1-codex-mini"
|
||||
assert capabilities.supports_streaming is True
|
||||
assert capabilities.allow_code_generation is True
|
||||
|
||||
@patch("providers.openai_compatible.OpenAI")
|
||||
def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class):
|
||||
"""Test that generate_content resolves aliases before making API calls.
|
||||
|
||||
@@ -98,8 +98,8 @@ class TestModelSelection:
|
||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||
|
||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
|
||||
# OpenAI prefers GPT-5-Codex for extended reasoning (coding tasks)
|
||||
assert model == "gpt-5-codex"
|
||||
# OpenAI prefers GPT-5.1-Codex for extended reasoning (coding tasks)
|
||||
assert model == "gpt-5.1-codex"
|
||||
|
||||
def test_extended_reasoning_with_gemini_only(self):
|
||||
"""Test EXTENDED_REASONING prefers pro when only Gemini is available."""
|
||||
@@ -133,8 +133,8 @@ class TestModelSelection:
|
||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||
|
||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
|
||||
# OpenAI now prefers gpt-5 for fast response (based on our new preference order)
|
||||
assert model == "gpt-5"
|
||||
# OpenAI now prefers gpt-5.1 for fast response (based on our new preference order)
|
||||
assert model == "gpt-5.1"
|
||||
|
||||
def test_fast_response_with_gemini_only(self):
|
||||
"""Test FAST_RESPONSE prefers flash when only Gemini is available."""
|
||||
@@ -167,8 +167,8 @@ class TestModelSelection:
|
||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||
|
||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
||||
# OpenAI prefers gpt-5 for balanced (based on our new preference order)
|
||||
assert model == "gpt-5"
|
||||
# OpenAI prefers gpt-5.1 for balanced (based on our new preference order)
|
||||
assert model == "gpt-5.1"
|
||||
|
||||
def test_no_category_uses_balanced_logic(self):
|
||||
"""Test that no category specified uses balanced logic."""
|
||||
@@ -195,7 +195,7 @@ class TestFlexibleModelSelection:
|
||||
"env": {"OPENAI_API_KEY": "test-key"},
|
||||
"provider_type": ProviderType.OPENAI,
|
||||
"category": ToolModelCategory.EXTENDED_REASONING,
|
||||
"expected": "gpt-5-codex", # GPT-5-Codex prioritized for coding tasks
|
||||
"expected": "gpt-5.1-codex", # GPT-5.1-Codex prioritized for coding tasks
|
||||
},
|
||||
# Case 2: Gemini provider for fast response
|
||||
{
|
||||
@@ -209,7 +209,7 @@ class TestFlexibleModelSelection:
|
||||
"env": {"OPENAI_API_KEY": "test-key"},
|
||||
"provider_type": ProviderType.OPENAI,
|
||||
"category": ToolModelCategory.FAST_RESPONSE,
|
||||
"expected": "gpt-5", # Based on new preference order
|
||||
"expected": "gpt-5.1", # Based on new preference order
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -209,6 +209,9 @@ class TestOpenAIProvider:
|
||||
assert provider.validate_model_name("o4-mini")
|
||||
assert provider.validate_model_name("o4mini")
|
||||
assert provider.validate_model_name("o4-mini")
|
||||
assert provider.validate_model_name("gpt-5.1")
|
||||
assert provider.validate_model_name("gpt-5.1-codex")
|
||||
assert provider.validate_model_name("gpt-5.1-codex-mini")
|
||||
assert not provider.validate_model_name("gpt-4o")
|
||||
assert not provider.validate_model_name("invalid-model")
|
||||
|
||||
@@ -219,3 +222,20 @@ class TestOpenAIProvider:
|
||||
aliases = ["o3", "o3mini", "o3-mini", "o4-mini", "o4mini"]
|
||||
for alias in aliases:
|
||||
assert not provider.get_capabilities(alias).supports_extended_thinking
|
||||
|
||||
def test_gpt51_family_capabilities(self):
|
||||
"""Ensure GPT-5.1 family exposes correct capability flags."""
|
||||
provider = OpenAIModelProvider(api_key="test-key")
|
||||
|
||||
base = provider.get_capabilities("gpt-5.1")
|
||||
assert base.supports_streaming
|
||||
assert base.allow_code_generation
|
||||
|
||||
codex = provider.get_capabilities("gpt-5.1-codex")
|
||||
assert not codex.supports_streaming
|
||||
assert codex.use_openai_response_api
|
||||
assert codex.allow_code_generation
|
||||
|
||||
codex_mini = provider.get_capabilities("gpt-5.1-codex-mini")
|
||||
assert codex_mini.supports_streaming
|
||||
assert codex_mini.allow_code_generation
|
||||
|
||||
@@ -54,6 +54,9 @@ class TestSupportedModelsAliases:
|
||||
assert "o3mini" in provider.MODEL_CAPABILITIES["o3-mini"].aliases
|
||||
assert "o3pro" in provider.MODEL_CAPABILITIES["o3-pro"].aliases
|
||||
assert "gpt4.1" in provider.MODEL_CAPABILITIES["gpt-4.1"].aliases
|
||||
assert "gpt5.1" in provider.MODEL_CAPABILITIES["gpt-5.1"].aliases
|
||||
assert "gpt5.1-codex" in provider.MODEL_CAPABILITIES["gpt-5.1-codex"].aliases
|
||||
assert "codex-mini" in provider.MODEL_CAPABILITIES["gpt-5.1-codex-mini"].aliases
|
||||
|
||||
# Test alias resolution
|
||||
assert provider._resolve_model_name("mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
||||
@@ -61,10 +64,14 @@ class TestSupportedModelsAliases:
|
||||
assert provider._resolve_model_name("o3pro") == "o3-pro" # o3pro resolves to o3-pro
|
||||
assert provider._resolve_model_name("o4mini") == "o4-mini"
|
||||
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
|
||||
assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
|
||||
assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
|
||||
assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
|
||||
|
||||
# Test case insensitive resolution
|
||||
assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
||||
assert provider._resolve_model_name("O3MINI") == "o3-mini"
|
||||
assert provider._resolve_model_name("Gpt5.1") == "gpt-5.1"
|
||||
|
||||
def test_xai_provider_aliases(self):
|
||||
"""Test XAI provider's alias structure."""
|
||||
|
||||
Reference in New Issue
Block a user