feat: add GPT-5.1 model test coverage, ignore `*.backup-*.json` files in .gitignore, and update cassette maintenance documentation for dual-model testing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -183,6 +183,7 @@ CLAUDE.local.md
|
|||||||
.docker_cleaned
|
.docker_cleaned
|
||||||
logs/
|
logs/
|
||||||
*.backup
|
*.backup
|
||||||
|
*.backup-*.json
|
||||||
/.desktop_configured
|
/.desktop_configured
|
||||||
|
|
||||||
/worktrees/
|
/worktrees/
|
||||||
|
|||||||
@@ -222,10 +222,45 @@ If you encounter issues with cassette testing:
|
|||||||
3. Run semantic matching tests to verify the system
|
3. Run semantic matching tests to verify the system
|
||||||
4. Open an issue if you find a bug in the matching logic
|
4. Open an issue if you find a bug in the matching logic
|
||||||
|
|
||||||
|
## Dual-Model Cassette Coverage
|
||||||
|
|
||||||
|
Some integration tests maintain cassettes for multiple model variants to ensure regression coverage across model families. For example:
|
||||||
|
|
||||||
|
### Consensus Tool Cassettes
|
||||||
|
|
||||||
|
The `test_consensus_integration.py` test is parametrized with `@pytest.mark.parametrize` to run against both the `gpt-5` and `gpt-5.1` models, each with its own cassette:
|
||||||
|
|
||||||
|
- `tests/openai_cassettes/consensus_step1_gpt5_for.json` - Cassette for gpt-5 model
|
||||||
|
- `tests/openai_cassettes/consensus_step1_gpt51_for.json` - Cassette for gpt-5.1 model
|
||||||
|
|
||||||
|
**When updating consensus cassettes:**
|
||||||
|
|
||||||
|
1. Both cassettes should be updated if the test logic changes
|
||||||
|
2. If only one model's behavior changes, update only that cassette
|
||||||
|
3. The test uses `@pytest.mark.parametrize` to run against both models
|
||||||
|
4. Each cassette path is mapped in the `CONSENSUS_CASSETTES` dictionary
|
||||||
|
|
||||||
|
**To re-record a specific model's cassette:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Delete the specific cassette
|
||||||
|
rm tests/openai_cassettes/consensus_step1_gpt5_for.json
|
||||||
|
|
||||||
|
# Run the test with a real API key (it will record for gpt-5); recording is skipped unless both OPENAI_API_KEY and GEMINI_API_KEY are real (non-dummy) keys
|
||||||
|
OPENAI_API_KEY="your-real-key" python -m pytest "tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5]" -v
|
||||||
|
|
||||||
|
# Or for gpt-5.1
|
||||||
|
rm tests/openai_cassettes/consensus_step1_gpt51_for.json
|
||||||
|
OPENAI_API_KEY="your-real-key" python -m pytest "tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5.1]" -v
|
||||||
|
```
|
||||||
|
|
||||||
|
This dual-coverage approach ensures that both model families continue to work correctly as the codebase evolves.
|
||||||
|
|
||||||
## Related Files
|
## Related Files
|
||||||
|
|
||||||
- `tests/http_transport_recorder.py` - Cassette recording/replay implementation
|
- `tests/http_transport_recorder.py` - Cassette recording/replay implementation
|
||||||
- `tests/transport_helpers.py` - Helper functions for injecting transports
|
- `tests/transport_helpers.py` - Helper functions for injecting transports
|
||||||
- `tests/test_cassette_semantic_matching.py` - Tests for semantic matching
|
- `tests/test_cassette_semantic_matching.py` - Tests for semantic matching
|
||||||
- `tests/test_o3_pro_output_text_fix.py` - Example of cassette usage
|
- `tests/test_o3_pro_output_text_fix.py` - Example of cassette usage
|
||||||
|
- `tests/test_consensus_integration.py` - Example of dual-model cassette coverage
|
||||||
- `tests/openai_cassettes/` - Directory containing recorded cassettes
|
- `tests/openai_cassettes/` - Directory containing recorded cassettes
|
||||||
|
|||||||
@@ -193,6 +193,7 @@ def disable_force_env_override(monkeypatch):
|
|||||||
monkeypatch.setenv("MAX_CONVERSATION_TURNS", "50")
|
monkeypatch.setenv("MAX_CONVERSATION_TURNS", "50")
|
||||||
|
|
||||||
import importlib
|
import importlib
|
||||||
|
import sys
|
||||||
|
|
||||||
import config
|
import config
|
||||||
import utils.conversation_memory as conversation_memory
|
import utils.conversation_memory as conversation_memory
|
||||||
@@ -200,6 +201,10 @@ def disable_force_env_override(monkeypatch):
|
|||||||
importlib.reload(config)
|
importlib.reload(config)
|
||||||
importlib.reload(conversation_memory)
|
importlib.reload(conversation_memory)
|
||||||
|
|
||||||
|
test_conversation_module = sys.modules.get("tests.test_conversation_memory")
|
||||||
|
if test_conversation_module is not None:
|
||||||
|
test_conversation_module.MAX_CONVERSATION_TURNS = conversation_memory.MAX_CONVERSATION_TURNS
|
||||||
|
|
||||||
try:
|
try:
|
||||||
yield
|
yield
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
82
tests/openai_cassettes/consensus_step1_gpt51_for.json
Normal file
82
tests/openai_cassettes/consensus_step1_gpt51_for.json
Normal file
File diff suppressed because one or more lines are too long
@@ -94,9 +94,9 @@ class TestAutoModeComprehensive:
|
|||||||
"OPENROUTER_API_KEY": None,
|
"OPENROUTER_API_KEY": None,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"EXTENDED_REASONING": "gpt-5-codex", # GPT-5-Codex prioritized for coding tasks
|
"EXTENDED_REASONING": "gpt-5.1-codex", # GPT-5.1 Codex prioritized for coding tasks
|
||||||
"FAST_RESPONSE": "gpt-5", # Prefer gpt-5 for speed
|
"FAST_RESPONSE": "gpt-5.1", # Prefer gpt-5.1 for speed
|
||||||
"BALANCED": "gpt-5", # Prefer gpt-5 for balanced
|
"BALANCED": "gpt-5.1", # Prefer gpt-5.1 for balanced
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
# Only X.AI API available
|
# Only X.AI API available
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
monkeypatch.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
|
monkeypatch.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
|
||||||
monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5")
|
monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5.1")
|
||||||
monkeypatch.setenv("OPENROUTER_ALLOWED_MODELS", "gpt5nano")
|
monkeypatch.setenv("OPENROUTER_ALLOWED_MODELS", "gpt5nano")
|
||||||
monkeypatch.setenv("XAI_ALLOWED_MODELS", "")
|
monkeypatch.setenv("XAI_ALLOWED_MODELS", "")
|
||||||
|
|
||||||
@@ -104,7 +104,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
|||||||
("OPENAI_API_KEY", "test-openai"),
|
("OPENAI_API_KEY", "test-openai"),
|
||||||
("OPENROUTER_API_KEY", "test-openrouter"),
|
("OPENROUTER_API_KEY", "test-openrouter"),
|
||||||
("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro"),
|
("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro"),
|
||||||
("OPENAI_ALLOWED_MODELS", "gpt-5"),
|
("OPENAI_ALLOWED_MODELS", "gpt-5.1"),
|
||||||
("OPENROUTER_ALLOWED_MODELS", "gpt5nano"),
|
("OPENROUTER_ALLOWED_MODELS", "gpt5nano"),
|
||||||
("XAI_ALLOWED_MODELS", ""),
|
("XAI_ALLOWED_MODELS", ""),
|
||||||
):
|
):
|
||||||
@@ -139,7 +139,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
|||||||
assert payload["status"] == "error"
|
assert payload["status"] == "error"
|
||||||
|
|
||||||
available_models = _extract_available_models(payload["content"])
|
available_models = _extract_available_models(payload["content"])
|
||||||
assert set(available_models) == {"gemini-2.5-pro", "gpt-5", "gpt5nano", "openai/gpt-5-nano"}
|
assert set(available_models) == {"gemini-2.5-pro", "gpt-5.1", "gpt5nano", "openai/gpt-5-nano"}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
@@ -225,6 +225,6 @@ def test_error_listing_without_restrictions_shows_full_catalog(monkeypatch, rese
|
|||||||
|
|
||||||
available_models = _extract_available_models(payload["content"])
|
available_models = _extract_available_models(payload["content"])
|
||||||
assert "gemini-2.5-pro" in available_models
|
assert "gemini-2.5-pro" in available_models
|
||||||
assert "gpt-5" in available_models
|
assert any(model in available_models for model in {"gpt-5.1", "gpt-5"})
|
||||||
assert "grok-4" in available_models
|
assert "grok-4" in available_models
|
||||||
assert len(available_models) >= 5
|
assert len(available_models) >= 5
|
||||||
|
|||||||
@@ -98,9 +98,9 @@ class TestAutoModeProviderSelection:
|
|||||||
balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
||||||
|
|
||||||
# Should select appropriate OpenAI models based on new preference order
|
# Should select appropriate OpenAI models based on new preference order
|
||||||
assert extended_reasoning == "gpt-5-codex" # GPT-5-Codex prioritized for extended reasoning
|
assert extended_reasoning == "gpt-5.1-codex" # GPT-5.1 Codex prioritized for extended reasoning
|
||||||
assert fast_response == "gpt-5" # gpt-5 comes first in fast response preference
|
assert fast_response == "gpt-5.1" # gpt-5.1 comes first in fast response preference
|
||||||
assert balanced == "gpt-5" # gpt-5 for balanced
|
assert balanced == "gpt-5.1" # gpt-5.1 for balanced
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Restore original environment
|
# Restore original environment
|
||||||
|
|||||||
@@ -16,7 +16,12 @@ from tools.consensus import ConsensusTool
|
|||||||
# Directories for recorded HTTP interactions
|
# Directories for recorded HTTP interactions
|
||||||
CASSETTE_DIR = Path(__file__).parent / "openai_cassettes"
|
CASSETTE_DIR = Path(__file__).parent / "openai_cassettes"
|
||||||
CASSETTE_DIR.mkdir(exist_ok=True)
|
CASSETTE_DIR.mkdir(exist_ok=True)
|
||||||
CONSENSUS_CASSETTE_PATH = CASSETTE_DIR / "consensus_step1_gpt5_for.json"
|
|
||||||
|
# Mapping of OpenAI model names to their cassette files
|
||||||
|
CONSENSUS_CASSETTES = {
|
||||||
|
"gpt-5": CASSETTE_DIR / "consensus_step1_gpt5_for.json",
|
||||||
|
"gpt-5.1": CASSETTE_DIR / "consensus_step1_gpt51_for.json",
|
||||||
|
}
|
||||||
|
|
||||||
GEMINI_REPLAY_DIR = Path(__file__).parent / "gemini_cassettes"
|
GEMINI_REPLAY_DIR = Path(__file__).parent / "gemini_cassettes"
|
||||||
GEMINI_REPLAY_DIR.mkdir(exist_ok=True)
|
GEMINI_REPLAY_DIR.mkdir(exist_ok=True)
|
||||||
@@ -26,8 +31,15 @@ GEMINI_REPLAY_PATH = GEMINI_REPLAY_DIR / "consensus" / "step2_gemini25_flash_aga
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
async def test_consensus_multi_model_consultations(monkeypatch):
|
@pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.1"])
|
||||||
"""Exercise ConsensusTool against gpt-5 (supporting) and gemini-2.0-flash (critical)."""
|
async def test_consensus_multi_model_consultations(monkeypatch, openai_model):
|
||||||
|
"""Exercise ConsensusTool against OpenAI model (supporting) and gemini-2.5-flash (critical).
|
||||||
|
|
||||||
|
Tests both gpt-5 and gpt-5.1 to ensure regression coverage for both model families.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Get the cassette path for this model
|
||||||
|
consensus_cassette_path = CONSENSUS_CASSETTES[openai_model]
|
||||||
|
|
||||||
env_updates = {
|
env_updates = {
|
||||||
"DEFAULT_MODEL": "auto",
|
"DEFAULT_MODEL": "auto",
|
||||||
@@ -43,13 +55,14 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
"CUSTOM_API_URL",
|
"CUSTOM_API_URL",
|
||||||
]
|
]
|
||||||
|
|
||||||
recording_mode = not CONSENSUS_CASSETTE_PATH.exists() or not GEMINI_REPLAY_PATH.exists()
|
recording_mode = not consensus_cassette_path.exists() or not GEMINI_REPLAY_PATH.exists()
|
||||||
if recording_mode:
|
if recording_mode:
|
||||||
openai_key = env_updates["OPENAI_API_KEY"].strip()
|
openai_key = env_updates["OPENAI_API_KEY"].strip()
|
||||||
gemini_key = env_updates["GEMINI_API_KEY"].strip()
|
gemini_key = env_updates["GEMINI_API_KEY"].strip()
|
||||||
if (not openai_key or openai_key.startswith("dummy")) or (not gemini_key or gemini_key.startswith("dummy")):
|
if (not openai_key or openai_key.startswith("dummy")) or (not gemini_key or gemini_key.startswith("dummy")):
|
||||||
pytest.skip(
|
pytest.skip(
|
||||||
"Consensus cassette missing and OPENAI_API_KEY/GEMINI_API_KEY not configured. Provide real keys to record."
|
"Consensus cassette missing and OPENAI_API_KEY/GEMINI_API_KEY "
|
||||||
|
"not configured. Provide real keys to record."
|
||||||
)
|
)
|
||||||
|
|
||||||
GEMINI_REPLAY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
GEMINI_REPLAY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -66,27 +79,43 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
m.setenv("GEMINI_API_KEY", "dummy-key-for-replay")
|
m.setenv("GEMINI_API_KEY", "dummy-key-for-replay")
|
||||||
m.setenv("GOOGLE_GENAI_CLIENT_MODE", "replay")
|
m.setenv("GOOGLE_GENAI_CLIENT_MODE", "replay")
|
||||||
|
|
||||||
|
# Ensure restriction policies allow the latest OpenAI models under test
|
||||||
|
m.setenv("OPENAI_ALLOWED_MODELS", openai_model)
|
||||||
|
|
||||||
m.setenv("GOOGLE_GENAI_REPLAYS_DIRECTORY", str(GEMINI_REPLAY_DIR))
|
m.setenv("GOOGLE_GENAI_REPLAYS_DIRECTORY", str(GEMINI_REPLAY_DIR))
|
||||||
m.setenv("GOOGLE_GENAI_REPLAY_ID", GEMINI_REPLAY_ID)
|
m.setenv("GOOGLE_GENAI_REPLAY_ID", GEMINI_REPLAY_ID)
|
||||||
|
|
||||||
for key in keys_to_clear:
|
for key in keys_to_clear:
|
||||||
m.delenv(key, raising=False)
|
m.delenv(key, raising=False)
|
||||||
|
|
||||||
# Reset providers and register only OpenAI & Gemini for deterministic behavior
|
# Ensure we use the built-in OpenAI catalogue rather than leftovers from
|
||||||
|
# other tests that patch OPENAI_MODELS_CONFIG_PATH.
|
||||||
|
m.delenv("OPENAI_MODELS_CONFIG_PATH", raising=False)
|
||||||
|
|
||||||
|
# Reset providers/restrictions and register only OpenAI & Gemini for deterministic behavior
|
||||||
ModelProviderRegistry.reset_for_testing()
|
ModelProviderRegistry.reset_for_testing()
|
||||||
|
import utils.model_restrictions as model_restrictions
|
||||||
|
|
||||||
|
model_restrictions._restriction_service = None
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai import OpenAIModelProvider
|
||||||
|
|
||||||
|
# Earlier tests may override the OpenAI provider's registry by pointing
|
||||||
|
# OPENAI_MODELS_CONFIG_PATH at fixtures. Force a reload so model
|
||||||
|
# metadata is restored from conf/openai_models.json.
|
||||||
|
OpenAIModelProvider.reload_registry()
|
||||||
|
assert openai_model in OpenAIModelProvider.MODEL_CAPABILITIES
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
|
|
||||||
# Inject HTTP transport for OpenAI interactions
|
# Inject HTTP transport for OpenAI interactions
|
||||||
inject_transport(monkeypatch, CONSENSUS_CASSETTE_PATH)
|
inject_transport(monkeypatch, str(consensus_cassette_path))
|
||||||
|
|
||||||
tool = ConsensusTool()
|
tool = ConsensusTool()
|
||||||
|
|
||||||
models_to_consult = [
|
models_to_consult = [
|
||||||
{"model": "gpt-5", "stance": "for"},
|
{"model": openai_model, "stance": "for"},
|
||||||
{"model": "gemini-2.5-flash", "stance": "against"},
|
{"model": "gemini-2.5-flash", "stance": "against"},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -105,7 +134,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
step1_data = json.loads(step1_response[0].text)
|
step1_data = json.loads(step1_response[0].text)
|
||||||
|
|
||||||
assert step1_data["status"] == "analysis_and_first_model_consulted"
|
assert step1_data["status"] == "analysis_and_first_model_consulted"
|
||||||
assert step1_data["model_consulted"] == "gpt-5"
|
assert step1_data["model_consulted"] == openai_model
|
||||||
assert step1_data["model_response"]["status"] == "success"
|
assert step1_data["model_response"]["status"] == "success"
|
||||||
assert step1_data["model_response"]["metadata"]["provider"] == "openai"
|
assert step1_data["model_response"]["metadata"]["provider"] == "openai"
|
||||||
assert step1_data["model_response"]["verdict"]
|
assert step1_data["model_response"]["verdict"]
|
||||||
@@ -118,7 +147,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
summary_for_step2 = step1_data["model_response"]["verdict"][:200]
|
summary_for_step2 = step1_data["model_response"]["verdict"][:200]
|
||||||
|
|
||||||
step2_arguments = {
|
step2_arguments = {
|
||||||
"step": f"Incorporated gpt-5 perspective: {summary_for_step2}",
|
"step": f"Incorporated {openai_model} perspective: {summary_for_step2}",
|
||||||
"step_number": 2,
|
"step_number": 2,
|
||||||
"total_steps": len(models_to_consult),
|
"total_steps": len(models_to_consult),
|
||||||
"next_step_required": False,
|
"next_step_required": False,
|
||||||
@@ -138,7 +167,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
assert step2_data["model_response"]["metadata"]["provider"] == "google"
|
assert step2_data["model_response"]["metadata"]["provider"] == "google"
|
||||||
assert step2_data["model_response"]["verdict"]
|
assert step2_data["model_response"]["verdict"]
|
||||||
assert step2_data["complete_consensus"]["models_consulted"] == [
|
assert step2_data["complete_consensus"]["models_consulted"] == [
|
||||||
"gpt-5:for",
|
f"{openai_model}:for",
|
||||||
"gemini-2.5-flash:against",
|
"gemini-2.5-flash:against",
|
||||||
]
|
]
|
||||||
assert step2_data["consensus_complete"] is True
|
assert step2_data["consensus_complete"] is True
|
||||||
@@ -159,7 +188,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
gemini_provider._client = None
|
gemini_provider._client = None
|
||||||
|
|
||||||
# Ensure cassettes exist for future replays
|
# Ensure cassettes exist for future replays
|
||||||
assert CONSENSUS_CASSETTE_PATH.exists()
|
assert consensus_cassette_path.exists()
|
||||||
assert GEMINI_REPLAY_PATH.exists()
|
assert GEMINI_REPLAY_PATH.exists()
|
||||||
|
|
||||||
# Clean up provider registry state after test
|
# Clean up provider registry state after test
|
||||||
|
|||||||
@@ -37,14 +37,14 @@ class TestIntelligentFallback:
|
|||||||
|
|
||||||
@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
|
@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
|
||||||
def test_prefers_openai_o3_mini_when_available(self):
|
def test_prefers_openai_o3_mini_when_available(self):
|
||||||
"""Test that gpt-5 is preferred when OpenAI API key is available (based on new preference order)"""
|
"""Test that gpt-5.1 is preferred when OpenAI API key is available (based on new preference order)"""
|
||||||
# Register only OpenAI provider for this test
|
# Register only OpenAI provider for this test
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
|
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
|
||||||
assert fallback_model == "gpt-5" # Based on new preference order: gpt-5 before o4-mini
|
assert fallback_model == "gpt-5.1" # Based on new preference order: gpt-5.1 before o4-mini
|
||||||
|
|
||||||
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
|
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
|
||||||
def test_prefers_gemini_flash_when_openai_unavailable(self):
|
def test_prefers_gemini_flash_when_openai_unavailable(self):
|
||||||
@@ -147,8 +147,8 @@ class TestIntelligentFallback:
|
|||||||
|
|
||||||
history, tokens = build_conversation_history(context, model_context=None)
|
history, tokens = build_conversation_history(context, model_context=None)
|
||||||
|
|
||||||
# Verify that ModelContext was called with gpt-5 (the intelligent fallback based on new preference order)
|
# Verify that ModelContext was called with gpt-5.1 (the intelligent fallback based on new preference order)
|
||||||
mock_context_class.assert_called_once_with("gpt-5")
|
mock_context_class.assert_called_once_with("gpt-5.1")
|
||||||
|
|
||||||
def test_auto_mode_with_gemini_only(self):
|
def test_auto_mode_with_gemini_only(self):
|
||||||
"""Test auto mode behavior when only Gemini API key is available"""
|
"""Test auto mode behavior when only Gemini API key is available"""
|
||||||
|
|||||||
@@ -50,6 +50,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider.validate_model_name("o4-mini") is True
|
assert provider.validate_model_name("o4-mini") is True
|
||||||
assert provider.validate_model_name("gpt-5") is True
|
assert provider.validate_model_name("gpt-5") is True
|
||||||
assert provider.validate_model_name("gpt-5-mini") is True
|
assert provider.validate_model_name("gpt-5-mini") is True
|
||||||
|
assert provider.validate_model_name("gpt-5.1") is True
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex") is True
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex-mini") is True
|
||||||
|
|
||||||
# Test valid aliases
|
# Test valid aliases
|
||||||
assert provider.validate_model_name("mini") is True
|
assert provider.validate_model_name("mini") is True
|
||||||
@@ -59,6 +62,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider.validate_model_name("gpt5") is True
|
assert provider.validate_model_name("gpt5") is True
|
||||||
assert provider.validate_model_name("gpt5-mini") is True
|
assert provider.validate_model_name("gpt5-mini") is True
|
||||||
assert provider.validate_model_name("gpt5mini") is True
|
assert provider.validate_model_name("gpt5mini") is True
|
||||||
|
assert provider.validate_model_name("gpt5.1") is True
|
||||||
|
assert provider.validate_model_name("gpt5.1-codex") is True
|
||||||
|
assert provider.validate_model_name("codex-mini") is True
|
||||||
|
|
||||||
# Test invalid model
|
# Test invalid model
|
||||||
assert provider.validate_model_name("invalid-model") is False
|
assert provider.validate_model_name("invalid-model") is False
|
||||||
@@ -77,6 +83,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider._resolve_model_name("gpt5") == "gpt-5"
|
assert provider._resolve_model_name("gpt5") == "gpt-5"
|
||||||
assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
|
assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
|
||||||
assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
|
assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
|
||||||
|
assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
|
||||||
|
assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
|
||||||
|
assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
|
||||||
|
|
||||||
# Test full name passthrough
|
# Test full name passthrough
|
||||||
assert provider._resolve_model_name("o3") == "o3"
|
assert provider._resolve_model_name("o3") == "o3"
|
||||||
@@ -86,6 +95,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider._resolve_model_name("o4-mini") == "o4-mini"
|
assert provider._resolve_model_name("o4-mini") == "o4-mini"
|
||||||
assert provider._resolve_model_name("gpt-5") == "gpt-5"
|
assert provider._resolve_model_name("gpt-5") == "gpt-5"
|
||||||
assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
|
assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
|
||||||
|
assert provider._resolve_model_name("gpt-5.1") == "gpt-5.1"
|
||||||
|
assert provider._resolve_model_name("gpt-5.1-codex") == "gpt-5.1-codex"
|
||||||
|
assert provider._resolve_model_name("gpt-5.1-codex-mini") == "gpt-5.1-codex-mini"
|
||||||
|
|
||||||
def test_get_capabilities_o3(self):
|
def test_get_capabilities_o3(self):
|
||||||
"""Test getting model capabilities for O3."""
|
"""Test getting model capabilities for O3."""
|
||||||
@@ -146,6 +158,36 @@ class TestOpenAIProvider:
|
|||||||
assert capabilities.supports_function_calling is True
|
assert capabilities.supports_function_calling is True
|
||||||
assert capabilities.supports_temperature is True
|
assert capabilities.supports_temperature is True
|
||||||
|
|
||||||
|
def test_get_capabilities_gpt51(self):
|
||||||
|
"""Test GPT-5.1 capabilities reflect new metadata."""
|
||||||
|
provider = OpenAIModelProvider("test-key")
|
||||||
|
|
||||||
|
capabilities = provider.get_capabilities("gpt-5.1")
|
||||||
|
assert capabilities.model_name == "gpt-5.1"
|
||||||
|
assert capabilities.supports_streaming is True
|
||||||
|
assert capabilities.supports_function_calling is True
|
||||||
|
assert capabilities.supports_json_mode is True
|
||||||
|
assert capabilities.allow_code_generation is True
|
||||||
|
|
||||||
|
def test_get_capabilities_gpt51_codex(self):
|
||||||
|
"""Test GPT-5.1 Codex is responses-only and non-streaming."""
|
||||||
|
provider = OpenAIModelProvider("test-key")
|
||||||
|
|
||||||
|
capabilities = provider.get_capabilities("gpt-5.1-codex")
|
||||||
|
assert capabilities.model_name == "gpt-5.1-codex"
|
||||||
|
assert capabilities.supports_streaming is False
|
||||||
|
assert capabilities.use_openai_response_api is True
|
||||||
|
assert capabilities.allow_code_generation is True
|
||||||
|
|
||||||
|
def test_get_capabilities_gpt51_codex_mini(self):
|
||||||
|
"""Test GPT-5.1 Codex mini exposes streaming and code generation."""
|
||||||
|
provider = OpenAIModelProvider("test-key")
|
||||||
|
|
||||||
|
capabilities = provider.get_capabilities("gpt-5.1-codex-mini")
|
||||||
|
assert capabilities.model_name == "gpt-5.1-codex-mini"
|
||||||
|
assert capabilities.supports_streaming is True
|
||||||
|
assert capabilities.allow_code_generation is True
|
||||||
|
|
||||||
@patch("providers.openai_compatible.OpenAI")
|
@patch("providers.openai_compatible.OpenAI")
|
||||||
def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class):
|
def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class):
|
||||||
"""Test that generate_content resolves aliases before making API calls.
|
"""Test that generate_content resolves aliases before making API calls.
|
||||||
|
|||||||
@@ -98,8 +98,8 @@ class TestModelSelection:
|
|||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
|
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
|
||||||
# OpenAI prefers GPT-5-Codex for extended reasoning (coding tasks)
|
# OpenAI prefers GPT-5.1-Codex for extended reasoning (coding tasks)
|
||||||
assert model == "gpt-5-codex"
|
assert model == "gpt-5.1-codex"
|
||||||
|
|
||||||
def test_extended_reasoning_with_gemini_only(self):
|
def test_extended_reasoning_with_gemini_only(self):
|
||||||
"""Test EXTENDED_REASONING prefers pro when only Gemini is available."""
|
"""Test EXTENDED_REASONING prefers pro when only Gemini is available."""
|
||||||
@@ -133,8 +133,8 @@ class TestModelSelection:
|
|||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
|
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
|
||||||
# OpenAI now prefers gpt-5 for fast response (based on our new preference order)
|
# OpenAI now prefers gpt-5.1 for fast response (based on our new preference order)
|
||||||
assert model == "gpt-5"
|
assert model == "gpt-5.1"
|
||||||
|
|
||||||
def test_fast_response_with_gemini_only(self):
|
def test_fast_response_with_gemini_only(self):
|
||||||
"""Test FAST_RESPONSE prefers flash when only Gemini is available."""
|
"""Test FAST_RESPONSE prefers flash when only Gemini is available."""
|
||||||
@@ -167,8 +167,8 @@ class TestModelSelection:
|
|||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
||||||
# OpenAI prefers gpt-5 for balanced (based on our new preference order)
|
# OpenAI prefers gpt-5.1 for balanced (based on our new preference order)
|
||||||
assert model == "gpt-5"
|
assert model == "gpt-5.1"
|
||||||
|
|
||||||
def test_no_category_uses_balanced_logic(self):
|
def test_no_category_uses_balanced_logic(self):
|
||||||
"""Test that no category specified uses balanced logic."""
|
"""Test that no category specified uses balanced logic."""
|
||||||
@@ -195,7 +195,7 @@ class TestFlexibleModelSelection:
|
|||||||
"env": {"OPENAI_API_KEY": "test-key"},
|
"env": {"OPENAI_API_KEY": "test-key"},
|
||||||
"provider_type": ProviderType.OPENAI,
|
"provider_type": ProviderType.OPENAI,
|
||||||
"category": ToolModelCategory.EXTENDED_REASONING,
|
"category": ToolModelCategory.EXTENDED_REASONING,
|
||||||
"expected": "gpt-5-codex", # GPT-5-Codex prioritized for coding tasks
|
"expected": "gpt-5.1-codex", # GPT-5.1-Codex prioritized for coding tasks
|
||||||
},
|
},
|
||||||
# Case 2: Gemini provider for fast response
|
# Case 2: Gemini provider for fast response
|
||||||
{
|
{
|
||||||
@@ -209,7 +209,7 @@ class TestFlexibleModelSelection:
|
|||||||
"env": {"OPENAI_API_KEY": "test-key"},
|
"env": {"OPENAI_API_KEY": "test-key"},
|
||||||
"provider_type": ProviderType.OPENAI,
|
"provider_type": ProviderType.OPENAI,
|
||||||
"category": ToolModelCategory.FAST_RESPONSE,
|
"category": ToolModelCategory.FAST_RESPONSE,
|
||||||
"expected": "gpt-5", # Based on new preference order
|
"expected": "gpt-5.1", # Based on new preference order
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -209,6 +209,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider.validate_model_name("o4-mini")
|
assert provider.validate_model_name("o4-mini")
|
||||||
assert provider.validate_model_name("o4mini")
|
assert provider.validate_model_name("o4mini")
|
||||||
assert provider.validate_model_name("o4-mini")
|
assert provider.validate_model_name("o4-mini")
|
||||||
|
assert provider.validate_model_name("gpt-5.1")
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex")
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex-mini")
|
||||||
assert not provider.validate_model_name("gpt-4o")
|
assert not provider.validate_model_name("gpt-4o")
|
||||||
assert not provider.validate_model_name("invalid-model")
|
assert not provider.validate_model_name("invalid-model")
|
||||||
|
|
||||||
@@ -219,3 +222,20 @@ class TestOpenAIProvider:
|
|||||||
aliases = ["o3", "o3mini", "o3-mini", "o4-mini", "o4mini"]
|
aliases = ["o3", "o3mini", "o3-mini", "o4-mini", "o4mini"]
|
||||||
for alias in aliases:
|
for alias in aliases:
|
||||||
assert not provider.get_capabilities(alias).supports_extended_thinking
|
assert not provider.get_capabilities(alias).supports_extended_thinking
|
||||||
|
|
||||||
|
def test_gpt51_family_capabilities(self):
|
||||||
|
"""Ensure GPT-5.1 family exposes correct capability flags."""
|
||||||
|
provider = OpenAIModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
base = provider.get_capabilities("gpt-5.1")
|
||||||
|
assert base.supports_streaming
|
||||||
|
assert base.allow_code_generation
|
||||||
|
|
||||||
|
codex = provider.get_capabilities("gpt-5.1-codex")
|
||||||
|
assert not codex.supports_streaming
|
||||||
|
assert codex.use_openai_response_api
|
||||||
|
assert codex.allow_code_generation
|
||||||
|
|
||||||
|
codex_mini = provider.get_capabilities("gpt-5.1-codex-mini")
|
||||||
|
assert codex_mini.supports_streaming
|
||||||
|
assert codex_mini.allow_code_generation
|
||||||
|
|||||||
@@ -54,6 +54,9 @@ class TestSupportedModelsAliases:
|
|||||||
assert "o3mini" in provider.MODEL_CAPABILITIES["o3-mini"].aliases
|
assert "o3mini" in provider.MODEL_CAPABILITIES["o3-mini"].aliases
|
||||||
assert "o3pro" in provider.MODEL_CAPABILITIES["o3-pro"].aliases
|
assert "o3pro" in provider.MODEL_CAPABILITIES["o3-pro"].aliases
|
||||||
assert "gpt4.1" in provider.MODEL_CAPABILITIES["gpt-4.1"].aliases
|
assert "gpt4.1" in provider.MODEL_CAPABILITIES["gpt-4.1"].aliases
|
||||||
|
assert "gpt5.1" in provider.MODEL_CAPABILITIES["gpt-5.1"].aliases
|
||||||
|
assert "gpt5.1-codex" in provider.MODEL_CAPABILITIES["gpt-5.1-codex"].aliases
|
||||||
|
assert "codex-mini" in provider.MODEL_CAPABILITIES["gpt-5.1-codex-mini"].aliases
|
||||||
|
|
||||||
# Test alias resolution
|
# Test alias resolution
|
||||||
assert provider._resolve_model_name("mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
assert provider._resolve_model_name("mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
||||||
@@ -61,10 +64,14 @@ class TestSupportedModelsAliases:
|
|||||||
assert provider._resolve_model_name("o3pro") == "o3-pro" # o3pro resolves to o3-pro
|
assert provider._resolve_model_name("o3pro") == "o3-pro" # o3pro resolves to o3-pro
|
||||||
assert provider._resolve_model_name("o4mini") == "o4-mini"
|
assert provider._resolve_model_name("o4mini") == "o4-mini"
|
||||||
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
|
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
|
||||||
|
assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
|
||||||
|
assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
|
||||||
|
assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
|
||||||
|
|
||||||
# Test case insensitive resolution
|
# Test case insensitive resolution
|
||||||
assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
||||||
assert provider._resolve_model_name("O3MINI") == "o3-mini"
|
assert provider._resolve_model_name("O3MINI") == "o3-mini"
|
||||||
|
assert provider._resolve_model_name("Gpt5.1") == "gpt-5.1"
|
||||||
|
|
||||||
def test_xai_provider_aliases(self):
|
def test_xai_provider_aliases(self):
|
||||||
"""Test XAI provider's alias structure."""
|
"""Test XAI provider's alias structure."""
|
||||||
|
|||||||
Reference in New Issue
Block a user