diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py
index 2da361d..5c93a3c 100644
--- a/providers/openai_compatible.py
+++ b/providers/openai_compatible.py
@@ -571,9 +571,10 @@ class OpenAICompatibleProvider(ModelProvider):
                     continue  # Skip unsupported parameters for reasoning models
                 completion_params[key] = value

-        # Check if this is o3-pro and needs the responses endpoint
-        if resolved_model == "o3-pro":
-            # This model requires the /v1/responses endpoint
+        # Check if this model needs the Responses API endpoint
+        # Both o3-pro and gpt-5-codex use the new Responses API
+        if resolved_model in ["o3-pro", "gpt-5-codex"]:
+            # These models require the /v1/responses endpoint for stateful context
             # If it fails, we should not fall back to chat/completions
             return self._generate_with_responses_endpoint(
                 model_name=resolved_model,
diff --git a/providers/openai_provider.py b/providers/openai_provider.py
index b118d9c..99cf722 100644
--- a/providers/openai_provider.py
+++ b/providers/openai_provider.py
@@ -174,6 +174,25 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
             aliases=["gpt4.1"],
         ),
+        "gpt-5-codex": ModelCapabilities(
+            provider=ProviderType.OPENAI,
+            model_name="gpt-5-codex",
+            friendly_name="OpenAI (GPT-5 Codex)",
+            intelligence_score=17,  # Higher than GPT-5 for coding tasks
+            context_window=400_000,  # 400K tokens (same as GPT-5)
+            max_output_tokens=128_000,  # 128K output tokens
+            supports_extended_thinking=True,  # Responses API supports reasoning tokens
+            supports_system_prompts=True,
+            supports_streaming=True,
+            supports_function_calling=True,  # Enhanced for agentic software engineering
+            supports_json_mode=True,
+            supports_images=True,  # Screenshots, wireframes, diagrams
+            max_image_size_mb=20.0,  # 20MB per OpenAI docs
+            supports_temperature=True,
+            temperature_constraint=TemperatureConstraint.create("range"),
+            description="GPT-5 Codex (400K context) - Uses Responses API for 40-80% cost savings. Specialized for coding, refactoring, and software architecture. 3% better performance on SWE-bench.",
+            aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"],
+        ),
     }

     def __init__(self, api_key: str, **kwargs):
@@ -290,15 +309,18 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
         if category == ToolModelCategory.EXTENDED_REASONING:
             # Prefer models with extended thinking support
-            preferred = find_first(["o3", "o3-pro", "gpt-5"])
+            # GPT-5-Codex first for coding tasks (uses Responses API with 40-80% cost savings)
+            preferred = find_first(["gpt-5-codex", "o3", "o3-pro", "gpt-5"])
             return preferred if preferred else allowed_models[0]
         elif category == ToolModelCategory.FAST_RESPONSE:
             # Prefer fast, cost-efficient models
-            preferred = find_first(["gpt-5", "gpt-5-mini", "o4-mini", "o3-mini"])
+            # GPT-5 models for speed, GPT-5-Codex after (premium pricing but cached)
+            preferred = find_first(["gpt-5", "gpt-5-mini", "gpt-5-codex", "o4-mini", "o3-mini"])
             return preferred if preferred else allowed_models[0]
         else:  # BALANCED or default
             # Prefer balanced performance/cost models
-            preferred = find_first(["gpt-5", "gpt-5-mini", "o4-mini", "o3-mini"])
+            # Include GPT-5-Codex for coding workflows
+            preferred = find_first(["gpt-5", "gpt-5-codex", "gpt-5-mini", "o4-mini", "o3-mini"])
             return preferred if preferred else allowed_models[0]
diff --git a/tests/test_auto_mode_comprehensive.py b/tests/test_auto_mode_comprehensive.py
index 9326207..56cc9e7 100644
--- a/tests/test_auto_mode_comprehensive.py
+++ b/tests/test_auto_mode_comprehensive.py
@@ -93,7 +93,7 @@ class TestAutoModeComprehensive:
                     "OPENROUTER_API_KEY": None,
                 },
                 {
-                    "EXTENDED_REASONING": "o3",  # O3 for deep reasoning
+                    "EXTENDED_REASONING": "gpt-5-codex",  # GPT-5-Codex prioritized for coding tasks
                     "FAST_RESPONSE": "gpt-5",  # Prefer gpt-5 for speed
                     "BALANCED": "gpt-5",  # Prefer gpt-5 for balanced
                 },
diff --git a/tests/test_auto_mode_provider_selection.py b/tests/test_auto_mode_provider_selection.py
index d59e71c..4e8666d 100644
--- a/tests/test_auto_mode_provider_selection.py
+++ b/tests/test_auto_mode_provider_selection.py
@@ -98,7 +98,7 @@ class TestAutoModeProviderSelection:
         balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)

         # Should select appropriate OpenAI models based on new preference order
-        assert extended_reasoning == "o3"  # O3 for extended reasoning
+        assert extended_reasoning == "gpt-5-codex"  # GPT-5-Codex prioritized for extended reasoning
         assert fast_response == "gpt-5"  # gpt-5 comes first in fast response preference
         assert balanced == "gpt-5"  # gpt-5 for balanced
diff --git a/tests/test_per_tool_model_defaults.py b/tests/test_per_tool_model_defaults.py
index 1099dbf..dd2af1d 100644
--- a/tests/test_per_tool_model_defaults.py
+++ b/tests/test_per_tool_model_defaults.py
@@ -95,8 +95,8 @@ class TestModelSelection:
         ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

         model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
-        # OpenAI prefers o3 for extended reasoning
-        assert model == "o3"
+        # OpenAI prefers GPT-5-Codex for extended reasoning (coding tasks)
+        assert model == "gpt-5-codex"

     def test_extended_reasoning_with_gemini_only(self):
         """Test EXTENDED_REASONING prefers pro when only Gemini is available."""
@@ -192,7 +192,7 @@ class TestFlexibleModelSelection:
                 "env": {"OPENAI_API_KEY": "test-key"},
                 "provider_type": ProviderType.OPENAI,
                 "category": ToolModelCategory.EXTENDED_REASONING,
-                "expected": "o3",
+                "expected": "gpt-5-codex",  # GPT-5-Codex prioritized for coding tasks
             },
             # Case 2: Gemini provider for fast response
             {
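
For reviewers, a minimal standalone sketch of the two behaviors this diff changes: endpoint routing and category preference order. Every name below (route_endpoint, the find_first stub, the allowed set) is illustrative rather than taken from the codebase; only the model lists and preference orders mirror the diff.

# Minimal sketch, assuming plain functions rather than the provider classes.
RESPONSES_API_MODELS = {"o3-pro", "gpt-5-codex"}

EXTENDED_REASONING_ORDER = ["gpt-5-codex", "o3", "o3-pro", "gpt-5"]
FAST_RESPONSE_ORDER = ["gpt-5", "gpt-5-mini", "gpt-5-codex", "o4-mini", "o3-mini"]
BALANCED_ORDER = ["gpt-5", "gpt-5-codex", "gpt-5-mini", "o4-mini", "o3-mini"]


def route_endpoint(resolved_model: str) -> str:
    # o3-pro and gpt-5-codex must use /v1/responses; per the diff there is
    # no fallback to chat/completions if that call fails.
    if resolved_model in RESPONSES_API_MODELS:
        return "/v1/responses"
    return "/v1/chat/completions"


def find_first(preference: list[str], allowed: set[str]) -> str | None:
    # First preferred model that the current configuration allows.
    return next((m for m in preference if m in allowed), None)


if __name__ == "__main__":
    allowed = {"gpt-5", "gpt-5-codex", "o4-mini"}
    assert route_endpoint("gpt-5-codex") == "/v1/responses"
    assert route_endpoint("gpt-5") == "/v1/chat/completions"
    # Matches the updated test expectations: codex now wins EXTENDED_REASONING,
    # while gpt-5 still wins FAST_RESPONSE and BALANCED.
    assert find_first(EXTENDED_REASONING_ORDER, allowed) == "gpt-5-codex"
    assert find_first(FAST_RESPONSE_ORDER, allowed) == "gpt-5"
    assert find_first(BALANCED_ORDER, allowed) == "gpt-5"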