diff --git a/conf/azure_models.json b/conf/azure_models.json index e1e3a3f..35d6e74 100644 --- a/conf/azure_models.json +++ b/conf/azure_models.json @@ -17,6 +17,7 @@ "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)", "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)", "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range", + "use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.", "description": "Human-readable description of the model", "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" } @@ -37,6 +38,7 @@ "max_image_size_mb": 0.0, "supports_temperature": false, "temperature_constraint": "fixed", + "use_openai_response_api": false, "description": "GPT-4 (128K context, 16K output)", "intelligence_score": 10 } diff --git a/docs/azure_openai.md b/docs/azure_openai.md index d4f6d2f..57463fa 100644 --- a/docs/azure_openai.md +++ b/docs/azure_openai.md @@ -16,7 +16,7 @@ Without the key and endpoint the provider is skipped entirely. Leave the key bla ## 2. Define Deployments in `conf/azure_models.json` -Azure models live in `conf/azure_models.json` (or the file pointed to by `AZURE_MODELS_CONFIG_PATH`). Each entry follows the same schema as [`ModelCapabilities`](../providers/shared/model_capabilities.py) with one additional required key: `deployment`. This field must exactly match the deployment name shown in the Azure Portal (for example `prod-gpt4o`). The provider routes requests by that value, so omitting it or using the wrong name will cause the server to skip the model. 
+Azure models live in `conf/azure_models.json` (or the file pointed to by `AZURE_MODELS_CONFIG_PATH`). Each entry follows the same schema as [`ModelCapabilities`](../providers/shared/model_capabilities.py) with one additional required key: `deployment`. This field must exactly match the deployment name shown in the Azure Portal (for example `prod-gpt4o`). The provider routes requests by that value, so omitting it or using the wrong name will cause the server to skip the model. You can also opt into extra behaviour per model; for example, set `use_openai_response_api` to `true` when an Azure deployment requires the `/responses` endpoint (O-series reasoning models), or leave it unset (the default is `false`) for standard chat completions. ```json { @@ -30,7 +30,8 @@ Azure models live in `conf/azure_models.json` (or the file pointed to by `AZURE_ "max_output_tokens": 128000, "supports_temperature": false, "temperature_constraint": "fixed", - "aliases": ["gpt4o-eu"] + "aliases": ["gpt4o-eu"], + "use_openai_response_api": false } ] } @@ -41,6 +42,7 @@ Tips: - Copy `conf/azure_models.json` into your repo and commit it, or point `AZURE_MODELS_CONFIG_PATH` at a custom path. - Add one object per deployment. Aliases are optional but help when you want short names like `gpt4o-eu`. - All capability fields are optional except `model_name`, `deployment`, and `friendly_name`. Anything you omit falls back to conservative defaults. +- Set `use_openai_response_api` to `true` for models that must call Azure's `/responses` endpoint (for example, O3 deployments). Leave it unset (or set it to `false`) for standard chat completions. ## 3. 
Optional Restrictions diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py index 168549f..ed580e6 100644 --- a/providers/openai_compatible.py +++ b/providers/openai_compatible.py @@ -597,8 +597,16 @@ class OpenAICompatibleProvider(ModelProvider): completion_params[key] = value # Check if this model needs the Responses API endpoint - # Both o3-pro and gpt-5-codex use the new Responses API - if resolved_model in ["o3-pro", "gpt-5-codex"]: + # Prefer capability metadata; fall back to static map when capabilities unavailable + use_responses_api = False + if capabilities is not None: + use_responses_api = getattr(capabilities, "use_openai_response_api", False) + else: + static_capabilities = self.get_all_model_capabilities().get(resolved_model) + if static_capabilities is not None: + use_responses_api = getattr(static_capabilities, "use_openai_response_api", False) + + if use_responses_api: # These models require the /v1/responses endpoint for stateful context # If it fails, we should not fall back to chat/completions return self._generate_with_responses_endpoint( diff --git a/providers/openai_provider.py b/providers/openai_provider.py index 5b9e53e..fedb7f5 100644 --- a/providers/openai_provider.py +++ b/providers/openai_provider.py @@ -135,6 +135,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): temperature_constraint=TemperatureConstraint.create("fixed"), description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. 
Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.", aliases=["o3pro"], + use_openai_response_api=True, ), "o4-mini": ModelCapabilities( provider=ProviderType.OPENAI, @@ -191,6 +192,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): temperature_constraint=TemperatureConstraint.create("range"), description="GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.", aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"], + use_openai_response_api=True, ), } diff --git a/providers/shared/model_capabilities.py b/providers/shared/model_capabilities.py index cf7e208..e3aa0a1 100644 --- a/providers/shared/model_capabilities.py +++ b/providers/shared/model_capabilities.py @@ -50,6 +50,7 @@ class ModelCapabilities: supports_images: bool = False supports_json_mode: bool = False supports_temperature: bool = True + use_openai_response_api: bool = False # Additional attributes max_image_size_mb: float = 0.0