Merge pull request #331 from BjornMelin/feat/openai-gpt-5.1-support
feat: add OpenAI GPT-5.1 family support
This commit is contained in:
11
.env.example
11
.env.example
@@ -55,7 +55,8 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here
|
|||||||
|
|
||||||
# Optional: Default model to use
|
# Optional: Default model to use
|
||||||
# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
|
# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
|
||||||
# 'gpt-5', 'gpt-5-mini', 'grok', 'opus-4.1', 'sonnet-4.1', or any DIAL model if DIAL is configured
|
# 'gpt-5.1', 'gpt-5.1-codex', 'gpt-5.1-codex-mini', 'gpt-5', 'gpt-5-mini', 'grok',
|
||||||
|
# 'opus-4.1', 'sonnet-4.1', or any DIAL model if DIAL is configured
|
||||||
# When set to 'auto', Claude will select the best model for each task
|
# When set to 'auto', Claude will select the best model for each task
|
||||||
# Defaults to 'auto' if not specified
|
# Defaults to 'auto' if not specified
|
||||||
DEFAULT_MODEL=auto
|
DEFAULT_MODEL=auto
|
||||||
@@ -79,12 +80,15 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
|
|||||||
# If you want to disable a provider entirely, don't set its API key
|
# If you want to disable a provider entirely, don't set its API key
|
||||||
#
|
#
|
||||||
# Supported OpenAI models:
|
# Supported OpenAI models:
|
||||||
|
# - gpt-5.1 (400K context, 128K output, reasoning tokens, streaming enabled)
|
||||||
|
# - gpt-5.1-codex (400K context, 128K output, coding specialization, Responses API only)
|
||||||
|
# - gpt-5.1-codex-mini (400K context, 128K output, cost-efficient Codex with streaming)
|
||||||
|
# - gpt-5 (400K context, 128K output, reasoning tokens)
|
||||||
|
# - gpt-5-mini (400K context, 128K output, reasoning tokens)
|
||||||
# - o3 (200K context, high reasoning)
|
# - o3 (200K context, high reasoning)
|
||||||
# - o3-mini (200K context, balanced)
|
# - o3-mini (200K context, balanced)
|
||||||
# - o4-mini (200K context, latest balanced, temperature=1.0 only)
|
# - o4-mini (200K context, latest balanced, temperature=1.0 only)
|
||||||
# - o4-mini-high (200K context, enhanced reasoning, temperature=1.0 only)
|
# - o4-mini-high (200K context, enhanced reasoning, temperature=1.0 only)
|
||||||
# - gpt-5 (400K context, 128K output, reasoning tokens)
|
|
||||||
# - gpt-5-mini (400K context, 128K output, reasoning tokens)
|
|
||||||
# - mini (shorthand for o4-mini)
|
# - mini (shorthand for o4-mini)
|
||||||
#
|
#
|
||||||
# Supported Google/Gemini models:
|
# Supported Google/Gemini models:
|
||||||
@@ -122,6 +126,7 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
|
|||||||
#
|
#
|
||||||
# Examples:
|
# Examples:
|
||||||
# OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control)
|
# OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control)
|
||||||
|
# OPENAI_ALLOWED_MODELS=gpt-5.1,gpt-5.1-codex # Pin to GPT-5.1 family
|
||||||
# GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses)
|
# GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses)
|
||||||
# XAI_ALLOWED_MODELS=grok-3 # Only allow standard GROK (not fast variant)
|
# XAI_ALLOWED_MODELS=grok-3 # Only allow standard GROK (not fast variant)
|
||||||
# OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization
|
# OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -183,6 +183,7 @@ CLAUDE.local.md
|
|||||||
.docker_cleaned
|
.docker_cleaned
|
||||||
logs/
|
logs/
|
||||||
*.backup
|
*.backup
|
||||||
|
*.backup-*.json
|
||||||
/.desktop_configured
|
/.desktop_configured
|
||||||
|
|
||||||
/worktrees/
|
/worktrees/
|
||||||
|
|||||||
@@ -232,6 +232,81 @@
|
|||||||
"supports_temperature": true,
|
"supports_temperature": true,
|
||||||
"max_image_size_mb": 20.0,
|
"max_image_size_mb": 20.0,
|
||||||
"use_openai_response_api": true
|
"use_openai_response_api": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-5.1",
|
||||||
|
"friendly_name": "OpenAI (GPT-5.1)",
|
||||||
|
"aliases": [
|
||||||
|
"gpt5.1",
|
||||||
|
"gpt-5.1",
|
||||||
|
"5.1"
|
||||||
|
],
|
||||||
|
"intelligence_score": 18,
|
||||||
|
"description": "GPT-5.1 (400K context, 128K output) - Flagship reasoning model with configurable thinking effort and vision support.",
|
||||||
|
"context_window": 400000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"supports_extended_thinking": true,
|
||||||
|
"supports_system_prompts": true,
|
||||||
|
"supports_streaming": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_json_mode": true,
|
||||||
|
"supports_images": true,
|
||||||
|
"supports_temperature": true,
|
||||||
|
"max_image_size_mb": 20.0,
|
||||||
|
"default_reasoning_effort": "medium",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"temperature_constraint": "fixed"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-5.1-codex",
|
||||||
|
"friendly_name": "OpenAI (GPT-5.1 Codex)",
|
||||||
|
"aliases": [
|
||||||
|
"gpt5.1-codex",
|
||||||
|
"gpt-5.1-codex",
|
||||||
|
"gpt5.1code",
|
||||||
|
"gpt-5.1-code",
|
||||||
|
"codex-5.1"
|
||||||
|
],
|
||||||
|
"intelligence_score": 19,
|
||||||
|
"description": "GPT-5.1 Codex (400K context, 128K output) - Agentic coding specialization available through the Responses API.",
|
||||||
|
"context_window": 400000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"supports_extended_thinking": true,
|
||||||
|
"supports_system_prompts": true,
|
||||||
|
"supports_streaming": false,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_json_mode": true,
|
||||||
|
"supports_images": true,
|
||||||
|
"supports_temperature": true,
|
||||||
|
"max_image_size_mb": 20.0,
|
||||||
|
"use_openai_response_api": true,
|
||||||
|
"default_reasoning_effort": "high",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"temperature_constraint": "fixed"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-5.1-codex-mini",
|
||||||
|
"friendly_name": "OpenAI (GPT-5.1 Codex mini)",
|
||||||
|
"aliases": [
|
||||||
|
"gpt5.1-codex-mini",
|
||||||
|
"gpt-5.1-codex-mini",
|
||||||
|
"codex-mini",
|
||||||
|
"5.1-codex-mini"
|
||||||
|
],
|
||||||
|
"intelligence_score": 16,
|
||||||
|
"description": "GPT-5.1 Codex mini (400K context, 128K output) - Cost-efficient Codex variant with streaming support.",
|
||||||
|
"context_window": 400000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"supports_extended_thinking": true,
|
||||||
|
"supports_system_prompts": true,
|
||||||
|
"supports_streaming": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_json_mode": true,
|
||||||
|
"supports_images": true,
|
||||||
|
"supports_temperature": true,
|
||||||
|
"max_image_size_mb": 20.0,
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"temperature_constraint": "fixed"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -366,6 +366,72 @@
|
|||||||
"description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
|
"description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
|
||||||
"intelligence_score": 8
|
"intelligence_score": 8
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"model_name": "openai/gpt-5.1",
|
||||||
|
"aliases": [
|
||||||
|
"gpt5.1",
|
||||||
|
"gpt-5.1",
|
||||||
|
"5.1"
|
||||||
|
],
|
||||||
|
"context_window": 400000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"supports_extended_thinking": true,
|
||||||
|
"supports_json_mode": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_images": true,
|
||||||
|
"max_image_size_mb": 20.0,
|
||||||
|
"supports_temperature": true,
|
||||||
|
"temperature_constraint": "fixed",
|
||||||
|
"default_reasoning_effort": "medium",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"description": "GPT-5.1 (400K context, 128K output) - Flagship reasoning model with configurable thinking effort and vision support",
|
||||||
|
"intelligence_score": 18
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "openai/gpt-5.1-codex",
|
||||||
|
"aliases": [
|
||||||
|
"gpt5.1-codex",
|
||||||
|
"gpt-5.1-codex",
|
||||||
|
"gpt5.1code",
|
||||||
|
"gpt-5.1-code",
|
||||||
|
"codex-5.1"
|
||||||
|
],
|
||||||
|
"context_window": 400000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"supports_extended_thinking": true,
|
||||||
|
"supports_json_mode": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_images": true,
|
||||||
|
"max_image_size_mb": 20.0,
|
||||||
|
"supports_temperature": true,
|
||||||
|
"temperature_constraint": "fixed",
|
||||||
|
"use_openai_response_api": true,
|
||||||
|
"default_reasoning_effort": "high",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"description": "GPT-5.1 Codex (400K context, 128K output) - Agentic coding specialization available through the Responses API",
|
||||||
|
"intelligence_score": 19
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "openai/gpt-5.1-codex-mini",
|
||||||
|
"aliases": [
|
||||||
|
"gpt5.1-codex-mini",
|
||||||
|
"gpt-5.1-codex-mini",
|
||||||
|
"codex-mini",
|
||||||
|
"5.1-codex-mini"
|
||||||
|
],
|
||||||
|
"context_window": 400000,
|
||||||
|
"max_output_tokens": 128000,
|
||||||
|
"supports_extended_thinking": true,
|
||||||
|
"supports_json_mode": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_images": true,
|
||||||
|
"max_image_size_mb": 20.0,
|
||||||
|
"supports_temperature": true,
|
||||||
|
"temperature_constraint": "fixed",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"description": "GPT-5.1 Codex mini (400K context, 128K output) - Cost-efficient Codex variant with streaming support",
|
||||||
|
"intelligence_score": 16
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"model_name": "x-ai/grok-4",
|
"model_name": "x-ai/grok-4",
|
||||||
"aliases": [
|
"aliases": [
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ Regardless of your default configuration, you can specify models per request:
|
|||||||
| **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks |
|
| **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks |
|
||||||
| **`o4-mini`** | OpenAI | 200K tokens | Latest reasoning model | Optimized for shorter contexts |
|
| **`o4-mini`** | OpenAI | 200K tokens | Latest reasoning model | Optimized for shorter contexts |
|
||||||
| **`gpt4.1`** | OpenAI | 1M tokens | Latest GPT-4 with extended context | Large codebase analysis, comprehensive reviews |
|
| **`gpt4.1`** | OpenAI | 1M tokens | Latest GPT-4 with extended context | Large codebase analysis, comprehensive reviews |
|
||||||
|
| **`gpt5.1`** (GPT-5.1) | OpenAI | 400K tokens | Flagship reasoning model with configurable thinking effort | Complex problems, balanced agent/coding flows |
|
||||||
|
| **`gpt5.1-codex`** (GPT-5.1 Codex) | OpenAI | 400K tokens | Agentic coding specialization (Responses API) | Advanced coding tasks, structured code generation |
|
||||||
|
| **`gpt5.1-codex-mini`** (GPT-5.1 Codex mini) | OpenAI | 400K tokens | Cost-efficient Codex variant with streaming | Balanced coding tasks, cost-conscious development |
|
||||||
| **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning |
|
| **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning |
|
||||||
| **`gpt5-mini`** (GPT-5 Mini) | OpenAI | 400K tokens | Efficient variant with reasoning | Balanced performance and capability |
|
| **`gpt5-mini`** (GPT-5 Mini) | OpenAI | 400K tokens | Efficient variant with reasoning | Balanced performance and capability |
|
||||||
| **`gpt5-nano`** (GPT-5 Nano) | OpenAI | 400K tokens | Fastest, cheapest GPT-5 variant | Summarization and classification tasks |
|
| **`gpt5-nano`** (GPT-5 Nano) | OpenAI | 400K tokens | Fastest, cheapest GPT-5 variant | Summarization and classification tasks |
|
||||||
@@ -61,6 +64,10 @@ cloud models (expensive/powerful) AND local models (free/private) in the same co
|
|||||||
- **Flash Lite 2.0**: Text-only lightweight model (no thinking support)
|
- **Flash Lite 2.0**: Text-only lightweight model (no thinking support)
|
||||||
- **O3/O4 Models**: Excellent reasoning, systematic analysis, 200K context
|
- **O3/O4 Models**: Excellent reasoning, systematic analysis, 200K context
|
||||||
- **GPT-4.1**: Extended context window (1M tokens), general capabilities
|
- **GPT-4.1**: Extended context window (1M tokens), general capabilities
|
||||||
|
- **GPT-5.1 Series**: Latest flagship reasoning models, 400K context
|
||||||
|
- **GPT-5.1**: Flagship model with configurable thinking effort and vision
|
||||||
|
- **GPT-5.1 Codex**: Agentic coding specialization (Responses API, non-streaming)
|
||||||
|
- **GPT-5.1 Codex mini**: Cost-efficient Codex variant with streaming support
|
||||||
- **GPT-5 Series**: Advanced reasoning models, 400K context
|
- **GPT-5 Series**: Advanced reasoning models, 400K context
|
||||||
- **GPT-5**: Full-featured with reasoning support and vision
|
- **GPT-5**: Full-featured with reasoning support and vision
|
||||||
- **GPT-5 Mini**: Balanced efficiency and capability
|
- **GPT-5 Mini**: Balanced efficiency and capability
|
||||||
@@ -161,7 +168,7 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
**`analyze`** - Analyze files or directories
|
**`analyze`** - Analyze files or directories
|
||||||
- `files`: List of file paths or directories (required)
|
- `files`: List of file paths or directories (required)
|
||||||
- `question`: What to analyze (required)
|
- `question`: What to analyze (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `analysis_type`: architecture|performance|security|quality|general
|
- `analysis_type`: architecture|performance|security|quality|general
|
||||||
- `output_format`: summary|detailed|actionable
|
- `output_format`: summary|detailed|actionable
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
@@ -176,7 +183,7 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
|
|
||||||
**`codereview`** - Review code files or directories
|
**`codereview`** - Review code files or directories
|
||||||
- `files`: List of file paths or directories (required)
|
- `files`: List of file paths or directories (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `review_type`: full|security|performance|quick
|
- `review_type`: full|security|performance|quick
|
||||||
- `focus_on`: Specific aspects to focus on
|
- `focus_on`: Specific aspects to focus on
|
||||||
- `standards`: Coding standards to enforce
|
- `standards`: Coding standards to enforce
|
||||||
@@ -192,7 +199,7 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
|
|
||||||
**`debug`** - Debug with file context
|
**`debug`** - Debug with file context
|
||||||
- `error_description`: Description of the issue (required)
|
- `error_description`: Description of the issue (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `error_context`: Stack trace or logs
|
- `error_context`: Stack trace or logs
|
||||||
- `files`: Files or directories related to the issue
|
- `files`: Files or directories related to the issue
|
||||||
- `runtime_info`: Environment details
|
- `runtime_info`: Environment details
|
||||||
@@ -208,7 +215,7 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
|
|
||||||
**`thinkdeep`** - Extended analysis with file context
|
**`thinkdeep`** - Extended analysis with file context
|
||||||
- `current_analysis`: Your current thinking (required)
|
- `current_analysis`: Your current thinking (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `problem_context`: Additional context
|
- `problem_context`: Additional context
|
||||||
- `focus_areas`: Specific aspects to focus on
|
- `focus_areas`: Specific aspects to focus on
|
||||||
- `files`: Files or directories for context
|
- `files`: Files or directories for context
|
||||||
@@ -224,7 +231,7 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
**`testgen`** - Comprehensive test generation with edge case coverage
|
**`testgen`** - Comprehensive test generation with edge case coverage
|
||||||
- `files`: Code files or directories to generate tests for (required)
|
- `files`: Code files or directories to generate tests for (required)
|
||||||
- `prompt`: Description of what to test, testing objectives, and scope (required)
|
- `prompt`: Description of what to test, testing objectives, and scope (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `test_examples`: Optional existing test files as style/pattern reference
|
- `test_examples`: Optional existing test files as style/pattern reference
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
|
|
||||||
@@ -239,7 +246,7 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
- `files`: Code files or directories to analyze for refactoring opportunities (required)
|
- `files`: Code files or directories to analyze for refactoring opportunities (required)
|
||||||
- `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
|
- `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
|
||||||
- `refactor_type`: codesmells|decompose|modernize|organization (required)
|
- `refactor_type`: codesmells|decompose|modernize|organization (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
|
- `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
|
||||||
- `style_guide_examples`: Optional existing code files to use as style/pattern reference
|
- `style_guide_examples`: Optional existing code files to use as style/pattern reference
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model
|
|||||||
|
|
||||||
**Default Model Selection:**
|
**Default Model Selection:**
|
||||||
```env
|
```env
|
||||||
# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', etc.
|
# Options: 'auto', 'pro', 'flash', 'gpt5.1', 'gpt5.1-codex', 'gpt5.1-codex-mini', 'o3', 'o3-mini', 'o4-mini', etc.
|
||||||
DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
|
DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -81,12 +81,14 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
|
|||||||
|
|
||||||
| Provider | Canonical Models | Notable Aliases |
|
| Provider | Canonical Models | Notable Aliases |
|
||||||
|----------|-----------------|-----------------|
|
|----------|-----------------|-----------------|
|
||||||
| OpenAI | `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
|
| OpenAI | `gpt-5.1`, `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5.1`, `gpt-5.1`, `5.1`, `gpt5.1-codex`, `codex-5.1`, `codex-mini`, `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
|
||||||
| Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` |
|
| Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` |
|
||||||
| X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` |
|
| X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` |
|
||||||
| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
|
| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
|
||||||
| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
|
| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
|
||||||
|
|
||||||
|
The latest OpenAI entries (`gpt-5.1`, `gpt-5.1-codex`, `gpt-5.1-codex-mini`) mirror the official model cards released on November 13, 2025: all three expose 400K-token contexts with 128K-token outputs, reasoning-token support, and multimodal inputs. `gpt-5.1-codex` is Responses-only with streaming disabled, while the base `gpt-5.1` and `gpt-5.1-codex-mini` support streaming; all three set `allow_code_generation`. If you run custom deployments, update your manifests so these capability flags stay accurate.
|
||||||
|
|
||||||
> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support, code generation) without editing Python.
|
> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support, code generation) without editing Python.
|
||||||
|
|
||||||
### Code Generation Capability
|
### Code Generation Capability
|
||||||
@@ -105,7 +107,7 @@ The `allow_code_generation` capability enables models to generate complete, prod
|
|||||||
|
|
||||||
**When to Enable:**
|
**When to Enable:**
|
||||||
|
|
||||||
- **Enable for**: Models MORE capable than your primary CLI's model (e.g., GPT-5, GPT-5 Pro when using Claude Code with Sonnet 4.5)
|
- **Enable for**: Models MORE capable than your primary CLI's model (e.g., GPT-5.1 Codex, GPT-5.1, or GPT-5 Pro when using Claude Code with Sonnet 4.5)
|
||||||
- **Purpose**: Get complete implementations from a more powerful reasoning model that your primary CLI can then review and apply
|
- **Purpose**: Get complete implementations from a more powerful reasoning model that your primary CLI can then review and apply
|
||||||
- **Use case**: Large-scale implementations, major refactoring, complete module creation
|
- **Use case**: Large-scale implementations, major refactoring, complete module creation
|
||||||
|
|
||||||
@@ -169,7 +171,7 @@ Control which models can be used from each provider for cost control, compliance
|
|||||||
# Empty or unset = all models allowed (default)
|
# Empty or unset = all models allowed (default)
|
||||||
|
|
||||||
# OpenAI model restrictions
|
# OpenAI model restrictions
|
||||||
OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex-mini,gpt-5-mini,o3-mini,o4-mini,mini
|
||||||
|
|
||||||
# Gemini model restrictions
|
# Gemini model restrictions
|
||||||
GOOGLE_ALLOWED_MODELS=flash,pro
|
GOOGLE_ALLOWED_MODELS=flash,pro
|
||||||
@@ -193,12 +195,17 @@ OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
|
|||||||
OPENAI_ALLOWED_MODELS=o4-mini
|
OPENAI_ALLOWED_MODELS=o4-mini
|
||||||
GOOGLE_ALLOWED_MODELS=flash
|
GOOGLE_ALLOWED_MODELS=flash
|
||||||
|
|
||||||
|
# High-performance setup
|
||||||
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex,gpt-5.1
|
||||||
|
GOOGLE_ALLOWED_MODELS=pro
|
||||||
|
|
||||||
# Single model standardization
|
# Single model standardization
|
||||||
OPENAI_ALLOWED_MODELS=o4-mini
|
OPENAI_ALLOWED_MODELS=o4-mini
|
||||||
GOOGLE_ALLOWED_MODELS=pro
|
GOOGLE_ALLOWED_MODELS=pro
|
||||||
|
|
||||||
# Balanced selection
|
# Balanced selection
|
||||||
GOOGLE_ALLOWED_MODELS=flash,pro
|
GOOGLE_ALLOWED_MODELS=flash,pro
|
||||||
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex-mini,gpt-5-mini,o4-mini
|
||||||
XAI_ALLOWED_MODELS=grok,grok-3-fast
|
XAI_ALLOWED_MODELS=grok,grok-3-fast
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -240,6 +247,8 @@ LOG_LEVEL=DEBUG # Default: shows detailed operational messages
|
|||||||
DEFAULT_MODEL=auto
|
DEFAULT_MODEL=auto
|
||||||
GEMINI_API_KEY=your-gemini-key
|
GEMINI_API_KEY=your-gemini-key
|
||||||
OPENAI_API_KEY=your-openai-key
|
OPENAI_API_KEY=your-openai-key
|
||||||
|
GOOGLE_ALLOWED_MODELS=flash,pro
|
||||||
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex-mini,gpt-5-mini,o4-mini
|
||||||
XAI_API_KEY=your-xai-key
|
XAI_API_KEY=your-xai-key
|
||||||
LOG_LEVEL=DEBUG
|
LOG_LEVEL=DEBUG
|
||||||
CONVERSATION_TIMEOUT_HOURS=1
|
CONVERSATION_TIMEOUT_HOURS=1
|
||||||
@@ -252,7 +261,7 @@ DEFAULT_MODEL=auto
|
|||||||
GEMINI_API_KEY=your-gemini-key
|
GEMINI_API_KEY=your-gemini-key
|
||||||
OPENAI_API_KEY=your-openai-key
|
OPENAI_API_KEY=your-openai-key
|
||||||
GOOGLE_ALLOWED_MODELS=flash
|
GOOGLE_ALLOWED_MODELS=flash
|
||||||
OPENAI_ALLOWED_MODELS=o4-mini
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex-mini,o4-mini
|
||||||
LOG_LEVEL=INFO
|
LOG_LEVEL=INFO
|
||||||
CONVERSATION_TIMEOUT_HOURS=3
|
CONVERSATION_TIMEOUT_HOURS=3
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -61,6 +61,9 @@ The curated defaults in `conf/openrouter_models.json` include popular entries su
|
|||||||
| `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
|
| `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
|
||||||
| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
|
| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
|
||||||
| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
|
| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
|
||||||
|
| `gpt5.1`, `gpt-5.1`, `5.1` | `openai/gpt-5.1` | Flagship GPT-5.1 with reasoning and vision |
|
||||||
|
| `gpt5.1-codex`, `codex-5.1` | `openai/gpt-5.1-codex` | Agentic coding specialization (Responses API) |
|
||||||
|
| `codex-mini`, `gpt5.1-codex-mini` | `openai/gpt-5.1-codex-mini` | Cost-efficient Codex variant with streaming |
|
||||||
|
|
||||||
Consult the JSON file for the full list, aliases, and capability flags. Add new entries as OpenRouter releases additional models.
|
Consult the JSON file for the full list, aliases, and capability flags. Add new entries as OpenRouter releases additional models.
|
||||||
|
|
||||||
@@ -78,6 +81,18 @@ Native catalogues (`conf/openai_models.json`, `conf/gemini_models.json`, `conf/x
|
|||||||
- Advertise support for JSON mode or vision if the upstream provider adds it
|
- Advertise support for JSON mode or vision if the upstream provider adds it
|
||||||
- Adjust token limits when providers increase context windows
|
- Adjust token limits when providers increase context windows
|
||||||
|
|
||||||
|
### Latest OpenAI releases
|
||||||
|
|
||||||
|
OpenAI's November 13, 2025 release introduced `gpt-5.1`, `gpt-5.1-codex`, and `gpt-5.1-codex-mini`, all of which now ship in `conf/openai_models.json`:
|
||||||
|
|
||||||
|
| Model | Highlights | Notes |
|
||||||
|
|-------|------------|-------|
|
||||||
|
| `gpt-5.1` | 400K context, 128K output, multimodal IO, configurable reasoning effort | Streaming enabled; use for balanced agent/coding flows |
|
||||||
|
| `gpt-5.1-codex` | Responses-only agentic coding version of GPT-5.1 | Streaming disabled; `use_openai_response_api=true`; `allow_code_generation=true` |
|
||||||
|
| `gpt-5.1-codex-mini` | Cost-efficient Codex variant | Streaming enabled, retains 400K context and code-generation flag |
|
||||||
|
|
||||||
|
These entries include convenient short aliases (`gpt5.1`, `codex-5.1`, `codex-mini`) plus updated capability flags (`supports_extended_thinking`, `allow_code_generation`). If you operate deployments with custom names, copy the manifest so downstream providers inherit the same metadata.
|
||||||
|
|
||||||
Because providers load the manifests on import, you can tweak capabilities without touching Python. Restart the server after editing the JSON files so changes are picked up.
|
Because providers load the manifests on import, you can tweak capabilities without touching Python. Restart the server after editing the JSON files so changes are picked up.
|
||||||
|
|
||||||
To control ordering in auto mode or the `listmodels` summary, adjust the
|
To control ordering in auto mode or the `listmodels` summary, adjust the
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ You need at least one API key. Choose based on your needs:
|
|||||||
|
|
||||||
**OpenAI:**
|
**OpenAI:**
|
||||||
- Visit [OpenAI Platform](https://platform.openai.com/api-keys)
|
- Visit [OpenAI Platform](https://platform.openai.com/api-keys)
|
||||||
- Generate an API key for O3, GPT-5 access
|
- Generate an API key to access GPT-5.1, GPT-5.1 Codex, GPT-5, and O3
|
||||||
|
|
||||||
**X.AI (Grok):**
|
**X.AI (Grok):**
|
||||||
- Visit [X.AI Console](https://console.x.ai/)
|
- Visit [X.AI Console](https://console.x.ai/)
|
||||||
@@ -287,7 +287,7 @@ Add your API keys (at least one required):
|
|||||||
```env
|
```env
|
||||||
# Choose your providers (at least one required)
|
# Choose your providers (at least one required)
|
||||||
GEMINI_API_KEY=your-gemini-api-key-here # For Gemini models
|
GEMINI_API_KEY=your-gemini-api-key-here # For Gemini models
|
||||||
OPENAI_API_KEY=your-openai-api-key-here # For O3, GPT-5
|
OPENAI_API_KEY=your-openai-api-key-here # For GPT-5.1, GPT-5.1-Codex, O3
|
||||||
XAI_API_KEY=your-xai-api-key-here # For Grok models
|
XAI_API_KEY=your-xai-api-key-here # For Grok models
|
||||||
OPENROUTER_API_KEY=your-openrouter-key # For multiple models
|
OPENROUTER_API_KEY=your-openrouter-key # For multiple models
|
||||||
|
|
||||||
@@ -498,7 +498,7 @@ DEFAULT_MODEL=auto
|
|||||||
GEMINI_API_KEY=your-key
|
GEMINI_API_KEY=your-key
|
||||||
OPENAI_API_KEY=your-key
|
OPENAI_API_KEY=your-key
|
||||||
GOOGLE_ALLOWED_MODELS=flash,pro
|
GOOGLE_ALLOWED_MODELS=flash,pro
|
||||||
OPENAI_ALLOWED_MODELS=o4-mini,o3-mini
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex-mini,gpt-5-mini,o4-mini
|
||||||
```
|
```
|
||||||
|
|
||||||
### Cost-Optimized Setup
|
### Cost-Optimized Setup
|
||||||
@@ -514,7 +514,7 @@ DEFAULT_MODEL=auto
|
|||||||
GEMINI_API_KEY=your-key
|
GEMINI_API_KEY=your-key
|
||||||
OPENAI_API_KEY=your-key
|
OPENAI_API_KEY=your-key
|
||||||
GOOGLE_ALLOWED_MODELS=pro
|
GOOGLE_ALLOWED_MODELS=pro
|
||||||
OPENAI_ALLOWED_MODELS=o3
|
OPENAI_ALLOWED_MODELS=gpt-5.1-codex,gpt-5.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Local-First Setup
|
### Local-First Setup
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ A straightforward rubric that mirrors typical provider tiers:
|
|||||||
|
|
||||||
| Intelligence | Guidance |
|
| Intelligence | Guidance |
|
||||||
|--------------|----------|
|
|--------------|----------|
|
||||||
| 18–19 | Frontier reasoning models (Gemini 2.5 Pro, GPT‑5) |
|
| 18–19 | Frontier reasoning models (Gemini 2.5 Pro, GPT‑5.1 Codex, GPT‑5.1, GPT‑5) |
|
||||||
| 15–17 | Strong general models with large context (O3 Pro, DeepSeek R1) |
|
| 15–17 | Strong general models with large context (O3 Pro, DeepSeek R1) |
|
||||||
| 12–14 | Balanced assistants (Claude Opus/Sonnet, Mistral Large) |
|
| 12–14 | Balanced assistants (Claude Opus/Sonnet, Mistral Large) |
|
||||||
| 9–11 | Fast distillations (Gemini Flash, GPT-5 Mini, Mistral medium) |
|
| 9–11 | Fast distillations (Gemini Flash, GPT-5 Mini, Mistral medium) |
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ This workflow ensures methodical analysis before expert insights, resulting in d
|
|||||||
|
|
||||||
**Initial Configuration (used in step 1):**
|
**Initial Configuration (used in step 1):**
|
||||||
- `prompt`: What to analyze or look for (required)
|
- `prompt`: What to analyze or look for (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `analysis_type`: architecture|performance|security|quality|general (default: general)
|
- `analysis_type`: architecture|performance|security|quality|general (default: general)
|
||||||
- `output_format`: summary|detailed|actionable (default: detailed)
|
- `output_format`: summary|detailed|actionable (default: detailed)
|
||||||
- `temperature`: Temperature for analysis (0-1, default 0.2)
|
- `temperature`: Temperature for analysis (0-1, default 0.2)
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ word verdict in the end.
|
|||||||
## Tool Parameters
|
## Tool Parameters
|
||||||
|
|
||||||
- `prompt`: Your question or discussion topic (required)
|
- `prompt`: Your question or discussion topic (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `absolute_file_paths`: Optional absolute file or directory paths for additional context
|
- `absolute_file_paths`: Optional absolute file or directory paths for additional context
|
||||||
- `images`: Optional images for visual context (absolute paths)
|
- `images`: Optional images for visual context (absolute paths)
|
||||||
- `working_directory_absolute_path`: **Required** - Absolute path to an existing directory where generated code artifacts will be saved
|
- `working_directory_absolute_path`: **Required** - Absolute path to an existing directory where generated code artifacts will be saved
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ The above prompt will simultaneously run two separate `codereview` tools with tw
|
|||||||
|
|
||||||
**Initial Review Configuration (used in step 1):**
|
**Initial Review Configuration (used in step 1):**
|
||||||
- `prompt`: User's summary of what the code does, expected behavior, constraints, and review objectives (required)
|
- `prompt`: User's summary of what the code does, expected behavior, constraints, and review objectives (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `review_type`: full|security|performance|quick (default: full)
|
- `review_type`: full|security|performance|quick (default: full)
|
||||||
- `focus_on`: Specific aspects to focus on (e.g., "security vulnerabilities", "performance bottlenecks")
|
- `focus_on`: Specific aspects to focus on (e.g., "security vulnerabilities", "performance bottlenecks")
|
||||||
- `standards`: Coding standards to enforce (e.g., "PEP8", "ESLint", "Google Style Guide")
|
- `standards`: Coding standards to enforce (e.g., "PEP8", "ESLint", "Google Style Guide")
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ This structured approach ensures Claude performs methodical groundwork before ex
|
|||||||
- `images`: Visual debugging materials (error screenshots, logs, etc.)
|
- `images`: Visual debugging materials (error screenshots, logs, etc.)
|
||||||
|
|
||||||
**Model Selection:**
|
**Model Selection:**
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
- `use_assistant_model`: Whether to use expert analysis phase (default: true, set to false to use Claude only)
|
- `use_assistant_model`: Whether to use expert analysis phase (default: true, set to false to use Claude only)
|
||||||
|
|
||||||
|
|||||||
@@ -140,7 +140,7 @@ Use zen and perform a thorough precommit ensuring there aren't any new regressio
|
|||||||
**Initial Configuration (used in step 1):**
|
**Initial Configuration (used in step 1):**
|
||||||
- `path`: Starting directory to search for repos (REQUIRED for step 1, must be absolute path)
|
- `path`: Starting directory to search for repos (REQUIRED for step 1, must be absolute path)
|
||||||
- `prompt`: The original user request description for the changes (required for context)
|
- `prompt`: The original user request description for the changes (required for context)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `compare_to`: Compare against a branch/tag instead of local changes (optional)
|
- `compare_to`: Compare against a branch/tag instead of local changes (optional)
|
||||||
- `severity_filter`: critical|high|medium|low|all (default: all)
|
- `severity_filter`: critical|high|medium|low|all (default: all)
|
||||||
- `include_staged`: Include staged changes in the review (default: true)
|
- `include_staged`: Include staged changes in the review (default: true)
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ This results in Claude first performing its own expert analysis, encouraging it
|
|||||||
**Initial Configuration (used in step 1):**
|
**Initial Configuration (used in step 1):**
|
||||||
- `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
|
- `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
|
||||||
- `refactor_type`: codesmells|decompose|modernize|organization (default: codesmells)
|
- `refactor_type`: codesmells|decompose|modernize|organization (default: codesmells)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
|
- `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
|
||||||
- `style_guide_examples`: Optional existing code files to use as style/pattern reference (absolute paths)
|
- `style_guide_examples`: Optional existing code files to use as style/pattern reference (absolute paths)
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ security remediation plan using planner
|
|||||||
- `images`: Architecture diagrams, security documentation, or visual references
|
- `images`: Architecture diagrams, security documentation, or visual references
|
||||||
|
|
||||||
**Initial Security Configuration (used in step 1):**
|
**Initial Security Configuration (used in step 1):**
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `security_scope`: Application context, technology stack, and security boundary definition (required)
|
- `security_scope`: Application context, technology stack, and security boundary definition (required)
|
||||||
- `threat_level`: low|medium|high|critical (default: medium) - determines assessment depth and urgency
|
- `threat_level`: low|medium|high|critical (default: medium) - determines assessment depth and urgency
|
||||||
- `compliance_requirements`: List of compliance frameworks to assess against (e.g., ["PCI DSS", "SOC2"])
|
- `compliance_requirements`: List of compliance frameworks to assess against (e.g., ["PCI DSS", "SOC2"])
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ Test generation excels with extended reasoning models like Gemini Pro or O3, whi
|
|||||||
|
|
||||||
**Initial Configuration (used in step 1):**
|
**Initial Configuration (used in step 1):**
|
||||||
- `prompt`: Description of what to test, testing objectives, and specific scope/focus areas (required)
|
- `prompt`: Description of what to test, testing objectives, and specific scope/focus areas (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `test_examples`: Optional existing test files or directories to use as style/pattern reference (absolute paths)
|
- `test_examples`: Optional existing test files or directories to use as style/pattern reference (absolute paths)
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
- `use_assistant_model`: Whether to use expert test generation phase (default: true, set to false to use Claude only)
|
- `use_assistant_model`: Whether to use expert test generation phase (default: true, set to false to use Claude only)
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ with the best architecture for my project
|
|||||||
## Tool Parameters
|
## Tool Parameters
|
||||||
|
|
||||||
- `prompt`: Your current thinking/analysis to extend and validate (required)
|
- `prompt`: Your current thinking/analysis to extend and validate (required)
|
||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `problem_context`: Additional context about the problem or goal
|
- `problem_context`: Additional context about the problem or goal
|
||||||
- `focus_areas`: Specific aspects to focus on (architecture, performance, security, etc.)
|
- `focus_areas`: Specific aspects to focus on (architecture, performance, security, etc.)
|
||||||
- `files`: Optional file paths or directories for additional context (absolute paths)
|
- `files`: Optional file paths or directories for additional context (absolute paths)
|
||||||
|
|||||||
@@ -115,20 +115,51 @@ class OpenAIModelProvider(RegistryBackedProviderMixin, OpenAICompatibleProvider)
|
|||||||
|
|
||||||
if category == ToolModelCategory.EXTENDED_REASONING:
|
if category == ToolModelCategory.EXTENDED_REASONING:
|
||||||
# Prefer models with extended thinking support
|
# Prefer models with extended thinking support
|
||||||
# GPT-5-Codex first for coding tasks
|
# GPT-5.1 Codex first for coding tasks
|
||||||
preferred = find_first(["gpt-5-codex", "gpt-5-pro", "o3", "o3-pro", "gpt-5"])
|
preferred = find_first(
|
||||||
|
[
|
||||||
|
"gpt-5.1-codex",
|
||||||
|
"gpt-5.1",
|
||||||
|
"gpt-5-codex",
|
||||||
|
"gpt-5-pro",
|
||||||
|
"o3-pro",
|
||||||
|
"gpt-5",
|
||||||
|
"o3",
|
||||||
|
]
|
||||||
|
)
|
||||||
return preferred if preferred else allowed_models[0]
|
return preferred if preferred else allowed_models[0]
|
||||||
|
|
||||||
elif category == ToolModelCategory.FAST_RESPONSE:
|
elif category == ToolModelCategory.FAST_RESPONSE:
|
||||||
# Prefer fast, cost-efficient models
|
# Prefer fast, cost-efficient models
|
||||||
# GPT-5 models for speed, GPT-5-Codex after (premium pricing but cached)
|
# GPT-5.1 models for speed, GPT-5.1-Codex after (premium pricing but cached)
|
||||||
preferred = find_first(["gpt-5", "gpt-5-mini", "gpt-5-codex", "o4-mini", "o3-mini"])
|
preferred = find_first(
|
||||||
|
[
|
||||||
|
"gpt-5.1",
|
||||||
|
"gpt-5.1-codex-mini",
|
||||||
|
"gpt-5",
|
||||||
|
"gpt-5-mini",
|
||||||
|
"gpt-5-codex",
|
||||||
|
"o4-mini",
|
||||||
|
"o3-mini",
|
||||||
|
]
|
||||||
|
)
|
||||||
return preferred if preferred else allowed_models[0]
|
return preferred if preferred else allowed_models[0]
|
||||||
|
|
||||||
else: # BALANCED or default
|
else: # BALANCED or default
|
||||||
# Prefer balanced performance/cost models
|
# Prefer balanced performance/cost models
|
||||||
# Include GPT-5-Codex for coding workflows
|
# Include GPT-5.1 family for latest capabilities
|
||||||
preferred = find_first(["gpt-5", "gpt-5-codex", "gpt-5-pro", "gpt-5-mini", "o4-mini", "o3-mini"])
|
preferred = find_first(
|
||||||
|
[
|
||||||
|
"gpt-5.1",
|
||||||
|
"gpt-5.1-codex",
|
||||||
|
"gpt-5",
|
||||||
|
"gpt-5-codex",
|
||||||
|
"gpt-5-pro",
|
||||||
|
"gpt-5-mini",
|
||||||
|
"o4-mini",
|
||||||
|
"o3-mini",
|
||||||
|
]
|
||||||
|
)
|
||||||
return preferred if preferred else allowed_models[0]
|
return preferred if preferred else allowed_models[0]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -222,10 +222,45 @@ If you encounter issues with cassette testing:
|
|||||||
3. Run semantic matching tests to verify the system
|
3. Run semantic matching tests to verify the system
|
||||||
4. Open an issue if you find a bug in the matching logic
|
4. Open an issue if you find a bug in the matching logic
|
||||||
|
|
||||||
|
## Dual-Model Cassette Coverage
|
||||||
|
|
||||||
|
Some integration tests maintain cassettes for multiple model variants to ensure regression coverage across model families. For example:
|
||||||
|
|
||||||
|
### Consensus Tool Cassettes
|
||||||
|
|
||||||
|
The `test_consensus_integration.py` test is parametrized to run against both the `gpt-5` and `gpt-5.1` models:
|
||||||
|
|
||||||
|
- `tests/openai_cassettes/consensus_step1_gpt5_for.json` - Cassette for gpt-5 model
|
||||||
|
- `tests/openai_cassettes/consensus_step1_gpt51_for.json` - Cassette for gpt-5.1 model
|
||||||
|
|
||||||
|
**When updating consensus cassettes:**
|
||||||
|
|
||||||
|
1. Both cassettes should be updated if the test logic changes
|
||||||
|
2. If only one model's behavior changes, update only that cassette
|
||||||
|
3. The test uses `@pytest.mark.parametrize` to run against both models
|
||||||
|
4. Each cassette path is mapped in the `CONSENSUS_CASSETTES` dictionary
|
||||||
|
|
||||||
|
**To re-record a specific model's cassette:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Delete the specific cassette
|
||||||
|
rm tests/openai_cassettes/consensus_step1_gpt5_for.json
|
||||||
|
|
||||||
|
# Run the test with a real API key (it will record for gpt-5)
|
||||||
|
OPENAI_API_KEY="your-real-key" python -m pytest "tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5]" -v
|
||||||
|
|
||||||
|
# Or for gpt-5.1
|
||||||
|
rm tests/openai_cassettes/consensus_step1_gpt51_for.json
|
||||||
|
OPENAI_API_KEY="your-real-key" python -m pytest "tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5.1]" -v
|
||||||
|
```
|
||||||
|
|
||||||
|
This dual-coverage approach ensures that both model families continue to work correctly as the codebase evolves.
|
||||||
|
|
||||||
## Related Files
|
## Related Files
|
||||||
|
|
||||||
- `tests/http_transport_recorder.py` - Cassette recording/replay implementation
|
- `tests/http_transport_recorder.py` - Cassette recording/replay implementation
|
||||||
- `tests/transport_helpers.py` - Helper functions for injecting transports
|
- `tests/transport_helpers.py` - Helper functions for injecting transports
|
||||||
- `tests/test_cassette_semantic_matching.py` - Tests for semantic matching
|
- `tests/test_cassette_semantic_matching.py` - Tests for semantic matching
|
||||||
- `tests/test_o3_pro_output_text_fix.py` - Example of cassette usage
|
- `tests/test_o3_pro_output_text_fix.py` - Example of cassette usage
|
||||||
|
- `tests/test_consensus_integration.py` - Example of dual-model cassette coverage
|
||||||
- `tests/openai_cassettes/` - Directory containing recorded cassettes
|
- `tests/openai_cassettes/` - Directory containing recorded cassettes
|
||||||
|
|||||||
@@ -193,6 +193,7 @@ def disable_force_env_override(monkeypatch):
|
|||||||
monkeypatch.setenv("MAX_CONVERSATION_TURNS", "50")
|
monkeypatch.setenv("MAX_CONVERSATION_TURNS", "50")
|
||||||
|
|
||||||
import importlib
|
import importlib
|
||||||
|
import sys
|
||||||
|
|
||||||
import config
|
import config
|
||||||
import utils.conversation_memory as conversation_memory
|
import utils.conversation_memory as conversation_memory
|
||||||
@@ -200,6 +201,10 @@ def disable_force_env_override(monkeypatch):
|
|||||||
importlib.reload(config)
|
importlib.reload(config)
|
||||||
importlib.reload(conversation_memory)
|
importlib.reload(conversation_memory)
|
||||||
|
|
||||||
|
test_conversation_module = sys.modules.get("tests.test_conversation_memory")
|
||||||
|
if test_conversation_module is not None:
|
||||||
|
test_conversation_module.MAX_CONVERSATION_TURNS = conversation_memory.MAX_CONVERSATION_TURNS
|
||||||
|
|
||||||
try:
|
try:
|
||||||
yield
|
yield
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
82
tests/openai_cassettes/consensus_step1_gpt51_for.json
Normal file
82
tests/openai_cassettes/consensus_step1_gpt51_for.json
Normal file
File diff suppressed because one or more lines are too long
@@ -94,9 +94,9 @@ class TestAutoModeComprehensive:
|
|||||||
"OPENROUTER_API_KEY": None,
|
"OPENROUTER_API_KEY": None,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"EXTENDED_REASONING": "gpt-5-codex", # GPT-5-Codex prioritized for coding tasks
|
"EXTENDED_REASONING": "gpt-5.1-codex", # GPT-5.1 Codex prioritized for coding tasks
|
||||||
"FAST_RESPONSE": "gpt-5", # Prefer gpt-5 for speed
|
"FAST_RESPONSE": "gpt-5.1", # Prefer gpt-5.1 for speed
|
||||||
"BALANCED": "gpt-5", # Prefer gpt-5 for balanced
|
"BALANCED": "gpt-5.1", # Prefer gpt-5.1 for balanced
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
# Only X.AI API available
|
# Only X.AI API available
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
monkeypatch.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
|
monkeypatch.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
|
||||||
monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5")
|
monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5.1")
|
||||||
monkeypatch.setenv("OPENROUTER_ALLOWED_MODELS", "gpt5nano")
|
monkeypatch.setenv("OPENROUTER_ALLOWED_MODELS", "gpt5nano")
|
||||||
monkeypatch.setenv("XAI_ALLOWED_MODELS", "")
|
monkeypatch.setenv("XAI_ALLOWED_MODELS", "")
|
||||||
|
|
||||||
@@ -104,7 +104,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
|||||||
("OPENAI_API_KEY", "test-openai"),
|
("OPENAI_API_KEY", "test-openai"),
|
||||||
("OPENROUTER_API_KEY", "test-openrouter"),
|
("OPENROUTER_API_KEY", "test-openrouter"),
|
||||||
("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro"),
|
("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro"),
|
||||||
("OPENAI_ALLOWED_MODELS", "gpt-5"),
|
("OPENAI_ALLOWED_MODELS", "gpt-5.1"),
|
||||||
("OPENROUTER_ALLOWED_MODELS", "gpt5nano"),
|
("OPENROUTER_ALLOWED_MODELS", "gpt5nano"),
|
||||||
("XAI_ALLOWED_MODELS", ""),
|
("XAI_ALLOWED_MODELS", ""),
|
||||||
):
|
):
|
||||||
@@ -139,7 +139,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
|
|||||||
assert payload["status"] == "error"
|
assert payload["status"] == "error"
|
||||||
|
|
||||||
available_models = _extract_available_models(payload["content"])
|
available_models = _extract_available_models(payload["content"])
|
||||||
assert set(available_models) == {"gemini-2.5-pro", "gpt-5", "gpt5nano", "openai/gpt-5-nano"}
|
assert set(available_models) == {"gemini-2.5-pro", "gpt-5.1", "gpt5nano", "openai/gpt-5-nano"}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
@@ -225,6 +225,6 @@ def test_error_listing_without_restrictions_shows_full_catalog(monkeypatch, rese
|
|||||||
|
|
||||||
available_models = _extract_available_models(payload["content"])
|
available_models = _extract_available_models(payload["content"])
|
||||||
assert "gemini-2.5-pro" in available_models
|
assert "gemini-2.5-pro" in available_models
|
||||||
assert "gpt-5" in available_models
|
assert any(model in available_models for model in {"gpt-5.1", "gpt-5"})
|
||||||
assert "grok-4" in available_models
|
assert "grok-4" in available_models
|
||||||
assert len(available_models) >= 5
|
assert len(available_models) >= 5
|
||||||
|
|||||||
@@ -98,9 +98,9 @@ class TestAutoModeProviderSelection:
|
|||||||
balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
||||||
|
|
||||||
# Should select appropriate OpenAI models based on new preference order
|
# Should select appropriate OpenAI models based on new preference order
|
||||||
assert extended_reasoning == "gpt-5-codex" # GPT-5-Codex prioritized for extended reasoning
|
assert extended_reasoning == "gpt-5.1-codex" # GPT-5.1 Codex prioritized for extended reasoning
|
||||||
assert fast_response == "gpt-5" # gpt-5 comes first in fast response preference
|
assert fast_response == "gpt-5.1" # gpt-5.1 comes first in fast response preference
|
||||||
assert balanced == "gpt-5" # gpt-5 for balanced
|
assert balanced == "gpt-5.1" # gpt-5.1 for balanced
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Restore original environment
|
# Restore original environment
|
||||||
|
|||||||
@@ -16,7 +16,12 @@ from tools.consensus import ConsensusTool
|
|||||||
# Directories for recorded HTTP interactions
|
# Directories for recorded HTTP interactions
|
||||||
CASSETTE_DIR = Path(__file__).parent / "openai_cassettes"
|
CASSETTE_DIR = Path(__file__).parent / "openai_cassettes"
|
||||||
CASSETTE_DIR.mkdir(exist_ok=True)
|
CASSETTE_DIR.mkdir(exist_ok=True)
|
||||||
CONSENSUS_CASSETTE_PATH = CASSETTE_DIR / "consensus_step1_gpt5_for.json"
|
|
||||||
|
# Mapping of OpenAI model names to their cassette files
|
||||||
|
CONSENSUS_CASSETTES = {
|
||||||
|
"gpt-5": CASSETTE_DIR / "consensus_step1_gpt5_for.json",
|
||||||
|
"gpt-5.1": CASSETTE_DIR / "consensus_step1_gpt51_for.json",
|
||||||
|
}
|
||||||
|
|
||||||
GEMINI_REPLAY_DIR = Path(__file__).parent / "gemini_cassettes"
|
GEMINI_REPLAY_DIR = Path(__file__).parent / "gemini_cassettes"
|
||||||
GEMINI_REPLAY_DIR.mkdir(exist_ok=True)
|
GEMINI_REPLAY_DIR.mkdir(exist_ok=True)
|
||||||
@@ -26,8 +31,15 @@ GEMINI_REPLAY_PATH = GEMINI_REPLAY_DIR / "consensus" / "step2_gemini25_flash_aga
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
async def test_consensus_multi_model_consultations(monkeypatch):
|
@pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.1"])
|
||||||
"""Exercise ConsensusTool against gpt-5 (supporting) and gemini-2.0-flash (critical)."""
|
async def test_consensus_multi_model_consultations(monkeypatch, openai_model):
|
||||||
|
"""Exercise ConsensusTool against OpenAI model (supporting) and gemini-2.5-flash (critical).
|
||||||
|
|
||||||
|
Tests both gpt-5 and gpt-5.1 to ensure regression coverage for both model families.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Get the cassette path for this model
|
||||||
|
consensus_cassette_path = CONSENSUS_CASSETTES[openai_model]
|
||||||
|
|
||||||
env_updates = {
|
env_updates = {
|
||||||
"DEFAULT_MODEL": "auto",
|
"DEFAULT_MODEL": "auto",
|
||||||
@@ -43,13 +55,14 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
"CUSTOM_API_URL",
|
"CUSTOM_API_URL",
|
||||||
]
|
]
|
||||||
|
|
||||||
recording_mode = not CONSENSUS_CASSETTE_PATH.exists() or not GEMINI_REPLAY_PATH.exists()
|
recording_mode = not consensus_cassette_path.exists() or not GEMINI_REPLAY_PATH.exists()
|
||||||
if recording_mode:
|
if recording_mode:
|
||||||
openai_key = env_updates["OPENAI_API_KEY"].strip()
|
openai_key = env_updates["OPENAI_API_KEY"].strip()
|
||||||
gemini_key = env_updates["GEMINI_API_KEY"].strip()
|
gemini_key = env_updates["GEMINI_API_KEY"].strip()
|
||||||
if (not openai_key or openai_key.startswith("dummy")) or (not gemini_key or gemini_key.startswith("dummy")):
|
if (not openai_key or openai_key.startswith("dummy")) or (not gemini_key or gemini_key.startswith("dummy")):
|
||||||
pytest.skip(
|
pytest.skip(
|
||||||
"Consensus cassette missing and OPENAI_API_KEY/GEMINI_API_KEY not configured. Provide real keys to record."
|
"Consensus cassette missing and OPENAI_API_KEY/GEMINI_API_KEY "
|
||||||
|
"not configured. Provide real keys to record."
|
||||||
)
|
)
|
||||||
|
|
||||||
GEMINI_REPLAY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
GEMINI_REPLAY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -66,27 +79,43 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
m.setenv("GEMINI_API_KEY", "dummy-key-for-replay")
|
m.setenv("GEMINI_API_KEY", "dummy-key-for-replay")
|
||||||
m.setenv("GOOGLE_GENAI_CLIENT_MODE", "replay")
|
m.setenv("GOOGLE_GENAI_CLIENT_MODE", "replay")
|
||||||
|
|
||||||
|
# Ensure restriction policies allow the latest OpenAI models under test
|
||||||
|
m.setenv("OPENAI_ALLOWED_MODELS", openai_model)
|
||||||
|
|
||||||
m.setenv("GOOGLE_GENAI_REPLAYS_DIRECTORY", str(GEMINI_REPLAY_DIR))
|
m.setenv("GOOGLE_GENAI_REPLAYS_DIRECTORY", str(GEMINI_REPLAY_DIR))
|
||||||
m.setenv("GOOGLE_GENAI_REPLAY_ID", GEMINI_REPLAY_ID)
|
m.setenv("GOOGLE_GENAI_REPLAY_ID", GEMINI_REPLAY_ID)
|
||||||
|
|
||||||
for key in keys_to_clear:
|
for key in keys_to_clear:
|
||||||
m.delenv(key, raising=False)
|
m.delenv(key, raising=False)
|
||||||
|
|
||||||
# Reset providers and register only OpenAI & Gemini for deterministic behavior
|
# Ensure we use the built-in OpenAI catalogue rather than leftovers from
|
||||||
|
# other tests that patch OPENAI_MODELS_CONFIG_PATH.
|
||||||
|
m.delenv("OPENAI_MODELS_CONFIG_PATH", raising=False)
|
||||||
|
|
||||||
|
# Reset providers/restrictions and register only OpenAI & Gemini for deterministic behavior
|
||||||
ModelProviderRegistry.reset_for_testing()
|
ModelProviderRegistry.reset_for_testing()
|
||||||
|
import utils.model_restrictions as model_restrictions
|
||||||
|
|
||||||
|
model_restrictions._restriction_service = None
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai import OpenAIModelProvider
|
||||||
|
|
||||||
|
# Earlier tests may override the OpenAI provider's registry by pointing
|
||||||
|
# OPENAI_MODELS_CONFIG_PATH at fixtures. Force a reload so model
|
||||||
|
# metadata is restored from conf/openai_models.json.
|
||||||
|
OpenAIModelProvider.reload_registry()
|
||||||
|
assert openai_model in OpenAIModelProvider.MODEL_CAPABILITIES
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
|
|
||||||
# Inject HTTP transport for OpenAI interactions
|
# Inject HTTP transport for OpenAI interactions
|
||||||
inject_transport(monkeypatch, CONSENSUS_CASSETTE_PATH)
|
inject_transport(monkeypatch, str(consensus_cassette_path))
|
||||||
|
|
||||||
tool = ConsensusTool()
|
tool = ConsensusTool()
|
||||||
|
|
||||||
models_to_consult = [
|
models_to_consult = [
|
||||||
{"model": "gpt-5", "stance": "for"},
|
{"model": openai_model, "stance": "for"},
|
||||||
{"model": "gemini-2.5-flash", "stance": "against"},
|
{"model": "gemini-2.5-flash", "stance": "against"},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -105,7 +134,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
step1_data = json.loads(step1_response[0].text)
|
step1_data = json.loads(step1_response[0].text)
|
||||||
|
|
||||||
assert step1_data["status"] == "analysis_and_first_model_consulted"
|
assert step1_data["status"] == "analysis_and_first_model_consulted"
|
||||||
assert step1_data["model_consulted"] == "gpt-5"
|
assert step1_data["model_consulted"] == openai_model
|
||||||
assert step1_data["model_response"]["status"] == "success"
|
assert step1_data["model_response"]["status"] == "success"
|
||||||
assert step1_data["model_response"]["metadata"]["provider"] == "openai"
|
assert step1_data["model_response"]["metadata"]["provider"] == "openai"
|
||||||
assert step1_data["model_response"]["verdict"]
|
assert step1_data["model_response"]["verdict"]
|
||||||
@@ -118,7 +147,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
summary_for_step2 = step1_data["model_response"]["verdict"][:200]
|
summary_for_step2 = step1_data["model_response"]["verdict"][:200]
|
||||||
|
|
||||||
step2_arguments = {
|
step2_arguments = {
|
||||||
"step": f"Incorporated gpt-5 perspective: {summary_for_step2}",
|
"step": f"Incorporated {openai_model} perspective: {summary_for_step2}",
|
||||||
"step_number": 2,
|
"step_number": 2,
|
||||||
"total_steps": len(models_to_consult),
|
"total_steps": len(models_to_consult),
|
||||||
"next_step_required": False,
|
"next_step_required": False,
|
||||||
@@ -138,7 +167,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
assert step2_data["model_response"]["metadata"]["provider"] == "google"
|
assert step2_data["model_response"]["metadata"]["provider"] == "google"
|
||||||
assert step2_data["model_response"]["verdict"]
|
assert step2_data["model_response"]["verdict"]
|
||||||
assert step2_data["complete_consensus"]["models_consulted"] == [
|
assert step2_data["complete_consensus"]["models_consulted"] == [
|
||||||
"gpt-5:for",
|
f"{openai_model}:for",
|
||||||
"gemini-2.5-flash:against",
|
"gemini-2.5-flash:against",
|
||||||
]
|
]
|
||||||
assert step2_data["consensus_complete"] is True
|
assert step2_data["consensus_complete"] is True
|
||||||
@@ -159,7 +188,7 @@ async def test_consensus_multi_model_consultations(monkeypatch):
|
|||||||
gemini_provider._client = None
|
gemini_provider._client = None
|
||||||
|
|
||||||
# Ensure cassettes exist for future replays
|
# Ensure cassettes exist for future replays
|
||||||
assert CONSENSUS_CASSETTE_PATH.exists()
|
assert consensus_cassette_path.exists()
|
||||||
assert GEMINI_REPLAY_PATH.exists()
|
assert GEMINI_REPLAY_PATH.exists()
|
||||||
|
|
||||||
# Clean up provider registry state after test
|
# Clean up provider registry state after test
|
||||||
|
|||||||
@@ -37,14 +37,14 @@ class TestIntelligentFallback:
|
|||||||
|
|
||||||
@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
|
@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
|
||||||
def test_prefers_openai_o3_mini_when_available(self):
|
def test_prefers_openai_o3_mini_when_available(self):
|
||||||
"""Test that gpt-5 is preferred when OpenAI API key is available (based on new preference order)"""
|
"""Test that gpt-5.1 is preferred when OpenAI API key is available (based on new preference order)"""
|
||||||
# Register only OpenAI provider for this test
|
# Register only OpenAI provider for this test
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
|
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
|
||||||
assert fallback_model == "gpt-5" # Based on new preference order: gpt-5 before o4-mini
|
assert fallback_model == "gpt-5.1" # Based on new preference order: gpt-5.1 before o4-mini
|
||||||
|
|
||||||
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
|
@patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
|
||||||
def test_prefers_gemini_flash_when_openai_unavailable(self):
|
def test_prefers_gemini_flash_when_openai_unavailable(self):
|
||||||
@@ -147,8 +147,8 @@ class TestIntelligentFallback:
|
|||||||
|
|
||||||
history, tokens = build_conversation_history(context, model_context=None)
|
history, tokens = build_conversation_history(context, model_context=None)
|
||||||
|
|
||||||
# Verify that ModelContext was called with gpt-5 (the intelligent fallback based on new preference order)
|
# Verify that ModelContext was called with gpt-5.1 (the intelligent fallback based on new preference order)
|
||||||
mock_context_class.assert_called_once_with("gpt-5")
|
mock_context_class.assert_called_once_with("gpt-5.1")
|
||||||
|
|
||||||
def test_auto_mode_with_gemini_only(self):
|
def test_auto_mode_with_gemini_only(self):
|
||||||
"""Test auto mode behavior when only Gemini API key is available"""
|
"""Test auto mode behavior when only Gemini API key is available"""
|
||||||
|
|||||||
@@ -50,6 +50,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider.validate_model_name("o4-mini") is True
|
assert provider.validate_model_name("o4-mini") is True
|
||||||
assert provider.validate_model_name("gpt-5") is True
|
assert provider.validate_model_name("gpt-5") is True
|
||||||
assert provider.validate_model_name("gpt-5-mini") is True
|
assert provider.validate_model_name("gpt-5-mini") is True
|
||||||
|
assert provider.validate_model_name("gpt-5.1") is True
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex") is True
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex-mini") is True
|
||||||
|
|
||||||
# Test valid aliases
|
# Test valid aliases
|
||||||
assert provider.validate_model_name("mini") is True
|
assert provider.validate_model_name("mini") is True
|
||||||
@@ -59,6 +62,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider.validate_model_name("gpt5") is True
|
assert provider.validate_model_name("gpt5") is True
|
||||||
assert provider.validate_model_name("gpt5-mini") is True
|
assert provider.validate_model_name("gpt5-mini") is True
|
||||||
assert provider.validate_model_name("gpt5mini") is True
|
assert provider.validate_model_name("gpt5mini") is True
|
||||||
|
assert provider.validate_model_name("gpt5.1") is True
|
||||||
|
assert provider.validate_model_name("gpt5.1-codex") is True
|
||||||
|
assert provider.validate_model_name("codex-mini") is True
|
||||||
|
|
||||||
# Test invalid model
|
# Test invalid model
|
||||||
assert provider.validate_model_name("invalid-model") is False
|
assert provider.validate_model_name("invalid-model") is False
|
||||||
@@ -77,6 +83,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider._resolve_model_name("gpt5") == "gpt-5"
|
assert provider._resolve_model_name("gpt5") == "gpt-5"
|
||||||
assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
|
assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
|
||||||
assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
|
assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
|
||||||
|
assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
|
||||||
|
assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
|
||||||
|
assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
|
||||||
|
|
||||||
# Test full name passthrough
|
# Test full name passthrough
|
||||||
assert provider._resolve_model_name("o3") == "o3"
|
assert provider._resolve_model_name("o3") == "o3"
|
||||||
@@ -86,6 +95,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider._resolve_model_name("o4-mini") == "o4-mini"
|
assert provider._resolve_model_name("o4-mini") == "o4-mini"
|
||||||
assert provider._resolve_model_name("gpt-5") == "gpt-5"
|
assert provider._resolve_model_name("gpt-5") == "gpt-5"
|
||||||
assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
|
assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
|
||||||
|
assert provider._resolve_model_name("gpt-5.1") == "gpt-5.1"
|
||||||
|
assert provider._resolve_model_name("gpt-5.1-codex") == "gpt-5.1-codex"
|
||||||
|
assert provider._resolve_model_name("gpt-5.1-codex-mini") == "gpt-5.1-codex-mini"
|
||||||
|
|
||||||
def test_get_capabilities_o3(self):
|
def test_get_capabilities_o3(self):
|
||||||
"""Test getting model capabilities for O3."""
|
"""Test getting model capabilities for O3."""
|
||||||
@@ -146,6 +158,36 @@ class TestOpenAIProvider:
|
|||||||
assert capabilities.supports_function_calling is True
|
assert capabilities.supports_function_calling is True
|
||||||
assert capabilities.supports_temperature is True
|
assert capabilities.supports_temperature is True
|
||||||
|
|
||||||
|
def test_get_capabilities_gpt51(self):
|
||||||
|
"""Test GPT-5.1 capabilities reflect new metadata."""
|
||||||
|
provider = OpenAIModelProvider("test-key")
|
||||||
|
|
||||||
|
capabilities = provider.get_capabilities("gpt-5.1")
|
||||||
|
assert capabilities.model_name == "gpt-5.1"
|
||||||
|
assert capabilities.supports_streaming is True
|
||||||
|
assert capabilities.supports_function_calling is True
|
||||||
|
assert capabilities.supports_json_mode is True
|
||||||
|
assert capabilities.allow_code_generation is True
|
||||||
|
|
||||||
|
def test_get_capabilities_gpt51_codex(self):
|
||||||
|
"""Test GPT-5.1 Codex is responses-only and non-streaming."""
|
||||||
|
provider = OpenAIModelProvider("test-key")
|
||||||
|
|
||||||
|
capabilities = provider.get_capabilities("gpt-5.1-codex")
|
||||||
|
assert capabilities.model_name == "gpt-5.1-codex"
|
||||||
|
assert capabilities.supports_streaming is False
|
||||||
|
assert capabilities.use_openai_response_api is True
|
||||||
|
assert capabilities.allow_code_generation is True
|
||||||
|
|
||||||
|
def test_get_capabilities_gpt51_codex_mini(self):
|
||||||
|
"""Test GPT-5.1 Codex mini exposes streaming and code generation."""
|
||||||
|
provider = OpenAIModelProvider("test-key")
|
||||||
|
|
||||||
|
capabilities = provider.get_capabilities("gpt-5.1-codex-mini")
|
||||||
|
assert capabilities.model_name == "gpt-5.1-codex-mini"
|
||||||
|
assert capabilities.supports_streaming is True
|
||||||
|
assert capabilities.allow_code_generation is True
|
||||||
|
|
||||||
@patch("providers.openai_compatible.OpenAI")
|
@patch("providers.openai_compatible.OpenAI")
|
||||||
def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class):
|
def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class):
|
||||||
"""Test that generate_content resolves aliases before making API calls.
|
"""Test that generate_content resolves aliases before making API calls.
|
||||||
|
|||||||
@@ -98,8 +98,8 @@ class TestModelSelection:
|
|||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
|
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING)
|
||||||
# OpenAI prefers GPT-5-Codex for extended reasoning (coding tasks)
|
# OpenAI prefers GPT-5.1-Codex for extended reasoning (coding tasks)
|
||||||
assert model == "gpt-5-codex"
|
assert model == "gpt-5.1-codex"
|
||||||
|
|
||||||
def test_extended_reasoning_with_gemini_only(self):
|
def test_extended_reasoning_with_gemini_only(self):
|
||||||
"""Test EXTENDED_REASONING prefers pro when only Gemini is available."""
|
"""Test EXTENDED_REASONING prefers pro when only Gemini is available."""
|
||||||
@@ -133,8 +133,8 @@ class TestModelSelection:
|
|||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
|
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
|
||||||
# OpenAI now prefers gpt-5 for fast response (based on our new preference order)
|
# OpenAI now prefers gpt-5.1 for fast response (based on our new preference order)
|
||||||
assert model == "gpt-5"
|
assert model == "gpt-5.1"
|
||||||
|
|
||||||
def test_fast_response_with_gemini_only(self):
|
def test_fast_response_with_gemini_only(self):
|
||||||
"""Test FAST_RESPONSE prefers flash when only Gemini is available."""
|
"""Test FAST_RESPONSE prefers flash when only Gemini is available."""
|
||||||
@@ -167,8 +167,8 @@ class TestModelSelection:
|
|||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
|
||||||
# OpenAI prefers gpt-5 for balanced (based on our new preference order)
|
# OpenAI prefers gpt-5.1 for balanced (based on our new preference order)
|
||||||
assert model == "gpt-5"
|
assert model == "gpt-5.1"
|
||||||
|
|
||||||
def test_no_category_uses_balanced_logic(self):
|
def test_no_category_uses_balanced_logic(self):
|
||||||
"""Test that no category specified uses balanced logic."""
|
"""Test that no category specified uses balanced logic."""
|
||||||
@@ -195,7 +195,7 @@ class TestFlexibleModelSelection:
|
|||||||
"env": {"OPENAI_API_KEY": "test-key"},
|
"env": {"OPENAI_API_KEY": "test-key"},
|
||||||
"provider_type": ProviderType.OPENAI,
|
"provider_type": ProviderType.OPENAI,
|
||||||
"category": ToolModelCategory.EXTENDED_REASONING,
|
"category": ToolModelCategory.EXTENDED_REASONING,
|
||||||
"expected": "gpt-5-codex", # GPT-5-Codex prioritized for coding tasks
|
"expected": "gpt-5.1-codex", # GPT-5.1-Codex prioritized for coding tasks
|
||||||
},
|
},
|
||||||
# Case 2: Gemini provider for fast response
|
# Case 2: Gemini provider for fast response
|
||||||
{
|
{
|
||||||
@@ -209,7 +209,7 @@ class TestFlexibleModelSelection:
|
|||||||
"env": {"OPENAI_API_KEY": "test-key"},
|
"env": {"OPENAI_API_KEY": "test-key"},
|
||||||
"provider_type": ProviderType.OPENAI,
|
"provider_type": ProviderType.OPENAI,
|
||||||
"category": ToolModelCategory.FAST_RESPONSE,
|
"category": ToolModelCategory.FAST_RESPONSE,
|
||||||
"expected": "gpt-5", # Based on new preference order
|
"expected": "gpt-5.1", # Based on new preference order
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -209,6 +209,9 @@ class TestOpenAIProvider:
|
|||||||
assert provider.validate_model_name("o4-mini")
|
assert provider.validate_model_name("o4-mini")
|
||||||
assert provider.validate_model_name("o4mini")
|
assert provider.validate_model_name("o4mini")
|
||||||
assert provider.validate_model_name("o4-mini")
|
assert provider.validate_model_name("o4-mini")
|
||||||
|
assert provider.validate_model_name("gpt-5.1")
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex")
|
||||||
|
assert provider.validate_model_name("gpt-5.1-codex-mini")
|
||||||
assert not provider.validate_model_name("gpt-4o")
|
assert not provider.validate_model_name("gpt-4o")
|
||||||
assert not provider.validate_model_name("invalid-model")
|
assert not provider.validate_model_name("invalid-model")
|
||||||
|
|
||||||
@@ -219,3 +222,20 @@ class TestOpenAIProvider:
|
|||||||
aliases = ["o3", "o3mini", "o3-mini", "o4-mini", "o4mini"]
|
aliases = ["o3", "o3mini", "o3-mini", "o4-mini", "o4mini"]
|
||||||
for alias in aliases:
|
for alias in aliases:
|
||||||
assert not provider.get_capabilities(alias).supports_extended_thinking
|
assert not provider.get_capabilities(alias).supports_extended_thinking
|
||||||
|
|
||||||
|
def test_gpt51_family_capabilities(self):
|
||||||
|
"""Ensure GPT-5.1 family exposes correct capability flags."""
|
||||||
|
provider = OpenAIModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
base = provider.get_capabilities("gpt-5.1")
|
||||||
|
assert base.supports_streaming
|
||||||
|
assert base.allow_code_generation
|
||||||
|
|
||||||
|
codex = provider.get_capabilities("gpt-5.1-codex")
|
||||||
|
assert not codex.supports_streaming
|
||||||
|
assert codex.use_openai_response_api
|
||||||
|
assert codex.allow_code_generation
|
||||||
|
|
||||||
|
codex_mini = provider.get_capabilities("gpt-5.1-codex-mini")
|
||||||
|
assert codex_mini.supports_streaming
|
||||||
|
assert codex_mini.allow_code_generation
|
||||||
|
|||||||
@@ -54,6 +54,9 @@ class TestSupportedModelsAliases:
|
|||||||
assert "o3mini" in provider.MODEL_CAPABILITIES["o3-mini"].aliases
|
assert "o3mini" in provider.MODEL_CAPABILITIES["o3-mini"].aliases
|
||||||
assert "o3pro" in provider.MODEL_CAPABILITIES["o3-pro"].aliases
|
assert "o3pro" in provider.MODEL_CAPABILITIES["o3-pro"].aliases
|
||||||
assert "gpt4.1" in provider.MODEL_CAPABILITIES["gpt-4.1"].aliases
|
assert "gpt4.1" in provider.MODEL_CAPABILITIES["gpt-4.1"].aliases
|
||||||
|
assert "gpt5.1" in provider.MODEL_CAPABILITIES["gpt-5.1"].aliases
|
||||||
|
assert "gpt5.1-codex" in provider.MODEL_CAPABILITIES["gpt-5.1-codex"].aliases
|
||||||
|
assert "codex-mini" in provider.MODEL_CAPABILITIES["gpt-5.1-codex-mini"].aliases
|
||||||
|
|
||||||
# Test alias resolution
|
# Test alias resolution
|
||||||
assert provider._resolve_model_name("mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
assert provider._resolve_model_name("mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
||||||
@@ -61,10 +64,14 @@ class TestSupportedModelsAliases:
|
|||||||
assert provider._resolve_model_name("o3pro") == "o3-pro" # o3pro resolves to o3-pro
|
assert provider._resolve_model_name("o3pro") == "o3-pro" # o3pro resolves to o3-pro
|
||||||
assert provider._resolve_model_name("o4mini") == "o4-mini"
|
assert provider._resolve_model_name("o4mini") == "o4-mini"
|
||||||
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
|
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
|
||||||
|
assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
|
||||||
|
assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
|
||||||
|
assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
|
||||||
|
|
||||||
# Test case insensitive resolution
|
# Test case insensitive resolution
|
||||||
assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
|
||||||
assert provider._resolve_model_name("O3MINI") == "o3-mini"
|
assert provider._resolve_model_name("O3MINI") == "o3-mini"
|
||||||
|
assert provider._resolve_model_name("Gpt5.1") == "gpt-5.1"
|
||||||
|
|
||||||
def test_xai_provider_aliases(self):
|
def test_xai_provider_aliases(self):
|
||||||
"""Test XAI provider's alias structure."""
|
"""Test XAI provider's alias structure."""
|
||||||
|
|||||||
Reference in New Issue
Block a user