feat: GPT-5.2 support
@@ -55,7 +55,7 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here
 # Optional: Default model to use
 # Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high',
-# 'gpt-5.1', 'gpt-5.1-codex', 'gpt-5.1-codex-mini', 'gpt-5', 'gpt-5-mini', 'grok',
+# 'gpt-5.2', 'gpt-5.1-codex', 'gpt-5.1-codex-mini', 'gpt-5', 'gpt-5-mini', 'grok',
 # 'opus-4.1', 'sonnet-4.1', or any DIAL model if DIAL is configured
 # When set to 'auto', Claude will select the best model for each task
 # Defaults to 'auto' if not specified
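For example, pinning the default to the new flagship instead of automatic selection would look like this in `.env` (a sketch based on the option list above):

```env
# Pin every request to GPT-5.2 rather than letting the CLI choose per task
DEFAULT_MODEL=gpt-5.2
```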
@@ -80,7 +80,8 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
 # If you want to disable a provider entirely, don't set its API key
 #
 # Supported OpenAI models:
-# - gpt-5.1 (400K context, 128K output, reasoning tokens, streaming enabled)
+# - gpt-5.2 (400K context, 128K output, reasoning tokens, streaming enabled)
+# - gpt-5.2-pro (400K context, 272K output, highest reasoning quality, Responses API only)
 # - gpt-5.1-codex (400K context, 128K output, coding specialization, Responses API only)
 # - gpt-5.1-codex-mini (400K context, 128K output, cost-efficient Codex with streaming)
 # - gpt-5 (400K context, 128K output, reasoning tokens)
@@ -126,7 +127,7 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
 #
 # Examples:
 # OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control)
-# OPENAI_ALLOWED_MODELS=gpt-5.1,gpt-5.1-codex # Pin to GPT-5.1 family
+# OPENAI_ALLOWED_MODELS=gpt-5.2,gpt-5.1-codex # Pin to flagship GPT-5 family
 # GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses)
 # XAI_ALLOWED_MODELS=grok-3 # Only allow standard GROK (not fast variant)
 # OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization
@@ -128,7 +128,7 @@ and review into consideration to aid with its final pre-commit review.
 For best results when using [Claude Code](https://claude.ai/code):

 - **Sonnet 4.5** - All agentic work and orchestration
-- **Gemini 3.0 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
+- **Gemini 3.0 Pro** OR **GPT-5.2-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
 </details>

 <details>
@@ -137,7 +137,7 @@ For best results when using [Claude Code](https://claude.ai/code):
 For best results when using [Codex CLI](https://developers.openai.com/codex/cli):

 - **GPT-5 Codex Medium** - All agentic work and orchestration
-- **Gemini 3.0 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
+- **Gemini 3.0 Pro** OR **GPT-5.2-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
 </details>

 ## Quick Start (5 minutes)
@@ -208,7 +208,7 @@ PAL activates any provider that has credentials in your `.env`. See `.env.exampl
 **Collaboration & Planning** *(Enabled by default)*
 - **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.)
-- **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches. With capable models (GPT-5 Pro, Gemini 3.0 Pro), generates complete code / implementation
+- **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches. With capable models (GPT-5.2 Pro, Gemini 3.0 Pro), generates complete code / implementation
 - **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives
 - **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans
 - **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering
@@ -17,7 +17,7 @@
 "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
 "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
 "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
-"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5.2 Pro). Leave false/omit for standard chat completions.",
 "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
 "description": "Human-readable description of the model",
 "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
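For illustration, a minimal manifest entry exercising the two Responses-API fields documented above might look like the following sketch (the model name and numbers are placeholders, not an entry from this commit):

```json
{
  "model_name": "example-reasoning-model",
  "context_window": 400000,
  "max_output_tokens": 128000,
  "use_openai_response_api": true,
  "default_reasoning_effort": "medium",
  "description": "Example reasoning model routed through /responses",
  "intelligence_score": 15
}
```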
@@ -17,7 +17,7 @@
 "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
 "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
 "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
-"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5.2 Pro). Leave false/omit for standard chat completions.",
 "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
 "description": "Human-readable description of the model",
 "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
@@ -47,14 +47,16 @@
 "temperature_constraint": "fixed"
 },
 {
-"model_name": "gpt-5-pro",
-"friendly_name": "OpenAI (GPT-5 Pro)",
+"model_name": "gpt-5.2-pro",
+"friendly_name": "OpenAI (GPT-5.2 Pro)",
 "aliases": [
+"gpt5.2-pro",
+"gpt5.2pro",
 "gpt5pro",
 "gpt5-pro"
 ],
 "intelligence_score": 18,
-"description": "GPT-5 Pro (400K context, 272K output) - Very advanced, reasoning model",
+"description": "GPT-5.2 Pro (400K context, 272K output) - Very advanced reasoning model",
 "context_window": 400000,
 "max_output_tokens": 272000,
 "supports_extended_thinking": true,
@@ -234,15 +236,18 @@
 "use_openai_response_api": true
 },
 {
-"model_name": "gpt-5.1",
-"friendly_name": "OpenAI (GPT-5.1)",
+"model_name": "gpt-5.2",
+"friendly_name": "OpenAI (GPT-5.2)",
 "aliases": [
+"gpt5.2",
+"gpt-5.2",
+"5.2",
 "gpt5.1",
 "gpt-5.1",
 "5.1"
 ],
 "intelligence_score": 18,
-"description": "GPT-5.1 (400K context, 128K output) - Flagship reasoning model with configurable thinking effort and vision support.",
+"description": "GPT-5.2 (400K context, 128K output) - Flagship reasoning model with configurable thinking effort and vision support.",
 "context_window": 400000,
 "max_output_tokens": 128000,
 "supports_extended_thinking": true,
@@ -16,7 +16,7 @@
 "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
 "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
 "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
-"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5.2 Pro). Leave false/omit for standard chat completions.",
 "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
 "description": "Human-readable description of the model",
 "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
@@ -315,8 +315,10 @@
 "intelligence_score": 16
 },
 {
-"model_name": "openai/gpt-5-pro",
+"model_name": "openai/gpt-5.2-pro",
 "aliases": [
+"gpt5.2-pro",
+"gpt5.2pro",
 "gpt5pro"
 ],
 "context_window": 400000,
@@ -331,7 +333,7 @@
 "use_openai_response_api": true,
 "default_reasoning_effort": "high",
 "allow_code_generation": true,
-"description": "GPT-5 Pro - Advanced reasoning model with highest quality responses (text+image input, text output only)",
+"description": "GPT-5.2 Pro - Advanced reasoning model with highest quality responses (text+image input, text output only)",
 "intelligence_score": 18
 },
 {
@@ -385,8 +387,11 @@
 "intelligence_score": 8
 },
 {
-"model_name": "openai/gpt-5.1",
+"model_name": "openai/gpt-5.2",
 "aliases": [
+"gpt5.2",
+"gpt-5.2",
+"5.2",
 "gpt5.1",
 "gpt-5.1",
 "5.1"
@@ -402,7 +407,7 @@
 "temperature_constraint": "fixed",
 "default_reasoning_effort": "medium",
 "allow_code_generation": true,
-"description": "GPT-5.1 (400K context, 128K output) - Flagship reasoning model with configurable thinking effort and vision support",
+"description": "GPT-5.2 (400K context, 128K output) - Flagship reasoning model with configurable thinking effort and vision support",
 "intelligence_score": 18
 },
 {
@@ -17,7 +17,7 @@
 "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
 "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
 "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
-"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5.2 Pro). Leave false/omit for standard chat completions.",
 "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
 "description": "Human-readable description of the model",
 "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
@@ -41,7 +41,7 @@ Regardless of your default configuration, you can specify models per request:
 | **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks |
 | **`o4-mini`** | OpenAI | 200K tokens | Latest reasoning model | Optimized for shorter contexts |
 | **`gpt4.1`** | OpenAI | 1M tokens | Latest GPT-4 with extended context | Large codebase analysis, comprehensive reviews |
-| **`gpt5.1`** (GPT-5.1) | OpenAI | 400K tokens | Flagship reasoning model with configurable thinking effort | Complex problems, balanced agent/coding flows |
+| **`gpt5.2`** (GPT-5.2) | OpenAI | 400K tokens | Flagship reasoning model with configurable thinking effort | Complex problems, balanced agent/coding flows |
 | **`gpt5.1-codex`** (GPT-5.1 Codex) | OpenAI | 400K tokens | Agentic coding specialization (Responses API) | Advanced coding tasks, structured code generation |
 | **`gpt5.1-codex-mini`** (GPT-5.1 Codex mini) | OpenAI | 400K tokens | Cost-efficient Codex variant with streaming | Balanced coding tasks, cost-conscious development |
 | **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning |
@@ -64,8 +64,8 @@ cloud models (expensive/powerful) AND local models (free/private) in the same co
 - **Flash Lite 2.0**: Text-only lightweight model (no thinking support)
 - **O3/O4 Models**: Excellent reasoning, systematic analysis, 200K context
 - **GPT-4.1**: Extended context window (1M tokens), general capabilities
-- **GPT-5.1 Series**: Latest flagship reasoning models, 400K context
-- **GPT-5.1**: Flagship model with configurable thinking effort and vision
+- **GPT-5.2 Series**: Latest flagship reasoning models, 400K context
+- **GPT-5.2**: Flagship model with configurable thinking effort and vision
 - **GPT-5.1 Codex**: Agentic coding specialization (Responses API, non-streaming)
 - **GPT-5.1 Codex mini**: Cost-efficient Codex variant with streaming support
 - **GPT-5 Series**: Advanced reasoning models, 400K context
@@ -168,7 +168,7 @@ All tools that work with files support **both individual files and entire direct
 **`analyze`** - Analyze files or directories
 - `files`: List of file paths or directories (required)
 - `question`: What to analyze (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `analysis_type`: architecture|performance|security|quality|general
 - `output_format`: summary|detailed|actionable
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -183,7 +183,7 @@ All tools that work with files support **both individual files and entire direct

 **`codereview`** - Review code files or directories
 - `files`: List of file paths or directories (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `review_type`: full|security|performance|quick
 - `focus_on`: Specific aspects to focus on
 - `standards`: Coding standards to enforce
@@ -199,7 +199,7 @@ All tools that work with files support **both individual files and entire direct

 **`debug`** - Debug with file context
 - `error_description`: Description of the issue (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `error_context`: Stack trace or logs
 - `files`: Files or directories related to the issue
 - `runtime_info`: Environment details
@@ -215,7 +215,7 @@ All tools that work with files support **both individual files and entire direct

 **`thinkdeep`** - Extended analysis with file context
 - `current_analysis`: Your current thinking (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `problem_context`: Additional context
 - `focus_areas`: Specific aspects to focus on
 - `files`: Files or directories for context
@@ -231,7 +231,7 @@ All tools that work with files support **both individual files and entire direct
 **`testgen`** - Comprehensive test generation with edge case coverage
 - `files`: Code files or directories to generate tests for (required)
 - `prompt`: Description of what to test, testing objectives, and scope (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `test_examples`: Optional existing test files as style/pattern reference
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -246,7 +246,7 @@ All tools that work with files support **both individual files and entire direct
 - `files`: Code files or directories to analyze for refactoring opportunities (required)
 - `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
 - `refactor_type`: codesmells|decompose|modernize|organization (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
 - `style_guide_examples`: Optional existing code files to use as style/pattern reference
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -63,7 +63,7 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model
 **Default Model Selection:**
 ```env
-# Options: 'auto', 'pro', 'flash', 'gpt5.1', 'gpt5.1-codex', 'gpt5.1-codex-mini', 'o3', 'o3-mini', 'o4-mini', etc.
+# Options: 'auto', 'pro', 'flash', 'gpt5.2', 'gpt5.1-codex', 'gpt5.1-codex-mini', 'o3', 'o3-mini', 'o4-mini', etc.
 DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
 ```
@@ -81,13 +81,13 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
 | Provider | Canonical Models | Notable Aliases |
 |----------|-----------------|-----------------|
-| OpenAI | `gpt-5.1`, `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5.1`, `gpt-5.1`, `5.1`, `gpt5.1-codex`, `codex-5.1`, `codex-mini`, `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
+| OpenAI | `gpt-5.2`, `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5`, `gpt-5.2-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5.2`, `gpt-5.2`, `5.2`, `gpt5.1-codex`, `codex-5.1`, `codex-mini`, `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
 | Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` |
 | X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` |
 | OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
 | Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |

-Latest OpenAI entries (`gpt-5.1`, `gpt-5.1-codex`, `gpt-5.1-codex-mini`) mirror the official model cards released on November 13, 2025: all three expose 400K-token contexts with 128K-token outputs, reasoning-token support, and multimodal inputs. `gpt-5.1-codex` is Responses-only with streaming disabled, while the base `gpt-5.1` and Codex mini support streaming along with full code-generation flags. Update your manifests if you run custom deployments so these capability bits stay accurate.
+Latest OpenAI entries (`gpt-5.2`, `gpt-5.1-codex`, `gpt-5.1-codex-mini`, `gpt-5.2-pro`) expose 400K-token contexts with large outputs, reasoning-token support, and multimodal inputs. `gpt-5.1-codex` and `gpt-5.2-pro` are Responses-only with streaming disabled, while the base `gpt-5.2` and Codex mini support streaming along with full code-generation flags. Update your manifests if you run custom deployments so these capability bits stay accurate.

 > **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support, code generation) without editing Python.
@@ -107,7 +107,7 @@ The `allow_code_generation` capability enables models to generate complete, prod
 **When to Enable:**

-- **Enable for**: Models MORE capable than your primary CLI's model (e.g., GPT-5.1 Codex, GPT-5 Pro, GPT-5.1 when using Claude Code with Sonnet 4.5)
+- **Enable for**: Models MORE capable than your primary CLI's model (e.g., GPT-5.1 Codex, GPT-5.2 Pro, GPT-5.2 when using Claude Code with Sonnet 4.5)
 - **Purpose**: Get complete implementations from a more powerful reasoning model that your primary CLI can then review and apply
 - **Use case**: Large-scale implementations, major refactoring, complete module creation
@@ -132,7 +132,7 @@ The `allow_code_generation` capability enables models to generate complete, prod
 ...
 },
 {
-"model_name": "gpt-5-pro",
+"model_name": "gpt-5.2-pro",
 "allow_code_generation": true,
 "intelligence_score": 19,
 ...
@@ -142,8 +142,8 @@ The `allow_code_generation` capability enables models to generate complete, prod
 ```

 **Typical Workflow:**
-1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **gpt-5-pro**
-2. GPT-5-Pro generates structured implementation and shares the complete implementation with PAL
+1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **gpt-5.2-pro**
+2. GPT-5.2-Pro generates structured implementation and shares the complete implementation with PAL
 3. PAL saves the code to `pal_generated.code` and asks AI agent to implement the plan
 4. AI agent continues from the previous context, reads the file, applies the implementation
@@ -198,7 +198,7 @@ OPENAI_ALLOWED_MODELS=o4-mini
 GOOGLE_ALLOWED_MODELS=flash

 # High-performance setup
-OPENAI_ALLOWED_MODELS=gpt-5.1-codex,gpt-5.1
+OPENAI_ALLOWED_MODELS=gpt-5.1-codex,gpt-5.2
 GOOGLE_ALLOWED_MODELS=pro

 # Single model standardization
@@ -61,7 +61,7 @@ The curated defaults in `conf/openrouter_models.json` include popular entries su
 | `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
 | `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
 | `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
-| `gpt5.1`, `gpt-5.1`, `5.1` | `openai/gpt-5.1` | Flagship GPT-5.1 with reasoning and vision |
+| `gpt5.2`, `gpt-5.2`, `5.2` | `openai/gpt-5.2` | Flagship GPT-5.2 with reasoning and vision |
 | `gpt5.1-codex`, `codex-5.1` | `openai/gpt-5.1-codex` | Agentic coding specialization (Responses API) |
 | `codex-mini`, `gpt5.1-codex-mini` | `openai/gpt-5.1-codex-mini` | Cost-efficient Codex variant with streaming |
@@ -77,21 +77,21 @@ View the baseline OpenRouter catalogue in [`conf/openrouter_models.json`](conf/o
 Native catalogues (`conf/openai_models.json`, `conf/gemini_models.json`, `conf/xai_models.json`, `conf/dial_models.json`) follow the same schema. Updating those files lets you:

-- Expose new aliases (e.g., map `enterprise-pro` to `gpt-5-pro`)
+- Expose new aliases (e.g., map `enterprise-pro` to `gpt-5.2-pro`)
 - Advertise support for JSON mode or vision if the upstream provider adds it
 - Adjust token limits when providers increase context windows

 ### Latest OpenAI releases

-OpenAI's November 13, 2025 drop introduced `gpt-5.1`, `gpt-5.1-codex`, and `gpt-5.1-codex-mini`, all of which now ship in `conf/openai_models.json`:
+OpenAI's November 13, 2025 drop introduced `gpt-5.1-codex` and `gpt-5.1-codex-mini`, while the flagship base model is now `gpt-5.2`. All of these ship in `conf/openai_models.json`:

 | Model | Highlights | Notes |
 |-------|------------|-------|
-| `gpt-5.1` | 400K context, 128K output, multimodal IO, configurable reasoning effort | Streaming enabled; use for balanced agent/coding flows |
+| `gpt-5.2` | 400K context, 128K output, multimodal IO, configurable reasoning effort | Streaming enabled; use for balanced agent/coding flows |
 | `gpt-5.1-codex` | Responses-only agentic coding version of GPT-5.1 | Streaming disabled; `use_openai_response_api=true`; `allow_code_generation=true` |
 | `gpt-5.1-codex-mini` | Cost-efficient Codex variant | Streaming enabled, retains 400K context and code-generation flag |

-These entries include pricing-friendly aliases (`gpt5.1`, `codex-5.1`, `codex-mini`) plus updated capability flags (`supports_extended_thinking`, `allow_code_generation`). Copy the manifest if you operate custom deployment names so downstream providers inherit the same metadata.
+These entries include pricing-friendly aliases (`gpt5.2`, `codex-5.1`, `codex-mini`) plus updated capability flags (`supports_extended_thinking`, `allow_code_generation`). Copy the manifest if you operate custom deployment names so downstream providers inherit the same metadata.

 Because providers load the manifests on import, you can tweak capabilities without touching Python. Restart the server after editing the JSON files so changes are picked up.
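As a concrete illustration of the alias bullet above, a custom manifest entry could map an internal deployment name onto the new flagship like this (a hypothetical sketch, not part of this commit):

```json
{
  "model_name": "gpt-5.2-pro",
  "aliases": ["enterprise-pro"],
  "description": "Company-internal alias for GPT-5.2 Pro"
}
```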
@@ -29,7 +29,7 @@ You need at least one API key. Choose based on your needs:
 **OpenAI:**
 - Visit [OpenAI Platform](https://platform.openai.com/api-keys)
-- Generate an API key for GPT-5.1, GPT-5.1-Codex, GPT-5, O3 access
+- Generate an API key for GPT-5.2, GPT-5.1-Codex, GPT-5, O3 access

 **X.AI (Grok):**
 - Visit [X.AI Console](https://console.x.ai/)
@@ -287,7 +287,7 @@ Add your API keys (at least one required):
 ```env
 # Choose your providers (at least one required)
 GEMINI_API_KEY=your-gemini-api-key-here # For Gemini models
-OPENAI_API_KEY=your-openai-api-key-here # For GPT-5.1, GPT-5.1-Codex, O3
+OPENAI_API_KEY=your-openai-api-key-here # For GPT-5.2, GPT-5.1-Codex, O3
 XAI_API_KEY=your-xai-api-key-here # For Grok models
 OPENROUTER_API_KEY=your-openrouter-key # For multiple models
@@ -514,7 +514,7 @@ DEFAULT_MODEL=auto
 GEMINI_API_KEY=your-key
 OPENAI_API_KEY=your-key
 GOOGLE_ALLOWED_MODELS=pro
-OPENAI_ALLOWED_MODELS=gpt-5.1-codex,gpt-5.1
+OPENAI_ALLOWED_MODELS=gpt-5.1-codex,gpt-5.2
 ```

 ### Local-First Setup
@@ -39,7 +39,7 @@ A straightforward rubric that mirrors typical provider tiers:
 | Intelligence | Guidance |
 |--------------|-------------------------------------------------------------------------------------------|
-| 18–19 | Frontier reasoning models (Gemini 3.0 Pro, Gemini 2.5 Pro, GPT‑5.1 Codex, GPT‑5.1, GPT‑5) |
+| 18–19 | Frontier reasoning models (Gemini 3.0 Pro, Gemini 2.5 Pro, GPT‑5.1 Codex, GPT‑5.2 Pro, GPT‑5.2, GPT‑5) |
 | 15–17 | Strong general models with large context (O3 Pro, DeepSeek R1) |
 | 12–14 | Balanced assistants (Claude Opus/Sonnet, Mistral Large) |
 | 9–11 | Fast distillations (Gemini Flash, GPT-5 Mini, Mistral medium) |
@@ -64,7 +64,7 @@ This workflow ensures methodical analysis before expert insights, resulting in d
 **Initial Configuration (used in step 1):**
 - `prompt`: What to analyze or look for (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `analysis_type`: architecture|performance|security|quality|general (default: general)
 - `output_format`: summary|detailed|actionable (default: detailed)
 - `temperature`: Temperature for analysis (0-1, default 0.2)
@@ -39,7 +39,7 @@ word verdict in the end.
 - **Collaborative thinking partner** for your analysis and planning
 - **Get second opinions** on your designs and approaches
 - **Brainstorm solutions** and explore alternatives together
-- **Structured code generation**: When using GPT-5.1 or Gemini 3.0 / 2.5 Pro, get complete, production-ready implementations saved to `pal_generated.code` for your CLI to review and apply
+- **Structured code generation**: When using GPT-5.2 or Gemini 3.0 / 2.5 Pro, get complete, production-ready implementations saved to `pal_generated.code` for your CLI to review and apply
 - **Validate your checklists** and implementation plans
 - **General development questions** and explanations
 - **Technology comparisons** and best practices
@@ -52,7 +52,7 @@ word verdict in the end.
 ## Tool Parameters

 - `prompt`: Your question or discussion topic (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `absolute_file_paths`: Optional absolute file or directory paths for additional context
 - `images`: Optional images for visual context (absolute paths)
 - `working_directory_absolute_path`: **Required** - Absolute path to an existing directory where generated code artifacts will be saved
@@ -62,11 +62,11 @@ word verdict in the end.
 ## Structured Code Generation

-When using advanced reasoning models like **GPT-5 Pro** or **Gemini 3.0 Pro**, the chat tool can generate complete, production-ready code implementations in a structured format.
+When using advanced reasoning models like **GPT-5.2 Pro** or **Gemini 3.0 Pro**, the chat tool can generate complete, production-ready code implementations in a structured format.

 ### How It Works

-1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **GPT-5 Pro** or **Gemini 3.0 Pro**
+1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **GPT-5.2 Pro** or **Gemini 3.0 Pro**
 2. The model generates structured implementation and shares the complete implementation with PAL
 3. PAL saves the code to `pal_generated.code` and asks AI agent to implement the plan
 4. AI agent continues from the previous context, reads the file, applies the implementation
@@ -85,7 +85,7 @@ For minor changes (small tweaks, bug fixes, algorithm improvements), the model r
 ### Example Usage

 ```
-chat with gpt-5-pro and ask it to make me a standalone, classic version of the
+chat with gpt-5.2-pro and ask it to make me a standalone, classic version of the
 Pacman game using pygame that I can run from the commandline. Give me a single
 script to execute in the end with any / all dependencies setup for me.
 Do everything using pygame, we have no external resources / images / audio at
@@ -79,7 +79,7 @@ The above prompt will simultaneously run two separate `codereview` tools with tw
 **Initial Review Configuration (used in step 1):**
 - `prompt`: User's summary of what the code does, expected behavior, constraints, and review objectives (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `review_type`: full|security|performance|quick (default: full)
 - `focus_on`: Specific aspects to focus on (e.g., "security vulnerabilities", "performance bottlenecks")
 - `standards`: Coding standards to enforce (e.g., "PEP8", "ESLint", "Google Style Guide")
@@ -72,7 +72,7 @@ This structured approach ensures Claude performs methodical groundwork before ex
 - `images`: Visual debugging materials (error screenshots, logs, etc.)

 **Model Selection:**
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
 - `use_assistant_model`: Whether to use expert analysis phase (default: true, set to false to use Claude only)
@@ -140,7 +140,7 @@ Use pal and perform a thorough precommit ensuring there aren't any new regressio
 **Initial Configuration (used in step 1):**
 - `path`: Starting directory to search for repos (REQUIRED for step 1, must be absolute path)
 - `prompt`: The original user request description for the changes (required for context)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `compare_to`: Compare against a branch/tag instead of local changes (optional)
 - `severity_filter`: critical|high|medium|low|all (default: all)
 - `include_staged`: Include staged changes in the review (default: true)
@@ -102,7 +102,7 @@ This results in Claude first performing its own expert analysis, encouraging it
 **Initial Configuration (used in step 1):**
 - `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
 - `refactor_type`: codesmells|decompose|modernize|organization (default: codesmells)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
 - `style_guide_examples`: Optional existing code files to use as style/pattern reference (absolute paths)
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -85,7 +85,7 @@ security remediation plan using planner
 - `images`: Architecture diagrams, security documentation, or visual references

 **Initial Security Configuration (used in step 1):**
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `security_scope`: Application context, technology stack, and security boundary definition (required)
 - `threat_level`: low|medium|high|critical (default: medium) - determines assessment depth and urgency
 - `compliance_requirements`: List of compliance frameworks to assess against (e.g., ["PCI DSS", "SOC2"])
@@ -69,7 +69,7 @@ Test generation excels with extended reasoning models like Gemini Pro or O3, whi
 **Initial Configuration (used in step 1):**
 - `prompt`: Description of what to test, testing objectives, and specific scope/focus areas (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `test_examples`: Optional existing test files or directories to use as style/pattern reference (absolute paths)
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
 - `use_assistant_model`: Whether to use expert test generation phase (default: true, set to false to use Claude only)
@@ -30,7 +30,7 @@ with the best architecture for my project
 ## Tool Parameters

 - `prompt`: Your current thinking/analysis to extend and validate (required)
-- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.1|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
+- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5.2|gpt5.1-codex|gpt5.1-codex-mini|gpt5|gpt5-mini|gpt5-nano (default: server default)
 - `problem_context`: Additional context about the problem or goal
 - `focus_areas`: Specific aspects to focus on (architecture, performance, security, etc.)
 - `files`: Optional file paths or directories for additional context (absolute paths)
@@ -119,9 +119,9 @@ class OpenAIModelProvider(RegistryBackedProviderMixin, OpenAICompatibleProvider)
             preferred = find_first(
                 [
                     "gpt-5.1-codex",
-                    "gpt-5.1",
+                    "gpt-5.2",
                     "gpt-5-codex",
-                    "gpt-5-pro",
+                    "gpt-5.2-pro",
                     "o3-pro",
                     "gpt-5",
                     "o3",
@@ -131,10 +131,10 @@ class OpenAIModelProvider(RegistryBackedProviderMixin, OpenAICompatibleProvider)
         elif category == ToolModelCategory.FAST_RESPONSE:
             # Prefer fast, cost-efficient models
-            # GPT-5.1 models for speed, GPT-5.1-Codex after (premium pricing but cached)
+            # GPT-5.2 models for speed, GPT-5.1-Codex after (premium pricing but cached)
             preferred = find_first(
                 [
-                    "gpt-5.1",
+                    "gpt-5.2",
                     "gpt-5.1-codex-mini",
                     "gpt-5",
                     "gpt-5-mini",
@@ -147,14 +147,14 @@ class OpenAIModelProvider(RegistryBackedProviderMixin, OpenAICompatibleProvider)
         else:  # BALANCED or default
             # Prefer balanced performance/cost models
-            # Include GPT-5.1 family for latest capabilities
+            # Include GPT-5.2 family for latest capabilities
             preferred = find_first(
                 [
-                    "gpt-5.1",
+                    "gpt-5.2",
                     "gpt-5.1-codex",
                     "gpt-5",
                     "gpt-5-codex",
-                    "gpt-5-pro",
+                    "gpt-5.2-pro",
                     "gpt-5-mini",
                     "o4-mini",
                     "o3-mini",
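For readers unfamiliar with this provider code: the preference lists above only matter in auto mode, where `find_first` walks the ordered names and returns the first model the provider can actually serve. A minimal sketch of that behaviour follows; the helper name comes from the diff, but its internals here are illustrative, assuming it closes over the set of available models:

```python
from typing import Optional


def make_find_first(available: set[str]):
    """Close over the provider's available models, as the real helper presumably does."""

    def find_first(candidates: list[str]) -> Optional[str]:
        # Walk the preference list in order; the first available name wins.
        for name in candidates:
            if name in available:
                return name
        return None

    return find_first


# Illustration only: with just these two models served, the BALANCED list
# above resolves to "gpt-5.2" because it precedes "gpt-5".
find_first = make_find_first({"gpt-5.2", "gpt-5"})
assert find_first(["gpt-5.2", "gpt-5.1-codex", "gpt-5"]) == "gpt-5.2"
```

This first-match design is why the tests later in this commit expect `gpt-5.2` as the fallback for both the FAST_RESPONSE and BALANCED categories.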
@@ -228,10 +228,10 @@ Some integration tests maintain cassettes for multiple model variants to ensure
 ### Consensus Tool Cassettes

-The `test_consensus_integration.py` test uses parameterized fixtures to test both `gpt-5` and `gpt-5.1` models:
+The `test_consensus_integration.py` test uses parameterized fixtures to test both `gpt-5` and `gpt-5.2` models:

 - `tests/openai_cassettes/consensus_step1_gpt5_for.json` - Cassette for gpt-5 model
-- `tests/openai_cassettes/consensus_step1_gpt51_for.json` - Cassette for gpt-5.1 model
+- `tests/openai_cassettes/consensus_step1_gpt52_for.json` - Cassette for gpt-5.2 model

 **When updating consensus cassettes:**
@@ -249,9 +249,9 @@ rm tests/openai_cassettes/consensus_step1_gpt5_for.json
 # Run the test with real API key (it will record for gpt-5)
 OPENAI_API_KEY="your-real-key" python -m pytest tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5] -v

-# Or for gpt-5.1
-rm tests/openai_cassettes/consensus_step1_gpt51_for.json
-OPENAI_API_KEY="your-real-key" python -m pytest tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5.1] -v
+# Or for gpt-5.2
+rm tests/openai_cassettes/consensus_step1_gpt52_for.json
+OPENAI_API_KEY="your-real-key" python -m pytest tests/test_consensus_integration.py::test_consensus_multi_model_consultations[gpt-5.2] -v
 ```

 This dual-coverage approach ensures that both model families continue to work correctly as the codebase evolves.
tests/openai_cassettes/consensus_step1_gpt52_for.json (new file, 82 lines): diff suppressed because one or more lines are too long.
@@ -95,8 +95,8 @@ class TestAutoModeComprehensive:
             },
             {
                 "EXTENDED_REASONING": "gpt-5.1-codex", # GPT-5.1 Codex prioritized for coding tasks
-                "FAST_RESPONSE": "gpt-5.1", # Prefer gpt-5.1 for speed
-                "BALANCED": "gpt-5.1", # Prefer gpt-5.1 for balanced
+                "FAST_RESPONSE": "gpt-5.2", # Prefer gpt-5.2 for speed
+                "BALANCED": "gpt-5.2", # Prefer gpt-5.2 for balanced
             },
         ),
         # Only X.AI API available
@@ -83,7 +83,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
         pass

     monkeypatch.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
-    monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5.1")
+    monkeypatch.setenv("OPENAI_ALLOWED_MODELS", "gpt-5.2")
     monkeypatch.setenv("OPENROUTER_ALLOWED_MODELS", "gpt5nano")
     monkeypatch.setenv("XAI_ALLOWED_MODELS", "")
@@ -104,7 +104,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
         ("OPENAI_API_KEY", "test-openai"),
         ("OPENROUTER_API_KEY", "test-openrouter"),
         ("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro"),
-        ("OPENAI_ALLOWED_MODELS", "gpt-5.1"),
+        ("OPENAI_ALLOWED_MODELS", "gpt-5.2"),
         ("OPENROUTER_ALLOWED_MODELS", "gpt5nano"),
         ("XAI_ALLOWED_MODELS", ""),
     ):
@@ -139,7 +139,7 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
     assert payload["status"] == "error"

     available_models = _extract_available_models(payload["content"])
-    assert set(available_models) == {"gemini-2.5-pro", "gpt-5.1", "gpt5nano", "openai/gpt-5-nano"}
+    assert set(available_models) == {"gemini-2.5-pro", "gpt-5.2", "gpt5nano", "openai/gpt-5-nano"}


 @pytest.mark.no_mock_provider
@@ -225,6 +225,6 @@ def test_error_listing_without_restrictions_shows_full_catalog(monkeypatch, rese
     available_models = _extract_available_models(payload["content"])
     assert "gemini-2.5-pro" in available_models
-    assert any(model in available_models for model in {"gpt-5.1", "gpt-5"})
+    assert any(model in available_models for model in {"gpt-5.2", "gpt-5"})
     assert "grok-4" in available_models
     assert len(available_models) >= 5
@@ -99,8 +99,8 @@ class TestAutoModeProviderSelection:

             # Should select appropriate OpenAI models based on new preference order
             assert extended_reasoning == "gpt-5.1-codex" # GPT-5.1 Codex prioritized for extended reasoning
-            assert fast_response == "gpt-5.1" # gpt-5.1 comes first in fast response preference
-            assert balanced == "gpt-5.1" # gpt-5.1 for balanced
+            assert fast_response == "gpt-5.2" # gpt-5.2 comes first in fast response preference
+            assert balanced == "gpt-5.2" # gpt-5.2 for balanced

         finally:
             # Restore original environment
@@ -20,7 +20,7 @@ CASSETTE_DIR.mkdir(exist_ok=True)
 # Mapping of OpenAI model names to their cassette files
 CONSENSUS_CASSETTES = {
     "gpt-5": CASSETTE_DIR / "consensus_step1_gpt5_for.json",
-    "gpt-5.1": CASSETTE_DIR / "consensus_step1_gpt51_for.json",
+    "gpt-5.2": CASSETTE_DIR / "consensus_step1_gpt52_for.json",
 }

 GEMINI_REPLAY_DIR = Path(__file__).parent / "gemini_cassettes"
@@ -32,11 +32,11 @@ GEMINI_REPLAY_PATH = GEMINI_REPLAY_DIR / "consensus" / "step2_gemini25_flash_aga
 @pytest.mark.integration
 @pytest.mark.asyncio
 @pytest.mark.no_mock_provider
-@pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.1"])
+@pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.2"])
 async def test_consensus_multi_model_consultations(monkeypatch, openai_model):
     """Exercise ConsensusTool against OpenAI model (supporting) and gemini-2.5-flash (critical).

-    Tests both gpt-5 and gpt-5.1 to ensure regression coverage for both model families.
+    Tests both gpt-5 and gpt-5.2 to ensure regression coverage for both model families.
     """

     # Get the cassette path for this model
@@ -37,14 +37,14 @@ class TestIntelligentFallback:

     @patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False)
     def test_prefers_openai_o3_mini_when_available(self):
-        """Test that gpt-5.1 is preferred when OpenAI API key is available (based on new preference order)"""
+        """Test that gpt-5.2 is preferred when OpenAI API key is available (based on new preference order)"""
         # Register only OpenAI provider for this test
         from providers.openai import OpenAIModelProvider

         ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

         fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
-        assert fallback_model == "gpt-5.1" # Based on new preference order: gpt-5.1 before o4-mini
+        assert fallback_model == "gpt-5.2" # Based on new preference order: gpt-5.2 before o4-mini

     @patch.dict(os.environ, {"OPENAI_API_KEY": "", "GEMINI_API_KEY": "test-gemini-key"}, clear=False)
     def test_prefers_gemini_flash_when_openai_unavailable(self):
@@ -147,8 +147,8 @@ class TestIntelligentFallback:

         history, tokens = build_conversation_history(context, model_context=None)

-        # Verify that ModelContext was called with gpt-5.1 (the intelligent fallback based on new preference order)
-        mock_context_class.assert_called_once_with("gpt-5.1")
+        # Verify that ModelContext was called with gpt-5.2 (the intelligent fallback based on new preference order)
+        mock_context_class.assert_called_once_with("gpt-5.2")

     def test_auto_mode_with_gemini_only(self):
         """Test auto mode behavior when only Gemini API key is available"""
@@ -50,7 +50,7 @@ class TestOpenAIProvider:
         assert provider.validate_model_name("o4-mini") is True
         assert provider.validate_model_name("gpt-5") is True
         assert provider.validate_model_name("gpt-5-mini") is True
-        assert provider.validate_model_name("gpt-5.1") is True
+        assert provider.validate_model_name("gpt-5.2") is True
         assert provider.validate_model_name("gpt-5.1-codex") is True
         assert provider.validate_model_name("gpt-5.1-codex-mini") is True
@@ -62,6 +62,7 @@ class TestOpenAIProvider:
         assert provider.validate_model_name("gpt5") is True
         assert provider.validate_model_name("gpt5-mini") is True
         assert provider.validate_model_name("gpt5mini") is True
+        assert provider.validate_model_name("gpt5.2") is True
         assert provider.validate_model_name("gpt5.1") is True
         assert provider.validate_model_name("gpt5.1-codex") is True
         assert provider.validate_model_name("codex-mini") is True
@@ -83,7 +84,8 @@ class TestOpenAIProvider:
         assert provider._resolve_model_name("gpt5") == "gpt-5"
         assert provider._resolve_model_name("gpt5-mini") == "gpt-5-mini"
         assert provider._resolve_model_name("gpt5mini") == "gpt-5-mini"
-        assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
+        assert provider._resolve_model_name("gpt5.2") == "gpt-5.2"
+        assert provider._resolve_model_name("gpt5.1") == "gpt-5.2"
         assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
         assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"
@@ -95,7 +97,8 @@ class TestOpenAIProvider:
         assert provider._resolve_model_name("o4-mini") == "o4-mini"
         assert provider._resolve_model_name("gpt-5") == "gpt-5"
         assert provider._resolve_model_name("gpt-5-mini") == "gpt-5-mini"
-        assert provider._resolve_model_name("gpt-5.1") == "gpt-5.1"
+        assert provider._resolve_model_name("gpt-5.2") == "gpt-5.2"
+        assert provider._resolve_model_name("gpt-5.1") == "gpt-5.2"
         assert provider._resolve_model_name("gpt-5.1-codex") == "gpt-5.1-codex"
         assert provider._resolve_model_name("gpt-5.1-codex-mini") == "gpt-5.1-codex-mini"
@@ -158,12 +161,12 @@ class TestOpenAIProvider:
         assert capabilities.supports_function_calling is True
         assert capabilities.supports_temperature is True

-    def test_get_capabilities_gpt51(self):
-        """Test GPT-5.1 capabilities reflect new metadata."""
+    def test_get_capabilities_gpt52(self):
+        """Test GPT-5.2 capabilities reflect new metadata."""
         provider = OpenAIModelProvider("test-key")

-        capabilities = provider.get_capabilities("gpt-5.1")
-        assert capabilities.model_name == "gpt-5.1"
+        capabilities = provider.get_capabilities("gpt-5.2")
+        assert capabilities.model_name == "gpt-5.2"
         assert capabilities.supports_streaming is True
         assert capabilities.supports_function_calling is True
         assert capabilities.supports_json_mode is True
@@ -133,8 +133,8 @@ class TestModelSelection:
         ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

         model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE)
-        # OpenAI now prefers gpt-5.1 for fast response (based on our new preference order)
-        assert model == "gpt-5.1"
+        # OpenAI now prefers gpt-5.2 for fast response (based on our new preference order)
+        assert model == "gpt-5.2"

     def test_fast_response_with_gemini_only(self):
         """Test FAST_RESPONSE prefers flash when only Gemini is available."""
@@ -167,8 +167,8 @@ class TestModelSelection:
         ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

         model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED)
-        # OpenAI prefers gpt-5.1 for balanced (based on our new preference order)
-        assert model == "gpt-5.1"
+        # OpenAI prefers gpt-5.2 for balanced (based on our new preference order)
+        assert model == "gpt-5.2"

     def test_no_category_uses_balanced_logic(self):
         """Test that no category specified uses balanced logic."""
@@ -209,7 +209,7 @@ class TestFlexibleModelSelection:
             "env": {"OPENAI_API_KEY": "test-key"},
             "provider_type": ProviderType.OPENAI,
             "category": ToolModelCategory.FAST_RESPONSE,
-            "expected": "gpt-5.1", # Based on new preference order
+            "expected": "gpt-5.2", # Based on new preference order
         },
     ]
@@ -209,7 +209,7 @@ class TestOpenAIProvider:
         assert provider.validate_model_name("o4-mini")
         assert provider.validate_model_name("o4mini")
         assert provider.validate_model_name("o4-mini")
-        assert provider.validate_model_name("gpt-5.1")
+        assert provider.validate_model_name("gpt-5.2")
         assert provider.validate_model_name("gpt-5.1-codex")
         assert provider.validate_model_name("gpt-5.1-codex-mini")
         assert not provider.validate_model_name("gpt-4o")
@@ -223,11 +223,11 @@ class TestOpenAIProvider:
         for alias in aliases:
             assert not provider.get_capabilities(alias).supports_extended_thinking

-    def test_gpt51_family_capabilities(self):
-        """Ensure GPT-5.1 family exposes correct capability flags."""
+    def test_gpt52_family_capabilities(self):
+        """Ensure GPT-5.2 base model exposes correct capability flags."""
         provider = OpenAIModelProvider(api_key="test-key")

-        base = provider.get_capabilities("gpt-5.1")
+        base = provider.get_capabilities("gpt-5.2")
         assert base.supports_streaming
         assert base.allow_code_generation
@@ -54,7 +54,7 @@ class TestSupportedModelsAliases:
         assert "o3mini" in provider.MODEL_CAPABILITIES["o3-mini"].aliases
         assert "o3pro" in provider.MODEL_CAPABILITIES["o3-pro"].aliases
         assert "gpt4.1" in provider.MODEL_CAPABILITIES["gpt-4.1"].aliases
-        assert "gpt5.1" in provider.MODEL_CAPABILITIES["gpt-5.1"].aliases
+        assert "gpt5.2" in provider.MODEL_CAPABILITIES["gpt-5.2"].aliases
         assert "gpt5.1-codex" in provider.MODEL_CAPABILITIES["gpt-5.1-codex"].aliases
         assert "codex-mini" in provider.MODEL_CAPABILITIES["gpt-5.1-codex-mini"].aliases
@@ -64,14 +64,15 @@ class TestSupportedModelsAliases:
         assert provider._resolve_model_name("o3pro") == "o3-pro" # o3pro resolves to o3-pro
         assert provider._resolve_model_name("o4mini") == "o4-mini"
         assert provider._resolve_model_name("gpt4.1") == "gpt-4.1" # gpt4.1 resolves to gpt-4.1
-        assert provider._resolve_model_name("gpt5.1") == "gpt-5.1"
+        assert provider._resolve_model_name("gpt5.2") == "gpt-5.2"
+        assert provider._resolve_model_name("gpt5.1") == "gpt-5.2"
         assert provider._resolve_model_name("gpt5.1-codex") == "gpt-5.1-codex"
         assert provider._resolve_model_name("codex-mini") == "gpt-5.1-codex-mini"

         # Test case insensitive resolution
         assert provider._resolve_model_name("Mini") == "gpt-5-mini" # mini -> gpt-5-mini now
         assert provider._resolve_model_name("O3MINI") == "o3-mini"
-        assert provider._resolve_model_name("Gpt5.1") == "gpt-5.1"
+        assert provider._resolve_model_name("Gpt5.1") == "gpt-5.2"

     def test_xai_provider_aliases(self):
         """Test XAI provider's alias structure."""