Quick test mode for simulation tests

Fixed o4-mini name; OpenAI removed o4-mini-high
Add max_output_tokens property to ModelCapabilities
Fahad
2025-06-23 18:33:47 +04:00
parent 8c1814d4eb
commit ce6c1fd7ea
35 changed files with 137 additions and 110 deletions

View File

@@ -409,7 +409,7 @@ for most debugging workflows, as Claude is usually able to confidently find the
When in doubt, you can always follow up with a new prompt and ask Claude to share its findings with another model:
```text
-Use continuation with thinkdeep, share details with o4-mini-high to find out what the best fix is for this
+Use continuation with thinkdeep, share details with o4-mini to find out what the best fix is for this
```
**[📖 Read More](docs/tools/debug.md)** - Step-by-step investigation methodology with workflow enforcement

View File

@@ -80,7 +80,12 @@ class CommunicationSimulator:
"""Simulates real-world Claude CLI communication with MCP Gemini server""" """Simulates real-world Claude CLI communication with MCP Gemini server"""
def __init__( def __init__(
self, verbose: bool = False, keep_logs: bool = False, selected_tests: list[str] = None, setup: bool = False, quick_mode: bool = False self,
verbose: bool = False,
keep_logs: bool = False,
selected_tests: list[str] = None,
setup: bool = False,
quick_mode: bool = False,
): ):
self.verbose = verbose self.verbose = verbose
self.keep_logs = keep_logs self.keep_logs = keep_logs
@@ -109,7 +114,7 @@ class CommunicationSimulator:
"content_validation", # Content validation and deduplication "content_validation", # Content validation and deduplication
"model_thinking_config", # Flash/flashlite model testing "model_thinking_config", # Flash/flashlite model testing
"o3_model_selection", # O3 model selection testing "o3_model_selection", # O3 model selection testing
"per_tool_deduplication" # File deduplication for individual tools "per_tool_deduplication", # File deduplication for individual tools
] ]
# If quick mode is enabled, override selected_tests # If quick mode is enabled, override selected_tests
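The override that presumably follows that comment can be pictured as below. This is a sketch only; the six-test list is an assumption, with four names taken from the hunk above and the total of six inferred from the `--quick` help text further down:

```python
# Hypothetical quick-mode override; QUICK_TESTS is an assumed list, with four
# names taken from the hunk above and the remainder illustrative.
QUICK_TESTS = [
    "content_validation",
    "model_thinking_config",
    "o3_model_selection",
    "per_tool_deduplication",
    # ...plus two more essential tests to reach the advertised six
]

def resolve_tests(selected_tests: list[str] | None, quick_mode: bool) -> list[str]:
    """Quick mode wins over any explicitly selected tests."""
    return QUICK_TESTS if quick_mode else (selected_tests or [])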
@@ -444,7 +449,9 @@ def parse_arguments():
parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)") parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit") parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
parser.add_argument("--individual", "-i", help="Run a single test individually") parser.add_argument("--individual", "-i", help="Run a single test individually")
parser.add_argument("--quick", "-q", action="store_true", help="Run quick test mode (6 essential tests for time-limited testing)") parser.add_argument(
"--quick", "-q", action="store_true", help="Run quick test mode (6 essential tests for time-limited testing)"
)
parser.add_argument( parser.add_argument(
"--setup", action="store_true", help="Force setup standalone server environment using run-server.sh" "--setup", action="store_true", help="Force setup standalone server environment using run-server.sh"
) )
@@ -522,7 +529,11 @@ def main():
    # Initialize simulator consistently for all use cases
    simulator = CommunicationSimulator(
-        verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests, setup=args.setup, quick_mode=args.quick
+        verbose=args.verbose,
+        keep_logs=args.keep_logs,
+        selected_tests=args.tests,
+        setup=args.setup,
+        quick_mode=args.quick,
    )
    # Determine execution mode and run
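End to end, the new flag travels from argparse into the constructor. A minimal sketch of that flow, using only the arguments visible in this diff (the script name in the comment is an assumption):

```python
import argparse

# Minimal reproduction of the wiring shown above; the simulator itself is not
# constructed here, only the parsed flag that feeds quick_mode=args.quick.
parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true")
parser.add_argument("--keep-logs", action="store_true")
parser.add_argument("--tests", "-t", nargs="+")
parser.add_argument("--setup", action="store_true")
parser.add_argument(
    "--quick", "-q", action="store_true", help="Run quick test mode (6 essential tests for time-limited testing)"
)

# e.g. `python communication_simulator_test.py --quick` (script name assumed)
args = parser.parse_args(["--quick"])
print(args.quick)  # True
```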

View File

@@ -22,6 +22,7 @@
"model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')", "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')",
"aliases": "Array of short names users can type instead of the full model name", "aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)", "context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)", "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output", "supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling", "supports_function_calling": "Whether the model supports function/tool calling",
@@ -36,6 +37,7 @@
"model_name": "my-local-model", "model_name": "my-local-model",
"aliases": ["shortname", "nickname", "abbrev"], "aliases": ["shortname", "nickname", "abbrev"],
"context_window": 128000, "context_window": 128000,
"max_output_tokens": 32768,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -52,6 +54,7 @@
"model_name": "anthropic/claude-opus-4", "model_name": "anthropic/claude-opus-4",
"aliases": ["opus", "claude-opus", "claude4-opus", "claude-4-opus"], "aliases": ["opus", "claude-opus", "claude4-opus", "claude-4-opus"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 64000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": false, "supports_json_mode": false,
"supports_function_calling": false, "supports_function_calling": false,
@@ -63,6 +66,7 @@
"model_name": "anthropic/claude-sonnet-4", "model_name": "anthropic/claude-sonnet-4",
"aliases": ["sonnet", "claude-sonnet", "claude4-sonnet", "claude-4-sonnet", "claude"], "aliases": ["sonnet", "claude-sonnet", "claude4-sonnet", "claude-4-sonnet", "claude"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 64000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": false, "supports_json_mode": false,
"supports_function_calling": false, "supports_function_calling": false,
@@ -74,6 +78,7 @@
"model_name": "anthropic/claude-3.5-haiku", "model_name": "anthropic/claude-3.5-haiku",
"aliases": ["haiku", "claude-haiku", "claude3-haiku", "claude-3-haiku"], "aliases": ["haiku", "claude-haiku", "claude3-haiku", "claude-3-haiku"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 64000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": false, "supports_json_mode": false,
"supports_function_calling": false, "supports_function_calling": false,
@@ -85,6 +90,7 @@
"model_name": "google/gemini-2.5-pro", "model_name": "google/gemini-2.5-pro",
"aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"], "aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"],
"context_window": 1048576, "context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": false, "supports_function_calling": false,
@@ -96,6 +102,7 @@
"model_name": "google/gemini-2.5-flash", "model_name": "google/gemini-2.5-flash",
"aliases": ["flash","gemini-flash", "flash-openrouter", "flash-2.5"], "aliases": ["flash","gemini-flash", "flash-openrouter", "flash-2.5"],
"context_window": 1048576, "context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": false, "supports_function_calling": false,
@@ -107,6 +114,7 @@
"model_name": "mistralai/mistral-large-2411", "model_name": "mistralai/mistral-large-2411",
"aliases": ["mistral-large", "mistral"], "aliases": ["mistral-large", "mistral"],
"context_window": 128000, "context_window": 128000,
"max_output_tokens": 32000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -118,6 +126,7 @@
"model_name": "meta-llama/llama-3-70b", "model_name": "meta-llama/llama-3-70b",
"aliases": ["llama", "llama3", "llama3-70b", "llama-70b", "llama3-openrouter"], "aliases": ["llama", "llama3", "llama3-70b", "llama-70b", "llama3-openrouter"],
"context_window": 8192, "context_window": 8192,
"max_output_tokens": 8192,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": false, "supports_json_mode": false,
"supports_function_calling": false, "supports_function_calling": false,
@@ -129,6 +138,7 @@
"model_name": "deepseek/deepseek-r1-0528", "model_name": "deepseek/deepseek-r1-0528",
"aliases": ["deepseek-r1", "deepseek", "r1", "deepseek-thinking"], "aliases": ["deepseek-r1", "deepseek", "r1", "deepseek-thinking"],
"context_window": 65536, "context_window": 65536,
"max_output_tokens": 32768,
"supports_extended_thinking": true, "supports_extended_thinking": true,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": false, "supports_function_calling": false,
@@ -140,6 +150,7 @@
"model_name": "perplexity/llama-3-sonar-large-32k-online", "model_name": "perplexity/llama-3-sonar-large-32k-online",
"aliases": ["perplexity", "sonar", "perplexity-online"], "aliases": ["perplexity", "sonar", "perplexity-online"],
"context_window": 32768, "context_window": 32768,
"max_output_tokens": 32768,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": false, "supports_json_mode": false,
"supports_function_calling": false, "supports_function_calling": false,
@@ -151,6 +162,7 @@
"model_name": "openai/o3", "model_name": "openai/o3",
"aliases": ["o3"], "aliases": ["o3"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 100000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -164,6 +176,7 @@
"model_name": "openai/o3-mini", "model_name": "openai/o3-mini",
"aliases": ["o3-mini", "o3mini"], "aliases": ["o3-mini", "o3mini"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 100000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -177,6 +190,7 @@
"model_name": "openai/o3-mini-high", "model_name": "openai/o3-mini-high",
"aliases": ["o3-mini-high", "o3mini-high"], "aliases": ["o3-mini-high", "o3mini-high"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 100000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -190,6 +204,7 @@
"model_name": "openai/o3-pro", "model_name": "openai/o3-pro",
"aliases": ["o3-pro", "o3pro"], "aliases": ["o3-pro", "o3pro"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 100000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -203,6 +218,7 @@
"model_name": "openai/o4-mini", "model_name": "openai/o4-mini",
"aliases": ["o4-mini", "o4mini"], "aliases": ["o4-mini", "o4mini"],
"context_window": 200000, "context_window": 200000,
"max_output_tokens": 100000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": true, "supports_json_mode": true,
"supports_function_calling": true, "supports_function_calling": true,
@@ -212,23 +228,11 @@
"temperature_constraint": "fixed", "temperature_constraint": "fixed",
"description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision" "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision"
}, },
{
"model_name": "openai/o4-mini-high",
"aliases": ["o4-mini-high", "o4mini-high", "o4minihigh", "o4minihi"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks with vision"
},
{ {
"model_name": "llama3.2", "model_name": "llama3.2",
"aliases": ["local-llama", "local", "llama3.2", "ollama-llama"], "aliases": ["local-llama", "local", "llama3.2", "ollama-llama"],
"context_window": 128000, "context_window": 128000,
"max_output_tokens": 64000,
"supports_extended_thinking": false, "supports_extended_thinking": false,
"supports_json_mode": false, "supports_json_mode": false,
"supports_function_calling": false, "supports_function_calling": false,

View File

@@ -38,7 +38,6 @@ Regardless of your default configuration, you can specify models per request:
| **`o3`** | OpenAI | 200K tokens | Strong logical reasoning | Debugging logic errors, systematic analysis |
| **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks |
| **`o4-mini`** | OpenAI | 200K tokens | Latest reasoning model | Optimized for shorter contexts |
-| **`o4-mini-high`** | OpenAI | 200K tokens | Enhanced reasoning | Complex tasks requiring deeper analysis |
| **`gpt4.1`** | OpenAI | 1M tokens | Latest GPT-4 with extended context | Large codebase analysis, comprehensive reviews |
| **`llama`** (Llama 3.2) | Custom/Local | 128K tokens | Local inference, privacy | On-device analysis, cost-free processing |
| **Any model** | OpenRouter | Varies | Access to GPT-4, Claude, Llama, etc. | User-specified or based on task requirements |
@@ -69,7 +68,7 @@ OPENAI_ALLOWED_MODELS=o4-mini
# High-performance: Quality over cost
GOOGLE_ALLOWED_MODELS=pro
-OPENAI_ALLOWED_MODELS=o3,o4-mini-high
+OPENAI_ALLOWED_MODELS=o3,o4-mini
```
**Important Notes:**
@@ -144,7 +143,7 @@ All tools that work with files support **both individual files and entire direct
**`analyze`** - Analyze files or directories
- `files`: List of file paths or directories (required)
- `question`: What to analyze (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `analysis_type`: architecture|performance|security|quality|general
- `output_format`: summary|detailed|actionable
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -159,7 +158,7 @@ All tools that work with files support **both individual files and entire direct
**`codereview`** - Review code files or directories
- `files`: List of file paths or directories (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `review_type`: full|security|performance|quick
- `focus_on`: Specific aspects to focus on
- `standards`: Coding standards to enforce
@@ -175,7 +174,7 @@ All tools that work with files support **both individual files and entire direct
**`debug`** - Debug with file context
- `error_description`: Description of the issue (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `error_context`: Stack trace or logs
- `files`: Files or directories related to the issue
- `runtime_info`: Environment details
@@ -191,7 +190,7 @@ All tools that work with files support **both individual files and entire direct
**`thinkdeep`** - Extended analysis with file context
- `current_analysis`: Your current thinking (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `problem_context`: Additional context
- `focus_areas`: Specific aspects to focus on
- `files`: Files or directories for context
@@ -207,7 +206,7 @@ All tools that work with files support **both individual files and entire direct
**`testgen`** - Comprehensive test generation with edge case coverage
- `files`: Code files or directories to generate tests for (required)
- `prompt`: Description of what to test, testing objectives, and scope (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `test_examples`: Optional existing test files as style/pattern reference
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -222,7 +221,7 @@ All tools that work with files support **both individual files and entire direct
- `files`: Code files or directories to analyze for refactoring opportunities (required)
- `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
- `refactor_type`: codesmells|decompose|modernize|organization (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
- `style_guide_examples`: Optional existing code files to use as style/pattern reference
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)

View File

@@ -63,7 +63,7 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model
**Default Model Selection:**
```env
-# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high', etc.
+# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', etc.
DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
```
@@ -74,7 +74,6 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
- **`o3`**: Strong logical reasoning (200K context)
- **`o3-mini`**: Balanced speed/quality (200K context)
- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
-- **`o4-mini-high`**: Enhanced O4 with higher reasoning effort
- **`grok`**: GROK-3 advanced reasoning (131K context)
- **Custom models**: via OpenRouter or local APIs
@@ -120,7 +119,6 @@ OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
- `o3` (200K context, high reasoning)
- `o3-mini` (200K context, balanced)
- `o4-mini` (200K context, latest balanced)
-- `o4-mini-high` (200K context, enhanced reasoning)
- `mini` (shorthand for o4-mini)
**Gemini Models:**

View File

@@ -65,7 +65,7 @@ This workflow ensures methodical analysis before expert insights, resulting in d
**Initial Configuration (used in step 1):**
- `prompt`: What to analyze or look for (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `analysis_type`: architecture|performance|security|quality|general (default: general)
- `output_format`: summary|detailed|actionable (default: detailed)
- `temperature`: Temperature for analysis (0-1, default 0.2)

View File

@@ -33,7 +33,7 @@ and then debate with the other models to give me a final verdict
## Tool Parameters
- `prompt`: Your question or discussion topic (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `files`: Optional files for context (absolute paths)
- `images`: Optional images for visual context (absolute paths)
- `temperature`: Response creativity (0-1, default 0.5)

View File

@@ -80,7 +80,7 @@ The above prompt will simultaneously run two separate `codereview` tools with tw
**Initial Review Configuration (used in step 1):**
- `prompt`: User's summary of what the code does, expected behavior, constraints, and review objectives (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `review_type`: full|security|performance|quick (default: full)
- `focus_on`: Specific aspects to focus on (e.g., "security vulnerabilities", "performance bottlenecks")
- `standards`: Coding standards to enforce (e.g., "PEP8", "ESLint", "Google Style Guide")

View File

@@ -73,7 +73,7 @@ This structured approach ensures Claude performs methodical groundwork before ex
- `images`: Visual debugging materials (error screenshots, logs, etc.)
**Model Selection:**
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini (default: server default)
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
- `use_websearch`: Enable web search for documentation and solutions (default: true)
- `use_assistant_model`: Whether to use expert analysis phase (default: true, set to false to use Claude only)

View File

@@ -135,7 +135,7 @@ Use zen and perform a thorough precommit ensuring there aren't any new regressio
**Initial Configuration (used in step 1):**
- `path`: Starting directory to search for repos (default: current directory, absolute path required)
- `prompt`: The original user request description for the changes (required for context)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `compare_to`: Compare against a branch/tag instead of local changes (optional)
- `severity_filter`: critical|high|medium|low|all (default: all)
- `include_staged`: Include staged changes in the review (default: true)

View File

@@ -103,7 +103,7 @@ This results in Claude first performing its own expert analysis, encouraging it
**Initial Configuration (used in step 1):**
- `prompt`: Description of refactoring goals, context, and specific areas of focus (required)
- `refactor_type`: codesmells|decompose|modernize|organization (default: codesmells)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')
- `style_guide_examples`: Optional existing code files to use as style/pattern reference (absolute paths)
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)

View File

@@ -86,7 +86,7 @@ security remediation plan using planner
- `images`: Architecture diagrams, security documentation, or visual references
**Initial Security Configuration (used in step 1):**
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `security_scope`: Application context, technology stack, and security boundary definition (required)
- `threat_level`: low|medium|high|critical (default: medium) - determines assessment depth and urgency
- `compliance_requirements`: List of compliance frameworks to assess against (e.g., ["PCI DSS", "SOC2"])

View File

@@ -70,7 +70,7 @@ Test generation excels with extended reasoning models like Gemini Pro or O3, whi
**Initial Configuration (used in step 1):**
- `prompt`: Description of what to test, testing objectives, and specific scope/focus areas (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `test_examples`: Optional existing test files or directories to use as style/pattern reference (absolute paths)
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
- `use_assistant_model`: Whether to use expert test generation phase (default: true, set to false to use Claude only)

View File

@@ -30,7 +30,7 @@ with the best architecture for my project
## Tool Parameters
- `prompt`: Your current thinking/analysis to extend and validate (required)
-- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default)
- `problem_context`: Additional context about the problem or goal
- `focus_areas`: Specific aspects to focus on (architecture, performance, security, etc.)
- `files`: Optional file paths or directories for additional context (absolute paths)

View File

@@ -132,6 +132,7 @@ class ModelCapabilities:
    model_name: str
    friendly_name: str  # Human-friendly name like "Gemini" or "OpenAI"
    context_window: int  # Total context window size in tokens
+    max_output_tokens: int  # Maximum output tokens per request
    supports_extended_thinking: bool = False
    supports_system_prompts: bool = True
    supports_streaming: bool = True
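Note that `max_output_tokens` is declared without a default, which is why every provider definition touched by this commit must now supply a value explicitly. A sketch of the field in use (the helper below is illustrative, not part of the codebase):

```python
from dataclasses import dataclass

@dataclass
class ModelCapabilities:  # trimmed to the fields shown in the hunk above
    model_name: str
    friendly_name: str
    context_window: int
    max_output_tokens: int  # required: no default
    supports_extended_thinking: bool = False

def effective_output_budget(caps: ModelCapabilities, requested: int | None) -> int:
    """Hypothetical helper: clamp a requested token budget to the model's cap."""
    return caps.max_output_tokens if requested is None else min(requested, caps.max_output_tokens)
```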

View File

@@ -158,6 +158,7 @@ class CustomProvider(OpenAICompatibleProvider):
            model_name=resolved_name,
            friendly_name=f"{self.FRIENDLY_NAME} ({resolved_name})",
            context_window=32_768,  # Conservative default
+            max_output_tokens=32_768,  # Conservative default max output
            supports_extended_thinking=False,  # Most custom models don't support this
            supports_system_prompts=True,
            supports_streaming=True,
@@ -187,7 +188,7 @@ class CustomProvider(OpenAICompatibleProvider):
        Returns:
            True if model is intended for custom/local endpoint
        """
-        logging.debug(f"Custom provider validating model: '{model_name}'")
+        # logging.debug(f"Custom provider validating model: '{model_name}'")
        # Try to resolve through registry first
        config = self._registry.resolve(model_name)
@@ -195,12 +196,12 @@ class CustomProvider(OpenAICompatibleProvider):
            model_id = config.model_name
            # Use explicit is_custom flag for clean validation
            if config.is_custom:
-                logging.debug(f"Model '{model_name}' -> '{model_id}' validated via registry (custom model)")
+                logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' validated via registry")
                return True
            else:
                # This is a cloud/OpenRouter model - CustomProvider should NOT handle these
                # Let OpenRouter provider handle them instead
-                logging.debug(f"Model '{model_name}' -> '{model_id}' rejected (cloud model, defer to OpenRouter)")
+                # logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' not custom (defer to OpenRouter)")
                return False
        # Handle version tags for unknown models (e.g., "my-model:latest")
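Condensed, the registry branch of `validate_model_name` now reads like the sketch below (a simplified paraphrase; the version-tag fallback referenced in the trailing comment is omitted):

```python
# Simplified paraphrase of the hunk above, not the full method.
def validate_model_name(self, model_name: str) -> bool:
    config = self._registry.resolve(model_name)
    if config is not None:
        # is_custom=True  -> local/custom model, handled by this provider
        # is_custom=False -> cloud model, silently deferred to OpenRouter
        return config.is_custom
    # ...version-tag handling for unknown models (e.g. "my-model:latest") follows
    return False
```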

View File

@@ -37,6 +37,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="o3-2025-04-16", model_name="o3-2025-04-16",
friendly_name="DIAL (O3)", friendly_name="DIAL (O3)",
context_window=200_000, context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -54,6 +55,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="o4-mini-2025-04-16", model_name="o4-mini-2025-04-16",
friendly_name="DIAL (O4-mini)", friendly_name="DIAL (O4-mini)",
context_window=200_000, context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -71,6 +73,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="anthropic.claude-sonnet-4-20250514-v1:0", model_name="anthropic.claude-sonnet-4-20250514-v1:0",
friendly_name="DIAL (Sonnet 4)", friendly_name="DIAL (Sonnet 4)",
context_window=200_000, context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -88,6 +91,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking", model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
friendly_name="DIAL (Sonnet 4 Thinking)", friendly_name="DIAL (Sonnet 4 Thinking)",
context_window=200_000, context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant supports_extended_thinking=True, # Thinking mode variant
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -105,6 +109,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="anthropic.claude-opus-4-20250514-v1:0", model_name="anthropic.claude-opus-4-20250514-v1:0",
friendly_name="DIAL (Opus 4)", friendly_name="DIAL (Opus 4)",
context_window=200_000, context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -122,6 +127,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking", model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking",
friendly_name="DIAL (Opus 4 Thinking)", friendly_name="DIAL (Opus 4 Thinking)",
context_window=200_000, context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant supports_extended_thinking=True, # Thinking mode variant
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -139,6 +145,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="gemini-2.5-pro-preview-03-25-google-search", model_name="gemini-2.5-pro-preview-03-25-google-search",
friendly_name="DIAL (Gemini 2.5 Pro Search)", friendly_name="DIAL (Gemini 2.5 Pro Search)",
context_window=1_000_000, context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False, # DIAL doesn't expose thinking mode supports_extended_thinking=False, # DIAL doesn't expose thinking mode
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -156,6 +163,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="gemini-2.5-pro-preview-05-06", model_name="gemini-2.5-pro-preview-05-06",
friendly_name="DIAL (Gemini 2.5 Pro)", friendly_name="DIAL (Gemini 2.5 Pro)",
context_window=1_000_000, context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -173,6 +181,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
model_name="gemini-2.5-flash-preview-05-20", model_name="gemini-2.5-flash-preview-05-20",
friendly_name="DIAL (Gemini Flash 2.5)", friendly_name="DIAL (Gemini Flash 2.5)",
context_window=1_000_000, context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -24,6 +24,7 @@ class GeminiModelProvider(ModelProvider):
model_name="gemini-2.0-flash", model_name="gemini-2.0-flash",
friendly_name="Gemini (Flash 2.0)", friendly_name="Gemini (Flash 2.0)",
context_window=1_048_576, # 1M tokens context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, # Experimental thinking mode supports_extended_thinking=True, # Experimental thinking mode
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -42,6 +43,7 @@ class GeminiModelProvider(ModelProvider):
model_name="gemini-2.0-flash-lite", model_name="gemini-2.0-flash-lite",
friendly_name="Gemin (Flash Lite 2.0)", friendly_name="Gemin (Flash Lite 2.0)",
context_window=1_048_576, # 1M tokens context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=False, # Not supported per user request supports_extended_thinking=False, # Not supported per user request
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -59,6 +61,7 @@ class GeminiModelProvider(ModelProvider):
model_name="gemini-2.5-flash", model_name="gemini-2.5-flash",
friendly_name="Gemini (Flash 2.5)", friendly_name="Gemini (Flash 2.5)",
context_window=1_048_576, # 1M tokens context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, supports_extended_thinking=True,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -77,6 +80,7 @@ class GeminiModelProvider(ModelProvider):
model_name="gemini-2.5-pro", model_name="gemini-2.5-pro",
friendly_name="Gemini (Pro 2.5)", friendly_name="Gemini (Pro 2.5)",
context_window=1_048_576, # 1M tokens context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, supports_extended_thinking=True,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -687,7 +687,6 @@ class OpenAICompatibleProvider(ModelProvider):
"o3-mini", "o3-mini",
"o3-pro", "o3-pro",
"o4-mini", "o4-mini",
"o4-mini-high",
# Note: Claude models would be handled by a separate provider # Note: Claude models would be handled by a separate provider
} }
supports = model_name.lower() in vision_models supports = model_name.lower() in vision_models

View File

@@ -24,6 +24,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o3", model_name="o3",
friendly_name="OpenAI (O3)", friendly_name="OpenAI (O3)",
context_window=200_000, # 200K tokens context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -41,6 +42,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o3-mini", model_name="o3-mini",
friendly_name="OpenAI (O3-mini)", friendly_name="OpenAI (O3-mini)",
context_window=200_000, # 200K tokens context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -58,6 +60,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o3-pro-2025-06-10", model_name="o3-pro-2025-06-10",
friendly_name="OpenAI (O3-Pro)", friendly_name="OpenAI (O3-Pro)",
context_window=200_000, # 200K tokens context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -75,6 +78,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o4-mini", model_name="o4-mini",
friendly_name="OpenAI (O4-mini)", friendly_name="OpenAI (O4-mini)",
context_window=200_000, # 200K tokens context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -85,30 +89,14 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
supports_temperature=False, # O4 models don't accept temperature parameter supports_temperature=False, # O4 models don't accept temperature parameter
temperature_constraint=create_temperature_constraint("fixed"), temperature_constraint=create_temperature_constraint("fixed"),
description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning", description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
aliases=["mini", "o4mini"], aliases=["mini", "o4mini", "o4-mini"],
),
"o4-mini-high": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o4-mini-high",
friendly_name="OpenAI (O4-mini-high)",
context_window=200_000, # 200K tokens
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # O4 models support vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=False, # O4 models don't accept temperature parameter
temperature_constraint=create_temperature_constraint("fixed"),
description="Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks",
aliases=["o4minihigh", "o4minihi", "mini-high"],
), ),
"gpt-4.1-2025-04-14": ModelCapabilities( "gpt-4.1-2025-04-14": ModelCapabilities(
provider=ProviderType.OPENAI, provider=ProviderType.OPENAI,
model_name="gpt-4.1-2025-04-14", model_name="gpt-4.1-2025-04-14",
friendly_name="OpenAI (GPT 4.1)", friendly_name="OpenAI (GPT 4.1)",
context_window=1_000_000, # 1M tokens context_window=1_000_000, # 1M tokens
max_output_tokens=32_768,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
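The practical effect of the alias change, as pinned down by the provider tests later in this commit (the construction below assumes `OpenAIModelProvider` is importable and accepts a placeholder key):

```python
provider = OpenAIModelProvider(api_key="test-key")  # placeholder key

assert provider._resolve_model_name("mini") == "o4-mini"
assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("o4-mini") == "o4-mini"  # new explicit self-alias

# "o4minihigh" / "o4minihi" no longer resolve: the o4-mini-high entry is gone.
assert provider.validate_model_name("o4minihigh") is False
```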

View File

@@ -101,6 +101,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
            model_name=resolved_name,
            friendly_name=self.FRIENDLY_NAME,
            context_window=32_768,  # Conservative default context window
+            max_output_tokens=32_768,
            supports_extended_thinking=False,
            supports_system_prompts=True,
            supports_streaming=True,

View File

@@ -24,8 +24,6 @@ class ModelProviderRegistry:
            cls._instance._providers = {}
            cls._instance._initialized_providers = {}
            logging.debug(f"REGISTRY: Created instance {cls._instance}")
-        else:
-            logging.debug(f"REGISTRY: Returning existing instance {cls._instance}")
        return cls._instance
    @classmethod
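For context, the singleton this hunk trims, reconstructed from the surviving lines (a sketch, not the full class):

```python
import logging

class ModelProviderRegistry:
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._providers = {}
            cls._instance._initialized_providers = {}
            logging.debug(f"REGISTRY: Created instance {cls._instance}")
        # The removed else-branch logged every repeat access; creation is unchanged.
        return cls._instance
```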

View File

@@ -26,6 +26,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
model_name="grok-3", model_name="grok-3",
friendly_name="X.AI (Grok 3)", friendly_name="X.AI (Grok 3)",
context_window=131_072, # 131K tokens context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,
@@ -43,6 +44,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
model_name="grok-3-fast", model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)", friendly_name="X.AI (Grok 3 Fast)",
context_window=131_072, # 131K tokens context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False, supports_extended_thinking=False,
supports_system_prompts=True, supports_system_prompts=True,
supports_streaming=True, supports_streaming=True,

View File

@@ -15,6 +15,7 @@ def create_mock_provider(model_name="gemini-2.5-flash", context_window=1_048_576
        model_name=model_name,
        friendly_name="Gemini",
        context_window=context_window,
+        max_output_tokens=8192,
        supports_extended_thinking=False,
        supports_system_prompts=True,
        supports_streaming=True,

View File

@@ -211,7 +211,7 @@ class TestAliasTargetRestrictions:
        # Verify the polymorphic method was called
        mock_provider.list_all_known_models.assert_called_once()
-    @patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini-high"})  # Restrict to specific model
+    @patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini"})  # Restrict to specific model
    def test_complex_alias_chains_handled_correctly(self):
        """Test that complex alias chains are handled correctly in restrictions."""
        # Clear cached restriction service
@@ -221,12 +221,11 @@
        provider = OpenAIModelProvider(api_key="test-key")
-        # Only o4-mini-high should be allowed
-        assert provider.validate_model_name("o4-mini-high")
+        # Only o4-mini should be allowed
+        assert provider.validate_model_name("o4-mini")
        # Other models should be blocked
-        assert not provider.validate_model_name("o4-mini")
-        assert not provider.validate_model_name("mini")  # This resolves to o4-mini
+        assert not provider.validate_model_name("o3")
        assert not provider.validate_model_name("o3-mini")
    def test_critical_regression_validation_sees_alias_targets(self):
@@ -307,7 +306,7 @@
        it appear that target-based restrictions don't work.
        """
        # Test with a made-up restriction scenario
-        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini-high,o3-mini"}):
+        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini,o3-mini"}):
            # Clear cached restriction service
            import utils.model_restrictions
@@ -318,7 +317,7 @@
            # These specific target models should be recognized as valid
            all_known = provider.list_all_known_models()
-            assert "o4-mini-high" in all_known, "Target model o4-mini-high should be known"
+            assert "o4-mini" in all_known, "Target model o4-mini should be known"
            assert "o3-mini" in all_known, "Target model o3-mini should be known"
            # Validation should not warn about these being unrecognized
@@ -329,11 +328,11 @@
            # Should not warn about our allowed models being unrecognized
            all_warnings = [str(call) for call in mock_logger.warning.call_args_list]
            for warning in all_warnings:
-                assert "o4-mini-high" not in warning or "not a recognized" not in warning
+                assert "o4-mini" not in warning or "not a recognized" not in warning
                assert "o3-mini" not in warning or "not a recognized" not in warning
            # The restriction should actually work
-            assert provider.validate_model_name("o4-mini-high")
+            assert provider.validate_model_name("o4-mini")
            assert provider.validate_model_name("o3-mini")
-            assert not provider.validate_model_name("o4-mini")  # not in allowed list
+            assert not provider.validate_model_name("o3-pro")  # not in allowed list
            assert not provider.validate_model_name("o3")  # not in allowed list

View File

@@ -64,7 +64,7 @@ class TestAutoMode:
                models_with_descriptions[model_name] = description
        # Check all expected models are present with meaningful descriptions
-        expected_models = ["flash", "pro", "o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]
+        expected_models = ["flash", "pro", "o3", "o3-mini", "o3-pro", "o4-mini"]
        for model in expected_models:
            # Model should exist somewhere in the providers
            # Note: Some models might not be available if API keys aren't configured

View File

@@ -118,7 +118,7 @@ class TestBuggyBehaviorPrevention:
        provider = OpenAIModelProvider(api_key="test-key")
        # Simulate a scenario where admin wants to restrict specific targets
-        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini,o4-mini-high"}):
+        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini,o4-mini"}):
            # Clear cached restriction service
            import utils.model_restrictions
@@ -126,19 +126,21 @@
            # These should work because they're explicitly allowed
            assert provider.validate_model_name("o3-mini")
-            assert provider.validate_model_name("o4-mini-high")
+            assert provider.validate_model_name("o4-mini")
            # These should be blocked
-            assert not provider.validate_model_name("o4-mini")  # Not in allowed list
+            assert not provider.validate_model_name("o3-pro")  # Not in allowed list
            assert not provider.validate_model_name("o3")  # Not in allowed list
-            assert not provider.validate_model_name("mini")  # Resolves to o4-mini, not allowed
+            # This should be ALLOWED because it resolves to o4-mini which is in the allowed list
+            assert provider.validate_model_name("mini")  # Resolves to o4-mini, which IS allowed
            # Verify our list_all_known_models includes the restricted models
            all_known = provider.list_all_known_models()
            assert "o3-mini" in all_known  # Should be known (and allowed)
-            assert "o4-mini-high" in all_known  # Should be known (and allowed)
-            assert "o4-mini" in all_known  # Should be known (but blocked)
-            assert "mini" in all_known  # Should be known (but blocked)
+            assert "o4-mini" in all_known  # Should be known (and allowed)
+            assert "o3-pro" in all_known  # Should be known (but blocked)
+            assert "mini" in all_known  # Should be known (and allowed since it resolves to o4-mini)
    def test_demonstration_of_old_vs_new_interface(self):
        """

View File

@@ -149,7 +149,7 @@ class TestModelEnumeration:
("o3", False), # OpenAI - not available without API key ("o3", False), # OpenAI - not available without API key
("grok", False), # X.AI - not available without API key ("grok", False), # X.AI - not available without API key
("gemini-2.5-flash", False), # Full Gemini name - not available without API key ("gemini-2.5-flash", False), # Full Gemini name - not available without API key
("o4-mini-high", False), # OpenAI variant - not available without API key ("o4-mini", False), # OpenAI variant - not available without API key
("grok-3-fast", False), # X.AI variant - not available without API key ("grok-3-fast", False), # X.AI variant - not available without API key
], ],
) )

View File

@@ -93,7 +93,7 @@ class TestModelRestrictionService:
        with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini,o4-mini"}):
            service = ModelRestrictionService()
-            models = ["o3", "o3-mini", "o4-mini", "o4-mini-high"]
+            models = ["o3", "o3-mini", "o4-mini", "o3-pro"]
            filtered = service.filter_models(ProviderType.OPENAI, models)
            assert filtered == ["o3-mini", "o4-mini"]
@@ -573,7 +573,7 @@ class TestShorthandRestrictions:
# Other models should not work
assert not openai_provider.validate_model_name("o3")
-assert not openai_provider.validate_model_name("o4-mini-high")
+assert not openai_provider.validate_model_name("o3-pro")
@patch.dict(
os.environ,

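The `filter_models` assertion above implies an order-preserving filter over the configured allow-list. A rough sketch of that contract (the real service also takes a `ProviderType` argument, dropped here for brevity):

```python
import os

class RestrictionServiceSketch:
    """Loose stand-in for ModelRestrictionService; names are assumptions."""

    def __init__(self) -> None:
        raw = os.environ.get("OPENAI_ALLOWED_MODELS", "")
        self.allowed = {m.strip().lower() for m in raw.split(",") if m.strip()}

    def filter_models(self, models: list[str]) -> list[str]:
        # Preserve input order; an empty allow-list means "no restriction".
        if not self.allowed:
            return list(models)
        return [m for m in models if m.lower() in self.allowed]

os.environ["OPENAI_ALLOWED_MODELS"] = "o3-mini,o4-mini"
service = RestrictionServiceSketch()
assert service.filter_models(["o3", "o3-mini", "o4-mini", "o3-pro"]) == ["o3-mini", "o4-mini"]
```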
View File

@@ -185,7 +185,7 @@ class TestO3TemperatureParameterFixSimple:
provider = OpenAIModelProvider(api_key="test-key")
# Test O3/O4 models that should NOT support temperature parameter
-o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]
+o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini"]
for model in o3_o4_models:
capabilities = provider.get_capabilities(model)

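The temperature fix being tested amounts to building request kwargs conditionally for the o3/o4 reasoning family. A sketch under the assumption that the unsupported set is hardcoded; the real provider reads this from each model's capabilities entry:

```python
# Assumed set of models that reject the temperature parameter.
NO_TEMPERATURE = {"o3", "o3-mini", "o3-pro", "o4-mini"}

def build_request_kwargs(model: str, prompt: str, temperature: float) -> dict:
    """Build chat-completion kwargs, omitting temperature where unsupported."""
    kwargs = {"model": model, "messages": [{"role": "user", "content": prompt}]}
    if model not in NO_TEMPERATURE:
        kwargs["temperature"] = temperature
    return kwargs

assert "temperature" not in build_request_kwargs("o4-mini", "hi", 0.7)
assert build_request_kwargs("gpt-4.1", "hi", 0.7)["temperature"] == 0.7
```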
View File

@@ -47,14 +47,13 @@ class TestOpenAIProvider:
assert provider.validate_model_name("o3-mini") is True assert provider.validate_model_name("o3-mini") is True
assert provider.validate_model_name("o3-pro") is True assert provider.validate_model_name("o3-pro") is True
assert provider.validate_model_name("o4-mini") is True assert provider.validate_model_name("o4-mini") is True
assert provider.validate_model_name("o4-mini-high") is True assert provider.validate_model_name("o4-mini") is True
# Test valid aliases # Test valid aliases
assert provider.validate_model_name("mini") is True assert provider.validate_model_name("mini") is True
assert provider.validate_model_name("o3mini") is True assert provider.validate_model_name("o3mini") is True
assert provider.validate_model_name("o4mini") is True assert provider.validate_model_name("o4mini") is True
assert provider.validate_model_name("o4minihigh") is True assert provider.validate_model_name("o4mini") is True
assert provider.validate_model_name("o4minihi") is True
# Test invalid model # Test invalid model
assert provider.validate_model_name("invalid-model") is False assert provider.validate_model_name("invalid-model") is False
@@ -69,15 +68,14 @@ class TestOpenAIProvider:
assert provider._resolve_model_name("mini") == "o4-mini" assert provider._resolve_model_name("mini") == "o4-mini"
assert provider._resolve_model_name("o3mini") == "o3-mini" assert provider._resolve_model_name("o3mini") == "o3-mini"
assert provider._resolve_model_name("o4mini") == "o4-mini" assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("o4minihigh") == "o4-mini-high" assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("o4minihi") == "o4-mini-high"
# Test full name passthrough # Test full name passthrough
assert provider._resolve_model_name("o3") == "o3" assert provider._resolve_model_name("o3") == "o3"
assert provider._resolve_model_name("o3-mini") == "o3-mini" assert provider._resolve_model_name("o3-mini") == "o3-mini"
assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10" assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10"
assert provider._resolve_model_name("o4-mini") == "o4-mini" assert provider._resolve_model_name("o4-mini") == "o4-mini"
assert provider._resolve_model_name("o4-mini-high") == "o4-mini-high" assert provider._resolve_model_name("o4-mini") == "o4-mini"
def test_get_capabilities_o3(self):
"""Test getting model capabilities for O3."""
@@ -184,11 +182,11 @@ class TestOpenAIProvider:
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert call_kwargs["model"] == "o3-mini"
-# Test o4minihigh -> o4-mini-high
+# Test o4mini -> o4-mini
-mock_response.model = "o4-mini-high"
+mock_response.model = "o4-mini"
-provider.generate_content(prompt="Test", model_name="o4minihigh", temperature=1.0)
+provider.generate_content(prompt="Test", model_name="o4mini", temperature=1.0)
call_kwargs = mock_client.chat.completions.create.call_args[1]
-assert call_kwargs["model"] == "o4-mini-high"
+assert call_kwargs["model"] == "o4-mini"
@patch("providers.openai_compatible.OpenAI") @patch("providers.openai_compatible.OpenAI")
def test_generate_content_no_alias_passthrough(self, mock_openai_class): def test_generate_content_no_alias_passthrough(self, mock_openai_class):

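The resolution tests above exercise a lookup over a canonical-name table with per-model aliases, including date-pinned names such as `o3-pro-2025-06-10`. A sketch of that structure; the class name, field names, and capability numbers below are placeholders, not the repo's actual values:

```python
from dataclasses import dataclass, field

@dataclass
class CapabilitiesSketch:
    """Stand-in for ModelCapabilities; the numbers below are placeholders."""
    context_window: int
    max_output_tokens: int
    aliases: list[str] = field(default_factory=list)

SUPPORTED = {
    "o3-mini": CapabilitiesSketch(200_000, 64_000, aliases=["o3mini"]),
    "o3-pro-2025-06-10": CapabilitiesSketch(200_000, 64_000, aliases=["o3-pro"]),
    "o4-mini": CapabilitiesSketch(200_000, 64_000, aliases=["mini", "o4mini"]),
}

def resolve_model_name(name: str) -> str:
    lowered = name.lower()
    for canonical, caps in SUPPORTED.items():
        if lowered == canonical.lower() or lowered in (a.lower() for a in caps.aliases):
            return canonical
    return name  # unknown names pass through unchanged

assert resolve_model_name("o3-pro") == "o3-pro-2025-06-10"
assert resolve_model_name("O4MINI") == "o4-mini"
```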
View File

@@ -77,7 +77,7 @@ class TestOpenRouterProvider:
assert provider._resolve_model_name("o3-mini") == "openai/o3-mini" assert provider._resolve_model_name("o3-mini") == "openai/o3-mini"
assert provider._resolve_model_name("o3mini") == "openai/o3-mini" assert provider._resolve_model_name("o3mini") == "openai/o3-mini"
assert provider._resolve_model_name("o4-mini") == "openai/o4-mini" assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
assert provider._resolve_model_name("o4-mini-high") == "openai/o4-mini-high" assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
assert provider._resolve_model_name("claude") == "anthropic/claude-sonnet-4" assert provider._resolve_model_name("claude") == "anthropic/claude-sonnet-4"
assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411" assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411"
assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528" assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528"

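OpenRouter addresses models by vendor-qualified IDs (`openai/o4-mini`, `anthropic/claude-sonnet-4`), so its resolver maps bare names and aliases onto those. An illustrative sketch only; the real provider loads this mapping from its model registry rather than hardcoding it:

```python
OPENROUTER_ALIASES = {
    "o3mini": "openai/o3-mini",
    "o4-mini": "openai/o4-mini",
    "claude": "anthropic/claude-sonnet-4",
    "deepseek": "deepseek/deepseek-r1-0528",
}

def resolve_openrouter(name: str) -> str:
    """Map a bare name or alias to OpenRouter's vendor-qualified model ID."""
    return OPENROUTER_ALIASES.get(name.lower(), name)

assert resolve_openrouter("o4-mini") == "openai/o4-mini"
assert resolve_openrouter("anthropic/claude-sonnet-4") == "anthropic/claude-sonnet-4"
```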
View File

@@ -24,7 +24,16 @@ class TestOpenRouterModelRegistry:
def test_custom_config_path(self):
"""Test registry with custom config path."""
# Create temporary config
-config_data = {"models": [{"model_name": "test/model-1", "aliases": ["test1", "t1"], "context_window": 4096}]}
+config_data = {
+    "models": [
+        {
+            "model_name": "test/model-1",
+            "aliases": ["test1", "t1"],
+            "context_window": 4096,
+            "max_output_tokens": 2048,
+        }
+    ]
+}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(config_data, f)
@@ -42,7 +51,11 @@ class TestOpenRouterModelRegistry:
def test_environment_variable_override(self):
"""Test OPENROUTER_MODELS_PATH environment variable."""
# Create custom config
-config_data = {"models": [{"model_name": "env/model", "aliases": ["envtest"], "context_window": 8192}]}
+config_data = {
+    "models": [
+        {"model_name": "env/model", "aliases": ["envtest"], "context_window": 8192, "max_output_tokens": 4096}
+    ]
+}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(config_data, f)
@@ -127,11 +140,12 @@ class TestOpenRouterModelRegistry:
"""Test that duplicate aliases are detected.""" """Test that duplicate aliases are detected."""
config_data = { config_data = {
"models": [ "models": [
{"model_name": "test/model-1", "aliases": ["dupe"], "context_window": 4096}, {"model_name": "test/model-1", "aliases": ["dupe"], "context_window": 4096, "max_output_tokens": 2048},
{ {
"model_name": "test/model-2", "model_name": "test/model-2",
"aliases": ["DUPE"], # Same alias, different case "aliases": ["DUPE"], # Same alias, different case
"context_window": 8192, "context_window": 8192,
"max_output_tokens": 2048,
}, },
] ]
} }
@@ -207,6 +221,7 @@ class TestOpenRouterModelRegistry:
friendly_name="OpenRouter (test/full-featured)",
aliases=["full"],
context_window=128000,
+max_output_tokens=8192,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,

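The registry tests follow one pattern: write a JSON config containing the new `max_output_tokens` field to a temp file, then point the registry at it. A runnable sketch of that setup, with a hypothetical model entry standing in for the test fixtures:

```python
import json
import os
import tempfile

# A one-model registry config including the new max_output_tokens field.
config_data = {
    "models": [
        {
            "model_name": "example/model",
            "aliases": ["ex"],
            "context_window": 8192,
            "max_output_tokens": 4096,
        }
    ]
}

with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
    json.dump(config_data, f)
    config_path = f.name

# The tests point OPENROUTER_MODELS_PATH at the temp file so the registry
# loads it instead of the bundled default config.
os.environ["OPENROUTER_MODELS_PATH"] = config_path
with open(config_path) as f:
    loaded = json.load(f)
assert loaded["models"][0]["max_output_tokens"] == 4096
os.unlink(config_path)
```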
View File

@@ -215,9 +215,7 @@ class TestOpenAIProvider:
assert provider.validate_model_name("o3-mini") # Backwards compatibility assert provider.validate_model_name("o3-mini") # Backwards compatibility
assert provider.validate_model_name("o4-mini") assert provider.validate_model_name("o4-mini")
assert provider.validate_model_name("o4mini") assert provider.validate_model_name("o4mini")
assert provider.validate_model_name("o4-mini-high") assert provider.validate_model_name("o4-mini")
assert provider.validate_model_name("o4minihigh")
assert provider.validate_model_name("o4minihi")
assert not provider.validate_model_name("gpt-4o") assert not provider.validate_model_name("gpt-4o")
assert not provider.validate_model_name("invalid-model") assert not provider.validate_model_name("invalid-model")
@@ -229,4 +227,4 @@ class TestOpenAIProvider:
assert not provider.supports_thinking_mode("o3mini") assert not provider.supports_thinking_mode("o3mini")
assert not provider.supports_thinking_mode("o3-mini") assert not provider.supports_thinking_mode("o3-mini")
assert not provider.supports_thinking_mode("o4-mini") assert not provider.supports_thinking_mode("o4-mini")
assert not provider.supports_thinking_mode("o4-mini-high") assert not provider.supports_thinking_mode("o4-mini")

View File

@@ -51,15 +51,14 @@ class TestSupportedModelsAliases:
assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases
assert "o3mini" in provider.SUPPORTED_MODELS["o3-mini"].aliases assert "o3mini" in provider.SUPPORTED_MODELS["o3-mini"].aliases
assert "o3-pro" in provider.SUPPORTED_MODELS["o3-pro-2025-06-10"].aliases assert "o3-pro" in provider.SUPPORTED_MODELS["o3-pro-2025-06-10"].aliases
assert "o4minihigh" in provider.SUPPORTED_MODELS["o4-mini-high"].aliases assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases
assert "o4minihi" in provider.SUPPORTED_MODELS["o4-mini-high"].aliases
assert "gpt4.1" in provider.SUPPORTED_MODELS["gpt-4.1-2025-04-14"].aliases assert "gpt4.1" in provider.SUPPORTED_MODELS["gpt-4.1-2025-04-14"].aliases
# Test alias resolution # Test alias resolution
assert provider._resolve_model_name("mini") == "o4-mini" assert provider._resolve_model_name("mini") == "o4-mini"
assert provider._resolve_model_name("o3mini") == "o3-mini" assert provider._resolve_model_name("o3mini") == "o3-mini"
assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10" assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10"
assert provider._resolve_model_name("o4minihigh") == "o4-mini-high" assert provider._resolve_model_name("o4mini") == "o4-mini"
assert provider._resolve_model_name("gpt4.1") == "gpt-4.1-2025-04-14" assert provider._resolve_model_name("gpt4.1") == "gpt-4.1-2025-04-14"
# Test case insensitive resolution # Test case insensitive resolution