diff --git a/README.md b/README.md index 40552da..504a5a2 100644 --- a/README.md +++ b/README.md @@ -409,7 +409,7 @@ for most debugging workflows, as Claude is usually able to confidently find the When in doubt, you can always follow up with a new prompt and ask Claude to share its findings with another model: ```text -Use continuation with thinkdeep, share details with o4-mini-high to find out what the best fix is for this +Use continuation with thinkdeep, share details with o4-mini to find out what the best fix is for this ``` **[📖 Read More](docs/tools/debug.md)** - Step-by-step investigation methodology with workflow enforcement diff --git a/communication_simulator_test.py b/communication_simulator_test.py index 6bd1a07..e471b33 100644 --- a/communication_simulator_test.py +++ b/communication_simulator_test.py @@ -80,7 +80,12 @@ class CommunicationSimulator: """Simulates real-world Claude CLI communication with MCP Gemini server""" def __init__( - self, verbose: bool = False, keep_logs: bool = False, selected_tests: list[str] = None, setup: bool = False, quick_mode: bool = False + self, + verbose: bool = False, + keep_logs: bool = False, + selected_tests: list[str] = None, + setup: bool = False, + quick_mode: bool = False, ): self.verbose = verbose self.keep_logs = keep_logs @@ -104,12 +109,12 @@ class CommunicationSimulator: # Define quick mode tests (essential tests for time-limited testing) # Focus on tests that work with current tool configurations self.quick_mode_tests = [ - "cross_tool_continuation", # Cross-tool conversation memory - "basic_conversation", # Basic chat functionality - "content_validation", # Content validation and deduplication - "model_thinking_config", # Flash/flashlite model testing - "o3_model_selection", # O3 model selection testing - "per_tool_deduplication" # File deduplication for individual tools + "cross_tool_continuation", # Cross-tool conversation memory + "basic_conversation", # Basic chat functionality + "content_validation", # Content validation and deduplication + "model_thinking_config", # Flash/flashlite model testing + "o3_model_selection", # O3 model selection testing + "per_tool_deduplication", # File deduplication for individual tools ] # If quick mode is enabled, override selected_tests @@ -444,7 +449,9 @@ def parse_arguments(): parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)") parser.add_argument("--list-tests", action="store_true", help="List available tests and exit") parser.add_argument("--individual", "-i", help="Run a single test individually") - parser.add_argument("--quick", "-q", action="store_true", help="Run quick test mode (6 essential tests for time-limited testing)") + parser.add_argument( + "--quick", "-q", action="store_true", help="Run quick test mode (6 essential tests for time-limited testing)" + ) parser.add_argument( "--setup", action="store_true", help="Force setup standalone server environment using run-server.sh" ) @@ -522,7 +529,11 @@ def main(): # Initialize simulator consistently for all use cases simulator = CommunicationSimulator( - verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests, setup=args.setup, quick_mode=args.quick + verbose=args.verbose, + keep_logs=args.keep_logs, + selected_tests=args.tests, + setup=args.setup, + quick_mode=args.quick, ) # Determine execution mode and run diff --git a/conf/custom_models.json b/conf/custom_models.json index 2a3bcf3..f794d00 100644 --- a/conf/custom_models.json +++ b/conf/custom_models.json @@ -22,6 
+22,7 @@ "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')", "aliases": "Array of short names users can type instead of the full model name", "context_window": "Total number of tokens the model can process (input + output combined)", + "max_output_tokens": "Maximum number of tokens the model can generate in a single response", "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)", "supports_json_mode": "Whether the model can guarantee valid JSON output", "supports_function_calling": "Whether the model supports function/tool calling", @@ -36,6 +37,7 @@ "model_name": "my-local-model", "aliases": ["shortname", "nickname", "abbrev"], "context_window": 128000, + "max_output_tokens": 32768, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -52,6 +54,7 @@ "model_name": "anthropic/claude-opus-4", "aliases": ["opus", "claude-opus", "claude4-opus", "claude-4-opus"], "context_window": 200000, + "max_output_tokens": 64000, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, @@ -63,6 +66,7 @@ "model_name": "anthropic/claude-sonnet-4", "aliases": ["sonnet", "claude-sonnet", "claude4-sonnet", "claude-4-sonnet", "claude"], "context_window": 200000, + "max_output_tokens": 64000, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, @@ -74,6 +78,7 @@ "model_name": "anthropic/claude-3.5-haiku", "aliases": ["haiku", "claude-haiku", "claude3-haiku", "claude-3-haiku"], "context_window": 200000, + "max_output_tokens": 64000, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, @@ -85,6 +90,7 @@ "model_name": "google/gemini-2.5-pro", "aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"], "context_window": 1048576, + "max_output_tokens": 65536, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": false, @@ -96,6 +102,7 @@ "model_name": "google/gemini-2.5-flash", "aliases": ["flash","gemini-flash", "flash-openrouter", "flash-2.5"], "context_window": 1048576, + "max_output_tokens": 65536, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": false, @@ -107,6 +114,7 @@ "model_name": "mistralai/mistral-large-2411", "aliases": ["mistral-large", "mistral"], "context_window": 128000, + "max_output_tokens": 32000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -118,6 +126,7 @@ "model_name": "meta-llama/llama-3-70b", "aliases": ["llama", "llama3", "llama3-70b", "llama-70b", "llama3-openrouter"], "context_window": 8192, + "max_output_tokens": 8192, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, @@ -129,6 +138,7 @@ "model_name": "deepseek/deepseek-r1-0528", "aliases": ["deepseek-r1", "deepseek", "r1", "deepseek-thinking"], "context_window": 65536, + "max_output_tokens": 32768, "supports_extended_thinking": true, "supports_json_mode": true, "supports_function_calling": false, @@ -140,6 +150,7 @@ "model_name": "perplexity/llama-3-sonar-large-32k-online", "aliases": ["perplexity", "sonar", "perplexity-online"], "context_window": 32768, + "max_output_tokens": 32768, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, @@ -151,6 
+162,7 @@ "model_name": "openai/o3", "aliases": ["o3"], "context_window": 200000, + "max_output_tokens": 100000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -164,6 +176,7 @@ "model_name": "openai/o3-mini", "aliases": ["o3-mini", "o3mini"], "context_window": 200000, + "max_output_tokens": 100000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -177,6 +190,7 @@ "model_name": "openai/o3-mini-high", "aliases": ["o3-mini-high", "o3mini-high"], "context_window": 200000, + "max_output_tokens": 100000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -190,6 +204,7 @@ "model_name": "openai/o3-pro", "aliases": ["o3-pro", "o3pro"], "context_window": 200000, + "max_output_tokens": 100000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -203,6 +218,7 @@ "model_name": "openai/o4-mini", "aliases": ["o4-mini", "o4mini"], "context_window": 200000, + "max_output_tokens": 100000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, @@ -212,23 +228,11 @@ "temperature_constraint": "fixed", "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision" }, - { - "model_name": "openai/o4-mini-high", - "aliases": ["o4-mini-high", "o4mini-high", "o4minihigh", "o4minihi"], - "context_window": 200000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": false, - "temperature_constraint": "fixed", - "description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks with vision" - }, { "model_name": "llama3.2", "aliases": ["local-llama", "local", "llama3.2", "ollama-llama"], "context_window": 128000, + "max_output_tokens": 64000, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md index 65dc7f3..9383354 100644 --- a/docs/advanced-usage.md +++ b/docs/advanced-usage.md @@ -38,7 +38,6 @@ Regardless of your default configuration, you can specify models per request: | **`o3`** | OpenAI | 200K tokens | Strong logical reasoning | Debugging logic errors, systematic analysis | | **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks | | **`o4-mini`** | OpenAI | 200K tokens | Latest reasoning model | Optimized for shorter contexts | -| **`o4-mini-high`** | OpenAI | 200K tokens | Enhanced reasoning | Complex tasks requiring deeper analysis | | **`gpt4.1`** | OpenAI | 1M tokens | Latest GPT-4 with extended context | Large codebase analysis, comprehensive reviews | | **`llama`** (Llama 3.2) | Custom/Local | 128K tokens | Local inference, privacy | On-device analysis, cost-free processing | | **Any model** | OpenRouter | Varies | Access to GPT-4, Claude, Llama, etc. 
| User-specified or based on task requirements | @@ -69,7 +68,7 @@ OPENAI_ALLOWED_MODELS=o4-mini # High-performance: Quality over cost GOOGLE_ALLOWED_MODELS=pro -OPENAI_ALLOWED_MODELS=o3,o4-mini-high +OPENAI_ALLOWED_MODELS=o3,o4-mini ``` **Important Notes:** @@ -144,7 +143,7 @@ All tools that work with files support **both individual files and entire direct **`analyze`** - Analyze files or directories - `files`: List of file paths or directories (required) - `question`: What to analyze (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `analysis_type`: architecture|performance|security|quality|general - `output_format`: summary|detailed|actionable - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) @@ -159,7 +158,7 @@ All tools that work with files support **both individual files and entire direct **`codereview`** - Review code files or directories - `files`: List of file paths or directories (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `review_type`: full|security|performance|quick - `focus_on`: Specific aspects to focus on - `standards`: Coding standards to enforce @@ -175,7 +174,7 @@ All tools that work with files support **both individual files and entire direct **`debug`** - Debug with file context - `error_description`: Description of the issue (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `error_context`: Stack trace or logs - `files`: Files or directories related to the issue - `runtime_info`: Environment details @@ -191,7 +190,7 @@ All tools that work with files support **both individual files and entire direct **`thinkdeep`** - Extended analysis with file context - `current_analysis`: Your current thinking (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `problem_context`: Additional context - `focus_areas`: Specific aspects to focus on - `files`: Files or directories for context @@ -207,7 +206,7 @@ All tools that work with files support **both individual files and entire direct **`testgen`** - Comprehensive test generation with edge case coverage - `files`: Code files or directories to generate tests for (required) - `prompt`: Description of what to test, testing objectives, and scope (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `test_examples`: Optional existing test files as style/pattern reference - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) @@ -222,7 +221,7 @@ All tools that work with files support **both individual files and entire direct - `files`: Code files or directories to analyze for refactoring opportunities (required) - `prompt`: Description of refactoring goals, context, and specific areas of focus (required) - `refactor_type`: codesmells|decompose|modernize|organization (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: 
server default) - `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security') - `style_guide_examples`: Optional existing code files to use as style/pattern reference - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) diff --git a/docs/configuration.md b/docs/configuration.md index 8107cc4..473b6de 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -63,7 +63,7 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model **Default Model Selection:** ```env -# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high', etc. +# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', etc. DEFAULT_MODEL=auto # Claude picks best model for each task (recommended) ``` @@ -74,7 +74,6 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended) - **`o3`**: Strong logical reasoning (200K context) - **`o3-mini`**: Balanced speed/quality (200K context) - **`o4-mini`**: Latest reasoning model, optimized for shorter contexts -- **`o4-mini-high`**: Enhanced O4 with higher reasoning effort - **`grok`**: GROK-3 advanced reasoning (131K context) - **Custom models**: via OpenRouter or local APIs @@ -120,7 +119,6 @@ OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral - `o3` (200K context, high reasoning) - `o3-mini` (200K context, balanced) - `o4-mini` (200K context, latest balanced) -- `o4-mini-high` (200K context, enhanced reasoning) - `mini` (shorthand for o4-mini) **Gemini Models:** diff --git a/docs/tools/analyze.md b/docs/tools/analyze.md index 379b20d..618a0be 100644 --- a/docs/tools/analyze.md +++ b/docs/tools/analyze.md @@ -65,7 +65,7 @@ This workflow ensures methodical analysis before expert insights, resulting in d **Initial Configuration (used in step 1):** - `prompt`: What to analyze or look for (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `analysis_type`: architecture|performance|security|quality|general (default: general) - `output_format`: summary|detailed|actionable (default: detailed) - `temperature`: Temperature for analysis (0-1, default 0.2) diff --git a/docs/tools/chat.md b/docs/tools/chat.md index 1c2b507..b7557eb 100644 --- a/docs/tools/chat.md +++ b/docs/tools/chat.md @@ -33,7 +33,7 @@ and then debate with the other models to give me a final verdict ## Tool Parameters - `prompt`: Your question or discussion topic (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `files`: Optional files for context (absolute paths) - `images`: Optional images for visual context (absolute paths) - `temperature`: Response creativity (0-1, default 0.5) diff --git a/docs/tools/codereview.md b/docs/tools/codereview.md index 9ba650c..9037cc2 100644 --- a/docs/tools/codereview.md +++ b/docs/tools/codereview.md @@ -80,7 +80,7 @@ The above prompt will simultaneously run two separate `codereview` tools with tw **Initial Review Configuration (used in step 1):** - `prompt`: User's summary of what the code does, expected behavior, constraints, and review objectives (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `review_type`: full|security|performance|quick (default: full) - `focus_on`: Specific aspects to 
focus on (e.g., "security vulnerabilities", "performance bottlenecks") - `standards`: Coding standards to enforce (e.g., "PEP8", "ESLint", "Google Style Guide") diff --git a/docs/tools/debug.md b/docs/tools/debug.md index 7efc454..6e7f20d 100644 --- a/docs/tools/debug.md +++ b/docs/tools/debug.md @@ -73,7 +73,7 @@ This structured approach ensures Claude performs methodical groundwork before ex - `images`: Visual debugging materials (error screenshots, logs, etc.) **Model Selection:** -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini (default: server default) - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) - `use_websearch`: Enable web search for documentation and solutions (default: true) - `use_assistant_model`: Whether to use expert analysis phase (default: true, set to false to use Claude only) diff --git a/docs/tools/precommit.md b/docs/tools/precommit.md index a218bd4..d70c1ab 100644 --- a/docs/tools/precommit.md +++ b/docs/tools/precommit.md @@ -135,7 +135,7 @@ Use zen and perform a thorough precommit ensuring there aren't any new regressio **Initial Configuration (used in step 1):** - `path`: Starting directory to search for repos (default: current directory, absolute path required) - `prompt`: The original user request description for the changes (required for context) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `compare_to`: Compare against a branch/tag instead of local changes (optional) - `severity_filter`: critical|high|medium|low|all (default: all) - `include_staged`: Include staged changes in the review (default: true) diff --git a/docs/tools/refactor.md b/docs/tools/refactor.md index 8314a4e..6407a4a 100644 --- a/docs/tools/refactor.md +++ b/docs/tools/refactor.md @@ -103,7 +103,7 @@ This results in Claude first performing its own expert analysis, encouraging it **Initial Configuration (used in step 1):** - `prompt`: Description of refactoring goals, context, and specific areas of focus (required) - `refactor_type`: codesmells|decompose|modernize|organization (default: codesmells) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `focus_areas`: Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security') - `style_guide_examples`: Optional existing code files to use as style/pattern reference (absolute paths) - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) diff --git a/docs/tools/secaudit.md b/docs/tools/secaudit.md index 36c4b8f..280452f 100644 --- a/docs/tools/secaudit.md +++ b/docs/tools/secaudit.md @@ -86,7 +86,7 @@ security remediation plan using planner - `images`: Architecture diagrams, security documentation, or visual references **Initial Security Configuration (used in step 1):** -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `security_scope`: Application context, technology stack, and security boundary definition (required) - `threat_level`: low|medium|high|critical (default: medium) - determines assessment depth and urgency - `compliance_requirements`: List of compliance frameworks to assess against (e.g., ["PCI DSS", 
"SOC2"]) diff --git a/docs/tools/testgen.md b/docs/tools/testgen.md index e19d042..0d74a98 100644 --- a/docs/tools/testgen.md +++ b/docs/tools/testgen.md @@ -70,7 +70,7 @@ Test generation excels with extended reasoning models like Gemini Pro or O3, whi **Initial Configuration (used in step 1):** - `prompt`: Description of what to test, testing objectives, and specific scope/focus areas (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `test_examples`: Optional existing test files or directories to use as style/pattern reference (absolute paths) - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) - `use_assistant_model`: Whether to use expert test generation phase (default: true, set to false to use Claude only) diff --git a/docs/tools/thinkdeep.md b/docs/tools/thinkdeep.md index 5180a8b..26d5322 100644 --- a/docs/tools/thinkdeep.md +++ b/docs/tools/thinkdeep.md @@ -30,7 +30,7 @@ with the best architecture for my project ## Tool Parameters - `prompt`: Your current thinking/analysis to extend and validate (required) -- `model`: auto|pro|flash|o3|o3-mini|o4-mini|o4-mini-high|gpt4.1 (default: server default) +- `model`: auto|pro|flash|o3|o3-mini|o4-mini|gpt4.1 (default: server default) - `problem_context`: Additional context about the problem or goal - `focus_areas`: Specific aspects to focus on (architecture, performance, security, etc.) - `files`: Optional file paths or directories for additional context (absolute paths) diff --git a/providers/base.py b/providers/base.py index 06f60fe..aff8705 100644 --- a/providers/base.py +++ b/providers/base.py @@ -132,6 +132,7 @@ class ModelCapabilities: model_name: str friendly_name: str # Human-friendly name like "Gemini" or "OpenAI" context_window: int # Total context window size in tokens + max_output_tokens: int # Maximum output tokens per request supports_extended_thinking: bool = False supports_system_prompts: bool = True supports_streaming: bool = True diff --git a/providers/custom.py b/providers/custom.py index 021bba5..d32d494 100644 --- a/providers/custom.py +++ b/providers/custom.py @@ -158,6 +158,7 @@ class CustomProvider(OpenAICompatibleProvider): model_name=resolved_name, friendly_name=f"{self.FRIENDLY_NAME} ({resolved_name})", context_window=32_768, # Conservative default + max_output_tokens=32_768, # Conservative default max output supports_extended_thinking=False, # Most custom models don't support this supports_system_prompts=True, supports_streaming=True, @@ -187,7 +188,7 @@ class CustomProvider(OpenAICompatibleProvider): Returns: True if model is intended for custom/local endpoint """ - logging.debug(f"Custom provider validating model: '{model_name}'") + # logging.debug(f"Custom provider validating model: '{model_name}'") # Try to resolve through registry first config = self._registry.resolve(model_name) @@ -195,12 +196,12 @@ class CustomProvider(OpenAICompatibleProvider): model_id = config.model_name # Use explicit is_custom flag for clean validation if config.is_custom: - logging.debug(f"Model '{model_name}' -> '{model_id}' validated via registry (custom model)") + logging.debug(f"... 
[Custom] Model '{model_name}' -> '{model_id}' validated via registry") return True else: # This is a cloud/OpenRouter model - CustomProvider should NOT handle these # Let OpenRouter provider handle them instead - logging.debug(f"Model '{model_name}' -> '{model_id}' rejected (cloud model, defer to OpenRouter)") + # logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' not custom (defer to OpenRouter)") return False # Handle version tags for unknown models (e.g., "my-model:latest") diff --git a/providers/dial.py b/providers/dial.py index f019415..e0c4a29 100644 --- a/providers/dial.py +++ b/providers/dial.py @@ -37,6 +37,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="o3-2025-04-16", friendly_name="DIAL (O3)", context_window=200_000, + max_output_tokens=100_000, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -54,6 +55,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="o4-mini-2025-04-16", friendly_name="DIAL (O4-mini)", context_window=200_000, + max_output_tokens=100_000, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -71,6 +73,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="anthropic.claude-sonnet-4-20250514-v1:0", friendly_name="DIAL (Sonnet 4)", context_window=200_000, + max_output_tokens=64_000, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -88,6 +91,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking", friendly_name="DIAL (Sonnet 4 Thinking)", context_window=200_000, + max_output_tokens=64_000, supports_extended_thinking=True, # Thinking mode variant supports_system_prompts=True, supports_streaming=True, @@ -105,6 +109,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="anthropic.claude-opus-4-20250514-v1:0", friendly_name="DIAL (Opus 4)", context_window=200_000, + max_output_tokens=64_000, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -122,6 +127,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking", friendly_name="DIAL (Opus 4 Thinking)", context_window=200_000, + max_output_tokens=64_000, supports_extended_thinking=True, # Thinking mode variant supports_system_prompts=True, supports_streaming=True, @@ -139,6 +145,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="gemini-2.5-pro-preview-03-25-google-search", friendly_name="DIAL (Gemini 2.5 Pro Search)", context_window=1_000_000, + max_output_tokens=65_536, supports_extended_thinking=False, # DIAL doesn't expose thinking mode supports_system_prompts=True, supports_streaming=True, @@ -156,6 +163,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="gemini-2.5-pro-preview-05-06", friendly_name="DIAL (Gemini 2.5 Pro)", context_window=1_000_000, + max_output_tokens=65_536, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -173,6 +181,7 @@ class DIALModelProvider(OpenAICompatibleProvider): model_name="gemini-2.5-flash-preview-05-20", friendly_name="DIAL (Gemini Flash 2.5)", context_window=1_000_000, + max_output_tokens=65_536, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, diff --git a/providers/gemini.py b/providers/gemini.py index 1118699..51916b0 100644 --- a/providers/gemini.py +++ b/providers/gemini.py @@ -24,6 +24,7 @@ class 
GeminiModelProvider(ModelProvider): model_name="gemini-2.0-flash", friendly_name="Gemini (Flash 2.0)", context_window=1_048_576, # 1M tokens + max_output_tokens=65_536, supports_extended_thinking=True, # Experimental thinking mode supports_system_prompts=True, supports_streaming=True, @@ -42,6 +43,7 @@ class GeminiModelProvider(ModelProvider): model_name="gemini-2.0-flash-lite", friendly_name="Gemini (Flash Lite 2.0)", context_window=1_048_576, # 1M tokens + max_output_tokens=65_536, supports_extended_thinking=False, # Not supported per user request supports_system_prompts=True, supports_streaming=True, @@ -59,6 +61,7 @@ class GeminiModelProvider(ModelProvider): model_name="gemini-2.5-flash", friendly_name="Gemini (Flash 2.5)", context_window=1_048_576, # 1M tokens + max_output_tokens=65_536, supports_extended_thinking=True, supports_system_prompts=True, supports_streaming=True, @@ -77,6 +80,7 @@ class GeminiModelProvider(ModelProvider): model_name="gemini-2.5-pro", friendly_name="Gemini (Pro 2.5)", context_window=1_048_576, # 1M tokens + max_output_tokens=65_536, supports_extended_thinking=True, supports_system_prompts=True, supports_streaming=True, diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py index fec4484..17ce60d 100644 --- a/providers/openai_compatible.py +++ b/providers/openai_compatible.py @@ -687,7 +687,6 @@ class OpenAICompatibleProvider(ModelProvider): "o3-mini", "o3-pro", "o4-mini", - "o4-mini-high", # Note: Claude models would be handled by a separate provider } supports = model_name.lower() in vision_models diff --git a/providers/openai_provider.py b/providers/openai_provider.py index e065ee1..d977869 100644 --- a/providers/openai_provider.py +++ b/providers/openai_provider.py @@ -24,6 +24,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o3", friendly_name="OpenAI (O3)", context_window=200_000, # 200K tokens + max_output_tokens=65536, # 64K max output tokens supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -41,6 +42,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o3-mini", friendly_name="OpenAI (O3-mini)", context_window=200_000, # 200K tokens + max_output_tokens=65536, # 64K max output tokens supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -58,6 +60,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o3-pro-2025-06-10", friendly_name="OpenAI (O3-Pro)", context_window=200_000, # 200K tokens + max_output_tokens=65536, # 64K max output tokens supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -75,6 +78,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o4-mini", friendly_name="OpenAI (O4-mini)", context_window=200_000, # 200K tokens + max_output_tokens=65536, # 64K max output tokens supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -85,30 +89,14 @@ supports_temperature=False, # O4 models don't accept temperature parameter temperature_constraint=create_temperature_constraint("fixed"), description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning", - aliases=["mini", "o4mini"], - ), - "o4-mini-high": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="o4-mini-high", - friendly_name="OpenAI (O4-mini-high)", - context_window=200_000, # 200K tokens - supports_extended_thinking=False, -
supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # O4 models support vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=False, # O4 models don't accept temperature parameter - temperature_constraint=create_temperature_constraint("fixed"), - description="Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks", - aliases=["o4minihigh", "o4minihi", "mini-high"], + aliases=["mini", "o4mini", "o4-mini"], ), "gpt-4.1-2025-04-14": ModelCapabilities( provider=ProviderType.OPENAI, model_name="gpt-4.1-2025-04-14", friendly_name="OpenAI (GPT 4.1)", context_window=1_000_000, # 1M tokens + max_output_tokens=32_768, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, diff --git a/providers/openrouter.py b/providers/openrouter.py index 3d90238..18d3d5e 100644 --- a/providers/openrouter.py +++ b/providers/openrouter.py @@ -101,6 +101,7 @@ class OpenRouterProvider(OpenAICompatibleProvider): model_name=resolved_name, friendly_name=self.FRIENDLY_NAME, context_window=32_768, # Conservative default context window + max_output_tokens=32_768, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, diff --git a/providers/registry.py b/providers/registry.py index da7a9b5..4ab5732 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -24,8 +24,6 @@ class ModelProviderRegistry: cls._instance._providers = {} cls._instance._initialized_providers = {} logging.debug(f"REGISTRY: Created instance {cls._instance}") - else: - logging.debug(f"REGISTRY: Returning existing instance {cls._instance}") return cls._instance @classmethod diff --git a/providers/xai.py b/providers/xai.py index 2b6fd04..dcb14a1 100644 --- a/providers/xai.py +++ b/providers/xai.py @@ -26,6 +26,7 @@ class XAIModelProvider(OpenAICompatibleProvider): model_name="grok-3", friendly_name="X.AI (Grok 3)", context_window=131_072, # 131K tokens + max_output_tokens=131072, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, @@ -43,6 +44,7 @@ class XAIModelProvider(OpenAICompatibleProvider): model_name="grok-3-fast", friendly_name="X.AI (Grok 3 Fast)", context_window=131_072, # 131K tokens + max_output_tokens=131072, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, diff --git a/tests/mock_helpers.py b/tests/mock_helpers.py index eb283b6..1122af1 100644 --- a/tests/mock_helpers.py +++ b/tests/mock_helpers.py @@ -15,6 +15,7 @@ def create_mock_provider(model_name="gemini-2.5-flash", context_window=1_048_576 model_name=model_name, friendly_name="Gemini", context_window=context_window, + max_output_tokens=8192, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, diff --git a/tests/test_alias_target_restrictions.py b/tests/test_alias_target_restrictions.py index 7b182e6..dd36b83 100644 --- a/tests/test_alias_target_restrictions.py +++ b/tests/test_alias_target_restrictions.py @@ -211,7 +211,7 @@ class TestAliasTargetRestrictions: # Verify the polymorphic method was called mock_provider.list_all_known_models.assert_called_once() - @patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini-high"}) # Restrict to specific model + @patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini"}) # Restrict to specific model def test_complex_alias_chains_handled_correctly(self): """Test that complex alias chains are handled correctly in 
restrictions.""" # Clear cached restriction service @@ -221,12 +221,11 @@ class TestAliasTargetRestrictions: provider = OpenAIModelProvider(api_key="test-key") - # Only o4-mini-high should be allowed - assert provider.validate_model_name("o4-mini-high") + # Only o4-mini should be allowed + assert provider.validate_model_name("o4-mini") # Other models should be blocked - assert not provider.validate_model_name("o4-mini") - assert not provider.validate_model_name("mini") # This resolves to o4-mini + assert not provider.validate_model_name("o3") assert not provider.validate_model_name("o3-mini") def test_critical_regression_validation_sees_alias_targets(self): @@ -307,7 +306,7 @@ class TestAliasTargetRestrictions: it appear that target-based restrictions don't work. """ # Test with a made-up restriction scenario - with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini-high,o3-mini"}): + with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o4-mini,o3-mini"}): # Clear cached restriction service import utils.model_restrictions @@ -318,7 +317,7 @@ class TestAliasTargetRestrictions: # These specific target models should be recognized as valid all_known = provider.list_all_known_models() - assert "o4-mini-high" in all_known, "Target model o4-mini-high should be known" + assert "o4-mini" in all_known, "Target model o4-mini should be known" assert "o3-mini" in all_known, "Target model o3-mini should be known" # Validation should not warn about these being unrecognized @@ -329,11 +328,11 @@ class TestAliasTargetRestrictions: # Should not warn about our allowed models being unrecognized all_warnings = [str(call) for call in mock_logger.warning.call_args_list] for warning in all_warnings: - assert "o4-mini-high" not in warning or "not a recognized" not in warning + assert "o4-mini" not in warning or "not a recognized" not in warning assert "o3-mini" not in warning or "not a recognized" not in warning # The restriction should actually work - assert provider.validate_model_name("o4-mini-high") + assert provider.validate_model_name("o4-mini") assert provider.validate_model_name("o3-mini") - assert not provider.validate_model_name("o4-mini") # not in allowed list + assert not provider.validate_model_name("o3-pro") # not in allowed list assert not provider.validate_model_name("o3") # not in allowed list diff --git a/tests/test_auto_mode.py b/tests/test_auto_mode.py index 74d8ae3..f96feb3 100644 --- a/tests/test_auto_mode.py +++ b/tests/test_auto_mode.py @@ -64,7 +64,7 @@ class TestAutoMode: models_with_descriptions[model_name] = description # Check all expected models are present with meaningful descriptions - expected_models = ["flash", "pro", "o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"] + expected_models = ["flash", "pro", "o3", "o3-mini", "o3-pro", "o4-mini"] for model in expected_models: # Model should exist somewhere in the providers # Note: Some models might not be available if API keys aren't configured diff --git a/tests/test_buggy_behavior_prevention.py b/tests/test_buggy_behavior_prevention.py index e960f1f..e925e31 100644 --- a/tests/test_buggy_behavior_prevention.py +++ b/tests/test_buggy_behavior_prevention.py @@ -118,7 +118,7 @@ class TestBuggyBehaviorPrevention: provider = OpenAIModelProvider(api_key="test-key") # Simulate a scenario where admin wants to restrict specific targets - with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini,o4-mini-high"}): + with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini,o4-mini"}): # Clear cached restriction service 
import utils.model_restrictions @@ -126,19 +126,21 @@ class TestBuggyBehaviorPrevention: # These should work because they're explicitly allowed assert provider.validate_model_name("o3-mini") - assert provider.validate_model_name("o4-mini-high") + assert provider.validate_model_name("o4-mini") # These should be blocked - assert not provider.validate_model_name("o4-mini") # Not in allowed list + assert not provider.validate_model_name("o3-pro") # Not in allowed list assert not provider.validate_model_name("o3") # Not in allowed list - assert not provider.validate_model_name("mini") # Resolves to o4-mini, not allowed + + # This should be ALLOWED because it resolves to o4-mini which is in the allowed list + assert provider.validate_model_name("mini") # Resolves to o4-mini, which IS allowed # Verify our list_all_known_models includes the restricted models all_known = provider.list_all_known_models() assert "o3-mini" in all_known # Should be known (and allowed) - assert "o4-mini-high" in all_known # Should be known (and allowed) - assert "o4-mini" in all_known # Should be known (but blocked) - assert "mini" in all_known # Should be known (but blocked) + assert "o4-mini" in all_known # Should be known (and allowed) + assert "o3-pro" in all_known # Should be known (but blocked) + assert "mini" in all_known # Should be known (and allowed since it resolves to o4-mini) def test_demonstration_of_old_vs_new_interface(self): """ diff --git a/tests/test_model_enumeration.py b/tests/test_model_enumeration.py index 548f785..0a78b17 100644 --- a/tests/test_model_enumeration.py +++ b/tests/test_model_enumeration.py @@ -149,7 +149,7 @@ class TestModelEnumeration: ("o3", False), # OpenAI - not available without API key ("grok", False), # X.AI - not available without API key ("gemini-2.5-flash", False), # Full Gemini name - not available without API key - ("o4-mini-high", False), # OpenAI variant - not available without API key + ("o4-mini", False), # OpenAI variant - not available without API key ("grok-3-fast", False), # X.AI variant - not available without API key ], ) diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py index bd34a81..6a93bd5 100644 --- a/tests/test_model_restrictions.py +++ b/tests/test_model_restrictions.py @@ -93,7 +93,7 @@ class TestModelRestrictionService: with patch.dict(os.environ, {"OPENAI_ALLOWED_MODELS": "o3-mini,o4-mini"}): service = ModelRestrictionService() - models = ["o3", "o3-mini", "o4-mini", "o4-mini-high"] + models = ["o3", "o3-mini", "o4-mini", "o3-pro"] filtered = service.filter_models(ProviderType.OPENAI, models) assert filtered == ["o3-mini", "o4-mini"] @@ -573,7 +573,7 @@ class TestShorthandRestrictions: # Other models should not work assert not openai_provider.validate_model_name("o3") - assert not openai_provider.validate_model_name("o4-mini-high") + assert not openai_provider.validate_model_name("o3-pro") @patch.dict( os.environ, diff --git a/tests/test_o3_temperature_fix_simple.py b/tests/test_o3_temperature_fix_simple.py index da0ea60..0a27256 100644 --- a/tests/test_o3_temperature_fix_simple.py +++ b/tests/test_o3_temperature_fix_simple.py @@ -185,7 +185,7 @@ class TestO3TemperatureParameterFixSimple: provider = OpenAIModelProvider(api_key="test-key") # Test O3/O4 models that should NOT support temperature parameter - o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"] + o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini"] for model in o3_o4_models: capabilities = provider.get_capabilities(model) diff --git 
a/tests/test_openai_provider.py b/tests/test_openai_provider.py index baab182..3429be9 100644 --- a/tests/test_openai_provider.py +++ b/tests/test_openai_provider.py @@ -47,14 +47,13 @@ class TestOpenAIProvider: assert provider.validate_model_name("o3-mini") is True assert provider.validate_model_name("o3-pro") is True assert provider.validate_model_name("o4-mini") is True - assert provider.validate_model_name("o4-mini-high") is True + assert provider.validate_model_name("o4-mini") is True # Test valid aliases assert provider.validate_model_name("mini") is True assert provider.validate_model_name("o3mini") is True assert provider.validate_model_name("o4mini") is True - assert provider.validate_model_name("o4minihigh") is True - assert provider.validate_model_name("o4minihi") is True + assert provider.validate_model_name("o4mini") is True # Test invalid model assert provider.validate_model_name("invalid-model") is False @@ -69,15 +68,14 @@ class TestOpenAIProvider: assert provider._resolve_model_name("mini") == "o4-mini" assert provider._resolve_model_name("o3mini") == "o3-mini" assert provider._resolve_model_name("o4mini") == "o4-mini" - assert provider._resolve_model_name("o4minihigh") == "o4-mini-high" - assert provider._resolve_model_name("o4minihi") == "o4-mini-high" + assert provider._resolve_model_name("o4mini") == "o4-mini" # Test full name passthrough assert provider._resolve_model_name("o3") == "o3" assert provider._resolve_model_name("o3-mini") == "o3-mini" assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10" assert provider._resolve_model_name("o4-mini") == "o4-mini" - assert provider._resolve_model_name("o4-mini-high") == "o4-mini-high" + assert provider._resolve_model_name("o4-mini") == "o4-mini" def test_get_capabilities_o3(self): """Test getting model capabilities for O3.""" @@ -184,11 +182,11 @@ class TestOpenAIProvider: call_kwargs = mock_client.chat.completions.create.call_args[1] assert call_kwargs["model"] == "o3-mini" - # Test o4minihigh -> o4-mini-high - mock_response.model = "o4-mini-high" - provider.generate_content(prompt="Test", model_name="o4minihigh", temperature=1.0) + # Test o4mini -> o4-mini + mock_response.model = "o4-mini" + provider.generate_content(prompt="Test", model_name="o4mini", temperature=1.0) call_kwargs = mock_client.chat.completions.create.call_args[1] - assert call_kwargs["model"] == "o4-mini-high" + assert call_kwargs["model"] == "o4-mini" @patch("providers.openai_compatible.OpenAI") def test_generate_content_no_alias_passthrough(self, mock_openai_class): diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py index 6d427ba..454f372 100644 --- a/tests/test_openrouter_provider.py +++ b/tests/test_openrouter_provider.py @@ -77,7 +77,7 @@ class TestOpenRouterProvider: assert provider._resolve_model_name("o3-mini") == "openai/o3-mini" assert provider._resolve_model_name("o3mini") == "openai/o3-mini" assert provider._resolve_model_name("o4-mini") == "openai/o4-mini" - assert provider._resolve_model_name("o4-mini-high") == "openai/o4-mini-high" + assert provider._resolve_model_name("o4-mini") == "openai/o4-mini" assert provider._resolve_model_name("claude") == "anthropic/claude-sonnet-4" assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411" assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528" diff --git a/tests/test_openrouter_registry.py b/tests/test_openrouter_registry.py index f6ea000..6387ebe 100644 --- a/tests/test_openrouter_registry.py +++ 
b/tests/test_openrouter_registry.py @@ -24,7 +24,16 @@ class TestOpenRouterModelRegistry: def test_custom_config_path(self): """Test registry with custom config path.""" # Create temporary config - config_data = {"models": [{"model_name": "test/model-1", "aliases": ["test1", "t1"], "context_window": 4096}]} + config_data = { + "models": [ + { + "model_name": "test/model-1", + "aliases": ["test1", "t1"], + "context_window": 4096, + "max_output_tokens": 2048, + } + ] + } with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(config_data, f) @@ -42,7 +51,11 @@ class TestOpenRouterModelRegistry: def test_environment_variable_override(self): """Test OPENROUTER_MODELS_PATH environment variable.""" # Create custom config - config_data = {"models": [{"model_name": "env/model", "aliases": ["envtest"], "context_window": 8192}]} + config_data = { + "models": [ + {"model_name": "env/model", "aliases": ["envtest"], "context_window": 8192, "max_output_tokens": 4096} + ] + } with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(config_data, f) @@ -127,11 +140,12 @@ class TestOpenRouterModelRegistry: """Test that duplicate aliases are detected.""" config_data = { "models": [ - {"model_name": "test/model-1", "aliases": ["dupe"], "context_window": 4096}, + {"model_name": "test/model-1", "aliases": ["dupe"], "context_window": 4096, "max_output_tokens": 2048}, { "model_name": "test/model-2", "aliases": ["DUPE"], # Same alias, different case "context_window": 8192, + "max_output_tokens": 2048, }, ] } @@ -207,6 +221,7 @@ class TestOpenRouterModelRegistry: friendly_name="OpenRouter (test/full-featured)", aliases=["full"], context_window=128000, + max_output_tokens=8192, supports_extended_thinking=True, supports_system_prompts=True, supports_streaming=True, diff --git a/tests/test_providers.py b/tests/test_providers.py index 5401bc9..036ae9b 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -215,9 +215,7 @@ class TestOpenAIProvider: assert provider.validate_model_name("o3-mini") # Backwards compatibility assert provider.validate_model_name("o4-mini") assert provider.validate_model_name("o4mini") - assert provider.validate_model_name("o4-mini-high") - assert provider.validate_model_name("o4minihigh") - assert provider.validate_model_name("o4minihi") + assert provider.validate_model_name("o4-mini") assert not provider.validate_model_name("gpt-4o") assert not provider.validate_model_name("invalid-model") @@ -229,4 +227,4 @@ class TestOpenAIProvider: assert not provider.supports_thinking_mode("o3mini") assert not provider.supports_thinking_mode("o3-mini") assert not provider.supports_thinking_mode("o4-mini") - assert not provider.supports_thinking_mode("o4-mini-high") + assert not provider.supports_thinking_mode("o4-mini") diff --git a/tests/test_supported_models_aliases.py b/tests/test_supported_models_aliases.py index 6ed899f..1eb76b5 100644 --- a/tests/test_supported_models_aliases.py +++ b/tests/test_supported_models_aliases.py @@ -51,15 +51,14 @@ class TestSupportedModelsAliases: assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases assert "o3mini" in provider.SUPPORTED_MODELS["o3-mini"].aliases assert "o3-pro" in provider.SUPPORTED_MODELS["o3-pro-2025-06-10"].aliases - assert "o4minihigh" in provider.SUPPORTED_MODELS["o4-mini-high"].aliases - assert "o4minihi" in provider.SUPPORTED_MODELS["o4-mini-high"].aliases + assert "o4mini" in provider.SUPPORTED_MODELS["o4-mini"].aliases assert "gpt4.1" in 
provider.SUPPORTED_MODELS["gpt-4.1-2025-04-14"].aliases # Test alias resolution assert provider._resolve_model_name("mini") == "o4-mini" assert provider._resolve_model_name("o3mini") == "o3-mini" assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10" - assert provider._resolve_model_name("o4minihigh") == "o4-mini-high" + assert provider._resolve_model_name("o4mini") == "o4-mini" assert provider._resolve_model_name("gpt4.1") == "gpt-4.1-2025-04-14" # Test case insensitive resolution
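For reference, a minimal sketch of how downstream code might consume the `max_output_tokens` field added to `ModelCapabilities` in `providers/base.py` above. The field names and values mirror the o4-mini entry in this diff; the `ProviderType` import path and the `effective_max_tokens` helper are illustrative assumptions, not part of this change:

```python
from providers.base import ModelCapabilities, ProviderType  # ProviderType location assumed

# Mirrors the o4-mini entry registered in providers/openai_provider.py above;
# fields omitted here (thinking, streaming, vision flags, ...) rely on the
# dataclass defaults shown in providers/base.py.
caps = ModelCapabilities(
    provider=ProviderType.OPENAI,
    model_name="o4-mini",
    friendly_name="OpenAI (O4-mini)",
    context_window=200_000,
    max_output_tokens=65536,  # new field: per-request generation cap
)


def effective_max_tokens(requested: int, capabilities: ModelCapabilities) -> int:
    """Hypothetical helper: clamp a caller's completion budget to the model's cap."""
    return min(requested, capabilities.max_output_tokens)


print(effective_max_tokens(120_000, caps))  # -> 65536
```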