diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 8a6b403..4535237 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -36,10 +36,32 @@ Please provide a clear and concise description of what this PR does. ## Testing -- [ ] Unit tests pass -- [ ] Integration tests pass (if applicable) -- [ ] Manual testing completed -- [ ] Documentation updated (if needed) +**Please review our [Testing Guide](../docs/testing.md) before submitting.** + +### Run all linting and tests (required): +```bash +# Activate virtual environment first +source venv/bin/activate + +# Run all linting checks +ruff check . +black --check . +isort --check-only . + +# Run all unit tests +python -m pytest -xvs + +# If you made tool changes, also run simulator tests +python communication_simulator_test.py +``` + +- [ ] All linting passes (ruff, black, isort) +- [ ] All unit tests pass +- [ ] **For new features**: Unit tests added in `tests/` +- [ ] **For tool changes**: Simulator tests added in `simulator_tests/` +- [ ] **For bug fixes**: Tests added to prevent regression +- [ ] Simulator tests pass (if applicable) +- [ ] Manual testing completed with realistic scenarios ## Related Issues @@ -48,11 +70,12 @@ Fixes #(issue number) ## Checklist - [ ] PR title follows the format guidelines above -- [ ] Code follows the project's style guidelines +- [ ] Activated venv and ran all linting: `source venv/bin/activate && ruff check . && black --check . && isort --check-only .` - [ ] Self-review completed -- [ ] Tests added/updated as needed +- [ ] **Tests added for ALL changes** (see Testing section above) - [ ] Documentation updated as needed -- [ ] All tests passing +- [ ] All unit tests passing: `python -m pytest -xvs` +- [ ] Relevant simulator tests passing (if tool changes) - [ ] Ready for review ## Additional Notes diff --git a/README.md b/README.md index 4d5e795..c9be6ff 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ git clone https://github.com/BeehiveInnovations/zen-mcp-server.git cd zen-mcp-server # One-command setup (includes Redis for AI conversations) -./setup-docker.sh +./run-server.sh ``` **What this does:** @@ -153,6 +153,9 @@ nano .env # WORKSPACE_ROOT=/Users/your-username (automatically configured) # Note: At least one API key OR custom URL is required + +# After making changes to .env, restart the server: +# ./run-server.sh ``` ### 4. Configure Claude @@ -184,7 +187,7 @@ This will open a folder revealing `claude_desktop_config.json`. 2. ** Update Docker Configuration** -The setup script shows you the exact configuration. It looks like this. When you ran `setup-docker.sh` it should +The setup script shows you the exact configuration. It looks like this. 
When you ran `run-server.sh` it should have produced a configuration for you to copy: ```json @@ -500,18 +503,24 @@ DEFAULT_MODEL=auto # Claude picks the best model automatically # API Keys (at least one required) GEMINI_API_KEY=your-gemini-key # Enables Gemini Pro & Flash -OPENAI_API_KEY=your-openai-key # Enables O3, O3-mini +OPENAI_API_KEY=your-openai-key # Enables O3, O3mini, O4-mini, O4-mini-high ``` **Available Models:** - **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis - **`flash`** (Gemini 2.0 Flash): Ultra-fast responses - **`o3`**: Strong logical reasoning -- **`o3-mini`**: Balanced speed/quality +- **`o3mini`**: Balanced speed/quality +- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts +- **`o4-mini-high`**: Enhanced O4 with higher reasoning effort - **Custom models**: via OpenRouter or local APIs (Ollama, vLLM, etc.) For detailed configuration options, see the [Advanced Usage Guide](docs/advanced-usage.md). +## Testing + +For information on running tests and contributing, see the [Testing Guide](docs/testing.md). + ## License Apache 2.0 License - see LICENSE file for details. diff --git a/communication_simulator_test.py b/communication_simulator_test.py index 72ef24b..bcdb0ee 100644 --- a/communication_simulator_test.py +++ b/communication_simulator_test.py @@ -17,7 +17,7 @@ Usage: --tests: Run specific tests only (space-separated) --list-tests: List all available tests --individual: Run a single test individually - --rebuild: Force rebuild Docker environment using setup-docker.sh + --rebuild: Force rebuild Docker environment using run-server.sh Available tests: basic_conversation - Basic conversation flow with chat tool @@ -115,9 +115,9 @@ class CommunicationSimulator: self.temp_dir = tempfile.mkdtemp(prefix="mcp_test_") self.logger.debug(f"Created temp directory: {self.temp_dir}") - # Only run setup-docker.sh if rebuild is requested + # Only run run-server.sh if rebuild is requested if self.rebuild: - if not self._run_setup_docker(): + if not self._run_server_script(): return False # Always verify containers are running (regardless of rebuild) @@ -127,34 +127,34 @@ class CommunicationSimulator: self.logger.error(f"Failed to setup test environment: {e}") return False - def _run_setup_docker(self) -> bool: - """Run the setup-docker.sh script""" + def _run_server_script(self) -> bool: + """Run the run-server.sh script""" try: - self.logger.info("Running setup-docker.sh...") + self.logger.info("Running run-server.sh...") - # Check if setup-docker.sh exists - setup_script = "./setup-docker.sh" + # Check if run-server.sh exists + setup_script = "./run-server.sh" if not os.path.exists(setup_script): - self.logger.error(f"setup-docker.sh not found at {setup_script}") + self.logger.error(f"run-server.sh not found at {setup_script}") return False # Make sure it's executable result = self._run_command(["chmod", "+x", setup_script], capture_output=True) if result.returncode != 0: - self.logger.error(f"Failed to make setup-docker.sh executable: {result.stderr}") + self.logger.error(f"Failed to make run-server.sh executable: {result.stderr}") return False # Run the setup script result = self._run_command([setup_script], capture_output=True) if result.returncode != 0: - self.logger.error(f"setup-docker.sh failed: {result.stderr}") + self.logger.error(f"run-server.sh failed: {result.stderr}") return False - self.logger.info("setup-docker.sh completed successfully") + self.logger.info("run-server.sh completed successfully") return True except Exception as e: - 
self.logger.error(f"Failed to run setup-docker.sh: {e}") + self.logger.error(f"Failed to run run-server.sh: {e}") return False def _verify_existing_containers(self) -> bool: @@ -345,9 +345,9 @@ class CommunicationSimulator: try: self.logger.info("Cleaning up test environment...") - # Note: We don't stop Docker services ourselves - let setup-docker.sh handle Docker lifecycle + # Note: We don't stop Docker services ourselves - let run-server.sh handle Docker lifecycle if not self.keep_logs: - self.logger.info("Test completed. Docker containers left running (use setup-docker.sh to manage)") + self.logger.info("Test completed. Docker containers left running (use run-server.sh to manage)") else: self.logger.info("Keeping logs and Docker services running for inspection") @@ -375,7 +375,7 @@ def parse_arguments(): parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)") parser.add_argument("--list-tests", action="store_true", help="List available tests and exit") parser.add_argument("--individual", "-i", help="Run a single test individually") - parser.add_argument("--rebuild", action="store_true", help="Force rebuild Docker environment using setup-docker.sh") + parser.add_argument("--rebuild", action="store_true", help="Force rebuild Docker environment using run-server.sh") return parser.parse_args() diff --git a/conf/custom_models.json b/conf/custom_models.json index 611e4f0..0838930 100644 --- a/conf/custom_models.json +++ b/conf/custom_models.json @@ -130,15 +130,42 @@ "supports_function_calling": true, "description": "OpenAI's o3 model - well-rounded and powerful across domains" }, + { + "model_name": "openai/o3-mini", + "aliases": ["o3-mini", "o3mini"], + "context_window": 200000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "description": "OpenAI's o3-mini model - balanced performance and speed" + }, { "model_name": "openai/o3-mini-high", - "aliases": ["o3-mini", "o3mini", "o3-mini-high", "o3mini-high"], + "aliases": ["o3-mini-high", "o3mini-high"], "context_window": 200000, "supports_extended_thinking": false, "supports_json_mode": true, "supports_function_calling": true, "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems" }, + { + "model_name": "openai/o4-mini", + "aliases": ["o4-mini", "o4mini"], + "context_window": 200000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning" + }, + { + "model_name": "openai/o4-mini-high", + "aliases": ["o4-mini-high", "o4mini-high", "o4minihigh", "o4minihi"], + "context_window": 200000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks" + }, { "model_name": "llama3.2", "aliases": ["local-llama", "local", "llama3.2", "ollama-llama"], diff --git a/config.py b/config.py index c487f75..389d86f 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "4.3.0" +__version__ = "4.3.1" # Last update date in ISO format __updated__ = "2025-06-14" # Primary maintainer @@ -32,23 +32,44 @@ IS_AUTO_MODE = DEFAULT_MODEL.lower() == 
"auto" # Model capabilities descriptions for auto mode # These help Claude choose the best model for each task +# +# IMPORTANT: These are the built-in natively supported models: +# - When GEMINI_API_KEY is set: Enables "flash", "pro" (and their full names) +# - When OPENAI_API_KEY is set: Enables "o3", "o3mini", "o4-mini", "o4-mini-high" +# - When both are set: All models below are available +# - When neither is set but OpenRouter/Custom API is configured: These model +# aliases will automatically map to equivalent models via the proxy provider +# +# In auto mode (DEFAULT_MODEL=auto), Claude will see these descriptions and +# intelligently select the best model for each task. The descriptions appear +# in the tool schema to guide Claude's selection based on task requirements. MODEL_CAPABILITIES_DESC = { + # Gemini models - Available when GEMINI_API_KEY is configured "flash": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations", "pro": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis", + # OpenAI models - Available when OPENAI_API_KEY is configured "o3": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis", "o3-mini": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity", - # Full model names also supported + "o4-mini": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning", + "o4-mini-high": "Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks", + # Full model names also supported (for explicit specification) "gemini-2.5-flash-preview-05-20": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations", "gemini-2.5-pro-preview-06-05": ( "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis" ), } -# Note: When only OpenRouter is configured, these model aliases automatically map to equivalent models: -# - "flash" → "google/gemini-2.5-flash-preview-05-20" -# - "pro" → "google/gemini-2.5-pro-preview-06-05" -# - "o3" → "openai/gpt-4o" -# - "o3-mini" → "openai/gpt-4o-mini" +# OpenRouter/Custom API Fallback Behavior: +# When only OpenRouter or Custom API is configured (no native API keys), these +# model aliases automatically map to equivalent models through the proxy: +# - "flash" → "google/gemini-2.5-flash-preview-05-20" (via OpenRouter) +# - "pro" → "google/gemini-2.5-pro-preview-06-05" (via OpenRouter) +# - "o3" → "openai/o3" (via OpenRouter) +# - "o3mini" → "openai/o3-mini" (via OpenRouter) +# - "o4-mini" → "openai/o4-mini" (via OpenRouter) +# - "o4-mini-high" → "openai/o4-mini-high" (via OpenRouter) +# +# This ensures the same model names work regardless of which provider is configured. # Temperature defaults for different tool types diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md index a45e1fd..a88f178 100644 --- a/docs/advanced-usage.md +++ b/docs/advanced-usage.md @@ -55,6 +55,8 @@ DEFAULT_MODEL=flash # Always use Flash DEFAULT_MODEL=o3 # Always use O3 ``` +**Important:** After changing any configuration in `.env` (including `DEFAULT_MODEL`, API keys, or other settings), restart the server with `./run-server.sh` to apply the changes. 
+ **Per-Request Model Override:** Regardless of your default setting, you can specify models per request: - "Use **pro** for deep security analysis of auth.py" diff --git a/docs/testing.md b/docs/testing.md new file mode 100644 index 0000000..4601bfe --- /dev/null +++ b/docs/testing.md @@ -0,0 +1,126 @@ +# Testing Guide + +This project includes comprehensive test coverage through unit tests and integration simulator tests. + +## Running Tests + +### Prerequisites +- Python virtual environment activated: `source venv/bin/activate` +- All dependencies installed: `pip install -r requirements.txt` +- Docker containers running (for simulator tests): `./run-server.sh` + +### Unit Tests + +Run all unit tests with pytest: +```bash +# Run all tests with verbose output +python -m pytest -xvs + +# Run specific test file +python -m pytest tests/test_providers.py -xvs +``` + +### Simulator Tests + +Simulator tests replicate real-world Claude CLI interactions with the MCP server running in Docker. Unlike unit tests that test isolated functions, simulator tests validate the complete end-to-end flow including: +- Actual MCP protocol communication +- Docker container interactions +- Multi-turn conversations across tools +- Log output validation + +**Important**: Simulator tests require `LOG_LEVEL=DEBUG` in your `.env` file to validate detailed execution logs. + +#### Running All Simulator Tests +```bash +# Run all simulator tests +python communication_simulator_test.py + +# Run with verbose output for debugging +python communication_simulator_test.py --verbose + +# Keep Docker logs after tests for inspection +python communication_simulator_test.py --keep-logs +``` + +#### Running Individual Tests +To run a single simulator test in isolation (useful for debugging or test development): + +```bash +# Run a specific test by name +python communication_simulator_test.py --individual basic_conversation + +# Examples of available tests: +python communication_simulator_test.py --individual content_validation +python communication_simulator_test.py --individual cross_tool_continuation +python communication_simulator_test.py --individual redis_validation +``` + +#### Other Options +```bash +# List all available simulator tests with descriptions +python communication_simulator_test.py --list-tests + +# Run multiple specific tests (not all) +python communication_simulator_test.py --tests basic_conversation content_validation + +# Force Docker environment rebuild before running tests +python communication_simulator_test.py --rebuild +``` + +### Code Quality Checks + +Before committing, ensure all linting passes: +```bash +# Run all linting checks +ruff check . +black --check . +isort --check-only . + +# Auto-fix issues +ruff check . --fix +black . +isort . +``` + +## What Each Test Suite Covers + +### Unit Tests (256 tests) +Test isolated components and functions: +- **Provider functionality**: Model initialization, API interactions, capability checks +- **Tool operations**: All MCP tools (chat, analyze, debug, etc.) 
+ +- **Conversation memory**: Threading, continuation, history management +- **File handling**: Path validation, token limits, deduplication +- **Auto mode**: Model selection logic and fallback behavior + +### Simulator Tests (14 tests) +Validate real-world usage scenarios by simulating actual Claude prompts: +- **Basic conversations**: Multi-turn chat functionality with real prompts +- **Cross-tool continuation**: Context preservation across different tools +- **File deduplication**: Efficient handling of repeated file references +- **Model selection**: Proper routing to configured providers +- **Token allocation**: Context window management in practice +- **Redis validation**: Conversation persistence and retrieval + +## Contributing: Test Requirements + +When contributing to this project: + +1. **New features MUST include tests**: +   - Add unit tests in `tests/` for new functions or classes +   - Test both success and error cases + +2. **Tool changes require simulator tests**: +   - Add simulator tests in `simulator_tests/` for new or modified tools +   - Use realistic prompts that demonstrate the feature +   - Validate output through Docker logs + +3. **Test naming conventions**: +   - Unit tests: `tests/test_<name>.py` +   - Simulator tests: `simulator_tests/test_<name>.py` + +4. **Before submitting PR**: +   - Run all unit tests: `python -m pytest -xvs` +   - Run relevant simulator tests +   - Ensure all linting passes + +Remember: Tests are documentation. They show how features are intended to be used and help prevent regressions. \ No newline at end of file diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 78909c9..34a054d 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -43,7 +43,7 @@ cat .env If you need to update your API keys, edit the `.env` file and then run: ```bash -./setup-docker.sh +./run-server.sh ``` This will validate your configuration and restart the services. @@ -73,7 +73,7 @@ See [Logging Documentation](logging.md) for more details on accessing logs. 
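A quick way to inspect server output while reproducing an issue (a sketch using plain Docker commands; the container name is not specified here, so take it from `docker ps`):

```bash
docker ps                                # confirm the server and Redis containers are running
docker logs --tail 100 -f <container>    # follow recent log output while you reproduce the issue
```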
**"API key environment variable is required"** - Add your API key to the `.env` file -- Run: `./setup-docker.sh` to validate and restart +- Run: `./run-server.sh` to validate and restart **File path errors** - Always use absolute paths: `/Users/you/project/file.py` diff --git a/examples/claude_config_macos.json b/examples/claude_config_macos.json index 475ead8..cab866d 100644 --- a/examples/claude_config_macos.json +++ b/examples/claude_config_macos.json @@ -1,7 +1,7 @@ { "comment": "macOS configuration using Docker", "comment2": "Ensure Docker is running and containers are started", - "comment3": "Run './setup-docker.sh' first to set up the environment", + "comment3": "Run './run-server.sh' first to set up the environment", "mcpServers": { "zen": { "command": "docker", diff --git a/examples/claude_config_wsl.json b/examples/claude_config_wsl.json index 44ea28f..f73747e 100644 --- a/examples/claude_config_wsl.json +++ b/examples/claude_config_wsl.json @@ -1,7 +1,7 @@ { "comment": "Windows configuration using WSL with Docker", "comment2": "Ensure Docker Desktop is running and WSL integration is enabled", - "comment3": "Run './setup-docker.sh' in WSL first to set up the environment", + "comment3": "Run './run-server.sh' in WSL first to set up the environment", "mcpServers": { "zen": { "command": "wsl.exe", diff --git a/providers/openai.py b/providers/openai.py index 9284ff0..c8d73ea 100644 --- a/providers/openai.py +++ b/providers/openai.py @@ -22,6 +22,19 @@ class OpenAIModelProvider(OpenAICompatibleProvider): "context_window": 200_000, # 200K tokens "supports_extended_thinking": False, }, + "o4-mini": { + "context_window": 200_000, # 200K tokens + "supports_extended_thinking": False, + }, + "o4-mini-high": { + "context_window": 200_000, # 200K tokens + "supports_extended_thinking": False, + }, + # Shorthands + "o3mini": "o3-mini", + "o4mini": "o4-mini", + "o4minihigh": "o4-mini-high", + "o4minihi": "o4-mini-high", } def __init__(self, api_key: str, **kwargs): @@ -32,14 +45,17 @@ class OpenAIModelProvider(OpenAICompatibleProvider): def get_capabilities(self, model_name: str) -> ModelCapabilities: """Get capabilities for a specific OpenAI model.""" - if model_name not in self.SUPPORTED_MODELS: + # Resolve shorthand + resolved_name = self._resolve_model_name(model_name) + + if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str): raise ValueError(f"Unsupported OpenAI model: {model_name}") - config = self.SUPPORTED_MODELS[model_name] + config = self.SUPPORTED_MODELS[resolved_name] # Define temperature constraints per model - if model_name in ["o3", "o3-mini"]: - # O3 models only support temperature=1.0 + if resolved_name in ["o3", "o3-mini", "o4-mini", "o4-mini-high"]: + # O3 and O4 reasoning models only support temperature=1.0 temp_constraint = FixedTemperatureConstraint(1.0) else: # Other OpenAI models support 0.0-2.0 range @@ -63,10 +79,19 @@ class OpenAIModelProvider(OpenAICompatibleProvider): def validate_model_name(self, model_name: str) -> bool: """Validate if the model name is supported.""" - return model_name in self.SUPPORTED_MODELS + resolved_name = self._resolve_model_name(model_name) + return resolved_name in self.SUPPORTED_MODELS and isinstance(self.SUPPORTED_MODELS[resolved_name], dict) def supports_thinking_mode(self, model_name: str) -> bool: """Check if the model supports extended thinking mode.""" # Currently no OpenAI models support extended thinking # This may change with future O3 models return False + + def 
_resolve_model_name(self, model_name: str) -> str: + """Resolve model shorthand to full name.""" + # Check if it's a shorthand + shorthand_value = self.SUPPORTED_MODELS.get(model_name) + if isinstance(shorthand_value, str): + return shorthand_value + return model_name diff --git a/setup-docker.sh b/run-server.sh similarity index 97% rename from setup-docker.sh rename to run-server.sh index 0b88acb..8176e60 100755 --- a/setup-docker.sh +++ b/run-server.sh @@ -3,8 +3,12 @@ # Exit on any error, undefined variables, and pipe failures set -euo pipefail -# Modern Docker setup script for Zen MCP Server with Redis -# This script sets up the complete Docker environment including Redis for conversation threading +# Run/Restart script for Zen MCP Server with Redis +# This script builds, starts, and manages the Docker environment including Redis for conversation threading +# Run this script to: +# - Initial setup of the Docker environment +# - Restart services after changing .env configuration +# - Rebuild and restart after code changes # Spinner function for long-running operations show_spinner() { diff --git a/simulator_tests/test_o3_model_selection.py b/simulator_tests/test_o3_model_selection.py index 4a837e2..1feef35 100644 --- a/simulator_tests/test_o3_model_selection.py +++ b/simulator_tests/test_o3_model_selection.py @@ -71,6 +71,15 @@ class O3ModelSelectionTest(BaseSimulatorTest): self.logger.info(" ℹ️ Only OpenRouter configured - O3 models will be routed through OpenRouter") return self._run_openrouter_o3_test() + # If neither OpenAI nor OpenRouter is configured, skip the test + if not has_openai and not has_openrouter: + self.logger.info(" ⚠️ Neither OpenAI nor OpenRouter API keys configured - skipping test") + self.logger.info( + " ℹ️ This test requires either OPENAI_API_KEY or OPENROUTER_API_KEY to be set in .env" + ) + self.logger.info(" ✅ Test skipped (no API keys configured)") + return True # Return True to indicate test passed/skipped + # Original test for when OpenAI is configured self.logger.info(" ℹ️ OpenAI API configured - expecting direct OpenAI API calls") diff --git a/tests/conftest.py b/tests/conftest.py index ed68d78..deabdae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -85,8 +85,10 @@ def mock_provider_availability(request, monkeypatch): the tools don't require model selection unless explicitly testing auto mode. 
""" # Skip this fixture for tests that need real providers - if hasattr(request, "node") and request.node.get_closest_marker("no_mock_provider"): - return + if hasattr(request, "node"): + marker = request.node.get_closest_marker("no_mock_provider") + if marker: + return from unittest.mock import MagicMock diff --git a/tests/test_conversation_field_mapping.py b/tests/test_conversation_field_mapping.py index 5c846fd..834b18c 100644 --- a/tests/test_conversation_field_mapping.py +++ b/tests/test_conversation_field_mapping.py @@ -2,7 +2,6 @@ Test that conversation history is correctly mapped to tool-specific fields """ -import os from datetime import datetime from unittest.mock import MagicMock, patch @@ -130,8 +129,7 @@ async def test_unknown_tool_defaults_to_prompt(): with patch("utils.conversation_memory.get_thread", return_value=mock_context): with patch("utils.conversation_memory.add_turn", return_value=True): with patch("utils.conversation_memory.build_conversation_history", return_value=("History", 500)): - # The test uses the conftest fixture which should handle provider mocking - # We just need to ensure the arguments are correct + # The autouse fixture should handle provider mocking arguments = { "continuation_id": "test-thread-456", "prompt": "User input", diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py index 5fd5e00..81cdeb2 100644 --- a/tests/test_openrouter_provider.py +++ b/tests/test_openrouter_provider.py @@ -72,7 +72,10 @@ class TestOpenRouterProvider: assert provider._resolve_model_name("opus") == "anthropic/claude-3-opus" assert provider._resolve_model_name("sonnet") == "anthropic/claude-3-sonnet" assert provider._resolve_model_name("o3") == "openai/o3" - assert provider._resolve_model_name("o3-mini") == "openai/o3-mini-high" + assert provider._resolve_model_name("o3-mini") == "openai/o3-mini" + assert provider._resolve_model_name("o3mini") == "openai/o3-mini" + assert provider._resolve_model_name("o4-mini") == "openai/o4-mini" + assert provider._resolve_model_name("o4-mini-high") == "openai/o4-mini-high" assert provider._resolve_model_name("claude") == "anthropic/claude-3-sonnet" assert provider._resolve_model_name("mistral") == "mistral/mistral-large" assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528" diff --git a/tests/test_providers.py b/tests/test_providers.py index fa548cb..23fb3c3 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -183,12 +183,31 @@ class TestOpenAIProvider: assert capabilities.context_window == 200_000 assert not capabilities.supports_extended_thinking + def test_get_capabilities_o4_mini(self): + """Test getting O4-mini model capabilities""" + provider = OpenAIModelProvider(api_key="test-key") + + capabilities = provider.get_capabilities("o4-mini") + + assert capabilities.provider == ProviderType.OPENAI + assert capabilities.model_name == "o4-mini" + assert capabilities.context_window == 200_000 + assert not capabilities.supports_extended_thinking + # Check temperature constraint is fixed at 1.0 + assert capabilities.temperature_constraint.value == 1.0 + def test_validate_model_names(self): """Test model name validation""" provider = OpenAIModelProvider(api_key="test-key") assert provider.validate_model_name("o3") - assert provider.validate_model_name("o3-mini") + assert provider.validate_model_name("o3mini") + assert provider.validate_model_name("o3-mini") # Backwards compatibility + assert provider.validate_model_name("o4-mini") + assert 
provider.validate_model_name("o4mini") + assert provider.validate_model_name("o4-mini-high") + assert provider.validate_model_name("o4minihigh") + assert provider.validate_model_name("o4minihi") assert not provider.validate_model_name("gpt-4o") assert not provider.validate_model_name("invalid-model") @@ -197,4 +216,7 @@ class TestOpenAIProvider: provider = OpenAIModelProvider(api_key="test-key") assert not provider.supports_thinking_mode("o3") + assert not provider.supports_thinking_mode("o3mini") assert not provider.supports_thinking_mode("o3-mini") + assert not provider.supports_thinking_mode("o4-mini") + assert not provider.supports_thinking_mode("o4-mini-high")
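For reference, the shorthand handling these assertions exercise can be distilled into a small self-contained sketch (an illustration of the pattern from `providers/openai.py` above, using an abbreviated model table):

```python
# SUPPORTED_MODELS maps full model names to capability dicts and shorthand
# aliases to the canonical name as a plain string, so a single lookup tells
# aliases and real entries apart.
SUPPORTED_MODELS = {
    "o3-mini": {"context_window": 200_000, "supports_extended_thinking": False},
    "o4-mini": {"context_window": 200_000, "supports_extended_thinking": False},
    "o3mini": "o3-mini",   # shorthand -> canonical name
    "o4mini": "o4-mini",
}

def resolve_model_name(model_name: str) -> str:
    """Return the canonical name for a shorthand, or the name unchanged."""
    value = SUPPORTED_MODELS.get(model_name)
    return value if isinstance(value, str) else model_name

def validate_model_name(model_name: str) -> bool:
    """A name is valid only if it resolves to an entry with a capability dict."""
    return isinstance(SUPPORTED_MODELS.get(resolve_model_name(model_name)), dict)

assert resolve_model_name("o4mini") == "o4-mini"
assert validate_model_name("o3mini") and not validate_model_name("gpt-4o")
```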