Renamed setup script to avoid confusion (https://github.com/BeehiveInnovations/zen-mcp-server/issues/35)
Further fixes to tests
Pass O3 simulation test when keys are not set, along with a notice
Updated docs on testing, simulation tests / contributing
Support for OpenAI o4-mini and o4-mini-high
.github/pull_request_template.md (37 changed lines, vendored)

@@ -36,10 +36,32 @@ Please provide a clear and concise description of what this PR does.

## Testing

- [ ] Unit tests pass
- [ ] Integration tests pass (if applicable)
- [ ] Manual testing completed
- [ ] Documentation updated (if needed)
**Please review our [Testing Guide](../docs/testing.md) before submitting.**

### Run all linting and tests (required):
```bash
# Activate virtual environment first
source venv/bin/activate

# Run all linting checks
ruff check .
black --check .
isort --check-only .

# Run all unit tests
python -m pytest -xvs

# If you made tool changes, also run simulator tests
python communication_simulator_test.py
```

- [ ] All linting passes (ruff, black, isort)
- [ ] All unit tests pass
- [ ] **For new features**: Unit tests added in `tests/`
- [ ] **For tool changes**: Simulator tests added in `simulator_tests/`
- [ ] **For bug fixes**: Tests added to prevent regression
- [ ] Simulator tests pass (if applicable)
- [ ] Manual testing completed with realistic scenarios

## Related Issues

@@ -48,11 +70,12 @@ Fixes #(issue number)
## Checklist

- [ ] PR title follows the format guidelines above
- [ ] Code follows the project's style guidelines
- [ ] Activated venv and ran all linting: `source venv/bin/activate && ruff check . && black --check . && isort --check-only .`
- [ ] Self-review completed
- [ ] Tests added/updated as needed
- [ ] **Tests added for ALL changes** (see Testing section above)
- [ ] Documentation updated as needed
- [ ] All tests passing
- [ ] All unit tests passing: `python -m pytest -xvs`
- [ ] Relevant simulator tests passing (if tool changes)
- [ ] Ready for review

## Additional Notes

README.md (17 changed lines)

@@ -124,7 +124,7 @@ git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
cd zen-mcp-server

# One-command setup (includes Redis for AI conversations)
./setup-docker.sh
./run-server.sh
```

**What this does:**

@@ -153,6 +153,9 @@ nano .env
# WORKSPACE_ROOT=/Users/your-username (automatically configured)

# Note: At least one API key OR custom URL is required

# After making changes to .env, restart the server:
# ./run-server.sh
```

### 4. Configure Claude

@@ -184,7 +187,7 @@ This will open a folder revealing `claude_desktop_config.json`.

2. ** Update Docker Configuration**

The setup script shows you the exact configuration. It looks like this. When you ran `setup-docker.sh` it should
The setup script shows you the exact configuration. It looks like this. When you ran `run-server.sh` it should
have produced a configuration for you to copy:

```json

@@ -500,18 +503,24 @@ DEFAULT_MODEL=auto # Claude picks the best model automatically

# API Keys (at least one required)
GEMINI_API_KEY=your-gemini-key # Enables Gemini Pro & Flash
OPENAI_API_KEY=your-openai-key # Enables O3, O3-mini
OPENAI_API_KEY=your-openai-key # Enables O3, O3mini, O4-mini, O4-mini-high
```

**Available Models:**
- **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis
- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses
- **`o3`**: Strong logical reasoning
- **`o3-mini`**: Balanced speed/quality
- **`o3mini`**: Balanced speed/quality
- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
- **`o4-mini-high`**: Enhanced O4 with higher reasoning effort
- **Custom models**: via OpenRouter or local APIs (Ollama, vLLM, etc.)

For detailed configuration options, see the [Advanced Usage Guide](docs/advanced-usage.md).

## Testing

For information on running tests and contributing, see the [Testing Guide](docs/testing.md).

## License

Apache 2.0 License - see LICENSE file for details.

@@ -17,7 +17,7 @@ Usage:
--tests: Run specific tests only (space-separated)
--list-tests: List all available tests
--individual: Run a single test individually
--rebuild: Force rebuild Docker environment using setup-docker.sh
--rebuild: Force rebuild Docker environment using run-server.sh

Available tests:
basic_conversation - Basic conversation flow with chat tool

@@ -115,9 +115,9 @@ class CommunicationSimulator:
self.temp_dir = tempfile.mkdtemp(prefix="mcp_test_")
self.logger.debug(f"Created temp directory: {self.temp_dir}")

# Only run setup-docker.sh if rebuild is requested
# Only run run-server.sh if rebuild is requested
if self.rebuild:
if not self._run_setup_docker():
if not self._run_server_script():
return False

# Always verify containers are running (regardless of rebuild)

@@ -127,34 +127,34 @@ class CommunicationSimulator:
self.logger.error(f"Failed to setup test environment: {e}")
return False

def _run_setup_docker(self) -> bool:
"""Run the setup-docker.sh script"""
def _run_server_script(self) -> bool:
"""Run the run-server.sh script"""
try:
self.logger.info("Running setup-docker.sh...")
self.logger.info("Running run-server.sh...")

# Check if setup-docker.sh exists
setup_script = "./setup-docker.sh"
# Check if run-server.sh exists
setup_script = "./run-server.sh"
if not os.path.exists(setup_script):
self.logger.error(f"setup-docker.sh not found at {setup_script}")
self.logger.error(f"run-server.sh not found at {setup_script}")
return False

# Make sure it's executable
result = self._run_command(["chmod", "+x", setup_script], capture_output=True)
if result.returncode != 0:
self.logger.error(f"Failed to make setup-docker.sh executable: {result.stderr}")
self.logger.error(f"Failed to make run-server.sh executable: {result.stderr}")
return False

# Run the setup script
result = self._run_command([setup_script], capture_output=True)
if result.returncode != 0:
self.logger.error(f"setup-docker.sh failed: {result.stderr}")
self.logger.error(f"run-server.sh failed: {result.stderr}")
return False

self.logger.info("setup-docker.sh completed successfully")
self.logger.info("run-server.sh completed successfully")
return True

except Exception as e:
self.logger.error(f"Failed to run setup-docker.sh: {e}")
self.logger.error(f"Failed to run run-server.sh: {e}")
return False

def _verify_existing_containers(self) -> bool:

@@ -345,9 +345,9 @@ class CommunicationSimulator:
try:
self.logger.info("Cleaning up test environment...")

# Note: We don't stop Docker services ourselves - let setup-docker.sh handle Docker lifecycle
# Note: We don't stop Docker services ourselves - let run-server.sh handle Docker lifecycle
if not self.keep_logs:
self.logger.info("Test completed. Docker containers left running (use setup-docker.sh to manage)")
self.logger.info("Test completed. Docker containers left running (use run-server.sh to manage)")
else:
self.logger.info("Keeping logs and Docker services running for inspection")

@@ -375,7 +375,7 @@ def parse_arguments():
parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
parser.add_argument("--individual", "-i", help="Run a single test individually")
parser.add_argument("--rebuild", action="store_true", help="Force rebuild Docker environment using setup-docker.sh")
parser.add_argument("--rebuild", action="store_true", help="Force rebuild Docker environment using run-server.sh")

return parser.parse_args()

@@ -130,15 +130,42 @@
"supports_function_calling": true,
"description": "OpenAI's o3 model - well-rounded and powerful across domains"
},
{
"model_name": "openai/o3-mini",
"aliases": ["o3-mini", "o3mini"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o3-mini model - balanced performance and speed"
},
{
"model_name": "openai/o3-mini-high",
"aliases": ["o3-mini", "o3mini", "o3-mini-high", "o3mini-high"],
"aliases": ["o3-mini-high", "o3mini-high"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems"
},
{
"model_name": "openai/o4-mini",
"aliases": ["o4-mini", "o4mini"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning"
},
{
"model_name": "openai/o4-mini-high",
"aliases": ["o4-mini-high", "o4mini-high", "o4minihigh", "o4minihi"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks"
},
{
"model_name": "llama3.2",
"aliases": ["local-llama", "local", "llama3.2", "ollama-llama"],

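The `aliases` arrays in the registry entries above let users refer to the same model by several short names. As an illustration only (not part of this commit), a minimal sketch of how entries like these can be flattened into an alias lookup table; the entry data is copied from the snippet, the variable names are hypothetical:

```python
# Illustrative sketch: flatten registry entries like those above into an
# alias -> canonical model name map. Data mirrors the JSON; names are hypothetical.
registry = [
    {"model_name": "openai/o4-mini", "aliases": ["o4-mini", "o4mini"]},
    {"model_name": "openai/o4-mini-high", "aliases": ["o4-mini-high", "o4mini-high", "o4minihigh", "o4minihi"]},
]

alias_map = {alias.lower(): entry["model_name"] for entry in registry for alias in entry["aliases"]}

assert alias_map["o4minihi"] == "openai/o4-mini-high"
```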
config.py (35 changed lines)

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "4.3.0"
__version__ = "4.3.1"
# Last update date in ISO format
__updated__ = "2025-06-14"
# Primary maintainer

@@ -32,23 +32,44 @@ IS_AUTO_MODE = DEFAULT_MODEL.lower() == "auto"

# Model capabilities descriptions for auto mode
# These help Claude choose the best model for each task
#
# IMPORTANT: These are the built-in natively supported models:
# - When GEMINI_API_KEY is set: Enables "flash", "pro" (and their full names)
# - When OPENAI_API_KEY is set: Enables "o3", "o3mini", "o4-mini", "o4-mini-high"
# - When both are set: All models below are available
# - When neither is set but OpenRouter/Custom API is configured: These model
# aliases will automatically map to equivalent models via the proxy provider
#
# In auto mode (DEFAULT_MODEL=auto), Claude will see these descriptions and
# intelligently select the best model for each task. The descriptions appear
# in the tool schema to guide Claude's selection based on task requirements.
MODEL_CAPABILITIES_DESC = {
# Gemini models - Available when GEMINI_API_KEY is configured
"flash": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"pro": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
# OpenAI models - Available when OPENAI_API_KEY is configured
"o3": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
"o3-mini": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
# Full model names also supported
"o4-mini": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
"o4-mini-high": "Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks",
# Full model names also supported (for explicit specification)
"gemini-2.5-flash-preview-05-20": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"gemini-2.5-pro-preview-06-05": (
"Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"
),
}

# Note: When only OpenRouter is configured, these model aliases automatically map to equivalent models:
# - "flash" → "google/gemini-2.5-flash-preview-05-20"
# - "pro" → "google/gemini-2.5-pro-preview-06-05"
# - "o3" → "openai/gpt-4o"
# - "o3-mini" → "openai/gpt-4o-mini"
# OpenRouter/Custom API Fallback Behavior:
# When only OpenRouter or Custom API is configured (no native API keys), these
# model aliases automatically map to equivalent models through the proxy:
# - "flash" → "google/gemini-2.5-flash-preview-05-20" (via OpenRouter)
# - "pro" → "google/gemini-2.5-pro-preview-06-05" (via OpenRouter)
# - "o3" → "openai/o3" (via OpenRouter)
# - "o3mini" → "openai/o3-mini" (via OpenRouter)
# - "o4-mini" → "openai/o4-mini" (via OpenRouter)
# - "o4-mini-high" → "openai/o4-mini-high" (via OpenRouter)
#
# This ensures the same model names work regardless of which provider is configured.


# Temperature defaults for different tool types

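A rough sketch of the fallback behavior those comments describe (illustrative only; the actual resolution happens in the provider code, and the helper name here is hypothetical):

```python
# Illustrative sketch of the documented alias -> proxy mapping used when only
# OpenRouter (or a custom API) is configured; the table mirrors the comments above.
OPENROUTER_ALIAS_FALLBACK = {
    "flash": "google/gemini-2.5-flash-preview-05-20",
    "pro": "google/gemini-2.5-pro-preview-06-05",
    "o3": "openai/o3",
    "o3mini": "openai/o3-mini",
    "o4-mini": "openai/o4-mini",
    "o4-mini-high": "openai/o4-mini-high",
}


def resolve_via_openrouter(alias: str) -> str:
    """Hypothetical helper: map a built-in alias to its OpenRouter equivalent."""
    return OPENROUTER_ALIAS_FALLBACK.get(alias, alias)
```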
@@ -55,6 +55,8 @@ DEFAULT_MODEL=flash # Always use Flash
DEFAULT_MODEL=o3 # Always use O3
```

**Important:** After changing any configuration in `.env` (including `DEFAULT_MODEL`, API keys, or other settings), restart the server with `./run-server.sh` to apply the changes.

**Per-Request Model Override:**
Regardless of your default setting, you can specify models per request:
- "Use **pro** for deep security analysis of auth.py"

docs/testing.md (126 lines, new file)

@@ -0,0 +1,126 @@
# Testing Guide

This project includes comprehensive test coverage through unit tests and integration simulator tests.

## Running Tests

### Prerequisites
- Python virtual environment activated: `source venv/bin/activate`
- All dependencies installed: `pip install -r requirements.txt`
- Docker containers running (for simulator tests): `./run-server.sh`

### Unit Tests

Run all unit tests with pytest:
```bash
# Run all tests with verbose output
python -m pytest -xvs

# Run specific test file
python -m pytest tests/test_providers.py -xvs
```

### Simulator Tests

Simulator tests replicate real-world Claude CLI interactions with the MCP server running in Docker. Unlike unit tests that test isolated functions, simulator tests validate the complete end-to-end flow including:
- Actual MCP protocol communication
- Docker container interactions
- Multi-turn conversations across tools
- Log output validation

**Important**: Simulator tests require `LOG_LEVEL=DEBUG` in your `.env` file to validate detailed execution logs.

#### Running All Simulator Tests
```bash
# Run all simulator tests
python communication_simulator_test.py

# Run with verbose output for debugging
python communication_simulator_test.py --verbose

# Keep Docker logs after tests for inspection
python communication_simulator_test.py --keep-logs
```

#### Running Individual Tests
To run a single simulator test in isolation (useful for debugging or test development):

```bash
# Run a specific test by name
python communication_simulator_test.py --individual basic_conversation

# Examples of available tests:
python communication_simulator_test.py --individual content_validation
python communication_simulator_test.py --individual cross_tool_continuation
python communication_simulator_test.py --individual redis_validation
```

#### Other Options
```bash
# List all available simulator tests with descriptions
python communication_simulator_test.py --list-tests

# Run multiple specific tests (not all)
python communication_simulator_test.py --tests basic_conversation content_validation

# Force Docker environment rebuild before running tests
python communication_simulator_test.py --rebuild
```

### Code Quality Checks

Before committing, ensure all linting passes:
```bash
# Run all linting checks
ruff check .
black --check .
isort --check-only .

# Auto-fix issues
ruff check . --fix
black .
isort .
```

## What Each Test Suite Covers

### Unit Tests (256 tests)
Test isolated components and functions:
- **Provider functionality**: Model initialization, API interactions, capability checks
- **Tool operations**: All MCP tools (chat, analyze, debug, etc.)
- **Conversation memory**: Threading, continuation, history management
- **File handling**: Path validation, token limits, deduplication
- **Auto mode**: Model selection logic and fallback behavior

### Simulator Tests (14 tests)
Validate real-world usage scenarios by simulating actual Claude prompts:
- **Basic conversations**: Multi-turn chat functionality with real prompts
- **Cross-tool continuation**: Context preservation across different tools
- **File deduplication**: Efficient handling of repeated file references
- **Model selection**: Proper routing to configured providers
- **Token allocation**: Context window management in practice
- **Redis validation**: Conversation persistence and retrieval

## Contributing: Test Requirements

When contributing to this project:

1. **New features MUST include tests**:
   - Add unit tests in `tests/` for new functions or classes
   - Test both success and error cases

2. **Tool changes require simulator tests**:
   - Add simulator tests in `simulator_tests/` for new or modified tools
   - Use realistic prompts that demonstrate the feature
   - Validate output through Docker logs

3. **Test naming conventions**:
   - Unit tests: `test_<feature>_<scenario>.py`
   - Simulator tests: `test_<tool>_<behavior>.py`

4. **Before submitting PR**:
   - Run all unit tests: `python -m pytest -xvs`
   - Run relevant simulator tests
   - Ensure all linting passes

Remember: Tests are documentation. They show how features are intended to be used and help prevent regressions.
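To make the contributing requirements concrete, here is a minimal sketch of a unit test in the style the guide asks for (hypothetical file name following the `test_<feature>_<scenario>.py` convention; the import path is an assumption, and the provider API used is the one added elsewhere in this commit):

```python
# tests/test_model_validation_shorthands.py  (hypothetical file name)
# Import path is an assumption; adjust to wherever OpenAIModelProvider lives.
from providers.openai_provider import OpenAIModelProvider


def test_validate_model_name_accepts_shorthand():
    provider = OpenAIModelProvider(api_key="test-key")
    # Success case: a shorthand alias resolves to a supported model
    assert provider.validate_model_name("o4mini")


def test_validate_model_name_rejects_unknown_model():
    provider = OpenAIModelProvider(api_key="test-key")
    # Error case: unknown names must be rejected
    assert not provider.validate_model_name("not-a-real-model")
```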
@@ -43,7 +43,7 @@ cat .env
If you need to update your API keys, edit the `.env` file and then run:

```bash
./setup-docker.sh
./run-server.sh
```

This will validate your configuration and restart the services.

@@ -73,7 +73,7 @@ See [Logging Documentation](logging.md) for more details on accessing logs.

**"API key environment variable is required"**
- Add your API key to the `.env` file
- Run: `./setup-docker.sh` to validate and restart
- Run: `./run-server.sh` to validate and restart

**File path errors**
- Always use absolute paths: `/Users/you/project/file.py`

@@ -1,7 +1,7 @@
{
"comment": "macOS configuration using Docker",
"comment2": "Ensure Docker is running and containers are started",
"comment3": "Run './setup-docker.sh' first to set up the environment",
"comment3": "Run './run-server.sh' first to set up the environment",
"mcpServers": {
"zen": {
"command": "docker",

@@ -1,7 +1,7 @@
{
"comment": "Windows configuration using WSL with Docker",
"comment2": "Ensure Docker Desktop is running and WSL integration is enabled",
"comment3": "Run './setup-docker.sh' in WSL first to set up the environment",
"comment3": "Run './run-server.sh' in WSL first to set up the environment",
"mcpServers": {
"zen": {
"command": "wsl.exe",

@@ -22,6 +22,19 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
},
"o4-mini": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
},
"o4-mini-high": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
},
# Shorthands
"o3mini": "o3-mini",
"o4mini": "o4-mini",
"o4minihigh": "o4-mini-high",
"o4minihi": "o4-mini-high",
}

def __init__(self, api_key: str, **kwargs):

@@ -32,14 +45,17 @@ class OpenAIModelProvider(OpenAICompatibleProvider):

def get_capabilities(self, model_name: str) -> ModelCapabilities:
"""Get capabilities for a specific OpenAI model."""
if model_name not in self.SUPPORTED_MODELS:
# Resolve shorthand
resolved_name = self._resolve_model_name(model_name)

if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
raise ValueError(f"Unsupported OpenAI model: {model_name}")

config = self.SUPPORTED_MODELS[model_name]
config = self.SUPPORTED_MODELS[resolved_name]

# Define temperature constraints per model
if model_name in ["o3", "o3-mini"]:
# O3 models only support temperature=1.0
if resolved_name in ["o3", "o3-mini", "o4-mini", "o4-mini-high"]:
# O3 and O4 reasoning models only support temperature=1.0
temp_constraint = FixedTemperatureConstraint(1.0)
else:
# Other OpenAI models support 0.0-2.0 range

@@ -63,10 +79,19 @@ class OpenAIModelProvider(OpenAICompatibleProvider):

def validate_model_name(self, model_name: str) -> bool:
"""Validate if the model name is supported."""
return model_name in self.SUPPORTED_MODELS
resolved_name = self._resolve_model_name(model_name)
return resolved_name in self.SUPPORTED_MODELS and isinstance(self.SUPPORTED_MODELS[resolved_name], dict)

def supports_thinking_mode(self, model_name: str) -> bool:
"""Check if the model supports extended thinking mode."""
# Currently no OpenAI models support extended thinking
# This may change with future O3 models
return False

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name."""
# Check if it's a shorthand
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
if isinstance(shorthand_value, str):
return shorthand_value
return model_name

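The `SUPPORTED_MODELS` table above mixes capability dicts with plain-string entries that act as shorthand aliases; `_resolve_model_name` follows a string value to the canonical name, and the other methods work on the resolved name. A standalone sketch of that pattern (simplified illustration, not the provider's actual code):

```python
# Simplified illustration of the dict-plus-alias pattern used above.
SUPPORTED_MODELS = {
    "o4-mini": {"context_window": 200_000, "supports_extended_thinking": False},
    "o4mini": "o4-mini",  # a string value marks a shorthand alias
}


def resolve(name: str) -> str:
    value = SUPPORTED_MODELS.get(name)
    return value if isinstance(value, str) else name


def is_supported(name: str) -> bool:
    return isinstance(SUPPORTED_MODELS.get(resolve(name)), dict)


assert resolve("o4mini") == "o4-mini"
assert is_supported("o4mini") and not is_supported("gpt-4o")
```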
@@ -3,8 +3,12 @@
# Exit on any error, undefined variables, and pipe failures
set -euo pipefail

# Modern Docker setup script for Zen MCP Server with Redis
# This script sets up the complete Docker environment including Redis for conversation threading
# Run/Restart script for Zen MCP Server with Redis
# This script builds, starts, and manages the Docker environment including Redis for conversation threading
# Run this script to:
# - Initial setup of the Docker environment
# - Restart services after changing .env configuration
# - Rebuild and restart after code changes

# Spinner function for long-running operations
show_spinner() {

@@ -71,6 +71,15 @@ class O3ModelSelectionTest(BaseSimulatorTest):
self.logger.info(" ℹ️ Only OpenRouter configured - O3 models will be routed through OpenRouter")
return self._run_openrouter_o3_test()

# If neither OpenAI nor OpenRouter is configured, skip the test
if not has_openai and not has_openrouter:
self.logger.info(" ⚠️ Neither OpenAI nor OpenRouter API keys configured - skipping test")
self.logger.info(
" ℹ️ This test requires either OPENAI_API_KEY or OPENROUTER_API_KEY to be set in .env"
)
self.logger.info(" ✅ Test skipped (no API keys configured)")
return True # Return True to indicate test passed/skipped

# Original test for when OpenAI is configured
self.logger.info(" ℹ️ OpenAI API configured - expecting direct OpenAI API calls")

@@ -85,8 +85,10 @@ def mock_provider_availability(request, monkeypatch):
the tools don't require model selection unless explicitly testing auto mode.
"""
# Skip this fixture for tests that need real providers
if hasattr(request, "node") and request.node.get_closest_marker("no_mock_provider"):
return
if hasattr(request, "node"):
marker = request.node.get_closest_marker("no_mock_provider")
if marker:
return

from unittest.mock import MagicMock

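Tests that need the real provider registry opt out of this autouse fixture with the marker it checks for; a minimal hedged example (the marker name comes from the fixture above, the test body is hypothetical):

```python
import pytest


@pytest.mark.no_mock_provider  # tell the autouse fixture to leave providers unmocked
def test_auto_mode_with_real_provider_registry():
    # Hypothetical body: exercise real provider selection instead of mocks.
    ...
```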
@@ -2,7 +2,6 @@
Test that conversation history is correctly mapped to tool-specific fields
"""

import os
from datetime import datetime
from unittest.mock import MagicMock, patch

@@ -130,8 +129,7 @@ async def test_unknown_tool_defaults_to_prompt():
with patch("utils.conversation_memory.get_thread", return_value=mock_context):
with patch("utils.conversation_memory.add_turn", return_value=True):
with patch("utils.conversation_memory.build_conversation_history", return_value=("History", 500)):
# The test uses the conftest fixture which should handle provider mocking
# We just need to ensure the arguments are correct
# The autouse fixture should handle provider mocking
arguments = {
"continuation_id": "test-thread-456",
"prompt": "User input",

@@ -72,7 +72,10 @@ class TestOpenRouterProvider:
assert provider._resolve_model_name("opus") == "anthropic/claude-3-opus"
assert provider._resolve_model_name("sonnet") == "anthropic/claude-3-sonnet"
assert provider._resolve_model_name("o3") == "openai/o3"
assert provider._resolve_model_name("o3-mini") == "openai/o3-mini-high"
assert provider._resolve_model_name("o3-mini") == "openai/o3-mini"
assert provider._resolve_model_name("o3mini") == "openai/o3-mini"
assert provider._resolve_model_name("o4-mini") == "openai/o4-mini"
assert provider._resolve_model_name("o4-mini-high") == "openai/o4-mini-high"
assert provider._resolve_model_name("claude") == "anthropic/claude-3-sonnet"
assert provider._resolve_model_name("mistral") == "mistral/mistral-large"
assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528"

@@ -183,12 +183,31 @@ class TestOpenAIProvider:
assert capabilities.context_window == 200_000
assert not capabilities.supports_extended_thinking

def test_get_capabilities_o4_mini(self):
"""Test getting O4-mini model capabilities"""
provider = OpenAIModelProvider(api_key="test-key")

capabilities = provider.get_capabilities("o4-mini")

assert capabilities.provider == ProviderType.OPENAI
assert capabilities.model_name == "o4-mini"
assert capabilities.context_window == 200_000
assert not capabilities.supports_extended_thinking
# Check temperature constraint is fixed at 1.0
assert capabilities.temperature_constraint.value == 1.0

def test_validate_model_names(self):
"""Test model name validation"""
provider = OpenAIModelProvider(api_key="test-key")

assert provider.validate_model_name("o3")
assert provider.validate_model_name("o3-mini")
assert provider.validate_model_name("o3mini")
assert provider.validate_model_name("o3-mini") # Backwards compatibility
assert provider.validate_model_name("o4-mini")
assert provider.validate_model_name("o4mini")
assert provider.validate_model_name("o4-mini-high")
assert provider.validate_model_name("o4minihigh")
assert provider.validate_model_name("o4minihi")
assert not provider.validate_model_name("gpt-4o")
assert not provider.validate_model_name("invalid-model")

@@ -197,4 +216,7 @@ class TestOpenAIProvider:
provider = OpenAIModelProvider(api_key="test-key")

assert not provider.supports_thinking_mode("o3")
assert not provider.supports_thinking_mode("o3mini")
assert not provider.supports_thinking_mode("o3-mini")
assert not provider.supports_thinking_mode("o4-mini")
assert not provider.supports_thinking_mode("o4-mini-high")
