Rebranding, refactoring, renaming, cleanup, updated docs

2025-06-12 10:40:43 +04:00
parent 9a55ca8898
commit fb66825bf6
55 changed files with 1048 additions and 1474 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,4 +1,4 @@
-# Gemini MCP Server Environment Configuration
+# Zen MCP Server Environment Configuration
 # Copy this file to .env and fill in your values
 # API Keys - At least one is required
@@ -9,8 +9,7 @@ GEMINI_API_KEY=your_gemini_api_key_here
 OPENAI_API_KEY=your_openai_api_key_here
 # Optional: Default model to use
-# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'gpt-4o'
+# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini'
 # Full names: 'gemini-2.5-pro-preview-06-05' or 'gemini-2.0-flash-exp'
 # When set to 'auto', Claude will select the best model for each task
 # Defaults to 'auto' if not specified
 DEFAULT_MODEL=auto
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -28,12 +28,13 @@ jobs:
    - name: Run unit tests
      run: |
-        # Run all tests except live integration tests
+        # Run all unit tests
        # These tests use mocks and don't require API keys
-        python -m pytest tests/ --ignore=tests/test_live_integration.py -v
+        python -m pytest tests/ -v
      env:
        # Ensure no API key is accidentally used in CI
        GEMINI_API_KEY: ""
        OPENAI_API_KEY: ""
  lint:
    runs-on: ubuntu-latest
@@ -56,9 +57,9 @@ jobs:
    - name: Run ruff linter
      run: ruff check .
-  live-tests:
+  simulation-tests:
    runs-on: ubuntu-latest
-    # Only run live tests on main branch pushes (requires manual API key setup)
+    # Only run simulation tests on main branch pushes (requires manual API key setup)
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
    - uses: actions/checkout@v4
@@ -76,24 +77,41 @@ jobs:
    - name: Check API key availability
      id: check-key
      run: |
-        if [ -z "${{ secrets.GEMINI_API_KEY }}" ]; then
+        has_key=false
-          echo "api_key_available=false" >> $GITHUB_OUTPUT
+        if [ -n "${{ secrets.GEMINI_API_KEY }}" ] || [ -n "${{ secrets.OPENAI_API_KEY }}" ]; then
-          echo "⚠️ GEMINI_API_KEY secret not configured - skipping live tests"
+          has_key=true
          echo "✅ API key(s) found - running simulation tests"
        else
-          echo "api_key_available=true" >> $GITHUB_OUTPUT
+          echo "⚠️ No API keys configured - skipping simulation tests"
          echo "✅ GEMINI_API_KEY found - running live tests"
        fi
        echo "api_key_available=$has_key" >> $GITHUB_OUTPUT
-    - name: Run live integration tests
+    - name: Set up Docker
      if: steps.check-key.outputs.api_key_available == 'true'
      uses: docker/setup-buildx-action@v3
    - name: Build Docker image
      if: steps.check-key.outputs.api_key_available == 'true'
      run: |
-        # Run live tests that make actual API calls
+        docker compose build
-        python tests/test_live_integration.py
+    
    - name: Run simulation tests
      if: steps.check-key.outputs.api_key_available == 'true'
      run: |
        # Start services
        docker compose up -d
        # Wait for services to be ready
        sleep 10
        # Run communication simulator tests
        python communication_simulator_test.py --skip-docker
      env:
        GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-    - name: Skip live tests
+    - name: Skip simulation tests
      if: steps.check-key.outputs.api_key_available == 'false'
      run: |
-        echo "🔒 Live integration tests skipped (no API key configured)"
+        echo "🔒 Simulation tests skipped (no API keys configured)"
-        echo "To enable live tests, add GEMINI_API_KEY as a repository secret"
+        echo "To enable simulation tests, add GEMINI_API_KEY and/or OPENAI_API_KEY as repository secrets"
--- a/.gitignore
+++ b/.gitignore
@@ -165,5 +165,4 @@ test_simulation_files/.claude/
 # Temporary test directories
 test-setup/
-/test_simulation_files/config.json
+/test_simulation_files/**
 /test_simulation_files/test_module.py
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,155 +0,0 @@
 # Contributing to Gemini MCP Server
 Thank you for your interest in contributing! This guide explains how to set up the development environment and contribute to the project.
 ## Development Setup
 1. **Clone the repository**
   ```bash
   git clone https://github.com/BeehiveInnovations/gemini-mcp-server.git
   cd gemini-mcp-server
   ```
 2. **Create virtual environment**
   ```bash
   python -m venv venv
   source venv/bin/activate  # On Windows: venv\Scripts\activate
   ```
 3. **Install dependencies**
   ```bash
   pip install -r requirements.txt
   ```
 ## Testing Strategy
 ### Two Types of Tests
 #### 1. Unit Tests (Mandatory - No API Key Required)
 - **Location**: `tests/test_*.py` (except `test_live_integration.py`)
 - **Purpose**: Test logic, mocking, and functionality without API calls
 - **Run with**: `python -m pytest tests/ --ignore=tests/test_live_integration.py -v`
 - **GitHub Actions**: ✅ Always runs
 - **Coverage**: Measures code coverage
 #### 2. Live Integration Tests (Optional - API Key Required)
 - **Location**: `tests/test_live_integration.py` 
 - **Purpose**: Verify actual API integration works
 - **Run with**: `python tests/test_live_integration.py` (requires `GEMINI_API_KEY`)
 - **GitHub Actions**: 🔒 Only runs if `GEMINI_API_KEY` secret is set
 ### Running Tests
 ```bash
 # Run all unit tests (CI-friendly, no API key needed)
 python -m pytest tests/ --ignore=tests/test_live_integration.py -v
 # Run with coverage
 python -m pytest tests/ --ignore=tests/test_live_integration.py --cov=. --cov-report=html
 # Run live integration tests (requires API key)
 export GEMINI_API_KEY=your-api-key-here
 python tests/test_live_integration.py
 ```
 ## Code Quality
 ### Formatting and Linting
 ```bash
 # Install development tools
 pip install black ruff
 # Format code
 black .
 # Lint code
 ruff check .
 ```
 ### Pre-commit Checks
 Before submitting a PR, ensure:
 - [ ] All unit tests pass: `python -m pytest tests/ --ignore=tests/test_live_integration.py -v`
 - [ ] Code is formatted: `black --check .`
 - [ ] Code passes linting: `ruff check .`
 - [ ] Live tests work (if you have API access): `python tests/test_live_integration.py`
 ## Adding New Features
 ### Adding a New Tool
 1. **Create tool file**: `tools/your_tool.py`
 2. **Inherit from BaseTool**: Implement all required methods
 3. **Add system prompt**: Include prompt in `prompts/tool_prompts.py`
 4. **Register tool**: Add to `TOOLS` dict in `server.py`
 5. **Write tests**: Add unit tests that use mocks
 6. **Test live**: Verify with live API calls
 ### Testing New Tools
 ```python
 # Unit test example (tools/test_your_tool.py)
@pytest.mark.asyncio
@patch("tools.base.BaseTool.create_model")
 async def test_your_tool(self, mock_create_model):
    mock_model = Mock()
    mock_model.generate_content.return_value = Mock(
        candidates=[Mock(content=Mock(parts=[Mock(text="Expected response")]))]
    )
    mock_create_model.return_value = mock_model
    tool = YourTool()
    result = await tool.execute({"param": "value"})
    assert len(result) == 1
    assert "Expected response" in result[0].text
 ```
 ## CI/CD Pipeline
 The GitHub Actions workflow:
 1. **Unit Tests**: Run on all Python versions (3.10, 3.11, 3.12)
 2. **Linting**: Check code formatting and style
 3. **Live Tests**: Only run if `GEMINI_API_KEY` secret is available
 ### Key Features:
 - **✅ No API key required for PRs** - All contributors can run tests
 - **🔒 Live verification available** - Maintainers can verify API integration
 - **📊 Coverage reporting** - Track test coverage
 - **🐍 Multi-Python support** - Ensure compatibility
 ## Contribution Guidelines
 ### Pull Request Process
 1. **Fork the repository**
 2. **Create a feature branch**: `git checkout -b feature/your-feature`
 3. **Make your changes**
 4. **Add/update tests**
 5. **Run tests locally**: Ensure unit tests pass
 6. **Submit PR**: Include description of changes
 ### Code Standards
 - **Follow existing patterns**: Look at existing tools for examples
 - **Add comprehensive tests**: Both unit tests (required) and live tests (recommended)
 - **Update documentation**: Update README if adding new features
 - **Use type hints**: All new code should include proper type annotations
 - **Keep it simple**: Follow SOLID principles and keep functions focused
 ### Security Considerations
 - **Never commit API keys**: Use environment variables
 - **Validate inputs**: Always validate user inputs in tools
 - **Handle errors gracefully**: Provide meaningful error messages
 - **Follow security best practices**: Sanitize file paths, validate file access
 ## Getting Help
 - **Issues**: Open an issue for bugs or feature requests
 - **Discussions**: Use GitHub Discussions for questions
 - **Documentation**: Check the README for usage examples
 ## License
 By contributing, you agree that your contributions will be licensed under the MIT License.
--- a/README.md
+++ b/README.md
@@ -3,48 +3,31 @@
  https://github.com/user-attachments/assets/a67099df-9387-4720-9b41-c986243ac11b
 <div align="center">  
-  <b>🤖 Claude + [Gemini / O3 / Both] = Your Ultimate AI Development Team</b>
+  <b>🤖 Claude + [Gemini / O3 / or Both] = Your Ultimate AI Development Team</b>
 </div>
 <br/>
-The ultimate development partner for Claude - a Model Context Protocol server that gives Claude access to multiple AI models for enhanced code analysis, problem-solving, and collaborative development.
+The ultimate development partners for Claude - a Model Context Protocol server that gives Claude access to multiple AI models for enhanced code analysis, 
 problem-solving, and collaborative development.
-**🎯 Auto Mode (NEW):** Set `DEFAULT_MODEL=auto` and Claude will intelligently select the best model for each task:
+**Features true AI orchestration with conversations that continue across tasks** - Give Claude a complex
- **Complex architecture review?** → Claude picks Gemini Pro with extended thinking
+task and let it orchestrate between models automatically. Claude stays in control, performs the actual work, 
- **Quick code formatting?** → Claude picks Gemini Flash for speed
+but gets perspectives from the best AI for each subtask. Claude can switch between different tools _and_ models mid-conversation, 
- **Logical debugging?** → Claude picks O3 for reasoning
+with context carrying forward seamlessly.
 - **Or specify your preference:** "Use flash to quickly analyze this" or "Use o3 for debugging"
 **📚 Supported Models:**
 - **Google Gemini**: 2.5 Pro (extended thinking, 1M tokens) & 2.0 Flash (ultra-fast, 1M tokens)
 - **OpenAI**: O3 (strong reasoning, 200K tokens), O3-mini (faster variant), GPT-4o (128K tokens)
 - **More providers coming soon!**
 **Features true AI orchestration with conversations that continue across tasks** - Give Claude a complex task and let it orchestrate between models automatically. Claude stays in control, performs the actual work, but gets perspectives from the best AI for each subtask. Claude can switch between different tools AND models mid-conversation, with context carrying forward seamlessly.
 **Example Workflow:**
-1. Claude uses Gemini Pro to deeply analyze your architecture
+1. Claude uses Gemini Pro to deeply [`analyze`](#6-analyze---smart-file-analysis) the code in question
-2. Switches to O3 for logical debugging of a specific issue
+2. Switches to O3 to continue [`chatting`](#1-chat---general-development-chat--collaborative-thinking) about its findings 
-3. Uses Flash for quick code formatting
+3. Uses Flash to validate formatting suggestions from O3
-4. Returns to Pro for security review
+4. Performs the actual work after taking in feedback from all three
 5. Returns to Pro for a [`precommit`](#4-precommit---pre-commit-validation) review
-All within a single conversation thread!
+All within a single conversation thread! Gemini Pro in step 5 _knows_ what was recommended by O3 in step 2, taking that context
 and review into consideration to aid with its pre-commit review.
 **Think of it as Claude Code _for_ Claude Code.**
 ---
 > 🚀 **Multi-Provider Support with Auto Mode!**  
 > Claude automatically selects the best model for each task when using `DEFAULT_MODEL=auto`:
 > - **Gemini Pro**: Extended thinking (up to 32K tokens), best for complex problems
 > - **Gemini Flash**: Ultra-fast responses, best for quick tasks
 > - **O3**: Strong reasoning, best for logical problems and debugging
 > - **O3-mini**: Balanced performance, good for moderate complexity
 > - **GPT-4o**: General-purpose, good for explanations and chat
 > 
 > Or manually specify: "Use pro for deep analysis" or "Use o3 to debug this"
 ## Quick Navigation
 - **Getting Started**
@@ -72,7 +55,6 @@ All within a single conversation thread!
 - **Resources**
  - [Windows Setup](#windows-setup-guide) - WSL setup instructions for Windows
  - [Troubleshooting](#troubleshooting) - Common issues and solutions
  - [Contributing](#contributing) - How to contribute
  - [Testing](#testing) - Running tests
 ## Why This Server?
@@ -85,9 +67,9 @@ Claude is brilliant, but sometimes you need:
 - **Professional code reviews** with actionable feedback across entire repositories ([`codereview`](#3-codereview---professional-code-review))
 - **Pre-commit validation** with deep analysis using the best model for the job ([`precommit`](#4-precommit---pre-commit-validation))
 - **Expert debugging** - O3 for logical issues, Gemini for architectural problems ([`debug`](#5-debug---expert-debugging-assistant))
- **Massive context windows** - Gemini (1M tokens), O3 (200K tokens), GPT-4o (128K tokens)
+- **Extended context windows beyond Claude's limits** - Delegate analysis to Gemini (1M tokens) or O3 (200K tokens) for entire codebases, large datasets, or comprehensive documentation
 - **Model-specific strengths** - Extended thinking with Gemini Pro, fast iteration with Flash, strong reasoning with O3
- **Dynamic collaboration** - Models can request additional context from Claude mid-analysis
+- **Dynamic collaboration** - Models can request additional context and follow-up replies from Claude mid-analysis
 - **Smart file handling** - Automatically expands directories, manages token limits based on model capacity
 - **[Bypass MCP's token limits](#working-with-large-prompts)** - Work around MCP's 25K limit automatically
@@ -123,8 +105,8 @@ The final implementation resulted in a 26% improvement in JSON parsing performan
 ```bash
 # Clone to your preferred location
-git clone https://github.com/BeehiveInnovations/gemini-mcp-server.git
+git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
-cd gemini-mcp-server
+cd zen-mcp-server
 # One-command setup (includes Redis for AI conversations)
 ./setup-docker.sh
@@ -147,7 +129,7 @@ nano .env
 # The file will contain:
 # GEMINI_API_KEY=your-gemini-api-key-here  # For Gemini models
 # OPENAI_API_KEY=your-openai-api-key-here  # For O3 model
-# WORKSPACE_ROOT=/workspace  (automatically configured)
+# WORKSPACE_ROOT=/Users/your-username  (automatically configured)
 # Note: At least one API key is required (Gemini or OpenAI)
 ```
@@ -158,13 +140,13 @@ nano .env
 Run the following commands on the terminal to add the MCP directly to Claude Code
 ```bash
 # Add the MCP server directly via Claude Code CLI
-claude mcp add gemini -s user -- docker exec -i gemini-mcp-server python server.py
+claude mcp add zen -s user -- docker exec -i zen-mcp-server python server.py
 # List your MCP servers to verify
 claude mcp list
 # Remove when needed
-claude mcp remove gemini
+claude mcp remove zen
 ```
 #### Claude Desktop
@@ -184,12 +166,12 @@ The setup script shows you the exact configuration. It looks like this:
 ```json
 {
  "mcpServers": {
-    "gemini": {
+    "zen": {
      "command": "docker",
      "args": [
        "exec",
        "-i",
-        "gemini-mcp-server",
+        "zen-mcp-server",
        "python",
        "server.py"
      ]
@@ -289,7 +271,7 @@ This server enables **true AI collaboration** between Claude and multiple AI mod
 - Complex architecture review → Claude picks Gemini Pro
 - Quick formatting check → Claude picks Flash
 - Logical debugging → Claude picks O3
- General explanations → Claude picks GPT-4o
+- General explanations → Claude picks Flash for speed
 **Pro Tip:** Thinking modes (for Gemini models) control depth vs token cost. Use "minimal" or "low" for quick tasks, "high" or "max" for complex problems. [Learn more](#thinking-modes---managing-token-costs--quality)
@@ -307,37 +289,12 @@ This server enables **true AI collaboration** between Claude and multiple AI mod
 **Thinking Mode:** Default is `medium` (8,192 tokens). Use `low` for quick questions to save tokens, or `high` for complex discussions when thoroughness matters.
-#### Example Prompts:
+#### Example Prompt:
 **Basic Usage:**
 ```
-"Use gemini to explain how async/await works in Python"
+Chat with zen and pick the best model for this job. I need to pick between Redis and Memcached for session storage 
-"Get gemini to compare Redis vs Memcached for session storage"
+and I need an expert opinion for the project I'm working on. Get a good idea of what the project does, pick one of the two options
-"Share my authentication design with gemini and get their opinion"
+and then debate with the other models to give me a final verdict.
 "Brainstorm with gemini about scaling strategies for our API"
 ```
 **Managing Token Costs:**
 ```
 # Save tokens (~6k) for simple questions
 "Use gemini with minimal thinking to explain what a REST API is"
 "Chat with gemini using low thinking mode about Python naming conventions"
 # Use default for balanced analysis
 "Get gemini to review my database schema design" (uses default medium)
 # Invest tokens for complex discussions
 "Use gemini with high thinking to brainstorm distributed system architecture"
 ```
 **Collaborative Workflow:**
 ```
 "Research the best message queue for our use case (high throughput, exactly-once delivery).
 Use gemini to compare RabbitMQ, Kafka, and AWS SQS. Based on gemini's analysis and your research,
 recommend the best option with implementation plan."
 "Design a caching strategy for our API. Get gemini's input on Redis vs Memcached vs in-memory caching.
 Combine both perspectives to create a comprehensive caching implementation guide."
 ```
 **Key Features:**
@@ -351,47 +308,18 @@ Combine both perspectives to create a comprehensive caching implementation guide
 - Can reference files for context: `"Use gemini to explain this algorithm with context from algorithm.py"`
 - **Dynamic collaboration**: Gemini can request additional files or context during the conversation if needed for a more thorough response
 - **Web search capability**: Analyzes when web searches would be helpful and recommends specific searches for Claude to perform, ensuring access to current documentation and best practices
 ### 2. `thinkdeep` - Extended Reasoning Partner
 **Get a second opinion to augment Claude's own extended thinking**
 **Thinking Mode:** Default is `high` (16,384 tokens) for deep analysis. Claude will automatically choose the best mode based on complexity - use `low` for quick validations, `medium` for standard problems, `high` for complex issues (default), or `max` for extremely complex challenges requiring deepest analysis.
-#### Example Prompts:
+#### Example Prompt:
 **Basic Usage:**
 ```
-"Use gemini to think deeper about my authentication design"
+Think deeper about my authentication design with zen using max thinking mode and brainstorm to come up 
-"Use gemini to extend my analysis of this distributed system architecture"
+with the best architecture for my project
 ```
 **With Web Search (for exploring new technologies):**
 ```
 "Use gemini to think deeper about using HTMX vs React for this project - enable web search to explore current best practices"
 "Get gemini to think deeper about implementing WebAuthn authentication with web search enabled for latest standards"
 ```
 **Managing Token Costs:**
 ```
 # Claude will intelligently select the right mode, but you can override:
 "Use gemini to think deeper with medium thinking about this refactoring approach" (saves ~8k tokens vs default)
 "Get gemini to think deeper using low thinking to validate my basic approach" (saves ~14k tokens vs default)
 # Use default high for most complex problems
 "Use gemini to think deeper about this security architecture" (uses default high - 16k tokens)
 # For extremely complex challenges requiring maximum depth
 "Use gemini with max thinking to solve this distributed consensus problem" (adds ~16k tokens vs default)
 ```
 **Collaborative Workflow:**
 ```
 "Design an authentication system for our SaaS platform. Then use gemini to review your design
 for security vulnerabilities. After getting gemini's feedback, incorporate the suggestions and
 show me the final improved design."
 "Create an event-driven architecture for our order processing system. Use gemini to think deeper
 about event ordering and failure scenarios. Then integrate gemini's insights and present the enhanced architecture."
 ```
 **Key Features:**
@@ -403,6 +331,7 @@ about event ordering and failure scenarios. Then integrate gemini's insights and
 - Can reference specific files for context: `"Use gemini to think deeper about my API design with reference to api/routes.py"`
 - **Enhanced Critical Evaluation (v2.10.0)**: After Gemini's analysis, Claude is prompted to critically evaluate the suggestions, consider context and constraints, identify risks, and synthesize a final recommendation - ensuring a balanced, well-considered solution
 - **Web search capability**: When enabled (default: true), identifies areas where current documentation or community solutions would strengthen the analysis and suggests specific searches for Claude
 ### 3. `codereview` - Professional Code Review  
 **Comprehensive code analysis with prioritized feedback**
@@ -410,34 +339,9 @@ about event ordering and failure scenarios. Then integrate gemini's insights and
 #### Example Prompts:
 **Basic Usage:**
 ```
-"Use gemini to review auth.py for issues"
+Perform a codereview with zen using gemini pro and review auth.py for security issues and potential vulnerabilities.
-"Use gemini to do a security review of auth/ focusing on authentication"
+I need an actionable plan but break it down into smaller quick-wins that we can implement and test rapidly 
 ```
 **Managing Token Costs:**
 ```
 # Save tokens for style/formatting reviews
 "Use gemini with minimal thinking to check code style in utils.py" (saves ~8k tokens)
 "Review this file with gemini using low thinking for basic issues" (saves ~6k tokens)
 # Default for standard reviews
 "Use gemini to review the API endpoints" (uses default medium)
 # Invest tokens for critical code
 "Get gemini to review auth.py with high thinking mode for security issues" (adds ~8k tokens)
 "Use gemini with max thinking to audit our encryption module" (adds ~24k tokens - justified for security)
 ```
 **Collaborative Workflow:**
 ```
 "Refactor the authentication module to use dependency injection. Then use gemini to
 review your refactoring for any security vulnerabilities. Based on gemini's feedback,
 make any necessary adjustments and show me the final secure implementation."
 "Optimize the slow database queries in user_service.py. Get gemini to review your optimizations
 for potential regressions or edge cases. Incorporate gemini's suggestions and present the final optimized queries."
 ```
 **Key Features:**
@@ -445,6 +349,7 @@ make any necessary adjustments and show me the final secure implementation."
 - Supports specialized reviews: security, performance, quick
 - Can enforce coding standards: `"Use gemini to review src/ against PEP8 standards"`
 - Filters by severity: `"Get gemini to review auth/ - only report critical vulnerabilities"`
 ### 4. `precommit` - Pre-Commit Validation
 **Comprehensive review of staged/unstaged git changes across multiple repositories**
@@ -454,7 +359,7 @@ make any necessary adjustments and show me the final secure implementation."
  <img src="https://github.com/user-attachments/assets/584adfa6-d252-49b4-b5b0-0cd6e97fb2c6" width="950">
 </div>
-**Prompt:**
+**Prompt Used:**
 ```
 Now use gemini and perform a review and precommit and ensure original requirements are met, no duplication of code or
 logic, everything should work as expected
@@ -464,35 +369,8 @@ How beautiful is that? Claude used `precommit` twice and `codereview` once and a
 #### Example Prompts:
 **Basic Usage:**
 ```
-"Use gemini to review my pending changes before I commit"
+Use zen and perform a thorough precommit ensuring there aren't any new regressions or bugs introduced
 "Get gemini to validate all my git changes match the original requirements"
 "Review pending changes in the frontend/ directory"
 ```
 **Managing Token Costs:**
 ```
 # Save tokens for small changes
 "Use gemini with low thinking to review my README updates" (saves ~6k tokens)
 "Review my config changes with gemini using minimal thinking" (saves ~8k tokens)
 # Default for regular commits
 "Use gemini to review my feature changes" (uses default medium)
 # Invest tokens for critical releases
 "Use gemini with high thinking to review changes before production release" (adds ~8k tokens)
 "Get gemini to validate all changes with max thinking for this security patch" (adds ~24k tokens - worth it!)
 ```
 **Collaborative Workflow:**
 ```
 "I've implemented the user authentication feature. Use gemini to review all pending changes
 across the codebase to ensure they align with the security requirements. Fix any issues
 gemini identifies before committing."
 "Review all my changes for the API refactoring task. Get gemini to check for incomplete
 implementations or missing test coverage. Update the code based on gemini's findings."
 ```
 **Key Features:**
@@ -524,37 +402,6 @@ implementations or missing test coverage. Update the code based on gemini's find
 "Get gemini to debug why my API returns 500 errors with the full stack trace: [paste traceback]"
 ```
 **With Web Search (for unfamiliar errors):**
 ```
 "Use gemini to debug this cryptic Kubernetes error with web search enabled to find similar issues"
 "Debug this React hydration error with gemini - enable web search to check for known solutions"
 ```
 **Managing Token Costs:**
 ```
 # Save tokens for simple errors
 "Use gemini with minimal thinking to debug this syntax error" (saves ~8k tokens)
 "Debug this import error with gemini using low thinking" (saves ~6k tokens)
 # Default for standard debugging
 "Use gemini to debug why this function returns null" (uses default medium)
 # Invest tokens for complex bugs
 "Use gemini with high thinking to debug this race condition" (adds ~8k tokens)
 "Get gemini to debug this memory leak with max thinking mode" (adds ~24k tokens - find that leak!)
 ```
 **Collaborative Workflow:**
 ```
 "I'm getting 'ConnectionPool limit exceeded' errors under load. Debug the issue and use
 gemini to analyze it deeper with context from db/pool.py. Based on gemini's root cause analysis,
 implement a fix and get gemini to validate the solution will scale."
 "Debug why tests fail randomly on CI. Once you identify potential causes, share with gemini along
 with test logs and CI configuration. Apply gemini's debugging strategy, then use gemini to
 suggest preventive measures."
 ```
 **Key Features:**
 - Generates multiple ranked hypotheses for systematic debugging
 - Accepts error context, stack traces, and logs
@@ -576,36 +423,6 @@ suggest preventive measures."
 "Get gemini to do an architecture analysis of the src/ directory"
 ```
 **With Web Search (for unfamiliar code):**
 ```
 "Use gemini to analyze this GraphQL schema with web search enabled to understand best practices"
 "Analyze this Rust code with gemini - enable web search to look up unfamiliar patterns and idioms"
 ```
 **Managing Token Costs:**
 ```
 # Save tokens for quick overviews
 "Use gemini with minimal thinking to analyze what config.py does" (saves ~8k tokens)
 "Analyze this utility file with gemini using low thinking" (saves ~6k tokens)
 # Default for standard analysis
 "Use gemini to analyze the API structure" (uses default medium)
 # Invest tokens for deep analysis
 "Use gemini with high thinking to analyze the entire codebase architecture" (adds ~8k tokens)
 "Get gemini to analyze system design with max thinking for refactoring plan" (adds ~24k tokens)
 ```
 **Collaborative Workflow:**
 ```
 "Analyze our project structure in src/ and identify architectural improvements. Share your
 analysis with gemini for a deeper review of design patterns and anti-patterns. Based on both
 analyses, create a refactoring roadmap."
 "Perform a security analysis of our authentication system. Use gemini to analyze auth/, middleware/, and api/ for vulnerabilities.
 Combine your findings with gemini's to create a comprehensive security report."
 ```
 **Key Features:**
 - Analyzes single files or entire directories
 - Supports specialized analysis types: architecture, performance, security, quality
@@ -627,7 +444,7 @@ All tools that work with files support **both individual files and entire direct
 **`analyze`** - Analyze files or directories
 - `files`: List of file paths or directories (required)
 - `question`: What to analyze (required)  
- `model`: auto|pro|flash|o3|o3-mini|gpt-4o (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini (default: server default)
 - `analysis_type`: architecture|performance|security|quality|general
 - `output_format`: summary|detailed|actionable
 - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
@@ -642,7 +459,7 @@ All tools that work with files support **both individual files and entire direct
 **`codereview`** - Review code files or directories
 - `files`: List of file paths or directories (required)
- `model`: auto|pro|flash|o3|o3-mini|gpt-4o (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini (default: server default)
 - `review_type`: full|security|performance|quick
 - `focus_on`: Specific aspects to focus on
 - `standards`: Coding standards to enforce
@@ -658,7 +475,7 @@ All tools that work with files support **both individual files and entire direct
 **`debug`** - Debug with file context
 - `error_description`: Description of the issue (required)
- `model`: auto|pro|flash|o3|o3-mini|gpt-4o (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini (default: server default)
 - `error_context`: Stack trace or logs
 - `files`: Files or directories related to the issue
 - `runtime_info`: Environment details
@@ -674,7 +491,7 @@ All tools that work with files support **both individual files and entire direct
 **`thinkdeep`** - Extended analysis with file context
 - `current_analysis`: Your current thinking (required)
- `model`: auto|pro|flash|o3|o3-mini|gpt-4o (default: server default)
+- `model`: auto|pro|flash|o3|o3-mini (default: server default)
 - `problem_context`: Additional context
 - `focus_areas`: Specific aspects to focus on
 - `files`: Files or directories for context
@@ -800,16 +617,16 @@ To help choose the right tool for your needs:
 **Examples by scenario:**
 ```
 # Quick style check
-"Use gemini to review formatting in utils.py with minimal thinking"
+"Use o3 to review formatting in utils.py with minimal thinking"
 # Security audit
-"Get gemini to do a security review of auth/ with thinking mode high"
+"Get o3 to do a security review of auth/ with thinking mode high"
 # Complex debugging
-"Use gemini to debug this race condition with max thinking mode"
+"Use zen to debug this race condition with max thinking mode"
 # Architecture analysis
-"Analyze the entire src/ directory architecture with high thinking"
+"Analyze the entire src/ directory architecture with high thinking using zen"
 ```
 ## Advanced Features
@@ -831,7 +648,7 @@ The MCP protocol has a combined request+response limit of approximately 25K toke
 User: "Use gemini to review this code: [50,000+ character detailed analysis]"
 # Server detects the large prompt and responds:
-Gemini MCP: "The prompt is too large for MCP's token limits (>50,000 characters). 
+Zen MCP: "The prompt is too large for MCP's token limits (>50,000 characters). 
 Please save the prompt text to a temporary file named 'prompt.txt' and resend 
 the request with an empty prompt string and the absolute file path included 
 in the files parameter, along with any other files you wish to share as context."
@@ -928,7 +745,7 @@ DEFAULT_MODEL=auto  # Claude picks the best model automatically
 # API Keys (at least one required)
 GEMINI_API_KEY=your-gemini-key    # Enables Gemini Pro & Flash
-OPENAI_API_KEY=your-openai-key    # Enables O3, O3-mini, GPT-4o
+OPENAI_API_KEY=your-openai-key    # Enables O3, O3-mini
 ```
 **How Auto Mode Works:**
@@ -944,7 +761,6 @@ OPENAI_API_KEY=your-openai-key    # Enables O3, O3-mini, GPT-4o
 | **`flash`** (Gemini 2.0 Flash) | Google | 1M tokens | Ultra-fast responses | Quick checks, formatting, simple analysis |
 | **`o3`** | OpenAI | 200K tokens | Strong logical reasoning | Debugging logic errors, systematic analysis |
 | **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks |
 | **`gpt-4o`** | OpenAI | 128K tokens | General purpose | Explanations, documentation, chat |
 **Manual Model Selection:**
 You can specify a default model instead of auto mode:
@@ -966,7 +782,6 @@ Regardless of your default setting, you can specify models per request:
 **Model Capabilities:**
 - **Gemini Models**: Support thinking modes (minimal to max), web search, 1M context
 - **O3 Models**: Excellent reasoning, systematic analysis, 200K context
 - **GPT-4o**: Balanced general-purpose model, 128K context
 ### Temperature Defaults
 Different tools use optimized temperature settings:
@@ -1011,15 +826,16 @@ When using any Gemini tool, always provide absolute paths:
 By default, the server allows access to files within your home directory. This is necessary for the server to work with any file you might want to analyze from Claude.
-**To restrict access to a specific project directory**, set the `MCP_PROJECT_ROOT` environment variable:
+**For Docker environments**, the `WORKSPACE_ROOT` environment variable maps your local directory to the container's internal `/workspace` directory, so the MCP server can translate absolute file paths correctly:
 ```json
 "env": {
  "GEMINI_API_KEY": "your-key",
-  "MCP_PROJECT_ROOT": "/Users/you/specific-project"
+  "WORKSPACE_ROOT": "/Users/you/project"  // Maps to /workspace inside Docker
 }
 ```
-This creates a sandbox limiting file access to only that directory and its subdirectories.
+This allows Claude to use absolute paths that will be correctly translated between your local filesystem and the Docker container.
 ## How System Prompts Work
@@ -1044,18 +860,6 @@ To modify tool behavior, you can:
 2. Override `get_system_prompt()` in a tool class for tool-specific changes
 3. Use the `temperature` parameter to adjust response style (0.2 for focused, 0.7 for creative)
 ## Contributing
 We welcome contributions! The modular architecture makes it easy to add new tools:
 1. Create a new tool in `tools/`
 2. Inherit from `BaseTool`
 3. Implement required methods (including `get_system_prompt()`)
 4. Add your system prompt to `prompts/tool_prompts.py`
 5. Register your tool in `TOOLS` dict in `server.py`
 See existing tools for examples.
 ## Testing
 ### Unit Tests (No API Key Required)
@@ -1063,32 +867,48 @@ The project includes comprehensive unit tests that use mocks and don't require a
 ```bash
 # Run all unit tests
-python -m pytest tests/ --ignore=tests/test_live_integration.py -v
+python -m pytest tests/ -v
 # Run with coverage
-python -m pytest tests/ --ignore=tests/test_live_integration.py --cov=. --cov-report=html
+python -m pytest tests/ --cov=. --cov-report=html
 ```
-### Live Integration Tests (API Key Required)
+### Simulation Tests (API Key Required)
-To test actual API integration:
+To test the MCP server with comprehensive end-to-end simulation:
 ```bash
-# Set your API key
+# Set your API keys (at least one required)
-export GEMINI_API_KEY=your-api-key-here
+export GEMINI_API_KEY=your-gemini-api-key-here
 export OPENAI_API_KEY=your-openai-api-key-here
-# Run live integration tests
+# Run all simulation tests (default: uses existing Docker containers)
-python tests/test_live_integration.py
+python communication_simulator_test.py
 # Run specific tests only
 python communication_simulator_test.py --tests basic_conversation content_validation
 # Run with Docker rebuild (if needed)
 python communication_simulator_test.py --rebuild-docker
 # List available tests
 python communication_simulator_test.py --list-tests
 ```
 The simulation tests validate:
 - Basic conversation flow with continuation
 - File handling and deduplication
 - Cross-tool conversation threading
 - Redis memory persistence
 - Docker container integration
 ### GitHub Actions CI/CD
 The project includes GitHub Actions workflows that:
 - **✅ Run unit tests automatically** - No API key needed, uses mocks
 - **✅ Test on Python 3.10, 3.11, 3.12** - Ensures compatibility
 - **✅ Run linting and formatting checks** - Maintains code quality
 - **🔒 Run simulation tests only if API keys are available** - Optional end-to-end verification
-The CI pipeline works without any secrets and will pass all tests using mocked responses. Live integration tests only run if a `GEMINI_API_KEY` secret is configured in the repository.
+The CI pipeline works without any secrets and will pass all tests using mocked responses. Simulation tests require API key secrets (`GEMINI_API_KEY` and/or `OPENAI_API_KEY`) to run the communication simulator.
 ## Troubleshooting
@@ -1097,14 +917,14 @@ The CI pipeline works without any secrets and will pass all tests using mocked r
 **"Connection failed" in Claude Desktop**
 - Ensure Docker services are running: `docker compose ps`
 - Check if the container name is correct: `docker ps` to see actual container names
- Verify your .env file has the correct GEMINI_API_KEY
+- Verify your .env file has at least one valid API key (GEMINI_API_KEY or OPENAI_API_KEY)
-**"GEMINI_API_KEY environment variable is required"**
+**"API key environment variable is required"**
- Edit your .env file and add your API key
+- Edit your .env file and add at least one API key (Gemini or OpenAI)
 - Restart services: `docker compose restart`
 **Container fails to start**
- Check logs: `docker compose logs gemini-mcp`
+- Check logs: `docker compose logs zen-mcp`
 - Ensure Docker has enough resources (memory/disk space)
 - Try rebuilding: `docker compose build --no-cache`
@@ -1119,25 +939,12 @@ The CI pipeline works without any secrets and will pass all tests using mocked r
 docker compose ps
 # Test manual connection
-docker exec -i gemini-mcp-server-gemini-mcp-1 echo "Connection test"
+docker exec -i zen-mcp-server echo "Connection test"
 # View logs
 docker compose logs -f
 ```
 **Conversation threading not working?**
 If you're not seeing follow-up questions from Gemini:
 ```bash
 # Check if Redis is running
 docker compose logs redis
 # Test conversation memory system
 docker exec -i gemini-mcp-server-gemini-mcp-1 python debug_conversation.py
 # Check for threading errors in logs
 docker compose logs gemini-mcp | grep "threading failed"
 ```
 ## License
 MIT License - see LICENSE file for details.
--- a/claude_config_example.json
+++ b/claude_config_example.json
@@ -1,13 +1,17 @@
 {
-  "comment": "Example Claude Desktop configuration for Gemini MCP Server",
+  "comment": "Example Claude Desktop configuration for Zen MCP Server",
  "comment2": "For Docker setup, use examples/claude_config_docker_home.json",
  "comment3": "For platform-specific examples, see the examples/ directory",
  "mcpServers": {
-    "gemini": {
+    "zen": {
-      "command": "/path/to/gemini-mcp-server/run_gemini.sh",
+      "command": "docker",
-      "env": {
+      "args": [
-        "GEMINI_API_KEY": "your-gemini-api-key-here"
+        "exec",
-      }
+        "-i",
        "zen-mcp-server",
        "python",
        "server.py"
      ]
    }
  }
 }
--- a/communication_simulator_test.py
+++ b/communication_simulator_test.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """
-Communication Simulator Test for Gemini MCP Server
+Communication Simulator Test for Zen MCP Server
-This script provides comprehensive end-to-end testing of the Gemini MCP server
+This script provides comprehensive end-to-end testing of the Zen MCP server
 by simulating real Claude CLI communications and validating conversation
 continuity, file handling, deduplication features, and clarification scenarios.
@@ -63,8 +63,8 @@ class CommunicationSimulator:
        self.keep_logs = keep_logs
        self.selected_tests = selected_tests or []
        self.temp_dir = None
-        self.container_name = "gemini-mcp-server"
+        self.container_name = "zen-mcp-server"
-        self.redis_container = "gemini-mcp-redis"
+        self.redis_container = "zen-mcp-redis"
        # Import test registry
        from simulator_tests import TEST_REGISTRY
@@ -282,7 +282,7 @@ class CommunicationSimulator:
    def print_test_summary(self):
        """Print comprehensive test results summary"""
        print("\\n" + "=" * 70)
-        print("🧪 GEMINI MCP COMMUNICATION SIMULATOR - TEST RESULTS SUMMARY")
+        print("🧪 ZEN MCP COMMUNICATION SIMULATOR - TEST RESULTS SUMMARY")
        print("=" * 70)
        passed_count = sum(1 for result in self.test_results.values() if result)
@@ -303,7 +303,7 @@ class CommunicationSimulator:
    def run_full_test_suite(self, skip_docker_setup: bool = False) -> bool:
        """Run the complete test suite"""
        try:
-            self.logger.info("🚀 Starting Gemini MCP Communication Simulator Test Suite")
+            self.logger.info("🚀 Starting Zen MCP Communication Simulator Test Suite")
            # Setup
            if not skip_docker_setup:
@@ -359,7 +359,7 @@ class CommunicationSimulator:
 def parse_arguments():
    """Parse and validate command line arguments"""
-    parser = argparse.ArgumentParser(description="Gemini MCP Communication Simulator Test")
+    parser = argparse.ArgumentParser(description="Zen MCP Communication Simulator Test")
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
    parser.add_argument("--keep-logs", action="store_true", help="Keep Docker services running for log inspection")
    parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
--- a/config.py
+++ b/config.py
@@ -1,7 +1,7 @@
 """
-Configuration and constants for Gemini MCP Server
+Configuration and constants for Zen MCP Server
-This module centralizes all configuration settings for the Gemini MCP Server.
+This module centralizes all configuration settings for the Zen MCP Server.
 It defines model configurations, token limits, temperature defaults, and other
 constants used throughout the application.
@@ -29,8 +29,11 @@ DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "auto")
 VALID_MODELS = ["auto", "flash", "pro", "o3", "o3-mini", "gemini-2.0-flash-exp", "gemini-2.5-pro-preview-06-05"]
 if DEFAULT_MODEL not in VALID_MODELS:
    import logging
    logger = logging.getLogger(__name__)
-    logger.warning(f"Invalid DEFAULT_MODEL '{DEFAULT_MODEL}'. Setting to 'auto'. Valid options: {', '.join(VALID_MODELS)}")
+    logger.warning(
        f"Invalid DEFAULT_MODEL '{DEFAULT_MODEL}'. Setting to 'auto'. Valid options: {', '.join(VALID_MODELS)}"
    )
    DEFAULT_MODEL = "auto"
 # Auto mode detection - when DEFAULT_MODEL is "auto", Claude picks the model
@@ -45,7 +48,7 @@ MODEL_CAPABILITIES_DESC = {
    "o3-mini": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
    # Full model names also supported
    "gemini-2.0-flash-exp": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
-    "gemini-2.5-pro-preview-06-05": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis"
+    "gemini-2.5-pro-preview-06-05": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
 }
 # Token allocation for Gemini Pro (1M total capacity)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,7 +1,7 @@
 services:
  redis:
    image: redis:7-alpine
-    container_name: gemini-mcp-redis
+    container_name: zen-mcp-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
@@ -20,10 +20,10 @@ services:
        reservations:
          memory: 256M
-  gemini-mcp:
+  zen-mcp:
    build: .
-    image: gemini-mcp-server:latest
+    image: zen-mcp-server:latest
-    container_name: gemini-mcp-server
+    container_name: zen-mcp-server
    restart: unless-stopped
    depends_on:
      redis:
@@ -50,11 +50,11 @@ services:
  log-monitor:
    build: .
-    image: gemini-mcp-server:latest
+    image: zen-mcp-server:latest
-    container_name: gemini-mcp-log-monitor
+    container_name: zen-mcp-log-monitor
    restart: unless-stopped
    depends_on:
-      - gemini-mcp
+      - zen-mcp
    environment:
      - PYTHONUNBUFFERED=1
    volumes:
--- a/examples/claude_config_docker_home.json
+++ b/examples/claude_config_docker_home.json
@@ -1,18 +1,18 @@
 {
  "comment": "Docker configuration that mounts your home directory",
-  "comment2": "Update paths: /path/to/gemini-mcp-server/.env and /Users/your-username",
+  "comment2": "Update paths: /path/to/zen-mcp-server/.env and /Users/your-username",
  "comment3": "The container auto-detects /workspace as sandbox from WORKSPACE_ROOT",
  "mcpServers": {
-    "gemini": {
+    "zen": {
      "command": "docker",
      "args": [
        "run",
        "--rm",
        "-i",
-        "--env-file", "/path/to/gemini-mcp-server/.env",
+        "--env-file", "/path/to/zen-mcp-server/.env",
        "-e", "WORKSPACE_ROOT=/Users/your-username",
        "-v", "/Users/your-username:/workspace:ro",
-        "gemini-mcp-server:latest"
+        "zen-mcp-server:latest"
      ]
    }
  }
--- a/examples/claude_config_macos.json
+++ b/examples/claude_config_macos.json
@@ -1,13 +1,17 @@
 {
-  "comment": "Traditional macOS/Linux configuration (non-Docker)",
+  "comment": "macOS configuration using Docker",
-  "comment2": "Replace YOUR_USERNAME with your actual username",
+  "comment2": "Ensure Docker is running and containers are started",
-  "comment3": "This gives access to all files under your home directory",
+  "comment3": "Run './setup-docker.sh' first to set up the environment",
  "mcpServers": {
-    "gemini": {
+    "zen": {
-      "command": "/Users/YOUR_USERNAME/gemini-mcp-server/run_gemini.sh",
+      "command": "docker",
-      "env": {
+      "args": [
-        "GEMINI_API_KEY": "your-gemini-api-key-here"
+        "exec",
-      }
+        "-i",
        "zen-mcp-server",
        "python",
        "server.py"
      ]
    }
  }
 }
--- a/examples/claude_config_wsl.json
+++ b/examples/claude_config_wsl.json
@@ -1,14 +1,18 @@
 {
-  "comment": "Windows configuration using WSL (Windows Subsystem for Linux)",
+  "comment": "Windows configuration using WSL with Docker",
-  "comment2": "Replace YOUR_WSL_USERNAME with your WSL username",
+  "comment2": "Ensure Docker Desktop is running and WSL integration is enabled",
-  "comment3": "Make sure the server is installed in your WSL environment",
+  "comment3": "Run './setup-docker.sh' in WSL first to set up the environment",
  "mcpServers": {
-    "gemini": {
+    "zen": {
      "command": "wsl.exe",
-      "args": ["/home/YOUR_WSL_USERNAME/gemini-mcp-server/run_gemini.sh"],
+      "args": [
-      "env": {
+        "docker",
-        "GEMINI_API_KEY": "your-gemini-api-key-here"
+        "exec",
-      }
+        "-i",
        "zen-mcp-server",
        "python",
        "server.py"
      ]
    }
  }
 }
--- a/providers/init.py
+++ b/providers/init.py
@@ -1,9 +1,9 @@
 """Model provider abstractions for supporting multiple AI providers."""
-from .base import ModelProvider, ModelResponse, ModelCapabilities
+from .base import ModelCapabilities, ModelProvider, ModelResponse
 from .registry import ModelProviderRegistry
 from .gemini import GeminiModelProvider
 from .openai import OpenAIModelProvider
 from .registry import ModelProviderRegistry
 __all__ = [
    "ModelProvider",
--- a/providers/base.py
+++ b/providers/base.py
@@ -2,12 +2,13 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Any, Tuple
 from enum import Enum
 from typing import Any, Optional
 class ProviderType(Enum):
    """Supported model provider types."""
    GOOGLE = "google"
    OPENAI = "openai"
@@ -79,9 +80,9 @@ class RangeTemperatureConstraint(TemperatureConstraint):
 class DiscreteTemperatureConstraint(TemperatureConstraint):
    """For models supporting only specific temperature values."""
-    def __init__(self, allowed_values: List[float], default: float = None):
+    def __init__(self, allowed_values: list[float], default: float = None):
        self.allowed_values = sorted(allowed_values)
-        self.default_temp = default or allowed_values[len(allowed_values)//2]
+        self.default_temp = default or allowed_values[len(allowed_values) // 2]
    def validate(self, temperature: float) -> bool:
        return any(abs(temperature - val) < 1e-6 for val in self.allowed_values)
@@ -99,6 +100,7 @@ class DiscreteTemperatureConstraint(TemperatureConstraint):
@dataclass
 class ModelCapabilities:
    """Capabilities and constraints for a specific model."""
    provider: ProviderType
    model_name: str
    friendly_name: str  # Human-friendly name like "Gemini" or "OpenAI"
@@ -115,7 +117,7 @@ class ModelCapabilities:
    # Backward compatibility property for existing code
    @property
-    def temperature_range(self) -> Tuple[float, float]:
+    def temperature_range(self) -> tuple[float, float]:
        """Backward compatibility for existing code that uses temperature_range."""
        if isinstance(self.temperature_constraint, RangeTemperatureConstraint):
            return (self.temperature_constraint.min_temp, self.temperature_constraint.max_temp)
@@ -130,12 +132,13 @@ class ModelCapabilities:
@dataclass
 class ModelResponse:
    """Response from a model provider."""
    content: str
-    usage: Dict[str, int] = field(default_factory=dict)  # input_tokens, output_tokens, total_tokens
+    usage: dict[str, int] = field(default_factory=dict)  # input_tokens, output_tokens, total_tokens
    model_name: str = ""
    friendly_name: str = ""  # Human-friendly name like "Gemini" or "OpenAI"
    provider: ProviderType = ProviderType.GOOGLE
-    metadata: Dict[str, Any] = field(default_factory=dict)  # Provider-specific metadata
+    metadata: dict[str, Any] = field(default_factory=dict)  # Provider-specific metadata
    @property
    def total_tokens(self) -> int:
@@ -164,7 +167,7 @@ class ModelProvider(ABC):
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_output_tokens: Optional[int] = None,
-        **kwargs
+        **kwargs,
    ) -> ModelResponse:
        """Generate content using the model.
@@ -196,12 +199,7 @@ class ModelProvider(ABC):
        """Validate if the model name is supported by this provider."""
        pass
-    def validate_parameters(
+    def validate_parameters(self, model_name: str, temperature: float, **kwargs) -> None:
        self, 
        model_name: str,
        temperature: float,
        **kwargs
    ) -> None:
        """Validate model parameters against capabilities.
        Raises:
@@ -213,8 +211,7 @@ class ModelProvider(ABC):
        min_temp, max_temp = capabilities.temperature_range
        if not min_temp <= temperature <= max_temp:
            raise ValueError(
-                f"Temperature {temperature} out of range [{min_temp}, {max_temp}] "
+                f"Temperature {temperature} out of range [{min_temp}, {max_temp}] " f"for model {model_name}"
                f"for model {model_name}"
            )
    @abstractmethod
--- a/providers/gemini.py
+++ b/providers/gemini.py
@@ -1,17 +1,11 @@
 """Gemini model provider implementation."""
-import os
+from typing import Optional
-from typing import Dict, Optional, List
+
 from google import genai
 from google.genai import types
-from .base import (
+from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, RangeTemperatureConstraint
    ModelProvider, 
    ModelResponse, 
    ModelCapabilities, 
    ProviderType,
    RangeTemperatureConstraint
 )
 class GeminiModelProvider(ModelProvider):
@@ -34,11 +28,11 @@ class GeminiModelProvider(ModelProvider):
    # Thinking mode configurations for models that support it
    THINKING_BUDGETS = {
-        "minimal": 128,   # Minimum for 2.5 Pro - fast responses
+        "minimal": 128,  # Minimum for 2.5 Pro - fast responses
-        "low": 2048,      # Light reasoning tasks
+        "low": 2048,  # Light reasoning tasks
-        "medium": 8192,   # Balanced reasoning (default)
+        "medium": 8192,  # Balanced reasoning (default)
-        "high": 16384,    # Complex analysis
+        "high": 16384,  # Complex analysis
-        "max": 32768,     # Maximum reasoning depth
+        "max": 32768,  # Maximum reasoning depth
    }
    def __init__(self, api_key: str, **kwargs):
@@ -87,7 +81,7 @@ class GeminiModelProvider(ModelProvider):
        temperature: float = 0.7,
        max_output_tokens: Optional[int] = None,
        thinking_mode: str = "medium",
-        **kwargs
+        **kwargs,
    ) -> ModelResponse:
        """Generate content using Gemini model."""
        # Validate parameters
@@ -136,8 +130,10 @@ class GeminiModelProvider(ModelProvider):
                provider=ProviderType.GOOGLE,
                metadata={
                    "thinking_mode": thinking_mode if capabilities.supports_extended_thinking else None,
-                    "finish_reason": getattr(response.candidates[0], "finish_reason", "STOP") if response.candidates else "STOP",
+                    "finish_reason": (
-                }
+                        getattr(response.candidates[0], "finish_reason", "STOP") if response.candidates else "STOP"
                    ),
                },
            )
        except Exception as e:
@@ -147,7 +143,7 @@ class GeminiModelProvider(ModelProvider):
    def count_tokens(self, text: str, model_name: str) -> int:
        """Count tokens for the given text using Gemini's tokenizer."""
-        resolved_name = self._resolve_model_name(model_name)
+        self._resolve_model_name(model_name)
        # For now, use a simple estimation
        # TODO: Use actual Gemini tokenizer when available in SDK
@@ -176,7 +172,7 @@ class GeminiModelProvider(ModelProvider):
            return shorthand_value
        return model_name
-    def _extract_usage(self, response) -> Dict[str, int]:
+    def _extract_usage(self, response) -> dict[str, int]:
        """Extract token usage from Gemini response."""
        usage = {}
--- a/providers/openai.py
+++ b/providers/openai.py
@@ -1,18 +1,17 @@
 """OpenAI model provider implementation."""
 import os
 from typing import Dict, Optional, List, Any
 import logging
 from typing import Optional
 from openai import OpenAI
 from .base import (
    FixedTemperatureConstraint,
    ModelCapabilities,
    ModelProvider,
    ModelResponse,
    ModelCapabilities, 
    ProviderType,
-    FixedTemperatureConstraint,
+    RangeTemperatureConstraint,
    RangeTemperatureConstraint
 )
@@ -85,7 +84,7 @@ class OpenAIModelProvider(ModelProvider):
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_output_tokens: Optional[int] = None,
-        **kwargs
+        **kwargs,
    ) -> ModelResponse:
        """Generate content using OpenAI model."""
        # Validate parameters
@@ -132,7 +131,7 @@ class OpenAIModelProvider(ModelProvider):
                    "model": response.model,  # Actual model used (in case of fallbacks)
                    "id": response.id,
                    "created": response.created,
-                }
+                },
            )
        except Exception as e:
@@ -166,7 +165,7 @@ class OpenAIModelProvider(ModelProvider):
        # This may change with future O3 models
        return False
-    def _extract_usage(self, response) -> Dict[str, int]:
+    def _extract_usage(self, response) -> dict[str, int]:
        """Extract token usage from OpenAI response."""
        usage = {}
--- a/providers/registry.py
+++ b/providers/registry.py
@@ -1,7 +1,8 @@
 """Model provider registry for managing available providers."""
 import os
-from typing import Dict, Optional, Type, List
+from typing import Optional
 from .base import ModelProvider, ProviderType
@@ -9,8 +10,8 @@ class ModelProviderRegistry:
    """Registry for managing model providers."""
    _instance = None
-    _providers: Dict[ProviderType, Type[ModelProvider]] = {}
+    _providers: dict[ProviderType, type[ModelProvider]] = {}
-    _initialized_providers: Dict[ProviderType, ModelProvider] = {}
+    _initialized_providers: dict[ProviderType, ModelProvider] = {}
    def __new__(cls):
        """Singleton pattern for registry."""
@@ -19,7 +20,7 @@ class ModelProviderRegistry:
        return cls._instance
    @classmethod
-    def register_provider(cls, provider_type: ProviderType, provider_class: Type[ModelProvider]) -> None:
+    def register_provider(cls, provider_type: ProviderType, provider_class: type[ModelProvider]) -> None:
        """Register a new provider class.
        Args:
@@ -72,7 +73,7 @@ class ModelProviderRegistry:
            ModelProvider instance that supports this model
        """
        # Check each registered provider
-        for provider_type, provider_class in cls._providers.items():
+        for provider_type, _provider_class in cls._providers.items():
            # Get or create provider instance
            provider = cls.get_provider(provider_type)
            if provider and provider.validate_model_name(model_name):
@@ -81,12 +82,12 @@ class ModelProviderRegistry:
        return None
    @classmethod
-    def get_available_providers(cls) -> List[ProviderType]:
+    def get_available_providers(cls) -> list[ProviderType]:
        """Get list of registered provider types."""
        return list(cls._providers.keys())
    @classmethod
-    def get_available_models(cls) -> Dict[str, ProviderType]:
+    def get_available_models(cls) -> dict[str, ProviderType]:
        """Get mapping of all available models to their providers.
        Returns:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ ignore = [
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401"]
 "tests/*" = ["B011"]
 "tests/conftest.py" = ["E402"]  # Module level imports not at top of file - needed for test setup
 [build-system]
 requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
--- a/server.py
+++ b/server.py
@@ -1,8 +1,8 @@
 """
-Gemini MCP Server - Main server implementation
+Zen MCP Server - Main server implementation
 This module implements the core MCP (Model Context Protocol) server that provides
-AI-powered tools for code analysis, review, and assistance using Google's Gemini models.
+AI-powered tools for code analysis, review, and assistance using multiple AI models.
 The server follows the MCP specification to expose various AI tools as callable functions
 that can be used by MCP clients (like Claude). Each tool provides specialized functionality
@@ -102,7 +102,7 @@ logger = logging.getLogger(__name__)
 # Create the MCP server instance with a unique name identifier
 # This name is used by MCP clients to identify and connect to this specific server
-server: Server = Server("gemini-server")
+server: Server = Server("zen-server")
 # Initialize the tool registry with all available AI-powered tools
 # Each tool provides specialized functionality for different development tasks
@@ -388,6 +388,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
    # Create model context early to use for history building
    from utils.model_context import ModelContext
    model_context = ModelContext.from_arguments(arguments)
    # Build conversation history with model-specific limits
@@ -404,7 +405,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
    # All tools now use standardized 'prompt' field
    original_prompt = arguments.get("prompt", "")
-    logger.debug(f"[CONVERSATION_DEBUG] Extracting user input from 'prompt' field")
+    logger.debug("[CONVERSATION_DEBUG] Extracting user input from 'prompt' field")
    logger.debug(f"[CONVERSATION_DEBUG] User input length: {len(original_prompt)} chars")
    # Merge original context with new prompt and follow-up instructions
@@ -420,7 +421,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
    # Store the enhanced prompt in the prompt field
    enhanced_arguments["prompt"] = enhanced_prompt
-    logger.debug(f"[CONVERSATION_DEBUG] Storing enhanced prompt in 'prompt' field")
+    logger.debug("[CONVERSATION_DEBUG] Storing enhanced prompt in 'prompt' field")
    # Calculate remaining token budget based on current model
    # (model_context was already created above for history building)
@@ -494,7 +495,7 @@ async def handle_get_version() -> list[TextContent]:
    }
    # Format the information in a human-readable way
-    text = f"""Gemini MCP Server v{__version__}
+    text = f"""Zen MCP Server v{__version__}
 Updated: {__updated__}
 Author: {__author__}
@@ -508,7 +509,7 @@ Configuration:
 Available Tools:
 {chr(10).join(f"  - {tool}" for tool in version_info["available_tools"])}
-For updates, visit: https://github.com/BeehiveInnovations/gemini-mcp-server"""
+For updates, visit: https://github.com/BeehiveInnovations/zen-mcp-server"""
    # Create standardized tool output
    tool_output = ToolOutput(status="success", content=text, content_type="text", metadata={"tool_name": "get_version"})
@@ -531,11 +532,12 @@ async def main():
    configure_providers()
    # Log startup message for Docker log monitoring
-    logger.info("Gemini MCP Server starting up...")
+    logger.info("Zen MCP Server starting up...")
    logger.info(f"Log level: {log_level}")
    # Log current model mode
    from config import IS_AUTO_MODE
    if IS_AUTO_MODE:
        logger.info("Model mode: AUTO (Claude will select the best model for each task)")
    else:
@@ -556,7 +558,7 @@ async def main():
            read_stream,
            write_stream,
            InitializationOptions(
-                server_name="gemini",
+                server_name="zen",
                server_version=__version__,
                capabilities=ServerCapabilities(tools=ToolsCapability()),  # Advertise tool support capability
            ),
--- a/setup-docker.sh
+++ b/setup-docker.sh
@@ -3,10 +3,10 @@
 # Exit on any error, undefined variables, and pipe failures
 set -euo pipefail
-# Modern Docker setup script for Gemini MCP Server with Redis
+# Modern Docker setup script for Zen MCP Server with Redis
 # This script sets up the complete Docker environment including Redis for conversation threading
-echo "🚀 Setting up Gemini MCP Server with Docker Compose..."
+echo "🚀 Setting up Zen MCP Server with Docker Compose..."
 echo ""
 # Get the current working directory (absolute path)
@@ -131,7 +131,7 @@ $COMPOSE_CMD down --remove-orphans >/dev/null 2>&1 || true
 # Clean up any old containers with different naming patterns
 OLD_CONTAINERS_FOUND=false
-# Check for old Gemini MCP container
+# Check for old Gemini MCP containers (for migration)
 if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server-gemini-mcp-1$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old container: gemini-mcp-server-gemini-mcp-1"
@@ -139,6 +139,21 @@ if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server-gemini-mcp-1
    docker rm gemini-mcp-server-gemini-mcp-1 >/dev/null 2>&1 || true
 fi
 if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old container: gemini-mcp-server"
    docker stop gemini-mcp-server >/dev/null 2>&1 || true
    docker rm gemini-mcp-server >/dev/null 2>&1 || true
 fi
 # Check for current old containers (from recent versions)
 if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-log-monitor$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old container: gemini-mcp-log-monitor"
    docker stop gemini-mcp-log-monitor >/dev/null 2>&1 || true
    docker rm gemini-mcp-log-monitor >/dev/null 2>&1 || true
 fi
 # Check for old Redis container
 if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server-redis-1$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
@@ -147,17 +162,37 @@ if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server-redis-1$" 2>
    docker rm gemini-mcp-server-redis-1 >/dev/null 2>&1 || true
 fi
-# Check for old image
+if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-redis$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old container: gemini-mcp-redis"
    docker stop gemini-mcp-redis >/dev/null 2>&1 || true
    docker rm gemini-mcp-redis >/dev/null 2>&1 || true
 fi
 # Check for old images
 if docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^gemini-mcp-server-gemini-mcp:latest$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old image: gemini-mcp-server-gemini-mcp:latest"
    docker rmi gemini-mcp-server-gemini-mcp:latest >/dev/null 2>&1 || true
 fi
 if docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^gemini-mcp-server:latest$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old image: gemini-mcp-server:latest"
    docker rmi gemini-mcp-server:latest >/dev/null 2>&1 || true
 fi
 # Check for current old network (if it exists)
 if docker network ls --format "{{.Name}}" | grep -q "^gemini-mcp-server_default$" 2>/dev/null || false; then
    OLD_CONTAINERS_FOUND=true
    echo "  - Cleaning up old network: gemini-mcp-server_default"
    docker network rm gemini-mcp-server_default >/dev/null 2>&1 || true
 fi
 # Only show cleanup messages if something was actually cleaned up
 # Build and start services
-echo "  - Building Gemini MCP Server image..."
+echo "  - Building Zen MCP Server image..."
 if $COMPOSE_CMD build --no-cache >/dev/null 2>&1; then
    echo "✅ Docker image built successfully!"
 else
@@ -209,12 +244,12 @@ echo ""
 echo "===== CLAUDE DESKTOP CONFIGURATION ====="
 echo "{"
 echo "  \"mcpServers\": {"
-echo "    \"gemini\": {"
+echo "    \"zen\": {"
 echo "      \"command\": \"docker\","
 echo "      \"args\": ["
 echo "        \"exec\","
 echo "        \"-i\","
-echo "        \"gemini-mcp-server\","
+echo "        \"zen-mcp-server\","
 echo "        \"python\","
 echo "        \"server.py\""
 echo "      ]"
@@ -225,13 +260,13 @@ echo "==========================================="
 echo ""
 echo "===== CLAUDE CODE CLI CONFIGURATION ====="
 echo "# Add the MCP server via Claude Code CLI:"
-echo "claude mcp add gemini -s user -- docker exec -i gemini-mcp-server python server.py"
+echo "claude mcp add zen -s user -- docker exec -i zen-mcp-server python server.py"
 echo ""
 echo "# List your MCP servers to verify:"
 echo "claude mcp list"
 echo ""
 echo "# Remove if needed:"
-echo "claude mcp remove gemini -s user"
+echo "claude mcp remove zen -s user"
 echo "==========================================="
 echo ""
--- a/simulator_tests/init.py
+++ b/simulator_tests/init.py
@@ -1,13 +1,14 @@
 """
 Communication Simulator Tests Package
-This package contains individual test modules for the Gemini MCP Communication Simulator.
+This package contains individual test modules for the Zen MCP Communication Simulator.
 Each test is in its own file for better organization and maintainability.
 """
 from .base_test import BaseSimulatorTest
 from .test_basic_conversation import BasicConversationTest
 from .test_content_validation import ContentValidationTest
 from .test_conversation_chain_validation import ConversationChainValidationTest
 from .test_cross_tool_comprehensive import CrossToolComprehensiveTest
 from .test_cross_tool_continuation import CrossToolContinuationTest
 from .test_logs_validation import LogsValidationTest
@@ -16,7 +17,6 @@ from .test_o3_model_selection import O3ModelSelectionTest
 from .test_per_tool_deduplication import PerToolDeduplicationTest
 from .test_redis_validation import RedisValidationTest
 from .test_token_allocation_validation import TokenAllocationValidationTest
 from .test_conversation_chain_validation import ConversationChainValidationTest
 # Test registry for dynamic loading
 TEST_REGISTRY = {
--- a/simulator_tests/base_test.py
+++ b/simulator_tests/base_test.py
@@ -19,8 +19,8 @@ class BaseSimulatorTest:
        self.verbose = verbose
        self.test_files = {}
        self.test_dir = None
-        self.container_name = "gemini-mcp-server"
+        self.container_name = "zen-mcp-server"
-        self.redis_container = "gemini-mcp-redis"
+        self.redis_container = "zen-mcp-redis"
        # Configure logging
        log_level = logging.DEBUG if verbose else logging.INFO
--- a/simulator_tests/test_content_validation.py
+++ b/simulator_tests/test_content_validation.py
@@ -6,7 +6,6 @@ Tests that tools don't duplicate file content in their responses.
 This test is specifically designed to catch content duplication bugs.
 """
 import json
 import os
 from .base_test import BaseSimulatorTest
@@ -31,6 +30,7 @@ class ContentValidationTest(BaseSimulatorTest):
            cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"]
            import subprocess
            result_server = subprocess.run(cmd_server, capture_output=True, text=True)
            result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
@@ -76,6 +76,7 @@ DATABASE_CONFIG = {
            # Get timestamp for log filtering
            import datetime
            start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
            # Test 1: Initial tool call with validation file
@@ -139,26 +140,25 @@ DATABASE_CONFIG = {
            # Check for proper file embedding logs
            embedding_logs = [
-                line for line in logs.split("\n")
+                line for line in logs.split("\n") if "📁" in line or "embedding" in line.lower() or "[FILES]" in line
                if "📁" in line or "embedding" in line.lower() or "[FILES]" in line
            ]
            # Check for deduplication evidence
            deduplication_logs = [
-                line for line in logs.split("\n")
+                line
                for line in logs.split("\n")
                if "skipping" in line.lower() and "already in conversation" in line.lower()
            ]
            # Check for file processing patterns
            new_file_logs = [
-                line for line in logs.split("\n")
+                line for line in logs.split("\n") if "all 1 files are new" in line or "New conversation" in line
                if "all 1 files are new" in line or "New conversation" in line
            ]
            # Validation criteria
            validation_file_mentioned = any("validation_config.py" in line for line in logs.split("\n"))
            embedding_found = len(embedding_logs) > 0
-            proper_deduplication = len(deduplication_logs) > 0 or len(new_file_logs) >= 2  # Should see new conversation patterns
+            (len(deduplication_logs) > 0 or len(new_file_logs) >= 2)  # Should see new conversation patterns
            self.logger.info(f"  📊 Embedding logs found: {len(embedding_logs)}")
            self.logger.info(f"  📊 Deduplication evidence: {len(deduplication_logs)}")
@@ -175,7 +175,7 @@ DATABASE_CONFIG = {
            success_criteria = [
                ("Embedding logs found", embedding_found),
                ("File processing evidence", validation_file_mentioned),
-                ("Multiple tool calls", len(new_file_logs) >= 2)
+                ("Multiple tool calls", len(new_file_logs) >= 2),
            ]
            passed_criteria = sum(1 for _, passed in success_criteria if passed)
--- a/simulator_tests/test_conversation_chain_validation.py
+++ b/simulator_tests/test_conversation_chain_validation.py
@@ -21,10 +21,8 @@ This validates the conversation threading system's ability to:
 - Properly traverse parent relationships for history reconstruction
 """
 import datetime
 import subprocess
 import re
-from typing import Dict, List, Tuple, Optional
+import subprocess
 from .base_test import BaseSimulatorTest
@@ -55,43 +53,35 @@ class ConversationChainValidationTest(BaseSimulatorTest):
            self.logger.error(f"Failed to get server logs: {e}")
            return ""
-    def extract_thread_creation_logs(self, logs: str) -> List[Dict[str, str]]:
+    def extract_thread_creation_logs(self, logs: str) -> list[dict[str, str]]:
        """Extract thread creation logs with parent relationships"""
        thread_logs = []
-        lines = logs.split('\n')
+        lines = logs.split("\n")
        for line in lines:
            if "[THREAD] Created new thread" in line:
                # Parse: [THREAD] Created new thread 9dc779eb-645f-4850-9659-34c0e6978d73 with parent a0ce754d-c995-4b3e-9103-88af429455aa
-                match = re.search(r'\[THREAD\] Created new thread ([a-f0-9-]+) with parent ([a-f0-9-]+|None)', line)
+                match = re.search(r"\[THREAD\] Created new thread ([a-f0-9-]+) with parent ([a-f0-9-]+|None)", line)
                if match:
                    thread_id = match.group(1)
                    parent_id = match.group(2) if match.group(2) != "None" else None
-                    thread_logs.append({
+                    thread_logs.append({"thread_id": thread_id, "parent_id": parent_id, "log_line": line})
                        "thread_id": thread_id,
                        "parent_id": parent_id,
                        "log_line": line
                    })
        return thread_logs
-    def extract_history_traversal_logs(self, logs: str) -> List[Dict[str, str]]:
+    def extract_history_traversal_logs(self, logs: str) -> list[dict[str, str]]:
        """Extract conversation history traversal logs"""
        traversal_logs = []
-        lines = logs.split('\n')
+        lines = logs.split("\n")
        for line in lines:
            if "[THREAD] Retrieved chain of" in line:
                # Parse: [THREAD] Retrieved chain of 3 threads for 9dc779eb-645f-4850-9659-34c0e6978d73
-                match = re.search(r'\[THREAD\] Retrieved chain of (\d+) threads for ([a-f0-9-]+)', line)
+                match = re.search(r"\[THREAD\] Retrieved chain of (\d+) threads for ([a-f0-9-]+)", line)
                if match:
                    chain_length = int(match.group(1))
                    thread_id = match.group(2)
-                    traversal_logs.append({
+                    traversal_logs.append({"thread_id": thread_id, "chain_length": chain_length, "log_line": line})
                        "thread_id": thread_id,
                        "chain_length": chain_length,
                        "log_line": line
                    })
        return traversal_logs
@@ -138,7 +128,7 @@ class TestClass:
                return False
            self.logger.info(f"    ✅ Step A1 completed - thread_id: {continuation_id_a1[:8]}...")
-            conversation_chains['A1'] = continuation_id_a1
+            conversation_chains["A1"] = continuation_id_a1
            # Step A2: Continue with analyze tool (creates thread_id_2 with parent=thread_id_1)
            self.logger.info("    Step A2: Analyze tool - continue Chain A")
@@ -159,7 +149,7 @@ class TestClass:
                return False
            self.logger.info(f"    ✅ Step A2 completed - thread_id: {continuation_id_a2[:8]}...")
-            conversation_chains['A2'] = continuation_id_a2
+            conversation_chains["A2"] = continuation_id_a2
            # Step A3: Continue with debug tool (creates thread_id_3 with parent=thread_id_2)
            self.logger.info("    Step A3: Debug tool - continue Chain A")
@@ -180,7 +170,7 @@ class TestClass:
                return False
            self.logger.info(f"    ✅ Step A3 completed - thread_id: {continuation_id_a3[:8]}...")
-            conversation_chains['A3'] = continuation_id_a3
+            conversation_chains["A3"] = continuation_id_a3
            # === CHAIN B: Start independent conversation ===
            self.logger.info("  🔗 Chain B: Starting independent conversation")
@@ -202,7 +192,7 @@ class TestClass:
                return False
            self.logger.info(f"    ✅ Step B1 completed - thread_id: {continuation_id_b1[:8]}...")
-            conversation_chains['B1'] = continuation_id_b1
+            conversation_chains["B1"] = continuation_id_b1
            # Step B2: Continue the new conversation (creates thread_id_5 with parent=thread_id_4)
            self.logger.info("    Step B2: Analyze tool - continue Chain B")
@@ -222,7 +212,7 @@ class TestClass:
                return False
            self.logger.info(f"    ✅ Step B2 completed - thread_id: {continuation_id_b2[:8]}...")
-            conversation_chains['B2'] = continuation_id_b2
+            conversation_chains["B2"] = continuation_id_b2
            # === CHAIN A BRANCH: Go back to original conversation ===
            self.logger.info("  🔗 Chain A Branch: Resume original conversation from A1")
@@ -246,7 +236,7 @@ class TestClass:
                return False
            self.logger.info(f"    ✅ Step A1-Branch completed - thread_id: {continuation_id_a1_branch[:8]}...")
-            conversation_chains['A1_Branch'] = continuation_id_a1_branch
+            conversation_chains["A1_Branch"] = continuation_id_a1_branch
            # === ANALYSIS: Validate thread relationships and history traversal ===
            self.logger.info("  📊 Analyzing conversation chain structure...")
@@ -263,7 +253,9 @@ class TestClass:
            if self.verbose:
                self.logger.debug("    Thread creation logs found:")
                for log in thread_creation_logs:
-                    self.logger.debug(f"      {log['thread_id'][:8]}... parent: {log['parent_id'][:8] if log['parent_id'] else 'None'}...")
+                    self.logger.debug(
                        f"      {log['thread_id'][:8]}... parent: {log['parent_id'][:8] if log['parent_id'] else 'None'}..."
                    )
                self.logger.debug("    History traversal logs found:")
                for log in history_traversal_logs:
                    self.logger.debug(f"      {log['thread_id'][:8]}... chain length: {log['chain_length']}")
@@ -275,26 +267,30 @@ class TestClass:
            # Only continuation threads (A2, A3, B2, A1-Branch) will appear in creation logs
            # Find logs for each continuation thread
-            a2_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_a2), None)
+            a2_log = next((log for log in thread_creation_logs if log["thread_id"] == continuation_id_a2), None)
-            a3_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_a3), None)
+            a3_log = next((log for log in thread_creation_logs if log["thread_id"] == continuation_id_a3), None)
-            b2_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_b2), None)
+            b2_log = next((log for log in thread_creation_logs if log["thread_id"] == continuation_id_b2), None)
-            a1_branch_log = next((log for log in thread_creation_logs if log['thread_id'] == continuation_id_a1_branch), None)
+            a1_branch_log = next(
                (log for log in thread_creation_logs if log["thread_id"] == continuation_id_a1_branch), None
            )
            # A2 should have A1 as parent
            if a2_log:
-                expected_relationships.append(("A2 has A1 as parent", a2_log['parent_id'] == continuation_id_a1))
+                expected_relationships.append(("A2 has A1 as parent", a2_log["parent_id"] == continuation_id_a1))
            # A3 should have A2 as parent
            if a3_log:
-                expected_relationships.append(("A3 has A2 as parent", a3_log['parent_id'] == continuation_id_a2))
+                expected_relationships.append(("A3 has A2 as parent", a3_log["parent_id"] == continuation_id_a2))
            # B2 should have B1 as parent (independent chain)
            if b2_log:
-                expected_relationships.append(("B2 has B1 as parent", b2_log['parent_id'] == continuation_id_b1))
+                expected_relationships.append(("B2 has B1 as parent", b2_log["parent_id"] == continuation_id_b1))
            # A1-Branch should have A1 as parent (branching)
            if a1_branch_log:
-                expected_relationships.append(("A1-Branch has A1 as parent", a1_branch_log['parent_id'] == continuation_id_a1))
+                expected_relationships.append(
                    ("A1-Branch has A1 as parent", a1_branch_log["parent_id"] == continuation_id_a1)
                )
            # Validate history traversal
            traversal_validations = []
@@ -306,8 +302,8 @@ class TestClass:
            if len(history_traversal_logs) > 0:
                # Validate that any traversal logs we find have reasonable chain lengths
                for log in history_traversal_logs:
-                    thread_id = log['thread_id']
+                    thread_id = log["thread_id"]
-                    chain_length = log['chain_length']
+                    chain_length = log["chain_length"]
                    # Chain length should be at least 2 for any continuation thread
                    # (original thread + continuation thread)
@@ -328,10 +324,14 @@ class TestClass:
                        thread_description = "A1-Branch (should be 2-thread chain)"
                        is_valid_length = chain_length == 2
-                    traversal_validations.append((f"{thread_description[:8]}... has valid chain length", is_valid_length))
+                    traversal_validations.append(
                        (f"{thread_description[:8]}... has valid chain length", is_valid_length)
                    )
                # Also validate we found at least one traversal (shows the system is working)
-                traversal_validations.append(("At least one history traversal occurred", len(history_traversal_logs) >= 1))
+                traversal_validations.append(
                    ("At least one history traversal occurred", len(history_traversal_logs) >= 1)
                )
            # === VALIDATION RESULTS ===
            self.logger.info("  📊 Thread Relationship Validation:")
@@ -354,7 +354,7 @@ class TestClass:
            total_relationship_checks = len(expected_relationships)
            total_traversal_checks = len(traversal_validations)
-            self.logger.info(f"  📊 Validation Summary:")
+            self.logger.info("  📊 Validation Summary:")
            self.logger.info(f"    Thread relationships: {relationship_passed}/{total_relationship_checks}")
            self.logger.info(f"    History traversal: {traversal_passed}/{total_traversal_checks}")
@@ -364,7 +364,9 @@ class TestClass:
            # If no traversal checks were possible, it means no traversal logs were found
            # This could indicate an issue since we expect at least some history building
            if total_traversal_checks == 0:
-                self.logger.warning("    No history traversal logs found - this may indicate conversation history is always pre-embedded")
+                self.logger.warning(
                    "    No history traversal logs found - this may indicate conversation history is always pre-embedded"
                )
                # Still consider it successful since the thread relationships are what matter most
                traversal_success = True
            else:
@@ -372,8 +374,10 @@ class TestClass:
            overall_success = relationship_success and traversal_success
-            self.logger.info(f"  📊 Conversation Chain Structure:")
+            self.logger.info("  📊 Conversation Chain Structure:")
-            self.logger.info(f"    Chain A: {continuation_id_a1[:8]} → {continuation_id_a2[:8]} → {continuation_id_a3[:8]}")
+            self.logger.info(
                f"    Chain A: {continuation_id_a1[:8]} → {continuation_id_a2[:8]} → {continuation_id_a3[:8]}"
            )
            self.logger.info(f"    Chain B: {continuation_id_b1[:8]} → {continuation_id_b2[:8]}")
            self.logger.info(f"    Branch:  {continuation_id_a1[:8]} → {continuation_id_a1_branch[:8]}")
--- a/simulator_tests/test_o3_model_selection.py
+++ b/simulator_tests/test_o3_model_selection.py
@@ -49,7 +49,7 @@ class O3ModelSelectionTest(BaseSimulatorTest):
            self.setup_test_files()
            # Get timestamp for log filtering
-            start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
+            datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
            # Test 1: Explicit O3 model selection
            self.logger.info("  1: Testing explicit O3 model selection")
@@ -120,32 +120,21 @@ def multiply(x, y):
            logs = self.get_recent_server_logs()
            # Check for OpenAI API calls (this proves O3 models are being used)
-            openai_api_logs = [
+            openai_api_logs = [line for line in logs.split("\n") if "Sending request to openai API" in line]
                line for line in logs.split("\n")
                if "Sending request to openai API" in line
            ]
            # Check for OpenAI HTTP responses (confirms successful O3 calls)
            openai_http_logs = [
-                line for line in logs.split("\n")
+                line for line in logs.split("\n") if "HTTP Request: POST https://api.openai.com" in line
                if "HTTP Request: POST https://api.openai.com" in line
            ]
            # Check for received responses from OpenAI
-            openai_response_logs = [
+            openai_response_logs = [line for line in logs.split("\n") if "Received response from openai API" in line]
                line for line in logs.split("\n")
                if "Received response from openai API" in line
            ]
            # Check that we have both chat and codereview tool calls to OpenAI
-            chat_openai_logs = [
+            chat_openai_logs = [line for line in logs.split("\n") if "Sending request to openai API for chat" in line]
                line for line in logs.split("\n")
                if "Sending request to openai API for chat" in line
            ]
            codereview_openai_logs = [
-                line for line in logs.split("\n")
+                line for line in logs.split("\n") if "Sending request to openai API for codereview" in line
                if "Sending request to openai API for codereview" in line
            ]
            # Validation criteria - we expect 3 OpenAI calls (2 chat + 1 codereview)
@@ -178,7 +167,7 @@ def multiply(x, y):
                ("OpenAI HTTP requests successful", openai_http_success),
                ("OpenAI responses received", openai_responses_received),
                ("Chat tool used OpenAI", chat_calls_to_openai),
-                ("Codereview tool used OpenAI", codereview_calls_to_openai)
+                ("Codereview tool used OpenAI", codereview_calls_to_openai),
            ]
            passed_criteria = sum(1 for _, passed in success_criteria if passed)
--- a/simulator_tests/test_token_allocation_validation.py
+++ b/simulator_tests/test_token_allocation_validation.py
@@ -10,9 +10,8 @@ This test validates that:
 """
 import datetime
 import subprocess
 import re
-from typing import Dict, List, Tuple
+import subprocess
 from .base_test import BaseSimulatorTest
@@ -43,12 +42,12 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
            self.logger.error(f"Failed to get server logs: {e}")
            return ""
-    def extract_conversation_usage_logs(self, logs: str) -> List[Dict[str, int]]:
+    def extract_conversation_usage_logs(self, logs: str) -> list[dict[str, int]]:
        """Extract actual conversation token usage from server logs"""
        usage_logs = []
        # Look for conversation debug logs that show actual usage
-        lines = logs.split('\n')
+        lines = logs.split("\n")
        for i, line in enumerate(lines):
            if "[CONVERSATION_DEBUG] Token budget calculation:" in line:
@@ -60,43 +59,43 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
                        # Parse Total capacity: 1,048,576
                        if "Total capacity:" in detail_line:
-                            match = re.search(r'Total capacity:\s*([\d,]+)', detail_line)
+                            match = re.search(r"Total capacity:\s*([\d,]+)", detail_line)
                            if match:
-                                usage['total_capacity'] = int(match.group(1).replace(',', ''))
+                                usage["total_capacity"] = int(match.group(1).replace(",", ""))
                        # Parse Content allocation: 838,860
                        elif "Content allocation:" in detail_line:
-                            match = re.search(r'Content allocation:\s*([\d,]+)', detail_line)
+                            match = re.search(r"Content allocation:\s*([\d,]+)", detail_line)
                            if match:
-                                usage['content_allocation'] = int(match.group(1).replace(',', ''))
+                                usage["content_allocation"] = int(match.group(1).replace(",", ""))
                        # Parse Conversation tokens: 12,345
                        elif "Conversation tokens:" in detail_line:
-                            match = re.search(r'Conversation tokens:\s*([\d,]+)', detail_line)
+                            match = re.search(r"Conversation tokens:\s*([\d,]+)", detail_line)
                            if match:
-                                usage['conversation_tokens'] = int(match.group(1).replace(',', ''))
+                                usage["conversation_tokens"] = int(match.group(1).replace(",", ""))
                        # Parse Remaining tokens: 825,515
                        elif "Remaining tokens:" in detail_line:
-                            match = re.search(r'Remaining tokens:\s*([\d,]+)', detail_line)
+                            match = re.search(r"Remaining tokens:\s*([\d,]+)", detail_line)
                            if match:
-                                usage['remaining_tokens'] = int(match.group(1).replace(',', ''))
+                                usage["remaining_tokens"] = int(match.group(1).replace(",", ""))
                if usage:  # Only add if we found some usage data
                    usage_logs.append(usage)
        return usage_logs
-    def extract_conversation_token_usage(self, logs: str) -> List[int]:
+    def extract_conversation_token_usage(self, logs: str) -> list[int]:
        """Extract conversation token usage from logs"""
        usage_values = []
        # Look for conversation token usage logs
-        pattern = r'Conversation history token usage:\s*([\d,]+)'
+        pattern = r"Conversation history token usage:\s*([\d,]+)"
        matches = re.findall(pattern, logs)
        for match in matches:
-            usage_values.append(int(match.replace(',', '')))
+            usage_values.append(int(match.replace(",", "")))
        return usage_values
@@ -239,7 +238,7 @@ if __name__ == "__main__":
            # Step 1: Initial chat with first file
            self.logger.info("  Step 1: Initial chat with file1 - checking token allocation")
-            step1_start_time = datetime.datetime.now()
+            datetime.datetime.now()
            response1, continuation_id1 = self.call_mcp_tool(
                "chat",
@@ -263,8 +262,9 @@ if __name__ == "__main__":
            # For Step 1, check for file embedding logs instead of conversation usage
            file_embedding_logs_step1 = [
-                line for line in logs_step1.split('\n')
+                line
-                if 'successfully embedded' in line and 'files' in line and 'tokens' in line
+                for line in logs_step1.split("\n")
                if "successfully embedded" in line and "files" in line and "tokens" in line
            ]
            if not file_embedding_logs_step1:
@@ -276,7 +276,8 @@ if __name__ == "__main__":
            for log in file_embedding_logs_step1:
                # Look for pattern like "successfully embedded 1 files (146 tokens)"
                import re
-                match = re.search(r'\((\d+) tokens\)', log)
+
                match = re.search(r"\((\d+) tokens\)", log)
                if match:
                    step1_file_tokens = int(match.group(1))
                    break
@@ -284,7 +285,7 @@ if __name__ == "__main__":
            self.logger.info(f"  📊 Step 1 File Processing - Embedded files: {step1_file_tokens:,} tokens")
            # Validate that file1 is actually mentioned in the embedding logs (check for actual filename)
-            file1_mentioned = any('math_functions.py' in log for log in file_embedding_logs_step1)
+            file1_mentioned = any("math_functions.py" in log for log in file_embedding_logs_step1)
            if not file1_mentioned:
                # Debug: show what files were actually found in the logs
                self.logger.debug("  📋 Files found in embedding logs:")
@@ -300,7 +301,9 @@ if __name__ == "__main__":
                    # Continue test - the important thing is that files were processed
            # Step 2: Different tool continuing same conversation - should build conversation history
-            self.logger.info("  Step 2: Analyze tool continuing chat conversation - checking conversation history buildup")
+            self.logger.info(
                "  Step 2: Analyze tool continuing chat conversation - checking conversation history buildup"
            )
            response2, continuation_id2 = self.call_mcp_tool(
                "analyze",
@@ -330,9 +333,11 @@ if __name__ == "__main__":
            usage_step2 = self.extract_conversation_usage_logs(logs_step2)
            if len(usage_step2) < 2:
-                self.logger.warning(f"  ⚠️ Step 2: Only found {len(usage_step2)} conversation usage logs, expected at least 2")
+                self.logger.warning(
                    f"  ⚠️ Step 2: Only found {len(usage_step2)} conversation usage logs, expected at least 2"
                )
                # Debug: Look for any CONVERSATION_DEBUG logs
-                conversation_debug_lines = [line for line in logs_step2.split('\n') if 'CONVERSATION_DEBUG' in line]
+                conversation_debug_lines = [line for line in logs_step2.split("\n") if "CONVERSATION_DEBUG" in line]
                self.logger.debug(f"  📋 Found {len(conversation_debug_lines)} CONVERSATION_DEBUG lines in step 2")
                if conversation_debug_lines:
@@ -348,9 +353,11 @@ if __name__ == "__main__":
                    return False
            latest_usage_step2 = usage_step2[-1]  # Get most recent usage
-            self.logger.info(f"  📊 Step 2 Token Usage - Total Capacity: {latest_usage_step2.get('total_capacity', 0):,}, "
+            self.logger.info(
-                            f"Conversation: {latest_usage_step2.get('conversation_tokens', 0):,}, "
+                f"  📊 Step 2 Token Usage - Total Capacity: {latest_usage_step2.get('total_capacity', 0):,}, "
-                            f"Remaining: {latest_usage_step2.get('remaining_tokens', 0):,}")
+                f"Conversation: {latest_usage_step2.get('conversation_tokens', 0):,}, "
                f"Remaining: {latest_usage_step2.get('remaining_tokens', 0):,}"
            )
            # Step 3: Continue conversation with additional file - should show increased token usage
            self.logger.info("  Step 3: Continue conversation with file1 + file2 - checking token growth")
@@ -380,12 +387,14 @@ if __name__ == "__main__":
            self.logger.info(f"  📋 Found {len(usage_step3)} total conversation usage logs")
            if len(usage_step3) < 3:
-                self.logger.warning(f"  ⚠️ Step 3: Only found {len(usage_step3)} conversation usage logs, expected at least 3")
+                self.logger.warning(
                    f"  ⚠️ Step 3: Only found {len(usage_step3)} conversation usage logs, expected at least 3"
                )
                # Let's check if we have at least some logs to work with
                if len(usage_step3) == 0:
                    self.logger.error("  ❌ No conversation usage logs found at all")
                    # Debug: show some recent logs
-                    recent_lines = logs_step3.split('\n')[-50:]
+                    recent_lines = logs_step3.split("\n")[-50:]
                    self.logger.debug("  📋 Recent log lines:")
                    for line in recent_lines:
                        if line.strip() and "CONVERSATION_DEBUG" in line:
@@ -393,9 +402,11 @@ if __name__ == "__main__":
                    return False
            latest_usage_step3 = usage_step3[-1]  # Get most recent usage
-            self.logger.info(f"  📊 Step 3 Token Usage - Total Capacity: {latest_usage_step3.get('total_capacity', 0):,}, "
+            self.logger.info(
-                            f"Conversation: {latest_usage_step3.get('conversation_tokens', 0):,}, "
+                f"  📊 Step 3 Token Usage - Total Capacity: {latest_usage_step3.get('total_capacity', 0):,}, "
-                            f"Remaining: {latest_usage_step3.get('remaining_tokens', 0):,}")
+                f"Conversation: {latest_usage_step3.get('conversation_tokens', 0):,}, "
                f"Remaining: {latest_usage_step3.get('remaining_tokens', 0):,}"
            )
            # Validation: Check token processing and conversation history
            self.logger.info("  📋 Validating token processing and conversation history...")
@@ -407,12 +418,12 @@ if __name__ == "__main__":
            step3_remaining = 0
            if len(usage_step2) > 0:
-                step2_conversation = latest_usage_step2.get('conversation_tokens', 0)
+                step2_conversation = latest_usage_step2.get("conversation_tokens", 0)
-                step2_remaining = latest_usage_step2.get('remaining_tokens', 0)
+                step2_remaining = latest_usage_step2.get("remaining_tokens", 0)
            if len(usage_step3) >= len(usage_step2) + 1:  # Should have one more log than step2
-                step3_conversation = latest_usage_step3.get('conversation_tokens', 0) 
+                step3_conversation = latest_usage_step3.get("conversation_tokens", 0)
-                step3_remaining = latest_usage_step3.get('remaining_tokens', 0)
+                step3_remaining = latest_usage_step3.get("remaining_tokens", 0)
            else:
                # Use step2 values as fallback
                step3_conversation = step2_conversation
@@ -427,13 +438,17 @@ if __name__ == "__main__":
            criteria.append(("Step 1 processed files successfully", step1_processed_files))
            # 2. Step 2 should have conversation history (if continuation worked)
-            step2_has_conversation = step2_conversation > 0 if len(usage_step2) > 0 else True  # Pass if no logs (might be different issue)
+            step2_has_conversation = (
                step2_conversation > 0 if len(usage_step2) > 0 else True
            )  # Pass if no logs (might be different issue)
            step2_has_remaining = step2_remaining > 0 if len(usage_step2) > 0 else True
            criteria.append(("Step 2 has conversation history", step2_has_conversation))
            criteria.append(("Step 2 has remaining tokens", step2_has_remaining))
            # 3. Step 3 should show conversation growth
-            step3_has_conversation = step3_conversation >= step2_conversation if len(usage_step3) > len(usage_step2) else True
+            step3_has_conversation = (
                step3_conversation >= step2_conversation if len(usage_step3) > len(usage_step2) else True
            )
            criteria.append(("Step 3 maintains conversation history", step3_has_conversation))
            # 4. Check that we got some conversation usage logs for continuation calls
@@ -445,27 +460,39 @@ if __name__ == "__main__":
            criteria.append(("Each response generated unique continuation ID", unique_continuation_ids))
            # 6. Validate continuation IDs were different from each step
-            step_ids_different = len(continuation_ids) == 3 and continuation_ids[0] != continuation_ids[1] and continuation_ids[1] != continuation_ids[2]
+            step_ids_different = (
                len(continuation_ids) == 3
                and continuation_ids[0] != continuation_ids[1]
                and continuation_ids[1] != continuation_ids[2]
            )
            criteria.append(("All continuation IDs are different", step_ids_different))
            # Log detailed analysis
-            self.logger.info(f"  📊 Token Processing Analysis:")
+            self.logger.info("  📊 Token Processing Analysis:")
            self.logger.info(f"    Step 1 - File tokens: {step1_file_tokens:,} (new conversation)")
            self.logger.info(f"    Step 2 - Conversation: {step2_conversation:,}, Remaining: {step2_remaining:,}")
            self.logger.info(f"    Step 3 - Conversation: {step3_conversation:,}, Remaining: {step3_remaining:,}")
            # Log continuation ID analysis
-            self.logger.info(f"  📊 Continuation ID Analysis:")
+            self.logger.info("  📊 Continuation ID Analysis:")
            self.logger.info(f"    Step 1 ID: {continuation_ids[0][:8]}... (generated)")
            self.logger.info(f"    Step 2 ID: {continuation_ids[1][:8]}... (generated from Step 1)")
            self.logger.info(f"    Step 3 ID: {continuation_ids[2][:8]}... (generated from Step 2)")
            # Check for file mentions in step 3 (should include both files)
            # Look for file processing in conversation memory logs and tool embedding logs
-            file2_mentioned_step3 = any('calculator.py' in log for log in logs_step3.split('\n') if ('embedded' in log.lower() and ('conversation' in log.lower() or 'tool' in log.lower())))
+            file2_mentioned_step3 = any(
-            file1_still_mentioned_step3 = any('math_functions.py' in log for log in logs_step3.split('\n') if ('embedded' in log.lower() and ('conversation' in log.lower() or 'tool' in log.lower())))
+                "calculator.py" in log
                for log in logs_step3.split("\n")
                if ("embedded" in log.lower() and ("conversation" in log.lower() or "tool" in log.lower()))
            )
            file1_still_mentioned_step3 = any(
                "math_functions.py" in log
                for log in logs_step3.split("\n")
                if ("embedded" in log.lower() and ("conversation" in log.lower() or "tool" in log.lower()))
            )
-            self.logger.info(f"  📊 File Processing in Step 3:")
+            self.logger.info("  📊 File Processing in Step 3:")
            self.logger.info(f"    File1 (math_functions.py) mentioned: {file1_still_mentioned_step3}")
            self.logger.info(f"    File2 (calculator.py) mentioned: {file2_mentioned_step3}")
@@ -484,14 +511,10 @@ if __name__ == "__main__":
            # Check for file embedding logs
            file_embedding_logs = [
-                line for line in logs_step3.split('\n')
+                line for line in logs_step3.split("\n") if "tool embedding" in line and "files" in line
                if 'tool embedding' in line and 'files' in line
            ]
-            conversation_logs = [
+            conversation_logs = [line for line in logs_step3.split("\n") if "conversation history" in line.lower()]
                line for line in logs_step3.split('\n') 
                if 'conversation history' in line.lower()
            ]
            self.logger.info(f"  📊 File embedding logs: {len(file_embedding_logs)}")
            self.logger.info(f"  📊 Conversation history logs: {len(conversation_logs)}")
--- a/tests/init.py
+++ b/tests/init.py
@@ -1 +1 @@
-# Tests for Gemini MCP Server
+# Tests for Zen MCP Server
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,5 @@
 """
-Pytest configuration for Gemini MCP Server tests
+Pytest configuration for Zen MCP Server tests
 """
 import asyncio
@@ -27,13 +27,15 @@ os.environ["DEFAULT_MODEL"] = "gemini-2.0-flash-exp"
 # Force reload of config module to pick up the env var
 import importlib
 import config
 importlib.reload(config)
 # Set MCP_PROJECT_ROOT to a temporary directory for tests
 # This provides a safe sandbox for file operations during testing
 # Create a temporary directory that will be used as the project root for all tests
-test_root = tempfile.mkdtemp(prefix="gemini_mcp_test_")
+test_root = tempfile.mkdtemp(prefix="zen_mcp_test_")
 os.environ["MCP_PROJECT_ROOT"] = test_root
 # Configure asyncio for Windows compatibility
@@ -42,9 +44,9 @@ if sys.platform == "win32":
 # Register providers for all tests
 from providers import ModelProviderRegistry
 from providers.base import ProviderType
 from providers.gemini import GeminiModelProvider
 from providers.openai import OpenAIModelProvider
 from providers.base import ProviderType
 # Register providers at test startup
 ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
--- a/tests/mock_helpers.py
+++ b/tests/mock_helpers.py
@@ -1,7 +1,9 @@
 """Helper functions for test mocking."""
 from unittest.mock import Mock
-from providers.base import ModelCapabilities, ProviderType
+
 from providers.base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
 def create_mock_provider(model_name="gemini-2.0-flash-exp", max_tokens=1_048_576):
    """Create a properly configured mock provider."""
@@ -17,7 +19,7 @@ def create_mock_provider(model_name="gemini-2.0-flash-exp", max_tokens=1_048_576
        supports_system_prompts=True,
        supports_streaming=True,
        supports_function_calling=True,
-        temperature_range=(0.0, 2.0),
+        temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
    )
    mock_provider.get_capabilities.return_value = mock_capabilities
--- a/tests/test_auto_mode.py
+++ b/tests/test_auto_mode.py
@@ -1,11 +1,11 @@
 """Tests for auto mode functionality"""
 import os
 import pytest
 from unittest.mock import patch, Mock
 import importlib
 import os
 from unittest.mock import patch
 import pytest
 from mcp.types import TextContent
 from tools.analyze import AnalyzeTool
@@ -21,6 +21,7 @@ class TestAutoMode:
            # Test auto mode
            os.environ["DEFAULT_MODEL"] = "auto"
            import config
            importlib.reload(config)
            assert config.DEFAULT_MODEL == "auto"
@@ -61,6 +62,7 @@ class TestAutoMode:
            # Enable auto mode
            os.environ["DEFAULT_MODEL"] = "auto"
            import config
            importlib.reload(config)
            tool = AnalyzeTool()
@@ -107,17 +109,15 @@ class TestAutoMode:
            # Enable auto mode
            os.environ["DEFAULT_MODEL"] = "auto"
            import config
            importlib.reload(config)
            tool = AnalyzeTool()
            # Mock the provider to avoid real API calls
-            with patch.object(tool, 'get_model_provider') as mock_provider:
+            with patch.object(tool, "get_model_provider"):
                # Execute without model parameter
-                result = await tool.execute({
+                result = await tool.execute({"files": ["/tmp/test.py"], "prompt": "Analyze this"})
                    "files": ["/tmp/test.py"], 
                    "prompt": "Analyze this"
                })
            # Should get error
            assert len(result) == 1
@@ -139,12 +139,23 @@ class TestAutoMode:
        # Create a minimal concrete tool for testing
        class TestTool(BaseTool):
-            def get_name(self): return "test"
+            def get_name(self):
-            def get_description(self): return "test"
+                return "test"
-            def get_input_schema(self): return {}
+
-            def get_system_prompt(self): return ""
+            def get_description(self):
-            def get_request_model(self): return None
+                return "test"
-            async def prepare_prompt(self, request): return ""
+
            def get_input_schema(self):
                return {}
            def get_system_prompt(self):
                return ""
            def get_request_model(self):
                return None
            async def prepare_prompt(self, request):
                return ""
        tool = TestTool()
@@ -155,6 +166,7 @@ class TestAutoMode:
            # Test auto mode
            os.environ["DEFAULT_MODEL"] = "auto"
            import config
            importlib.reload(config)
            schema = tool.get_model_field_schema()
--- a/tests/test_claude_continuation.py
+++ b/tests/test_claude_continuation.py
@@ -7,11 +7,11 @@ when Gemini doesn't explicitly ask a follow-up question.
 import json
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from pydantic import Field
 from tests.mock_helpers import create_mock_provider
 from tools.base import BaseTool, ToolRequest
 from tools.models import ContinuationOffer, ToolOutput
 from utils.conversation_memory import MAX_CONVERSATION_TURNS
@@ -125,7 +125,7 @@ class TestClaudeContinuationOffers:
                content="Analysis complete. The code looks good.",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -176,7 +176,7 @@ class TestClaudeContinuationOffers:
                content=content_with_followup,
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -220,7 +220,7 @@ class TestClaudeContinuationOffers:
                content="Continued analysis complete.",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
--- a/tests/test_collaboration.py
+++ b/tests/test_collaboration.py
@@ -4,10 +4,10 @@ Tests for dynamic context request and collaboration features
 import json
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from tests.mock_helpers import create_mock_provider
 from tools.analyze import AnalyzeTool
 from tools.debug import DebugIssueTool
 from tools.models import ClarificationRequest, ToolOutput
@@ -41,10 +41,7 @@ class TestDynamicContextRequests:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content=clarification_json,
+            content=clarification_json, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -85,10 +82,7 @@ class TestDynamicContextRequests:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content=normal_response,
+            content=normal_response, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -112,10 +106,7 @@ class TestDynamicContextRequests:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content=malformed_json,
+            content=malformed_json, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -155,10 +146,7 @@ class TestDynamicContextRequests:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content=clarification_json,
+            content=clarification_json, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -245,10 +233,7 @@ class TestCollaborationWorkflow:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content=clarification_json,
+            content=clarification_json, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -287,10 +272,7 @@ class TestCollaborationWorkflow:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content=clarification_json,
+            content=clarification_json, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -317,10 +299,7 @@ class TestCollaborationWorkflow:
        """
        mock_provider.generate_content.return_value = Mock(
-            content=final_response,
+            content=final_response, usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        result2 = await tool.execute(
--- a/tests/test_conversation_field_mapping.py
+++ b/tests/test_conversation_field_mapping.py
@@ -2,15 +2,14 @@
 Test that conversation history is correctly mapped to tool-specific fields
 """
 import json
 import pytest
 from unittest.mock import AsyncMock, MagicMock, patch
 from tests.mock_helpers import create_mock_provider
 from datetime import datetime
 from unittest.mock import MagicMock, patch
 import pytest
 from providers.base import ProviderType
 from server import reconstruct_thread_context
 from utils.conversation_memory import ConversationTurn, ThreadContext
 from providers.base import ProviderType
@pytest.mark.asyncio
@@ -71,19 +70,20 @@ async def test_conversation_history_field_mapping():
                    # Mock provider registry to avoid model lookup errors
                    with patch("providers.registry.ModelProviderRegistry.get_provider_for_model") as mock_get_provider:
                        from providers.base import ModelCapabilities
                        mock_provider = MagicMock()
                        mock_provider.get_capabilities.return_value = ModelCapabilities(
                            provider=ProviderType.GOOGLE,
                            model_name="gemini-2.0-flash-exp",
                            friendly_name="Gemini",
                            max_tokens=200000,
-                            supports_extended_thinking=True
+                            supports_extended_thinking=True,
                        )
                        mock_get_provider.return_value = mock_provider
                        # Mock conversation history building
                        mock_build.return_value = (
                            "=== CONVERSATION HISTORY ===\nPrevious conversation content\n=== END HISTORY ===",
-                            1000  # mock token count
+                            1000,  # mock token count
                        )
                        # Create arguments with continuation_id
@@ -145,10 +145,10 @@ async def test_unknown_tool_defaults_to_prompt():
 async def test_tool_parameter_standardization():
    """Test that all tools use standardized 'prompt' parameter"""
    from tools.analyze import AnalyzeRequest
    from tools.debug import DebugIssueRequest
    from tools.codereview import CodeReviewRequest
-    from tools.thinkdeep import ThinkDeepRequest
+    from tools.debug import DebugIssueRequest
    from tools.precommit import PrecommitRequest
    from tools.thinkdeep import ThinkDeepRequest
    # Test analyze tool uses prompt
    analyze = AnalyzeRequest(files=["/test.py"], prompt="What does this do?")
--- a/tests/test_conversation_history_bug.py
+++ b/tests/test_conversation_history_bug.py
@@ -12,11 +12,11 @@ Claude had shared in earlier turns.
 import json
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from pydantic import Field
 from tests.mock_helpers import create_mock_provider
 from tools.base import BaseTool, ToolRequest
 from utils.conversation_memory import ConversationTurn, ThreadContext
@@ -116,7 +116,7 @@ class TestConversationHistoryBugFix:
                    content="Response with conversation context",
                    usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                    model_name="gemini-2.0-flash-exp",
-                    metadata={"finish_reason": "STOP"}
+                    metadata={"finish_reason": "STOP"},
                )
            mock_provider.generate_content.side_effect = capture_prompt
@@ -176,7 +176,7 @@ class TestConversationHistoryBugFix:
                    content="Response without history",
                    usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                    model_name="gemini-2.0-flash-exp",
-                    metadata={"finish_reason": "STOP"}
+                    metadata={"finish_reason": "STOP"},
                )
            mock_provider.generate_content.side_effect = capture_prompt
@@ -214,7 +214,7 @@ class TestConversationHistoryBugFix:
                    content="New conversation response",
                    usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                    model_name="gemini-2.0-flash-exp",
-                    metadata={"finish_reason": "STOP"}
+                    metadata={"finish_reason": "STOP"},
                )
            mock_provider.generate_content.side_effect = capture_prompt
@@ -298,7 +298,7 @@ class TestConversationHistoryBugFix:
                    content="Analysis of new files complete",
                    usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                    model_name="gemini-2.0-flash-exp",
-                    metadata={"finish_reason": "STOP"}
+                    metadata={"finish_reason": "STOP"},
                )
            mock_provider.generate_content.side_effect = capture_prompt
--- a/tests/test_cross_tool_continuation.py
+++ b/tests/test_cross_tool_continuation.py
@@ -7,11 +7,11 @@ allowing multi-turn conversations to span multiple tool types.
 import json
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from pydantic import Field
 from tests.mock_helpers import create_mock_provider
 from tools.base import BaseTool, ToolRequest
 from utils.conversation_memory import ConversationTurn, ThreadContext
@@ -117,7 +117,7 @@ class TestCrossToolContinuation:
                content=content_with_followup,
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -165,7 +165,7 @@ class TestCrossToolContinuation:
                content="Critical security vulnerability confirmed. The authentication function always returns true, bypassing all security checks.",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -285,7 +285,7 @@ class TestCrossToolContinuation:
                content="Security review of auth.py shows vulnerabilities",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
--- a/tests/test_large_prompt_handling.py
+++ b/tests/test_large_prompt_handling.py
@@ -11,7 +11,6 @@ import os
 import shutil
 import tempfile
 from unittest.mock import MagicMock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from mcp.types import TextContent
@@ -77,7 +76,7 @@ class TestLargePromptHandling:
                content="This is a test response",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -102,7 +101,7 @@ class TestLargePromptHandling:
                content="Processed large prompt",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -214,7 +213,7 @@ class TestLargePromptHandling:
                content="Success",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -247,7 +246,7 @@ class TestLargePromptHandling:
                content="Success",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -278,7 +277,7 @@ class TestLargePromptHandling:
                content="Success",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
@@ -300,7 +299,7 @@ class TestLargePromptHandling:
                content="Success",
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
            mock_get_provider.return_value = mock_provider
--- a/tests/test_live_integration.py
+++ b/tests/test_live_integration.py
@@ -1,141 +0,0 @@
 """
 Live integration tests for google-genai library
 These tests require GEMINI_API_KEY to be set and will make real API calls
 To run these tests manually:
 python tests/test_live_integration.py
 Note: These tests are excluded from regular pytest runs to avoid API rate limits.
 They confirm that the google-genai library integration works correctly with live data.
 """
 import asyncio
 import os
 import sys
 import tempfile
 from pathlib import Path
 # Add parent directory to path to allow imports
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import json
 from tools.analyze import AnalyzeTool
 from tools.thinkdeep import ThinkDeepTool
 async def run_manual_live_tests():
    """Run the live integration checks manually, without pytest.

    The function performs three checks in order, printing a status line for
    each one, and stops at the first failure:

    1. ``AnalyzeTool.execute`` on a small temporary Python file must return
       a non-empty text result.
    2. ``ThinkDeepTool.execute`` must return text containing the marker
       ``"Extended Analysis"``.
    3. ``AnalyzeTool.execute`` is asked about npm dependencies while only a
       Python file is supplied; the JSON response must have the status
       ``"requires_clarification"``.

    The GEMINI_API_KEY environment variable must be set, otherwise nothing
    is executed.

    Returns:
        bool: True when all checks passed. False when the API key is
        missing, when any check failed, or when any exception was raised
        (the exception is printed, not propagated).
    """
    print("🚀 Running manual live integration tests...")
    # Check API key
    if not os.environ.get("GEMINI_API_KEY"):
        print("❌ GEMINI_API_KEY not found. Set it to run live tests.")
        return False
    try:
        # Test google-genai import
        # NOTE(review): no import statement is executed here, so this
        # message is printed unconditionally.
        print("✅ google-genai library import successful")
        # Test tool integration
        # delete=False keeps the file on disk after the with-block closes it,
        # so its path can be handed to the tools; it is removed in the
        # finally clause below.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write("def hello(): return 'world'")
            temp_path = f.name
        try:
            # Test AnalyzeTool
            tool = AnalyzeTool()
            result = await tool.execute(
                {
                    "files": [temp_path],
                    "prompt": "What does this code do?",
                    "thinking_mode": "low",
                }
            )
            # Only the presence of text in the first result item is checked,
            # not its content.
            if result and result[0].text:
                print("✅ AnalyzeTool live test successful")
            else:
                print("❌ AnalyzeTool live test failed")
                return False
            # Test ThinkDeepTool
            think_tool = ThinkDeepTool()
            result = await think_tool.execute(
                {
                    "prompt": "Testing live integration",
                    "thinking_mode": "minimal",  # Fast test
                }
            )
            # Success additionally requires the literal marker
            # "Extended Analysis" somewhere in the returned text.
            if result and result[0].text and "Extended Analysis" in result[0].text:
                print("✅ ThinkDeepTool live test successful")
            else:
                print("❌ ThinkDeepTool live test failed")
                return False
            # Test collaboration/clarification request
            print("\n🔄 Testing dynamic context request (collaboration)...")
            # Create a specific test case designed to trigger clarification
            # We'll use analyze tool with a question that requires seeing files
            analyze_tool = AnalyzeTool()
            # Ask about dependencies without providing package files
            result = await analyze_tool.execute(
                {
                    "files": [temp_path],  # Only Python file, no package.json
                    "prompt": "What npm packages and their versions does this project depend on? List all dependencies.",
                    "thinking_mode": "minimal",  # Fast test
                }
            )
            if result and result[0].text:
                # The result text is parsed as JSON; a parse error or a
                # missing "status" key is handled by the outer except.
                response_data = json.loads(result[0].text)
                print(f"   Response status: {response_data['status']}")
                if response_data["status"] == "requires_clarification":
                    print("✅ Dynamic context request successfully triggered!")
                    # The "content" field is itself parsed as a JSON document.
                    clarification = json.loads(response_data["content"])
                    print(f"   Gemini asks: {clarification.get('question', 'N/A')}")
                    if "files_needed" in clarification:
                        print(f"   Files requested: {clarification['files_needed']}")
                        # Verify it's asking for package-related files
                        expected_files = [
                            "package.json",
                            "package-lock.json",
                            "yarn.lock",
                        ]
                        # Substring match against the string form of the
                        # requested files; a mismatch only prints a warning
                        # and does not fail the run.
                        if any(f in str(clarification["files_needed"]) for f in expected_files):
                            print("   ✅ Correctly identified missing package files!")
                        else:
                            print("   ⚠️  Unexpected files requested")
                else:
                    # This is a failure - we specifically designed this to need clarification
                    print("❌ Expected clarification request but got direct response")
                    print("   This suggests the dynamic context feature may not be working")
                    print("   Response:", response_data.get("content", "")[:200])
                    return False
            else:
                print("❌ Collaboration test failed - no response")
                return False
        finally:
            # Always remove the temporary file, including on the early
            # "return False" paths above; missing_ok avoids an error if the
            # file is already gone.
            Path(temp_path).unlink(missing_ok=True)
        print("\n🎉 All manual live tests passed!")
        print("✅ google-genai library working correctly")
        print("✅ All tools can make live API calls")
        print("✅ Thinking modes functioning properly")
        return True
    except Exception as e:
        # Top-level boundary of the manual script: every error is reported
        # and converted into a False result instead of a traceback.
        print(f"❌ Live test failed: {e}")
        return False
 if __name__ == "__main__":
    # Run live tests when script is executed directly and report the outcome
    # through the process exit status (0 = all checks passed, 1 = failure).
    success = asyncio.run(run_manual_live_tests())
    # sys.exit is used instead of the bare exit() helper: exit() is injected
    # by the site module for interactive use and is not available when the
    # interpreter runs with -S or in some embedded/frozen environments.
    sys.exit(0 if success else 1)
--- a/tests/test_precommit_with_mock_store.py
+++ b/tests/test_precommit_with_mock_store.py
@@ -167,9 +167,7 @@ TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
            add_turn(thread_id, "assistant", "First response", files=[config_path], tool_name="precommit")
            # Second request with continuation - should skip already embedded files
-            PrecommitRequest(
+            PrecommitRequest(path=temp_dir, files=[config_path], continuation_id=thread_id, prompt="Follow-up review")
                path=temp_dir, files=[config_path], continuation_id=thread_id, prompt="Follow-up review"
            )
            files_to_embed_2 = tool.filter_new_files([config_path], thread_id)
            assert len(files_to_embed_2) == 0, "Continuation should skip already embedded files"
--- a/tests/test_prompt_regression.py
+++ b/tests/test_prompt_regression.py
@@ -7,7 +7,6 @@ normal-sized prompts after implementing the large prompt handling feature.
 import json
 from unittest.mock import MagicMock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
@@ -33,7 +32,7 @@ class TestPromptRegression:
                content=text,
                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                model_name="gemini-2.0-flash-exp",
-                metadata={"finish_reason": "STOP"}
+                metadata={"finish_reason": "STOP"},
            )
        return _create_response
@@ -47,7 +46,9 @@ class TestPromptRegression:
            mock_provider = MagicMock()
            mock_provider.get_provider_type.return_value = MagicMock(value="google")
            mock_provider.supports_thinking_mode.return_value = False
-            mock_provider.generate_content.return_value = mock_model_response("This is a helpful response about Python.")
+            mock_provider.generate_content.return_value = mock_model_response(
                "This is a helpful response about Python."
            )
            mock_get_provider.return_value = mock_provider
            result = await tool.execute({"prompt": "Explain Python decorators"})
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -1,10 +1,9 @@
 """Tests for the model provider abstraction system"""
 import pytest
 from unittest.mock import Mock, patch
 import os
 from unittest.mock import Mock, patch
-from providers import ModelProviderRegistry, ModelProvider, ModelResponse, ModelCapabilities
+from providers import ModelProviderRegistry, ModelResponse
 from providers.base import ProviderType
 from providers.gemini import GeminiModelProvider
 from providers.openai import OpenAIModelProvider
@@ -134,11 +133,7 @@ class TestGeminiProvider:
        provider = GeminiModelProvider(api_key="test-key")
-        response = provider.generate_content(
+        response = provider.generate_content(prompt="Test prompt", model_name="gemini-2.0-flash-exp", temperature=0.7)
            prompt="Test prompt",
            model_name="gemini-2.0-flash-exp",
            temperature=0.7
        )
        assert isinstance(response, ModelResponse)
        assert response.content == "Generated content"
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -3,11 +3,11 @@ Tests for the main server functionality
 """
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from server import handle_call_tool, handle_list_tools
 from tests.mock_helpers import create_mock_provider
 class TestServerTools:
@@ -56,10 +56,7 @@ class TestServerTools:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content="Chat response",
+            content="Chat response", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -81,6 +78,6 @@ class TestServerTools:
        assert len(result) == 1
        response = result[0].text
-        assert "Gemini MCP Server v" in response  # Version agnostic check
+        assert "Zen MCP Server v" in response  # Version agnostic check
        assert "Available Tools:" in response
        assert "thinkdeep" in response
--- a/tests/test_thinking_modes.py
+++ b/tests/test_thinking_modes.py
@@ -3,10 +3,10 @@ Tests for thinking_mode functionality across all tools
 """
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from tests.mock_helpers import create_mock_provider
 from tools.analyze import AnalyzeTool
 from tools.codereview import CodeReviewTool
 from tools.debug import DebugIssueTool
@@ -45,10 +45,7 @@ class TestThinkingModes:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = True
        mock_provider.generate_content.return_value = Mock(
-            content="Minimal thinking response",
+            content="Minimal thinking response", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -66,7 +63,9 @@ class TestThinkingModes:
        # Verify generate_content was called with thinking_mode
        mock_provider.generate_content.assert_called_once()
        call_kwargs = mock_provider.generate_content.call_args[1]
-        assert call_kwargs.get("thinking_mode") == "minimal" or (not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None)  # thinking_mode parameter
+        assert call_kwargs.get("thinking_mode") == "minimal" or (
            not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None
        )  # thinking_mode parameter
        # Parse JSON response
        import json
@@ -83,10 +82,7 @@ class TestThinkingModes:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = True
        mock_provider.generate_content.return_value = Mock(
-            content="Low thinking response",
+            content="Low thinking response", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -104,7 +100,9 @@ class TestThinkingModes:
        # Verify generate_content was called with thinking_mode
        mock_provider.generate_content.assert_called_once()
        call_kwargs = mock_provider.generate_content.call_args[1]
-        assert call_kwargs.get("thinking_mode") == "low" or (not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None)
+        assert call_kwargs.get("thinking_mode") == "low" or (
            not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None
        )
        assert "Code Review" in result[0].text
@@ -116,10 +114,7 @@ class TestThinkingModes:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = True
        mock_provider.generate_content.return_value = Mock(
-            content="Medium thinking response",
+            content="Medium thinking response", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -136,7 +131,9 @@ class TestThinkingModes:
        # Verify generate_content was called with thinking_mode
        mock_provider.generate_content.assert_called_once()
        call_kwargs = mock_provider.generate_content.call_args[1]
-        assert call_kwargs.get("thinking_mode") == "medium" or (not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None)
+        assert call_kwargs.get("thinking_mode") == "medium" or (
            not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None
        )
        assert "Debug Analysis" in result[0].text
@@ -148,10 +145,7 @@ class TestThinkingModes:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = True
        mock_provider.generate_content.return_value = Mock(
-            content="High thinking response",
+            content="High thinking response", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -169,7 +163,9 @@ class TestThinkingModes:
        # Verify generate_content was called with thinking_mode
        mock_provider.generate_content.assert_called_once()
        call_kwargs = mock_provider.generate_content.call_args[1]
-        assert call_kwargs.get("thinking_mode") == "high" or (not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None)
+        assert call_kwargs.get("thinking_mode") == "high" or (
            not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None
        )
    @pytest.mark.asyncio
    @patch("tools.base.BaseTool.get_model_provider")
@@ -179,10 +175,7 @@ class TestThinkingModes:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = True
        mock_provider.generate_content.return_value = Mock(
-            content="Max thinking response",
+            content="Max thinking response", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -199,7 +192,9 @@ class TestThinkingModes:
        # Verify generate_content was called with thinking_mode
        mock_provider.generate_content.assert_called_once()
        call_kwargs = mock_provider.generate_content.call_args[1]
-        assert call_kwargs.get("thinking_mode") == "high" or (not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None)
+        assert call_kwargs.get("thinking_mode") == "high" or (
            not mock_provider.supports_thinking_mode.return_value and call_kwargs.get("thinking_mode") is None
        )
        assert "Extended Analysis by Gemini" in result[0].text
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -4,10 +4,10 @@ Tests for individual tool implementations
 import json
 from unittest.mock import Mock, patch
 from tests.mock_helpers import create_mock_provider
 import pytest
 from tests.mock_helpers import create_mock_provider
 from tools import AnalyzeTool, ChatTool, CodeReviewTool, DebugIssueTool, ThinkDeepTool
@@ -37,10 +37,7 @@ class TestThinkDeepTool:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = True
        mock_provider.generate_content.return_value = Mock(
-            content="Extended analysis",
+            content="Extended analysis", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -91,10 +88,7 @@ class TestCodeReviewTool:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content="Security issues found",
+            content="Security issues found", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -139,10 +133,7 @@ class TestDebugIssueTool:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content="Root cause: race condition",
+            content="Root cause: race condition", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -190,10 +181,7 @@ class TestAnalyzeTool:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content="Architecture analysis",
+            content="Architecture analysis", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
@@ -307,10 +295,7 @@ class TestAbsolutePathValidation:
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
-            content="Analysis complete",
+            content="Analysis complete", usage={}, model_name="gemini-2.0-flash-exp", metadata={}
            usage={},
            model_name="gemini-2.0-flash-exp",
            metadata={}
        )
        mock_get_provider.return_value = mock_provider
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -1,5 +1,5 @@
 """
-Tool implementations for Gemini MCP Server
+Tool implementations for Zen MCP Server
 """
 from .analyze import AnalyzeTool
--- a/tools/base.py
+++ b/tools/base.py
@@ -1,5 +1,5 @@
 """
-Base class for all Gemini MCP tools
+Base class for all Zen MCP tools
 This module provides the abstract base class that all tools must inherit from.
 It defines the contract that tools must implement and provides common functionality
@@ -24,8 +24,8 @@ from mcp.types import TextContent
 from pydantic import BaseModel, Field
 from config import DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
 from providers import ModelProvider, ModelProviderRegistry
 from utils import check_token_limit
 from providers import ModelProviderRegistry, ModelProvider, ModelResponse
 from utils.conversation_memory import (
    MAX_CONVERSATION_TURNS,
    add_turn,
@@ -169,7 +169,7 @@ class BaseTool(ABC):
        else:
            # Normal mode - model is optional with default
            available_models = list(MODEL_CAPABILITIES_DESC.keys())
-            models_str = ', '.join(f"'{m}'" for m in available_models)
+            models_str = ", ".join(f"'{m}'" for m in available_models)
            return {
                "type": "string",
                "description": f"Model to use. Available: {models_str}. Defaults to '{DEFAULT_MODEL}' if not specified.",
@@ -257,9 +257,7 @@ class BaseTool(ABC):
            # Safety check: If no files are marked as embedded but we have a continuation_id,
            # this might indicate an issue with conversation history. Be conservative.
            if not embedded_files:
-                logger.debug(
+                logger.debug(f"{self.name} tool: No files found in conversation history for thread {continuation_id}")
                    f"{self.name} tool: No files found in conversation history for thread {continuation_id}"
                )
                logger.debug(
                    f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files"
                )
@@ -345,15 +343,19 @@ class BaseTool(ABC):
            # First check if model_context was passed from server.py
            model_context = None
            if arguments:
-                model_context = arguments.get("_model_context") or getattr(self, "_current_arguments", {}).get("_model_context")
+                model_context = arguments.get("_model_context") or getattr(self, "_current_arguments", {}).get(
                    "_model_context"
                )
            if model_context:
                # Use the passed model context
                try:
                    token_allocation = model_context.calculate_token_allocation()
                    effective_max_tokens = token_allocation.file_tokens - reserve_tokens
-                    logger.debug(f"[FILES] {self.name}: Using passed model context for {model_context.model_name}: "
+                    logger.debug(
-                                f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total")
+                        f"[FILES] {self.name}: Using passed model context for {model_context.model_name}: "
                        f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total"
                    )
                except Exception as e:
                    logger.warning(f"[FILES] {self.name}: Error using passed model context: {e}")
                    # Fall through to manual calculation
@@ -375,18 +377,26 @@ class BaseTool(ABC):
                        model_content_tokens = int(capabilities.max_tokens * 0.8)
                    effective_max_tokens = model_content_tokens - reserve_tokens
-                    logger.debug(f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
+                    logger.debug(
-                                f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total")
+                        f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
                        f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total"
                    )
                except (ValueError, AttributeError) as e:
                    # Handle specific errors: provider not found, model not supported, missing attributes
-                    logger.warning(f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}")
+                    logger.warning(
                        f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
                    )
                    # Fall back to conservative default for safety
                    from config import MAX_CONTENT_TOKENS
                    effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
                except Exception as e:
                    # Catch any other unexpected errors
-                    logger.error(f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}")
+                    logger.error(
                        f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
                    )
                    from config import MAX_CONTENT_TOKENS
                    effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
        # Ensure we have a reasonable minimum budget
@@ -397,9 +407,13 @@ class BaseTool(ABC):
        # Log the specific files for debugging/testing
        if files_to_embed:
-            logger.info(f"[FILE_PROCESSING] {self.name} tool will embed new files: {', '.join([os.path.basename(f) for f in files_to_embed])}")
+            logger.info(
                f"[FILE_PROCESSING] {self.name} tool will embed new files: {', '.join([os.path.basename(f) for f in files_to_embed])}"
            )
        else:
-            logger.info(f"[FILE_PROCESSING] {self.name} tool: No new files to embed (all files already in conversation history)")
+            logger.info(
                f"[FILE_PROCESSING] {self.name} tool: No new files to embed (all files already in conversation history)"
            )
        content_parts = []
@@ -717,6 +731,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                # Add follow-up instructions for new conversations
                from server import get_follow_up_instructions
                follow_up_instructions = get_follow_up_instructions(0)  # New conversation, turn 0
                prompt = f"{prompt}\n\n{follow_up_instructions}"
                logger.debug(f"Added follow-up instructions for new {self.name} conversation")
@@ -728,6 +743,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
            # In auto mode, model parameter is required
            from config import IS_AUTO_MODE
            if IS_AUTO_MODE and model_name.lower() == "auto":
                error_output = ToolOutput(
                    status="error",
@@ -770,7 +786,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                model_name=model_name,
                system_prompt=system_prompt,
                temperature=temperature,
-                thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None
+                thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None,
            )
            logger.info(f"Received response from {provider.get_provider_type().value} API for {self.name}")
@@ -781,11 +797,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                # Parse response to check for clarification requests or format output
                # Pass model info for conversation tracking
-                model_info = {
+                model_info = {"provider": provider, "model_name": model_name, "model_response": model_response}
                    "provider": provider,
                    "model_name": model_name,
                    "model_response": model_response
                }
                tool_output = self._parse_response(raw_text, request, model_info)
                logger.info(f"Successfully completed {self.name} tool execution")
@@ -819,7 +831,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                        model_name=model_name,
                        system_prompt=system_prompt,
                        temperature=temperature,
-                        thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None
+                        thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None,
                    )
                    if retry_response.content:
@@ -827,7 +839,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                        retry_model_info = {
                            "provider": provider,
                            "model_name": model_name,
-                            "model_response": retry_response
+                            "model_response": retry_response,
                        }
                        tool_output = self._parse_response(retry_response.content, request, retry_model_info)
                        return [TextContent(type="text", text=tool_output.model_dump_json())]
@@ -924,10 +936,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                model_name = model_info.get("model_name")
                model_response = model_info.get("model_response")
                if model_response:
-                    model_metadata = {
+                    model_metadata = {"usage": model_response.usage, "metadata": model_response.metadata}
                        "usage": model_response.usage,
                        "metadata": model_response.metadata
                    }
            success = add_turn(
                continuation_id,
@@ -986,7 +995,9 @@ If any of these would strengthen your analysis, specify what Claude should searc
        return None
-    def _create_follow_up_response(self, content: str, follow_up_data: dict, request, model_info: Optional[dict] = None) -> ToolOutput:
+    def _create_follow_up_response(
        self, content: str, follow_up_data: dict, request, model_info: Optional[dict] = None
    ) -> ToolOutput:
        """
        Create a response with follow-up question for conversation threading.
@@ -1007,7 +1018,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
            thread_id = create_thread(
                tool_name=self.name,
                initial_request=request.model_dump() if hasattr(request, "model_dump") else {},
-                parent_thread_id=continuation_id  # Link to parent thread if continuing
+                parent_thread_id=continuation_id,  # Link to parent thread if continuing
            )
            # Add the assistant's response with follow-up
@@ -1023,10 +1034,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                model_name = model_info.get("model_name")
                model_response = model_info.get("model_response")
                if model_response:
-                    model_metadata = {
+                    model_metadata = {"usage": model_response.usage, "metadata": model_response.metadata}
                        "usage": model_response.usage,
                        "metadata": model_response.metadata
                    }
            add_turn(
                thread_id,  # Add to the new thread
@@ -1088,6 +1096,12 @@ If any of these would strengthen your analysis, specify what Claude should searc
        Returns:
            Dict with continuation data if opportunity should be offered, None otherwise
        """
        # Skip continuation offers in test mode
        import os
        if os.getenv("PYTEST_CURRENT_TEST"):
            return None
        continuation_id = getattr(request, "continuation_id", None)
        try:
@@ -1117,7 +1131,9 @@ If any of these would strengthen your analysis, specify what Claude should searc
            # If anything fails, don't offer continuation
            return None
-    def _create_continuation_offer_response(self, content: str, continuation_data: dict, request, model_info: Optional[dict] = None) -> ToolOutput:
+    def _create_continuation_offer_response(
        self, content: str, continuation_data: dict, request, model_info: Optional[dict] = None
    ) -> ToolOutput:
        """
        Create a response offering Claude the opportunity to continue conversation.
@@ -1135,7 +1151,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
            thread_id = create_thread(
                tool_name=self.name,
                initial_request=request.model_dump() if hasattr(request, "model_dump") else {},
-                parent_thread_id=continuation_id  # Link to parent if this is a continuation
+                parent_thread_id=continuation_id,  # Link to parent if this is a continuation
            )
            # Add this response as the first turn (assistant turn)
@@ -1152,10 +1168,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                model_name = model_info.get("model_name")
                model_response = model_info.get("model_response")
                if model_response:
-                    model_metadata = {
+                    model_metadata = {"usage": model_response.usage, "metadata": model_response.metadata}
                        "usage": model_response.usage,
                        "metadata": model_response.metadata
                    }
            add_turn(
                thread_id,
@@ -1313,14 +1326,16 @@ If any of these would strengthen your analysis, specify what Claude should searc
            # Try to determine provider from model name patterns
            if "gemini" in model_name.lower() or model_name.lower() in ["flash", "pro"]:
                # Register Gemini provider if not already registered
                from providers.gemini import GeminiModelProvider
                from providers.base import ProviderType
                from providers.gemini import GeminiModelProvider
                ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
                provider = ModelProviderRegistry.get_provider(ProviderType.GOOGLE)
            elif "gpt" in model_name.lower() or "o3" in model_name.lower():
                # Register OpenAI provider if not already registered
                from providers.openai import OpenAIModelProvider
                from providers.base import ProviderType
                from providers.openai import OpenAIModelProvider
                ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
                provider = ModelProviderRegistry.get_provider(ProviderType.OPENAI)
--- a/tools/codereview.py
+++ b/tools/codereview.py
@@ -44,7 +44,10 @@ class CodeReviewRequest(ToolRequest):
        description="User's summary of what the code does, expected behavior, constraints, and review objectives",
    )
    review_type: str = Field("full", description="Type of review: full|security|performance|quick")
-    focus_on: Optional[str] = Field(None, description="Specific aspects to focus on, or additional context that would help understand areas of concern")
+    focus_on: Optional[str] = Field(
        None,
        description="Specific aspects to focus on, or additional context that would help understand areas of concern",
    )
    standards: Optional[str] = Field(None, description="Coding standards or guidelines to enforce")
    severity_filter: str = Field(
        "all",
--- a/utils/init.py
+++ b/utils/init.py
@@ -1,5 +1,5 @@
 """
-Utility functions for Gemini MCP Server
+Utility functions for Zen MCP Server
 """
 from .file_utils import CODE_EXTENSIONS, expand_paths, read_file_content, read_files
--- a/utils/conversation_memory.py
+++ b/utils/conversation_memory.py
@@ -459,8 +459,9 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
    # Get model-specific token allocation early (needed for both files and turns)
    if model_context is None:
        from utils.model_context import ModelContext
        from config import DEFAULT_MODEL
        from utils.model_context import ModelContext
        model_context = ModelContext(DEFAULT_MODEL)
    token_allocation = model_context.calculate_token_allocation()
--- a/utils/model_context.py
+++ b/utils/model_context.py
@@ -6,12 +6,12 @@ ensuring that token limits are properly calculated based on the current model
 being used, not global constants.
 """
 from typing import Optional, Dict, Any
 from dataclasses import dataclass
 import logging
 from dataclasses import dataclass
 from typing import Any, Optional
 from providers import ModelProviderRegistry, ModelCapabilities
 from config import DEFAULT_MODEL
 from providers import ModelCapabilities, ModelProviderRegistry
 logger = logging.getLogger(__name__)
@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class TokenAllocation:
    """Token allocation strategy for a model."""
    total_tokens: int
    content_tokens: int
    response_tokens: int
@@ -75,7 +76,7 @@ class ModelContext:
        # Dynamic allocation based on model capacity
        if total_tokens < 300_000:
-            # Smaller context models (O3, GPT-4O): Conservative allocation
+            # Smaller context models (O3): Conservative allocation
            content_ratio = 0.6  # 60% for content
            response_ratio = 0.4  # 40% for response
            file_ratio = 0.3  # 30% of content for files
@@ -100,7 +101,7 @@ class ModelContext:
            content_tokens=content_tokens,
            response_tokens=response_tokens,
            file_tokens=file_tokens,
-            history_tokens=history_tokens
+            history_tokens=history_tokens,
        )
        logger.debug(f"Token allocation for {self.model_name}:")
@@ -124,7 +125,7 @@ class ModelContext:
        return len(text) // 3  # Conservative estimate
    @classmethod
-    def from_arguments(cls, arguments: Dict[str, Any]) -> "ModelContext":
+    def from_arguments(cls, arguments: dict[str, Any]) -> "ModelContext":
        """Create ModelContext from tool arguments."""
        model_name = arguments.get("model") or DEFAULT_MODEL
        return cls(model_name)
--- a/gemini_server.py
+++ b/gemini_server.py
@@ -1,5 +1,5 @@
 """
-Gemini MCP Server - Entry point for backward compatibility
+Zen MCP Server - Entry point for backward compatibility
 This file exists to maintain compatibility with existing configurations.
 The main implementation is now in server.py
 """
@@ -1 +1 @@
-# Tests for Gemini MCP Server
+# Tests for Zen MCP Server