From 22a3fb91ed2025a3191839f08906feb7650def10 Mon Sep 17 00:00:00 2001
From: Fahad <fahad@2doapp.com>
Date: Wed, 11 Jun 2025 20:10:25 +0400
Subject: [PATCH] feat: Add comprehensive dynamic configuration system v3.3.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Major Features Added

### 🎯 Dynamic Configuration System
- **Environment-aware model selection**: DEFAULT_MODEL with 'pro'/'flash' shortcuts
- **Configurable thinking modes**: DEFAULT_THINKING_MODE_THINKDEEP for extended reasoning
- **All tool schemas now dynamic**: Show actual current defaults instead of hardcoded values
- **Enhanced setup workflow**: Copy from .env.example with smart customization

### 🔧 Model & Thinking Configuration
- **Smart model resolution**: Support both shortcuts ('pro', 'flash') and full model names
- **Thinking mode optimization**: Only apply thinking budget to models that support it
- **Flash model compatibility**: Works without thinking config, still beneficial via system prompts
- **Dynamic schema descriptions**: Tool parameters show current environment values

### 🚀 Enhanced Developer Experience
- **Fail-fast Docker setup**: GEMINI_API_KEY required upfront in docker-compose
- **Comprehensive startup logging**: Shows current model and thinking mode defaults
- **Enhanced get_version tool**: Reports all dynamic configuration values
- **Better .env documentation**: Clear token consumption details and model options

### 🧪 Comprehensive Testing
- **Live model validation**: New simulator test validates Pro vs Flash thinking behavior
- **Dynamic configuration tests**: Verify environment variable overrides work correctly
- **Complete test coverage**: All 139 unit tests pass, including new model config tests

### 📋 Configuration Files Updated
- **docker-compose.yml**: Fail-fast API key validation, thinking mode support
- **setup-docker.sh**: Copy from .env.example instead of manual creation
- **.env.example**: Detailed documentation with token consumption per thinking mode
- **.gitignore**: Added test-setup/ for cleanup

### 🛠 Technical Improvements
- **Removed setup.py**: Fully Docker-based deployment (no longer needed)
- **REDIS_URL smart defaults**: Auto-configured for Docker, still configurable for dev
- **All tools updated**: Consistent dynamic model parameter descriptions
- **Enhanced error handling**: Better model resolution and validation

## Breaking Changes
- Removed setup.py (Docker-only deployment)
- Model parameter descriptions now show actual defaults (dynamic)

## Migration Guide
- Update .env files using new .env.example format
- Use 'pro'/'flash' shortcuts or full model names
- Set DEFAULT_THINKING_MODE_THINKDEEP for custom thinking depth

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .env.example                                  |  20 +-
 .gitignore                                    |   3 +
 README.md                                     |  38 +++-
 config.py                                     |  15 +-
 docker-compose.yml                            |   4 +-
 server.py                                     |  19 +-
 setup-docker.sh                               |  59 +++---
 setup.py                                      |  52 -----
 simulator_tests/__init__.py                   |   3 +
 simulator_tests/test_model_thinking_config.py | 177 ++++++++++++++++++
 tests/test_config.py                          |   4 +-
 tools/analyze.py                              |   6 +
 tools/base.py                                 |  26 ++-
 tools/chat.py                                 |   6 +
 tools/codereview.py                           |   6 +
 tools/debug.py                                |   6 +
 tools/precommit.py                            |   8 +
 tools/thinkdeep.py                            |  15 +-
 18 files changed, 351 insertions(+), 116 deletions(-)
 delete mode 100644 setup.py
 create mode 100644 simulator_tests/test_model_thinking_config.py

diff --git a/.env.example b/.env.example
index fc516a7..6091b15 100644
--- a/.env.example
+++ b/.env.example
@@ -5,10 +5,22 @@
 # Get your API key from: https://makersuite.google.com/app/apikey
 GEMINI_API_KEY=your_gemini_api_key_here
 
-# Optional: Redis connection URL for conversation memory
-# Defaults to redis://localhost:6379/0
-# For Docker: redis://redis:6379/0
-REDIS_URL=redis://localhost:6379/0
+# Optional: Default model to use
+# Full names: 'gemini-2.5-pro-preview-06-05' or 'gemini-2.0-flash-exp'
+# Defaults to gemini-2.5-pro-preview-06-05 if not specified
+DEFAULT_MODEL=gemini-2.5-pro-preview-06-05
+
+# Optional: Default thinking mode for ThinkDeep tool
+# NOTE: Only applies to models that support extended thinking (e.g., Gemini 2.5 Pro)
+#       Flash models (2.0) will use system prompt engineering instead
+# Token consumption per mode:
+#   minimal: 128 tokens   - Quick analysis, fastest response
+#   low:     2,048 tokens - Light reasoning tasks  
+#   medium:  8,192 tokens - Balanced reasoning (good for most cases)
+#   high:    16,384 tokens - Complex analysis (recommended for thinkdeep)
+#   max:     32,768 tokens - Maximum reasoning depth, slowest but most thorough
+# Defaults to 'high' if not specified
+DEFAULT_THINKING_MODE_THINKDEEP=high
 
 # Optional: Workspace root directory for file access
 # This should be the HOST path that contains all files Claude might reference
diff --git a/.gitignore b/.gitignore
index ece8694..ceb055a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,3 +162,6 @@ coverage.xml
 
 # Test simulation artifacts (dynamically created during testing)
 test_simulation_files/.claude/
+
+# Temporary test directories
+test-setup/
diff --git a/README.md b/README.md
index 84ea01f..e33ec0b 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ The ultimate development partner for Claude - a Model Context Protocol server th
   - [`analyze`](#6-analyze---smart-file-analysis) - File analysis
 
 - **Advanced Topics**
+  - [Model Configuration](#model-configuration) - Pro vs Flash model selection
   - [Thinking Modes](#thinking-modes---managing-token-costs--quality) - Control depth vs cost
   - [Working with Large Prompts](#working-with-large-prompts) - Bypass MCP's 25K token limit
   - [Web Search Integration](#web-search-integration) - Smart search recommendations
@@ -587,6 +588,7 @@ All tools that work with files support **both individual files and entire direct
 **`analyze`** - Analyze files or directories
 - `files`: List of file paths or directories (required)
 - `question`: What to analyze (required)
+- `model`: pro|flash (default: server default)
 - `analysis_type`: architecture|performance|security|quality|general
 - `output_format`: summary|detailed|actionable
 - `thinking_mode`: minimal|low|medium|high|max (default: medium)
@@ -594,11 +596,13 @@ All tools that work with files support **both individual files and entire direct
 
 ```
 "Use gemini to analyze the src/ directory for architectural patterns"
-"Get gemini to analyze main.py and tests/ to understand test coverage"
+"Use flash to quickly analyze main.py and tests/ to understand test coverage"
+"Use pro for deep analysis of the entire backend/ directory structure"
 ```
 
 **`codereview`** - Review code files or directories
 - `files`: List of file paths or directories (required)
+- `model`: pro|flash (default: server default)
 - `review_type`: full|security|performance|quick
 - `focus_on`: Specific aspects to focus on
 - `standards`: Coding standards to enforce
@@ -606,12 +610,13 @@ All tools that work with files support **both individual files and entire direct
 - `thinking_mode`: minimal|low|medium|high|max (default: medium)
 
 ```
-"Use gemini to review the entire api/ directory for security issues"
-"Get gemini to review src/ with focus on performance, only show critical issues"
+"Use pro to review the entire api/ directory for security issues"
+"Use flash to quickly review src/ with focus on performance, only show critical issues"
 ```
 
 **`debug`** - Debug with file context
 - `error_description`: Description of the issue (required)
+- `model`: pro|flash (default: server default)
 - `error_context`: Stack trace or logs
 - `files`: Files or directories related to the issue
 - `runtime_info`: Environment details
@@ -625,6 +630,7 @@ All tools that work with files support **both individual files and entire direct
 
 **`thinkdeep`** - Extended analysis with file context
 - `current_analysis`: Your current thinking (required)
+- `model`: pro|flash (default: server default)
 - `problem_context`: Additional context
 - `focus_areas`: Specific aspects to focus on
 - `files`: Files or directories for context
@@ -866,7 +872,31 @@ This enables better integration, error handling, and support for the dynamic con
 The server includes several configurable properties that control its behavior:
 
 ### Model Configuration
-- **`GEMINI_MODEL`**: `"gemini-2.5-pro-preview-06-05"` - The latest Gemini 2.5 Pro model with native thinking support
+
+**Default Model (Environment Variable):**
+- **`DEFAULT_MODEL`**: Set your preferred default model globally
+  - Default: `"gemini-2.5-pro-preview-06-05"` (extended thinking capabilities)
+  - Alternative: `"gemini-2.0-flash-exp"` (faster responses)
+
+**Per-Tool Model Selection:**
+All tools support a `model` parameter for flexible model switching:
+- **`"pro"`** → Gemini 2.5 Pro (extended thinking, slower, higher quality)
+- **`"flash"`** → Gemini 2.0 Flash (faster responses, lower cost)
+- **Full model names** → Direct model specification
+
+**Examples:**
+```env
+# Set default globally in .env file
+DEFAULT_MODEL=flash
+```
+
+```
+# Per-tool usage in Claude
+"Use flash to quickly analyze this function"
+"Use pro for deep architectural analysis"
+```
+
+**Token Limits:**
 - **`MAX_CONTEXT_TOKENS`**: `1,000,000` - Maximum input context (1M tokens for Gemini 2.5 Pro)
 
 ### Temperature Defaults
diff --git a/config.py b/config.py
index 5cdd020..7b2fe8d 100644
--- a/config.py
+++ b/config.py
@@ -13,15 +13,15 @@ import os
 # Version and metadata
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
-# setup.py imports these values to avoid duplication
-__version__ = "3.2.0"  # Semantic versioning: MAJOR.MINOR.PATCH
-__updated__ = "2025-06-10"  # Last update date in ISO format
+__version__ = "3.3.0"  # Semantic versioning: MAJOR.MINOR.PATCH
+__updated__ = "2025-06-11"  # Last update date in ISO format
 __author__ = "Fahad Gilani"  # Primary maintainer
 
 # Model configuration
-# GEMINI_MODEL: The Gemini model used for all AI operations
+# DEFAULT_MODEL: The default model used for all AI operations
 # This should be a stable, high-performance model suitable for code analysis
-GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"
+# Can be overridden by setting DEFAULT_MODEL environment variable
+DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "gemini-2.5-pro-preview-06-05")
 
 # Token allocation for Gemini Pro (1M total capacity)
 # MAX_CONTEXT_TOKENS: Total model capacity
@@ -48,6 +48,11 @@ TEMPERATURE_BALANCED = 0.5  # For general chat
 # Used when brainstorming, exploring alternatives, or architectural discussions
 TEMPERATURE_CREATIVE = 0.7  # For architecture, deep thinking
 
+# Thinking Mode Defaults
+# DEFAULT_THINKING_MODE_THINKDEEP: Default thinking depth for extended reasoning tool
+# Higher modes use more computational budget but provide deeper analysis
+DEFAULT_THINKING_MODE_THINKDEEP = os.getenv("DEFAULT_THINKING_MODE_THINKDEEP", "high")
+
 # MCP Protocol Limits
 # MCP_PROMPT_SIZE_LIMIT: Maximum character size for prompts sent directly through MCP
 # The MCP protocol has a combined request+response limit of ~25K tokens.
diff --git a/docker-compose.yml b/docker-compose.yml
index 888ca76..0c88ad7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -29,7 +29,9 @@ services:
       redis:
         condition: service_healthy
     environment:
-      - GEMINI_API_KEY=${GEMINI_API_KEY}
+      - GEMINI_API_KEY=${GEMINI_API_KEY:?GEMINI_API_KEY is required. Please set it in your .env file or environment.}
+      - DEFAULT_MODEL=${DEFAULT_MODEL:-gemini-2.5-pro-preview-06-05}
+      - DEFAULT_THINKING_MODE_THINKDEEP=${DEFAULT_THINKING_MODE_THINKDEEP:-high}
       - REDIS_URL=redis://redis:6379/0
       # Use HOME not PWD: Claude needs access to any absolute file path, not just current project,
       # and Claude Code could be running from multiple locations at the same time
diff --git a/server.py b/server.py
index cd7ef42..b5dab00 100644
--- a/server.py
+++ b/server.py
@@ -32,7 +32,7 @@ from mcp.server.stdio import stdio_server
 from mcp.types import ServerCapabilities, TextContent, Tool, ToolsCapability
 
 from config import (
-    GEMINI_MODEL,
+    DEFAULT_MODEL,
     MAX_CONTEXT_TOKENS,
     __author__,
     __updated__,
@@ -435,12 +435,16 @@ async def handle_get_version() -> list[TextContent]:
     Returns:
         Formatted text with version and configuration details
     """
+    # Import thinking mode here to avoid circular imports
+    from config import DEFAULT_THINKING_MODE_THINKDEEP
+
     # Gather comprehensive server information
     version_info = {
         "version": __version__,
         "updated": __updated__,
         "author": __author__,
-        "gemini_model": GEMINI_MODEL,
+        "default_model": DEFAULT_MODEL,
+        "default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP,
         "max_context_tokens": f"{MAX_CONTEXT_TOKENS:,}",
         "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
         "server_started": datetime.now().isoformat(),
@@ -453,7 +457,8 @@ Updated: {__updated__}
 Author: {__author__}
 
 Configuration:
-- Gemini Model: {GEMINI_MODEL}
+- Default Model: {DEFAULT_MODEL}
+- Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}
 - Max Context: {MAX_CONTEXT_TOKENS:,} tokens
 - Python: {version_info["python_version"]}
 - Started: {version_info["server_started"]}
@@ -486,7 +491,13 @@ async def main():
     # Log startup message for Docker log monitoring
     logger.info("Gemini MCP Server starting up...")
     logger.info(f"Log level: {log_level}")
-    logger.info(f"Using model: {GEMINI_MODEL}")
+    logger.info(f"Using default model: {DEFAULT_MODEL}")
+
+    # Import here to avoid circular imports
+    from config import DEFAULT_THINKING_MODE_THINKDEEP
+
+    logger.info(f"Default thinking mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP}")
+
     logger.info(f"Available tools: {list(TOOLS.keys())}")
     logger.info("Server ready - waiting for tool requests...")
 
diff --git a/setup-docker.sh b/setup-docker.sh
index 1c78d2c..fe5492c 100755
--- a/setup-docker.sh
+++ b/setup-docker.sh
@@ -17,41 +17,34 @@ if [ -f .env ]; then
     echo "⚠️  .env file already exists! Updating if needed..."
     echo ""
 else
-    # Check if GEMINI_API_KEY is already set in environment
-    if [ -n "$GEMINI_API_KEY" ]; then
-        API_KEY_VALUE="$GEMINI_API_KEY"
-        echo "✅ Found existing GEMINI_API_KEY in environment"
-    else
-        API_KEY_VALUE="your-gemini-api-key-here"
+    # Copy from .env.example and customize
+    if [ ! -f .env.example ]; then
+        echo "❌ .env.example file not found! This file should exist in the project directory."
+        exit 1
     fi
     
-    # Create the .env file
-    cat > .env << EOF
-# Gemini MCP Server Docker Environment Configuration
-# Generated on $(date)
-
-# Your Gemini API key (get one from https://makersuite.google.com/app/apikey)
-# IMPORTANT: Replace this with your actual API key
-GEMINI_API_KEY=$API_KEY_VALUE
-
-# Redis configuration (automatically set for Docker Compose)
-REDIS_URL=redis://redis:6379/0
-
-# Workspace root - host path that maps to /workspace in container
-# This should be the host directory path that contains all files Claude might reference
-# We use $HOME (not $PWD) because Claude needs access to ANY absolute file path,
-# not just files within the current project directory. Additionally, Claude Code
-# could be running from multiple locations at the same time.
-WORKSPACE_ROOT=$HOME
-
-# Logging level (DEBUG, INFO, WARNING, ERROR)
-# DEBUG: Shows detailed operational messages, conversation threading, tool execution flow
-# INFO: Shows general operational messages (default)
-# WARNING: Shows only warnings and errors
-# ERROR: Shows only errors
-# Uncomment and change to DEBUG if you need detailed troubleshooting information
-LOG_LEVEL=INFO
-EOF
+    # Copy .env.example to .env
+    cp .env.example .env
+    echo "✅ Created .env from .env.example"
+    
+    # Customize the API key if it's set in environment
+    if [ -n "$GEMINI_API_KEY" ]; then
+        # Replace the placeholder API key with the actual value
+        if command -v sed >/dev/null 2>&1; then
+            sed -i.bak "s/your_gemini_api_key_here/$GEMINI_API_KEY/" .env && rm .env.bak
+            echo "✅ Updated .env with existing GEMINI_API_KEY from environment"
+        else
+            echo "⚠️  Found GEMINI_API_KEY in environment, but sed not available. Please update .env manually."
+        fi
+    else
+        echo "⚠️  GEMINI_API_KEY not found in environment. Please edit .env and add your API key."
+    fi
+    
+    # Update WORKSPACE_ROOT to use current user's home directory
+    if command -v sed >/dev/null 2>&1; then
+        sed -i.bak "s|WORKSPACE_ROOT=/Users/your-username|WORKSPACE_ROOT=$HOME|" .env && rm .env.bak
+        echo "✅ Updated WORKSPACE_ROOT to $HOME"
+    fi
     echo "✅ Created .env file with Redis configuration"
     echo ""
 fi
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 87b681a..0000000
--- a/setup.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-Setup configuration for Gemini MCP Server
-"""
-
-from pathlib import Path
-
-from setuptools import setup
-
-# Import version and author from config to maintain single source of truth
-from config import __author__, __version__
-
-# Read README for long description
-readme_path = Path(__file__).parent / "README.md"
-long_description = ""
-if readme_path.exists():
-    long_description = readme_path.read_text(encoding="utf-8")
-
-setup(
-    name="gemini-mcp-server",
-    version=__version__,
-    description="Model Context Protocol server for Google Gemini",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author=__author__,
-    python_requires=">=3.10",
-    py_modules=["gemini_server"],
-    install_requires=[
-        "mcp>=1.0.0",
-        "google-genai>=1.19.0",
-        "pydantic>=2.0.0",
-    ],
-    extras_require={
-        "dev": [
-            "pytest>=7.4.0",
-            "pytest-asyncio>=0.21.0",
-            "pytest-mock>=3.11.0",
-        ]
-    },
-    entry_points={
-        "console_scripts": [
-            "gemini-mcp-server=gemini_server:main",
-        ],
-    },
-    classifiers=[
-        "Development Status :: 4 - Beta",
-        "Intended Audience :: Developers",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-    ],
-)
diff --git a/simulator_tests/__init__.py b/simulator_tests/__init__.py
index 8150270..a83b50c 100644
--- a/simulator_tests/__init__.py
+++ b/simulator_tests/__init__.py
@@ -11,6 +11,7 @@ from .test_content_validation import ContentValidationTest
 from .test_cross_tool_comprehensive import CrossToolComprehensiveTest
 from .test_cross_tool_continuation import CrossToolContinuationTest
 from .test_logs_validation import LogsValidationTest
+from .test_model_thinking_config import TestModelThinkingConfig
 from .test_per_tool_deduplication import PerToolDeduplicationTest
 from .test_redis_validation import RedisValidationTest
 
@@ -23,6 +24,7 @@ TEST_REGISTRY = {
     "cross_tool_comprehensive": CrossToolComprehensiveTest,
     "logs_validation": LogsValidationTest,
     "redis_validation": RedisValidationTest,
+    "model_thinking_config": TestModelThinkingConfig,
 }
 
 __all__ = [
@@ -34,5 +36,6 @@ __all__ = [
     "CrossToolComprehensiveTest",
     "LogsValidationTest",
     "RedisValidationTest",
+    "TestModelThinkingConfig",
     "TEST_REGISTRY",
 ]
diff --git a/simulator_tests/test_model_thinking_config.py b/simulator_tests/test_model_thinking_config.py
new file mode 100644
index 0000000..dce19e2
--- /dev/null
+++ b/simulator_tests/test_model_thinking_config.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""
+Model Thinking Configuration Test
+
+Tests that thinking configuration is properly applied only to models that support it,
+and that Flash models work correctly without thinking config.
+"""
+
+from .base_test import BaseSimulatorTest
+
+
+class TestModelThinkingConfig(BaseSimulatorTest):
+    """Test model-specific thinking configuration behavior"""
+
+    @property
+    def test_name(self) -> str:
+        return "model_thinking_config"
+
+    @property
+    def test_description(self) -> str:
+        return "Model-specific thinking configuration behavior"
+
+    def test_pro_model_with_thinking_config(self):
+        """Test that Pro model uses thinking configuration"""
+        self.logger.info("Testing Pro model with thinking configuration...")
+
+        try:
+            # Test with explicit pro model and high thinking mode
+            response, continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "What is 2 + 2? Please think carefully and explain.",
+                    "model": "pro",  # Should resolve to gemini-2.5-pro-preview-06-05
+                    "thinking_mode": "high",  # Should use thinking_config
+                },
+            )
+
+            if not response:
+                raise Exception("Pro model test failed: No response received")
+
+            self.logger.info("✅ Pro model with thinking config works correctly")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"❌ Pro model test failed: {e}")
+            return False
+
+    def test_flash_model_without_thinking_config(self):
+        """Test that Flash model works without thinking configuration"""
+        self.logger.info("Testing Flash model without thinking configuration...")
+
+        try:
+            # Test with explicit flash model and thinking mode (should be ignored)
+            response, continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "What is 3 + 3? Give a quick answer.",
+                    "model": "flash",  # Should resolve to gemini-2.0-flash-exp
+                    "thinking_mode": "high",  # Should be ignored for Flash model
+                },
+            )
+
+            if not response:
+                raise Exception("Flash model test failed: No response received")
+
+            self.logger.info("✅ Flash model without thinking config works correctly")
+            return True
+
+        except Exception as e:
+            if "thinking" in str(e).lower() and ("not supported" in str(e).lower() or "invalid" in str(e).lower()):
+                raise Exception(f"Flash model incorrectly tried to use thinking config: {e}")
+            self.logger.error(f"❌ Flash model test failed: {e}")
+            return False
+
+    def test_model_resolution_logic(self):
+        """Test that model resolution works correctly for both shortcuts and full names"""
+        self.logger.info("Testing model resolution logic...")
+
+        test_cases = [
+            ("pro", "should work with Pro model"),
+            ("flash", "should work with Flash model"),
+            ("gemini-2.5-pro-preview-06-05", "should work with full Pro model name"),
+            ("gemini-2.0-flash-exp", "should work with full Flash model name"),
+        ]
+
+        success_count = 0
+
+        for model_name, description in test_cases:
+            try:
+                response, continuation_id = self.call_mcp_tool(
+                    "chat",
+                    {
+                        "prompt": f"Test with {model_name}: What is 1 + 1?",
+                        "model": model_name,
+                        "thinking_mode": "medium",
+                    },
+                )
+
+                if not response:
+                    raise Exception(f"No response received for model {model_name}")
+
+                self.logger.info(f"✅ {model_name} {description}")
+                success_count += 1
+
+            except Exception as e:
+                self.logger.error(f"❌ {model_name} failed: {e}")
+                return False
+
+        return success_count == len(test_cases)
+
+    def test_default_model_behavior(self):
+        """Test behavior with server default model (no explicit model specified)"""
+        self.logger.info("Testing default model behavior...")
+
+        try:
+            # Test without specifying model (should use server default)
+            response, continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Test default model: What is 4 + 4?",
+                    # No model specified - should use DEFAULT_MODEL from config
+                    "thinking_mode": "medium",
+                },
+            )
+
+            if not response:
+                raise Exception("Default model test failed: No response received")
+
+            self.logger.info("✅ Default model behavior works correctly")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"❌ Default model test failed: {e}")
+            return False
+
+    def run_test(self) -> bool:
+        """Run all model thinking configuration tests"""
+        self.logger.info(f"📝 Test: {self.test_description}")
+
+        try:
+            # Test Pro model with thinking config
+            if not self.test_pro_model_with_thinking_config():
+                return False
+
+            # Test Flash model without thinking config
+            if not self.test_flash_model_without_thinking_config():
+                return False
+
+            # Test model resolution logic
+            if not self.test_model_resolution_logic():
+                return False
+
+            # Test default model behavior
+            if not self.test_default_model_behavior():
+                return False
+
+            self.logger.info(f"✅ All {self.test_name} tests passed!")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"❌ {self.test_name} test failed: {e}")
+            return False
+
+
+def main():
+    """Run the model thinking configuration tests"""
+    import sys
+
+    verbose = "--verbose" in sys.argv or "-v" in sys.argv
+    test = TestModelThinkingConfig(verbose=verbose)
+
+    success = test.run_test()
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_config.py b/tests/test_config.py
index 1582aa2..50c09c5 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -3,7 +3,7 @@ Tests for configuration
 """
 
 from config import (
-    GEMINI_MODEL,
+    DEFAULT_MODEL,
     MAX_CONTEXT_TOKENS,
     TEMPERATURE_ANALYTICAL,
     TEMPERATURE_BALANCED,
@@ -31,7 +31,7 @@ class TestConfig:
 
     def test_model_config(self):
         """Test model configuration"""
-        assert GEMINI_MODEL == "gemini-2.5-pro-preview-06-05"
+        assert DEFAULT_MODEL == "gemini-2.5-pro-preview-06-05"
         assert MAX_CONTEXT_TOKENS == 1_000_000
 
     def test_temperature_defaults(self):
diff --git a/tools/analyze.py b/tools/analyze.py
index 520afc9..54d4193 100644
--- a/tools/analyze.py
+++ b/tools/analyze.py
@@ -42,6 +42,8 @@ class AnalyzeTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -50,6 +52,10 @@ class AnalyzeTool(BaseTool):
                     "items": {"type": "string"},
                     "description": "Files or directories to analyze (must be absolute paths)",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "question": {
                     "type": "string",
                     "description": "What to analyze or look for",
diff --git a/tools/base.py b/tools/base.py
index 3f06ffe..3c66ed0 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -25,7 +25,7 @@ from google.genai import types
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
-from config import GEMINI_MODEL, MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
+from config import DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
 from utils import check_token_limit
 from utils.conversation_memory import (
     MAX_CONVERSATION_TURNS,
@@ -50,7 +50,10 @@ class ToolRequest(BaseModel):
     these common fields.
     """
 
-    model: Optional[str] = Field(None, description="Model to use (defaults to Gemini 2.5 Pro)")
+    model: Optional[str] = Field(
+        None,
+        description=f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+    )
     temperature: Optional[float] = Field(None, description="Temperature for response (tool-specific defaults)")
     # Thinking mode controls how much computational budget the model uses for reasoning
     # Higher values allow for more complex reasoning but increase latency and cost
@@ -625,7 +628,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
                 # No need to rebuild it here - prompt already contains conversation history
 
             # Extract model configuration from request or use defaults
-            model_name = getattr(request, "model", None) or GEMINI_MODEL
+            model_name = getattr(request, "model", None) or DEFAULT_MODEL
             temperature = getattr(request, "temperature", None)
             if temperature is None:
                 temperature = self.get_default_temperature()
@@ -1064,13 +1067,22 @@ If any of these would strengthen your analysis, specify what Claude should searc
         temperature and thinking budget configuration for models that support it.
 
         Args:
-            model_name: Name of the Gemini model to use
+            model_name: Name of the Gemini model to use (or shorthand like 'flash', 'pro')
             temperature: Temperature setting for response generation
             thinking_mode: Thinking depth mode (affects computational budget)
 
         Returns:
             Model instance configured and ready for generation
         """
+        # Define model shorthands for user convenience
+        model_shorthands = {
+            "pro": "gemini-2.5-pro-preview-06-05",
+            "flash": "gemini-2.0-flash-exp",
+        }
+
+        # Resolve shorthand to full model name
+        resolved_model_name = model_shorthands.get(model_name.lower(), model_name)
+
         # Map thinking modes to computational budget values
         # Higher budgets allow for more complex reasoning but increase latency
         thinking_budgets = {
@@ -1085,7 +1097,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
 
         # Gemini 2.5 models support thinking configuration for enhanced reasoning
         # Skip special handling in test environment to allow mocking
-        if "2.5" in model_name and not os.environ.get("PYTEST_CURRENT_TEST"):
+        if "2.5" in resolved_model_name and not os.environ.get("PYTEST_CURRENT_TEST"):
             try:
                 # Retrieve API key for Gemini client creation
                 api_key = os.environ.get("GEMINI_API_KEY")
@@ -1144,7 +1156,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
 
                         return ResponseWrapper(response.text)
 
-                return ModelWrapper(client, model_name, temperature, thinking_budget)
+                return ModelWrapper(client, resolved_model_name, temperature, thinking_budget)
 
             except Exception:
                 # Fall back to regular API if thinking configuration fails
@@ -1197,4 +1209,4 @@ If any of these would strengthen your analysis, specify what Claude should searc
 
                 return ResponseWrapper(response.text)
 
-        return SimpleModelWrapper(client, model_name, temperature)
+        return SimpleModelWrapper(client, resolved_model_name, temperature)
diff --git a/tools/chat.py b/tools/chat.py
index fcacac5..9b12de0 100644
--- a/tools/chat.py
+++ b/tools/chat.py
@@ -44,6 +44,8 @@ class ChatTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -56,6 +58,10 @@ class ChatTool(BaseTool):
                     "items": {"type": "string"},
                     "description": "Optional files for context (must be absolute paths)",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "temperature": {
                     "type": "number",
                     "description": "Response creativity (0-1, default 0.5)",
diff --git a/tools/codereview.py b/tools/codereview.py
index ec75e79..59512da 100644
--- a/tools/codereview.py
+++ b/tools/codereview.py
@@ -79,6 +79,8 @@ class CodeReviewTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -87,6 +89,10 @@ class CodeReviewTool(BaseTool):
                     "items": {"type": "string"},
                     "description": "Code files or directories to review (must be absolute paths)",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "context": {
                     "type": "string",
                     "description": "User's summary of what the code does, expected behavior, constraints, and review objectives",
diff --git a/tools/debug.py b/tools/debug.py
index 1350914..fd76980 100644
--- a/tools/debug.py
+++ b/tools/debug.py
@@ -50,6 +50,8 @@ class DebugIssueTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -57,6 +59,10 @@ class DebugIssueTool(BaseTool):
                     "type": "string",
                     "description": "Error message, symptoms, or issue description",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "error_context": {
                     "type": "string",
                     "description": "Stack trace, logs, or additional error context",
diff --git a/tools/precommit.py b/tools/precommit.py
index 7ffc45f..c5c280d 100644
--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -98,7 +98,15 @@ class Precommit(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         schema = self.get_request_model().model_json_schema()
+        # Ensure model parameter has enhanced description
+        if "properties" in schema and "model" in schema["properties"]:
+            schema["properties"]["model"] = {
+                "type": "string",
+                "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+            }
         # Ensure use_websearch is in the schema with proper description
         if "properties" in schema and "use_websearch" not in schema["properties"]:
             schema["properties"]["use_websearch"] = {
diff --git a/tools/thinkdeep.py b/tools/thinkdeep.py
index e7d4b3b..e15ded4 100644
--- a/tools/thinkdeep.py
+++ b/tools/thinkdeep.py
@@ -48,6 +48,8 @@ class ThinkDeepTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -55,6 +57,10 @@ class ThinkDeepTool(BaseTool):
                     "type": "string",
                     "description": "Your current thinking/analysis to extend and validate",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "problem_context": {
                     "type": "string",
                     "description": "Additional context about the problem or goal",
@@ -78,8 +84,7 @@ class ThinkDeepTool(BaseTool):
                 "thinking_mode": {
                     "type": "string",
                     "enum": ["minimal", "low", "medium", "high", "max"],
-                    "description": "Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768)",
-                    "default": "high",
+                    "description": f"Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768). Defaults to '{self.get_default_thinking_mode()}' if not specified.",
                 },
                 "use_websearch": {
                     "type": "boolean",
@@ -101,8 +106,10 @@ class ThinkDeepTool(BaseTool):
         return TEMPERATURE_CREATIVE
 
     def get_default_thinking_mode(self) -> str:
-        """ThinkDeep uses high thinking by default"""
-        return "high"
+        """ThinkDeep uses configurable thinking mode, defaults to high"""
+        from config import DEFAULT_THINKING_MODE_THINKDEEP
+
+        return DEFAULT_THINKING_MODE_THINKDEEP
 
     def get_request_model(self):
         return ThinkDeepRequest