From 7ea790ef88fbeeb010052863a9abb7b6eb641e0c Mon Sep 17 00:00:00 2001 From: Fahad Date: Tue, 10 Jun 2025 07:20:24 +0400 Subject: [PATCH] fix: Docker path translation for review_changes and code deduplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed review_changes tool to properly translate host paths to container paths in Docker - Prevents "No such file or directory" errors when running in Docker containers - Added proper error handling with clear messages when paths are inaccessible refactor: Centralized token limit validation across all tools - Added _validate_token_limit method to BaseTool to eliminate code duplication - Reduced ~25 lines of duplicated code across 5 tools (analyze, chat, debug_issue, review_code, think_deeper) - Maintains exact same error messages and behavior feat: Enhanced large prompt handling - Added support for prompts >50K chars by requesting file-based input - Preserves MCP's ~25K token capacity for responses - All tools now check prompt size before processing test: Added comprehensive Docker path integration tests - Tests for path translation, security validation, and error handling - Tests for review_changes tool specifically with Docker paths - Fixed failing think_deeper test (updated default from "max" to "high") chore: Code quality improvements - Applied black formatting across all files - Fixed import sorting with isort - All tests passing (96 tests) - Standardized error handling follows MCP TextContent format The changes ensure consistent behavior across all environments while reducing code duplication and improving maintainability. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .env.example | 18 +- README.md | 80 +++++- claude_config_example.json | 3 + config.py | 12 +- examples/claude_config_docker_home.json | 20 ++ examples/claude_config_macos.json | 3 + examples/claude_config_windows.json | 3 + examples/claude_config_wsl.json | 3 + prompts/__init__.py | 9 +- prompts/tool_prompts.py | 4 + server.py | 19 +- setup-docker-env.bat | 15 +- setup-docker-env.ps1 | 17 +- setup-docker-env.sh | 15 +- tests/test_collaboration.py | 2 +- tests/test_config.py | 13 +- tests/test_docker_path_integration.py | 251 ++++++++++++++++++ tests/test_large_prompt_handling.py | 313 ++++++++++++++++++++++ tests/test_live_integration.py | 2 +- tests/test_prompt_regression.py | 333 ++++++++++++++++++++++++ tests/test_review_changes.py | 5 +- tests/test_thinking_modes.py | 6 +- tests/test_tools.py | 10 +- tests/test_utils.py | 3 +- tools/__init__.py | 2 +- tools/analyze.py | 42 ++- tools/base.py | 109 +++++++- tools/chat.py | 43 ++- tools/debug_issue.py | 56 +++- tools/models.py | 7 +- tools/review_changes.py | 50 +++- tools/review_code.py | 43 ++- tools/think_deeper.py | 89 +++++-- utils/__init__.py | 3 +- utils/file_utils.py | 110 +++++--- utils/git_utils.py | 3 +- 36 files changed, 1540 insertions(+), 176 deletions(-) create mode 100644 examples/claude_config_docker_home.json create mode 100644 tests/test_docker_path_integration.py create mode 100644 tests/test_large_prompt_handling.py create mode 100644 tests/test_prompt_regression.py diff --git a/.env.example b/.env.example index de2ce61..381729b 100644 --- a/.env.example +++ b/.env.example @@ -1,15 +1,21 @@ -# Example .env file for Gemini MCP Server Docker setup +# Example .env file for Gemini MCP Server # Copy this to .env and update with your actual values # Your Gemini API key (required) # Get one from: 
https://makersuite.google.com/app/apikey GEMINI_API_KEY=your-gemini-api-key-here -# The absolute path to your project directory on the host machine -# This will be mounted to /workspace in the container +# Docker-specific environment variables (optional) +# These are set automatically by the Docker setup scripts +# You typically don't need to set these manually + +# WORKSPACE_ROOT: Used for Docker path translation +# Automatically set when using Docker wrapper scripts # Example: /Users/username/my-project (macOS/Linux) # Example: C:\Users\username\my-project (Windows) -WORKSPACE_ROOT=/path/to/your/project +# WORKSPACE_ROOT=/path/to/your/project -# Optional: Logging level (DEBUG, INFO, WARNING, ERROR) -# LOG_LEVEL=INFO \ No newline at end of file +# MCP_PROJECT_ROOT: Restricts file access to a specific directory +# If not set, defaults to user's home directory +# Set this to limit file access to a specific project folder +# MCP_PROJECT_ROOT=/path/to/allowed/directory \ No newline at end of file diff --git a/README.md b/README.md index 68d85e7..47265e2 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ The ultimate development partner for Claude - a Model Context Protocol server th - **Advanced Topics** - [Thinking Modes](#thinking-modes---managing-token-costs--quality) - Control depth vs cost + - [Working with Large Prompts](#working-with-large-prompts) - Bypass MCP's 25K token limit - [Collaborative Workflows](#collaborative-workflows) - Multi-tool patterns - [Tool Parameters](#tool-parameters) - Detailed parameter reference - [Docker Architecture](#docker-architecture) - How Docker integration works @@ -48,6 +49,7 @@ Claude is brilliant, but sometimes you need: - **Deep code analysis** across massive codebases that exceed Claude's context limits ([`analyze`](#6-analyze---smart-file-analysis)) - **Dynamic collaboration** - Gemini can request additional context from Claude mid-analysis for more thorough insights - **Smart file handling** - Automatically expands directories, filters irrelevant files, and manages token limits when analyzing `"main.py, src/, tests/"` +- **Bypass MCP's token limits** - Work around MCP's 25K combined token limit by automatically handling large prompts as files, preserving the full capacity for responses This server makes Gemini your development sidekick, handling what Claude can't or extending what Claude starts. @@ -104,7 +106,16 @@ setup-docker-env.bat # Windows (PowerShell): .\setup-docker-env.ps1 +``` +**Important:** The setup script will: +- Create a `.env` file for your API key +- **Display the exact Claude Desktop configuration to copy** - save this output! +- Show you where to paste the configuration + +**Docker File Access:** Docker containers can only access files within mounted directories. The generated configuration mounts your home directory by default. To access files elsewhere, modify the `-v` parameter in the configuration. + +```bash # 2. Edit .env and add your Gemini API key # The .env file will contain: # WORKSPACE_ROOT=/your/current/directory (automatically set) @@ -112,6 +123,8 @@ setup-docker-env.bat # 3. Build the Docker image docker build -t gemini-mcp-server . + +# 4. Copy the configuration from step 1 into Claude Desktop ``` **That's it!** Docker handles all Python dependencies and environment setup for you. 
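If you want to sanity-check the image before adding it to Claude Desktop, you can run it once by hand. The command below is a sketch that mirrors the configuration the setup script generates — the `.env` path is a placeholder, and the `$HOME` mount matches the default home-directory setup. Because the server speaks MCP over stdio, it will simply start and wait for input; press Ctrl+C to stop it.

```bash
# Manual smoke test (a sketch; same flags as the generated Claude Desktop config —
# adjust the .env path and the mounted directory to your machine).
# --env-file : file containing your GEMINI_API_KEY
# -e WORKSPACE_ROOT / MCP_PROJECT_ROOT : used for Docker path translation
# -v ...:ro  : the ONLY directory the container can read
docker run --rm -i \
  --env-file /path/to/gemini-mcp-server/.env \
  -e WORKSPACE_ROOT="$HOME" \
  -e MCP_PROJECT_ROOT=/workspace \
  -v "$HOME:/workspace:ro" \
  gemini-mcp-server:latest
```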
@@ -177,7 +190,9 @@ Choose your configuration based on your setup method: **Important for Docker setup:** - Replace `/path/to/gemini-mcp-server/.env` with the full path to your .env file -- Replace `/path/to/your/project` with the directory containing code you want to analyze +- Docker containers can ONLY access files within the mounted directory (`-v` parameter) +- The examples below mount your home directory for broad file access +- To access files elsewhere, change the mount path (e.g., `-v /specific/project:/workspace:ro`) - The container runs temporarily when Claude needs it (no persistent Docker containers) - Communication happens via stdio - Docker's `-i` flag connects the container's stdin/stdout to Claude @@ -196,8 +211,10 @@ Choose your configuration based on your setup method: "run", "--rm", "-i", - "--env-file", "/Users/john/gemini-mcp-server/.env", - "-v", "/Users/john/my-project:/workspace:ro", + "--env-file", "/path/to/gemini-mcp-server/.env", + "-e", "WORKSPACE_ROOT=/Users/YOUR_USERNAME", + "-e", "MCP_PROJECT_ROOT=/workspace", + "-v", "/Users/YOUR_USERNAME:/workspace:ro", "gemini-mcp-server:latest" ] } @@ -215,8 +232,10 @@ Choose your configuration based on your setup method: "run", "--rm", "-i", - "--env-file", "C:/Users/john/gemini-mcp-server/.env", - "-v", "C:/Users/john/my-project:/workspace:ro", + "--env-file", "C:/path/to/gemini-mcp-server/.env", + "-e", "WORKSPACE_ROOT=C:/Users/YOUR_USERNAME", + "-e", "MCP_PROJECT_ROOT=/workspace", + "-v", "C:/Users/YOUR_USERNAME:/workspace:ro", "gemini-mcp-server:latest" ] } @@ -224,6 +243,8 @@ Choose your configuration based on your setup method: } ``` +> **Note**: Run `setup-docker-env.sh` (macOS/Linux) or `setup-docker-env.ps1` (Windows) to generate this configuration automatically with your paths. + #### Option B: Traditional Configuration **macOS/Linux:** @@ -472,7 +493,7 @@ Combine both perspectives to create a comprehensive caching implementation guide **Get a second opinion to augment Claude's own extended thinking** -**Thinking Mode:** Default is `max` (32,768 tokens) for deepest analysis. Reduce to save tokens if you need faster/cheaper responses. +**Thinking Mode:** Default is `high` (16,384 tokens) for deep analysis. Claude will automatically choose the best mode based on complexity - use `low` for quick validations, `medium` for standard problems, `high` for complex issues (default), or `max` for extremely complex challenges requiring deepest analysis. 
#### Example Prompts: @@ -484,15 +505,15 @@ Combine both perspectives to create a comprehensive caching implementation guide **Managing Token Costs:** ``` -# Save significant tokens when deep analysis isn't critical -"Use gemini to think deeper with medium thinking about this refactoring approach" (saves ~24k tokens) -"Get gemini to think deeper using high thinking mode about this design" (saves ~16k tokens) +# Claude will intelligently select the right mode, but you can override: +"Use gemini to think deeper with medium thinking about this refactoring approach" (saves ~8k tokens vs default) +"Get gemini to think deeper using low thinking to validate my basic approach" (saves ~14k tokens vs default) -# Use default max only for critical analysis -"Use gemini to think deeper about this security architecture" (uses default max - 32k tokens) +# Use default high for most complex problems +"Use gemini to think deeper about this security architecture" (uses default high - 16k tokens) -# For simple validations -"Use gemini with low thinking to validate my basic approach" (saves ~30k tokens!) +# For extremely complex challenges requiring maximum depth +"Use gemini with max thinking to solve this distributed consensus problem" (adds ~16k tokens vs default) ``` **Collaborative Workflow:** @@ -512,6 +533,7 @@ about event ordering and failure scenarios. Then integrate gemini's insights and - Offers alternative perspectives and approaches - Validates architectural decisions and design patterns - Can reference specific files for context: `"Use gemini to think deeper about my API design with reference to api/routes.py"` +- **Enhanced Critical Evaluation (v2.10.0)**: After Gemini's analysis, Claude is prompted to critically evaluate the suggestions, consider context and constraints, identify risks, and synthesize a final recommendation - ensuring a balanced, well-considered solution **Triggers:** think deeper, ultrathink, extend my analysis, validate my approach @@ -895,6 +917,38 @@ You can control thinking modes using natural language in your prompts. Remember: ## Advanced Features +### Working with Large Prompts + +The MCP protocol has a combined request+response limit of approximately 25K tokens. This server intelligently works around this limitation by automatically handling large prompts as files: + +**How it works:** +1. When you send a prompt larger than the configured limit (default: 50K characters ~10-12K tokens), the server detects this +2. It responds with a special status asking Claude to save the prompt to a file named `prompt.txt` +3. Claude saves the prompt and resends the request with the file path instead +4. The server reads the file content directly into Gemini's 1M token context +5. The full MCP token capacity is preserved for the response + +**Example scenario:** +``` +# You have a massive code review request with detailed context +User: "Use gemini to review this code: [50,000+ character detailed analysis]" + +# Server detects the large prompt and responds: +Gemini MCP: "The prompt is too large for MCP's token limits (>50,000 characters). +Please save the prompt text to a temporary file named 'prompt.txt' and resend +the request with an empty prompt string and the absolute file path included +in the files parameter, along with any other files you wish to share as context." 
+ +# Claude automatically handles this: +- Saves your prompt to /tmp/prompt.txt +- Resends: "Use gemini to review this code" with files=["/tmp/prompt.txt", "/path/to/code.py"] + +# Server processes the large prompt through Gemini's 1M context +# Returns comprehensive analysis within MCP's response limits +``` + +This feature ensures you can send arbitrarily large prompts to Gemini without hitting MCP's protocol limitations, while maximizing the available space for detailed responses. + ### Dynamic Context Requests Tools can request additional context from Claude during execution. When Gemini needs more information to provide a thorough analysis, it will ask Claude for specific files or clarification, enabling true collaborative problem-solving. diff --git a/claude_config_example.json b/claude_config_example.json index bf85cb2..3a01726 100644 --- a/claude_config_example.json +++ b/claude_config_example.json @@ -1,4 +1,7 @@ { + "comment": "Example Claude Desktop configuration for Gemini MCP Server", + "comment2": "For Docker setup, use examples/claude_config_docker_home.json", + "comment3": "For platform-specific examples, see the examples/ directory", "mcpServers": { "gemini": { "command": "/path/to/gemini-mcp-server/run_gemini.sh", diff --git a/config.py b/config.py index 5ef77f9..058b04f 100644 --- a/config.py +++ b/config.py @@ -10,8 +10,8 @@ Configuration values can be overridden by environment variables where appropriat # Version and metadata # These values are used in server responses and for tracking releases -__version__ = "2.9.0" # Semantic versioning: MAJOR.MINOR.PATCH -__updated__ = "2025-06-09" # Last update date in ISO format +__version__ = "2.10.0" # Semantic versioning: MAJOR.MINOR.PATCH +__updated__ = "2025-06-10" # Last update date in ISO format __author__ = "Fahad Gilani" # Primary maintainer # Model configuration @@ -41,3 +41,11 @@ TEMPERATURE_BALANCED = 0.5 # For general chat # TEMPERATURE_CREATIVE: Higher temperature for exploratory tasks # Used when brainstorming, exploring alternatives, or architectural discussions TEMPERATURE_CREATIVE = 0.7 # For architecture, deep thinking + +# MCP Protocol Limits +# MCP_PROMPT_SIZE_LIMIT: Maximum character size for prompts sent directly through MCP +# The MCP protocol has a combined request+response limit of ~25K tokens. +# To ensure we have enough space for responses, we limit direct prompt input +# to 50K characters (roughly ~10-12K tokens). Larger prompts must be sent +# as files to bypass MCP's token constraints. 
+MCP_PROMPT_SIZE_LIMIT = 50_000 # 50K characters diff --git a/examples/claude_config_docker_home.json b/examples/claude_config_docker_home.json new file mode 100644 index 0000000..07d9883 --- /dev/null +++ b/examples/claude_config_docker_home.json @@ -0,0 +1,20 @@ +{ + "comment": "Docker configuration that mounts your home directory", + "comment2": "Update paths: /path/to/gemini-mcp-server/.env and /Users/your-username", + "comment3": "The container can only access files within the mounted directory", + "mcpServers": { + "gemini": { + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "--env-file", "/path/to/gemini-mcp-server/.env", + "-e", "WORKSPACE_ROOT=/Users/your-username", + "-e", "MCP_PROJECT_ROOT=/workspace", + "-v", "/Users/your-username:/workspace:ro", + "gemini-mcp-server:latest" + ] + } + } +} \ No newline at end of file diff --git a/examples/claude_config_macos.json b/examples/claude_config_macos.json index 8e821ee..572bf88 100644 --- a/examples/claude_config_macos.json +++ b/examples/claude_config_macos.json @@ -1,4 +1,7 @@ { + "comment": "Traditional macOS/Linux configuration (non-Docker)", + "comment2": "Replace YOUR_USERNAME with your actual username", + "comment3": "This gives access to all files under your home directory", "mcpServers": { "gemini": { "command": "/Users/YOUR_USERNAME/gemini-mcp-server/run_gemini.sh", diff --git a/examples/claude_config_windows.json b/examples/claude_config_windows.json index 1a10db1..ec59210 100644 --- a/examples/claude_config_windows.json +++ b/examples/claude_config_windows.json @@ -1,4 +1,7 @@ { + "comment": "Traditional Windows configuration (non-Docker)", + "comment2": "Replace YOUR_USERNAME with your actual username", + "comment3": "Note the double backslashes in the path", "mcpServers": { "gemini": { "command": "C:\\Users\\YOUR_USERNAME\\gemini-mcp-server\\run_gemini.bat", diff --git a/examples/claude_config_wsl.json b/examples/claude_config_wsl.json index 9c96ac1..ff0053d 100644 --- a/examples/claude_config_wsl.json +++ b/examples/claude_config_wsl.json @@ -1,4 +1,7 @@ { + "comment": "Windows configuration using WSL (Windows Subsystem for Linux)", + "comment2": "Replace YOUR_WSL_USERNAME with your WSL username", + "comment3": "Make sure the server is installed in your WSL environment", "mcpServers": { "gemini": { "command": "wsl.exe", diff --git a/prompts/__init__.py b/prompts/__init__.py index 970b456..7818681 100644 --- a/prompts/__init__.py +++ b/prompts/__init__.py @@ -2,13 +2,8 @@ System prompts for Gemini tools """ -from .tool_prompts import ( - ANALYZE_PROMPT, - CHAT_PROMPT, - DEBUG_ISSUE_PROMPT, - REVIEW_CODE_PROMPT, - THINK_DEEPER_PROMPT, -) +from .tool_prompts import (ANALYZE_PROMPT, CHAT_PROMPT, DEBUG_ISSUE_PROMPT, + REVIEW_CODE_PROMPT, THINK_DEEPER_PROMPT) __all__ = [ "THINK_DEEPER_PROMPT", diff --git a/prompts/tool_prompts.py b/prompts/tool_prompts.py index 83bf444..63df0e9 100644 --- a/prompts/tool_prompts.py +++ b/prompts/tool_prompts.py @@ -16,6 +16,10 @@ Your role is to: 4. Focus on aspects Claude might have missed or couldn't fully explore 5. Suggest implementation strategies and architectural improvements +IMPORTANT: Your analysis will be critically evaluated by Claude before final decisions are made. +Focus on providing diverse perspectives, uncovering hidden complexities, and challenging assumptions +rather than providing definitive answers. Your goal is to enrich the decision-making process. + Key areas to consider (in priority order): 1. 
**Security vulnerabilities and attack vectors** - This is paramount. Consider: - Authentication/authorization flaws diff --git a/server.py b/server.py index 1a28635..eeebdab 100644 --- a/server.py +++ b/server.py @@ -30,21 +30,10 @@ from mcp.server.models import InitializationOptions from mcp.server.stdio import stdio_server from mcp.types import TextContent, Tool -from config import ( - GEMINI_MODEL, - MAX_CONTEXT_TOKENS, - __author__, - __updated__, - __version__, -) -from tools import ( - AnalyzeTool, - ChatTool, - DebugIssueTool, - ReviewChanges, - ReviewCodeTool, - ThinkDeeperTool, -) +from config import (GEMINI_MODEL, MAX_CONTEXT_TOKENS, __author__, __updated__, + __version__) +from tools import (AnalyzeTool, ChatTool, DebugIssueTool, ReviewChanges, + ReviewCodeTool, ThinkDeeperTool) # Configure logging for server operations # Set to INFO level to capture important operational messages without being too verbose diff --git a/setup-docker-env.bat b/setup-docker-env.bat index db55022..e859ddf 100644 --- a/setup-docker-env.bat +++ b/setup-docker-env.bat @@ -40,6 +40,16 @@ echo ===== COPY BELOW THIS LINE ===== echo { echo "mcpServers": { echo "gemini": { +echo "command": "%CURRENT_DIR%\gemini-mcp-docker.bat" +echo } +echo } +echo } +echo ===== COPY ABOVE THIS LINE ===== +echo. +echo Alternative: If you prefer the direct Docker command ^(static workspace^): +echo { +echo "mcpServers": { +echo "gemini": { echo "command": "docker", echo "args": [ echo "run", @@ -52,10 +62,9 @@ echo ] echo } echo } echo } -echo ===== COPY ABOVE THIS LINE ===== echo. echo Config file location: echo Windows: %%APPDATA%%\Claude\claude_desktop_config.json echo. -echo Note: The configuration above mounts the current directory ^(%CURRENT_DIR%^) -echo as the workspace. You can change this path to any project directory you want to analyze. \ No newline at end of file +echo Note: The first configuration uses a wrapper script that allows you to run Claude +echo from any directory. The second configuration mounts a fixed directory ^(%CURRENT_DIR%^). \ No newline at end of file diff --git a/setup-docker-env.ps1 b/setup-docker-env.ps1 index 1921c19..254038e 100644 --- a/setup-docker-env.ps1 +++ b/setup-docker-env.ps1 @@ -38,6 +38,18 @@ Write-Host "3. Copy this configuration to your Claude Desktop config:" Write-Host "" Write-Host "===== COPY BELOW THIS LINE =====" -ForegroundColor Cyan Write-Host @" +{ + "mcpServers": { + "gemini": { + "command": "$CurrentDir\gemini-mcp-docker.ps1" + } + } +} +"@ +Write-Host "===== COPY ABOVE THIS LINE =====" -ForegroundColor Cyan +Write-Host "" +Write-Host "Alternative: If you prefer the direct Docker command (static workspace):" +Write-Host @" { "mcpServers": { "gemini": { @@ -54,11 +66,10 @@ Write-Host @" } } "@ -Write-Host "===== COPY ABOVE THIS LINE =====" -ForegroundColor Cyan Write-Host "" Write-Host "Config file location:" Write-Host " Windows: %APPDATA%\Claude\claude_desktop_config.json" Write-Host "" -Write-Host "Note: The configuration above mounts the current directory ($CurrentDir)" -Write-Host "as the workspace. You can change this path to any project directory you want to analyze." +Write-Host "Note: The first configuration uses a wrapper script that allows you to run Claude" +Write-Host "from any directory. The second configuration mounts a fixed directory ($CurrentDir)." Write-Host "Docker on Windows accepts both forward slashes and backslashes in paths." 
\ No newline at end of file diff --git a/setup-docker-env.sh b/setup-docker-env.sh index 58c6aef..f6b630f 100755 --- a/setup-docker-env.sh +++ b/setup-docker-env.sh @@ -17,9 +17,8 @@ else # Gemini MCP Server Docker Environment Configuration # Generated on $(date) -# The absolute path to your project root on the host machine -# This should be the directory containing your code that you want to analyze -WORKSPACE_ROOT=$CURRENT_DIR +# WORKSPACE_ROOT is not needed for the wrapper script approach +# It will be set dynamically when you run the container # Your Gemini API key (get one from https://makersuite.google.com/app/apikey) # IMPORTANT: Replace this with your actual API key @@ -46,7 +45,9 @@ echo " \"run\"," echo " \"--rm\"," echo " \"-i\"," echo " \"--env-file\", \"$CURRENT_DIR/.env\"," -echo " \"-v\", \"$CURRENT_DIR:/workspace:ro\"," +echo " \"-e\", \"WORKSPACE_ROOT=$HOME\"," +echo " \"-e\", \"MCP_PROJECT_ROOT=/workspace\"," +echo " \"-v\", \"$HOME:/workspace:ro\"," echo " \"gemini-mcp-server:latest\"" echo " ]" echo " }" @@ -58,5 +59,7 @@ echo "Config file location:" echo " macOS: ~/Library/Application Support/Claude/claude_desktop_config.json" echo " Windows: %APPDATA%\\Claude\\claude_desktop_config.json" echo "" -echo "Note: The configuration above mounts the current directory ($CURRENT_DIR)" -echo "as the workspace. You can change this path to any project directory you want to analyze." \ No newline at end of file +echo "Note: This configuration mounts your home directory ($HOME)." +echo "Docker can ONLY access files within the mounted directory." +echo "To mount a different directory, change the -v parameter." +echo "Example: -v \"/path/to/project:/workspace:ro\"" \ No newline at end of file diff --git a/tests/test_collaboration.py b/tests/test_collaboration.py index fe7ca7a..3786187 100644 --- a/tests/test_collaboration.py +++ b/tests/test_collaboration.py @@ -9,7 +9,7 @@ import pytest from tools.analyze import AnalyzeTool from tools.debug_issue import DebugIssueTool -from tools.models import ToolOutput, ClarificationRequest +from tools.models import ClarificationRequest, ToolOutput class TestDynamicContextRequests: diff --git a/tests/test_config.py b/tests/test_config.py index 1582aa2..c8dd95f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,16 +2,9 @@ Tests for configuration """ -from config import ( - GEMINI_MODEL, - MAX_CONTEXT_TOKENS, - TEMPERATURE_ANALYTICAL, - TEMPERATURE_BALANCED, - TEMPERATURE_CREATIVE, - __author__, - __updated__, - __version__, -) +from config import (GEMINI_MODEL, MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL, + TEMPERATURE_BALANCED, TEMPERATURE_CREATIVE, __author__, + __updated__, __version__) class TestConfig: diff --git a/tests/test_docker_path_integration.py b/tests/test_docker_path_integration.py new file mode 100644 index 0000000..a901010 --- /dev/null +++ b/tests/test_docker_path_integration.py @@ -0,0 +1,251 @@ +""" +Integration tests for Docker path translation + +These tests verify the actual behavior when running in a Docker-like environment +by creating temporary directories and testing the path translation logic. 
+""" + +import importlib +import os +import tempfile +from pathlib import Path + +import pytest + +# We'll reload the module to test different environment configurations +import utils.file_utils + + +def test_docker_path_translation_integration(): + """Test path translation in a simulated Docker environment""" + + with tempfile.TemporaryDirectory() as tmpdir: + # Set up directories + host_workspace = Path(tmpdir) / "host_workspace" + host_workspace.mkdir() + container_workspace = Path(tmpdir) / "container_workspace" + container_workspace.mkdir() + + # Create a test file structure + (host_workspace / "src").mkdir() + test_file = host_workspace / "src" / "test.py" + test_file.write_text("# test file") + + # Set environment variables and reload the module + original_env = os.environ.copy() + try: + os.environ["WORKSPACE_ROOT"] = str(host_workspace) + os.environ["MCP_PROJECT_ROOT"] = str(container_workspace) + + # Reload the module to pick up new environment variables + importlib.reload(utils.file_utils) + + # Mock the CONTAINER_WORKSPACE to point to our test directory + utils.file_utils.CONTAINER_WORKSPACE = container_workspace + + # Test the translation + from utils.file_utils import _get_secure_container_path + + # This should translate the host path to container path + host_path = str(test_file) + result = _get_secure_container_path(host_path) + + # Verify the translation worked + expected = str(container_workspace / "src" / "test.py") + assert result == expected + + finally: + # Restore original environment + os.environ.clear() + os.environ.update(original_env) + importlib.reload(utils.file_utils) + + +def test_docker_security_validation(): + """Test that path traversal attempts are properly blocked""" + + with tempfile.TemporaryDirectory() as tmpdir: + # Set up directories + host_workspace = Path(tmpdir) / "workspace" + host_workspace.mkdir() + secret_dir = Path(tmpdir) / "secret" + secret_dir.mkdir() + secret_file = secret_dir / "password.txt" + secret_file.write_text("secret") + + # Create a symlink inside workspace pointing to secret + symlink = host_workspace / "link_to_secret" + symlink.symlink_to(secret_file) + + original_env = os.environ.copy() + try: + os.environ["WORKSPACE_ROOT"] = str(host_workspace) + os.environ["MCP_PROJECT_ROOT"] = str(host_workspace) + + # Reload the module + importlib.reload(utils.file_utils) + utils.file_utils.CONTAINER_WORKSPACE = Path("/workspace") + + from utils.file_utils import resolve_and_validate_path + + # Trying to access the symlink should fail + with pytest.raises(PermissionError): + resolve_and_validate_path(str(symlink)) + + finally: + os.environ.clear() + os.environ.update(original_env) + importlib.reload(utils.file_utils) + + +def test_no_docker_environment(): + """Test that paths are unchanged when Docker environment is not set""" + + original_env = os.environ.copy() + try: + # Clear Docker-related environment variables + os.environ.pop("WORKSPACE_ROOT", None) + os.environ.pop("MCP_PROJECT_ROOT", None) + + # Reload the module + importlib.reload(utils.file_utils) + + from utils.file_utils import _get_secure_container_path + + # Path should remain unchanged + test_path = "/some/random/path.py" + assert _get_secure_container_path(test_path) == test_path + + finally: + os.environ.clear() + os.environ.update(original_env) + importlib.reload(utils.file_utils) + + +def test_review_changes_docker_path_translation(): + """Test that review_changes tool properly translates Docker paths""" + + with tempfile.TemporaryDirectory() as tmpdir: + # Set up 
directories to simulate Docker mount + host_workspace = Path(tmpdir) / "host_workspace" + host_workspace.mkdir() + container_workspace = Path(tmpdir) / "container_workspace" + container_workspace.mkdir() + + # Create a git repository in the container workspace + project_dir = container_workspace / "project" + project_dir.mkdir() + + # Initialize git repo + import subprocess + + subprocess.run(["git", "init"], cwd=project_dir, capture_output=True) + + # Create a test file + test_file = project_dir / "test.py" + test_file.write_text("print('hello')") + + # Stage the file + subprocess.run(["git", "add", "test.py"], cwd=project_dir, capture_output=True) + + original_env = os.environ.copy() + try: + # Simulate Docker environment + os.environ["WORKSPACE_ROOT"] = str(host_workspace) + os.environ["MCP_PROJECT_ROOT"] = str(container_workspace) + + # Reload the module + importlib.reload(utils.file_utils) + utils.file_utils.CONTAINER_WORKSPACE = container_workspace + + # Import after reloading to get updated environment + from tools.review_changes import ReviewChanges + + # Create tool instance + tool = ReviewChanges() + + # Test path translation in prepare_prompt + request = tool.get_request_model()( + path=str( + host_workspace / "project" + ), # Host path that needs translation + review_type="quick", + severity_filter="all", + ) + + # This should translate the path and find the git repository + import asyncio + + result = asyncio.run(tool.prepare_prompt(request)) + + # Should find the repository (not raise an error about inaccessible path) + # If we get here without exception, the path was successfully translated + assert isinstance(result, str) + # The result should contain git diff information or indicate no changes + assert ( + "No git repositories found" not in result or "changes" in result.lower() + ) + + finally: + os.environ.clear() + os.environ.update(original_env) + importlib.reload(utils.file_utils) + + +def test_review_changes_docker_path_error(): + """Test that review_changes tool raises error for inaccessible paths""" + + with tempfile.TemporaryDirectory() as tmpdir: + # Set up directories to simulate Docker mount + host_workspace = Path(tmpdir) / "host_workspace" + host_workspace.mkdir() + container_workspace = Path(tmpdir) / "container_workspace" + container_workspace.mkdir() + + # Create a path outside the mounted workspace + outside_path = Path(tmpdir) / "outside_workspace" + outside_path.mkdir() + + original_env = os.environ.copy() + try: + # Simulate Docker environment + os.environ["WORKSPACE_ROOT"] = str(host_workspace) + os.environ["MCP_PROJECT_ROOT"] = str(container_workspace) + + # Reload the module + importlib.reload(utils.file_utils) + utils.file_utils.CONTAINER_WORKSPACE = container_workspace + + # Import after reloading to get updated environment + from tools.review_changes import ReviewChanges + + # Create tool instance + tool = ReviewChanges() + + # Test path translation with an inaccessible path + request = tool.get_request_model()( + path=str(outside_path), # Path outside the mounted workspace + review_type="quick", + severity_filter="all", + ) + + # This should raise a ValueError + import asyncio + + with pytest.raises(ValueError) as exc_info: + asyncio.run(tool.prepare_prompt(request)) + + # Check the error message + assert "not accessible from within the Docker container" in str( + exc_info.value + ) + assert "mounted workspace" in str(exc_info.value) + + finally: + os.environ.clear() + os.environ.update(original_env) + importlib.reload(utils.file_utils) + + +if 
__name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_large_prompt_handling.py b/tests/test_large_prompt_handling.py new file mode 100644 index 0000000..5ae48df --- /dev/null +++ b/tests/test_large_prompt_handling.py @@ -0,0 +1,313 @@ +""" +Tests for large prompt handling functionality. + +This test module verifies that the MCP server correctly handles +prompts that exceed the 50,000 character limit by requesting +Claude to save them to a file and resend. +""" + +import json +import os +import shutil +import tempfile +from unittest.mock import MagicMock, patch + +import pytest +from mcp.types import TextContent + +from config import MCP_PROMPT_SIZE_LIMIT +from tools.analyze import AnalyzeTool +from tools.chat import ChatTool +from tools.debug_issue import DebugIssueTool +from tools.review_changes import ReviewChanges +from tools.review_code import ReviewCodeTool +from tools.think_deeper import ThinkDeeperTool + + +class TestLargePromptHandling: + """Test suite for large prompt handling across all tools.""" + + @pytest.fixture + def large_prompt(self): + """Create a prompt larger than MCP_PROMPT_SIZE_LIMIT characters.""" + return "x" * (MCP_PROMPT_SIZE_LIMIT + 1000) + + @pytest.fixture + def normal_prompt(self): + """Create a normal-sized prompt.""" + return "This is a normal prompt that should work fine." + + @pytest.fixture + def temp_prompt_file(self, large_prompt): + """Create a temporary prompt.txt file with large content.""" + # Create temp file with exact name "prompt.txt" + temp_dir = tempfile.mkdtemp() + file_path = os.path.join(temp_dir, "prompt.txt") + with open(file_path, "w") as f: + f.write(large_prompt) + return file_path + + @pytest.mark.asyncio + async def test_chat_large_prompt_detection(self, large_prompt): + """Test that chat tool detects large prompts.""" + tool = ChatTool() + result = await tool.execute({"prompt": large_prompt}) + + assert len(result) == 1 + assert isinstance(result[0], TextContent) + + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + assert f"{MCP_PROMPT_SIZE_LIMIT:,} characters" in output["content"] + assert output["metadata"]["prompt_size"] == len(large_prompt) + assert output["metadata"]["limit"] == MCP_PROMPT_SIZE_LIMIT + + @pytest.mark.asyncio + async def test_chat_normal_prompt_works(self, normal_prompt): + """Test that chat tool works normally with regular prompts.""" + tool = ChatTool() + + # Mock the model to avoid actual API calls + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_response = MagicMock() + mock_response.candidates = [ + MagicMock( + content=MagicMock( + parts=[MagicMock(text="This is a test response")] + ), + finish_reason="STOP", + ) + ] + mock_model.generate_content.return_value = mock_response + mock_create_model.return_value = mock_model + + result = await tool.execute({"prompt": normal_prompt}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "This is a test response" in output["content"] + + @pytest.mark.asyncio + async def test_chat_prompt_file_handling(self, temp_prompt_file, large_prompt): + """Test that chat tool correctly handles prompt.txt files.""" + tool = ChatTool() + + # Mock the model + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_response = MagicMock() + mock_response.candidates = [ + MagicMock( + content=MagicMock(parts=[MagicMock(text="Processed large prompt")]), + 
finish_reason="STOP", + ) + ] + mock_model.generate_content.return_value = mock_response + mock_create_model.return_value = mock_model + + # Mock read_file_content to avoid security checks + with patch("tools.base.read_file_content") as mock_read_file: + mock_read_file.return_value = large_prompt + + # Execute with empty prompt and prompt.txt file + result = await tool.execute({"prompt": "", "files": [temp_prompt_file]}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + + # Verify read_file_content was called with the prompt file + mock_read_file.assert_called_once_with(temp_prompt_file) + + # Verify the large content was used + call_args = mock_model.generate_content.call_args[0][0] + assert large_prompt in call_args + + # Cleanup + temp_dir = os.path.dirname(temp_prompt_file) + shutil.rmtree(temp_dir) + + @pytest.mark.asyncio + async def test_think_deeper_large_analysis(self, large_prompt): + """Test that think_deeper tool detects large current_analysis.""" + tool = ThinkDeeperTool() + result = await tool.execute({"current_analysis": large_prompt}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_review_code_large_focus(self, large_prompt): + """Test that review_code tool detects large focus_on field.""" + tool = ReviewCodeTool() + result = await tool.execute( + {"files": ["/some/file.py"], "focus_on": large_prompt} + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_review_changes_large_original_request(self, large_prompt): + """Test that review_changes tool detects large original_request.""" + tool = ReviewChanges() + result = await tool.execute( + {"path": "/some/path", "original_request": large_prompt} + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_debug_issue_large_error_description(self, large_prompt): + """Test that debug_issue tool detects large error_description.""" + tool = DebugIssueTool() + result = await tool.execute({"error_description": large_prompt}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_debug_issue_large_error_context(self, large_prompt, normal_prompt): + """Test that debug_issue tool detects large error_context.""" + tool = DebugIssueTool() + result = await tool.execute( + {"error_description": normal_prompt, "error_context": large_prompt} + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_analyze_large_question(self, large_prompt): + """Test that analyze tool detects large question.""" + tool = AnalyzeTool() + result = await tool.execute( + {"files": ["/some/file.py"], "question": large_prompt} + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_multiple_files_with_prompt_txt(self, temp_prompt_file): + """Test handling of prompt.txt alongside other files.""" + tool = ChatTool() + other_file = "/some/other/file.py" + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_response = MagicMock() + 
mock_response.candidates = [ + MagicMock( + content=MagicMock(parts=[MagicMock(text="Success")]), + finish_reason="STOP", + ) + ] + mock_model.generate_content.return_value = mock_response + mock_create_model.return_value = mock_model + + # Mock read_files to avoid file system access + with patch("tools.chat.read_files") as mock_read_files: + mock_read_files.return_value = ("File content", "Summary") + + await tool.execute( + {"prompt": "", "files": [temp_prompt_file, other_file]} + ) + + # Verify prompt.txt was removed from files list + mock_read_files.assert_called_once() + files_arg = mock_read_files.call_args[0][0] + assert len(files_arg) == 1 + assert files_arg[0] == other_file + + temp_dir = os.path.dirname(temp_prompt_file) + shutil.rmtree(temp_dir) + + @pytest.mark.asyncio + async def test_boundary_case_exactly_at_limit(self): + """Test prompt exactly at MCP_PROMPT_SIZE_LIMIT characters (should pass).""" + tool = ChatTool() + exact_prompt = "x" * MCP_PROMPT_SIZE_LIMIT + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_response = MagicMock() + mock_response.candidates = [ + MagicMock( + content=MagicMock(parts=[MagicMock(text="Success")]), + finish_reason="STOP", + ) + ] + mock_model.generate_content.return_value = mock_response + mock_create_model.return_value = mock_model + + result = await tool.execute({"prompt": exact_prompt}) + output = json.loads(result[0].text) + assert output["status"] == "success" + + @pytest.mark.asyncio + async def test_boundary_case_just_over_limit(self): + """Test prompt just over MCP_PROMPT_SIZE_LIMIT characters (should trigger file request).""" + tool = ChatTool() + over_prompt = "x" * (MCP_PROMPT_SIZE_LIMIT + 1) + + result = await tool.execute({"prompt": over_prompt}) + output = json.loads(result[0].text) + assert output["status"] == "requires_file_prompt" + + @pytest.mark.asyncio + async def test_empty_prompt_no_file(self): + """Test empty prompt without prompt.txt file.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_response = MagicMock() + mock_response.candidates = [ + MagicMock( + content=MagicMock(parts=[MagicMock(text="Success")]), + finish_reason="STOP", + ) + ] + mock_model.generate_content.return_value = mock_response + mock_create_model.return_value = mock_model + + result = await tool.execute({"prompt": ""}) + output = json.loads(result[0].text) + assert output["status"] == "success" + + @pytest.mark.asyncio + async def test_prompt_file_read_error(self): + """Test handling when prompt.txt can't be read.""" + tool = ChatTool() + bad_file = "/nonexistent/prompt.txt" + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_response = MagicMock() + mock_response.candidates = [ + MagicMock( + content=MagicMock(parts=[MagicMock(text="Success")]), + finish_reason="STOP", + ) + ] + mock_model.generate_content.return_value = mock_response + mock_create_model.return_value = mock_model + + # Should continue with empty prompt when file can't be read + result = await tool.execute({"prompt": "", "files": [bad_file]}) + output = json.loads(result[0].text) + assert output["status"] == "success" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_live_integration.py b/tests/test_live_integration.py index fc3ba9d..d7782b6 100644 --- a/tests/test_live_integration.py +++ b/tests/test_live_integration.py @@ -9,10 +9,10 @@ Note: These tests are excluded from 
regular pytest runs to avoid API rate limits They confirm that the google-genai library integration works correctly with live data. """ +import asyncio import os import sys import tempfile -import asyncio from pathlib import Path # Add parent directory to path to allow imports diff --git a/tests/test_prompt_regression.py b/tests/test_prompt_regression.py new file mode 100644 index 0000000..82ef619 --- /dev/null +++ b/tests/test_prompt_regression.py @@ -0,0 +1,333 @@ +""" +Regression tests to ensure normal prompt handling still works after large prompt changes. + +This test module verifies that all tools continue to work correctly with +normal-sized prompts after implementing the large prompt handling feature. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from tools.analyze import AnalyzeTool +from tools.chat import ChatTool +from tools.debug_issue import DebugIssueTool +from tools.review_changes import ReviewChanges +from tools.review_code import ReviewCodeTool +from tools.think_deeper import ThinkDeeperTool + + +class TestPromptRegression: + """Regression test suite for normal prompt handling.""" + + @pytest.fixture + def mock_model_response(self): + """Create a mock model response.""" + + def _create_response(text="Test response"): + mock_response = MagicMock() + mock_response.candidates = [ + MagicMock( + content=MagicMock(parts=[MagicMock(text=text)]), + finish_reason="STOP", + ) + ] + return mock_response + + return _create_response + + @pytest.mark.asyncio + async def test_chat_normal_prompt(self, mock_model_response): + """Test chat tool with normal prompt.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response( + "This is a helpful response about Python." + ) + mock_create_model.return_value = mock_model + + result = await tool.execute({"prompt": "Explain Python decorators"}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "helpful response about Python" in output["content"] + + # Verify model was called + mock_model.generate_content.assert_called_once() + + @pytest.mark.asyncio + async def test_chat_with_files(self, mock_model_response): + """Test chat tool with files parameter.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response() + mock_create_model.return_value = mock_model + + # Mock file reading + with patch("tools.chat.read_files") as mock_read_files: + mock_read_files.return_value = ("File content here", "Summary") + + result = await tool.execute( + {"prompt": "Analyze this code", "files": ["/path/to/file.py"]} + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + mock_read_files.assert_called_once_with(["/path/to/file.py"]) + + @pytest.mark.asyncio + async def test_think_deeper_normal_analysis(self, mock_model_response): + """Test think_deeper tool with normal analysis.""" + tool = ThinkDeeperTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response( + "Here's a deeper analysis with edge cases..." 
+ ) + mock_create_model.return_value = mock_model + + result = await tool.execute( + { + "current_analysis": "I think we should use a cache for performance", + "problem_context": "Building a high-traffic API", + "focus_areas": ["scalability", "reliability"], + } + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "Extended Analysis by Gemini" in output["content"] + assert "deeper analysis" in output["content"] + + @pytest.mark.asyncio + async def test_review_code_normal_review(self, mock_model_response): + """Test review_code tool with normal inputs.""" + tool = ReviewCodeTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response( + "Found 3 issues: 1) Missing error handling..." + ) + mock_create_model.return_value = mock_model + + # Mock file reading + with patch("tools.review_code.read_files") as mock_read_files: + mock_read_files.return_value = ("def main(): pass", "1 file") + + result = await tool.execute( + { + "files": ["/path/to/code.py"], + "review_type": "security", + "focus_on": "Look for SQL injection vulnerabilities", + } + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "Found 3 issues" in output["content"] + + @pytest.mark.asyncio + async def test_review_changes_normal_request(self, mock_model_response): + """Test review_changes tool with normal original_request.""" + tool = ReviewChanges() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response( + "Changes look good, implementing feature as requested..." + ) + mock_create_model.return_value = mock_model + + # Mock git operations + with patch("tools.review_changes.find_git_repositories") as mock_find_repos: + with patch("tools.review_changes.get_git_status") as mock_git_status: + mock_find_repos.return_value = ["/path/to/repo"] + mock_git_status.return_value = { + "modified": ["file.py"], + "untracked": [], + } + + result = await tool.execute( + { + "path": "/path/to/repo", + "original_request": "Add user authentication feature with JWT tokens", + } + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + + @pytest.mark.asyncio + async def test_debug_issue_normal_error(self, mock_model_response): + """Test debug_issue tool with normal error description.""" + tool = DebugIssueTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response( + "Root cause: The variable is undefined. Fix: Initialize it..." 
+ ) + mock_create_model.return_value = mock_model + + result = await tool.execute( + { + "error_description": "TypeError: Cannot read property 'name' of undefined", + "error_context": "at line 42 in user.js\n console.log(user.name)", + "runtime_info": "Node.js v16.14.0", + } + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "Debug Analysis" in output["content"] + assert "Root cause" in output["content"] + + @pytest.mark.asyncio + async def test_analyze_normal_question(self, mock_model_response): + """Test analyze tool with normal question.""" + tool = AnalyzeTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response( + "The code follows MVC pattern with clear separation..." + ) + mock_create_model.return_value = mock_model + + # Mock file reading + with patch("tools.analyze.read_files") as mock_read_files: + mock_read_files.return_value = ("class UserController: ...", "3 files") + + result = await tool.execute( + { + "files": ["/path/to/project"], + "question": "What design patterns are used in this codebase?", + "analysis_type": "architecture", + } + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "MVC pattern" in output["content"] + + @pytest.mark.asyncio + async def test_empty_optional_fields(self, mock_model_response): + """Test tools work with empty optional fields.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response() + mock_create_model.return_value = mock_model + + # Test with no files parameter + result = await tool.execute({"prompt": "Hello"}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + + @pytest.mark.asyncio + async def test_thinking_modes_work(self, mock_model_response): + """Test that thinking modes are properly passed through.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response() + mock_create_model.return_value = mock_model + + result = await tool.execute( + {"prompt": "Test", "thinking_mode": "high", "temperature": 0.8} + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + + # Verify create_model was called with correct parameters + mock_create_model.assert_called_once() + call_args = mock_create_model.call_args + assert call_args[0][2] == "high" # thinking_mode + assert call_args[0][1] == 0.8 # temperature + + @pytest.mark.asyncio + async def test_special_characters_in_prompts(self, mock_model_response): + """Test prompts with special characters work correctly.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response() + mock_create_model.return_value = mock_model + + special_prompt = 'Test with "quotes" and\nnewlines\tand tabs' + result = await tool.execute({"prompt": special_prompt}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + + @pytest.mark.asyncio + async def test_mixed_file_paths(self, mock_model_response): + """Test handling of various file path formats.""" + tool = 
AnalyzeTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response() + mock_create_model.return_value = mock_model + + with patch("tools.analyze.read_files") as mock_read_files: + mock_read_files.return_value = ("Content", "Summary") + + result = await tool.execute( + { + "files": [ + "/absolute/path/file.py", + "/Users/name/project/src/", + "/home/user/code.js", + ], + "question": "Analyze these files", + } + ) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + mock_read_files.assert_called_once() + + @pytest.mark.asyncio + async def test_unicode_content(self, mock_model_response): + """Test handling of unicode content in prompts.""" + tool = ChatTool() + + with patch.object(tool, "create_model") as mock_create_model: + mock_model = MagicMock() + mock_model.generate_content.return_value = mock_model_response() + mock_create_model.return_value = mock_model + + unicode_prompt = "Explain this: 你好世界 مرحبا بالعالم" + result = await tool.execute({"prompt": unicode_prompt}) + + assert len(result) == 1 + output = json.loads(result[0].text) + assert output["status"] == "success" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_review_changes.py b/tests/test_review_changes.py index a2b1220..f186e86 100644 --- a/tests/test_review_changes.py +++ b/tests/test_review_changes.py @@ -7,10 +7,7 @@ from unittest.mock import Mock, patch import pytest -from tools.review_changes import ( - ReviewChanges, - ReviewChangesRequest, -) +from tools.review_changes import ReviewChanges, ReviewChangesRequest class TestReviewChangesTool: diff --git a/tests/test_thinking_modes.py b/tests/test_thinking_modes.py index 313836f..708eae5 100644 --- a/tests/test_thinking_modes.py +++ b/tests/test_thinking_modes.py @@ -25,7 +25,7 @@ class TestThinkingModes: def test_default_thinking_modes(self): """Test that tools have correct default thinking modes""" tools = [ - (ThinkDeeperTool(), "max"), + (ThinkDeeperTool(), "high"), (AnalyzeTool(), "medium"), (ReviewCodeTool(), "medium"), (DebugIssueTool(), "medium"), @@ -156,14 +156,14 @@ class TestThinkingModes: result = await tool.execute( { "current_analysis": "Initial analysis", - # Not specifying thinking_mode, should use default (max) + # Not specifying thinking_mode, should use default (high) } ) # Verify create_model was called with default thinking_mode mock_create_model.assert_called_once() args = mock_create_model.call_args[0] - assert args[2] == "max" + assert args[2] == "high" assert "Extended Analysis by Gemini" in result[0].text diff --git a/tests/test_tools.py b/tests/test_tools.py index b4d631a..c1db573 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -7,7 +7,8 @@ from unittest.mock import Mock, patch import pytest -from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool, ChatTool +from tools import (AnalyzeTool, ChatTool, DebugIssueTool, ReviewCodeTool, + ThinkDeeperTool) class TestThinkDeeperTool: @@ -47,8 +48,11 @@ class TestThinkDeeperTool: ) assert len(result) == 1 - assert "Extended Analysis by Gemini:" in result[0].text - assert "Extended analysis" in result[0].text + # Parse the JSON response + output = json.loads(result[0].text) + assert output["status"] == "success" + assert "Extended Analysis by Gemini" in output["content"] + assert "Extended analysis" in output["content"] class TestReviewCodeTool: diff --git 
a/tests/test_utils.py b/tests/test_utils.py index 859d6ac..cfda13d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,8 @@ Tests for utility functions """ -from utils import check_token_limit, estimate_tokens, read_file_content, read_files +from utils import (check_token_limit, estimate_tokens, read_file_content, + read_files) class TestFileUtils: diff --git a/tools/__init__.py b/tools/__init__.py index 1d66a1b..e9c4a7d 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -5,8 +5,8 @@ Tool implementations for Gemini MCP Server from .analyze import AnalyzeTool from .chat import ChatTool from .debug_issue import DebugIssueTool -from .review_code import ReviewCodeTool from .review_changes import ReviewChanges +from .review_code import ReviewCodeTool from .think_deeper import ThinkDeeperTool __all__ = [ diff --git a/tools/analyze.py b/tools/analyze.py index 3887f0c..dfd4da3 100644 --- a/tools/analyze.py +++ b/tools/analyze.py @@ -4,13 +4,15 @@ Analyze tool - General-purpose code and file analysis from typing import Any, Dict, List, Optional +from mcp.types import TextContent from pydantic import Field -from config import MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL +from config import TEMPERATURE_ANALYTICAL from prompts import ANALYZE_PROMPT -from utils import check_token_limit, read_files +from utils import read_files from .base import BaseTool, ToolRequest +from .models import ToolOutput class AnalyzeRequest(ToolRequest): @@ -99,18 +101,42 @@ class AnalyzeTool(BaseTool): def get_request_model(self): return AnalyzeRequest + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Override execute to check question size before processing""" + # First validate request + request_model = self.get_request_model() + request = request_model(**arguments) + + # Check question size + size_check = self.check_prompt_size(request.question) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Continue with normal execution + return await super().execute(arguments) + async def prepare_prompt(self, request: AnalyzeRequest) -> str: """Prepare the analysis prompt""" + # Check for prompt.txt in files + prompt_content, updated_files = self.handle_prompt_file(request.files) + + # If prompt.txt was found, use it as the question + if prompt_content: + request.question = prompt_content + + # Update request files list + if updated_files is not None: + request.files = updated_files + # Read all files file_content, summary = read_files(request.files) # Check token limits - within_limit, estimated_tokens = check_token_limit(file_content) - if not within_limit: - raise ValueError( - f"Files too large (~{estimated_tokens:,} tokens). " - f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." 
- ) + self._validate_token_limit(file_content, "Files") # Build analysis instructions analysis_focus = [] diff --git a/tools/base.py b/tools/base.py index 9475a66..a6f4765 100644 --- a/tools/base.py +++ b/tools/base.py @@ -13,17 +13,20 @@ Key responsibilities: - Support for clarification requests when more information is needed """ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Literal -import os import json +import os +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Literal, Optional from google import genai from google.genai import types from mcp.types import TextContent from pydantic import BaseModel, Field -from .models import ToolOutput, ClarificationRequest +from config import MCP_PROMPT_SIZE_LIMIT +from utils.file_utils import read_file_content + +from .models import ClarificationRequest, ToolOutput class ToolRequest(BaseModel): @@ -194,6 +197,80 @@ class BaseTool(ABC): return None + def check_prompt_size(self, text: str) -> Optional[Dict[str, Any]]: + """ + Check if a text field is too large for MCP's token limits. + + The MCP protocol has a combined request+response limit of ~25K tokens. + To ensure adequate space for responses, we limit prompt input to a + configurable character limit (default 50K chars ~= 10-12K tokens). + Larger prompts are handled by having Claude save them to a file, + bypassing MCP's token constraints while preserving response capacity. + + Args: + text: The text to check + + Returns: + Optional[Dict[str, Any]]: Response asking for file handling if too large, None otherwise + """ + if text and len(text) > MCP_PROMPT_SIZE_LIMIT: + return { + "status": "requires_file_prompt", + "content": ( + f"The prompt is too large for MCP's token limits (>{MCP_PROMPT_SIZE_LIMIT:,} characters). " + "Please save the prompt text to a temporary file named 'prompt.txt' and " + "resend the request with an empty prompt string and the absolute file path included " + "in the files parameter, along with any other files you wish to share as context." + ), + "content_type": "text", + "metadata": { + "prompt_size": len(text), + "limit": MCP_PROMPT_SIZE_LIMIT, + "instructions": "Save prompt to 'prompt.txt' and include absolute path in files parameter", + }, + } + return None + + def handle_prompt_file( + self, files: Optional[List[str]] + ) -> tuple[Optional[str], Optional[List[str]]]: + """ + Check for and handle prompt.txt in the files list. + + If prompt.txt is found, reads its content and removes it from the files list. + This file is treated specially as the main prompt, not as an embedded file. + + This mechanism allows us to work around MCP's ~25K token limit by having + Claude save large prompts to a file, effectively using the file transfer + mechanism to bypass token constraints while preserving response capacity. 
+ + Args: + files: List of file paths + + Returns: + tuple: (prompt_content, updated_files_list) + """ + if not files: + return None, files + + prompt_content = None + updated_files = [] + + for file_path in files: + # Check if the filename is exactly "prompt.txt" + # This ensures we don't match files like "myprompt.txt" or "prompt.txt.bak" + if os.path.basename(file_path) == "prompt.txt": + try: + prompt_content = read_file_content(file_path) + except Exception: + # If we can't read the file, we'll just skip it + # The error will be handled elsewhere + pass + else: + updated_files.append(file_path) + + return prompt_content, updated_files if updated_files else None + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: """ Execute the tool with the provided arguments. @@ -378,6 +455,30 @@ class BaseTool(ABC): """ return response + def _validate_token_limit(self, text: str, context_type: str = "Context") -> None: + """ + Validate token limit and raise ValueError if exceeded. + + This centralizes the token limit check that was previously duplicated + in all prepare_prompt methods across tools. + + Args: + text: The text to check + context_type: Description of what's being checked (for error message) + + Raises: + ValueError: If text exceeds MAX_CONTEXT_TOKENS + """ + from config import MAX_CONTEXT_TOKENS + from utils import check_token_limit + + within_limit, estimated_tokens = check_token_limit(text) + if not within_limit: + raise ValueError( + f"{context_type} too large (~{estimated_tokens:,} tokens). " + f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." + ) + def create_model( self, model_name: str, temperature: float, thinking_mode: str = "medium" ): diff --git a/tools/chat.py b/tools/chat.py index 91d287b..fb55b1e 100644 --- a/tools/chat.py +++ b/tools/chat.py @@ -4,13 +4,15 @@ Chat tool - General development chat and collaborative thinking from typing import Any, Dict, List, Optional +from mcp.types import TextContent from pydantic import Field -from config import MAX_CONTEXT_TOKENS, TEMPERATURE_BALANCED +from config import TEMPERATURE_BALANCED from prompts import CHAT_PROMPT -from utils import check_token_limit, read_files +from utils import read_files from .base import BaseTool, ToolRequest +from .models import ToolOutput class ChatRequest(ToolRequest): @@ -79,22 +81,43 @@ class ChatTool(BaseTool): def get_request_model(self): return ChatRequest + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Override execute to check prompt size before processing""" + # First validate request + request_model = self.get_request_model() + request = request_model(**arguments) + + # Check prompt size + size_check = self.check_prompt_size(request.prompt) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Continue with normal execution + return await super().execute(arguments) + async def prepare_prompt(self, request: ChatRequest) -> str: """Prepare the chat prompt with optional context files""" - user_content = request.prompt + # Check for prompt.txt in files + prompt_content, updated_files = self.handle_prompt_file(request.files) + + # Use prompt.txt content if available, otherwise use the prompt field + user_content = prompt_content if prompt_content else request.prompt + + # Update request files list + if updated_files is not None: + request.files = updated_files # Add context files if provided if request.files: file_content, _ = read_files(request.files) - user_content = 
f"{request.prompt}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===" + user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ====" # Check token limits - within_limit, estimated_tokens = check_token_limit(user_content) - if not within_limit: - raise ValueError( - f"Content too large (~{estimated_tokens:,} tokens). " - f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." - ) + self._validate_token_limit(user_content, "Content") # Combine system prompt with user content full_prompt = f"""{self.get_system_prompt()} diff --git a/tools/debug_issue.py b/tools/debug_issue.py index defbb63..c7efb77 100644 --- a/tools/debug_issue.py +++ b/tools/debug_issue.py @@ -4,13 +4,15 @@ Debug Issue tool - Root cause analysis and debugging assistance from typing import Any, Dict, List, Optional +from mcp.types import TextContent from pydantic import Field -from config import MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL +from config import TEMPERATURE_ANALYTICAL from prompts import DEBUG_ISSUE_PROMPT -from utils import check_token_limit, read_files +from utils import read_files from .base import BaseTool, ToolRequest +from .models import ToolOutput class DebugIssueRequest(ToolRequest): @@ -98,8 +100,51 @@ class DebugIssueTool(BaseTool): def get_request_model(self): return DebugIssueRequest + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Override execute to check error_description and error_context size before processing""" + # First validate request + request_model = self.get_request_model() + request = request_model(**arguments) + + # Check error_description size + size_check = self.check_prompt_size(request.error_description) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Check error_context size if provided + if request.error_context: + size_check = self.check_prompt_size(request.error_context) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Continue with normal execution + return await super().execute(arguments) + async def prepare_prompt(self, request: DebugIssueRequest) -> str: """Prepare the debugging prompt""" + # Check for prompt.txt in files + prompt_content, updated_files = self.handle_prompt_file(request.files) + + # If prompt.txt was found, use it as error_description or error_context + # Priority: if error_description is empty, use it there, otherwise use as error_context + if prompt_content: + if not request.error_description or request.error_description == "": + request.error_description = prompt_content + else: + request.error_context = prompt_content + + # Update request files list + if updated_files is not None: + request.files = updated_files + # Build context sections context_parts = [ f"=== ISSUE DESCRIPTION ===\n{request.error_description}\n=== END DESCRIPTION ===" @@ -130,12 +175,7 @@ class DebugIssueTool(BaseTool): full_context = "\n".join(context_parts) # Check token limits - within_limit, estimated_tokens = check_token_limit(full_context) - if not within_limit: - raise ValueError( - f"Context too large (~{estimated_tokens:,} tokens). " - f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." 
- ) + self._validate_token_limit(full_context, "Context") # Combine everything full_prompt = f"""{self.get_system_prompt()} diff --git a/tools/models.py b/tools/models.py index 72dbf53..5ac50cb 100644 --- a/tools/models.py +++ b/tools/models.py @@ -2,14 +2,17 @@ Data models for tool responses and interactions """ +from typing import Any, Dict, List, Literal, Optional + from pydantic import BaseModel, Field -from typing import Literal, Optional, Dict, Any, List class ToolOutput(BaseModel): """Standardized output format for all tools""" - status: Literal["success", "error", "requires_clarification"] = "success" + status: Literal[ + "success", "error", "requires_clarification", "requires_file_prompt" + ] = "success" content: str = Field(..., description="The main content/response from the tool") content_type: Literal["text", "markdown", "json"] = "text" metadata: Optional[Dict[str, Any]] = Field(default_factory=dict) diff --git a/tools/review_changes.py b/tools/review_changes.py index 2b47f69..30e15dc 100644 --- a/tools/review_changes.py +++ b/tools/review_changes.py @@ -6,15 +6,18 @@ import os import re from typing import Any, Dict, List, Literal, Optional +from mcp.types import TextContent from pydantic import Field from config import MAX_CONTEXT_TOKENS from prompts.tool_prompts import REVIEW_CHANGES_PROMPT -from utils.file_utils import read_files -from utils.git_utils import find_git_repositories, get_git_status, run_git_command +from utils.file_utils import _get_secure_container_path, read_files +from utils.git_utils import (find_git_repositories, get_git_status, + run_git_command) from utils.token_utils import estimate_tokens from .base import BaseTool, ToolRequest +from .models import ToolOutput class ReviewChangesRequest(ToolRequest): @@ -111,10 +114,51 @@ class ReviewChanges(BaseTool): # Limit length to avoid filesystem issues return name[:100] + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Override execute to check original_request size before processing""" + # First validate request + request_model = self.get_request_model() + request = request_model(**arguments) + + # Check original_request size if provided + if request.original_request: + size_check = self.check_prompt_size(request.original_request) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Continue with normal execution + return await super().execute(arguments) + async def prepare_prompt(self, request: ReviewChangesRequest) -> str: """Prepare the prompt with git diff information.""" + # Check for prompt.txt in files + prompt_content, updated_files = self.handle_prompt_file(request.files) + + # If prompt.txt was found, use it as original_request + if prompt_content: + request.original_request = prompt_content + + # Update request files list + if updated_files is not None: + request.files = updated_files + + # Translate the path if running in Docker + translated_path = _get_secure_container_path(request.path) + + # Check if the path translation resulted in an error path + if translated_path.startswith("/inaccessible/"): + raise ValueError( + f"The path '{request.path}' is not accessible from within the Docker container. " + f"The Docker container can only access files within the mounted workspace. " + f"Please ensure the path is within the mounted directory or adjust your Docker volume mounts." 
+ ) + # Find all git repositories - repositories = find_git_repositories(request.path, request.max_depth) + repositories = find_git_repositories(translated_path, request.max_depth) if not repositories: return "No git repositories found in the specified path." diff --git a/tools/review_code.py b/tools/review_code.py index a4593f5..45682a4 100644 --- a/tools/review_code.py +++ b/tools/review_code.py @@ -16,13 +16,15 @@ Key Features: from typing import Any, Dict, List, Optional +from mcp.types import TextContent from pydantic import Field -from config import MAX_CONTEXT_TOKENS, TEMPERATURE_ANALYTICAL +from config import TEMPERATURE_ANALYTICAL from prompts import REVIEW_CODE_PROMPT -from utils import check_token_limit, read_files +from utils import read_files from .base import BaseTool, ToolRequest +from .models import ToolOutput class ReviewCodeRequest(ToolRequest): @@ -128,6 +130,25 @@ class ReviewCodeTool(BaseTool): def get_request_model(self): return ReviewCodeRequest + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Override execute to check focus_on size before processing""" + # First validate request + request_model = self.get_request_model() + request = request_model(**arguments) + + # Check focus_on size if provided + if request.focus_on: + size_check = self.check_prompt_size(request.focus_on) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Continue with normal execution + return await super().execute(arguments) + async def prepare_prompt(self, request: ReviewCodeRequest) -> str: """ Prepare the code review prompt with customized instructions. @@ -144,16 +165,22 @@ class ReviewCodeTool(BaseTool): Raises: ValueError: If the code exceeds token limits """ + # Check for prompt.txt in files + prompt_content, updated_files = self.handle_prompt_file(request.files) + + # If prompt.txt was found, use it as focus_on + if prompt_content: + request.focus_on = prompt_content + + # Update request files list + if updated_files is not None: + request.files = updated_files + # Read all requested files, expanding directories as needed file_content, summary = read_files(request.files) # Validate that the code fits within model context limits - within_limit, estimated_tokens = check_token_limit(file_content) - if not within_limit: - raise ValueError( - f"Code too large (~{estimated_tokens:,} tokens). " - f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." - ) + self._validate_token_limit(file_content, "Code") # Build customized review instructions based on review type review_focus = [] diff --git a/tools/think_deeper.py b/tools/think_deeper.py index 7343818..ca5da2a 100644 --- a/tools/think_deeper.py +++ b/tools/think_deeper.py @@ -4,13 +4,15 @@ Think Deeper tool - Extended reasoning and problem-solving from typing import Any, Dict, List, Optional +from mcp.types import TextContent from pydantic import Field -from config import MAX_CONTEXT_TOKENS, TEMPERATURE_CREATIVE +from config import TEMPERATURE_CREATIVE from prompts import THINK_DEEPER_PROMPT -from utils import check_token_limit, read_files +from utils import read_files from .base import BaseTool, ToolRequest +from .models import ToolOutput class ThinkDeeperRequest(ToolRequest): @@ -44,7 +46,11 @@ class ThinkDeeperTool(BaseTool): "Use this when you need to extend your analysis, explore alternatives, or validate approaches. " "Perfect for: architecture decisions, complex bugs, performance challenges, security analysis. 
" "Triggers: 'think deeper', 'ultrathink', 'extend my analysis', 'explore alternatives'. " - "I'll challenge assumptions, find edge cases, and provide alternative solutions." + "I'll challenge assumptions, find edge cases, and provide alternative solutions. " + "IMPORTANT: Choose the appropriate thinking_mode based on task complexity - " + "'low' for quick analysis, 'medium' for standard problems, 'high' for complex issues (default), " + "'max' for extremely complex challenges requiring deepest analysis. " + "When in doubt, err on the side of a higher mode for truly deep thought and evaluation." ) def get_input_schema(self) -> Dict[str, Any]: @@ -79,7 +85,7 @@ class ThinkDeeperTool(BaseTool): "type": "string", "enum": ["minimal", "low", "medium", "high", "max"], "description": "Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768)", - "default": "max", + "default": "high", }, }, "required": ["current_analysis"], @@ -92,17 +98,47 @@ class ThinkDeeperTool(BaseTool): return TEMPERATURE_CREATIVE def get_default_thinking_mode(self) -> str: - """ThinkDeeper uses maximum thinking by default""" - return "max" + """ThinkDeeper uses high thinking by default""" + return "high" def get_request_model(self): return ThinkDeeperRequest + async def execute(self, arguments: Dict[str, Any]) -> List[TextContent]: + """Override execute to check current_analysis size before processing""" + # First validate request + request_model = self.get_request_model() + request = request_model(**arguments) + + # Check current_analysis size + size_check = self.check_prompt_size(request.current_analysis) + if size_check: + return [ + TextContent( + type="text", text=ToolOutput(**size_check).model_dump_json() + ) + ] + + # Continue with normal execution + return await super().execute(arguments) + async def prepare_prompt(self, request: ThinkDeeperRequest) -> str: """Prepare the full prompt for extended thinking""" + # Check for prompt.txt in files + prompt_content, updated_files = self.handle_prompt_file(request.files) + + # Use prompt.txt content if available, otherwise use the current_analysis field + current_analysis = ( + prompt_content if prompt_content else request.current_analysis + ) + + # Update request files list + if updated_files is not None: + request.files = updated_files + # Build context parts context_parts = [ - f"=== CLAUDE'S CURRENT ANALYSIS ===\n{request.current_analysis}\n=== END ANALYSIS ===" + f"=== CLAUDE'S CURRENT ANALYSIS ===\n{current_analysis}\n=== END ANALYSIS ===" ] if request.problem_context: @@ -120,12 +156,7 @@ class ThinkDeeperTool(BaseTool): full_context = "\n".join(context_parts) # Check token limits - within_limit, estimated_tokens = check_token_limit(full_context) - if not within_limit: - raise ValueError( - f"Context too large (~{estimated_tokens:,} tokens). " - f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." 
- ) + self._validate_token_limit(full_context, "Context") # Add focus areas instruction if specified focus_instruction = "" @@ -150,5 +181,33 @@ Please provide deep analysis that extends Claude's thinking with: return full_prompt def format_response(self, response: str, request: ThinkDeeperRequest) -> str: - """Format the response with clear attribution""" - return f"Extended Analysis by Gemini:\n\n{response}" + """Format the response with clear attribution and critical thinking prompt""" + return f"""## Extended Analysis by Gemini + +{response} + +--- + +## Critical Evaluation Required + +Claude, now that you've received Gemini's extended analysis, please: + +1. **Critically evaluate each suggestion** - Which points are truly valuable? Which might have limitations or trade-offs? + +2. **Consider technical constraints** - How do these suggestions fit with: + - Existing codebase patterns and conventions + - Performance and scalability requirements + - Security implications and best practices + - Architecture and design principles + +3. **Identify potential risks** - What could go wrong with each approach? Are there hidden complexities or edge cases? + +4. **Synthesize your final recommendation** - Based on: + - Your original analysis + - Gemini's suggestions and critiques + - Technical feasibility and correctness + - A balanced assessment of trade-offs + +5. **Formulate your conclusion** - What is the best technical solution considering all perspectives? + +Remember: Gemini's analysis is meant to challenge and extend your thinking, not replace it. Use these insights to arrive at a more robust, well-considered solution.""" diff --git a/utils/__init__.py b/utils/__init__.py index 7fee1d5..9df05bf 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -2,7 +2,8 @@ Utility functions for Gemini MCP Server """ -from .file_utils import read_file_content, read_files, expand_paths, CODE_EXTENSIONS +from .file_utils import (CODE_EXTENSIONS, expand_paths, read_file_content, + read_files) from .token_utils import check_token_limit, estimate_tokens __all__ = [ diff --git a/utils/file_utils.py b/utils/file_utils.py index 183e12e..13eee62 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -18,11 +18,14 @@ Security Model: - Symbolic links are resolved to ensure they stay within bounds """ +import logging import os from pathlib import Path -from typing import List, Optional, Tuple, Set +from typing import List, Optional, Set, Tuple -from .token_utils import estimate_tokens, MAX_CONTEXT_TOKENS +from .token_utils import MAX_CONTEXT_TOKENS, estimate_tokens + +logger = logging.getLogger(__name__) # Get workspace root for Docker path translation # When running in Docker with a mounted workspace, WORKSPACE_ROOT contains @@ -141,53 +144,71 @@ CODE_EXTENSIONS = { } -def translate_docker_path(path_str: str) -> str: +def _get_secure_container_path(path_str: str) -> str: """ - Translate host paths to container paths when running in Docker. + Securely translate host paths to container paths when running in Docker. - When running in Docker with WORKSPACE_ROOT set, this function translates - absolute paths from the host filesystem to their equivalent paths inside - the container. This enables seamless operation where Claude sends host - paths but the server runs in a container. + This function implements critical security measures: + 1. Uses os.path.realpath() to resolve symlinks before validation + 2. Validates that paths are within the mounted workspace + 3. 
Provides detailed logging for debugging Args: - path_str: Original path string from the client + path_str: Original path string from the client (potentially a host path) Returns: - Translated path string (unchanged if not in Docker mode) + Translated container path, or original path if not in Docker environment """ if not WORKSPACE_ROOT or not CONTAINER_WORKSPACE.exists(): - # Not running in Docker mode, return path unchanged + # Not in the configured Docker environment, no translation needed return path_str try: - # Resolve both paths to handle different path formats (forward/backslashes) - workspace_root_path = Path(WORKSPACE_ROOT).resolve() - host_path = Path(path_str).resolve() + # Use os.path.realpath for security - it resolves symlinks completely + # This prevents symlink attacks that could escape the workspace + real_workspace_root = Path(os.path.realpath(WORKSPACE_ROOT)) + real_host_path = Path(os.path.realpath(path_str)) - # Get the relative path from workspace root - relative_path = host_path.relative_to(workspace_root_path) + # Security check: ensure the path is within the mounted workspace + # This prevents path traversal attacks (e.g., ../../../etc/passwd) + relative_path = real_host_path.relative_to(real_workspace_root) - # Construct container path using forward slashes (Linux format in container) + # Construct the container path container_path = CONTAINER_WORKSPACE / relative_path - return container_path.as_posix() + + # Log the translation for debugging (but not sensitive paths) + if str(container_path) != path_str: + logger.info( + f"Translated host path to container: {path_str} -> {container_path}" + ) + + return str(container_path) except ValueError: - # Path is not within the workspace root, return unchanged - return path_str - except Exception: - # Any other error (invalid path, etc.), return unchanged - return path_str + # Path is not within the host's WORKSPACE_ROOT + # In Docker, we cannot access files outside the mounted volume + logger.warning( + f"Path '{path_str}' is outside the mounted workspace '{WORKSPACE_ROOT}'. " + f"Docker containers can only access files within the mounted directory." + ) + # Return a clear error path that will fail gracefully + return f"/inaccessible/outside/mounted/volume{path_str}" + except Exception as e: + # Log unexpected errors but don't expose internal details to clients + logger.warning(f"Path translation failed for '{path_str}': {type(e).__name__}") + # Return a clear error path that will fail gracefully + return f"/inaccessible/translation/error{path_str}" def resolve_and_validate_path(path_str: str) -> Path: """ - Validates that a path is absolute and resolves it. + Resolves, translates, and validates a path against security policies. This is the primary security function that ensures all file access - is properly sandboxed. It enforces two critical security policies: - 1. All paths must be absolute (no ambiguity) - 2. All paths must resolve to within PROJECT_ROOT (sandboxing) + is properly sandboxed. It enforces three critical policies: + 1. Translate host paths to container paths if applicable (Docker environment) + 2. All paths must be absolute (no ambiguity) + 3. 
All paths must resolve to within PROJECT_ROOT (sandboxing) Args: path_str: Path string (must be absolute) @@ -196,16 +217,17 @@ def resolve_and_validate_path(path_str: str) -> Path: Resolved Path object that is guaranteed to be within PROJECT_ROOT Raises: - ValueError: If path is not absolute + ValueError: If path is not absolute or otherwise invalid PermissionError: If path is outside allowed directory """ - # Translate Docker paths if necessary - path_str = translate_docker_path(path_str) + # Step 1: Translate Docker paths first (if applicable) + # This must happen before any other validation + translated_path_str = _get_secure_container_path(path_str) - # Create a Path object from the user-provided path - user_path = Path(path_str) + # Step 2: Create a Path object from the (potentially translated) path + user_path = Path(translated_path_str) - # Security Policy 1: Require absolute paths to prevent ambiguity + # Step 3: Security Policy - Require absolute paths # Relative paths could be interpreted differently depending on working directory if not user_path.is_absolute(): raise ValueError( @@ -213,14 +235,20 @@ def resolve_and_validate_path(path_str: str) -> Path: f"Received: {path_str}" ) - # Resolve the absolute path (follows symlinks, removes .. and .) + # Step 4: Resolve the absolute path (follows symlinks, removes .. and .) + # This is critical for security as it reveals the true destination of symlinks resolved_path = user_path.resolve() - # Security Policy 2: Ensure the resolved path is within PROJECT_ROOT + # Step 5: Security Policy - Ensure the resolved path is within PROJECT_ROOT # This prevents directory traversal attacks (e.g., /project/../../../etc/passwd) try: resolved_path.relative_to(PROJECT_ROOT) except ValueError: + # Provide detailed error for debugging while avoiding information disclosure + logger.warning( + f"Access denied - path outside project root. " + f"Requested: {path_str}, Resolved: {resolved_path}, Root: {PROJECT_ROOT}" + ) raise PermissionError( f"Path outside project root: {path_str}\n" f"Project root: {PROJECT_ROOT}\n" @@ -319,7 +347,17 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> Tuple[str, i path = resolve_and_validate_path(file_path) except (ValueError, PermissionError) as e: # Return error in a format that provides context to the AI - content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {str(e)}\n--- END FILE ---\n" + error_msg = str(e) + # Add Docker-specific help if we're in Docker and path is inaccessible + if WORKSPACE_ROOT and CONTAINER_WORKSPACE.exists(): + # We're in Docker + error_msg = ( + f"File is outside the Docker mounted directory. " + f"When running in Docker, only files within the mounted workspace are accessible. " + f"Current mounted directory: {WORKSPACE_ROOT}. " + f"To access files in a different directory, please run Claude from that directory." + ) + content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {error_msg}\n--- END FILE ---\n" return content, estimate_tokens(content) try: diff --git a/utils/git_utils.py b/utils/git_utils.py index 3c085d5..4a262f6 100644 --- a/utils/git_utils.py +++ b/utils/git_utils.py @@ -18,9 +18,8 @@ Security Considerations: """ import subprocess -from typing import Dict, List, Tuple from pathlib import Path - +from typing import Dict, List, Tuple # Directories to ignore when searching for git repositories # These are typically build artifacts, dependencies, or cache directories