From 8ac5bbb5afb3396c01edf2fbecd3e57a0bcb8366 Mon Sep 17 00:00:00 2001 From: Fahad Date: Sat, 14 Jun 2025 00:26:59 +0400 Subject: [PATCH] Fixed workspace path mapping Refactoring Improved system prompts, more generalized Home folder protection and detection Retry logic for gemini --- README.md | 2 +- config.py | 4 +- docker-compose.yml | 4 +- docs/custom_models.md | 6 +- docs/logging.md | 43 ++ docs/setup-troubleshooting.md | 175 ------ docs/troubleshooting.md | 102 ++++ log_monitor.py | 33 ++ prompts/tool_prompts.py | 558 ++++++++---------- providers/gemini.py | 92 ++- server.py | 39 +- .../test_conversation_chain_validation.py | 7 +- simulator_tests/test_ollama_custom_url.py | 2 +- simulator_tests/test_openrouter_models.py | 4 +- test_mapping.py | 4 +- tests/test_claude_continuation.py | 4 +- tests/test_collaboration.py | 18 +- tests/test_file_protection.py | 300 ++++++++++ tests/test_large_prompt_handling.py | 16 +- tools/base.py | 62 +- tools/models.py | 6 +- utils/file_utils.py | 191 +++++- 22 files changed, 1094 insertions(+), 578 deletions(-) create mode 100644 docs/logging.md delete mode 100644 docs/setup-troubleshooting.md create mode 100644 docs/troubleshooting.md create mode 100644 tests/test_file_protection.py diff --git a/README.md b/README.md index c443f5e..4d5e795 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ and review into consideration to aid with its pre-commit review. - [Complete Advanced Guide](docs/advanced-usage.md) - Model configuration, thinking modes, workflows, tool parameters - **Setup & Support** - - [Setup & Troubleshooting Guide](docs/setup-troubleshooting.md) - Testing, troubleshooting, common issues + - [Troubleshooting Guide](docs/troubleshooting.md) - Common issues and debugging steps - [License](#license) - Apache 2.0 ## Why This Server? 
diff --git a/config.py b/config.py index 7cceff2..a153bf9 100644 --- a/config.py +++ b/config.py @@ -45,8 +45,8 @@ MODEL_CAPABILITIES_DESC = { } # Note: When only OpenRouter is configured, these model aliases automatically map to equivalent models: -# - "flash" → "google/gemini-flash-1.5-8b" -# - "pro" → "google/gemini-pro-1.5" +# - "flash" → "google/gemini-2.5-flash-preview-05-20" +# - "pro" → "google/gemini-2.5-pro-preview-06-05" # - "o3" → "openai/gpt-4o" # - "o3-mini" → "openai/gpt-4o-mini" diff --git a/docker-compose.yml b/docker-compose.yml index bc2d36f..d8dcb3f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,10 +44,12 @@ services: # Use HOME not PWD: Claude needs access to any absolute file path, not just current project, # and Claude Code could be running from multiple locations at the same time - WORKSPACE_ROOT=${WORKSPACE_ROOT:-${HOME}} + # USER_HOME helps detect and protect against scanning the home directory root + - USER_HOME=${HOME} - LOG_LEVEL=${LOG_LEVEL:-DEBUG} - PYTHONUNBUFFERED=1 volumes: - - ${HOME:-/tmp}:/workspace:ro + - ${WORKSPACE_ROOT:-${HOME}}:/workspace:ro - mcp_logs:/tmp # Shared volume for logs - /etc/localtime:/etc/localtime:ro stdin_open: true diff --git a/docs/custom_models.md b/docs/custom_models.md index aa4c9c1..45becd6 100644 --- a/docs/custom_models.md +++ b/docs/custom_models.md @@ -46,8 +46,8 @@ The server uses `conf/custom_models.json` to map convenient aliases to both Open | `haiku` | `anthropic/claude-3-haiku` | | `gpt4o`, `4o` | `openai/gpt-4o` | | `gpt4o-mini`, `4o-mini` | `openai/gpt-4o-mini` | -| `pro`, `gemini` | `google/gemini-pro-1.5` | -| `flash` | `google/gemini-flash-1.5-8b` | +| `pro`, `gemini` | `google/gemini-2.5-pro-preview-06-05` | +| `flash` | `google/gemini-2.5-flash-preview-05-20` | | `mistral` | `mistral/mistral-large` | | `deepseek`, `coder` | `deepseek/deepseek-coder` | | `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | @@ -156,7 +156,7 @@ CUSTOM_MODEL_NAME=your-loaded-model # OpenRouter models: "Use opus for deep analysis" # → anthropic/claude-3-opus "Use sonnet to review this code" # → anthropic/claude-3-sonnet -"Use pro via zen to analyze this" # → google/gemini-pro-1.5 +"Use pro via zen to analyze this" # → google/gemini-2.5-pro-preview-06-05 "Use gpt4o via zen to analyze this" # → openai/gpt-4o "Use mistral via zen to optimize" # → mistral/mistral-large diff --git a/docs/logging.md b/docs/logging.md new file mode 100644 index 0000000..cd977ae --- /dev/null +++ b/docs/logging.md @@ -0,0 +1,43 @@ +# Logging + +## Viewing Logs in Docker + +To monitor MCP server activity in real-time: + +```bash +# View all container logs +docker-compose logs -f +``` + +## Log Files + +Logs are stored in the container's `/tmp/` directory and rotate daily at midnight, keeping 7 days of history: + +- **`mcp_server.log`** - Main server operations +- **`mcp_activity.log`** - Tool calls and conversations +- **`mcp_server_overflow.log`** - Overflow protection for large logs + +## Accessing Log Files + +To access log files directly: + +```bash +# Enter the container +docker exec -it zen-mcp-server /bin/sh + +# View current logs +cat /tmp/mcp_server.log +cat /tmp/mcp_activity.log + +# View previous days (with date suffix) +cat /tmp/mcp_server.log.2024-06-14 +``` + +## Log Level + +Set verbosity with `LOG_LEVEL` in your `.env` file or docker-compose.yml: + +```yaml +environment: + - LOG_LEVEL=DEBUG # Options: DEBUG, INFO, WARNING, ERROR +``` \ No newline at end of file diff --git a/docs/setup-troubleshooting.md 
b/docs/setup-troubleshooting.md deleted file mode 100644 index 1fa56cf..0000000 --- a/docs/setup-troubleshooting.md +++ /dev/null @@ -1,175 +0,0 @@ -# Setup and Troubleshooting Guide - -This guide covers platform-specific setup instructions, file path requirements, testing procedures, and troubleshooting common issues. - -## Table of Contents - -- [File Path Requirements](#file-path-requirements) -- [Testing](#testing) -- [Troubleshooting](#troubleshooting) - -## Windows Users - -**Windows users must use WSL2** - Install WSL2 with Ubuntu, then follow the same setup as Linux/macOS. All commands should be run in your WSL2 terminal. - -```powershell -# Install WSL2 (run as Administrator in PowerShell) -wsl --install -d Ubuntu -``` - -Once WSL2 is installed, the setup process is identical to Linux/macOS. - -## File Path Requirements - -**All file paths must be absolute paths.** - -When using any tool, always provide absolute paths: -``` -✅ "Use zen to analyze /Users/you/project/src/main.py" -❌ "Use zen to analyze ./src/main.py" (will be rejected) -``` - -### Security & File Access - -By default, the server allows access to files within your home directory. This is necessary for the server to work with any file you might want to analyze from Claude. - -**For Docker environments**, the `WORKSPACE_ROOT` environment variable is used to map your local directory to the internal `/workspace` directory, enabling the MCP to translate absolute file references correctly: - -```json -"env": { - "GEMINI_API_KEY": "your-key", - "WORKSPACE_ROOT": "/Users/you/project" // Maps to /workspace inside Docker -} -``` - -This allows Claude to use absolute paths that will be correctly translated between your local filesystem and the Docker container. - -## Testing - -### Unit Tests (No API Key Required) -The project includes comprehensive unit tests that use mocks and don't require a Gemini API key: - -```bash -# Run all unit tests -python -m pytest tests/ -v - -# Run with coverage -python -m pytest tests/ --cov=. --cov-report=html -``` - -### Simulation Tests (API Key Required) -To test the MCP server with comprehensive end-to-end simulation: - -```bash -# Set your API keys (at least one required) -export GEMINI_API_KEY=your-gemini-api-key-here -export OPENAI_API_KEY=your-openai-api-key-here - -# Run all simulation tests (default: uses existing Docker containers) -python communication_simulator_test.py - -# Run specific tests only -python communication_simulator_test.py --tests basic_conversation content_validation - -# Run with Docker rebuild (if needed) -python communication_simulator_test.py --rebuild-docker - -# List available tests -python communication_simulator_test.py --list-tests -``` - -The simulation tests validate: -- Basic conversation flow with continuation -- File handling and deduplication -- Cross-tool conversation threading -- Redis memory persistence -- Docker container integration - -### GitHub Actions CI/CD -The project includes GitHub Actions workflows that: - -- **✅ Run unit tests automatically** - No API key needed, uses mocks -- **✅ Test on Python 3.10, 3.11, 3.12** - Ensures compatibility -- **✅ Run linting and formatting checks** - Maintains code quality - -The CI pipeline works without any secrets and will pass all tests using mocked responses. Simulation tests require API key secrets (`GEMINI_API_KEY` and/or `OPENAI_API_KEY`) to run the communication simulator. 
- -## Troubleshooting - -### Docker Issues - -**"Connection failed" in Claude Desktop** -- Ensure Docker services are running: `docker compose ps` -- Check if the container name is correct: `docker ps` to see actual container names -- Verify your .env file has at least one valid API key (GEMINI_API_KEY or OPENAI_API_KEY) - -**"API key environment variable is required"** -- Edit your .env file and add at least one API key (Gemini or OpenAI) -- Restart services: `docker compose restart` - -**Container fails to start** -- Check logs: `docker compose logs zen-mcp` -- Ensure Docker has enough resources (memory/disk space) -- Try rebuilding: `docker compose build --no-cache` - -**"spawn ENOENT" or execution issues** -- Verify the container is running: `docker compose ps` -- Check that Docker Desktop is running -- Ensure WSL2 integration is enabled in Docker Desktop (Windows users) - -**Testing your Docker setup:** -```bash -# Check if services are running -docker compose ps - -# Test manual connection -docker exec -i zen-mcp-server echo "Connection test" - -# View logs -docker compose logs -f -``` - -### Common Setup Issues - -**File permission issues** -- Use `sudo chmod +x setup-docker.sh` if the script isn't executable -- Ensure your user is in the docker group: `sudo usermod -aG docker $USER` - -**WSL2 issues (Windows users)** -- Ensure you're running Windows 10 version 2004+ or Windows 11 -- Enable Docker Desktop WSL2 integration in settings -- Always run commands in WSL2 terminal, not Windows Command Prompt - -### API Key Issues - -**Invalid API key errors** -- Double-check your API keys are correct -- Ensure there are no extra spaces or characters in your .env file -- For Gemini: Verify your key works at [Google AI Studio](https://makersuite.google.com/app/apikey) -- For OpenAI: Verify your key works at [OpenAI Platform](https://platform.openai.com/api-keys) - -**Rate limiting** -- Gemini free tier has limited access to latest models -- Consider upgrading to a paid API plan for better performance -- OpenAI O3 requires sufficient credits in your account - -### Performance Issues - -**Slow responses** -- Check your internet connection -- Try using a different model (e.g., Flash instead of Pro for faster responses) -- Use lower thinking modes to save tokens and reduce response time - -**High token usage** -- Review the [thinking modes section](advanced-usage.md#thinking-modes) to optimize costs -- Use `minimal` or `low` thinking modes for simple tasks -- Consider the auto mode to let Claude choose appropriate models - -### Getting Help - -If you encounter issues not covered here: - -1. **Check the logs**: `docker compose logs -f` -2. **Verify your setup**: Run through the quickstart guide again -3. **Test with simple commands**: Start with basic functionality before complex workflows -4. **Report bugs**: Create an issue at the project repository with detailed error messages and your setup information \ No newline at end of file diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..78909c9 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,102 @@ +# Troubleshooting Guide + +## Quick Debugging Steps + +If you're experiencing issues with the Zen MCP Server, follow these steps: + +### 1. Check MCP Connection + +Open Claude Desktop and type `/mcp` to see if zen is connected: +- ✅ If zen appears in the list, the connection is working +- ❌ If not listed or shows an error, continue to step 2 + +### 2. 
Launch Claude with Debug Mode + +Close Claude Desktop and restart with debug logging: + +```bash +# macOS/Linux +claude --debug + +# Windows (in WSL2) +claude.exe --debug +``` + +Look for error messages in the console output, especially: +- API key errors +- Docker connection issues +- File permission errors + +### 3. Verify API Keys + +Check that your API keys are properly set: + +```bash +# Check your .env file +cat .env + +# Ensure at least one key is set: +# GEMINI_API_KEY=your-key-here +# OPENAI_API_KEY=your-key-here +``` + +If you need to update your API keys, edit the `.env` file and then run: + +```bash +./setup-docker.sh +``` + +This will validate your configuration and restart the services. + +### 4. Check Docker Logs + +View the container logs for detailed error information: + +```bash +# Check if containers are running +docker-compose ps + +# View all logs +docker-compose logs -f + +# View specific service logs +docker-compose logs -f zen-mcp +``` + +See [Logging Documentation](logging.md) for more details on accessing logs. + +### 5. Common Issues + +**"Connection failed" in Claude Desktop** +- Ensure Docker is running: `docker ps` +- Restart services: `docker-compose restart` + +**"API key environment variable is required"** +- Add your API key to the `.env` file +- Run: `./setup-docker.sh` to validate and restart + +**File path errors** +- Always use absolute paths: `/Users/you/project/file.py` +- Never use relative paths: `./file.py` + +### 6. Still Having Issues? + +If the problem persists after trying these steps: + +1. **Reproduce the issue** - Note the exact steps that cause the problem +2. **Collect logs** - Save relevant error messages from Claude debug mode and Docker logs +3. **Open a GitHub issue** with: + - Your operating system + - Error messages + - Steps to reproduce + - What you've already tried + +## Windows Users + +**Important**: Windows users must use WSL2. Install it with: + +```powershell +wsl --install -d Ubuntu +``` + +Then follow the standard setup inside WSL2. 
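## Path Mapping (Illustration)

Inside Docker, the directory given by `WORKSPACE_ROOT` is mounted read-only at `/workspace` (see docker-compose.yml), which is why absolute host paths are required. The sketch below is only an illustration under that assumption; the helper name is invented here, and the real translation plus home-folder protection lives in `utils/file_utils.py`:

```python
import os

def to_container_path(host_path: str, workspace_root: str) -> str:
    """Illustrative only: map an absolute host path onto the /workspace mount."""
    if not os.path.isabs(host_path):
        raise ValueError("absolute paths are required")
    rel = os.path.relpath(host_path, workspace_root)
    return os.path.join("/workspace", rel)

# to_container_path("/Users/you/project/src/main.py", "/Users/you/project")
# -> "/workspace/src/main.py"
```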
\ No newline at end of file diff --git a/log_monitor.py b/log_monitor.py index fc5218e..09adfdf 100644 --- a/log_monitor.py +++ b/log_monitor.py @@ -14,27 +14,33 @@ def monitor_mcp_activity(): log_file = "/tmp/mcp_server.log" activity_file = "/tmp/mcp_activity.log" debug_file = "/tmp/gemini_debug.log" + overflow_file = "/tmp/mcp_server_overflow.log" print(f"[{datetime.now().strftime('%H:%M:%S')}] MCP Log Monitor started") print(f"[{datetime.now().strftime('%H:%M:%S')}] Monitoring: {log_file}") print(f"[{datetime.now().strftime('%H:%M:%S')}] Activity file: {activity_file}") print(f"[{datetime.now().strftime('%H:%M:%S')}] Debug file: {debug_file}") + print(f"[{datetime.now().strftime('%H:%M:%S')}] Overflow file: {overflow_file}") + print(f"[{datetime.now().strftime('%H:%M:%S')}] Note: Logs rotate daily at midnight, keeping 7 days of history") print("-" * 60) # Track file positions and sizes for rotation detection log_pos = 0 activity_pos = 0 debug_pos = 0 + overflow_pos = 0 # Track file sizes to detect rotation log_size = 0 activity_size = 0 debug_size = 0 + overflow_size = 0 # Ensure files exist Path(log_file).touch() Path(activity_file).touch() Path(debug_file).touch() + Path(overflow_file).touch() # Initialize file sizes if os.path.exists(log_file): @@ -46,6 +52,9 @@ def monitor_mcp_activity(): if os.path.exists(debug_file): debug_size = os.path.getsize(debug_file) debug_pos = debug_size # Start from end to avoid old logs + if os.path.exists(overflow_file): + overflow_size = os.path.getsize(overflow_file) + overflow_pos = overflow_size # Start from end to avoid old logs while True: try: @@ -139,6 +148,30 @@ def monitor_mcp_activity(): if line: print(f"[{datetime.now().strftime('%H:%M:%S')}] DEBUG: {line}") + # Check overflow file for warnings/errors when main log gets too large + if os.path.exists(overflow_file): + # Check for log rotation + current_overflow_size = os.path.getsize(overflow_file) + if current_overflow_size < overflow_size: + # File was rotated - start from beginning + overflow_pos = 0 + overflow_size = current_overflow_size + print(f"[{datetime.now().strftime('%H:%M:%S')}] Overflow log rotated - restarting from beginning") + + with open(overflow_file) as f: + f.seek(overflow_pos) + new_lines = f.readlines() + overflow_pos = f.tell() + overflow_size = current_overflow_size + + for line in new_lines: + line = line.strip() + if line: + if "ERROR" in line: + print(f"[{datetime.now().strftime('%H:%M:%S')}] 🚨 OVERFLOW: {line}") + elif "WARNING" in line: + print(f"[{datetime.now().strftime('%H:%M:%S')}] ⚠️ OVERFLOW: {line}") + time.sleep(0.5) # Check every 500ms except KeyboardInterrupt: diff --git a/prompts/tool_prompts.py b/prompts/tool_prompts.py index 4b08605..a220830 100644 --- a/prompts/tool_prompts.py +++ b/prompts/tool_prompts.py @@ -2,76 +2,55 @@ System prompts for each tool """ -THINKDEEP_PROMPT = """You are a senior development partner collaborating with Claude Code on complex problems. -Claude has shared their analysis with you for deeper exploration, validation, and extension. +THINKDEEP_PROMPT = """ +ROLE +You are a senior engineering collaborator working with Claude on complex software problems. Claude will send you content—analysis, prompts, questions, ideas, or theories—to deepen, validate, and extend. 
-IMPORTANT: If you need additional context (e.g., related files, system architecture, requirements) -to provide thorough analysis, you MUST respond ONLY with this JSON format: -{"status": "requires_clarification", "question": "Your specific question", - "files_needed": ["architecture.md", "requirements.txt"]} +IF MORE INFORMATION IS NEEDED +If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide +thorough analysis, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've +been provided unless for some reason its content is missing or incomplete: +{"status": "clarification_required", "question": "", + "files_needed": ["[file name here]", "[or some folder/]"]} -CRITICAL: First analyze the problem context to understand the technology stack, programming languages, -frameworks, and development environment. Then tailor your analysis to focus on the most relevant concerns -for that specific technology ecosystem while considering alternatives that might be more suitable. +GUIDELINES +1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints. +2. Stay on scope: avoid speculative or oversized ideas; keep suggestions practical and implementable. +3. Challenge and enrich: find gaps, question assumptions, surface hidden complexities. +4. Provide actionable next steps: concrete advice, trade-offs, and implementation tactics. +5. Use concise, direct, technical language; assume an experienced engineering audience. -Your role is to: -1. Build upon Claude's thinking - identify gaps, extend ideas, and suggest alternatives -2. Challenge assumptions constructively and identify potential issues -3. Provide concrete, actionable insights that complement Claude's analysis -4. Focus on aspects Claude might have missed or couldn't fully explore -5. Suggest implementation strategies and architectural improvements +KEY FOCUS AREAS (apply when relevant) +- Architecture & Design: modularity, patterns, API boundaries, dependencies +- Performance & Scalability: algorithm efficiency, concurrency, caching +- Security & Safety: validation, authentication/authorization, error handling, vulnerabilities +- Quality & Maintainability: readability, testing, monitoring, refactoring +- Integration & Deployment: external systems, compatibility, operational concerns -IMPORTANT: Your analysis will be critically evaluated by Claude before final decisions are made. -Focus on providing diverse perspectives, uncovering hidden complexities, and challenging assumptions -rather than providing definitive answers. Your goal is to enrich the decision-making process. +EVALUATION +Your response will be reviewed by Claude before any decision is made. Aim to enhance decision-making rather +than deliver final answers. -CRITICAL: Stay grounded to the specific project scope and requirements. Avoid speculative or overly -ambitious suggestions that deviate from the core problem being analyzed. Your insights should be -practical, actionable, and directly relevant to the current context and constraints. 
- -Key Analysis Areas (Apply where relevant to the specific context) - -### Technical Architecture & Design -- Code structure, organization, and modularity -- Design patterns and architectural decisions -- Interface design and API boundaries -- Dependency management and coupling - -### Performance & Scalability -- Algorithm efficiency and optimization opportunities -- Resource usage patterns and bottlenecks -- Concurrency and parallelism considerations -- Caching and data flow optimization - -### Security & Safety -- Input validation and data handling -- Authentication and authorization patterns -- Error handling and defensive programming -- Potential vulnerabilities and attack vectors - -### Quality & Maintainability -- Code clarity, readability, and documentation -- Testing strategies and coverage -- Error handling and monitoring -- Technical debt and refactoring opportunities - -### Integration & Compatibility -- External system interactions -- Backward compatibility considerations -- Cross-platform or cross-environment concerns -- Deployment and operational aspects - -Be direct and technical. Assume Claude and the user are experienced developers who want -deep, nuanced analysis rather than basic explanations. Your goal is to be the perfect -development partner that extends Claude's capabilities across diverse technology stacks.""" +REMINDERS +- Ground all insights in the current project's scope and constraints. +- If additional information is necessary, such as code snippets, files, project details, use the clarification JSON +- Prefer depth over breadth; propose alternatives ONLY when they materially improve the current approach and add value +- Your goal is to be the perfect development partner that extends Claude's capabilities and thought process +""" -CODEREVIEW_PROMPT = """You are an expert code reviewer with deep knowledge of software engineering best practices. -Your expertise spans security, performance, maintainability, and architectural patterns. +CODEREVIEW_PROMPT = """ +ROLE +You are an expert code reviewer with deep knowledge of software-engineering best practices across security, +performance, maintainability, and architecture. Your task is to review the code supplied by the user and deliver + precise, actionable feedback. -IMPORTANT: If you need additional context (e.g., related files, configuration, dependencies) to provide -a complete and accurate review, you MUST respond ONLY with this JSON format: -{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["file1.py", "config.py"]} +IF MORE INFORMATION IS NEEDED +If you need additional context (e.g., related files, configuration, dependencies) to provide +a complete and accurate review, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the +same file you've been provided unless for some reason its content is missing or incomplete: +{"status": "clarification_required", "question": "", + "files_needed": ["[file name here]", "[or some folder/]"]} CRITICAL: Align your review with the user's context and expectations. Focus on issues that matter for their specific use case, constraints, and objectives. Don't provide a generic "find everything" review - tailor @@ -83,84 +62,78 @@ Focus on concrete, actionable fixes for the specific code provided. DO NOT OVERSTEP: Limit your review to the actual code submitted. Do not suggest wholesale changes, technology migrations, or improvements unrelated to the specific issues found. 
Remain grounded in -the immediate task of reviewing the provided code for quality, security, and correctness. +the immediate task of reviewing the provided code for quality, security, and correctness. Avoid suggesting major +refactors, migrations, or unrelated "nice-to-haves." Your review approach: -1. First, understand the user's context, expectations, and constraints +1. First, understand the user's context, expectations, constraints and objectives 2. Identify issues that matter for their specific use case, in order of severity (Critical > High > Medium > Low) -3. Provide specific, actionable fixes with code examples -4. Consider security vulnerabilities, performance issues, and maintainability relevant to their goals -5. Acknowledge good practices when you see them -6. Be constructive but thorough - don't sugarcoat serious issues that impact their objectives +3. Provide specific, actionable, precise fixes with code snippets where helpful +4. Evaluate security, performance, and maintainability as they relate to the user's goals +5. Acknowledge well-implemented aspects to reinforce good practice +6. Remain constructive and unambiguous - do not downplay serious flaws +7. Where further investigation and analysis is required, be direct and suggest which code or related file needs to be +reviewed -Review categories (adapt based on technology stack and code structure): +SEVERITY DEFINITIONS +🔴 CRITICAL: Security flaws or defects that cause crashes, data loss, or undefined behavior +🟠 HIGH: Bugs, performance bottlenecks, or anti-patterns that impair usability or scalability +🟡 MEDIUM: Maintainability concerns, code smells, test gaps +🟢 LOW: Style nits or minor improvements -IMPORTANT: First analyze the codebase to understand the technology stack, frameworks, and patterns in use. -Then identify which of these recommended categories apply and consider additional technology-specific concerns. 
+EVALUATION AREAS (apply as relevant to the project or code) +- Security: Authentication/authorization flaws, input validation, crypto, sensitive-data handling +- Performance & Scalability: algorithmic complexity, resource usage, concurrency, caching +- Code Quality: readability, structure, error handling, documentation +- Testing: unit/integration coverage, edge cases, reliability of test suite +- Dependencies: version health, vulnerabilities, maintenance burden +- Architecture: modularity, design patterns, separation of concerns +- Operations: logging, monitoring, configuration management -**Recommended base categories:** -- 🔴 CRITICAL: Security vulnerabilities (including but not limited to): - - Authentication/authorization flaws - - Input validation vulnerabilities - - SQL/NoSQL/Command injection risks - - Cross-site scripting (XSS) vulnerabilities - - Sensitive data exposure or leakage - - Insecure cryptographic practices - - API security issues - - Session management flaws - - Data loss risks, crashes -- 🟠 HIGH: Bugs, performance issues, bad practices -- 🟡 MEDIUM: Code smells, maintainability issues -- 🟢 LOW: Style issues, minor improvements +OUTPUT FORMAT +For each issue use: -**Key areas to evaluate based on codebase characteristics:** -- **Security patterns**: Authentication, authorization, input validation, data protection -- **Performance considerations**: Algorithm efficiency, resource usage, scaling implications -- **Code quality**: Structure, readability, maintainability, error handling -- **Testing coverage**: Unit tests, integration tests, edge cases -- **Dependencies**: Security, compatibility, maintenance burden -- **Architecture**: Design patterns, modularity, separation of concerns -- **Operational aspects**: Logging, monitoring, configuration management +[SEVERITY] File:Line – Issue description +→ Fix: Specific solution (code example only if appropriate, and only as much as needed) -Always examine the code structure and imports to identify the specific technologies in use, then focus your -review on the most relevant categories for that technology stack. +After listing issues, add: +• **Overall code quality summary** (one short paragraph) +• **Top 3 priority fixes** (quick bullets) +• **Positive aspects** worth retaining -Format each issue as: -[SEVERITY] File:Line - Issue description -→ Fix: Specific solution with code example - -Also provide: -- Summary of overall code quality -- Top 3 priority fixes -- Positive aspects worth preserving""" +Remember: If required information is missing, use the clarification JSON above instead of guessing. +""" -DEBUG_ISSUE_PROMPT = """You are an expert debugger and problem solver. Your role is to analyze errors, -trace issues to their root cause, and provide actionable solutions. +DEBUG_ISSUE_PROMPT = """ +ROLE +You are an expert debugger and problem-solver. Analyze errors, trace root causes, and propose the minimal fix required. +Bugs can ONLY be found and fixed from given code. These cannot be made up or imagined. 
-IMPORTANT: If you lack critical information to proceed (e.g., missing files, ambiguous error details, +IF MORE INFORMATION IS NEEDED +If you lack critical information to proceed (e.g., missing files, ambiguous error details, insufficient context), OR if the provided diagnostics (log files, crash reports, stack traces) appear irrelevant, -incomplete, or insufficient for proper analysis, you MUST respond ONLY with this JSON format: -{ - "status": "requires_clarification", - "question": "What specific information you need from Claude or the user to proceed with debugging", - "files_needed": ["file1.py", "file2.py"] -} +incomplete, or insufficient for proper analysis, you MUST respond ONLY with this JSON format (and nothing else). +Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: +{"status": "clarification_required", "question": "", + "files_needed": ["[file name here]", "[or some folder/]"]} CRITICAL: Your primary objective is to identify the root cause of the specific issue at hand and suggest the minimal fix required to resolve it. Stay focused on the main problem - avoid suggesting extensive refactoring, architectural changes, or unrelated improvements. SCOPE DISCIPLINE: Address ONLY the reported issue. Do not propose additional optimizations, code cleanup, -or improvements beyond what's needed to fix the specific problem. Resist the urge to suggest broader changes +or improvements beyond what's needed to fix the specific problem. You are a debug assistant, trying to help identify +the root cause and minimal fix for an issue. Resist the urge to suggest broader changes even if you notice other potential issues. DEBUGGING STRATEGY: 1. Read and analyze ALL provided files, error messages, logs, and diagnostic information thoroughly 2. Understand any requirements, constraints, or context given in the problem description -3. Correlate diagnostics with code to identify the precise failure point -4. Work backwards from symptoms to find the underlying cause -5. Focus exclusively on resolving the reported issue with the simplest effective solution +3. If any information is incomplete or not enough, you must respond with the JSON format above and nothing else. +4. Correlate diagnostics and any given logs or error statements with code to identify the precise failure point +5. Work backwards from symptoms to find the underlying root cause +6. Focus exclusively on resolving the reported issue with the simplest effective solution Your debugging approach should generate focused hypotheses ranked by likelihood, with emphasis on identifying the exact root cause and implementing minimal, targeted fixes. @@ -173,254 +146,205 @@ introduce new issues or break existing functionality. Consider: - What potential side effects or unintended consequences might occur Review your suggested changes carefully and validate they solve ONLY the specific issue without causing regressions. -Use this format for structured debugging analysis: +OUTPUT FORMAT ## Summary -Brief description of the issue and its impact. +Brief description of the problem and its impact. ## Hypotheses (Ranked by Likelihood) ### 1. 
[HYPOTHESIS NAME] (Confidence: High/Medium/Low) -**Root Cause:** Specific technical explanation of what's causing the issue -**Evidence:** What in the error/context supports this hypothesis -**Correlation:** How diagnostics/symptoms directly point to this cause -**Validation:** Immediate action to test/validate this hypothesis -**Minimal Fix:** Smallest, most targeted change to resolve this specific issue -**Regression Check:** Analysis of how this fix might affect other parts of the system and confirmation it won't -introduce new issues +**Root Cause:** Technical explanation. +**Evidence:** Logs or code clues supporting this hypothesis. +**Correlation:** How symptoms map to the cause. +**Validation:** Quick test to confirm. +**Minimal Fix:** Smallest change to resolve the issue. +**Regression Check:** Why this fix is safe. -### 2. [HYPOTHESIS NAME] (Confidence: High/Medium/Low) -[Same format...] +### 2. [HYPOTHESIS NAME] (Confidence: …) +[Repeat format as above] ## Immediate Actions -Steps to take regardless of root cause (e.g., error handling, logging) +Steps to take regardless of which hypothesis is correct (e.g., extra logging). ## Prevention Strategy -*Only provide if specifically requested - focus on immediate fix first* -Minimal steps to prevent this specific issue from recurring, directly related to the root cause identified. -**Targeted recommendations:** Specific to the exact problem resolved, not general best practices""" +*Provide only if explicitly requested.* +Targeted measures to prevent this exact issue from recurring. +""" -ANALYZE_PROMPT = """You are an expert software analyst helping developers understand and work with code. -Your role is to provide deep, insightful analysis that helps developers make informed decisions. +ANALYZE_PROMPT = """ +ROLE +You are a senior software analyst performing a holistic technical audit of the given code or project. Your mission is +to help engineers understand how a codebase aligns with long-term goals, architectural soundness, scalability, +and maintainability—not just spot routine code-review issues. -IMPORTANT: If you need additional context (e.g., dependencies, configuration files, test files) -to provide complete analysis, you MUST respond ONLY with this JSON format: -{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["package.json", "tests/"]} +IF MORE INFORMATION IS NEEDED +If you need additional context (e.g., dependencies, configuration files, test files) to provide complete analysis, you +MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been provided unless +for some reason its content is missing or incomplete: +{"status": "clarification_required", "question": "", + "files_needed": ["[file name here]", "[or some folder/]"]} -CRITICAL: First analyze the codebase to understand the technology stack, programming languages, frameworks, -project type, and development patterns. Then tailor your analysis to focus on the most relevant concerns and -best practices for that specific technology ecosystem. 
+SCOPE & FOCUS +• Understand the code's purpose and architecture and the overall scope and scale of the project +• Identify strengths, risks, and strategic improvement areas that affect future development +• Avoid line-by-line bug hunts or minor style critiques—those are covered by CodeReview +• Recommend practical, proportional changes; no "rip-and-replace" proposals unless the architecture is untenable -STAY GROUNDED: Focus exclusively on analyzing the provided code and files. Do not suggest major architectural -changes, technology replacements, or extensive refactoring unless directly related to specific issues identified. -Keep recommendations practical and proportional to the scope of the analysis request. +ANALYSIS STRATEGY +1. Map the tech stack, frameworks, deployment model, and constraints +2. Determine how well current architecture serves stated business and scaling goals +3. Surface systemic risks (tech debt hot-spots, brittle modules, growth bottlenecks) +4. Highlight opportunities for strategic refactors or pattern adoption that yield high ROI +5. Provide clear, actionable insights with just enough detail to guide decision-making -Your analysis should: -1. Understand the code's purpose and architecture -2. Identify patterns and anti-patterns -3. Assess code quality and maintainability -4. Find potential issues or improvements -5. Provide actionable insights +KEY DIMENSIONS (apply as relevant) +• **Architectural Alignment** – layering, domain boundaries, CQRS/eventing, micro-vs-monolith fit +• **Scalability & Performance Trajectory** – data flow, caching strategy, concurrency model +• **Maintainability & Tech Debt** – module cohesion, coupling, code ownership, documentation health +• **Security & Compliance Posture** – systemic exposure points, secrets management, threat surfaces +• **Operational Readiness** – observability, deployment pipeline, rollback/DR strategy +• **Future Proofing** – ease of feature addition, language/version roadmap, community support -## Key Analysis Areas (Apply based on project context) +DELIVERABLE FORMAT -### Code Structure & Organization -- Module/package organization and boundaries -- Dependency management and coupling -- Interface design and API contracts -- Configuration and environment handling +## Executive Overview +One paragraph summarizing architecture fitness, key risks, and standout strengths. -### Quality & Maintainability -- Code clarity, readability, and documentation -- Error handling and defensive programming -- Testing strategies and coverage -- Performance characteristics and optimization opportunities +## Strategic Findings (Ordered by Impact) -### Project Architecture -- Design patterns and architectural decisions -- Data flow and state management -- Integration points and external dependencies -- Deployment and operational considerations +### 1. [FINDING NAME] +**Insight:** Very concise statement of what matters and why. +**Evidence:** Specific modules/files/metrics/code illustrating the point. +**Impact:** How this affects scalability, maintainability, or business goals. +**Recommendation:** Actionable next step (e.g., adopt pattern X, consolidate service Y). +**Effort vs. Benefit:** Relative estimate (Low/Medium/High effort; Low/Medium/High payoff). -Focus on (adapt priority based on project type and technology): +### 2. [FINDING NAME] +[Repeat format...] -1. 
**Security considerations (evaluate relevance to the technology stack):** - - Authentication and authorization patterns - - Input validation and sanitization - - Data handling and exposure risks - - Dependency vulnerabilities - - Cryptographic implementations - - API security design -2. **Architecture and design patterns (technology-appropriate):** - - Code structure and organization - - Design patterns and architectural decisions - - Modularity and separation of concerns - - Dependency management and coupling -3. **Performance and scalability (context-relevant):** - - Algorithm efficiency - - Resource usage patterns - - Concurrency and parallelism - - Caching strategies - - Database query optimization -4. **Code quality and maintainability:** - - Code clarity and readability - - Error handling patterns - - Logging and monitoring - - Testing coverage and quality - - Documentation completeness -5. **Technology-specific best practices:** - - Language idioms and conventions - - Framework usage patterns - - Platform-specific optimizations - - Community standards adherence +## Quick Wins +Bullet list of low-effort changes offering immediate value. -Be thorough but concise. Prioritize the most important findings and always provide -concrete examples and suggestions for improvement tailored to the specific technology stack.""" +## Long-Term Roadmap Suggestions +High-level guidance for phased improvements (optional—include only if explicitly requested). + +Remember: focus on system-level insights that inform strategic decisions; leave granular bug fixing and style nits to +the codereview tool. +""" -CHAT_PROMPT = """You are a senior development partner and collaborative thinking companion to Claude Code. -You excel at brainstorming, validating ideas, and providing thoughtful second opinions on technical decisions. +CHAT_PROMPT = """ +You are a senior engineering thought-partner collaborating with Claude. Your mission is to brainstorm, validate ideas, +and offer well-reasoned second opinions on technical decisions. -IMPORTANT: If Claude is discussing specific code, functions, or project components, and you need additional -context (e.g., related files, configuration, dependencies, test files) to provide meaningful collaboration, -you MUST respond ONLY with this JSON format: -{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["file1.py", "config.py"]} +IF MORE INFORMATION IS NEEDED +If Claude is discussing specific code, functions, or project components that was not given as part of the context, +and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful +collaboration, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been +provided unless for some reason its content is missing or incomplete: +{"status": "clarification_required", "question": "", + "files_needed": ["[file name here]", "[or some folder/]"]} -CRITICAL: Always understand the technology stack, programming languages, -frameworks, and development environment being discussed. Then ground your collaboration within that -specific technology ecosystem - focus on approaches, patterns, and solutions that are relevant and -appropriate for the actual project scope and constraints. Avoid suggesting technologies, frameworks, -or approaches that deviate significantly from the existing stack unless there's a compelling technical reason. 
-Focus on practical solutions that work within the current environment and constraints. +SCOPE & FOCUS +• Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints. +• Recommend new technologies or patterns ONLY with a clear, compelling benefit that aligns with stated goals. +• Keep proposals practical and implementable; avoid speculative or off-stack detours. -Your collaborative approach: -1. Engage deeply with shared ideas - build upon, extend, and explore alternatives within the project context -2. Think through edge cases, failure modes, and unintended consequences specific to the technology stack -3. Provide balanced perspectives considering trade-offs and implications relevant to the current environment -4. Challenge assumptions constructively while respecting the existing approach and technology choices -5. Offer concrete examples and actionable insights that fit within the project's scope and constraints +COLLABORATION APPROACH +1. Engage deeply with Claude's input - extend, refine, and explore alternatives within the existing context. +2. Examine edge cases, failure modes, and unintended consequences specific to the code / stack in use. +3. Present balanced perspectives, outlining trade-offs and their implications. +4. Challenge assumptions constructively while respecting current design choices and goals. +5. Provide concrete examples and actionable next steps that fit within scope. Direct, achievable next-steps where +needed. -When brainstorming or discussing: -- Consider multiple angles and approaches that are compatible with the existing technology stack -- Identify potential pitfalls early, especially those relevant to the specific frameworks/languages in use -- Suggest creative solutions and alternatives that work within the current project constraints -- Think about scalability, maintainability, and real-world usage within the existing architecture -- Draw from industry best practices and patterns specific to the technologies being used -- Focus on solutions that can be implemented with the current tools and infrastructure +BRAINSTORMING GUIDELINES +• Offer multiple viable strategies compatible with the current environment but keep it to the point. +• Suggest creative solutions and alternatives that work within the current project constraints, scope and limitations +• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice +• Evaluate scalability, maintainability, and operational realities inside the existing architecture and current +framework. +• Reference industry best practices relevant to the technologies in use +• Communicate concisely and technically, assuming an experienced engineering audience. -Always approach discussions as a peer - be direct, technical, and thorough. Your goal is to be -the ideal thinking partner who helps explore ideas deeply, validates approaches, and uncovers -insights that might be missed in solo analysis. Think step by step through complex problems -and don't hesitate to explore tangential but relevant considerations that remain within the -project's technological and architectural boundaries.""" +REMEMBER +Act as a peer, not a lecturer. Aim for depth over breadth, stay within project boundaries, and help the team +reach sound, actionable decisions. +""" -PRECOMMIT_PROMPT = """You are an expert code change analyst specializing in pre-commit review of git diffs. 
-Your role is to act as a seasoned senior developer performing a final review before code is committed. +PRECOMMIT_PROMPT = """ +ROLE +You are an expert pre-commit reviewer. Analyse git diffs as a senior developer giving a final sign-off to production. -IMPORTANT: If you need additional context (e.g., related files not in the diff, test files, -configuration) -to provide thorough analysis, you MUST respond ONLY with this JSON format: -{"status": "requires_clarification", "question": "Your specific question", - "files_needed": ["related_file.py", "tests/"]} +IF MORE INFORMATION IS NEEDED +If you need additional context (e.g., related files not in the diff, test files, configuration) to provide thorough +analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON +format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is +missing or incomplete: +{"status": "clarification_required", "question": "", + "files_needed": ["[file name here]", "[or some folder/]"]} -You will receive: -1. Git diffs showing staged/unstaged changes or branch comparisons -2. The original request describing what should be implemented -3. File paths and repository structure context +INPUTS PROVIDED +1. Git diff (staged or branch comparison) +2. Original request / acceptance criteria or some context around what changed +3. File names and related code -CRITICAL: First analyze the changes to understand the technology stack, frameworks, and patterns in use. -Then tailor your review to focus on the most relevant concerns for that specific technology stack while -ignoring categories that don't apply. +SCOPE & FOCUS +• Review **only** the changes in the diff and the given code +• From the diff, infer what got changed and why, determine if the changes make logical sense +• Ensure they correctly implement the request, are secure (where applicable), efficient, and maintainable and do not +cause potential regressions +• Do **not** propose broad refactors or off-scope improvements. -SCOPE LIMITATION: Review ONLY the specific changes shown in the diff. Do not suggest broader refactoring, -architectural changes, or improvements outside the scope of what's being committed. Focus on ensuring the -changes are correct, secure, and don't introduce issues. +REVIEW METHOD +1. Identify tech stack, frameworks, and patterns present in the diff. +2. Evaluate changes against the original request for completeness and intent alignment. +3. Detect issues, prioritising by severity (**Critical → High → Medium → Low**). +4. Highlight incomplete changes, or changes that would cause bugs, crashes or data loss or race conditions +5. Provide precise fixes or improvements; every issue must include a clear remediation. +6. Acknowledge good patterns to reinforce best practice. 
-Your review should focus on applicable areas from the following categories: +CORE ANALYSIS (adapt to the diff and stack) +• **Security** – injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety +• **Bugs & Logic Errors** – off-by-one, null refs, race conditions, incorrect branching +• **Performance** – inefficient algorithms, resource leaks, blocking operations +• **Code Quality** – DRY violations, complexity, SOLID adherence -## Core Analysis (Adapt based on code context and technology) -- **Security Vulnerabilities (CRITICAL PRIORITY - evaluate which apply to this codebase):** - - Injection flaws (SQL, NoSQL, OS command, LDAP, XPath, etc.) - if data persistence/system calls present - - Authentication and authorization weaknesses - if auth mechanisms present - - Sensitive data exposure (passwords, tokens, PII) - if handling sensitive data - - XML/XXE vulnerabilities - if XML processing present - - Broken access control - if access control mechanisms present - - Security misconfiguration - if configuration management present - - Cross-site scripting (XSS) - if web interfaces present - - Insecure deserialization - if serialization/deserialization present - - Using components with known vulnerabilities - if third-party dependencies present - - Insufficient logging and monitoring - if production/deployed code - - API security issues - if API endpoints present - - Memory safety issues - if manual memory management (C/C++/Rust/etc.) - - **Review ALL code changes, not just new additions** -- **Bugs & Logic Errors:** Off-by-one errors, null references, race conditions, incorrect assumptions -- **Performance Issues:** Inefficient algorithms, resource leaks, blocking operations - (adapt to application type) -- **Code Quality:** DRY violations, SOLID principle adherence, complexity - (universal but consider language idioms) +ADDITIONAL ANALYSIS (apply only when relevant) +• Language/runtime concerns – memory management, concurrency, exception handling +• System/integration – config handling, external calls, operational impact +• Testing – coverage gaps for new logic +• Change-specific pitfalls – unused new functions, partial enum updates, scope creep, risky deletions +• Determine if there are any new dependencies added but not declared, or new functionality added but not used +• Determine unintended side effects: could changes in file_A break module_B even if module_B wasn't changed? +• Flag changes unrelated to the original request that may introduce needless complexity or an anti-pattern +• Determine if there are code removal risks: was removed code truly dead, or could removal break functionality? +• Missing documentation around new methods / parameters, or missing comments around complex logic and code that +requires it -## Additional Analysis Areas (Apply only if relevant to the specific changes) -**Consider these categories based on what the code changes actually involve:** - -### Language & Runtime Concerns -- Memory management and resource handling -- Concurrency and thread safety -- Error handling and exception management -- Type safety and null handling -- Performance implications - -### System & Integration -- Security patterns and data protection -- External system interactions -- Configuration and environment handling -- Testing coverage and quality -- Deployment and operational impact - -## Change-Specific Analysis (Your Unique Value) -1. **Alignment with Intent:** Does this diff correctly and completely implement the original request? 
- Flag any missed requirements. - -2. **Incomplete Changes:** - - New functions added but never called - - API endpoints defined but no client code - - Enums/constants added but switch/if statements not updated - - Dependencies added but not properly used - -3. **Test Coverage Gaps:** Flag new business logic lacking corresponding test changes - -4. **Unintended Side Effects:** Could changes in file_A break module_B even if module_B wasn't changed? - -5. **Documentation Mismatches:** Were docstrings/docs updated for changed function signatures? - -6. **Configuration Risks:** What are downstream impacts of config changes? - -7. **Scope Creep:** Flag changes unrelated to the original request - -8. **Code Removal Risks:** Was removed code truly dead, or could removal break functionality? - -## Output Format +OUTPUT FORMAT ### Repository Summary -For each repository with changes: - -**Repository: /path/to/repo** -- Status: X files changed -- Overall: Brief assessment and critical issues count +**Repository:** /path/to/repo +- Files changed: X +- Overall assessment: brief statement with critical issue count ### Issues by Severity -[CRITICAL] Descriptive title +[CRITICAL] Short title - File: path/to/file.py:line -- Description: Clear explanation -- Fix: Specific solution with code +- Description: what & why +- Fix: specific change (code snippet if helpful) -[HIGH] Descriptive title -... +[HIGH] ... ### Recommendations - Top priority fixes before commit -- Suggestions for improvement -- Good practices to preserve +- Notable positives to keep -Be thorough but actionable. Every issue must have a clear fix. Acknowledge good changes when you see them.""" +Be thorough yet actionable. Focus on the diff, map every issue to a concrete fix, and keep comments aligned + with the stated implementation goals. Your goal is to help flag anything that could potentially slip through + and break critical, production quality code. 
+""" diff --git a/providers/gemini.py b/providers/gemini.py index 588ad2b..b63195e 100644 --- a/providers/gemini.py +++ b/providers/gemini.py @@ -1,5 +1,6 @@ """Gemini model provider implementation.""" +import time from typing import Optional from google import genai @@ -117,35 +118,74 @@ class GeminiModelProvider(ModelProvider): actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode]) generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget) - try: - # Generate content - response = self.client.models.generate_content( - model=resolved_name, - contents=full_prompt, - config=generation_config, - ) + # Retry logic with exponential backoff + max_retries = 2 # Total of 2 attempts (1 initial + 1 retry) + base_delay = 1.0 # Start with 1 second delay - # Extract usage information if available - usage = self._extract_usage(response) + last_exception = None - return ModelResponse( - content=response.text, - usage=usage, - model_name=resolved_name, - friendly_name="Gemini", - provider=ProviderType.GOOGLE, - metadata={ - "thinking_mode": thinking_mode if capabilities.supports_extended_thinking else None, - "finish_reason": ( - getattr(response.candidates[0], "finish_reason", "STOP") if response.candidates else "STOP" - ), - }, - ) + for attempt in range(max_retries): + try: + # Generate content + response = self.client.models.generate_content( + model=resolved_name, + contents=full_prompt, + config=generation_config, + ) - except Exception as e: - # Log error and re-raise with more context - error_msg = f"Gemini API error for model {resolved_name}: {str(e)}" - raise RuntimeError(error_msg) from e + # Extract usage information if available + usage = self._extract_usage(response) + + return ModelResponse( + content=response.text, + usage=usage, + model_name=resolved_name, + friendly_name="Gemini", + provider=ProviderType.GOOGLE, + metadata={ + "thinking_mode": thinking_mode if capabilities.supports_extended_thinking else None, + "finish_reason": ( + getattr(response.candidates[0], "finish_reason", "STOP") if response.candidates else "STOP" + ), + }, + ) + + except Exception as e: + last_exception = e + + # Check if this is a retryable error + error_str = str(e).lower() + is_retryable = any( + term in error_str + for term in [ + "timeout", + "connection", + "network", + "temporary", + "unavailable", + "retry", + "429", + "500", + "502", + "503", + "504", + ] + ) + + # If this is the last attempt or not retryable, give up + if attempt == max_retries - 1 or not is_retryable: + break + + # Calculate delay with exponential backoff + delay = base_delay * (2**attempt) + + # Log retry attempt (could add logging here if needed) + # For now, just sleep and retry + time.sleep(delay) + + # If we get here, all retries failed + error_msg = f"Gemini API error for model {resolved_name} after {max_retries} attempts: {str(last_exception)}" + raise RuntimeError(error_msg) from last_exception def count_tokens(self, text: str, model_name: str) -> int: """Count tokens for the given text using Gemini's tokenizer.""" diff --git a/server.py b/server.py index 4e5a8f0..7a0da6b 100644 --- a/server.py +++ b/server.py @@ -24,7 +24,7 @@ import os import sys import time from datetime import datetime -from logging.handlers import RotatingFileHandler +from logging.handlers import RotatingFileHandler, TimedRotatingFileHandler from typing import Any from mcp.server import Server @@ -81,21 +81,50 @@ for handler in logging.getLogger().handlers: 
handler.setFormatter(LocalTimeFormatter(log_format)) # Add rotating file handler for Docker log monitoring + try: - # Main server log with rotation (10MB max, keep 2 files) - file_handler = RotatingFileHandler("/tmp/mcp_server.log", maxBytes=10 * 1024 * 1024, backupCount=2) + # Main server log with daily rotation (keep 7 days of logs) + # Using 'midnight' interval rotates at midnight each day + # Filename will have date suffix like mcp_server.log.2024-06-14 + file_handler = TimedRotatingFileHandler( + "/tmp/mcp_server.log", + when="midnight", # Rotate at midnight + interval=1, # Every 1 day + backupCount=7, # Keep 7 days of logs + encoding="utf-8", + ) file_handler.setLevel(getattr(logging, log_level, logging.INFO)) file_handler.setFormatter(LocalTimeFormatter(log_format)) + # Add suffix pattern for rotated files + file_handler.suffix = "%Y-%m-%d" logging.getLogger().addHandler(file_handler) - # Create a special logger for MCP activity tracking with rotation + # Create a special logger for MCP activity tracking with daily rotation mcp_logger = logging.getLogger("mcp_activity") - mcp_file_handler = RotatingFileHandler("/tmp/mcp_activity.log", maxBytes=10 * 1024 * 1024, backupCount=2) + mcp_file_handler = TimedRotatingFileHandler( + "/tmp/mcp_activity.log", + when="midnight", # Rotate at midnight + interval=1, # Every 1 day + backupCount=7, # Keep 7 days of logs + encoding="utf-8", + ) mcp_file_handler.setLevel(logging.INFO) mcp_file_handler.setFormatter(LocalTimeFormatter("%(asctime)s - %(message)s")) + mcp_file_handler.suffix = "%Y-%m-%d" mcp_logger.addHandler(mcp_file_handler) mcp_logger.setLevel(logging.INFO) + # Also keep a size-based rotation as backup (100MB max per file) + # This prevents any single day's log from growing too large + from logging.handlers import RotatingFileHandler + + size_handler = RotatingFileHandler( + "/tmp/mcp_server_overflow.log", maxBytes=100 * 1024 * 1024, backupCount=3 # 100MB + ) + size_handler.setLevel(logging.WARNING) # Only warnings and errors + size_handler.setFormatter(LocalTimeFormatter(log_format)) + logging.getLogger().addHandler(size_handler) + except Exception as e: print(f"Warning: Could not set up file logging: {e}", file=sys.stderr) diff --git a/simulator_tests/test_conversation_chain_validation.py b/simulator_tests/test_conversation_chain_validation.py index af6eb11..639e753 100644 --- a/simulator_tests/test_conversation_chain_validation.py +++ b/simulator_tests/test_conversation_chain_validation.py @@ -98,6 +98,10 @@ class ConversationChainValidationTest(BaseSimulatorTest): '''Simple test function for conversation continuity testing''' return "Hello from conversation chain test" +def buggy_function(x, y): + '''Function with a bug - incorrect operator''' + return x - y # BUG: Should be x + y for addition + class TestClass: def method(self): return "Method in test class" @@ -223,7 +227,8 @@ class TestClass: response_a1_branch, continuation_id_a1_branch = self.call_mcp_tool( "debug", { - "prompt": "Let's debug this from a different angle now.", + "prompt": "buggy_function(5, 3) returns 2 but should return 8 for addition", + "error_context": "Unit test failure: expected buggy_function(5, 3) to return 8 (5+3) but got 2. Function appears to be subtracting instead of adding.", "files": [test_file_path], "continuation_id": continuation_id_a1, # Go back to original! 
"model": "flash", diff --git a/simulator_tests/test_ollama_custom_url.py b/simulator_tests/test_ollama_custom_url.py index 7a58a84..9451759 100644 --- a/simulator_tests/test_ollama_custom_url.py +++ b/simulator_tests/test_ollama_custom_url.py @@ -227,7 +227,7 @@ if __name__ == "__main__": ] # Special handling for clarification requests from local models - if "requires_clarification" in response.lower(): + if "clarification_required" in response.lower(): if files_provided: # If we provided actual files, clarification request is a FAILURE self.logger.error( diff --git a/simulator_tests/test_openrouter_models.py b/simulator_tests/test_openrouter_models.py index 1da13d4..63316d7 100644 --- a/simulator_tests/test_openrouter_models.py +++ b/simulator_tests/test_openrouter_models.py @@ -66,7 +66,7 @@ class OpenRouterModelsTest(BaseSimulatorTest): self.setup_test_files() # Test 1: Flash alias mapping to OpenRouter - self.logger.info(" 1: Testing 'flash' alias (should map to google/gemini-flash-1.5-8b)") + self.logger.info(" 1: Testing 'flash' alias (should map to google/gemini-2.5-flash-preview-05-20)") response1, continuation_id = self.call_mcp_tool( "chat", @@ -86,7 +86,7 @@ class OpenRouterModelsTest(BaseSimulatorTest): self.logger.info(f" ✅ Got continuation_id: {continuation_id}") # Test 2: Pro alias mapping to OpenRouter - self.logger.info(" 2: Testing 'pro' alias (should map to google/gemini-pro-1.5)") + self.logger.info(" 2: Testing 'pro' alias (should map to google/gemini-2.5-pro-preview-06-05)") response2, _ = self.call_mcp_tool( "chat", diff --git a/test_mapping.py b/test_mapping.py index 4bb6ea2..1ccde29 100644 --- a/test_mapping.py +++ b/test_mapping.py @@ -14,14 +14,14 @@ class MappingTest(BaseSimulatorTest): def test_mapping(self): """Test model alias mapping""" - # Test with 'flash' alias - should map to google/gemini-flash-1.5-8b + # Test with 'flash' alias - should map to google/gemini-2.5-flash-preview-05-20 print("\nTesting 'flash' alias mapping...") response, continuation_id = self.call_mcp_tool( "chat", { "prompt": "Say 'Hello from Flash model!'", - "model": "flash", # Should be mapped to google/gemini-flash-1.5-8b + "model": "flash", # Should be mapped to google/gemini-2.5-flash-preview-05-20 "temperature": 0.1, }, ) diff --git a/tests/test_claude_continuation.py b/tests/test_claude_continuation.py index 4f2a20b..5fd81d7 100644 --- a/tests/test_claude_continuation.py +++ b/tests/test_claude_continuation.py @@ -186,8 +186,8 @@ class TestClaudeContinuationOffers: offer = response_data["continuation_offer"] assert "continuation_id" in offer assert offer["remaining_turns"] == MAX_CONVERSATION_TURNS - 1 - assert "You have" in offer["message_to_user"] - assert "more exchange(s) available" in offer["message_to_user"] + assert "You have" in offer["note"] + assert "more exchange(s) available" in offer["note"] @patch("utils.conversation_memory.get_redis_client") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) diff --git a/tests/test_collaboration.py b/tests/test_collaboration.py index e2e5254..80d4ddf 100644 --- a/tests/test_collaboration.py +++ b/tests/test_collaboration.py @@ -31,7 +31,7 @@ class TestDynamicContextRequests: # Mock model to return a clarification request clarification_json = json.dumps( { - "status": "requires_clarification", + "status": "clarification_required", "question": "I need to see the package.json file to understand dependencies", "files_needed": ["package.json", "package-lock.json"], } @@ -56,7 +56,7 @@ class TestDynamicContextRequests: # 
Parse the response response_data = json.loads(result[0].text) - assert response_data["status"] == "requires_clarification" + assert response_data["status"] == "clarification_required" assert response_data["content_type"] == "json" # Parse the clarification request @@ -100,7 +100,7 @@ class TestDynamicContextRequests: @patch("tools.base.BaseTool.get_model_provider") async def test_malformed_clarification_request_treated_as_normal(self, mock_get_provider, analyze_tool): """Test that malformed JSON clarification requests are treated as normal responses""" - malformed_json = '{"status": "requires_clarification", "prompt": "Missing closing brace"' + malformed_json = '{"status": "clarification_required", "prompt": "Missing closing brace"' mock_provider = create_mock_provider() mock_provider.get_provider_type.return_value = Mock(value="google") @@ -125,7 +125,7 @@ class TestDynamicContextRequests: """Test clarification request with suggested next action""" clarification_json = json.dumps( { - "status": "requires_clarification", + "status": "clarification_required", "question": "I need to see the database configuration to diagnose the connection error", "files_needed": ["config/database.yml", "src/db.py"], "suggested_next_action": { @@ -160,7 +160,7 @@ class TestDynamicContextRequests: assert len(result) == 1 response_data = json.loads(result[0].text) - assert response_data["status"] == "requires_clarification" + assert response_data["status"] == "clarification_required" clarification = json.loads(response_data["content"]) assert "suggested_next_action" in clarification @@ -223,7 +223,7 @@ class TestCollaborationWorkflow: # Mock Gemini to request package.json when asked about dependencies clarification_json = json.dumps( { - "status": "requires_clarification", + "status": "clarification_required", "question": "I need to see the package.json file to analyze npm dependencies", "files_needed": ["package.json", "package-lock.json"], } @@ -247,7 +247,7 @@ class TestCollaborationWorkflow: response = json.loads(result[0].text) assert ( - response["status"] == "requires_clarification" + response["status"] == "clarification_required" ), "Should request clarification when asked about dependencies without package files" clarification = json.loads(response["content"]) @@ -262,7 +262,7 @@ class TestCollaborationWorkflow: # Step 1: Initial request returns clarification needed clarification_json = json.dumps( { - "status": "requires_clarification", + "status": "clarification_required", "question": "I need to see the configuration file to understand the connection settings", "files_needed": ["config.py"], } @@ -284,7 +284,7 @@ class TestCollaborationWorkflow: ) response1 = json.loads(result1[0].text) - assert response1["status"] == "requires_clarification" + assert response1["status"] == "clarification_required" # Step 2: Claude would provide additional context and re-invoke # This simulates the second call with more context diff --git a/tests/test_file_protection.py b/tests/test_file_protection.py new file mode 100644 index 0000000..434e432 --- /dev/null +++ b/tests/test_file_protection.py @@ -0,0 +1,300 @@ +""" +Test file protection mechanisms to ensure MCP doesn't scan: +1. Its own directory +2. User's home directory root +3. 
Excluded directories +""" + +import os +from pathlib import Path +from unittest.mock import patch + +from utils.file_utils import ( + MCP_SIGNATURE_FILES, + expand_paths, + get_user_home_directory, + is_home_directory_root, + is_mcp_directory, +) + + +class TestMCPDirectoryDetection: + """Test MCP self-detection to prevent scanning its own code.""" + + def test_detect_mcp_directory_with_all_signatures(self, tmp_path): + """Test detection when all signature files are present.""" + # Create a fake MCP directory with signature files + for sig_file in list(MCP_SIGNATURE_FILES)[:4]: # Use 4 files + if "/" in sig_file: + (tmp_path / sig_file).parent.mkdir(parents=True, exist_ok=True) + (tmp_path / sig_file).touch() + + assert is_mcp_directory(tmp_path) is True + + def test_no_detection_with_few_signatures(self, tmp_path): + """Test no detection with only 1-2 signature files.""" + # Create only 2 signature files (less than threshold) + for sig_file in list(MCP_SIGNATURE_FILES)[:2]: + if "/" in sig_file: + (tmp_path / sig_file).parent.mkdir(parents=True, exist_ok=True) + (tmp_path / sig_file).touch() + + assert is_mcp_directory(tmp_path) is False + + def test_no_detection_on_regular_directory(self, tmp_path): + """Test no detection on regular project directories.""" + # Create some random Python files + (tmp_path / "app.py").touch() + (tmp_path / "main.py").touch() + (tmp_path / "utils.py").touch() + + assert is_mcp_directory(tmp_path) is False + + def test_no_detection_on_file(self, tmp_path): + """Test no detection when path is a file, not directory.""" + file_path = tmp_path / "test.py" + file_path.touch() + + assert is_mcp_directory(file_path) is False + + def test_mcp_directory_excluded_from_scan(self, tmp_path): + """Test that MCP directories are excluded during path expansion.""" + # Create a project with MCP as subdirectory + project_root = tmp_path / "my_project" + project_root.mkdir() + + # Add some project files + (project_root / "app.py").write_text("# My app") + (project_root / "config.py").write_text("# Config") + + # Create MCP subdirectory + mcp_dir = project_root / "gemini-mcp-server" + mcp_dir.mkdir() + for sig_file in list(MCP_SIGNATURE_FILES)[:4]: + if "/" in sig_file: + (mcp_dir / sig_file).parent.mkdir(parents=True, exist_ok=True) + (mcp_dir / sig_file).write_text("# MCP file") + + # Also add a regular file to MCP dir + (mcp_dir / "test.py").write_text("# Should not be included") + + # Scan the project - use parent as SECURITY_ROOT to avoid workspace root check + with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + files = expand_paths([str(project_root)]) + + # Verify project files are included but MCP files are not + file_names = [Path(f).name for f in files] + assert "app.py" in file_names + assert "config.py" in file_names + assert "test.py" not in file_names # From MCP dir + assert "server.py" not in file_names # From MCP dir + + +class TestHomeDirectoryProtection: + """Test protection against scanning user's home directory root.""" + + def test_detect_exact_home_directory(self): + """Test detection of exact home directory path.""" + with patch("utils.file_utils.get_user_home_directory") as mock_home: + mock_home.return_value = Path("/Users/testuser") + + assert is_home_directory_root(Path("/Users/testuser")) is True + assert is_home_directory_root(Path("/Users/testuser/")) is True + + def test_allow_home_subdirectories(self): + """Test that subdirectories of home are allowed.""" + with patch("utils.file_utils.get_user_home_directory") as mock_home: + 
mock_home.return_value = Path("/Users/testuser") + + assert is_home_directory_root(Path("/Users/testuser/projects")) is False + assert is_home_directory_root(Path("/Users/testuser/Documents/code")) is False + + def test_detect_home_patterns_macos(self): + """Test detection of macOS home directory patterns.""" + # Test various macOS home patterns + assert is_home_directory_root(Path("/Users/john")) is True + assert is_home_directory_root(Path("/Users/jane")) is True + # But subdirectories should be allowed + assert is_home_directory_root(Path("/Users/john/projects")) is False + + def test_detect_home_patterns_linux(self): + """Test detection of Linux home directory patterns.""" + assert is_home_directory_root(Path("/home/ubuntu")) is True + assert is_home_directory_root(Path("/home/user")) is True + # But subdirectories should be allowed + assert is_home_directory_root(Path("/home/ubuntu/code")) is False + + def test_detect_home_patterns_windows(self): + """Test detection of Windows home directory patterns.""" + assert is_home_directory_root(Path("C:\\Users\\John")) is True + assert is_home_directory_root(Path("C:/Users/Jane")) is True + # But subdirectories should be allowed + assert is_home_directory_root(Path("C:\\Users\\John\\Documents")) is False + + def test_home_directory_excluded_from_scan(self, tmp_path): + """Test that home directory root is excluded during path expansion.""" + with patch("utils.file_utils.get_user_home_directory") as mock_home: + mock_home.return_value = tmp_path + with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + # Try to scan home directory + files = expand_paths([str(tmp_path)]) + # Should return empty as home root is skipped + assert files == [] + + +class TestUserHomeEnvironmentVariable: + """Test USER_HOME environment variable handling.""" + + def test_user_home_from_env(self): + """Test USER_HOME is used when set.""" + test_home = "/Users/dockeruser" + with patch.dict(os.environ, {"USER_HOME": test_home}): + home = get_user_home_directory() + assert home == Path(test_home).resolve() + + def test_fallback_to_workspace_root_in_docker(self): + """Test fallback to WORKSPACE_ROOT in Docker when USER_HOME not set.""" + with patch("utils.file_utils.WORKSPACE_ROOT", "/Users/realuser"): + with patch("utils.file_utils.CONTAINER_WORKSPACE") as mock_container: + mock_container.exists.return_value = True + # Clear USER_HOME to test fallback + with patch.dict(os.environ, {"USER_HOME": ""}, clear=False): + home = get_user_home_directory() + assert str(home) == "/Users/realuser" + + def test_fallback_to_system_home(self): + """Test fallback to system home when not in Docker.""" + with patch.dict(os.environ, {}, clear=True): + with patch("utils.file_utils.CONTAINER_WORKSPACE") as mock_container: + mock_container.exists.return_value = False + with patch("pathlib.Path.home") as mock_home: + mock_home.return_value = Path("/home/user") + home = get_user_home_directory() + assert home == Path("/home/user") + + +class TestExcludedDirectories: + """Test that excluded directories are properly filtered.""" + + def test_excluded_dirs_not_scanned(self, tmp_path): + """Test that directories in EXCLUDED_DIRS are skipped.""" + # Create a project with various directories + project = tmp_path / "project" + project.mkdir() + + # Create some allowed files + (project / "main.py").write_text("# Main") + (project / "app.py").write_text("# App") + + # Create excluded directories with files + for excluded in ["node_modules", ".git", "build", "__pycache__", ".venv"]: + excluded_dir = 
project / excluded + excluded_dir.mkdir() + (excluded_dir / "test.py").write_text("# Should not be included") + (excluded_dir / "data.json").write_text("{}") + + # Create a nested allowed directory + src = project / "src" + src.mkdir() + (src / "utils.py").write_text("# Utils") + + with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + files = expand_paths([str(project)]) + + file_names = [Path(f).name for f in files] + + # Check allowed files are included + assert "main.py" in file_names + assert "app.py" in file_names + assert "utils.py" in file_names + + # Check excluded files are not included + assert "test.py" not in file_names + assert "data.json" not in file_names + + def test_new_excluded_directories(self, tmp_path): + """Test newly added excluded directories like .next, .nuxt, etc.""" + project = tmp_path / "webapp" + project.mkdir() + + # Create files in new excluded directories + for excluded in [".next", ".nuxt", "bower_components", ".expo"]: + excluded_dir = project / excluded + excluded_dir.mkdir() + (excluded_dir / "generated.js").write_text("// Generated") + + # Create an allowed file + (project / "index.js").write_text("// Index") + + with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + files = expand_paths([str(project)]) + + file_names = [Path(f).name for f in files] + + assert "index.js" in file_names + assert "generated.js" not in file_names + + +class TestIntegrationScenarios: + """Test realistic integration scenarios.""" + + def test_project_with_mcp_clone_inside(self, tmp_path): + """Test scanning a project that has MCP cloned inside it.""" + # Setup: User project with MCP cloned as a tool + user_project = tmp_path / "my-awesome-project" + user_project.mkdir() + + # User's project files + (user_project / "README.md").write_text("# My Project") + (user_project / "main.py").write_text("print('Hello')") + src = user_project / "src" + src.mkdir() + (src / "app.py").write_text("# App code") + + # MCP cloned inside the project + mcp = user_project / "tools" / "gemini-mcp-server" + mcp.mkdir(parents=True) + for sig_file in list(MCP_SIGNATURE_FILES)[:4]: + if "/" in sig_file: + (mcp / sig_file).parent.mkdir(parents=True, exist_ok=True) + (mcp / sig_file).write_text("# MCP code") + (mcp / "LICENSE").write_text("MIT License") + (mcp / "README.md").write_text("# Gemini MCP") + + # Also add node_modules (should be excluded) + node_modules = user_project / "node_modules" + node_modules.mkdir() + (node_modules / "package.json").write_text("{}") + + with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + files = expand_paths([str(user_project)]) + + file_paths = [str(f) for f in files] + + # User files should be included + assert any("my-awesome-project/README.md" in p for p in file_paths) + assert any("my-awesome-project/main.py" in p for p in file_paths) + assert any("src/app.py" in p for p in file_paths) + + # MCP files should NOT be included + assert not any("gemini-mcp-server" in p for p in file_paths) + assert not any("zen_server.py" in p for p in file_paths) + + # node_modules should NOT be included + assert not any("node_modules" in p for p in file_paths) + + def test_cannot_scan_above_workspace_root(self, tmp_path): + """Test that we cannot scan outside the workspace root.""" + workspace = tmp_path / "workspace" + workspace.mkdir() + + # Create a file in workspace + (workspace / "allowed.py").write_text("# Allowed") + + # Create a file outside workspace + (tmp_path / "outside.py").write_text("# Outside") + + with patch("utils.file_utils.SECURITY_ROOT", 
workspace): + # Try to expand paths outside workspace - should return empty list + files = expand_paths([str(tmp_path)]) + assert files == [] # Path outside workspace is skipped silently diff --git a/tests/test_large_prompt_handling.py b/tests/test_large_prompt_handling.py index 943de76..0208630 100644 --- a/tests/test_large_prompt_handling.py +++ b/tests/test_large_prompt_handling.py @@ -57,7 +57,7 @@ class TestLargePromptHandling: assert isinstance(result[0], TextContent) output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" assert f"{MCP_PROMPT_SIZE_LIMIT:,} characters" in output["content"] assert output["metadata"]["prompt_size"] == len(large_prompt) assert output["metadata"]["limit"] == MCP_PROMPT_SIZE_LIMIT @@ -141,7 +141,7 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_codereview_large_focus(self, large_prompt): @@ -157,7 +157,7 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_review_changes_large_original_request(self, large_prompt): @@ -167,7 +167,7 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_debug_large_error_description(self, large_prompt): @@ -177,7 +177,7 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_debug_large_error_context(self, large_prompt, normal_prompt): @@ -187,7 +187,7 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_analyze_large_question(self, large_prompt): @@ -197,7 +197,7 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_multiple_files_with_prompt_txt(self, temp_prompt_file): @@ -262,7 +262,7 @@ class TestLargePromptHandling: result = await tool.execute({"prompt": over_prompt}) output = json.loads(result[0].text) - assert output["status"] == "requires_file_prompt" + assert output["status"] == "resend_prompt" @pytest.mark.asyncio async def test_empty_prompt_no_file(self): diff --git a/tools/base.py b/tools/base.py index 59622c6..ed9c1f5 100644 --- a/tools/base.py +++ b/tools/base.py @@ -687,18 +687,19 @@ When recommending searches, be specific about what information you need and why """ if text and len(text) > MCP_PROMPT_SIZE_LIMIT: return { - "status": "requires_file_prompt", + "status": "resend_prompt", "content": ( f"The prompt is too large for MCP's token limits (>{MCP_PROMPT_SIZE_LIMIT:,} characters). 
" - "Please save the prompt text to a temporary file named 'prompt.txt' and " - "resend the request with an empty prompt string and the absolute file path included " - "in the files parameter, along with any other files you wish to share as context." + "Please save the prompt text to a temporary file named 'prompt.txt' in the current directory and " + "resend request with the absolute file path in the files parameter, along with any other files " + "you wish to share as context. You may leave the prompt text itself empty." ), "content_type": "text", "metadata": { "prompt_size": len(text), "limit": MCP_PROMPT_SIZE_LIMIT, - "instructions": "Save prompt to 'prompt.txt' and include absolute path in files parameter", + "instructions": "Save prompt to 'prompt.txt' in current folder and include absolute path in files" + " parameter", }, } return None @@ -791,7 +792,7 @@ When recommending searches, be specific about what information you need and why # Set up logger for this tool execution logger = logging.getLogger(f"tools.{self.name}") - logger.info(f"Starting {self.name} tool execution with arguments: {list(arguments.keys())}") + logger.info(f"🔧 {self.name} tool called with arguments: {list(arguments.keys())}") # Validate request using the tool's Pydantic model # This ensures all required fields are present and properly typed @@ -911,7 +912,7 @@ When recommending searches, be specific about what information you need and why # Pass model info for conversation tracking model_info = {"provider": provider, "model_name": model_name, "model_response": model_response} tool_output = self._parse_response(raw_text, request, model_info) - logger.info(f"Successfully completed {self.name} tool execution") + logger.info(f"✅ {self.name} tool completed successfully") else: # Handle cases where the model couldn't generate a response @@ -990,16 +991,24 @@ When recommending searches, be specific about what information you need and why # Try to parse as JSON to check for clarification requests potential_json = json.loads(raw_text.strip()) - if isinstance(potential_json, dict) and potential_json.get("status") == "requires_clarification": + if isinstance(potential_json, dict) and potential_json.get("status") == "clarification_required": # Validate the clarification request structure clarification = ClarificationRequest(**potential_json) + logger.debug(f"{self.name} tool requested clarification: {clarification.question}") + # Extract model information for metadata + metadata = { + "original_request": (request.model_dump() if hasattr(request, "model_dump") else str(request)) + } + if model_info: + model_name = model_info.get("model_name") + if model_name: + metadata["model_used"] = model_name + return ToolOutput( - status="requires_clarification", + status="clarification_required", content=clarification.model_dump_json(), content_type="json", - metadata={ - "original_request": (request.model_dump() if hasattr(request, "model_dump") else str(request)) - }, + metadata=metadata, ) except (json.JSONDecodeError, ValueError, TypeError): @@ -1056,11 +1065,18 @@ When recommending searches, be specific about what information you need and why "markdown" if any(marker in formatted_content for marker in ["##", "**", "`", "- ", "1. 
"]) else "text" ) + # Extract model information for metadata + metadata = {"tool_name": self.name} + if model_info: + model_name = model_info.get("model_name") + if model_name: + metadata["model_used"] = model_name + return ToolOutput( status="success", content=formatted_content, content_type=content_type, - metadata={"tool_name": self.name}, + metadata=metadata, ) def _check_continuation_opportunity(self, request) -> Optional[dict]: @@ -1165,7 +1181,7 @@ When recommending searches, be specific about what information you need and why remaining_turns = continuation_data["remaining_turns"] continuation_offer = ContinuationOffer( continuation_id=thread_id, - message_to_user=( + note=( f"If you'd like to continue this discussion or need to provide me with further details or context, " f"you can use the continuation_id '{thread_id}' with any tool and any model. " f"You have {remaining_turns} more exchange(s) available in this conversation thread." @@ -1177,23 +1193,37 @@ When recommending searches, be specific about what information you need and why remaining_turns=remaining_turns, ) + # Extract model information for metadata + metadata = {"tool_name": self.name, "thread_id": thread_id, "remaining_turns": remaining_turns} + if model_info: + model_name = model_info.get("model_name") + if model_name: + metadata["model_used"] = model_name + return ToolOutput( status="continuation_available", content=content, content_type="markdown", continuation_offer=continuation_offer, - metadata={"tool_name": self.name, "thread_id": thread_id, "remaining_turns": remaining_turns}, + metadata=metadata, ) except Exception as e: # If threading fails, return normal response but log the error logger = logging.getLogger(f"tools.{self.name}") logger.warning(f"Conversation threading failed in {self.name}: {str(e)}") + # Extract model information for metadata + metadata = {"tool_name": self.name, "threading_error": str(e)} + if model_info: + model_name = model_info.get("model_name") + if model_name: + metadata["model_used"] = model_name + return ToolOutput( status="success", content=content, content_type="markdown", - metadata={"tool_name": self.name, "threading_error": str(e)}, + metadata=metadata, ) @abstractmethod diff --git a/tools/models.py b/tools/models.py index 5db924b..81825b9 100644 --- a/tools/models.py +++ b/tools/models.py @@ -13,7 +13,7 @@ class ContinuationOffer(BaseModel): continuation_id: str = Field( ..., description="Thread continuation ID for multi-turn conversations across different tools" ) - message_to_user: str = Field(..., description="Message explaining continuation opportunity to Claude") + note: str = Field(..., description="Message explaining continuation opportunity to Claude") suggested_tool_params: Optional[dict[str, Any]] = Field( None, description="Suggested parameters for continued tool usage" ) @@ -26,8 +26,8 @@ class ToolOutput(BaseModel): status: Literal[ "success", "error", - "requires_clarification", - "requires_file_prompt", + "clarification_required", + "resend_prompt", "continuation_available", ] = "success" content: Optional[str] = Field(None, description="The main content/response from the tool") diff --git a/utils/file_utils.py b/utils/file_utils.py index d52228a..3b8c1b3 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -119,8 +119,164 @@ EXCLUDED_DIRS = { ".tox", "htmlcov", ".coverage", + # Additional build and temp directories + "out", + ".next", + ".nuxt", + ".cache", + ".temp", + ".tmp", + "bower_components", + "vendor", + ".sass-cache", + ".gradle", + ".m2", 
+ "coverage", + # OS-specific directories + ".DS_Store", + "Thumbs.db", + # Python specific + "*.egg-info", + ".eggs", + "wheels", + ".Python", + # IDE and editor directories + ".sublime", + ".atom", + ".brackets", + "*.swp", + "*.swo", + "*~", + # Documentation build + "_build", + "site", + # Mobile development + ".expo", + ".flutter", } +# MCP signature files - presence of these indicates the MCP's own directory +# Used to prevent the MCP from scanning its own codebase +MCP_SIGNATURE_FILES = { + "zen_server.py", + "server.py", + "tools/precommit.py", + "utils/file_utils.py", + "prompts/tool_prompts.py", +} + + +def is_mcp_directory(path: Path) -> bool: + """ + Check if a directory is the MCP server's own directory. + + This prevents the MCP from including its own code when scanning projects + where the MCP has been cloned as a subdirectory. + + Args: + path: Directory path to check + + Returns: + True if this appears to be the MCP directory + """ + if not path.is_dir(): + return False + + # Check for multiple signature files to be sure + matches = 0 + for sig_file in MCP_SIGNATURE_FILES: + if (path / sig_file).exists(): + matches += 1 + if matches >= 3: # Require at least 3 matches to be certain + logger.info(f"Detected MCP directory at {path}, will exclude from scanning") + return True + return False + + +def get_user_home_directory() -> Optional[Path]: + """ + Get the user's home directory based on environment variables. + + In Docker, USER_HOME should be set to the mounted home path. + Outside Docker, we use Path.home() or environment variables. + + Returns: + User's home directory path or None if not determinable + """ + # Check for explicit USER_HOME env var (set in docker-compose.yml) + user_home = os.environ.get("USER_HOME") + if user_home: + return Path(user_home).resolve() + + # In container, check if we're running in Docker + if CONTAINER_WORKSPACE.exists(): + # We're in Docker but USER_HOME not set - use WORKSPACE_ROOT as fallback + if WORKSPACE_ROOT: + return Path(WORKSPACE_ROOT).resolve() + + # Outside Docker, use system home + return Path.home() + + +def is_home_directory_root(path: Path) -> bool: + """ + Check if the given path is the user's home directory root. + + This prevents scanning the entire home directory which could include + sensitive data and non-project files. + + Args: + path: Directory path to check + + Returns: + True if this is the home directory root + """ + user_home = get_user_home_directory() + if not user_home: + return False + + try: + resolved_path = path.resolve() + resolved_home = user_home.resolve() + + # Check if this is exactly the home directory + if resolved_path == resolved_home: + logger.warning( + f"Attempted to scan user home directory root: {path}. " f"Please specify a subdirectory instead." + ) + return True + + # Also check common home directory patterns + path_str = str(resolved_path).lower() + home_patterns = [ + "/users/", # macOS + "/home/", # Linux + "c:\\users\\", # Windows + "c:/users/", # Windows with forward slashes + ] + + for pattern in home_patterns: + if pattern in path_str: + # Extract the user directory path + # e.g., /Users/fahad or /home/username + parts = path_str.split(pattern) + if len(parts) > 1: + # Get the part after the pattern + after_pattern = parts[1] + # Check if we're at the user's root (no subdirectories) + if "/" not in after_pattern and "\\" not in after_pattern: + logger.warning( + f"Attempted to scan user home directory root: {path}. " + f"Please specify a subdirectory instead." 
+ ) + return True + + except Exception as e: + logger.debug(f"Error checking if path is home directory: {e}") + + return False + + # Common code file extensions that are automatically included when processing directories # This set can be extended to support additional file types CODE_EXTENSIONS = { @@ -344,13 +500,12 @@ def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> lis if not path_obj.exists(): continue - # Safety check: Prevent reading entire workspace root - # This could expose too many files and cause performance issues + # Safety checks for directory scanning if path_obj.is_dir(): resolved_workspace = SECURITY_ROOT.resolve() resolved_path = path_obj.resolve() - # Check if this is the entire workspace root directory + # Check 1: Prevent reading entire workspace root if resolved_path == resolved_workspace: logger.warning( f"Ignoring request to read entire workspace directory: {path}. " @@ -358,6 +513,20 @@ def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> lis ) continue + # Check 2: Prevent scanning user's home directory root + if is_home_directory_root(path_obj): + logger.warning( + f"Skipping home directory root: {path}. " f"Please specify a project subdirectory instead." + ) + continue + + # Check 3: Skip if this is the MCP's own directory + if is_mcp_directory(path_obj): + logger.info( + f"Skipping MCP server directory: {path}. " f"The MCP server code is excluded from project scans." + ) + continue + if path_obj.is_file(): # Add file directly if str(path_obj) not in seen: @@ -369,7 +538,21 @@ def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> lis for root, dirs, files in os.walk(path_obj): # Filter directories in-place to skip hidden and excluded directories # This prevents descending into .git, .venv, __pycache__, node_modules, etc. - dirs[:] = [d for d in dirs if not d.startswith(".") and d not in EXCLUDED_DIRS] + original_dirs = dirs[:] + dirs[:] = [] + for d in original_dirs: + # Skip hidden directories + if d.startswith("."): + continue + # Skip excluded directories + if d in EXCLUDED_DIRS: + continue + # Skip MCP directories found during traversal + dir_path = Path(root) / d + if is_mcp_directory(dir_path): + logger.debug(f"Skipping MCP directory during traversal: {dir_path}") + continue + dirs.append(d) for file in files: # Skip hidden files (e.g., .DS_Store, .gitignore)