diff --git a/README.md b/README.md
index 2973e17..cccf74f 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Claude is brilliant, but sometimes you need:
 - **Expert debugging** for tricky issues with full system context ([`debug_issue`](#3-debug_issue---expert-debugging-assistant))
 - **Professional code reviews** with actionable feedback across entire repositories ([`review_code`](#2-review_code---professional-code-review))
 - **A senior developer partner** to validate and extend ideas ([`chat`](#5-chat---general-development-chat--collaborative-thinking))
+- **Dynamic collaboration** - Gemini can request additional context from Claude mid-analysis for more thorough insights
 
 This server makes Gemini your development sidekick, handling what Claude can't or extending what Claude starts.
 
@@ -225,10 +226,12 @@ suggest preventive measures."
 ```
 
 **Key Features:**
+- Generates multiple ranked hypotheses for systematic debugging
 - Accepts error context, stack traces, and logs
 - Can reference relevant files for investigation
 - Supports runtime info and previous attempts
-- Provides root cause analysis and solutions
+- Provides structured root cause analysis with validation steps
+- Can request additional context when needed for thorough analysis
 
 **Triggers:** debug, error, failing, root cause, trace, not working
 
@@ -406,8 +409,50 @@ Tools can reference files for additional context:
 "Get gemini to think deeper about my design, reference the current architecture.md"
 ```
 
+### Tool Selection Guidance
+Use the following guide to choose the right tool for your needs:
+
+**Decision Flow:**
+1. **Have a specific error/exception?** → Use `debug_issue`
+2. **Want to find bugs/issues in code?** → Use `review_code`
+3. **Want to understand how code works?** → Use `analyze`
+4. **Have analysis that needs extension/validation?** → Use `think_deeper`
+5. **Want to brainstorm or discuss?** → Use `chat`
+
+**Key Distinctions:**
+- `analyze` vs `review_code`: analyze explains, review_code prescribes fixes
+- `chat` vs `think_deeper`: chat is open-ended, think_deeper extends specific analysis
+- `debug_issue` vs `review_code`: debug diagnoses runtime errors, review finds static issues
+
 ## Advanced Features
 
+### Dynamic Context Requests
+Tools can request additional context from Claude during execution. When Gemini needs more information to provide a thorough analysis, it will ask Claude for specific files or clarification, enabling true collaborative problem-solving.
+
+**Example:** If Gemini is debugging an error but needs to see a configuration file that wasn't initially provided, it can request:
+```json
+{
+  "status": "requires_clarification",
+  "question": "I need to see the database configuration to understand this connection error",
+  "files_needed": ["config/database.yml", "src/db_connection.py"]
+}
+```
+
+Claude will then provide the requested files, and Gemini can continue with a more complete analysis.
+
+### Standardized Response Format
+All tools now return structured JSON responses for consistent handling:
+```json
+{
+  "status": "success|error|requires_clarification",
+  "content": "The actual response content",
+  "content_type": "text|markdown|json",
+  "metadata": {"tool_name": "analyze", ...}
+}
+```
+
+This enables better integration, error handling, and support for the dynamic context request feature.
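As an illustration of how a caller might consume this envelope and drive the clarification loop, here is a minimal Python sketch. It assumes only the response format documented above; the `handle_tool_response` helper and the simulated payload are illustrative and not part of the server's API.

```python
import json
from typing import Any, Dict


def handle_tool_response(response_text: str) -> Dict[str, Any]:
    """Decide what to do with a standardized tool response."""
    response = json.loads(response_text)
    status = response.get("status")

    if status == "requires_clarification":
        # The clarification request is itself JSON, nested inside "content"
        clarification = json.loads(response["content"])
        return {
            "action": "provide_context",
            "question": clarification.get("question", ""),
            "files_needed": clarification.get("files_needed", []),
        }

    if status == "error":
        return {"action": "report_error", "message": response.get("content", "")}

    # "success": content is ready to use (text, markdown, or json)
    return {"action": "use_content", "content": response.get("content", "")}


if __name__ == "__main__":
    # Simulated response in the documented format (no live server required)
    sample = json.dumps(
        {
            "status": "requires_clarification",
            "content": json.dumps(
                {
                    "question": "I need to see the database configuration",
                    "files_needed": ["config/database.yml"],
                }
            ),
            "content_type": "json",
            "metadata": {"tool_name": "debug_issue"},
        }
    )
    print(handle_tool_response(sample))
```

Because the clarification request is JSON nested inside `content`, the caller decodes twice: once for the envelope and once for the request itself.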
+ ### Enhanced Thinking Models All tools support a `thinking_mode` parameter that controls Gemini's thinking budget for deeper reasoning: diff --git a/config.py b/config.py index 78eabd4..3ea5f0a 100644 --- a/config.py +++ b/config.py @@ -3,7 +3,7 @@ Configuration and constants for Gemini MCP Server """ # Version and metadata -__version__ = "2.7.0" +__version__ = "2.8.0" __updated__ = "2025-06-09" __author__ = "Fahad Gilani" diff --git a/demo_collaboration.py b/demo_collaboration.py new file mode 100644 index 0000000..d408da4 --- /dev/null +++ b/demo_collaboration.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +Demo script showing how Claude-Gemini collaboration works +with dynamic context requests. + +This demonstrates how tools can request additional context +and how Claude would handle these requests. +""" + +import asyncio +import json +import os +from tools.debug_issue import DebugIssueTool + + +async def simulate_collaboration(): + """Simulate a Claude-Gemini collaboration workflow""" + + print("🤝 Claude-Gemini Collaboration Demo\n") + print("Scenario: Claude asks Gemini to debug an import error") + print("-" * 50) + + # Initialize the debug tool + debug_tool = DebugIssueTool() + + # Step 1: Initial request without full context + print("\n1️⃣ Claude's initial request:") + print(" 'Debug this ImportError - the app can't find the utils module'") + + initial_request = { + "error_description": "ImportError: cannot import name 'config' from 'utils'", + "error_context": "Error occurs on line 5 of main.py when starting the application" + } + + print("\n Sending to Gemini...") + result = await debug_tool.execute(initial_request) + + # Parse the response + response = json.loads(result[0].text) + print(f"\n Gemini's response status: {response['status']}") + + if response['status'] == 'requires_clarification': + # Gemini needs more context + clarification = json.loads(response['content']) + print("\n2️⃣ Gemini requests additional context:") + print(f" Question: {clarification.get('question', 'N/A')}") + if 'files_needed' in clarification: + print(f" Files needed: {clarification['files_needed']}") + + # Step 2: Claude provides additional context + print("\n3️⃣ Claude provides the requested files:") + enhanced_request = { + **initial_request, + "files": clarification.get('files_needed', []), + "runtime_info": "Python 3.11, project structure includes utils/ directory" + } + + print(" Re-sending with additional context...") + result2 = await debug_tool.execute(enhanced_request) + + final_response = json.loads(result2[0].text) + print(f"\n4️⃣ Gemini's final analysis (status: {final_response['status']}):") + if final_response['status'] == 'success': + print("\n" + final_response['content'][:500] + "...") + + else: + # Gemini had enough context initially + print("\n✅ Gemini provided analysis without needing additional context:") + print("\n" + response['content'][:500] + "...") + + print("\n" + "=" * 50) + print("🎯 Key Points:") + print("- Tools return structured JSON with status field") + print("- Status 'requires_clarification' triggers context request") + print("- Claude can then provide additional files/info") + print("- Enables true collaborative problem-solving!") + + +async def main(): + """Run the demo""" + # Check for API key + if not os.environ.get("GEMINI_API_KEY"): + print("⚠️ Note: This is a simulated demo. 
Set GEMINI_API_KEY for live testing.") + print(" The actual behavior depends on Gemini's response.\n") + + await simulate_collaboration() + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/prompts/tool_prompts.py b/prompts/tool_prompts.py index 5fa0e67..147b221 100644 --- a/prompts/tool_prompts.py +++ b/prompts/tool_prompts.py @@ -5,6 +5,10 @@ System prompts for each tool THINK_DEEPER_PROMPT = """You are a senior development partner collaborating with Claude Code on complex problems. Claude has shared their analysis with you for deeper exploration, validation, and extension. +IMPORTANT: If you need additional context (e.g., related files, system architecture, requirements) +to provide thorough analysis, you MUST respond ONLY with this JSON format: +{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["architecture.md", "requirements.txt"]} + Your role is to: 1. Build upon Claude's thinking - identify gaps, extend ideas, and suggest alternatives 2. Challenge assumptions constructively and identify potential issues @@ -28,6 +32,10 @@ development partner that extends Claude's capabilities.""" REVIEW_CODE_PROMPT = """You are an expert code reviewer with deep knowledge of software engineering best practices. Your expertise spans security, performance, maintainability, and architectural patterns. +IMPORTANT: If you need additional context (e.g., related files, configuration, dependencies) to provide +a complete and accurate review, you MUST respond ONLY with this JSON format: +{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["file1.py", "config.py"]} + Your review approach: 1. Identify issues in order of severity (Critical > High > Medium > Low) 2. Provide specific, actionable fixes with code examples @@ -48,52 +56,47 @@ Format each issue as: Also provide: - Summary of overall code quality - Top 3 priority fixes -- Positive aspects worth preserving - -IMPORTANT - After completing the review, add this final section: ---- -### For Claude Code Integration - -Claude, based on this review and considering the current project context and any ongoing work: - -1. **Feasibility Analysis**: Which of these recommendations are most feasible to implement given the current state of the project? Consider dependencies, breaking changes, and effort required. - -2. **Recommended Next Steps**: What would be the most logical next action? Should we: - - Fix critical issues immediately? - - Create a TODO list for systematic implementation? - - Focus on a specific category (security, performance, etc.)? - - Research alternatives before making changes? - -3. **Implementation Order**: If implementing multiple fixes, what order would minimize risk and maximize benefit? - -Please analyze these recommendations in context and suggest the most appropriate path forward.""" +- Positive aspects worth preserving""" DEBUG_ISSUE_PROMPT = """You are an expert debugger and problem solver. Your role is to analyze errors, trace issues to their root cause, and provide actionable solutions. -Your debugging approach: -1. Analyze the error context and symptoms -2. Identify the most likely root causes -3. Trace through the code execution path -4. Consider environmental factors -5. 
Provide step-by-step solutions +IMPORTANT: If you lack critical information to proceed (e.g., missing files, ambiguous error details, +insufficient context), you MUST respond ONLY with this JSON format: +{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["file1.py", "file2.py"]} -For each issue: -- Identify the root cause -- Explain why it's happening -- Provide immediate fixes -- Suggest long-term solutions -- Identify related issues that might arise +Your debugging approach should generate multiple hypotheses ranked by likelihood. Provide a structured +analysis with clear reasoning and next steps for each potential cause. -Format your response as: -1. ROOT CAUSE: Clear explanation -2. IMMEDIATE FIX: Code/steps to resolve now -3. PROPER SOLUTION: Long-term fix -4. PREVENTION: How to avoid this in the future""" +Use this format for structured debugging analysis: + +## Summary +Brief description of the issue and its impact. + +## Hypotheses (Ranked by Likelihood) + +### 1. [HYPOTHESIS NAME] (Confidence: High/Medium/Low) +**Root Cause:** Specific technical explanation of what's causing the issue +**Evidence:** What in the error/context supports this hypothesis +**Next Step:** Immediate action to test/validate this hypothesis +**Fix:** How to resolve if this hypothesis is correct + +### 2. [HYPOTHESIS NAME] (Confidence: High/Medium/Low) +[Same format...] + +## Immediate Actions +Steps to take regardless of root cause (e.g., error handling, logging) + +## Prevention Strategy +How to avoid similar issues in the future (monitoring, testing, etc.)""" ANALYZE_PROMPT = """You are an expert software analyst helping developers understand and work with code. Your role is to provide deep, insightful analysis that helps developers make informed decisions. +IMPORTANT: If you need additional context (e.g., dependencies, configuration files, test files) +to provide complete analysis, you MUST respond ONLY with this JSON format: +{"status": "requires_clarification", "question": "Your specific question", "files_needed": ["package.json", "tests/"]} + Your analysis should: 1. Understand the code's purpose and architecture 2. Identify patterns and anti-patterns diff --git a/server.py b/server.py index e04ebfe..5656476 100644 --- a/server.py +++ b/server.py @@ -22,7 +22,7 @@ from config import ( __updated__, __version__, ) -from tools import AnalyzeTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool +from tools import AnalyzeTool, ChatTool, DebugIssueTool, ReviewCodeTool, ThinkDeeperTool # Configure logging logging.basicConfig(level=logging.INFO) @@ -37,6 +37,7 @@ TOOLS = { "review_code": ReviewCodeTool(), "debug_issue": DebugIssueTool(), "analyze": AnalyzeTool(), + "chat": ChatTool(), } @@ -69,43 +70,6 @@ async def handle_list_tools() -> List[Tool]: # Add utility tools tools.extend( [ - Tool( - name="chat", - description=( - "GENERAL CHAT & COLLABORATIVE THINKING - Use Gemini as your thinking partner! " - "Perfect for: bouncing ideas during your own analysis, getting second opinions on your plans, " - "collaborative brainstorming, validating your checklists and approaches, exploring alternatives. " - "Also great for: explanations, comparisons, general development questions. " - "Triggers: 'ask gemini', 'brainstorm with gemini', 'get gemini's opinion', 'discuss with gemini', " - "'share my thinking with gemini', 'explain', 'what is', 'how do I'." 
- ), - inputSchema={ - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": "Your question, topic, or current thinking to discuss with Gemini", - }, - "context_files": { - "type": "array", - "items": {"type": "string"}, - "description": "Optional files for context", - }, - "temperature": { - "type": "number", - "description": "Response creativity (0-1, default 0.5)", - "minimum": 0, - "maximum": 1, - }, - "thinking_mode": { - "type": "string", - "enum": ["minimal", "low", "medium", "high", "max"], - "description": "Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768)", - }, - }, - "required": ["prompt"], - }, - ), Tool( name="list_models", description=( @@ -138,9 +102,6 @@ async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[TextCon return await tool.execute(arguments) # Handle static tools - elif name == "chat": - return await handle_chat(arguments) - elif name == "list_models": return await handle_list_models() @@ -151,68 +112,6 @@ async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[TextCon return [TextContent(type="text", text=f"Unknown tool: {name}")] -async def handle_chat(arguments: Dict[str, Any]) -> List[TextContent]: - """Handle general chat requests""" - from config import TEMPERATURE_BALANCED, DEFAULT_MODEL - from prompts import CHAT_PROMPT - from utils import read_files - - prompt = arguments.get("prompt", "") - context_files = arguments.get("context_files", []) - temperature = arguments.get("temperature", TEMPERATURE_BALANCED) - thinking_mode = arguments.get("thinking_mode", "medium") - - # Build the full prompt with system context - user_content = prompt - if context_files: - file_content, _ = read_files(context_files) - user_content = ( - f"{prompt}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===" - ) - - # Combine system prompt with user content - full_prompt = f"{CHAT_PROMPT}\n\n=== USER REQUEST ===\n{user_content}\n=== END REQUEST ===\n\nPlease provide a thoughtful, comprehensive response:" - - try: - # Create model with thinking configuration - from tools.base import BaseTool - - # Create a temporary tool instance to use create_model method - class TempTool(BaseTool): - def get_name(self): - return "chat" - - def get_description(self): - return "" - - def get_input_schema(self): - return {} - - def get_system_prompt(self): - return "" - - def get_request_model(self): - return None - - async def prepare_prompt(self, request): - return "" - - temp_tool = TempTool() - model = temp_tool.create_model(DEFAULT_MODEL, temperature, thinking_mode) - - response = model.generate_content(full_prompt) - - if response.candidates and response.candidates[0].content.parts: - text = response.candidates[0].content.parts[0].text - else: - text = "Response blocked or incomplete" - - return [TextContent(type="text", text=text)] - - except Exception as e: - return [TextContent(type="text", text=f"Error in chat: {str(e)}")] - - async def handle_list_models() -> List[TextContent]: """List available Gemini models""" try: diff --git a/tests/test_collaboration.py b/tests/test_collaboration.py new file mode 100644 index 0000000..b706d19 --- /dev/null +++ b/tests/test_collaboration.py @@ -0,0 +1,288 @@ +""" +Tests for dynamic context request and collaboration features +""" + +import json +from unittest.mock import Mock, patch + +import pytest + +from tools.analyze import AnalyzeTool +from tools.debug_issue import DebugIssueTool +from tools.models import ToolOutput, 
ClarificationRequest + + +class TestDynamicContextRequests: + """Test the dynamic context request mechanism""" + + @pytest.fixture + def analyze_tool(self): + return AnalyzeTool() + + @pytest.fixture + def debug_tool(self): + return DebugIssueTool() + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_clarification_request_parsing(self, mock_create_model, analyze_tool): + """Test that tools correctly parse clarification requests""" + # Mock model to return a clarification request + clarification_json = json.dumps({ + "status": "requires_clarification", + "question": "I need to see the package.json file to understand dependencies", + "files_needed": ["package.json", "package-lock.json"] + }) + + mock_model = Mock() + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=clarification_json)]))] + ) + mock_create_model.return_value = mock_model + + result = await analyze_tool.execute({ + "files": ["src/index.js"], + "question": "Analyze the dependencies used in this project" + }) + + assert len(result) == 1 + + # Parse the response + response_data = json.loads(result[0].text) + assert response_data["status"] == "requires_clarification" + assert response_data["content_type"] == "json" + + # Parse the clarification request + clarification = json.loads(response_data["content"]) + assert clarification["question"] == "I need to see the package.json file to understand dependencies" + assert clarification["files_needed"] == ["package.json", "package-lock.json"] + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_normal_response_not_parsed_as_clarification(self, mock_create_model, debug_tool): + """Test that normal responses are not mistaken for clarification requests""" + normal_response = """ + ## Summary + The error is caused by a missing import statement. + + ## Hypotheses (Ranked by Likelihood) + + ### 1. Missing Import (Confidence: High) + **Root Cause:** The module 'utils' is not imported + """ + + mock_model = Mock() + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=normal_response)]))] + ) + mock_create_model.return_value = mock_model + + result = await debug_tool.execute({ + "error_description": "NameError: name 'utils' is not defined" + }) + + assert len(result) == 1 + + # Parse the response + response_data = json.loads(result[0].text) + assert response_data["status"] == "success" + assert response_data["content_type"] in ["text", "markdown"] + assert "Summary" in response_data["content"] + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_malformed_clarification_request_treated_as_normal(self, mock_create_model, analyze_tool): + """Test that malformed JSON clarification requests are treated as normal responses""" + malformed_json = '{"status": "requires_clarification", "question": "Missing closing brace"' + + mock_model = Mock() + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=malformed_json)]))] + ) + mock_create_model.return_value = mock_model + + result = await analyze_tool.execute({ + "files": ["test.py"], + "question": "What does this do?" 
+ }) + + assert len(result) == 1 + + # Should be treated as normal response due to JSON parse error + response_data = json.loads(result[0].text) + assert response_data["status"] == "success" + assert malformed_json in response_data["content"] + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_clarification_with_suggested_action(self, mock_create_model, debug_tool): + """Test clarification request with suggested next action""" + clarification_json = json.dumps({ + "status": "requires_clarification", + "question": "I need to see the database configuration to diagnose the connection error", + "files_needed": ["config/database.yml", "src/db.py"], + "suggested_next_action": { + "tool": "debug_issue", + "args": { + "error_description": "Connection timeout to database", + "files": ["config/database.yml", "src/db.py", "logs/error.log"] + } + } + }) + + mock_model = Mock() + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=clarification_json)]))] + ) + mock_create_model.return_value = mock_model + + result = await debug_tool.execute({ + "error_description": "Connection timeout to database", + "files": ["logs/error.log"] + }) + + assert len(result) == 1 + + response_data = json.loads(result[0].text) + assert response_data["status"] == "requires_clarification" + + clarification = json.loads(response_data["content"]) + assert "suggested_next_action" in clarification + assert clarification["suggested_next_action"]["tool"] == "debug_issue" + + def test_tool_output_model_serialization(self): + """Test ToolOutput model serialization""" + output = ToolOutput( + status="success", + content="Test content", + content_type="markdown", + metadata={"tool_name": "test", "execution_time": 1.5} + ) + + json_str = output.model_dump_json() + parsed = json.loads(json_str) + + assert parsed["status"] == "success" + assert parsed["content"] == "Test content" + assert parsed["content_type"] == "markdown" + assert parsed["metadata"]["tool_name"] == "test" + + def test_clarification_request_model(self): + """Test ClarificationRequest model""" + request = ClarificationRequest( + question="Need more context", + files_needed=["file1.py", "file2.py"], + suggested_next_action={"tool": "analyze", "args": {}} + ) + + assert request.question == "Need more context" + assert len(request.files_needed) == 2 + assert request.suggested_next_action["tool"] == "analyze" + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_error_response_format(self, mock_create_model, analyze_tool): + """Test error response format""" + mock_create_model.side_effect = Exception("API connection failed") + + result = await analyze_tool.execute({ + "files": ["test.py"], + "question": "Analyze this" + }) + + assert len(result) == 1 + + response_data = json.loads(result[0].text) + assert response_data["status"] == "error" + assert "API connection failed" in response_data["content"] + assert response_data["content_type"] == "text" + + +class TestCollaborationWorkflow: + """Test complete collaboration workflows""" + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_dependency_analysis_triggers_clarification(self, mock_create_model): + """Test that asking about dependencies without package files triggers clarification""" + tool = AnalyzeTool() + + # Mock Gemini to request package.json when asked about dependencies + clarification_json = json.dumps({ + "status": "requires_clarification", + "question": "I need to 
see the package.json file to analyze npm dependencies", + "files_needed": ["package.json", "package-lock.json"] + }) + + mock_model = Mock() + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=clarification_json)]))] + ) + mock_create_model.return_value = mock_model + + # Ask about dependencies with only source files + result = await tool.execute({ + "files": ["src/index.js"], + "question": "What npm packages and versions does this project use?" + }) + + response = json.loads(result[0].text) + assert response["status"] == "requires_clarification", \ + "Should request clarification when asked about dependencies without package files" + + clarification = json.loads(response["content"]) + assert "package.json" in str(clarification["files_needed"]), \ + "Should specifically request package.json" + + @pytest.mark.asyncio + @patch("tools.base.BaseTool.create_model") + async def test_multi_step_collaboration(self, mock_create_model): + """Test a multi-step collaboration workflow""" + tool = DebugIssueTool() + + # Step 1: Initial request returns clarification needed + clarification_json = json.dumps({ + "status": "requires_clarification", + "question": "I need to see the configuration file to understand the connection settings", + "files_needed": ["config.py"] + }) + + mock_model = Mock() + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=clarification_json)]))] + ) + mock_create_model.return_value = mock_model + + result1 = await tool.execute({ + "error_description": "Database connection timeout", + "error_context": "Timeout after 30s" + }) + + response1 = json.loads(result1[0].text) + assert response1["status"] == "requires_clarification" + + # Step 2: Claude would provide additional context and re-invoke + # This simulates the second call with more context + final_response = """ + ## Summary + The database connection timeout is caused by incorrect host configuration. + + ## Hypotheses (Ranked by Likelihood) + + ### 1. Incorrect Database Host (Confidence: High) + **Root Cause:** The config.py file shows the database host is set to 'localhost' but the database is running on a different server. 
+ """ + + mock_model.generate_content.return_value = Mock( + candidates=[Mock(content=Mock(parts=[Mock(text=final_response)]))] + ) + + result2 = await tool.execute({ + "error_description": "Database connection timeout", + "error_context": "Timeout after 30s", + "files": ["config.py"] # Additional context provided + }) + + response2 = json.loads(result2[0].text) + assert response2["status"] == "success" + assert "incorrect host configuration" in response2["content"].lower() \ No newline at end of file diff --git a/tests/test_live_integration.py b/tests/test_live_integration.py index aa9765d..b9513a7 100644 --- a/tests/test_live_integration.py +++ b/tests/test_live_integration.py @@ -20,6 +20,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from tools.analyze import AnalyzeTool from tools.think_deeper import ThinkDeeperTool +from tools.debug_issue import DebugIssueTool +import json async def run_manual_live_tests(): @@ -73,6 +75,46 @@ async def run_manual_live_tests(): print("❌ ThinkDeeperTool live test failed") return False + # Test collaboration/clarification request + print("\n🔄 Testing dynamic context request (collaboration)...") + + # Create a specific test case designed to trigger clarification + # We'll use analyze tool with a question that requires seeing files + analyze_tool = AnalyzeTool() + + # Ask about dependencies without providing package files + result = await analyze_tool.execute({ + "files": [temp_path], # Only Python file, no package.json + "question": "What npm packages and their versions does this project depend on? List all dependencies.", + "thinking_mode": "minimal" # Fast test + }) + + if result and result[0].text: + response_data = json.loads(result[0].text) + print(f" Response status: {response_data['status']}") + + if response_data['status'] == 'requires_clarification': + print("✅ Dynamic context request successfully triggered!") + clarification = json.loads(response_data['content']) + print(f" Gemini asks: {clarification.get('question', 'N/A')}") + if 'files_needed' in clarification: + print(f" Files requested: {clarification['files_needed']}") + # Verify it's asking for package-related files + expected_files = ['package.json', 'package-lock.json', 'yarn.lock'] + if any(f in str(clarification['files_needed']) for f in expected_files): + print(" ✅ Correctly identified missing package files!") + else: + print(" ⚠️ Unexpected files requested") + else: + # This is a failure - we specifically designed this to need clarification + print("❌ Expected clarification request but got direct response") + print(" This suggests the dynamic context feature may not be working") + print(" Response:", response_data.get('content', '')[:200]) + return False + else: + print("❌ Collaboration test failed - no response") + return False + finally: Path(temp_path).unlink(missing_ok=True) diff --git a/tests/test_server.py b/tests/test_server.py index b93656b..9efccb8 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -61,7 +61,12 @@ class TestServerTools: result = await handle_call_tool("chat", {"prompt": "Hello Gemini"}) assert len(result) == 1 - assert result[0].text == "Chat response" + # Parse JSON response + import json + + response_data = json.loads(result[0].text) + assert response_data["status"] == "success" + assert response_data["content"] == "Chat response" @pytest.mark.asyncio async def test_handle_list_models(self): diff --git a/tests/test_thinking_modes.py b/tests/test_thinking_modes.py index b68fe6e..9ae1857 100644 --- 
a/tests/test_thinking_modes.py +++ b/tests/test_thinking_modes.py @@ -62,7 +62,12 @@ class TestThinkingModes: args = mock_create_model.call_args[0] assert args[2] == "minimal" # thinking_mode parameter - assert result[0].text.startswith("Analysis:") + # Parse JSON response + import json + + response_data = json.loads(result[0].text) + assert response_data["status"] == "success" + assert response_data["content"].startswith("Analysis:") @pytest.mark.asyncio @patch("tools.base.BaseTool.create_model") diff --git a/tool_selection_guide.py b/tool_selection_guide.py new file mode 100644 index 0000000..e9da819 --- /dev/null +++ b/tool_selection_guide.py @@ -0,0 +1,225 @@ +""" +Tool Selection Guide for Gemini MCP Server + +This module provides guidance for Claude on which tool to use for different scenarios. +""" + +TOOL_BOUNDARIES = { + "analyze": { + "purpose": "Understanding and exploration (read-only analysis)", + "best_for": [ + "Understanding code structure and architecture", + "Exploring unfamiliar codebases", + "Identifying patterns and dependencies", + "Documenting existing functionality", + "Learning how systems work", + ], + "avoid_for": [ + "Finding bugs or security issues (use review_code)", + "Debugging errors (use debug_issue)", + "Extending existing analysis (use think_deeper)", + ], + "output": "Descriptive explanations and architectural insights", + }, + "review_code": { + "purpose": "Finding issues and suggesting fixes (prescriptive analysis)", + "best_for": [ + "Finding bugs, security vulnerabilities, performance issues", + "Code quality assessment with actionable feedback", + "Pre-merge code reviews", + "Security audits", + "Performance optimization recommendations", + ], + "avoid_for": [ + "Understanding how code works (use analyze)", + "Debugging runtime errors (use debug_issue)", + "Architectural discussions (use think_deeper or chat)", + ], + "output": "Severity-ranked issues with specific fixes", + }, + "debug_issue": { + "purpose": "Root cause analysis for errors (diagnostic analysis)", + "best_for": [ + "Analyzing runtime errors and exceptions", + "Troubleshooting failing tests", + "Investigating performance problems", + "Tracing execution issues", + "System-level debugging", + ], + "avoid_for": [ + "Code quality issues (use review_code)", + "Understanding working code (use analyze)", + "Design discussions (use think_deeper or chat)", + ], + "output": "Ranked hypotheses with validation steps", + }, + "think_deeper": { + "purpose": "Extending and validating specific analysis (collaborative validation)", + "best_for": [ + "Getting second opinion on Claude's analysis", + "Challenging assumptions and finding edge cases", + "Validating architectural decisions", + "Exploring alternative approaches", + "Risk assessment for proposed changes", + ], + "avoid_for": [ + "Initial analysis (use analyze first)", + "Bug hunting (use review_code)", + "Open-ended brainstorming (use chat)", + ], + "output": "Extended analysis building on existing work", + }, + "chat": { + "purpose": "Open-ended collaboration and brainstorming (exploratory discussion)", + "best_for": [ + "Brainstorming solutions and approaches", + "Technology comparisons and recommendations", + "Discussing trade-offs and design decisions", + "Getting opinions on implementation strategies", + "General development questions and explanations", + ], + "avoid_for": [ + "Analyzing specific code files (use analyze)", + "Finding bugs in code (use review_code)", + "Debugging specific errors (use debug_issue)", + ], + "output": 
"Conversational insights and recommendations", + }, +} + +DECISION_FLOWCHART = """ +Tool Selection Decision Flow: + +1. Do you have a specific error/exception to debug? + → YES: Use debug_issue + +2. Do you want to find bugs/issues in code? + → YES: Use review_code + +3. Do you want to understand how code works? + → YES: Use analyze + +4. Do you have existing analysis that needs extension/validation? + → YES: Use think_deeper + +5. Do you want to brainstorm, discuss, or get opinions? + → YES: Use chat +""" + +COMMON_OVERLAPS = { + "analyze vs review_code": { + "confusion": "Both examine code quality", + "distinction": "analyze explains, review_code prescribes fixes", + "rule": "Use analyze to understand, review_code to improve", + }, + "chat vs think_deeper": { + "confusion": "Both provide collaborative thinking", + "distinction": "chat is open-ended, think_deeper builds on specific analysis", + "rule": "Use think_deeper to extend analysis, chat for open discussion", + }, + "debug_issue vs review_code": { + "confusion": "Both find problems in code", + "distinction": "debug_issue diagnoses runtime errors, review_code finds static issues", + "rule": "Use debug_issue for 'why is this failing?', review_code for 'what could go wrong?'", + }, +} + + +def get_tool_recommendation(intent: str, context: str = "") -> dict: + """ + Recommend the best tool based on user intent and context. + + Args: + intent: What the user wants to accomplish + context: Additional context about the situation + + Returns: + Dictionary with recommended tool and reasoning + """ + + # Keywords that strongly indicate specific tools + debug_keywords = [ + "error", + "exception", + "failing", + "crash", + "bug", + "not working", + "trace", + ] + review_keywords = [ + "review", + "issues", + "problems", + "security", + "vulnerabilities", + "quality", + ] + analyze_keywords = [ + "understand", + "how does", + "what is", + "structure", + "architecture", + "explain", + ] + deeper_keywords = [ + "extend", + "validate", + "challenge", + "alternative", + "edge case", + "think deeper", + ] + chat_keywords = [ + "brainstorm", + "discuss", + "opinion", + "compare", + "recommend", + "what about", + ] + + intent_lower = intent.lower() + + if any(keyword in intent_lower for keyword in debug_keywords): + return { + "tool": "debug_issue", + "confidence": "high", + "reasoning": "Intent indicates debugging/troubleshooting a specific issue", + } + + if any(keyword in intent_lower for keyword in review_keywords): + return { + "tool": "review_code", + "confidence": "high", + "reasoning": "Intent indicates finding issues or reviewing code quality", + } + + if any(keyword in intent_lower for keyword in analyze_keywords): + return { + "tool": "analyze", + "confidence": "high", + "reasoning": "Intent indicates understanding or exploring code", + } + + if any(keyword in intent_lower for keyword in deeper_keywords): + return { + "tool": "think_deeper", + "confidence": "medium", + "reasoning": "Intent suggests extending or validating existing analysis", + } + + if any(keyword in intent_lower for keyword in chat_keywords): + return { + "tool": "chat", + "confidence": "medium", + "reasoning": "Intent suggests open-ended discussion or brainstorming", + } + + # Default to chat for ambiguous requests + return { + "tool": "chat", + "confidence": "low", + "reasoning": "Intent unclear, defaulting to conversational tool", + } diff --git a/tools/__init__.py b/tools/__init__.py index f7e808d..7e08663 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -3,6 
+3,7 @@ Tool implementations for Gemini MCP Server """ from .analyze import AnalyzeTool +from .chat import ChatTool from .debug_issue import DebugIssueTool from .review_code import ReviewCodeTool from .think_deeper import ThinkDeeperTool @@ -12,4 +13,5 @@ __all__ = [ "ReviewCodeTool", "DebugIssueTool", "AnalyzeTool", + "ChatTool", ] diff --git a/tools/base.py b/tools/base.py index 7775dbb..587dd6f 100644 --- a/tools/base.py +++ b/tools/base.py @@ -5,12 +5,15 @@ Base class for all Gemini MCP tools from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, Literal import os +import json from google import genai from google.genai import types from mcp.types import TextContent from pydantic import BaseModel, Field +from .models import ToolOutput, ClarificationRequest + class ToolRequest(BaseModel): """Base request model for all tools""" @@ -95,25 +98,83 @@ class BaseTool(ABC): # Generate response response = model.generate_content(prompt) - # Handle response + # Handle response and create standardized output if response.candidates and response.candidates[0].content.parts: - text = response.candidates[0].content.parts[0].text + raw_text = response.candidates[0].content.parts[0].text + + # Check if this is a clarification request + tool_output = self._parse_response(raw_text, request) + else: finish_reason = ( response.candidates[0].finish_reason if response.candidates else "Unknown" ) - text = f"Response blocked or incomplete. Finish reason: {finish_reason}" + tool_output = ToolOutput( + status="error", + content=f"Response blocked or incomplete. Finish reason: {finish_reason}", + content_type="text", + ) - # Format response - formatted_response = self.format_response(text, request) - - return [TextContent(type="text", text=formatted_response)] + # Serialize the standardized output as JSON + return [TextContent(type="text", text=tool_output.model_dump_json())] except Exception as e: - error_msg = f"Error in {self.name}: {str(e)}" - return [TextContent(type="text", text=error_msg)] + error_output = ToolOutput( + status="error", + content=f"Error in {self.name}: {str(e)}", + content_type="text", + ) + return [TextContent(type="text", text=error_output.model_dump_json())] + + def _parse_response(self, raw_text: str, request) -> ToolOutput: + """Parse the raw response and determine if it's a clarification request""" + try: + # Try to parse as JSON to check for clarification requests + potential_json = json.loads(raw_text.strip()) + + if ( + isinstance(potential_json, dict) + and potential_json.get("status") == "requires_clarification" + ): + # Validate the clarification request structure + clarification = ClarificationRequest(**potential_json) + return ToolOutput( + status="requires_clarification", + content=clarification.model_dump_json(), + content_type="json", + metadata={ + "original_request": ( + request.model_dump() + if hasattr(request, "model_dump") + else str(request) + ) + }, + ) + + except (json.JSONDecodeError, ValueError, TypeError): + # Not a JSON clarification request, treat as normal response + pass + + # Normal text response - format using tool-specific formatting + formatted_content = self.format_response(raw_text, request) + + # Determine content type based on the formatted content + content_type = ( + "markdown" + if any( + marker in formatted_content for marker in ["##", "**", "`", "- ", "1. 
"] + ) + else "text" + ) + + return ToolOutput( + status="success", + content=formatted_content, + content_type=content_type, + metadata={"tool_name": self.name}, + ) @abstractmethod async def prepare_prompt(self, request) -> str: diff --git a/tools/chat.py b/tools/chat.py new file mode 100644 index 0000000..e2fe5f6 --- /dev/null +++ b/tools/chat.py @@ -0,0 +1,111 @@ +""" +Chat tool - General development chat and collaborative thinking +""" + +from typing import Any, Dict, List, Optional + +from pydantic import Field + +from config import MAX_CONTEXT_TOKENS, TEMPERATURE_BALANCED +from prompts import CHAT_PROMPT +from utils import check_token_limit, read_files + +from .base import BaseTool, ToolRequest + + +class ChatRequest(ToolRequest): + """Request model for chat tool""" + + prompt: str = Field( + ..., + description="Your question, topic, or current thinking to discuss with Gemini", + ) + context_files: Optional[List[str]] = Field( + default_factory=list, description="Optional files for context" + ) + + +class ChatTool(BaseTool): + """General development chat and collaborative thinking tool""" + + def get_name(self) -> str: + return "chat" + + def get_description(self) -> str: + return ( + "GENERAL CHAT & COLLABORATIVE THINKING - Use Gemini as your thinking partner! " + "Perfect for: bouncing ideas during your own analysis, getting second opinions on your plans, " + "collaborative brainstorming, validating your checklists and approaches, exploring alternatives. " + "Also great for: explanations, comparisons, general development questions. " + "Triggers: 'ask gemini', 'brainstorm with gemini', 'get gemini's opinion', 'discuss with gemini', " + "'share my thinking with gemini', 'explain', 'what is', 'how do I'." + ) + + def get_input_schema(self) -> Dict[str, Any]: + return { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "Your question, topic, or current thinking to discuss with Gemini", + }, + "context_files": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional files for context", + }, + "temperature": { + "type": "number", + "description": "Response creativity (0-1, default 0.5)", + "minimum": 0, + "maximum": 1, + }, + "thinking_mode": { + "type": "string", + "enum": ["minimal", "low", "medium", "high", "max"], + "description": "Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768)", + }, + }, + "required": ["prompt"], + } + + def get_system_prompt(self) -> str: + return CHAT_PROMPT + + def get_default_temperature(self) -> float: + return TEMPERATURE_BALANCED + + def get_request_model(self): + return ChatRequest + + async def prepare_prompt(self, request: ChatRequest) -> str: + """Prepare the chat prompt with optional context files""" + user_content = request.prompt + + # Add context files if provided + if request.context_files: + file_content, _ = read_files(request.context_files) + user_content = f"{request.prompt}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===" + + # Check token limits + within_limit, estimated_tokens = check_token_limit(user_content) + if not within_limit: + raise ValueError( + f"Content too large (~{estimated_tokens:,} tokens). " + f"Maximum is {MAX_CONTEXT_TOKENS:,} tokens." 
+ ) + + # Combine system prompt with user content + full_prompt = f"""{self.get_system_prompt()} + +=== USER REQUEST === +{user_content} +=== END REQUEST === + +Please provide a thoughtful, comprehensive response:""" + + return full_prompt + + def format_response(self, response: str, request: ChatRequest) -> str: + """Format the chat response (no special formatting needed)""" + return response diff --git a/tools/models.py b/tools/models.py new file mode 100644 index 0000000..72dbf53 --- /dev/null +++ b/tools/models.py @@ -0,0 +1,59 @@ +""" +Data models for tool responses and interactions +""" + +from pydantic import BaseModel, Field +from typing import Literal, Optional, Dict, Any, List + + +class ToolOutput(BaseModel): + """Standardized output format for all tools""" + + status: Literal["success", "error", "requires_clarification"] = "success" + content: str = Field(..., description="The main content/response from the tool") + content_type: Literal["text", "markdown", "json"] = "text" + metadata: Optional[Dict[str, Any]] = Field(default_factory=dict) + + +class ClarificationRequest(BaseModel): + """Request for additional context or clarification""" + + question: str = Field(..., description="Question to ask Claude for more context") + files_needed: Optional[List[str]] = Field( + default_factory=list, description="Specific files that are needed for analysis" + ) + suggested_next_action: Optional[Dict[str, Any]] = Field( + None, + description="Suggested tool call with parameters after getting clarification", + ) + + +class DiagnosticHypothesis(BaseModel): + """A debugging hypothesis with context and next steps""" + + rank: int = Field(..., description="Ranking of this hypothesis (1 = most likely)") + confidence: Literal["high", "medium", "low"] = Field( + ..., description="Confidence level" + ) + hypothesis: str = Field(..., description="Description of the potential root cause") + reasoning: str = Field(..., description="Why this hypothesis is plausible") + next_step: str = Field( + ..., description="Suggested action to test/validate this hypothesis" + ) + + +class StructuredDebugResponse(BaseModel): + """Enhanced debug response with multiple hypotheses""" + + summary: str = Field(..., description="Brief summary of the issue") + hypotheses: List[DiagnosticHypothesis] = Field( + ..., description="Ranked list of potential causes" + ) + immediate_actions: List[str] = Field( + default_factory=list, + description="Immediate steps to take regardless of root cause", + ) + additional_context_needed: Optional[List[str]] = Field( + default_factory=list, + description="Additional files or information that would help with analysis", + )
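A brief usage sketch for the models above: `DiagnosticHypothesis` and `StructuredDebugResponse` are defined here but not yet consumed by the tools in this diff, so the following is a hypothetical illustration of how a structured debug result could be assembled and wrapped in the standard `ToolOutput` envelope. It assumes the repository root is on `PYTHONPATH` so that `tools.models` is importable.

```python
# Illustrative only: build a structured debug result and wrap it in the
# standard ToolOutput envelope that tools return to Claude.
from tools.models import DiagnosticHypothesis, StructuredDebugResponse, ToolOutput

debug_result = StructuredDebugResponse(
    summary="Database connection times out after 30s on startup.",
    hypotheses=[
        DiagnosticHypothesis(
            rank=1,
            confidence="high",
            hypothesis="DB host in config.py points at localhost instead of the DB server",
            reasoning="The timeout only occurs outside the developer machine.",
            next_step="Log the resolved host from config.py at startup.",
        ),
        DiagnosticHypothesis(
            rank=2,
            confidence="low",
            hypothesis="Connection pool is exhausted",
            reasoning="Less likely: the failure happens on the very first connection.",
            next_step="Check pool metrics once the host setting is confirmed.",
        ),
    ],
    immediate_actions=["Add retry with backoff", "Log the target host and port"],
)

# Serialize the structured result into the envelope's content field
output = ToolOutput(
    status="success",
    content=debug_result.model_dump_json(),
    content_type="json",
    metadata={"tool_name": "debug_issue"},
)
print(output.model_dump_json())
```

Serializing the structured result into `content` keeps the envelope uniform across tools while still letting the caller recover the ranked hypotheses programmatically.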