Re-imagined and rewritten Debug tool. Instead of prompting Claude to perform an initial analysis (and hoping it did), the tool now works through the debug process as an 'investigation': it encourages Claude to record its 'findings' and 'hypothesis' at each step, step back and revise when needed, and keep track of both the files it has examined and the files relevant to the issue at hand. This structured investigation is then passed to the other model with far greater insight than the original debug tool could ever provide.
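
For illustration, a minimal sketch of what a single investigation step looks like when Claude calls the tool; the field names mirror the new tests below, and the values are purely illustrative:

from tools.debug import DebugInvestigationRequest

# One step in the structured investigation: what was examined, what was
# found, and the current working hypothesis with a confidence level.
step = DebugInvestigationRequest(
    step="Examine session cleanup for concurrent modification issues",
    step_number=2,
    total_steps=5,
    next_step_required=True,
    findings="cleanup_expired_sessions mutates the session dict while iterating over it",
    files_checked=["/api/session_manager.py", "/api/utils.py"],  # everything looked at so far
    relevant_files=["/api/session_manager.py"],  # files tied to the issue
    relevant_methods=["SessionManager.cleanup_expired_sessions"],
    hypothesis="Dictionary modified during iteration causes intermittent RuntimeError",
    confidence="medium",
)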

Improved prompts; guard against over-engineering and flag it as an anti-pattern.
Fahad
2025-06-19 10:22:30 +04:00
parent 2641c78f8d
commit fccfb0d999
16 changed files with 2243 additions and 707 deletions

tests/test_debug.py (new file, 514 lines)

@@ -0,0 +1,514 @@
"""
Tests for the debug tool.
"""
from unittest.mock import patch
import pytest
from tools.debug import DebugInvestigationRequest, DebugIssueTool
from tools.models import ToolModelCategory
class TestDebugTool:
"""Test suite for DebugIssueTool."""
def test_tool_metadata(self):
"""Test basic tool metadata and configuration."""
tool = DebugIssueTool()
assert tool.get_name() == "debug"
assert "DEBUG & ROOT CAUSE ANALYSIS" in tool.get_description()
assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL
assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
assert tool.requires_model() is False # Since it manages its own model calls
def test_request_validation(self):
"""Test Pydantic request model validation."""
# Valid investigation step request
step_request = DebugInvestigationRequest(
step="Investigating null pointer exception in UserService",
step_number=1,
total_steps=5,
next_step_required=True,
findings="Found that UserService.getUser() is called with null ID",
)
assert step_request.step == "Investigating null pointer exception in UserService"
assert step_request.step_number == 1
assert step_request.next_step_required is True
assert step_request.confidence == "low" # default
# Request with optional fields
detailed_request = DebugInvestigationRequest(
step="Deep dive into getUser method implementation",
step_number=2,
total_steps=5,
next_step_required=True,
findings="Method doesn't validate input parameters",
files_checked=["/src/UserService.java", "/src/UserController.java"],
relevant_files=["/src/UserService.java"],
relevant_methods=["UserService.getUser", "UserController.handleRequest"],
hypothesis="Null ID passed from controller without validation",
confidence="medium",
)
assert len(detailed_request.files_checked) == 2
assert len(detailed_request.relevant_files) == 1
assert detailed_request.confidence == "medium"
# Missing required fields should fail
with pytest.raises(ValueError):
DebugInvestigationRequest() # Missing all required fields
with pytest.raises(ValueError):
DebugInvestigationRequest(step="test") # Missing other required fields
def test_input_schema_generation(self):
"""Test JSON schema generation for MCP client."""
tool = DebugIssueTool()
schema = tool.get_input_schema()
assert schema["type"] == "object"
# Investigation fields
assert "step" in schema["properties"]
assert "step_number" in schema["properties"]
assert "total_steps" in schema["properties"]
assert "next_step_required" in schema["properties"]
assert "findings" in schema["properties"]
assert "files_checked" in schema["properties"]
assert "relevant_files" in schema["properties"]
assert "relevant_methods" in schema["properties"]
assert "hypothesis" in schema["properties"]
assert "confidence" in schema["properties"]
assert "backtrack_from_step" in schema["properties"]
assert "continuation_id" in schema["properties"]
assert "images" in schema["properties"] # Now supported for visual debugging
# Check excluded fields are NOT present
assert "model" not in schema["properties"]
assert "temperature" not in schema["properties"]
assert "thinking_mode" not in schema["properties"]
assert "use_websearch" not in schema["properties"]
# Check required fields
assert "step" in schema["required"]
assert "step_number" in schema["required"]
assert "total_steps" in schema["required"]
assert "next_step_required" in schema["required"]
assert "findings" in schema["required"]
def test_model_category_for_debugging(self):
"""Test that debug uses extended reasoning category."""
tool = DebugIssueTool()
category = tool.get_model_category()
# Debugging needs deep thinking
assert category == ToolModelCategory.EXTENDED_REASONING
@pytest.mark.asyncio
async def test_execute_first_investigation_step(self):
"""Test execute method for first investigation step."""
tool = DebugIssueTool()
arguments = {
"step": "Investigating intermittent session validation failures in production",
"step_number": 1,
"total_steps": 5,
"next_step_required": True,
"findings": "Users report random session invalidation, occurs more during high traffic",
"files_checked": ["/api/session_manager.py"],
"relevant_files": ["/api/session_manager.py"],
}
# Mock conversation memory functions
with patch("utils.conversation_memory.create_thread", return_value="debug-uuid-123"):
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
assert result[0].type == "text"
# Parse the JSON response
import json
parsed_response = json.loads(result[0].text)
assert parsed_response["status"] == "investigation_in_progress"
assert parsed_response["step_number"] == 1
assert parsed_response["total_steps"] == 5
assert parsed_response["next_step_required"] is True
assert parsed_response["continuation_id"] == "debug-uuid-123"
assert parsed_response["investigation_status"]["files_checked"] == 1
assert parsed_response["investigation_status"]["relevant_files"] == 1
@pytest.mark.asyncio
async def test_execute_subsequent_investigation_step(self):
"""Test execute method for subsequent investigation step."""
tool = DebugIssueTool()
# Set up initial state
tool.initial_issue = "Session validation failures"
tool.consolidated_findings["files_checked"].add("/api/session_manager.py")
arguments = {
"step": "Examining session cleanup method for concurrent modification issues",
"step_number": 2,
"total_steps": 5,
"next_step_required": True,
"findings": "Found dictionary modification during iteration in cleanup_expired_sessions",
"files_checked": ["/api/session_manager.py", "/api/utils.py"],
"relevant_files": ["/api/session_manager.py"],
"relevant_methods": ["SessionManager.cleanup_expired_sessions"],
"hypothesis": "Dictionary modified during iteration causing RuntimeError",
"confidence": "high",
"continuation_id": "debug-uuid-123",
}
# Mock conversation memory functions
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
assert result[0].type == "text"
# Parse the JSON response
import json
parsed_response = json.loads(result[0].text)
assert parsed_response["step_number"] == 2
assert parsed_response["next_step_required"] is True
assert parsed_response["continuation_id"] == "debug-uuid-123"
assert parsed_response["investigation_status"]["files_checked"] == 2 # Cumulative
assert parsed_response["investigation_status"]["relevant_methods"] == 1
assert parsed_response["investigation_status"]["current_confidence"] == "high"
@pytest.mark.asyncio
async def test_execute_final_investigation_step(self):
"""Test execute method for final investigation step with expert analysis."""
tool = DebugIssueTool()
# Set up investigation history
tool.initial_issue = "Session validation failures"
tool.investigation_history = [
{
"step_number": 1,
"step": "Initial investigation of session validation failures",
"findings": "Initial investigation",
"files_checked": ["/api/utils.py"],
},
{
"step_number": 2,
"step": "Deeper analysis of session manager",
"findings": "Found dictionary issue",
"files_checked": ["/api/session_manager.py"],
},
]
tool.consolidated_findings = {
"files_checked": {"/api/session_manager.py", "/api/utils.py"},
"relevant_files": {"/api/session_manager.py"},
"relevant_methods": {"SessionManager.cleanup_expired_sessions"},
"findings": ["Step 1: Initial investigation", "Step 2: Found dictionary issue"],
"hypotheses": [{"step": 2, "hypothesis": "Dictionary modified during iteration", "confidence": "high"}],
"images": [],
}
arguments = {
"step": "Confirmed the root cause and identified fix",
"step_number": 3,
"total_steps": 3,
"next_step_required": False, # Final step
"findings": "Root cause confirmed: dictionary modification during iteration in cleanup method",
"files_checked": ["/api/session_manager.py"],
"relevant_files": ["/api/session_manager.py"],
"relevant_methods": ["SessionManager.cleanup_expired_sessions"],
"hypothesis": "Dictionary modification during iteration causes intermittent RuntimeError",
"confidence": "high",
"continuation_id": "debug-uuid-123",
}
# Mock the expert analysis call
mock_expert_response = {
"status": "analysis_complete",
"summary": "Dictionary modification during iteration bug identified",
"hypotheses": [
{
"name": "CONCURRENT_MODIFICATION",
"confidence": "High",
"root_cause": "Modifying dictionary while iterating",
"minimal_fix": "Create list of keys to delete first",
}
],
}
# Mock conversation memory and file reading
with patch("utils.conversation_memory.add_turn"):
with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
with patch.object(tool, "_prepare_file_content_for_prompt", return_value=("file content", 100)):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
# Check final step structure
assert parsed_response["status"] == "calling_expert_analysis"
assert parsed_response["investigation_complete"] is True
assert parsed_response["expert_analysis"]["status"] == "analysis_complete"
assert "complete_investigation" in parsed_response
assert parsed_response["complete_investigation"]["steps_taken"] == 3 # All steps including current
@pytest.mark.asyncio
async def test_execute_with_backtracking(self):
"""Test execute method with backtracking to revise findings."""
tool = DebugIssueTool()
# Set up some investigation history with all required fields
tool.investigation_history = [
{
"step": "Initial investigation",
"step_number": 1,
"findings": "Initial findings",
"files_checked": ["file1.py"],
"relevant_files": [],
"relevant_methods": [],
"hypothesis": None,
"confidence": "low",
},
{
"step": "Wrong direction",
"step_number": 2,
"findings": "Wrong path",
"files_checked": ["file2.py"],
"relevant_files": [],
"relevant_methods": [],
"hypothesis": None,
"confidence": "low",
},
]
tool.consolidated_findings = {
"files_checked": {"file1.py", "file2.py"},
"relevant_files": set(),
"relevant_methods": set(),
"findings": ["Step 1: Initial findings", "Step 2: Wrong path"],
"hypotheses": [],
"images": [],
}
arguments = {
"step": "Backtracking to revise approach",
"step_number": 3,
"total_steps": 5,
"next_step_required": True,
"findings": "Taking a different investigation approach",
"files_checked": ["file3.py"],
"backtrack_from_step": 2, # Backtrack from step 2
"continuation_id": "debug-uuid-123",
}
# Mock conversation memory functions
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_in_progress"
# After backtracking from step 2, history should have step 1 plus the new step
assert len(tool.investigation_history) == 2 # Step 1 + new step 3
assert tool.investigation_history[0]["step_number"] == 1
assert tool.investigation_history[1]["step_number"] == 3 # The new step that triggered backtrack
@pytest.mark.asyncio
async def test_execute_adjusts_total_steps(self):
"""Test execute method adjusts total steps when current step exceeds estimate."""
tool = DebugIssueTool()
arguments = {
"step": "Additional investigation needed",
"step_number": 8,
"total_steps": 5, # Current step exceeds total
"next_step_required": True,
"findings": "More complexity discovered",
"continuation_id": "debug-uuid-123",
}
# Mock conversation memory functions
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
# Total steps should be adjusted to match current step
assert parsed_response["total_steps"] == 8
assert parsed_response["step_number"] == 8
@pytest.mark.asyncio
async def test_execute_error_handling(self):
"""Test execute method error handling."""
tool = DebugIssueTool()
# Invalid arguments - missing required fields
arguments = {
"step": "Invalid request"
# Missing required fields
}
result = await tool.execute(arguments)
# Should return error response
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_failed"
assert "error" in parsed_response
def test_prepare_investigation_summary(self):
"""Test investigation summary preparation."""
tool = DebugIssueTool()
tool.consolidated_findings = {
"files_checked": {"file1.py", "file2.py", "file3.py"},
"relevant_files": {"file1.py", "file2.py"},
"relevant_methods": {"Class1.method1", "Class2.method2"},
"findings": [
"Step 1: Initial investigation findings",
"Step 2: Discovered potential issue",
"Step 3: Confirmed root cause",
],
"hypotheses": [
{"step": 1, "hypothesis": "Initial hypothesis", "confidence": "low"},
{"step": 2, "hypothesis": "Refined hypothesis", "confidence": "medium"},
{"step": 3, "hypothesis": "Final hypothesis", "confidence": "high"},
],
"images": [],
}
summary = tool._prepare_investigation_summary()
assert "SYSTEMATIC INVESTIGATION SUMMARY" in summary
assert "Files examined: 3" in summary
assert "Relevant files identified: 2" in summary
assert "Methods/functions involved: 2" in summary
assert "INVESTIGATION PROGRESSION" in summary
assert "Step 1:" in summary
assert "Step 2:" in summary
assert "Step 3:" in summary
assert "HYPOTHESIS EVOLUTION" in summary
assert "low confidence" in summary
assert "medium confidence" in summary
assert "high confidence" in summary
def test_extract_error_context(self):
"""Test error context extraction from findings."""
tool = DebugIssueTool()
tool.consolidated_findings = {
"findings": [
"Step 1: Found no issues initially",
"Step 2: Discovered ERROR: Dictionary size changed during iteration",
"Step 3: Stack trace shows RuntimeError in cleanup method",
"Step 4: Exception occurs intermittently",
],
}
error_context = tool._extract_error_context()
assert error_context is not None
assert "ERROR: Dictionary size changed" in error_context
assert "Stack trace shows RuntimeError" in error_context
assert "Exception occurs intermittently" in error_context
assert "Found no issues initially" not in error_context # Should not include non-error findings
def test_reprocess_consolidated_findings(self):
"""Test reprocessing of consolidated findings after backtracking."""
tool = DebugIssueTool()
tool.investigation_history = [
{
"step_number": 1,
"findings": "Initial findings",
"files_checked": ["file1.py"],
"relevant_files": ["file1.py"],
"relevant_methods": ["method1"],
"hypothesis": "Initial hypothesis",
"confidence": "low",
},
{
"step_number": 2,
"findings": "Second findings",
"files_checked": ["file2.py"],
"relevant_files": [],
"relevant_methods": ["method2"],
},
]
tool._reprocess_consolidated_findings()
assert tool.consolidated_findings["files_checked"] == {"file1.py", "file2.py"}
assert tool.consolidated_findings["relevant_files"] == {"file1.py"}
assert tool.consolidated_findings["relevant_methods"] == {"method1", "method2"}
assert len(tool.consolidated_findings["findings"]) == 2
assert len(tool.consolidated_findings["hypotheses"]) == 1
assert tool.consolidated_findings["hypotheses"][0]["hypothesis"] == "Initial hypothesis"
# Integration test
class TestDebugToolIntegration:
"""Integration tests for debug tool."""
def setup_method(self):
"""Set up model context for integration tests."""
from utils.model_context import ModelContext
self.tool = DebugIssueTool()
self.tool._model_context = ModelContext("flash") # Test model
@pytest.mark.asyncio
async def test_complete_investigation_flow(self):
"""Test complete investigation flow from start to expert analysis."""
# Step 1: Initial investigation
arguments = {
"step": "Investigating memory leak in data processing pipeline",
"step_number": 1,
"total_steps": 3,
"next_step_required": True,
"findings": "High memory usage observed during batch processing",
"files_checked": ["/processor/main.py"],
}
# Mock conversation memory and expert analysis
with patch("utils.conversation_memory.create_thread", return_value="debug-flow-uuid"):
with patch("utils.conversation_memory.add_turn"):
result = await self.tool.execute(arguments)
# Verify response structure
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_in_progress"
assert parsed_response["step_number"] == 1
assert parsed_response["continuation_id"] == "debug-flow-uuid"