my-pal-mcp-server/tests/test_debug.py

"""
Tests for the debug tool.
"""

from unittest.mock import patch

import pytest

from tools.debug import DebugInvestigationRequest, DebugIssueTool
from tools.models import ToolModelCategory


class TestDebugTool:
    """Test suite for DebugIssueTool."""

    def test_tool_metadata(self):
        """Test basic tool metadata and configuration."""
        tool = DebugIssueTool()

        assert tool.get_name() == "debug"
        assert "DEBUG & ROOT CAUSE ANALYSIS" in tool.get_description()
        assert tool.get_default_temperature() == 0.2  # TEMPERATURE_ANALYTICAL
        assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
        assert tool.requires_model() is True  # Requires model resolution for expert analysis

    def test_request_validation(self):
        """Test Pydantic request model validation."""
        # Valid investigation step request
        step_request = DebugInvestigationRequest(
            step="Investigating null pointer exception in UserService",
            step_number=1,
            total_steps=5,
            next_step_required=True,
            findings="Found that UserService.getUser() is called with null ID",
        )
        assert step_request.step == "Investigating null pointer exception in UserService"
        assert step_request.step_number == 1
        assert step_request.next_step_required is True
        assert step_request.confidence == "low"  # default

        # Request with optional fields
        detailed_request = DebugInvestigationRequest(
            step="Deep dive into getUser method implementation",
            step_number=2,
            total_steps=5,
            next_step_required=True,
            findings="Method doesn't validate input parameters",
            files_checked=["/src/UserService.java", "/src/UserController.java"],
            relevant_files=["/src/UserService.java"],
            relevant_methods=["UserService.getUser", "UserController.handleRequest"],
            hypothesis="Null ID passed from controller without validation",
            confidence="medium",
        )
        assert len(detailed_request.files_checked) == 2
        assert len(detailed_request.relevant_files) == 1
        assert detailed_request.confidence == "medium"

        # Missing required fields should fail
        with pytest.raises(ValueError):
            DebugInvestigationRequest()  # Missing all required fields

        with pytest.raises(ValueError):
            DebugInvestigationRequest(step="test")  # Missing other required fields

    def test_input_schema_generation(self):
        """Test JSON schema generation for MCP client."""
        tool = DebugIssueTool()
        schema = tool.get_input_schema()

        assert schema["type"] == "object"
        # Investigation fields
        assert "step" in schema["properties"]
        assert "step_number" in schema["properties"]
        assert "total_steps" in schema["properties"]
        assert "next_step_required" in schema["properties"]
        assert "findings" in schema["properties"]
        assert "files_checked" in schema["properties"]
        assert "relevant_files" in schema["properties"]
        assert "relevant_methods" in schema["properties"]
        assert "hypothesis" in schema["properties"]
        assert "confidence" in schema["properties"]
        assert "backtrack_from_step" in schema["properties"]
        assert "continuation_id" in schema["properties"]
        assert "images" in schema["properties"]  # Now supported for visual debugging

        # Check model field is present (fixed from previous bug)
        assert "model" in schema["properties"]
        # Check excluded fields are NOT present
        assert "temperature" not in schema["properties"]
        assert "thinking_mode" not in schema["properties"]
        assert "use_websearch" not in schema["properties"]

        # Check required fields
        assert "step" in schema["required"]
        assert "step_number" in schema["required"]
        assert "total_steps" in schema["required"]
        assert "next_step_required" in schema["required"]
        assert "findings" in schema["required"]

    def test_model_category_for_debugging(self):
        """Test that debug uses extended reasoning category."""
        tool = DebugIssueTool()
        category = tool.get_model_category()

        # Debugging needs deep thinking
        assert category == ToolModelCategory.EXTENDED_REASONING

    @pytest.mark.asyncio
    async def test_execute_first_investigation_step(self):
        """Test execute method for first investigation step."""
        tool = DebugIssueTool()
        arguments = {
            "step": "Investigating intermittent session validation failures in production",
            "step_number": 1,
            "total_steps": 5,
            "next_step_required": True,
            "findings": "Users report random session invalidation, occurs more during high traffic",
            "files_checked": ["/api/session_manager.py"],
            "relevant_files": ["/api/session_manager.py"],
        }

        # Mock conversation memory functions
        with patch("utils.conversation_memory.create_thread", return_value="debug-uuid-123"):
            with patch("utils.conversation_memory.add_turn"):
                result = await tool.execute(arguments)

        # Should return a list with TextContent
        assert len(result) == 1
        assert result[0].type == "text"

        # Parse the JSON response
        import json

        parsed_response = json.loads(result[0].text)

        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert parsed_response["status"] == "pause_for_investigation"
        assert parsed_response["step_number"] == 1
        assert parsed_response["total_steps"] == 5
        assert parsed_response["next_step_required"] is True
        assert parsed_response["continuation_id"] == "debug-uuid-123"
        assert parsed_response["investigation_status"]["files_checked"] == 1
        assert parsed_response["investigation_status"]["relevant_files"] == 1
        assert parsed_response["investigation_required"] is True
        assert "required_actions" in parsed_response

    @pytest.mark.asyncio
    async def test_execute_subsequent_investigation_step(self):
        """Test execute method for subsequent investigation step."""
        tool = DebugIssueTool()

        # Set up initial state
        tool.initial_issue = "Session validation failures"
        tool.consolidated_findings["files_checked"].add("/api/session_manager.py")

        arguments = {
            "step": "Examining session cleanup method for concurrent modification issues",
            "step_number": 2,
            "total_steps": 5,
            "next_step_required": True,
            "findings": "Found dictionary modification during iteration in cleanup_expired_sessions",
            "files_checked": ["/api/session_manager.py", "/api/utils.py"],
            "relevant_files": ["/api/session_manager.py"],
            "relevant_methods": ["SessionManager.cleanup_expired_sessions"],
            "hypothesis": "Dictionary modified during iteration causing RuntimeError",
            "confidence": "high",
            "continuation_id": "debug-uuid-123",
        }

        # Mock conversation memory functions
        with patch("utils.conversation_memory.add_turn"):
            result = await tool.execute(arguments)

        # Should return a list with TextContent
        assert len(result) == 1
        assert result[0].type == "text"

        # Parse the JSON response
        import json

        parsed_response = json.loads(result[0].text)

        assert parsed_response["step_number"] == 2
        assert parsed_response["next_step_required"] is True
        assert parsed_response["continuation_id"] == "debug-uuid-123"
        assert parsed_response["investigation_status"]["files_checked"] == 2  # Cumulative
        assert parsed_response["investigation_status"]["relevant_methods"] == 1
        assert parsed_response["investigation_status"]["current_confidence"] == "high"

    @pytest.mark.asyncio
    async def test_execute_final_investigation_step(self):
        """Test execute method for final investigation step with expert analysis."""
        tool = DebugIssueTool()

        # Set up investigation history
        tool.initial_issue = "Session validation failures"
        tool.investigation_history = [
            {
                "step_number": 1,
                "step": "Initial investigation of session validation failures",
                "findings": "Initial investigation",
                "files_checked": ["/api/utils.py"],
            },
            {
                "step_number": 2,
                "step": "Deeper analysis of session manager",
                "findings": "Found dictionary issue",
                "files_checked": ["/api/session_manager.py"],
            },
        ]
        tool.consolidated_findings = {
            "files_checked": {"/api/session_manager.py", "/api/utils.py"},
            "relevant_files": {"/api/session_manager.py"},
            "relevant_methods": {"SessionManager.cleanup_expired_sessions"},
            "findings": ["Step 1: Initial investigation", "Step 2: Found dictionary issue"],
            "hypotheses": [{"step": 2, "hypothesis": "Dictionary modified during iteration", "confidence": "high"}],
            "images": [],
        }

        arguments = {
            "step": "Confirmed the root cause and identified fix",
            "step_number": 3,
            "total_steps": 3,
            "next_step_required": False,  # Final step
            "findings": "Root cause confirmed: dictionary modification during iteration in cleanup method",
            "files_checked": ["/api/session_manager.py"],
            "relevant_files": ["/api/session_manager.py"],
            "relevant_methods": ["SessionManager.cleanup_expired_sessions"],
            "hypothesis": "Dictionary modification during iteration causes intermittent RuntimeError",
            "confidence": "high",
            "continuation_id": "debug-uuid-123",
        }

        # Mock the expert analysis call
        mock_expert_response = {
            "status": "analysis_complete",
            "summary": "Dictionary modification during iteration bug identified",
            "hypotheses": [
                {
                    "name": "CONCURRENT_MODIFICATION",
                    "confidence": "High",
                    "root_cause": "Modifying dictionary while iterating",
                    "minimal_fix": "Create list of keys to delete first",
                }
            ],
        }

        # Mock conversation memory and file reading
        with patch("utils.conversation_memory.add_turn"):
            with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
                with patch.object(tool, "_prepare_file_content_for_prompt", return_value=("file content", 100)):
                    result = await tool.execute(arguments)

        # Should return a list with TextContent
        assert len(result) == 1
        response_text = result[0].text

        # Parse the JSON response
        import json

        parsed_response = json.loads(response_text)

        # Check final step structure
        assert parsed_response["status"] == "calling_expert_analysis"
        assert parsed_response["investigation_complete"] is True
        assert parsed_response["expert_analysis"]["status"] == "analysis_complete"
        assert "complete_investigation" in parsed_response
        assert parsed_response["complete_investigation"]["steps_taken"] == 3  # All steps including current

    @pytest.mark.asyncio
    async def test_execute_with_backtracking(self):
        """Test execute method with backtracking to revise findings."""
        tool = DebugIssueTool()

        # Set up some investigation history with all required fields
        tool.investigation_history = [
            {
                "step": "Initial investigation",
                "step_number": 1,
                "findings": "Initial findings",
                "files_checked": ["file1.py"],
                "relevant_files": [],
                "relevant_methods": [],
                "hypothesis": None,
                "confidence": "low",
            },
            {
                "step": "Wrong direction",
                "step_number": 2,
                "findings": "Wrong path",
                "files_checked": ["file2.py"],
                "relevant_files": [],
                "relevant_methods": [],
                "hypothesis": None,
                "confidence": "low",
            },
        ]
        tool.consolidated_findings = {
            "files_checked": {"file1.py", "file2.py"},
            "relevant_files": set(),
            "relevant_methods": set(),
            "findings": ["Step 1: Initial findings", "Step 2: Wrong path"],
            "hypotheses": [],
            "images": [],
        }

        arguments = {
            "step": "Backtracking to revise approach",
            "step_number": 3,
            "total_steps": 5,
            "next_step_required": True,
            "findings": "Taking a different investigation approach",
            "files_checked": ["file3.py"],
            "backtrack_from_step": 2,  # Backtrack from step 2
            "continuation_id": "debug-uuid-123",
        }

        # Mock conversation memory functions
        with patch("utils.conversation_memory.add_turn"):
            result = await tool.execute(arguments)

        # Should return a list with TextContent
        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert len(result) == 1
        response_text = result[0].text

        # Parse the JSON response
        import json

        parsed_response = json.loads(response_text)

        assert parsed_response["status"] == "pause_for_investigation"
        # After backtracking from step 2, history should have step 1 plus the new step
        assert len(tool.investigation_history) == 2  # Step 1 + new step 3
        assert tool.investigation_history[0]["step_number"] == 1
        assert tool.investigation_history[1]["step_number"] == 3  # The new step that triggered backtrack

    @pytest.mark.asyncio
    async def test_execute_adjusts_total_steps(self):
        """Test execute method adjusts total steps when current step exceeds estimate."""
        tool = DebugIssueTool()
        arguments = {
            "step": "Additional investigation needed",
            "step_number": 8,
            "total_steps": 5,  # Current step exceeds total
            "next_step_required": True,
            "findings": "More complexity discovered",
            "continuation_id": "debug-uuid-123",
        }

        # Mock conversation memory functions
        with patch("utils.conversation_memory.add_turn"):
            result = await tool.execute(arguments)

        # Should return a list with TextContent
        assert len(result) == 1
        response_text = result[0].text

        # Parse the JSON response
        import json

        parsed_response = json.loads(response_text)

        # Total steps should be adjusted to match current step
        assert parsed_response["total_steps"] == 8
        assert parsed_response["step_number"] == 8

    @pytest.mark.asyncio
    async def test_execute_error_handling(self):
        """Test execute method error handling."""
        tool = DebugIssueTool()
        # Invalid arguments - missing required fields
        arguments = {
            "step": "Invalid request"
            # Missing required fields
        }

        result = await tool.execute(arguments)

        # Should return error response
        assert len(result) == 1
        response_text = result[0].text

        # Parse the JSON response
        import json

        parsed_response = json.loads(response_text)

        assert parsed_response["status"] == "investigation_failed"
        assert "error" in parsed_response

    @pytest.mark.asyncio
    async def test_execute_with_string_instead_of_list_fields(self):
        """Test execute method handles string inputs for list fields gracefully."""
        tool = DebugIssueTool()
        arguments = {
            "step": "Investigating issue with string inputs",
            "step_number": 1,
            "total_steps": 3,
            "next_step_required": True,
            "findings": "Testing string input handling",
            # These should be lists but passing strings to test the fix
            "files_checked": "relevant_files",  # String instead of list
            "relevant_files": "some_string",  # String instead of list
            "relevant_methods": "another_string",  # String instead of list
        }

        # Mock conversation memory functions
        with patch("utils.conversation_memory.create_thread", return_value="debug-string-test"):
            with patch("utils.conversation_memory.add_turn"):
                # Should handle gracefully without crashing
                result = await tool.execute(arguments)

        # Should return a valid response
        assert len(result) == 1
        assert result[0].type == "text"

        # Parse the JSON response
        import json

        parsed_response = json.loads(result[0].text)

        # Should complete successfully with empty lists
        assert parsed_response["status"] == "pause_for_investigation"
        assert parsed_response["step_number"] == 1
        assert parsed_response["investigation_status"]["files_checked"] == 0  # Empty due to string conversion
        assert parsed_response["investigation_status"]["relevant_files"] == 0
        assert parsed_response["investigation_status"]["relevant_methods"] == 0

        # Verify internal state - should have empty sets, not individual characters
        assert tool.consolidated_findings["files_checked"] == set()
        assert tool.consolidated_findings["relevant_files"] == set()
        assert tool.consolidated_findings["relevant_methods"] == set()
        # Should NOT have individual characters like {'r', 'e', 'l', 'e', 'v', 'a', 'n', 't', '_', 'f', 'i', 'l', 'e', 's'}

    def test_prepare_investigation_summary(self):
        """Test investigation summary preparation."""
        tool = DebugIssueTool()
        tool.consolidated_findings = {
            "files_checked": {"file1.py", "file2.py", "file3.py"},
            "relevant_files": {"file1.py", "file2.py"},
            "relevant_methods": {"Class1.method1", "Class2.method2"},
            "findings": [
                "Step 1: Initial investigation findings",
                "Step 2: Discovered potential issue",
                "Step 3: Confirmed root cause",
            ],
            "hypotheses": [
                {"step": 1, "hypothesis": "Initial hypothesis", "confidence": "low"},
                {"step": 2, "hypothesis": "Refined hypothesis", "confidence": "medium"},
                {"step": 3, "hypothesis": "Final hypothesis", "confidence": "high"},
            ],
            "images": [],
        }

        summary = tool._prepare_investigation_summary()

        assert "SYSTEMATIC INVESTIGATION SUMMARY" in summary
        assert "Files examined: 3" in summary
        assert "Relevant files identified: 2" in summary
        assert "Methods/functions involved: 2" in summary
        assert "INVESTIGATION PROGRESSION" in summary
        assert "Step 1:" in summary
        assert "Step 2:" in summary
        assert "Step 3:" in summary
        assert "HYPOTHESIS EVOLUTION" in summary
        assert "low confidence" in summary
        assert "medium confidence" in summary
        assert "high confidence" in summary

    def test_extract_error_context(self):
        """Test error context extraction from findings."""
        tool = DebugIssueTool()
        tool.consolidated_findings = {
            "findings": [
                "Step 1: Found no issues initially",
                "Step 2: Discovered ERROR: Dictionary size changed during iteration",
                "Step 3: Stack trace shows RuntimeError in cleanup method",
                "Step 4: Exception occurs intermittently",
            ],
        }

        error_context = tool._extract_error_context()

        assert error_context is not None
        assert "ERROR: Dictionary size changed" in error_context
        assert "Stack trace shows RuntimeError" in error_context
        assert "Exception occurs intermittently" in error_context
        assert "Found no issues initially" not in error_context  # Should not include non-error findings

    def test_reprocess_consolidated_findings(self):
        """Test reprocessing of consolidated findings after backtracking."""
        tool = DebugIssueTool()
        tool.investigation_history = [
            {
                "step_number": 1,
                "findings": "Initial findings",
                "files_checked": ["file1.py"],
                "relevant_files": ["file1.py"],
                "relevant_methods": ["method1"],
                "hypothesis": "Initial hypothesis",
                "confidence": "low",
            },
            {
                "step_number": 2,
                "findings": "Second findings",
                "files_checked": ["file2.py"],
                "relevant_files": [],
                "relevant_methods": ["method2"],
            },
        ]

        tool._reprocess_consolidated_findings()

        assert tool.consolidated_findings["files_checked"] == {"file1.py", "file2.py"}
        assert tool.consolidated_findings["relevant_files"] == {"file1.py"}
        assert tool.consolidated_findings["relevant_methods"] == {"method1", "method2"}
        assert len(tool.consolidated_findings["findings"]) == 2
        assert len(tool.consolidated_findings["hypotheses"]) == 1
        assert tool.consolidated_findings["hypotheses"][0]["hypothesis"] == "Initial hypothesis"


# Integration test
class TestDebugToolIntegration:
    """Integration tests for debug tool."""

    def setup_method(self):
        """Set up model context for integration tests."""
        from utils.model_context import ModelContext

        self.tool = DebugIssueTool()
        self.tool._model_context = ModelContext("flash")  # Test model

    @pytest.mark.asyncio
    async def test_complete_investigation_flow(self):
        """Test complete investigation flow from start to expert analysis."""
        # Step 1: Initial investigation
        arguments = {
            "step": "Investigating memory leak in data processing pipeline",
            "step_number": 1,
            "total_steps": 3,
            "next_step_required": True,
            "findings": "High memory usage observed during batch processing",
            "files_checked": ["/processor/main.py"],
        }

        # Mock conversation memory and expert analysis
        with patch("utils.conversation_memory.create_thread", return_value="debug-flow-uuid"):
            with patch("utils.conversation_memory.add_turn"):
                result = await self.tool.execute(arguments)

        # Verify response structure
        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert len(result) == 1
        response_text = result[0].text

        # Parse the JSON response
        import json

        parsed_response = json.loads(response_text)

        assert parsed_response["status"] == "pause_for_investigation"
        assert parsed_response["step_number"] == 1
        assert parsed_response["continuation_id"] == "debug-flow-uuid"

    @pytest.mark.asyncio
    async def test_model_context_initialization_in_expert_analysis(self):
        """Real integration test that model context is properly initialized when expert analysis is called."""
        tool = DebugIssueTool()

        # Do NOT manually set up model context - let the method do it itself

        # Set up investigation state for final step
        tool.initial_issue = "Memory leak investigation"
        tool.investigation_history = [
            {
                "step_number": 1,
                "step": "Initial investigation",
                "findings": "Found memory issues",
                "files_checked": [],
            }
        ]
        tool.consolidated_findings = {
            "files_checked": set(),
            "relevant_files": set(),  # No files to avoid file I/O in this test
            "relevant_methods": {"process_data"},
            "findings": ["Step 1: Found memory issues"],
            "hypotheses": [],
            "images": [],
        }

        # Test the _call_expert_analysis method directly to verify ModelContext is properly handled
        # This is the real test - we're testing that the method can be called without the ModelContext error
        try:
            # Only mock the API call itself, not the model resolution infrastructure
            from unittest.mock import MagicMock

            mock_provider = MagicMock()
            mock_response = MagicMock()
            mock_response.content = '{"status": "analysis_complete", "summary": "Test completed"}'
            mock_provider.generate_content.return_value = mock_response

            # Use the real get_model_provider method but override its result to avoid API calls
            original_get_provider = tool.get_model_provider
            tool.get_model_provider = lambda model_name: mock_provider

            try:
                # Create mock arguments and request for model resolution
                from tools.debug import DebugInvestigationRequest

                mock_arguments = {"model": None}  # No model specified, should fall back to DEFAULT_MODEL
                mock_request = DebugInvestigationRequest(
                    step="Test step", step_number=1, total_steps=1, next_step_required=False, findings="Test findings"
                )

                # This should NOT raise a ModelContext error - the method should set up context itself
                result = await tool._call_expert_analysis(
                    initial_issue="Test issue",
                    investigation_summary="Test summary",
                    relevant_files=[],  # Empty to avoid file operations
                    relevant_methods=["test_method"],
                    final_hypothesis="Test hypothesis",
                    error_context=None,
                    images=[],
                    model_info=None,  # No pre-resolved model info
                    arguments=mock_arguments,  # Provide arguments for model resolution
                    request=mock_request,  # Provide request for model resolution
                )

                # Should complete without ModelContext error
                assert "error" not in result
                assert result["status"] == "analysis_complete"

                # Verify the model context was actually set up
                assert hasattr(tool, "_model_context")
                assert hasattr(tool, "_current_model_name")
                # Should use DEFAULT_MODEL when no model specified
                from config import DEFAULT_MODEL

                assert tool._current_model_name == DEFAULT_MODEL

            finally:
                # Restore original method
                tool.get_model_provider = original_get_provider

        except RuntimeError as e:
            if "ModelContext not initialized" in str(e):
                pytest.fail("ModelContext error still occurs - the fix is not working properly")
            else:
                raise  # Re-raise other RuntimeErrors