"""
Comprehensive test demonstrating debug tool's self-investigation pattern
and continuation ID functionality working together end-to-end.
"""
|
|
|
|
import json
from unittest.mock import patch

import pytest

from tools.debug import DebugIssueTool
from utils.conversation_memory import (
    ConversationTurn,
    ThreadContext,
    build_conversation_history,
    get_conversation_file_list,
)
class TestDebugComprehensiveWorkflow:
    """Test the complete debug workflow from investigation to expert analysis to continuation."""

    @pytest.mark.asyncio
    async def test_full_debug_workflow_with_continuation(self):
        """Test complete debug workflow: investigation → expert analysis → continuation to another tool."""
        tool = DebugIssueTool()

        # Step 1: Initial investigation. create_thread is patched so the tool
        # gets a deterministic continuation ID; add_turn is patched so we can
        # inspect what was recorded in conversation memory.
        with patch("utils.conversation_memory.create_thread", return_value="debug-workflow-uuid"):
            with patch("utils.conversation_memory.add_turn") as mock_add_turn:
                result1 = await tool.execute(
                    {
                        "step": "Investigating memory leak in user session handler",
                        "step_number": 1,
                        "total_steps": 3,
                        "next_step_required": True,
                        "findings": "High memory usage detected in session handler",
                        "files_checked": ["/api/sessions.py"],
                        "images": ["/screenshots/memory_profile.png"],
                    }
                )

        # Verify step 1 response
        assert len(result1) == 1
        response1 = json.loads(result1[0].text)
        assert response1["status"] == "pause_for_investigation"
        assert response1["step_number"] == 1
        assert response1["continuation_id"] == "debug-workflow-uuid"

        # Verify conversation turn was added
        assert mock_add_turn.called
        call_args = mock_add_turn.call_args
        if call_args:
            # Check if args were passed positionally or as keywords
            args = call_args.args if hasattr(call_args, "args") else call_args[0]
            if args and len(args) >= 3:
                assert args[0] == "debug-workflow-uuid"
                assert args[1] == "assistant"
                # Debug tool now returns "pause_for_investigation" for ongoing steps
                assert json.loads(args[2])["status"] == "pause_for_investigation"

        # Step 2: Continue investigation with findings
        with patch("utils.conversation_memory.add_turn"):
            result2 = await tool.execute(
                {
                    "step": "Found circular references in session cache preventing garbage collection",
                    "step_number": 2,
                    "total_steps": 3,
                    "next_step_required": True,
                    "findings": "Session objects hold references to themselves through event handlers",
                    "files_checked": ["/api/sessions.py", "/api/cache.py"],
                    "relevant_files": ["/api/sessions.py"],
                    "relevant_methods": ["SessionHandler.__init__", "SessionHandler.add_event_listener"],
                    "hypothesis": "Circular references preventing garbage collection",
                    "confidence": "high",
                    "continuation_id": "debug-workflow-uuid",
                }
            )

        # Verify step 2 response
        response2 = json.loads(result2[0].text)
        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert response2["status"] == "pause_for_investigation"
        assert response2["step_number"] == 2
        assert response2["investigation_status"]["files_checked"] == 2
        assert response2["investigation_status"]["relevant_methods"] == 2
        assert response2["investigation_status"]["current_confidence"] == "high"

        # Step 3: Final investigation with expert analysis
        # Mock the expert analysis response
        mock_expert_response = {
            "status": "analysis_complete",
            "summary": "Memory leak caused by circular references in session event handlers",
            "hypotheses": [
                {
                    "name": "CIRCULAR_REFERENCE_LEAK",
                    "confidence": "High (95%)",
                    "evidence": ["Event handlers hold strong references", "No weak references used"],
                    "root_cause": "SessionHandler stores callbacks that reference the handler itself",
                    "potential_fixes": [
                        {
                            "description": "Use weakref for event handler callbacks",
                            "files_to_modify": ["/api/sessions.py"],
                            "complexity": "Low",
                        }
                    ],
                    "minimal_fix": "Replace self references in callbacks with weakref.ref(self)",
                }
            ],
            "investigation_summary": {
                "pattern": "Classic circular reference memory leak",
                "severity": "High - causes unbounded memory growth",
                "recommended_action": "Implement weakref solution immediately",
            },
        }

        with patch("utils.conversation_memory.add_turn"):
            with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
                result3 = await tool.execute(
                    {
                        "step": "Investigation complete - confirmed circular reference memory leak pattern",
                        "step_number": 3,
                        "total_steps": 3,
                        "next_step_required": False,  # Triggers expert analysis
                        "findings": "Circular references between SessionHandler and event callbacks prevent GC",
                        "files_checked": ["/api/sessions.py", "/api/cache.py"],
                        "relevant_files": ["/api/sessions.py"],
                        "relevant_methods": ["SessionHandler.__init__", "SessionHandler.add_event_listener"],
                        "hypothesis": "Circular references in event handler callbacks causing memory leak",
                        "confidence": "high",
                        "continuation_id": "debug-workflow-uuid",
                        "model": "flash",
                    }
                )

        # Verify final response with expert analysis
        response3 = json.loads(result3[0].text)
        assert response3["status"] == "calling_expert_analysis"
        assert response3["investigation_complete"] is True
        assert "expert_analysis" in response3

        expert = response3["expert_analysis"]
        assert expert["status"] == "analysis_complete"
        assert "CIRCULAR_REFERENCE_LEAK" in expert["hypotheses"][0]["name"]
        assert "weakref" in expert["hypotheses"][0]["minimal_fix"]

        # Verify complete investigation summary
        assert "complete_investigation" in response3
        complete = response3["complete_investigation"]
        assert complete["steps_taken"] == 3
        assert "/api/sessions.py" in complete["files_examined"]
        assert "SessionHandler.add_event_listener" in complete["relevant_methods"]

        # Step 4: Test continuation to another tool (e.g., analyze)
        # Create a mock thread context representing the debug conversation
        debug_context = ThreadContext(
            thread_id="debug-workflow-uuid",
            created_at="2025-01-01T00:00:00Z",
            last_updated_at="2025-01-01T00:10:00Z",
            tool_name="debug",
            turns=[
                ConversationTurn(
                    role="user",
                    content="Step 1: Investigating memory leak",
                    timestamp="2025-01-01T00:01:00Z",
                    tool_name="debug",
                    files=["/api/sessions.py"],
                    images=["/screenshots/memory_profile.png"],
                ),
                ConversationTurn(
                    role="assistant",
                    content=json.dumps(response1),
                    timestamp="2025-01-01T00:02:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="user",
                    content="Step 2: Found circular references",
                    timestamp="2025-01-01T00:03:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="assistant",
                    content=json.dumps(response2),
                    timestamp="2025-01-01T00:04:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="user",
                    content="Step 3: Investigation complete",
                    timestamp="2025-01-01T00:05:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="assistant",
                    content=json.dumps(response3),
                    timestamp="2025-01-01T00:06:00Z",
                    tool_name="debug",
                ),
            ],
            initial_context={},
        )

        # Test that another tool can use the continuation
        with patch("utils.conversation_memory.get_thread", return_value=debug_context):
            # Mock file reading: returns (content, token_count) pairs
            def mock_read_file(file_path):
                if file_path == "/api/sessions.py":
                    return "# SessionHandler with circular refs\nclass SessionHandler:\n pass", 20
                elif file_path == "/screenshots/memory_profile.png":
                    # Images return empty string for content but 0 tokens
                    return "", 0
                elif file_path == "/api/cache.py":
                    return "# Cache module", 5
                return "", 0

            # Build conversation history for another tool
            from utils.model_context import ModelContext

            model_context = ModelContext("flash")
            history, tokens = build_conversation_history(debug_context, model_context, read_files_func=mock_read_file)

            # Verify history contains all debug information
            assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
            assert "Thread: debug-workflow-uuid" in history
            assert "Tool: debug" in history

            # Check investigation progression
            assert "Step 1: Investigating memory leak" in history
            assert "Step 2: Found circular references" in history
            assert "Step 3: Investigation complete" in history

            # Check expert analysis is included
            assert "CIRCULAR_REFERENCE_LEAK" in history
            assert "weakref" in history
            assert "memory leak" in history

            # Check files are referenced in conversation history
            assert "/api/sessions.py" in history

            # File content would be in referenced files section if the files were readable
            # In our test they're not real files so they won't be embedded
            # But the expert analysis content should be there
            assert "Memory leak caused by circular references" in history

            # Verify file list includes all files from investigation
            file_list = get_conversation_file_list(debug_context)
            assert "/api/sessions.py" in file_list

    @pytest.mark.asyncio
    async def test_debug_investigation_state_machine(self):
        """Test the debug tool's investigation state machine behavior."""
        tool = DebugIssueTool()

        # Collect each response so state transitions can be verified in order
        states = []

        # Initial state
        with patch("utils.conversation_memory.create_thread", return_value="state-test-uuid"):
            with patch("utils.conversation_memory.add_turn"):
                result = await tool.execute(
                    {
                        "step": "Starting investigation",
                        "step_number": 1,
                        "total_steps": 2,
                        "next_step_required": True,
                        "findings": "Initial findings",
                    }
                )
                states.append(json.loads(result[0].text))

        # Verify initial state
        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert states[0]["status"] == "pause_for_investigation"
        assert states[0]["step_number"] == 1
        assert states[0]["next_step_required"] is True
        assert states[0]["investigation_required"] is True
        assert "required_actions" in states[0]

        # Final state (triggers expert analysis)
        mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"}

        with patch("utils.conversation_memory.add_turn"):
            with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
                result = await tool.execute(
                    {
                        "step": "Final findings",
                        "step_number": 2,
                        "total_steps": 2,
                        "next_step_required": False,
                        "findings": "Complete findings",
                        "continuation_id": "state-test-uuid",
                        "model": "flash",
                    }
                )
                states.append(json.loads(result[0].text))

        # Verify final state
        assert states[1]["status"] == "calling_expert_analysis"
        assert states[1]["investigation_complete"] is True
        assert "expert_analysis" in states[1]

    @pytest.mark.asyncio
    async def test_debug_backtracking_preserves_continuation(self):
        """Test that backtracking preserves continuation ID and investigation state."""
        tool = DebugIssueTool()

        # Start investigation
        with patch("utils.conversation_memory.create_thread", return_value="backtrack-test-uuid"):
            with patch("utils.conversation_memory.add_turn"):
                result1 = await tool.execute(
                    {
                        "step": "Initial hypothesis",
                        "step_number": 1,
                        "total_steps": 3,
                        "next_step_required": True,
                        "findings": "Initial findings",
                    }
                )

        response1 = json.loads(result1[0].text)
        continuation_id = response1["continuation_id"]

        # Step 2 - wrong direction
        with patch("utils.conversation_memory.add_turn"):
            await tool.execute(
                {
                    "step": "Wrong hypothesis",
                    "step_number": 2,
                    "total_steps": 3,
                    "next_step_required": True,
                    "findings": "Dead end",
                    "hypothesis": "Wrong initial hypothesis",
                    "confidence": "low",
                    "continuation_id": continuation_id,
                }
            )

        # Backtrack from step 2
        with patch("utils.conversation_memory.add_turn"):
            result3 = await tool.execute(
                {
                    "step": "Backtracking - new hypothesis",
                    "step_number": 3,
                    "total_steps": 4,  # Adjusted total
                    "next_step_required": True,
                    "findings": "New direction",
                    "hypothesis": "New hypothesis after backtracking",
                    "confidence": "medium",
                    "backtrack_from_step": 2,
                    "continuation_id": continuation_id,
                }
            )

        response3 = json.loads(result3[0].text)

        # Verify continuation preserved through backtracking
        assert response3["continuation_id"] == continuation_id
        assert response3["step_number"] == 3
        assert response3["total_steps"] == 4

        # Verify investigation status after backtracking
        # When we backtrack, investigation continues
        assert response3["investigation_status"]["files_checked"] == 0  # Reset after backtrack
        assert response3["investigation_status"]["current_confidence"] == "medium"

        # The key thing is the continuation ID is preserved
        # and we've adjusted our approach (total_steps increased)