"""
Comprehensive test demonstrating debug tool's self-investigation pattern
and continuation ID functionality working together end-to-end.
"""
|
|
|
|
import json
from unittest.mock import patch

import pytest

from tools.debug import DebugIssueTool
from utils.conversation_memory import (
    ConversationTurn,
    ThreadContext,
    build_conversation_history,
    get_conversation_file_list,
)
class TestDebugComprehensiveWorkflow:
    """Test the complete debug workflow from investigation to expert analysis to continuation."""

    @pytest.mark.asyncio
    async def test_full_debug_workflow_with_continuation(self):
        """Test complete debug workflow: investigation → expert analysis → continuation to another tool."""
        tool = DebugIssueTool()

        # Step 1: Initial investigation. create_thread is patched so the tool
        # gets a deterministic continuation ID; add_turn is patched so we can
        # inspect what was recorded in conversation memory.
        with patch("utils.conversation_memory.create_thread", return_value="debug-workflow-uuid"):
            with patch("utils.conversation_memory.add_turn") as mock_add_turn:
                result1 = await tool.execute(
                    {
                        "step": "Investigating memory leak in user session handler",
                        "step_number": 1,
                        "total_steps": 3,
                        "next_step_required": True,
                        "findings": "High memory usage detected in session handler",
                        "files_checked": ["/api/sessions.py"],
                        "images": ["/screenshots/memory_profile.png"],
                    }
                )

        # Verify step 1 response
        assert len(result1) == 1
        response1 = json.loads(result1[0].text)
        assert response1["status"] == "pause_for_investigation"
        assert response1["step_number"] == 1
        assert response1["continuation_id"] == "debug-workflow-uuid"

        # Verify conversation turn was added
        assert mock_add_turn.called
        call_args = mock_add_turn.call_args
        if call_args:
            # Check if args were passed positionally or as keywords
            args = call_args.args if hasattr(call_args, "args") else call_args[0]
            if args and len(args) >= 3:
                assert args[0] == "debug-workflow-uuid"
                assert args[1] == "assistant"
                # Debug tool now returns "pause_for_investigation" for ongoing steps
                assert json.loads(args[2])["status"] == "pause_for_investigation"

        # Step 2: Continue investigation with findings
        with patch("utils.conversation_memory.add_turn"):
            result2 = await tool.execute(
                {
                    "step": "Found circular references in session cache preventing garbage collection",
                    "step_number": 2,
                    "total_steps": 3,
                    "next_step_required": True,
                    "findings": "Session objects hold references to themselves through event handlers",
                    "files_checked": ["/api/sessions.py", "/api/cache.py"],
                    "relevant_files": ["/api/sessions.py"],
                    "relevant_methods": ["SessionHandler.__init__", "SessionHandler.add_event_listener"],
                    "hypothesis": "Circular references preventing garbage collection",
                    "confidence": "high",
                    "continuation_id": "debug-workflow-uuid",
                }
            )

        # Verify step 2 response
        response2 = json.loads(result2[0].text)
        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert response2["status"] == "pause_for_investigation"
        assert response2["step_number"] == 2
        assert response2["investigation_status"]["files_checked"] == 2
        assert response2["investigation_status"]["relevant_methods"] == 2
        assert response2["investigation_status"]["current_confidence"] == "high"

        # Step 3: Final investigation with expert analysis
        # Mock the expert analysis response
        mock_expert_response = {
            "status": "analysis_complete",
            "summary": "Memory leak caused by circular references in session event handlers",
            "hypotheses": [
                {
                    "name": "CIRCULAR_REFERENCE_LEAK",
                    "confidence": "High (95%)",
                    "evidence": ["Event handlers hold strong references", "No weak references used"],
                    "root_cause": "SessionHandler stores callbacks that reference the handler itself",
                    "potential_fixes": [
                        {
                            "description": "Use weakref for event handler callbacks",
                            "files_to_modify": ["/api/sessions.py"],
                            "complexity": "Low",
                        }
                    ],
                    "minimal_fix": "Replace self references in callbacks with weakref.ref(self)",
                }
            ],
            "investigation_summary": {
                "pattern": "Classic circular reference memory leak",
                "severity": "High - causes unbounded memory growth",
                "recommended_action": "Implement weakref solution immediately",
            },
        }

        with patch("utils.conversation_memory.add_turn"):
            with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
                result3 = await tool.execute(
                    {
                        "step": "Investigation complete - confirmed circular reference memory leak pattern",
                        "step_number": 3,
                        "total_steps": 3,
                        "next_step_required": False,  # Triggers expert analysis
                        "findings": "Circular references between SessionHandler and event callbacks prevent GC",
                        "files_checked": ["/api/sessions.py", "/api/cache.py"],
                        "relevant_files": ["/api/sessions.py"],
                        "relevant_methods": ["SessionHandler.__init__", "SessionHandler.add_event_listener"],
                        "hypothesis": "Circular references in event handler callbacks causing memory leak",
                        "confidence": "high",
                        "continuation_id": "debug-workflow-uuid",
                        "model": "flash",
                    }
                )

        # Verify final response with expert analysis
        response3 = json.loads(result3[0].text)
        assert response3["status"] == "calling_expert_analysis"
        assert response3["investigation_complete"] is True
        assert "expert_analysis" in response3

        expert = response3["expert_analysis"]
        assert expert["status"] == "analysis_complete"
        assert "CIRCULAR_REFERENCE_LEAK" in expert["hypotheses"][0]["name"]
        assert "weakref" in expert["hypotheses"][0]["minimal_fix"]

        # Verify complete investigation summary
        assert "complete_investigation" in response3
        complete = response3["complete_investigation"]
        assert complete["steps_taken"] == 3
        assert "/api/sessions.py" in complete["files_examined"]
        assert "SessionHandler.add_event_listener" in complete["relevant_methods"]

        # Step 4: Test continuation to another tool (e.g., analyze)
        # Create a mock thread context representing the debug conversation
        debug_context = ThreadContext(
            thread_id="debug-workflow-uuid",
            created_at="2025-01-01T00:00:00Z",
            last_updated_at="2025-01-01T00:10:00Z",
            tool_name="debug",
            turns=[
                ConversationTurn(
                    role="user",
                    content="Step 1: Investigating memory leak",
                    timestamp="2025-01-01T00:01:00Z",
                    tool_name="debug",
                    files=["/api/sessions.py"],
                    images=["/screenshots/memory_profile.png"],
                ),
                ConversationTurn(
                    role="assistant",
                    content=json.dumps(response1),
                    timestamp="2025-01-01T00:02:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="user",
                    content="Step 2: Found circular references",
                    timestamp="2025-01-01T00:03:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="assistant",
                    content=json.dumps(response2),
                    timestamp="2025-01-01T00:04:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="user",
                    content="Step 3: Investigation complete",
                    timestamp="2025-01-01T00:05:00Z",
                    tool_name="debug",
                ),
                ConversationTurn(
                    role="assistant",
                    content=json.dumps(response3),
                    timestamp="2025-01-01T00:06:00Z",
                    tool_name="debug",
                ),
            ],
            initial_context={},
        )

        # Test that another tool can use the continuation
        with patch("utils.conversation_memory.get_thread", return_value=debug_context):
            # Mock file reading: returns (content, token_count) pairs
            def mock_read_file(file_path):
                if file_path == "/api/sessions.py":
                    return "# SessionHandler with circular refs\nclass SessionHandler:\n pass", 20
                elif file_path == "/screenshots/memory_profile.png":
                    # Images return empty string for content but 0 tokens
                    return "", 0
                elif file_path == "/api/cache.py":
                    return "# Cache module", 5
                return "", 0

            # Build conversation history for another tool
            from utils.model_context import ModelContext

            model_context = ModelContext("flash")
            history, tokens = build_conversation_history(debug_context, model_context, read_files_func=mock_read_file)

            # Verify history contains all debug information
            assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
            assert "Thread: debug-workflow-uuid" in history
            assert "Tool: debug" in history

            # Check investigation progression
            assert "Step 1: Investigating memory leak" in history
            assert "Step 2: Found circular references" in history
            assert "Step 3: Investigation complete" in history

            # Check expert analysis is included
            assert "CIRCULAR_REFERENCE_LEAK" in history
            assert "weakref" in history
            assert "memory leak" in history

            # Check files are referenced in conversation history
            assert "/api/sessions.py" in history

            # File content would be in referenced files section if the files were readable
            # In our test they're not real files so they won't be embedded
            # But the expert analysis content should be there
            assert "Memory leak caused by circular references" in history

            # Verify file list includes all files from investigation
            file_list = get_conversation_file_list(debug_context)
            assert "/api/sessions.py" in file_list

    @pytest.mark.asyncio
    async def test_debug_investigation_state_machine(self):
        """Test the debug tool's investigation state machine behavior."""
        tool = DebugIssueTool()

        # Collect each response so state transitions can be verified in order
        states = []

        # Initial state
        with patch("utils.conversation_memory.create_thread", return_value="state-test-uuid"):
            with patch("utils.conversation_memory.add_turn"):
                result = await tool.execute(
                    {
                        "step": "Starting investigation",
                        "step_number": 1,
                        "total_steps": 2,
                        "next_step_required": True,
                        "findings": "Initial findings",
                    }
                )
                states.append(json.loads(result[0].text))

        # Verify initial state
        # Debug tool now returns "pause_for_investigation" for ongoing steps
        assert states[0]["status"] == "pause_for_investigation"
        assert states[0]["step_number"] == 1
        assert states[0]["next_step_required"] is True
        assert states[0]["investigation_required"] is True
        assert "required_actions" in states[0]

        # Final state (triggers expert analysis)
        mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"}

        with patch("utils.conversation_memory.add_turn"):
            with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
                result = await tool.execute(
                    {
                        "step": "Final findings",
                        "step_number": 2,
                        "total_steps": 2,
                        "next_step_required": False,
                        "findings": "Complete findings",
                        "continuation_id": "state-test-uuid",
                        "model": "flash",
                    }
                )
                states.append(json.loads(result[0].text))

        # Verify final state
        assert states[1]["status"] == "calling_expert_analysis"
        assert states[1]["investigation_complete"] is True
        assert "expert_analysis" in states[1]

    @pytest.mark.asyncio
    async def test_debug_backtracking_preserves_continuation(self):
        """Test that backtracking preserves continuation ID and investigation state."""
        tool = DebugIssueTool()

        # Start investigation
        with patch("utils.conversation_memory.create_thread", return_value="backtrack-test-uuid"):
            with patch("utils.conversation_memory.add_turn"):
                result1 = await tool.execute(
                    {
                        "step": "Initial hypothesis",
                        "step_number": 1,
                        "total_steps": 3,
                        "next_step_required": True,
                        "findings": "Initial findings",
                    }
                )

        response1 = json.loads(result1[0].text)
        continuation_id = response1["continuation_id"]

        # Step 2 - wrong direction
        with patch("utils.conversation_memory.add_turn"):
            await tool.execute(
                {
                    "step": "Wrong hypothesis",
                    "step_number": 2,
                    "total_steps": 3,
                    "next_step_required": True,
                    "findings": "Dead end",
                    "hypothesis": "Wrong initial hypothesis",
                    "confidence": "low",
                    "continuation_id": continuation_id,
                }
            )

        # Backtrack from step 2
        with patch("utils.conversation_memory.add_turn"):
            result3 = await tool.execute(
                {
                    "step": "Backtracking - new hypothesis",
                    "step_number": 3,
                    "total_steps": 4,  # Adjusted total
                    "next_step_required": True,
                    "findings": "New direction",
                    "hypothesis": "New hypothesis after backtracking",
                    "confidence": "medium",
                    "backtrack_from_step": 2,
                    "continuation_id": continuation_id,
                }
            )

        response3 = json.loads(result3[0].text)

        # Verify continuation preserved through backtracking
        assert response3["continuation_id"] == continuation_id
        assert response3["step_number"] == 3
        assert response3["total_steps"] == 4

        # Verify investigation status after backtracking
        # When we backtrack, investigation continues
        assert response3["investigation_status"]["files_checked"] == 0  # Reset after backtrack
        assert response3["investigation_status"]["current_confidence"] == "medium"

        # The key thing is the continuation ID is preserved
        # and we've adjusted our approach (total_steps increased)