Re-imagined and re-written Debug tool. Instead of prompting Claude to perform initial analysis (and hoping it did), the tool now works through the debug process as an 'investigation', encouraging Claude to gather its 'findings' / 'hypotheses', stepping back as needed, collecting the files it has gone through and keeping track of the files relevant to the issue at hand. This structured investigation is then passed to the other model with far greater insight than the original debug tool could ever provide.

Improved prompts, guard against overengineering and flag that as an antipattern
This commit is contained in:
Fahad
2025-06-19 10:22:30 +04:00
parent 2641c78f8d
commit fccfb0d999
16 changed files with 2243 additions and 707 deletions

View File

@@ -0,0 +1,363 @@
"""
Comprehensive test demonstrating debug tool's self-investigation pattern
and continuation ID functionality working together end-to-end.
"""
import json
from unittest.mock import patch
import pytest
from tools.debug import DebugIssueTool
from utils.conversation_memory import (
ConversationTurn,
ThreadContext,
build_conversation_history,
get_conversation_file_list,
)
class TestDebugComprehensiveWorkflow:
"""Test the complete debug workflow from investigation to expert analysis to continuation."""
@pytest.mark.asyncio
async def test_full_debug_workflow_with_continuation(self):
    """Test complete debug workflow: investigation → expert analysis → continuation to another tool.

    Walks the debug tool through a three-step self-investigation, mocks the
    expert-analysis call on the final step, and then verifies that the recorded
    thread can be rebuilt as conversation history for a different tool via the
    continuation ID.
    """
    tool = DebugIssueTool()

    # Step 1: Initial investigation — a fresh thread is created for this run.
    with patch("utils.conversation_memory.create_thread", return_value="debug-workflow-uuid"):
        with patch("utils.conversation_memory.add_turn") as mock_add_turn:
            result1 = await tool.execute(
                {
                    "step": "Investigating memory leak in user session handler",
                    "step_number": 1,
                    "total_steps": 3,
                    "next_step_required": True,
                    "findings": "High memory usage detected in session handler",
                    "files_checked": ["/api/sessions.py"],
                    "images": ["/screenshots/memory_profile.png"],
                }
            )

    # Verify step 1 response
    assert len(result1) == 1
    response1 = json.loads(result1[0].text)
    assert response1["status"] == "investigation_in_progress"
    assert response1["step_number"] == 1
    assert response1["continuation_id"] == "debug-workflow-uuid"

    # Verify a conversation turn was recorded. Previously these checks were
    # wrapped in `if` guards, which let them silently skip when the mock was
    # never called; assert the call unconditionally instead.
    mock_add_turn.assert_called()
    call_args = mock_add_turn.call_args
    args = call_args.args if hasattr(call_args, "args") else call_args[0]
    if args and len(args) >= 3:
        # add_turn(thread_id, role, content, ...) was passed positionally.
        assert args[0] == "debug-workflow-uuid"
        assert args[1] == "assistant"
        assert json.loads(args[2])["status"] == "investigation_in_progress"

    # Step 2: Continue investigation with findings
    with patch("utils.conversation_memory.add_turn"):
        result2 = await tool.execute(
            {
                "step": "Found circular references in session cache preventing garbage collection",
                "step_number": 2,
                "total_steps": 3,
                "next_step_required": True,
                "findings": "Session objects hold references to themselves through event handlers",
                "files_checked": ["/api/sessions.py", "/api/cache.py"],
                "relevant_files": ["/api/sessions.py"],
                "relevant_methods": ["SessionHandler.__init__", "SessionHandler.add_event_listener"],
                "hypothesis": "Circular references preventing garbage collection",
                "confidence": "high",
                "continuation_id": "debug-workflow-uuid",
            }
        )

    # Verify step 2 response
    response2 = json.loads(result2[0].text)
    assert response2["status"] == "investigation_in_progress"
    assert response2["step_number"] == 2
    assert response2["investigation_status"]["files_checked"] == 2
    assert response2["investigation_status"]["relevant_methods"] == 2
    assert response2["investigation_status"]["current_confidence"] == "high"

    # Step 3: Final investigation with expert analysis.
    # Canned response standing in for the external model's analysis.
    mock_expert_response = {
        "status": "analysis_complete",
        "summary": "Memory leak caused by circular references in session event handlers",
        "hypotheses": [
            {
                "name": "CIRCULAR_REFERENCE_LEAK",
                "confidence": "High (95%)",
                "evidence": ["Event handlers hold strong references", "No weak references used"],
                "root_cause": "SessionHandler stores callbacks that reference the handler itself",
                "potential_fixes": [
                    {
                        "description": "Use weakref for event handler callbacks",
                        "files_to_modify": ["/api/sessions.py"],
                        "complexity": "Low",
                    }
                ],
                "minimal_fix": "Replace self references in callbacks with weakref.ref(self)",
            }
        ],
        "investigation_summary": {
            "pattern": "Classic circular reference memory leak",
            "severity": "High - causes unbounded memory growth",
            "recommended_action": "Implement weakref solution immediately",
        },
    }

    with patch("utils.conversation_memory.add_turn"):
        with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
            result3 = await tool.execute(
                {
                    "step": "Investigation complete - confirmed circular reference memory leak pattern",
                    "step_number": 3,
                    "total_steps": 3,
                    "next_step_required": False,  # Triggers expert analysis
                    "findings": "Circular references between SessionHandler and event callbacks prevent GC",
                    "files_checked": ["/api/sessions.py", "/api/cache.py"],
                    "relevant_files": ["/api/sessions.py"],
                    "relevant_methods": ["SessionHandler.__init__", "SessionHandler.add_event_listener"],
                    "hypothesis": "Circular references in event handler callbacks causing memory leak",
                    "confidence": "high",
                    "continuation_id": "debug-workflow-uuid",
                    "model": "flash",
                }
            )

    # Verify final response with expert analysis
    response3 = json.loads(result3[0].text)
    assert response3["status"] == "calling_expert_analysis"
    assert response3["investigation_complete"] is True
    assert "expert_analysis" in response3
    expert = response3["expert_analysis"]
    assert expert["status"] == "analysis_complete"
    assert "CIRCULAR_REFERENCE_LEAK" in expert["hypotheses"][0]["name"]
    assert "weakref" in expert["hypotheses"][0]["minimal_fix"]

    # Verify complete investigation summary
    assert "complete_investigation" in response3
    complete = response3["complete_investigation"]
    assert complete["steps_taken"] == 3
    assert "/api/sessions.py" in complete["files_examined"]
    assert "SessionHandler.add_event_listener" in complete["relevant_methods"]

    # Step 4: Test continuation to another tool (e.g., analyze).
    # Build a mock thread context representing the debug conversation above.
    debug_context = ThreadContext(
        thread_id="debug-workflow-uuid",
        created_at="2025-01-01T00:00:00Z",
        last_updated_at="2025-01-01T00:10:00Z",
        tool_name="debug",
        turns=[
            ConversationTurn(
                role="user",
                content="Step 1: Investigating memory leak",
                timestamp="2025-01-01T00:01:00Z",
                tool_name="debug",
                files=["/api/sessions.py"],
                images=["/screenshots/memory_profile.png"],
            ),
            ConversationTurn(
                role="assistant",
                content=json.dumps(response1),
                timestamp="2025-01-01T00:02:00Z",
                tool_name="debug",
            ),
            ConversationTurn(
                role="user",
                content="Step 2: Found circular references",
                timestamp="2025-01-01T00:03:00Z",
                tool_name="debug",
            ),
            ConversationTurn(
                role="assistant",
                content=json.dumps(response2),
                timestamp="2025-01-01T00:04:00Z",
                tool_name="debug",
            ),
            ConversationTurn(
                role="user",
                content="Step 3: Investigation complete",
                timestamp="2025-01-01T00:05:00Z",
                tool_name="debug",
            ),
            ConversationTurn(
                role="assistant",
                content=json.dumps(response3),
                timestamp="2025-01-01T00:06:00Z",
                tool_name="debug",
            ),
        ],
        initial_context={},
    )

    # Test that another tool can use the continuation
    with patch("utils.conversation_memory.get_thread", return_value=debug_context):
        # Mock file reading: (content, token_count) pairs keyed by path.
        def mock_read_file(file_path):
            if file_path == "/api/sessions.py":
                return "# SessionHandler with circular refs\nclass SessionHandler:\n    pass", 20
            elif file_path == "/screenshots/memory_profile.png":
                # Images return empty string for content but 0 tokens
                return "", 0
            elif file_path == "/api/cache.py":
                return "# Cache module", 5
            return "", 0

        # Build conversation history for another tool
        from utils.model_context import ModelContext

        model_context = ModelContext("flash")
        history, tokens = build_conversation_history(debug_context, model_context, read_files_func=mock_read_file)

        # Verify history contains all debug information
        assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
        assert "Thread: debug-workflow-uuid" in history
        assert "Tool: debug" in history

        # Check investigation progression
        assert "Step 1: Investigating memory leak" in history
        assert "Step 2: Found circular references" in history
        assert "Step 3: Investigation complete" in history

        # Check expert analysis is included
        assert "CIRCULAR_REFERENCE_LEAK" in history
        assert "weakref" in history
        assert "memory leak" in history

        # Check files are referenced in conversation history
        assert "/api/sessions.py" in history
        # File content would be in referenced files section if the files were readable.
        # In our test they're not real files so they won't be embedded,
        # but the expert analysis content should be there.
        assert "Memory leak caused by circular references" in history

    # Verify file list includes all files from investigation
    file_list = get_conversation_file_list(debug_context)
    assert "/api/sessions.py" in file_list
@pytest.mark.asyncio
async def test_debug_investigation_state_machine(self):
    """Test the debug tool's investigation state machine behavior."""
    tool = DebugIssueTool()

    # Tool responses captured in order, one per investigation step.
    observed = []

    # Opening transition: a brand-new thread is created and the tool
    # should report the investigation as still in progress.
    with patch("utils.conversation_memory.create_thread", return_value="state-test-uuid"):
        with patch("utils.conversation_memory.add_turn"):
            reply = await tool.execute(
                {
                    "step": "Starting investigation",
                    "step_number": 1,
                    "total_steps": 2,
                    "next_step_required": True,
                    "findings": "Initial findings",
                }
            )
            observed.append(json.loads(reply[0].text))

    opening = observed[0]
    assert opening["status"] == "investigation_in_progress"
    assert opening["step_number"] == 1
    assert opening["next_step_required"] is True

    # Closing transition: next_step_required=False must hand off to the
    # (mocked) expert analysis and mark the investigation complete.
    canned_expert = {"status": "analysis_complete", "summary": "Test complete"}
    with patch("utils.conversation_memory.add_turn"):
        with patch.object(tool, "_call_expert_analysis", return_value=canned_expert):
            reply = await tool.execute(
                {
                    "step": "Final findings",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": False,
                    "findings": "Complete findings",
                    "continuation_id": "state-test-uuid",
                    "model": "flash",
                }
            )
            observed.append(json.loads(reply[0].text))

    closing = observed[1]
    assert closing["status"] == "calling_expert_analysis"
    assert closing["investigation_complete"] is True
    assert "expert_analysis" in closing
@pytest.mark.asyncio
async def test_debug_backtracking_preserves_continuation(self):
"""Test that backtracking preserves continuation ID and investigation state."""
tool = DebugIssueTool()
# Start investigation
with patch("utils.conversation_memory.create_thread", return_value="backtrack-test-uuid"):
with patch("utils.conversation_memory.add_turn"):
result1 = await tool.execute(
{
"step": "Initial hypothesis",
"step_number": 1,
"total_steps": 3,
"next_step_required": True,
"findings": "Initial findings",
}
)
response1 = json.loads(result1[0].text)
continuation_id = response1["continuation_id"]
# Step 2 - wrong direction
with patch("utils.conversation_memory.add_turn"):
await tool.execute(
{
"step": "Wrong hypothesis",
"step_number": 2,
"total_steps": 3,
"next_step_required": True,
"findings": "Dead end",
"hypothesis": "Wrong initial hypothesis",
"confidence": "low",
"continuation_id": continuation_id,
}
)
# Backtrack from step 2
with patch("utils.conversation_memory.add_turn"):
result3 = await tool.execute(
{
"step": "Backtracking - new hypothesis",
"step_number": 3,
"total_steps": 4, # Adjusted total
"next_step_required": True,
"findings": "New direction",
"hypothesis": "New hypothesis after backtracking",
"confidence": "medium",
"backtrack_from_step": 2,
"continuation_id": continuation_id,
}
)
response3 = json.loads(result3[0].text)
# Verify continuation preserved through backtracking
assert response3["continuation_id"] == continuation_id
assert response3["step_number"] == 3
assert response3["total_steps"] == 4
# Verify investigation status after backtracking
# When we backtrack, investigation continues
assert response3["investigation_status"]["files_checked"] == 0 # Reset after backtrack
assert response3["investigation_status"]["current_confidence"] == "medium"
# The key thing is the continuation ID is preserved
# and we've adjusted our approach (total_steps increased)