515 lines
21 KiB
Python
515 lines
21 KiB
Python
"""
|
|
Tests for the debug tool.
|
|
"""
|
|
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from tools.debug import DebugInvestigationRequest, DebugIssueTool
|
|
from tools.models import ToolModelCategory
|
|
|
|
|
|
class TestDebugTool:
|
|
"""Test suite for DebugIssueTool."""
|
|
|
|
def test_tool_metadata(self):
|
|
"""Test basic tool metadata and configuration."""
|
|
tool = DebugIssueTool()
|
|
|
|
assert tool.get_name() == "debug"
|
|
assert "DEBUG & ROOT CAUSE ANALYSIS" in tool.get_description()
|
|
assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL
|
|
assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
|
|
assert tool.requires_model() is False # Since it manages its own model calls
|
|
|
|
def test_request_validation(self):
|
|
"""Test Pydantic request model validation."""
|
|
# Valid investigation step request
|
|
step_request = DebugInvestigationRequest(
|
|
step="Investigating null pointer exception in UserService",
|
|
step_number=1,
|
|
total_steps=5,
|
|
next_step_required=True,
|
|
findings="Found that UserService.getUser() is called with null ID",
|
|
)
|
|
assert step_request.step == "Investigating null pointer exception in UserService"
|
|
assert step_request.step_number == 1
|
|
assert step_request.next_step_required is True
|
|
assert step_request.confidence == "low" # default
|
|
|
|
# Request with optional fields
|
|
detailed_request = DebugInvestigationRequest(
|
|
step="Deep dive into getUser method implementation",
|
|
step_number=2,
|
|
total_steps=5,
|
|
next_step_required=True,
|
|
findings="Method doesn't validate input parameters",
|
|
files_checked=["/src/UserService.java", "/src/UserController.java"],
|
|
relevant_files=["/src/UserService.java"],
|
|
relevant_methods=["UserService.getUser", "UserController.handleRequest"],
|
|
hypothesis="Null ID passed from controller without validation",
|
|
confidence="medium",
|
|
)
|
|
assert len(detailed_request.files_checked) == 2
|
|
assert len(detailed_request.relevant_files) == 1
|
|
assert detailed_request.confidence == "medium"
|
|
|
|
# Missing required fields should fail
|
|
with pytest.raises(ValueError):
|
|
DebugInvestigationRequest() # Missing all required fields
|
|
|
|
with pytest.raises(ValueError):
|
|
DebugInvestigationRequest(step="test") # Missing other required fields
|
|
|
|
def test_input_schema_generation(self):
|
|
"""Test JSON schema generation for MCP client."""
|
|
tool = DebugIssueTool()
|
|
schema = tool.get_input_schema()
|
|
|
|
assert schema["type"] == "object"
|
|
# Investigation fields
|
|
assert "step" in schema["properties"]
|
|
assert "step_number" in schema["properties"]
|
|
assert "total_steps" in schema["properties"]
|
|
assert "next_step_required" in schema["properties"]
|
|
assert "findings" in schema["properties"]
|
|
assert "files_checked" in schema["properties"]
|
|
assert "relevant_files" in schema["properties"]
|
|
assert "relevant_methods" in schema["properties"]
|
|
assert "hypothesis" in schema["properties"]
|
|
assert "confidence" in schema["properties"]
|
|
assert "backtrack_from_step" in schema["properties"]
|
|
assert "continuation_id" in schema["properties"]
|
|
assert "images" in schema["properties"] # Now supported for visual debugging
|
|
|
|
# Check excluded fields are NOT present
|
|
assert "model" not in schema["properties"]
|
|
assert "temperature" not in schema["properties"]
|
|
assert "thinking_mode" not in schema["properties"]
|
|
assert "use_websearch" not in schema["properties"]
|
|
|
|
# Check required fields
|
|
assert "step" in schema["required"]
|
|
assert "step_number" in schema["required"]
|
|
assert "total_steps" in schema["required"]
|
|
assert "next_step_required" in schema["required"]
|
|
assert "findings" in schema["required"]
|
|
|
|
def test_model_category_for_debugging(self):
|
|
"""Test that debug uses extended reasoning category."""
|
|
tool = DebugIssueTool()
|
|
category = tool.get_model_category()
|
|
|
|
# Debugging needs deep thinking
|
|
assert category == ToolModelCategory.EXTENDED_REASONING
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_first_investigation_step(self):
|
|
"""Test execute method for first investigation step."""
|
|
tool = DebugIssueTool()
|
|
arguments = {
|
|
"step": "Investigating intermittent session validation failures in production",
|
|
"step_number": 1,
|
|
"total_steps": 5,
|
|
"next_step_required": True,
|
|
"findings": "Users report random session invalidation, occurs more during high traffic",
|
|
"files_checked": ["/api/session_manager.py"],
|
|
"relevant_files": ["/api/session_manager.py"],
|
|
}
|
|
|
|
# Mock conversation memory functions
|
|
with patch("utils.conversation_memory.create_thread", return_value="debug-uuid-123"):
|
|
with patch("utils.conversation_memory.add_turn"):
|
|
result = await tool.execute(arguments)
|
|
|
|
# Should return a list with TextContent
|
|
assert len(result) == 1
|
|
assert result[0].type == "text"
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(result[0].text)
|
|
|
|
assert parsed_response["status"] == "investigation_in_progress"
|
|
assert parsed_response["step_number"] == 1
|
|
assert parsed_response["total_steps"] == 5
|
|
assert parsed_response["next_step_required"] is True
|
|
assert parsed_response["continuation_id"] == "debug-uuid-123"
|
|
assert parsed_response["investigation_status"]["files_checked"] == 1
|
|
assert parsed_response["investigation_status"]["relevant_files"] == 1
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_subsequent_investigation_step(self):
|
|
"""Test execute method for subsequent investigation step."""
|
|
tool = DebugIssueTool()
|
|
|
|
# Set up initial state
|
|
tool.initial_issue = "Session validation failures"
|
|
tool.consolidated_findings["files_checked"].add("/api/session_manager.py")
|
|
|
|
arguments = {
|
|
"step": "Examining session cleanup method for concurrent modification issues",
|
|
"step_number": 2,
|
|
"total_steps": 5,
|
|
"next_step_required": True,
|
|
"findings": "Found dictionary modification during iteration in cleanup_expired_sessions",
|
|
"files_checked": ["/api/session_manager.py", "/api/utils.py"],
|
|
"relevant_files": ["/api/session_manager.py"],
|
|
"relevant_methods": ["SessionManager.cleanup_expired_sessions"],
|
|
"hypothesis": "Dictionary modified during iteration causing RuntimeError",
|
|
"confidence": "high",
|
|
"continuation_id": "debug-uuid-123",
|
|
}
|
|
|
|
# Mock conversation memory functions
|
|
with patch("utils.conversation_memory.add_turn"):
|
|
result = await tool.execute(arguments)
|
|
|
|
# Should return a list with TextContent
|
|
assert len(result) == 1
|
|
assert result[0].type == "text"
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(result[0].text)
|
|
|
|
assert parsed_response["step_number"] == 2
|
|
assert parsed_response["next_step_required"] is True
|
|
assert parsed_response["continuation_id"] == "debug-uuid-123"
|
|
assert parsed_response["investigation_status"]["files_checked"] == 2 # Cumulative
|
|
assert parsed_response["investigation_status"]["relevant_methods"] == 1
|
|
assert parsed_response["investigation_status"]["current_confidence"] == "high"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_final_investigation_step(self):
|
|
"""Test execute method for final investigation step with expert analysis."""
|
|
tool = DebugIssueTool()
|
|
|
|
# Set up investigation history
|
|
tool.initial_issue = "Session validation failures"
|
|
tool.investigation_history = [
|
|
{
|
|
"step_number": 1,
|
|
"step": "Initial investigation of session validation failures",
|
|
"findings": "Initial investigation",
|
|
"files_checked": ["/api/utils.py"],
|
|
},
|
|
{
|
|
"step_number": 2,
|
|
"step": "Deeper analysis of session manager",
|
|
"findings": "Found dictionary issue",
|
|
"files_checked": ["/api/session_manager.py"],
|
|
},
|
|
]
|
|
tool.consolidated_findings = {
|
|
"files_checked": {"/api/session_manager.py", "/api/utils.py"},
|
|
"relevant_files": {"/api/session_manager.py"},
|
|
"relevant_methods": {"SessionManager.cleanup_expired_sessions"},
|
|
"findings": ["Step 1: Initial investigation", "Step 2: Found dictionary issue"],
|
|
"hypotheses": [{"step": 2, "hypothesis": "Dictionary modified during iteration", "confidence": "high"}],
|
|
"images": [],
|
|
}
|
|
|
|
arguments = {
|
|
"step": "Confirmed the root cause and identified fix",
|
|
"step_number": 3,
|
|
"total_steps": 3,
|
|
"next_step_required": False, # Final step
|
|
"findings": "Root cause confirmed: dictionary modification during iteration in cleanup method",
|
|
"files_checked": ["/api/session_manager.py"],
|
|
"relevant_files": ["/api/session_manager.py"],
|
|
"relevant_methods": ["SessionManager.cleanup_expired_sessions"],
|
|
"hypothesis": "Dictionary modification during iteration causes intermittent RuntimeError",
|
|
"confidence": "high",
|
|
"continuation_id": "debug-uuid-123",
|
|
}
|
|
|
|
# Mock the expert analysis call
|
|
mock_expert_response = {
|
|
"status": "analysis_complete",
|
|
"summary": "Dictionary modification during iteration bug identified",
|
|
"hypotheses": [
|
|
{
|
|
"name": "CONCURRENT_MODIFICATION",
|
|
"confidence": "High",
|
|
"root_cause": "Modifying dictionary while iterating",
|
|
"minimal_fix": "Create list of keys to delete first",
|
|
}
|
|
],
|
|
}
|
|
|
|
# Mock conversation memory and file reading
|
|
with patch("utils.conversation_memory.add_turn"):
|
|
with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
|
|
with patch.object(tool, "_prepare_file_content_for_prompt", return_value=("file content", 100)):
|
|
result = await tool.execute(arguments)
|
|
|
|
# Should return a list with TextContent
|
|
assert len(result) == 1
|
|
response_text = result[0].text
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(response_text)
|
|
|
|
# Check final step structure
|
|
assert parsed_response["status"] == "calling_expert_analysis"
|
|
assert parsed_response["investigation_complete"] is True
|
|
assert parsed_response["expert_analysis"]["status"] == "analysis_complete"
|
|
assert "complete_investigation" in parsed_response
|
|
assert parsed_response["complete_investigation"]["steps_taken"] == 3 # All steps including current
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_with_backtracking(self):
|
|
"""Test execute method with backtracking to revise findings."""
|
|
tool = DebugIssueTool()
|
|
|
|
# Set up some investigation history with all required fields
|
|
tool.investigation_history = [
|
|
{
|
|
"step": "Initial investigation",
|
|
"step_number": 1,
|
|
"findings": "Initial findings",
|
|
"files_checked": ["file1.py"],
|
|
"relevant_files": [],
|
|
"relevant_methods": [],
|
|
"hypothesis": None,
|
|
"confidence": "low",
|
|
},
|
|
{
|
|
"step": "Wrong direction",
|
|
"step_number": 2,
|
|
"findings": "Wrong path",
|
|
"files_checked": ["file2.py"],
|
|
"relevant_files": [],
|
|
"relevant_methods": [],
|
|
"hypothesis": None,
|
|
"confidence": "low",
|
|
},
|
|
]
|
|
tool.consolidated_findings = {
|
|
"files_checked": {"file1.py", "file2.py"},
|
|
"relevant_files": set(),
|
|
"relevant_methods": set(),
|
|
"findings": ["Step 1: Initial findings", "Step 2: Wrong path"],
|
|
"hypotheses": [],
|
|
"images": [],
|
|
}
|
|
|
|
arguments = {
|
|
"step": "Backtracking to revise approach",
|
|
"step_number": 3,
|
|
"total_steps": 5,
|
|
"next_step_required": True,
|
|
"findings": "Taking a different investigation approach",
|
|
"files_checked": ["file3.py"],
|
|
"backtrack_from_step": 2, # Backtrack from step 2
|
|
"continuation_id": "debug-uuid-123",
|
|
}
|
|
|
|
# Mock conversation memory functions
|
|
with patch("utils.conversation_memory.add_turn"):
|
|
result = await tool.execute(arguments)
|
|
|
|
# Should return a list with TextContent
|
|
assert len(result) == 1
|
|
response_text = result[0].text
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(response_text)
|
|
|
|
assert parsed_response["status"] == "investigation_in_progress"
|
|
# After backtracking from step 2, history should have step 1 plus the new step
|
|
assert len(tool.investigation_history) == 2 # Step 1 + new step 3
|
|
assert tool.investigation_history[0]["step_number"] == 1
|
|
assert tool.investigation_history[1]["step_number"] == 3 # The new step that triggered backtrack
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_adjusts_total_steps(self):
|
|
"""Test execute method adjusts total steps when current step exceeds estimate."""
|
|
tool = DebugIssueTool()
|
|
arguments = {
|
|
"step": "Additional investigation needed",
|
|
"step_number": 8,
|
|
"total_steps": 5, # Current step exceeds total
|
|
"next_step_required": True,
|
|
"findings": "More complexity discovered",
|
|
"continuation_id": "debug-uuid-123",
|
|
}
|
|
|
|
# Mock conversation memory functions
|
|
with patch("utils.conversation_memory.add_turn"):
|
|
result = await tool.execute(arguments)
|
|
|
|
# Should return a list with TextContent
|
|
assert len(result) == 1
|
|
response_text = result[0].text
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(response_text)
|
|
|
|
# Total steps should be adjusted to match current step
|
|
assert parsed_response["total_steps"] == 8
|
|
assert parsed_response["step_number"] == 8
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_execute_error_handling(self):
|
|
"""Test execute method error handling."""
|
|
tool = DebugIssueTool()
|
|
# Invalid arguments - missing required fields
|
|
arguments = {
|
|
"step": "Invalid request"
|
|
# Missing required fields
|
|
}
|
|
|
|
result = await tool.execute(arguments)
|
|
|
|
# Should return error response
|
|
assert len(result) == 1
|
|
response_text = result[0].text
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(response_text)
|
|
|
|
assert parsed_response["status"] == "investigation_failed"
|
|
assert "error" in parsed_response
|
|
|
|
def test_prepare_investigation_summary(self):
|
|
"""Test investigation summary preparation."""
|
|
tool = DebugIssueTool()
|
|
tool.consolidated_findings = {
|
|
"files_checked": {"file1.py", "file2.py", "file3.py"},
|
|
"relevant_files": {"file1.py", "file2.py"},
|
|
"relevant_methods": {"Class1.method1", "Class2.method2"},
|
|
"findings": [
|
|
"Step 1: Initial investigation findings",
|
|
"Step 2: Discovered potential issue",
|
|
"Step 3: Confirmed root cause",
|
|
],
|
|
"hypotheses": [
|
|
{"step": 1, "hypothesis": "Initial hypothesis", "confidence": "low"},
|
|
{"step": 2, "hypothesis": "Refined hypothesis", "confidence": "medium"},
|
|
{"step": 3, "hypothesis": "Final hypothesis", "confidence": "high"},
|
|
],
|
|
"images": [],
|
|
}
|
|
|
|
summary = tool._prepare_investigation_summary()
|
|
|
|
assert "SYSTEMATIC INVESTIGATION SUMMARY" in summary
|
|
assert "Files examined: 3" in summary
|
|
assert "Relevant files identified: 2" in summary
|
|
assert "Methods/functions involved: 2" in summary
|
|
assert "INVESTIGATION PROGRESSION" in summary
|
|
assert "Step 1:" in summary
|
|
assert "Step 2:" in summary
|
|
assert "Step 3:" in summary
|
|
assert "HYPOTHESIS EVOLUTION" in summary
|
|
assert "low confidence" in summary
|
|
assert "medium confidence" in summary
|
|
assert "high confidence" in summary
|
|
|
|
def test_extract_error_context(self):
|
|
"""Test error context extraction from findings."""
|
|
tool = DebugIssueTool()
|
|
tool.consolidated_findings = {
|
|
"findings": [
|
|
"Step 1: Found no issues initially",
|
|
"Step 2: Discovered ERROR: Dictionary size changed during iteration",
|
|
"Step 3: Stack trace shows RuntimeError in cleanup method",
|
|
"Step 4: Exception occurs intermittently",
|
|
],
|
|
}
|
|
|
|
error_context = tool._extract_error_context()
|
|
|
|
assert error_context is not None
|
|
assert "ERROR: Dictionary size changed" in error_context
|
|
assert "Stack trace shows RuntimeError" in error_context
|
|
assert "Exception occurs intermittently" in error_context
|
|
assert "Found no issues initially" not in error_context # Should not include non-error findings
|
|
|
|
def test_reprocess_consolidated_findings(self):
|
|
"""Test reprocessing of consolidated findings after backtracking."""
|
|
tool = DebugIssueTool()
|
|
tool.investigation_history = [
|
|
{
|
|
"step_number": 1,
|
|
"findings": "Initial findings",
|
|
"files_checked": ["file1.py"],
|
|
"relevant_files": ["file1.py"],
|
|
"relevant_methods": ["method1"],
|
|
"hypothesis": "Initial hypothesis",
|
|
"confidence": "low",
|
|
},
|
|
{
|
|
"step_number": 2,
|
|
"findings": "Second findings",
|
|
"files_checked": ["file2.py"],
|
|
"relevant_files": [],
|
|
"relevant_methods": ["method2"],
|
|
},
|
|
]
|
|
|
|
tool._reprocess_consolidated_findings()
|
|
|
|
assert tool.consolidated_findings["files_checked"] == {"file1.py", "file2.py"}
|
|
assert tool.consolidated_findings["relevant_files"] == {"file1.py"}
|
|
assert tool.consolidated_findings["relevant_methods"] == {"method1", "method2"}
|
|
assert len(tool.consolidated_findings["findings"]) == 2
|
|
assert len(tool.consolidated_findings["hypotheses"]) == 1
|
|
assert tool.consolidated_findings["hypotheses"][0]["hypothesis"] == "Initial hypothesis"
|
|
|
|
|
|
# Integration test
|
|
class TestDebugToolIntegration:
|
|
"""Integration tests for debug tool."""
|
|
|
|
def setup_method(self):
|
|
"""Set up model context for integration tests."""
|
|
from utils.model_context import ModelContext
|
|
|
|
self.tool = DebugIssueTool()
|
|
self.tool._model_context = ModelContext("flash") # Test model
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_complete_investigation_flow(self):
|
|
"""Test complete investigation flow from start to expert analysis."""
|
|
# Step 1: Initial investigation
|
|
arguments = {
|
|
"step": "Investigating memory leak in data processing pipeline",
|
|
"step_number": 1,
|
|
"total_steps": 3,
|
|
"next_step_required": True,
|
|
"findings": "High memory usage observed during batch processing",
|
|
"files_checked": ["/processor/main.py"],
|
|
}
|
|
|
|
# Mock conversation memory and expert analysis
|
|
with patch("utils.conversation_memory.create_thread", return_value="debug-flow-uuid"):
|
|
with patch("utils.conversation_memory.add_turn"):
|
|
result = await self.tool.execute(arguments)
|
|
|
|
# Verify response structure
|
|
assert len(result) == 1
|
|
response_text = result[0].text
|
|
|
|
# Parse the JSON response
|
|
import json
|
|
|
|
parsed_response = json.loads(response_text)
|
|
|
|
assert parsed_response["status"] == "investigation_in_progress"
|
|
assert parsed_response["step_number"] == 1
|
|
assert parsed_response["continuation_id"] == "debug-flow-uuid"
|