Make code lookup mandatory between investigation steps for better results
This commit is contained in:
@@ -14,7 +14,7 @@ import os
|
||||
# These values are used in server responses and for tracking releases
|
||||
# IMPORTANT: This is the single source of truth for version and author info
|
||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||
__version__ = "5.2.2"
|
||||
__version__ = "5.2.3"
|
||||
# Last update date in ISO format
|
||||
__updated__ = "2025-06-19"
|
||||
# Primary maintainer
|
||||
|
||||
@@ -537,10 +537,8 @@ RuntimeError: dictionary changed size during iteration
|
||||
self.logger.error("Missing investigation_status in response")
|
||||
return False
|
||||
|
||||
# Check output guidance exists
|
||||
if "output" not in response_data:
|
||||
self.logger.error("Missing output guidance in response")
|
||||
return False
|
||||
# Output field removed in favor of contextual next_steps
|
||||
# No longer checking for "output" field as it was redundant
|
||||
|
||||
# Check next_steps guidance
|
||||
if not response_data.get("next_steps"):
|
||||
|
||||
@@ -95,10 +95,13 @@ class TestDynamicContextRequests:
|
||||
|
||||
# Parse the response - new debug tool returns structured JSON
|
||||
response_data = json.loads(result[0].text)
|
||||
assert response_data["status"] == "investigation_in_progress"
|
||||
# Debug tool now returns "pause_for_investigation" to force actual investigation
|
||||
assert response_data["status"] == "pause_for_investigation"
|
||||
assert response_data["step_number"] == 1
|
||||
assert response_data["next_step_required"] is True
|
||||
assert response_data["investigation_status"]["current_confidence"] == "high"
|
||||
assert response_data["investigation_required"] is True
|
||||
assert "required_actions" in response_data
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch("tools.base.BaseTool.get_model_provider")
|
||||
|
||||
@@ -133,13 +133,16 @@ class TestDebugTool:
|
||||
|
||||
parsed_response = json.loads(result[0].text)
|
||||
|
||||
assert parsed_response["status"] == "investigation_in_progress"
|
||||
# Debug tool now returns "pause_for_investigation" for ongoing steps
|
||||
assert parsed_response["status"] == "pause_for_investigation"
|
||||
assert parsed_response["step_number"] == 1
|
||||
assert parsed_response["total_steps"] == 5
|
||||
assert parsed_response["next_step_required"] is True
|
||||
assert parsed_response["continuation_id"] == "debug-uuid-123"
|
||||
assert parsed_response["investigation_status"]["files_checked"] == 1
|
||||
assert parsed_response["investigation_status"]["relevant_files"] == 1
|
||||
assert parsed_response["investigation_required"] is True
|
||||
assert "required_actions" in parsed_response
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_execute_subsequent_investigation_step(self):
|
||||
@@ -317,6 +320,7 @@ class TestDebugTool:
|
||||
result = await tool.execute(arguments)
|
||||
|
||||
# Should return a list with TextContent
|
||||
# Debug tool now returns "pause_for_investigation" for ongoing steps
|
||||
assert len(result) == 1
|
||||
response_text = result[0].text
|
||||
|
||||
@@ -325,7 +329,7 @@ class TestDebugTool:
|
||||
|
||||
parsed_response = json.loads(response_text)
|
||||
|
||||
assert parsed_response["status"] == "investigation_in_progress"
|
||||
assert parsed_response["status"] == "pause_for_investigation"
|
||||
# After backtracking from step 2, history should have step 1 plus the new step
|
||||
assert len(tool.investigation_history) == 2 # Step 1 + new step 3
|
||||
assert tool.investigation_history[0]["step_number"] == 1
|
||||
@@ -502,6 +506,7 @@ class TestDebugToolIntegration:
|
||||
result = await self.tool.execute(arguments)
|
||||
|
||||
# Verify response structure
|
||||
# Debug tool now returns "pause_for_investigation" for ongoing steps
|
||||
assert len(result) == 1
|
||||
response_text = result[0].text
|
||||
|
||||
@@ -510,7 +515,7 @@ class TestDebugToolIntegration:
|
||||
|
||||
parsed_response = json.loads(response_text)
|
||||
|
||||
assert parsed_response["status"] == "investigation_in_progress"
|
||||
assert parsed_response["status"] == "pause_for_investigation"
|
||||
assert parsed_response["step_number"] == 1
|
||||
assert parsed_response["continuation_id"] == "debug-flow-uuid"
|
||||
|
||||
|
||||
@@ -45,8 +45,10 @@ class TestDebugCertainConfidence:
|
||||
|
||||
# Verify step 1 response
|
||||
response1 = json.loads(result1[0].text)
|
||||
assert response1["status"] == "investigation_in_progress"
|
||||
assert response1["status"] == "pause_for_investigation"
|
||||
assert response1["step_number"] == 1
|
||||
assert response1["investigation_required"] is True
|
||||
assert "required_actions" in response1
|
||||
continuation_id = response1["continuation_id"]
|
||||
|
||||
# Step 2: Final step with certain confidence (simple import fix)
|
||||
|
||||
@@ -43,7 +43,7 @@ class TestDebugComprehensiveWorkflow:
|
||||
# Verify step 1 response
|
||||
assert len(result1) == 1
|
||||
response1 = json.loads(result1[0].text)
|
||||
assert response1["status"] == "investigation_in_progress"
|
||||
assert response1["status"] == "pause_for_investigation"
|
||||
assert response1["step_number"] == 1
|
||||
assert response1["continuation_id"] == "debug-workflow-uuid"
|
||||
|
||||
@@ -56,7 +56,8 @@ class TestDebugComprehensiveWorkflow:
|
||||
if args and len(args) >= 3:
|
||||
assert args[0] == "debug-workflow-uuid"
|
||||
assert args[1] == "assistant"
|
||||
assert json.loads(args[2])["status"] == "investigation_in_progress"
|
||||
# Debug tool now returns "pause_for_investigation" for ongoing steps
|
||||
assert json.loads(args[2])["status"] == "pause_for_investigation"
|
||||
|
||||
# Step 2: Continue investigation with findings
|
||||
with patch("utils.conversation_memory.add_turn") as mock_add_turn:
|
||||
@@ -78,7 +79,8 @@ class TestDebugComprehensiveWorkflow:
|
||||
|
||||
# Verify step 2 response
|
||||
response2 = json.loads(result2[0].text)
|
||||
assert response2["status"] == "investigation_in_progress"
|
||||
# Debug tool now returns "pause_for_investigation" for ongoing steps
|
||||
assert response2["status"] == "pause_for_investigation"
|
||||
assert response2["step_number"] == 2
|
||||
assert response2["investigation_status"]["files_checked"] == 2
|
||||
assert response2["investigation_status"]["relevant_methods"] == 2
|
||||
@@ -268,9 +270,12 @@ class TestDebugComprehensiveWorkflow:
|
||||
states.append(json.loads(result[0].text))
|
||||
|
||||
# Verify initial state
|
||||
assert states[0]["status"] == "investigation_in_progress"
|
||||
# Debug tool now returns "pause_for_investigation" for ongoing steps
|
||||
assert states[0]["status"] == "pause_for_investigation"
|
||||
assert states[0]["step_number"] == 1
|
||||
assert states[0]["next_step_required"] is True
|
||||
assert states[0]["investigation_required"] is True
|
||||
assert "required_actions" in states[0]
|
||||
|
||||
# Final state (triggers expert analysis)
|
||||
mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"}
|
||||
|
||||
@@ -39,8 +39,10 @@ class TestDebugContinuation:
|
||||
|
||||
assert len(result) == 1
|
||||
response = json.loads(result[0].text)
|
||||
assert response["status"] == "investigation_in_progress"
|
||||
assert response["status"] == "pause_for_investigation"
|
||||
assert response["continuation_id"] == "debug-test-uuid-123"
|
||||
assert response["investigation_required"] is True
|
||||
assert "required_actions" in response
|
||||
|
||||
def test_debug_conversation_formatting(self):
|
||||
"""Test that debug tool's structured output is properly formatted in conversation history."""
|
||||
|
||||
@@ -157,16 +157,18 @@ class DebugIssueTool(BaseTool):
|
||||
"DEBUG & ROOT CAUSE ANALYSIS - Systematic self-investigation followed by expert analysis. "
|
||||
"This tool guides you through a step-by-step investigation process where you:\n\n"
|
||||
"1. Start with step 1: describe the issue to investigate\n"
|
||||
"2. Continue with investigation steps: examine code, trace errors, test hypotheses\n"
|
||||
"3. Track findings, relevant files, and methods throughout\n"
|
||||
"4. Update hypotheses as understanding evolves\n"
|
||||
"5. Backtrack and revise findings when needed\n"
|
||||
"6. Once investigation is complete, receive expert analysis\n\n"
|
||||
"The tool enforces systematic investigation methodology:\n"
|
||||
"- Methodical code examination and evidence collection\n"
|
||||
"- Hypothesis formation and validation\n"
|
||||
"- File and method tracking for context\n"
|
||||
"- Confidence assessment and revision capabilities\n\n"
|
||||
"2. STOP and investigate using appropriate tools\n"
|
||||
"3. Report findings in step 2 with concrete evidence from actual code\n"
|
||||
"4. Continue investigating between each debug step\n"
|
||||
"5. Track findings, relevant files, and methods throughout\n"
|
||||
"6. Update hypotheses as understanding evolves\n"
|
||||
"7. Once investigation is complete, receive expert analysis\n\n"
|
||||
"IMPORTANT: This tool enforces investigation between steps:\n"
|
||||
"- After each debug call, you MUST investigate before calling debug again\n"
|
||||
"- Each step must include NEW evidence from code examination\n"
|
||||
"- No recursive debug calls without actual investigation work\n"
|
||||
"- The tool will specify which step number to use next\n"
|
||||
"- Follow the required_actions list for investigation guidance\n\n"
|
||||
"Perfect for: complex bugs, mysterious errors, performance issues, "
|
||||
"race conditions, memory leaks, integration problems."
|
||||
)
|
||||
@@ -357,10 +359,6 @@ class DebugIssueTool(BaseTool):
|
||||
"images_collected": len(set(self.consolidated_findings["images"])),
|
||||
"current_confidence": request.confidence,
|
||||
},
|
||||
"output": {
|
||||
"instructions": "Continue systematic investigation. Present findings clearly and proceed to next step if required.",
|
||||
"format": "systematic_investigation",
|
||||
},
|
||||
}
|
||||
|
||||
if continuation_id:
|
||||
@@ -436,10 +434,72 @@ class DebugIssueTool(BaseTool):
|
||||
"the problem lies."
|
||||
)
|
||||
else:
|
||||
response_data["next_steps"] = (
|
||||
f"Continue investigation with step {request.step_number + 1}. "
|
||||
f"Focus on: examining relevant code, testing hypotheses, gathering evidence."
|
||||
)
|
||||
# CRITICAL: Force Claude to actually investigate before calling debug again
|
||||
response_data["status"] = "pause_for_investigation"
|
||||
response_data["investigation_required"] = True
|
||||
|
||||
if request.step_number == 1:
|
||||
# Initial investigation tasks
|
||||
response_data["required_actions"] = [
|
||||
"Search for code related to the reported issue or symptoms",
|
||||
"Examine relevant files and understand the current implementation",
|
||||
"Understand the project structure and locate relevant modules",
|
||||
"Identify how the affected functionality is supposed to work",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"MANDATORY: DO NOT call the debug tool again immediately. You MUST first investigate "
|
||||
f"the codebase using appropriate tools. Search for relevant code, examine implementations, "
|
||||
f"understand the logic flow. Only call debug again AFTER you have gathered concrete evidence "
|
||||
f"and examined actual code. When you call debug next time, use step_number: {request.step_number + 1} "
|
||||
f"and report the specific files you've examined and findings you've discovered."
|
||||
)
|
||||
elif request.step_number >= 2 and request.confidence in ["exploring", "low"]:
|
||||
# Need deeper investigation
|
||||
response_data["required_actions"] = [
|
||||
"Examine the specific files you've identified as relevant",
|
||||
"Trace method calls and data flow through the system",
|
||||
"Check for edge cases, boundary conditions, and assumptions in the code",
|
||||
"Look for related configuration, dependencies, or external factors",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"STOP! Do NOT call debug again yet. Based on your findings, you've identified potential areas "
|
||||
f"but need concrete evidence. MANDATORY ACTIONS before calling debug step {request.step_number + 1}:\n"
|
||||
f"1. Examine ALL files in your relevant_files list\n"
|
||||
f"2. Trace how data flows through {', '.join(request.relevant_methods[:3]) if request.relevant_methods else 'the identified components'}\n"
|
||||
f"3. Look for logic errors, incorrect assumptions, missing validations\n"
|
||||
f"4. Check interactions between components and external dependencies\n"
|
||||
f"Only call debug again with step_number: {request.step_number + 1} AFTER completing these investigations."
|
||||
)
|
||||
elif request.confidence in ["medium", "high"]:
|
||||
# Close to root cause - need confirmation
|
||||
response_data["required_actions"] = [
|
||||
"Examine the exact code sections where you believe the issue occurs",
|
||||
"Trace the execution path that leads to the failure",
|
||||
"Verify your hypothesis with concrete code evidence",
|
||||
"Check for any similar patterns elsewhere in the codebase",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"WAIT! Your hypothesis needs verification. DO NOT call debug immediately. REQUIRED ACTIONS:\n"
|
||||
f"1. Examine the exact lines where the issue occurs\n"
|
||||
f"2. Trace backwards: how does data get to this point? What transforms it?\n"
|
||||
f"3. Check all assumptions: are inputs validated? Are nulls handled?\n"
|
||||
f"4. Look for the EXACT line where expected != actual behavior\n"
|
||||
f"Document these findings with specific file:line references, then call debug with step_number: {request.step_number + 1}."
|
||||
)
|
||||
else:
|
||||
# General investigation needed
|
||||
response_data["required_actions"] = [
|
||||
"Continue examining the code paths identified in your hypothesis",
|
||||
"Gather more evidence using appropriate investigation tools",
|
||||
"Test edge cases and boundary conditions",
|
||||
"Look for patterns that confirm or refute your theory",
|
||||
]
|
||||
response_data["next_steps"] = (
|
||||
f"PAUSE INVESTIGATION. Before calling debug step {request.step_number + 1}, you MUST examine code. "
|
||||
f"Required: Read files from your files_checked list, search for patterns in your hypothesis, "
|
||||
f"trace execution flow. Your next debug call (step_number: {request.step_number + 1}) must include "
|
||||
f"NEW evidence from actual code examination, not just theories. NO recursive debug calls without investigation work!"
|
||||
)
|
||||
|
||||
# Store in conversation memory
|
||||
if continuation_id:
|
||||
|
||||
Reference in New Issue
Block a user