Make code lookup mandatory between investigation steps for better results

This commit is contained in:
Fahad
2025-06-19 17:42:41 +04:00
parent 79abb9ca7e
commit 7900fdd21c
8 changed files with 108 additions and 33 deletions

View File

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases # These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info # IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH # Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.2.2" __version__ = "5.2.3"
# Last update date in ISO format # Last update date in ISO format
__updated__ = "2025-06-19" __updated__ = "2025-06-19"
# Primary maintainer # Primary maintainer

View File

@@ -537,10 +537,8 @@ RuntimeError: dictionary changed size during iteration
self.logger.error("Missing investigation_status in response") self.logger.error("Missing investigation_status in response")
return False return False
# Check output guidance exists # Output field removed in favor of contextual next_steps
if "output" not in response_data: # No longer checking for "output" field as it was redundant
self.logger.error("Missing output guidance in response")
return False
# Check next_steps guidance # Check next_steps guidance
if not response_data.get("next_steps"): if not response_data.get("next_steps"):

View File

@@ -95,10 +95,13 @@ class TestDynamicContextRequests:
# Parse the response - new debug tool returns structured JSON # Parse the response - new debug tool returns structured JSON
response_data = json.loads(result[0].text) response_data = json.loads(result[0].text)
assert response_data["status"] == "investigation_in_progress" # Debug tool now returns "pause_for_investigation" to force actual investigation
assert response_data["status"] == "pause_for_investigation"
assert response_data["step_number"] == 1 assert response_data["step_number"] == 1
assert response_data["next_step_required"] is True assert response_data["next_step_required"] is True
assert response_data["investigation_status"]["current_confidence"] == "high" assert response_data["investigation_status"]["current_confidence"] == "high"
assert response_data["investigation_required"] is True
assert "required_actions" in response_data
@pytest.mark.asyncio @pytest.mark.asyncio
@patch("tools.base.BaseTool.get_model_provider") @patch("tools.base.BaseTool.get_model_provider")

View File

@@ -133,13 +133,16 @@ class TestDebugTool:
parsed_response = json.loads(result[0].text) parsed_response = json.loads(result[0].text)
assert parsed_response["status"] == "investigation_in_progress" # Debug tool now returns "pause_for_investigation" for ongoing steps
assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1 assert parsed_response["step_number"] == 1
assert parsed_response["total_steps"] == 5 assert parsed_response["total_steps"] == 5
assert parsed_response["next_step_required"] is True assert parsed_response["next_step_required"] is True
assert parsed_response["continuation_id"] == "debug-uuid-123" assert parsed_response["continuation_id"] == "debug-uuid-123"
assert parsed_response["investigation_status"]["files_checked"] == 1 assert parsed_response["investigation_status"]["files_checked"] == 1
assert parsed_response["investigation_status"]["relevant_files"] == 1 assert parsed_response["investigation_status"]["relevant_files"] == 1
assert parsed_response["investigation_required"] is True
assert "required_actions" in parsed_response
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_execute_subsequent_investigation_step(self): async def test_execute_subsequent_investigation_step(self):
@@ -317,6 +320,7 @@ class TestDebugTool:
result = await tool.execute(arguments) result = await tool.execute(arguments)
# Should return a list with TextContent # Should return a list with TextContent
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert len(result) == 1 assert len(result) == 1
response_text = result[0].text response_text = result[0].text
@@ -325,7 +329,7 @@ class TestDebugTool:
parsed_response = json.loads(response_text) parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_in_progress" assert parsed_response["status"] == "pause_for_investigation"
# After backtracking from step 2, history should have step 1 plus the new step # After backtracking from step 2, history should have step 1 plus the new step
assert len(tool.investigation_history) == 2 # Step 1 + new step 3 assert len(tool.investigation_history) == 2 # Step 1 + new step 3
assert tool.investigation_history[0]["step_number"] == 1 assert tool.investigation_history[0]["step_number"] == 1
@@ -502,6 +506,7 @@ class TestDebugToolIntegration:
result = await self.tool.execute(arguments) result = await self.tool.execute(arguments)
# Verify response structure # Verify response structure
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert len(result) == 1 assert len(result) == 1
response_text = result[0].text response_text = result[0].text
@@ -510,7 +515,7 @@ class TestDebugToolIntegration:
parsed_response = json.loads(response_text) parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_in_progress" assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1 assert parsed_response["step_number"] == 1
assert parsed_response["continuation_id"] == "debug-flow-uuid" assert parsed_response["continuation_id"] == "debug-flow-uuid"

View File

@@ -45,8 +45,10 @@ class TestDebugCertainConfidence:
# Verify step 1 response # Verify step 1 response
response1 = json.loads(result1[0].text) response1 = json.loads(result1[0].text)
assert response1["status"] == "investigation_in_progress" assert response1["status"] == "pause_for_investigation"
assert response1["step_number"] == 1 assert response1["step_number"] == 1
assert response1["investigation_required"] is True
assert "required_actions" in response1
continuation_id = response1["continuation_id"] continuation_id = response1["continuation_id"]
# Step 2: Final step with certain confidence (simple import fix) # Step 2: Final step with certain confidence (simple import fix)

View File

@@ -43,7 +43,7 @@ class TestDebugComprehensiveWorkflow:
# Verify step 1 response # Verify step 1 response
assert len(result1) == 1 assert len(result1) == 1
response1 = json.loads(result1[0].text) response1 = json.loads(result1[0].text)
assert response1["status"] == "investigation_in_progress" assert response1["status"] == "pause_for_investigation"
assert response1["step_number"] == 1 assert response1["step_number"] == 1
assert response1["continuation_id"] == "debug-workflow-uuid" assert response1["continuation_id"] == "debug-workflow-uuid"
@@ -56,7 +56,8 @@ class TestDebugComprehensiveWorkflow:
if args and len(args) >= 3: if args and len(args) >= 3:
assert args[0] == "debug-workflow-uuid" assert args[0] == "debug-workflow-uuid"
assert args[1] == "assistant" assert args[1] == "assistant"
assert json.loads(args[2])["status"] == "investigation_in_progress" # Debug tool now returns "pause_for_investigation" for ongoing steps
assert json.loads(args[2])["status"] == "pause_for_investigation"
# Step 2: Continue investigation with findings # Step 2: Continue investigation with findings
with patch("utils.conversation_memory.add_turn") as mock_add_turn: with patch("utils.conversation_memory.add_turn") as mock_add_turn:
@@ -78,7 +79,8 @@ class TestDebugComprehensiveWorkflow:
# Verify step 2 response # Verify step 2 response
response2 = json.loads(result2[0].text) response2 = json.loads(result2[0].text)
assert response2["status"] == "investigation_in_progress" # Debug tool now returns "pause_for_investigation" for ongoing steps
assert response2["status"] == "pause_for_investigation"
assert response2["step_number"] == 2 assert response2["step_number"] == 2
assert response2["investigation_status"]["files_checked"] == 2 assert response2["investigation_status"]["files_checked"] == 2
assert response2["investigation_status"]["relevant_methods"] == 2 assert response2["investigation_status"]["relevant_methods"] == 2
@@ -268,9 +270,12 @@ class TestDebugComprehensiveWorkflow:
states.append(json.loads(result[0].text)) states.append(json.loads(result[0].text))
# Verify initial state # Verify initial state
assert states[0]["status"] == "investigation_in_progress" # Debug tool now returns "pause_for_investigation" for ongoing steps
assert states[0]["status"] == "pause_for_investigation"
assert states[0]["step_number"] == 1 assert states[0]["step_number"] == 1
assert states[0]["next_step_required"] is True assert states[0]["next_step_required"] is True
assert states[0]["investigation_required"] is True
assert "required_actions" in states[0]
# Final state (triggers expert analysis) # Final state (triggers expert analysis)
mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"} mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"}

View File

@@ -39,8 +39,10 @@ class TestDebugContinuation:
assert len(result) == 1 assert len(result) == 1
response = json.loads(result[0].text) response = json.loads(result[0].text)
assert response["status"] == "investigation_in_progress" assert response["status"] == "pause_for_investigation"
assert response["continuation_id"] == "debug-test-uuid-123" assert response["continuation_id"] == "debug-test-uuid-123"
assert response["investigation_required"] is True
assert "required_actions" in response
def test_debug_conversation_formatting(self): def test_debug_conversation_formatting(self):
"""Test that debug tool's structured output is properly formatted in conversation history.""" """Test that debug tool's structured output is properly formatted in conversation history."""

View File

@@ -157,16 +157,18 @@ class DebugIssueTool(BaseTool):
"DEBUG & ROOT CAUSE ANALYSIS - Systematic self-investigation followed by expert analysis. " "DEBUG & ROOT CAUSE ANALYSIS - Systematic self-investigation followed by expert analysis. "
"This tool guides you through a step-by-step investigation process where you:\n\n" "This tool guides you through a step-by-step investigation process where you:\n\n"
"1. Start with step 1: describe the issue to investigate\n" "1. Start with step 1: describe the issue to investigate\n"
"2. Continue with investigation steps: examine code, trace errors, test hypotheses\n" "2. STOP and investigate using appropriate tools\n"
"3. Track findings, relevant files, and methods throughout\n" "3. Report findings in step 2 with concrete evidence from actual code\n"
"4. Update hypotheses as understanding evolves\n" "4. Continue investigating between each debug step\n"
"5. Backtrack and revise findings when needed\n" "5. Track findings, relevant files, and methods throughout\n"
"6. Once investigation is complete, receive expert analysis\n\n" "6. Update hypotheses as understanding evolves\n"
"The tool enforces systematic investigation methodology:\n" "7. Once investigation is complete, receive expert analysis\n\n"
"- Methodical code examination and evidence collection\n" "IMPORTANT: This tool enforces investigation between steps:\n"
"- Hypothesis formation and validation\n" "- After each debug call, you MUST investigate before calling debug again\n"
"- File and method tracking for context\n" "- Each step must include NEW evidence from code examination\n"
"- Confidence assessment and revision capabilities\n\n" "- No recursive debug calls without actual investigation work\n"
"- The tool will specify which step number to use next\n"
"- Follow the required_actions list for investigation guidance\n\n"
"Perfect for: complex bugs, mysterious errors, performance issues, " "Perfect for: complex bugs, mysterious errors, performance issues, "
"race conditions, memory leaks, integration problems." "race conditions, memory leaks, integration problems."
) )
@@ -357,10 +359,6 @@ class DebugIssueTool(BaseTool):
"images_collected": len(set(self.consolidated_findings["images"])), "images_collected": len(set(self.consolidated_findings["images"])),
"current_confidence": request.confidence, "current_confidence": request.confidence,
}, },
"output": {
"instructions": "Continue systematic investigation. Present findings clearly and proceed to next step if required.",
"format": "systematic_investigation",
},
} }
if continuation_id: if continuation_id:
@@ -436,9 +434,71 @@ class DebugIssueTool(BaseTool):
"the problem lies." "the problem lies."
) )
else: else:
# CRITICAL: Force Claude to actually investigate before calling debug again
response_data["status"] = "pause_for_investigation"
response_data["investigation_required"] = True
if request.step_number == 1:
# Initial investigation tasks
response_data["required_actions"] = [
"Search for code related to the reported issue or symptoms",
"Examine relevant files and understand the current implementation",
"Understand the project structure and locate relevant modules",
"Identify how the affected functionality is supposed to work",
]
response_data["next_steps"] = ( response_data["next_steps"] = (
f"Continue investigation with step {request.step_number + 1}. " f"MANDATORY: DO NOT call the debug tool again immediately. You MUST first investigate "
f"Focus on: examining relevant code, testing hypotheses, gathering evidence." f"the codebase using appropriate tools. Search for relevant code, examine implementations, "
f"understand the logic flow. Only call debug again AFTER you have gathered concrete evidence "
f"and examined actual code. When you call debug next time, use step_number: {request.step_number + 1} "
f"and report the specific files you've examined and findings you've discovered."
)
elif request.step_number >= 2 and request.confidence in ["exploring", "low"]:
# Need deeper investigation
response_data["required_actions"] = [
"Examine the specific files you've identified as relevant",
"Trace method calls and data flow through the system",
"Check for edge cases, boundary conditions, and assumptions in the code",
"Look for related configuration, dependencies, or external factors",
]
response_data["next_steps"] = (
f"STOP! Do NOT call debug again yet. Based on your findings, you've identified potential areas "
f"but need concrete evidence. MANDATORY ACTIONS before calling debug step {request.step_number + 1}:\n"
f"1. Examine ALL files in your relevant_files list\n"
f"2. Trace how data flows through {', '.join(request.relevant_methods[:3]) if request.relevant_methods else 'the identified components'}\n"
f"3. Look for logic errors, incorrect assumptions, missing validations\n"
f"4. Check interactions between components and external dependencies\n"
f"Only call debug again with step_number: {request.step_number + 1} AFTER completing these investigations."
)
elif request.confidence in ["medium", "high"]:
# Close to root cause - need confirmation
response_data["required_actions"] = [
"Examine the exact code sections where you believe the issue occurs",
"Trace the execution path that leads to the failure",
"Verify your hypothesis with concrete code evidence",
"Check for any similar patterns elsewhere in the codebase",
]
response_data["next_steps"] = (
f"WAIT! Your hypothesis needs verification. DO NOT call debug immediately. REQUIRED ACTIONS:\n"
f"1. Examine the exact lines where the issue occurs\n"
f"2. Trace backwards: how does data get to this point? What transforms it?\n"
f"3. Check all assumptions: are inputs validated? Are nulls handled?\n"
f"4. Look for the EXACT line where expected != actual behavior\n"
f"Document these findings with specific file:line references, then call debug with step_number: {request.step_number + 1}."
)
else:
# General investigation needed
response_data["required_actions"] = [
"Continue examining the code paths identified in your hypothesis",
"Gather more evidence using appropriate investigation tools",
"Test edge cases and boundary conditions",
"Look for patterns that confirm or refute your theory",
]
response_data["next_steps"] = (
f"PAUSE INVESTIGATION. Before calling debug step {request.step_number + 1}, you MUST examine code. "
f"Required: Read files from your files_checked list, search for patterns in your hypothesis, "
f"trace execution flow. Your next debug call (step_number: {request.step_number + 1}) must include "
f"NEW evidence from actual code examination, not just theories. NO recursive debug calls without investigation work!"
) )
# Store in conversation memory # Store in conversation memory