Make code lookup mandatory between investigation steps for better results

This commit is contained in:
Fahad
2025-06-19 17:42:41 +04:00
parent 79abb9ca7e
commit 7900fdd21c
8 changed files with 108 additions and 33 deletions

View File

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.2.2"
__version__ = "5.2.3"
# Last update date in ISO format
__updated__ = "2025-06-19"
# Primary maintainer

View File

@@ -537,10 +537,8 @@ RuntimeError: dictionary changed size during iteration
self.logger.error("Missing investigation_status in response")
return False
# Check output guidance exists
if "output" not in response_data:
self.logger.error("Missing output guidance in response")
return False
# Output field removed in favor of contextual next_steps
# No longer checking for "output" field as it was redundant
# Check next_steps guidance
if not response_data.get("next_steps"):

View File

@@ -95,10 +95,13 @@ class TestDynamicContextRequests:
# Parse the response - new debug tool returns structured JSON
response_data = json.loads(result[0].text)
assert response_data["status"] == "investigation_in_progress"
# Debug tool now returns "pause_for_investigation" to force actual investigation
assert response_data["status"] == "pause_for_investigation"
assert response_data["step_number"] == 1
assert response_data["next_step_required"] is True
assert response_data["investigation_status"]["current_confidence"] == "high"
assert response_data["investigation_required"] is True
assert "required_actions" in response_data
@pytest.mark.asyncio
@patch("tools.base.BaseTool.get_model_provider")

View File

@@ -133,13 +133,16 @@ class TestDebugTool:
parsed_response = json.loads(result[0].text)
assert parsed_response["status"] == "investigation_in_progress"
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1
assert parsed_response["total_steps"] == 5
assert parsed_response["next_step_required"] is True
assert parsed_response["continuation_id"] == "debug-uuid-123"
assert parsed_response["investigation_status"]["files_checked"] == 1
assert parsed_response["investigation_status"]["relevant_files"] == 1
assert parsed_response["investigation_required"] is True
assert "required_actions" in parsed_response
@pytest.mark.asyncio
async def test_execute_subsequent_investigation_step(self):
@@ -317,6 +320,7 @@ class TestDebugTool:
result = await tool.execute(arguments)
# Should return a list with TextContent
# Note: the status asserted further below is now "pause_for_investigation" for ongoing steps
assert len(result) == 1
response_text = result[0].text
@@ -325,7 +329,7 @@ class TestDebugTool:
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_in_progress"
assert parsed_response["status"] == "pause_for_investigation"
# After backtracking from step 2, history should have step 1 plus the new step
assert len(tool.investigation_history) == 2 # Step 1 + new step 3
assert tool.investigation_history[0]["step_number"] == 1
@@ -502,6 +506,7 @@ class TestDebugToolIntegration:
result = await self.tool.execute(arguments)
# Verify response structure
# Note: the status asserted further below is now "pause_for_investigation" for ongoing steps
assert len(result) == 1
response_text = result[0].text
@@ -510,7 +515,7 @@ class TestDebugToolIntegration:
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_in_progress"
assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1
assert parsed_response["continuation_id"] == "debug-flow-uuid"

View File

@@ -45,8 +45,10 @@ class TestDebugCertainConfidence:
# Verify step 1 response
response1 = json.loads(result1[0].text)
assert response1["status"] == "investigation_in_progress"
assert response1["status"] == "pause_for_investigation"
assert response1["step_number"] == 1
assert response1["investigation_required"] is True
assert "required_actions" in response1
continuation_id = response1["continuation_id"]
# Step 2: Final step with certain confidence (simple import fix)

View File

@@ -43,7 +43,7 @@ class TestDebugComprehensiveWorkflow:
# Verify step 1 response
assert len(result1) == 1
response1 = json.loads(result1[0].text)
assert response1["status"] == "investigation_in_progress"
assert response1["status"] == "pause_for_investigation"
assert response1["step_number"] == 1
assert response1["continuation_id"] == "debug-workflow-uuid"
@@ -56,7 +56,8 @@ class TestDebugComprehensiveWorkflow:
if args and len(args) >= 3:
assert args[0] == "debug-workflow-uuid"
assert args[1] == "assistant"
assert json.loads(args[2])["status"] == "investigation_in_progress"
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert json.loads(args[2])["status"] == "pause_for_investigation"
# Step 2: Continue investigation with findings
with patch("utils.conversation_memory.add_turn") as mock_add_turn:
@@ -78,7 +79,8 @@ class TestDebugComprehensiveWorkflow:
# Verify step 2 response
response2 = json.loads(result2[0].text)
assert response2["status"] == "investigation_in_progress"
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert response2["status"] == "pause_for_investigation"
assert response2["step_number"] == 2
assert response2["investigation_status"]["files_checked"] == 2
assert response2["investigation_status"]["relevant_methods"] == 2
@@ -268,9 +270,12 @@ class TestDebugComprehensiveWorkflow:
states.append(json.loads(result[0].text))
# Verify initial state
assert states[0]["status"] == "investigation_in_progress"
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert states[0]["status"] == "pause_for_investigation"
assert states[0]["step_number"] == 1
assert states[0]["next_step_required"] is True
assert states[0]["investigation_required"] is True
assert "required_actions" in states[0]
# Final state (triggers expert analysis)
mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"}

View File

@@ -39,8 +39,10 @@ class TestDebugContinuation:
assert len(result) == 1
response = json.loads(result[0].text)
assert response["status"] == "investigation_in_progress"
assert response["status"] == "pause_for_investigation"
assert response["continuation_id"] == "debug-test-uuid-123"
assert response["investigation_required"] is True
assert "required_actions" in response
def test_debug_conversation_formatting(self):
"""Test that debug tool's structured output is properly formatted in conversation history."""

View File

@@ -157,16 +157,18 @@ class DebugIssueTool(BaseTool):
"DEBUG & ROOT CAUSE ANALYSIS - Systematic self-investigation followed by expert analysis. "
"This tool guides you through a step-by-step investigation process where you:\n\n"
"1. Start with step 1: describe the issue to investigate\n"
"2. Continue with investigation steps: examine code, trace errors, test hypotheses\n"
"3. Track findings, relevant files, and methods throughout\n"
"4. Update hypotheses as understanding evolves\n"
"5. Backtrack and revise findings when needed\n"
"6. Once investigation is complete, receive expert analysis\n\n"
"The tool enforces systematic investigation methodology:\n"
"- Methodical code examination and evidence collection\n"
"- Hypothesis formation and validation\n"
"- File and method tracking for context\n"
"- Confidence assessment and revision capabilities\n\n"
"2. STOP and investigate using appropriate tools\n"
"3. Report findings in step 2 with concrete evidence from actual code\n"
"4. Continue investigating between each debug step\n"
"5. Track findings, relevant files, and methods throughout\n"
"6. Update hypotheses as understanding evolves\n"
"7. Once investigation is complete, receive expert analysis\n\n"
"IMPORTANT: This tool enforces investigation between steps:\n"
"- After each debug call, you MUST investigate before calling debug again\n"
"- Each step must include NEW evidence from code examination\n"
"- No recursive debug calls without actual investigation work\n"
"- The tool will specify which step number to use next\n"
"- Follow the required_actions list for investigation guidance\n\n"
"Perfect for: complex bugs, mysterious errors, performance issues, "
"race conditions, memory leaks, integration problems."
)
@@ -357,10 +359,6 @@ class DebugIssueTool(BaseTool):
"images_collected": len(set(self.consolidated_findings["images"])),
"current_confidence": request.confidence,
},
"output": {
"instructions": "Continue systematic investigation. Present findings clearly and proceed to next step if required.",
"format": "systematic_investigation",
},
}
if continuation_id:
@@ -436,10 +434,72 @@ class DebugIssueTool(BaseTool):
"the problem lies."
)
else:
response_data["next_steps"] = (
f"Continue investigation with step {request.step_number + 1}. "
f"Focus on: examining relevant code, testing hypotheses, gathering evidence."
)
# CRITICAL: Force Claude to actually investigate before calling debug again
response_data["status"] = "pause_for_investigation"
response_data["investigation_required"] = True
if request.step_number == 1:
# Initial investigation tasks
response_data["required_actions"] = [
"Search for code related to the reported issue or symptoms",
"Examine relevant files and understand the current implementation",
"Understand the project structure and locate relevant modules",
"Identify how the affected functionality is supposed to work",
]
response_data["next_steps"] = (
f"MANDATORY: DO NOT call the debug tool again immediately. You MUST first investigate "
f"the codebase using appropriate tools. Search for relevant code, examine implementations, "
f"understand the logic flow. Only call debug again AFTER you have gathered concrete evidence "
f"and examined actual code. When you call debug next time, use step_number: {request.step_number + 1} "
f"and report the specific files you've examined and findings you've discovered."
)
elif request.step_number >= 2 and request.confidence in ["exploring", "low"]:
# Need deeper investigation
response_data["required_actions"] = [
"Examine the specific files you've identified as relevant",
"Trace method calls and data flow through the system",
"Check for edge cases, boundary conditions, and assumptions in the code",
"Look for related configuration, dependencies, or external factors",
]
response_data["next_steps"] = (
f"STOP! Do NOT call debug again yet. Based on your findings, you've identified potential areas "
f"but need concrete evidence. MANDATORY ACTIONS before calling debug step {request.step_number + 1}:\n"
f"1. Examine ALL files in your relevant_files list\n"
f"2. Trace how data flows through {', '.join(request.relevant_methods[:3]) if request.relevant_methods else 'the identified components'}\n"
f"3. Look for logic errors, incorrect assumptions, missing validations\n"
f"4. Check interactions between components and external dependencies\n"
f"Only call debug again with step_number: {request.step_number + 1} AFTER completing these investigations."
)
elif request.confidence in ["medium", "high"]:
# Close to root cause - need confirmation
response_data["required_actions"] = [
"Examine the exact code sections where you believe the issue occurs",
"Trace the execution path that leads to the failure",
"Verify your hypothesis with concrete code evidence",
"Check for any similar patterns elsewhere in the codebase",
]
response_data["next_steps"] = (
f"WAIT! Your hypothesis needs verification. DO NOT call debug immediately. REQUIRED ACTIONS:\n"
f"1. Examine the exact lines where the issue occurs\n"
f"2. Trace backwards: how does data get to this point? What transforms it?\n"
f"3. Check all assumptions: are inputs validated? Are nulls handled?\n"
f"4. Look for the EXACT line where expected != actual behavior\n"
f"Document these findings with specific file:line references, then call debug with step_number: {request.step_number + 1}."
)
else:
# General investigation needed
response_data["required_actions"] = [
"Continue examining the code paths identified in your hypothesis",
"Gather more evidence using appropriate investigation tools",
"Test edge cases and boundary conditions",
"Look for patterns that confirm or refute your theory",
]
response_data["next_steps"] = (
f"PAUSE INVESTIGATION. Before calling debug step {request.step_number + 1}, you MUST examine code. "
f"Required: Read files from your files_checked list, search for patterns in your hypothesis, "
f"trace execution flow. Your next debug call (step_number: {request.step_number + 1}) must include "
f"NEW evidence from actual code examination, not just theories. NO recursive debug calls without investigation work!"
)
# Store in conversation memory
if continuation_id: