diff --git a/config.py b/config.py index 6856d83..216fdcb 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "5.2.2" +__version__ = "5.2.3" # Last update date in ISO format __updated__ = "2025-06-19" # Primary maintainer diff --git a/simulator_tests/test_debug_validation.py b/simulator_tests/test_debug_validation.py index 3e33ef7..50d89f3 100644 --- a/simulator_tests/test_debug_validation.py +++ b/simulator_tests/test_debug_validation.py @@ -537,10 +537,8 @@ RuntimeError: dictionary changed size during iteration self.logger.error("Missing investigation_status in response") return False - # Check output guidance exists - if "output" not in response_data: - self.logger.error("Missing output guidance in response") - return False + # Output field removed in favor of contextual next_steps + # No longer checking for "output" field as it was redundant # Check next_steps guidance if not response_data.get("next_steps"): diff --git a/tests/test_collaboration.py b/tests/test_collaboration.py index 5c4a665..bb52206 100644 --- a/tests/test_collaboration.py +++ b/tests/test_collaboration.py @@ -95,10 +95,13 @@ class TestDynamicContextRequests: # Parse the response - new debug tool returns structured JSON response_data = json.loads(result[0].text) - assert response_data["status"] == "investigation_in_progress" + # Debug tool now returns "pause_for_investigation" to force actual investigation + assert response_data["status"] == "pause_for_investigation" assert response_data["step_number"] == 1 assert response_data["next_step_required"] is True assert response_data["investigation_status"]["current_confidence"] == "high" + assert response_data["investigation_required"] is True + assert "required_actions" in response_data @pytest.mark.asyncio @patch("tools.base.BaseTool.get_model_provider") diff --git a/tests/test_debug.py b/tests/test_debug.py index 765c36c..45ffaa0 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ -133,13 +133,16 @@ class TestDebugTool: parsed_response = json.loads(result[0].text) - assert parsed_response["status"] == "investigation_in_progress" + # Debug tool now returns "pause_for_investigation" for ongoing steps + assert parsed_response["status"] == "pause_for_investigation" assert parsed_response["step_number"] == 1 assert parsed_response["total_steps"] == 5 assert parsed_response["next_step_required"] is True assert parsed_response["continuation_id"] == "debug-uuid-123" assert parsed_response["investigation_status"]["files_checked"] == 1 assert parsed_response["investigation_status"]["relevant_files"] == 1 + assert parsed_response["investigation_required"] is True + assert "required_actions" in parsed_response @pytest.mark.asyncio async def test_execute_subsequent_investigation_step(self): @@ -317,6 +320,7 @@ class TestDebugTool: result = await tool.execute(arguments) # Should return a list with TextContent + # Debug tool now returns "pause_for_investigation" for ongoing steps assert len(result) == 1 response_text = result[0].text @@ -325,7 +329,7 @@ class TestDebugTool: parsed_response = json.loads(response_text) - assert parsed_response["status"] == "investigation_in_progress" + assert parsed_response["status"] == "pause_for_investigation" # After backtracking from step 2, history should have step 1 plus the new step assert len(tool.investigation_history) == 2 # Step 1 + new step 3 assert tool.investigation_history[0]["step_number"] == 1 @@ -502,6 +506,7 @@ class TestDebugToolIntegration: result = await self.tool.execute(arguments) # Verify response structure + # Debug tool now returns "pause_for_investigation" for ongoing steps assert len(result) == 1 response_text = result[0].text @@ -510,7 +515,7 @@ class TestDebugToolIntegration: parsed_response = json.loads(response_text) - assert parsed_response["status"] == "investigation_in_progress" + assert parsed_response["status"] == "pause_for_investigation" assert parsed_response["step_number"] == 1 assert parsed_response["continuation_id"] == "debug-flow-uuid" diff --git a/tests/test_debug_certain_confidence.py b/tests/test_debug_certain_confidence.py index 1772170..b650f2d 100644 --- a/tests/test_debug_certain_confidence.py +++ b/tests/test_debug_certain_confidence.py @@ -45,8 +45,10 @@ class TestDebugCertainConfidence: # Verify step 1 response response1 = json.loads(result1[0].text) - assert response1["status"] == "investigation_in_progress" + assert response1["status"] == "pause_for_investigation" assert response1["step_number"] == 1 + assert response1["investigation_required"] is True + assert "required_actions" in response1 continuation_id = response1["continuation_id"] # Step 2: Final step with certain confidence (simple import fix) diff --git a/tests/test_debug_comprehensive_workflow.py b/tests/test_debug_comprehensive_workflow.py index afd0f2b..242ab1f 100644 --- a/tests/test_debug_comprehensive_workflow.py +++ b/tests/test_debug_comprehensive_workflow.py @@ -43,7 +43,7 @@ class TestDebugComprehensiveWorkflow: # Verify step 1 response assert len(result1) == 1 response1 = json.loads(result1[0].text) - assert response1["status"] == "investigation_in_progress" + assert response1["status"] == "pause_for_investigation" assert response1["step_number"] == 1 assert response1["continuation_id"] == "debug-workflow-uuid" @@ -56,7 +56,8 @@ class TestDebugComprehensiveWorkflow: if args and len(args) >= 3: assert args[0] == "debug-workflow-uuid" assert args[1] == "assistant" - assert json.loads(args[2])["status"] == "investigation_in_progress" + # Debug tool now returns "pause_for_investigation" for ongoing steps + assert json.loads(args[2])["status"] == "pause_for_investigation" # Step 2: Continue investigation with findings with patch("utils.conversation_memory.add_turn") as mock_add_turn: @@ -78,7 +79,8 @@ class TestDebugComprehensiveWorkflow: # Verify step 2 response response2 = json.loads(result2[0].text) - assert response2["status"] == "investigation_in_progress" + # Debug tool now returns "pause_for_investigation" for ongoing steps + assert response2["status"] == "pause_for_investigation" assert response2["step_number"] == 2 assert response2["investigation_status"]["files_checked"] == 2 assert response2["investigation_status"]["relevant_methods"] == 2 @@ -268,9 +270,12 @@ class TestDebugComprehensiveWorkflow: states.append(json.loads(result[0].text)) # Verify initial state - assert states[0]["status"] == "investigation_in_progress" + # Debug tool now returns "pause_for_investigation" for ongoing steps + assert states[0]["status"] == "pause_for_investigation" assert states[0]["step_number"] == 1 assert states[0]["next_step_required"] is True + assert states[0]["investigation_required"] is True + assert "required_actions" in states[0] # Final state (triggers expert analysis) mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"} diff --git a/tests/test_debug_continuation.py b/tests/test_debug_continuation.py index 8b810b0..09c9b71 100644 --- a/tests/test_debug_continuation.py +++ b/tests/test_debug_continuation.py @@ -39,8 +39,10 @@ class TestDebugContinuation: assert len(result) == 1 response = json.loads(result[0].text) - assert response["status"] == "investigation_in_progress" + assert response["status"] == "pause_for_investigation" assert response["continuation_id"] == "debug-test-uuid-123" + assert response["investigation_required"] is True + assert "required_actions" in response def test_debug_conversation_formatting(self): """Test that debug tool's structured output is properly formatted in conversation history.""" diff --git a/tools/debug.py b/tools/debug.py index 4ee196e..956837a 100644 --- a/tools/debug.py +++ b/tools/debug.py @@ -157,16 +157,18 @@ class DebugIssueTool(BaseTool): "DEBUG & ROOT CAUSE ANALYSIS - Systematic self-investigation followed by expert analysis. " "This tool guides you through a step-by-step investigation process where you:\n\n" "1. Start with step 1: describe the issue to investigate\n" - "2. Continue with investigation steps: examine code, trace errors, test hypotheses\n" - "3. Track findings, relevant files, and methods throughout\n" - "4. Update hypotheses as understanding evolves\n" - "5. Backtrack and revise findings when needed\n" - "6. Once investigation is complete, receive expert analysis\n\n" - "The tool enforces systematic investigation methodology:\n" - "- Methodical code examination and evidence collection\n" - "- Hypothesis formation and validation\n" - "- File and method tracking for context\n" - "- Confidence assessment and revision capabilities\n\n" + "2. STOP and investigate using appropriate tools\n" + "3. Report findings in step 2 with concrete evidence from actual code\n" + "4. Continue investigating between each debug step\n" + "5. Track findings, relevant files, and methods throughout\n" + "6. Update hypotheses as understanding evolves\n" + "7. Once investigation is complete, receive expert analysis\n\n" + "IMPORTANT: This tool enforces investigation between steps:\n" + "- After each debug call, you MUST investigate before calling debug again\n" + "- Each step must include NEW evidence from code examination\n" + "- No recursive debug calls without actual investigation work\n" + "- The tool will specify which step number to use next\n" + "- Follow the required_actions list for investigation guidance\n\n" "Perfect for: complex bugs, mysterious errors, performance issues, " "race conditions, memory leaks, integration problems." ) @@ -357,10 +359,6 @@ class DebugIssueTool(BaseTool): "images_collected": len(set(self.consolidated_findings["images"])), "current_confidence": request.confidence, }, - "output": { - "instructions": "Continue systematic investigation. Present findings clearly and proceed to next step if required.", - "format": "systematic_investigation", - }, } if continuation_id: @@ -436,10 +434,72 @@ class DebugIssueTool(BaseTool): "the problem lies." ) else: - response_data["next_steps"] = ( - f"Continue investigation with step {request.step_number + 1}. " - f"Focus on: examining relevant code, testing hypotheses, gathering evidence." - ) + # CRITICAL: Force Claude to actually investigate before calling debug again + response_data["status"] = "pause_for_investigation" + response_data["investigation_required"] = True + + if request.step_number == 1: + # Initial investigation tasks + response_data["required_actions"] = [ + "Search for code related to the reported issue or symptoms", + "Examine relevant files and understand the current implementation", + "Understand the project structure and locate relevant modules", + "Identify how the affected functionality is supposed to work", + ] + response_data["next_steps"] = ( + f"MANDATORY: DO NOT call the debug tool again immediately. You MUST first investigate " + f"the codebase using appropriate tools. Search for relevant code, examine implementations, " + f"understand the logic flow. Only call debug again AFTER you have gathered concrete evidence " + f"and examined actual code. When you call debug next time, use step_number: {request.step_number + 1} " + f"and report the specific files you've examined and findings you've discovered." + ) + elif request.step_number >= 2 and request.confidence in ["exploring", "low"]: + # Need deeper investigation + response_data["required_actions"] = [ + "Examine the specific files you've identified as relevant", + "Trace method calls and data flow through the system", + "Check for edge cases, boundary conditions, and assumptions in the code", + "Look for related configuration, dependencies, or external factors", + ] + response_data["next_steps"] = ( + f"STOP! Do NOT call debug again yet. Based on your findings, you've identified potential areas " + f"but need concrete evidence. MANDATORY ACTIONS before calling debug step {request.step_number + 1}:\n" + f"1. Examine ALL files in your relevant_files list\n" + f"2. Trace how data flows through {', '.join(request.relevant_methods[:3]) if request.relevant_methods else 'the identified components'}\n" + f"3. Look for logic errors, incorrect assumptions, missing validations\n" + f"4. Check interactions between components and external dependencies\n" + f"Only call debug again with step_number: {request.step_number + 1} AFTER completing these investigations." + ) + elif request.confidence in ["medium", "high"]: + # Close to root cause - need confirmation + response_data["required_actions"] = [ + "Examine the exact code sections where you believe the issue occurs", + "Trace the execution path that leads to the failure", + "Verify your hypothesis with concrete code evidence", + "Check for any similar patterns elsewhere in the codebase", + ] + response_data["next_steps"] = ( + f"WAIT! Your hypothesis needs verification. DO NOT call debug immediately. REQUIRED ACTIONS:\n" + f"1. Examine the exact lines where the issue occurs\n" + f"2. Trace backwards: how does data get to this point? What transforms it?\n" + f"3. Check all assumptions: are inputs validated? Are nulls handled?\n" + f"4. Look for the EXACT line where expected != actual behavior\n" + f"Document these findings with specific file:line references, then call debug with step_number: {request.step_number + 1}." + ) + else: + # General investigation needed + response_data["required_actions"] = [ + "Continue examining the code paths identified in your hypothesis", + "Gather more evidence using appropriate investigation tools", + "Test edge cases and boundary conditions", + "Look for patterns that confirm or refute your theory", + ] + response_data["next_steps"] = ( + f"PAUSE INVESTIGATION. Before calling debug step {request.step_number + 1}, you MUST examine code. " + f"Required: Read files from your files_checked list, search for patterns in your hypothesis, " + f"trace execution flow. Your next debug call (step_number: {request.step_number + 1}) must include " + f"NEW evidence from actual code examination, not just theories. NO recursive debug calls without investigation work!" + ) # Store in conversation memory if continuation_id: