Make code lookup mandatory between investigation steps for better results

2025-06-19 17:42:41 +04:00
parent 79abb9ca7e
commit 7900fdd21c
8 changed files with 108 additions and 33 deletions
--- a/tests/test_collaboration.py
+++ b/tests/test_collaboration.py
@@ -95,10 +95,13 @@ class TestDynamicContextRequests:

        # Parse the response - new debug tool returns structured JSON
        response_data = json.loads(result[0].text)
-        assert response_data["status"] == "investigation_in_progress"
+        # Debug tool now returns "pause_for_investigation" to force actual investigation
+        assert response_data["status"] == "pause_for_investigation"
        assert response_data["step_number"] == 1
        assert response_data["next_step_required"] is True
        assert response_data["investigation_status"]["current_confidence"] == "high"
+        assert response_data["investigation_required"] is True
+        assert "required_actions" in response_data

    @pytest.mark.asyncio
    @patch("tools.base.BaseTool.get_model_provider")
--- a/tests/test_debug.py
+++ b/tests/test_debug.py
@@ -133,13 +133,16 @@ class TestDebugTool:

        parsed_response = json.loads(result[0].text)

-        assert parsed_response["status"] == "investigation_in_progress"
+        # Debug tool now returns "pause_for_investigation" for ongoing steps
+        assert parsed_response["status"] == "pause_for_investigation"
        assert parsed_response["step_number"] == 1
        assert parsed_response["total_steps"] == 5
        assert parsed_response["next_step_required"] is True
        assert parsed_response["continuation_id"] == "debug-uuid-123"
        assert parsed_response["investigation_status"]["files_checked"] == 1
        assert parsed_response["investigation_status"]["relevant_files"] == 1
+        assert parsed_response["investigation_required"] is True
+        assert "required_actions" in parsed_response

    @pytest.mark.asyncio
    async def test_execute_subsequent_investigation_step(self):
@@ -317,6 +320,7 @@ class TestDebugTool:
            result = await tool.execute(arguments)

        # Should return a list with TextContent
+        # The status asserted below is now "pause_for_investigation" for ongoing steps
        assert len(result) == 1
        response_text = result[0].text

@@ -325,7 +329,7 @@ class TestDebugTool:

        parsed_response = json.loads(response_text)

-        assert parsed_response["status"] == "investigation_in_progress"
+        assert parsed_response["status"] == "pause_for_investigation"
        # After backtracking from step 2, history should have step 1 plus the new step
        assert len(tool.investigation_history) == 2  # Step 1 + new step 3
        assert tool.investigation_history[0]["step_number"] == 1
@@ -502,6 +506,7 @@ class TestDebugToolIntegration:
                result = await self.tool.execute(arguments)

        # Verify response structure
+        # The status asserted below is now "pause_for_investigation" for ongoing steps
        assert len(result) == 1
        response_text = result[0].text

@@ -510,7 +515,7 @@ class TestDebugToolIntegration:

        parsed_response = json.loads(response_text)

-        assert parsed_response["status"] == "investigation_in_progress"
+        assert parsed_response["status"] == "pause_for_investigation"
        assert parsed_response["step_number"] == 1
        assert parsed_response["continuation_id"] == "debug-flow-uuid"

--- a/tests/test_debug_certain_confidence.py
+++ b/tests/test_debug_certain_confidence.py
@@ -45,8 +45,10 @@ class TestDebugCertainConfidence:

        # Verify step 1 response
        response1 = json.loads(result1[0].text)
-        assert response1["status"] == "investigation_in_progress"
+        assert response1["status"] == "pause_for_investigation"
        assert response1["step_number"] == 1
+        assert response1["investigation_required"] is True
+        assert "required_actions" in response1
        continuation_id = response1["continuation_id"]

        # Step 2: Final step with certain confidence (simple import fix)
--- a/tests/test_debug_comprehensive_workflow.py
+++ b/tests/test_debug_comprehensive_workflow.py
@@ -43,7 +43,7 @@ class TestDebugComprehensiveWorkflow:
        # Verify step 1 response
        assert len(result1) == 1
        response1 = json.loads(result1[0].text)
-        assert response1["status"] == "investigation_in_progress"
+        assert response1["status"] == "pause_for_investigation"
        assert response1["step_number"] == 1
        assert response1["continuation_id"] == "debug-workflow-uuid"

@@ -56,7 +56,8 @@ class TestDebugComprehensiveWorkflow:
            if args and len(args) >= 3:
                assert args[0] == "debug-workflow-uuid"
                assert args[1] == "assistant"
-                assert json.loads(args[2])["status"] == "investigation_in_progress"
+                # Debug tool now returns "pause_for_investigation" for ongoing steps
+                assert json.loads(args[2])["status"] == "pause_for_investigation"

        # Step 2: Continue investigation with findings
        with patch("utils.conversation_memory.add_turn") as mock_add_turn:
@@ -78,7 +79,8 @@ class TestDebugComprehensiveWorkflow:

        # Verify step 2 response
        response2 = json.loads(result2[0].text)
-        assert response2["status"] == "investigation_in_progress"
+        # Debug tool now returns "pause_for_investigation" for ongoing steps
+        assert response2["status"] == "pause_for_investigation"
        assert response2["step_number"] == 2
        assert response2["investigation_status"]["files_checked"] == 2
        assert response2["investigation_status"]["relevant_methods"] == 2
@@ -268,9 +270,12 @@ class TestDebugComprehensiveWorkflow:
                states.append(json.loads(result[0].text))

        # Verify initial state
-        assert states[0]["status"] == "investigation_in_progress"
+        # Debug tool now returns "pause_for_investigation" for ongoing steps
+        assert states[0]["status"] == "pause_for_investigation"
        assert states[0]["step_number"] == 1
        assert states[0]["next_step_required"] is True
+        assert states[0]["investigation_required"] is True
+        assert "required_actions" in states[0]

        # Final state (triggers expert analysis)
        mock_expert_response = {"status": "analysis_complete", "summary": "Test complete"}
--- a/tests/test_debug_continuation.py
+++ b/tests/test_debug_continuation.py
@@ -39,8 +39,10 @@ class TestDebugContinuation:

        assert len(result) == 1
        response = json.loads(result[0].text)
-        assert response["status"] == "investigation_in_progress"
+        assert response["status"] == "pause_for_investigation"
        assert response["continuation_id"] == "debug-test-uuid-123"
+        assert response["investigation_required"] is True
+        assert "required_actions" in response

    def test_debug_conversation_formatting(self):
        """Test that debug tool's structured output is properly formatted in conversation history."""