🚀 Major Enhancement: Workflow-Based Tool Architecture v5.5.0 (#95)

* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docs, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool now works as a workflow instead of a basic tool
Support for passing use_assistant_model=False to skip external models entirely and use Claude only (see the sketch below)
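
A minimal sketch of how a caller might opt out of the external model. Everything here except the use_assistant_model flag is an assumption, not the precommit workflow's actual schema:

    # Illustrative only: field names besides "use_assistant_model" are guesses.
    precommit_arguments = {
        "step": "Review staged changes before committing",
        "step_number": 1,
        "total_steps": 1,
        "next_step_required": False,
        "findings": "Initial pass over the staged diff",
        "use_assistant_model": False,  # skip external models entirely; Claude handles the step alone
    }
    # result = await PrecommitTool().execute(precommit_arguments)  # hypothetical call, shown for shape only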

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building conversation history
* Use "relevant_files" for workflow tools only (see the sketch below)
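
A rough sketch of the intent behind these two points; the helper and field names are invented for illustration and are not the project's actual code:

    # Hypothetical helper, assuming each prior turn is stored as a dict.
    def build_history(turns, is_workflow_tool):
        parts = []
        for turn in turns:
            parts.append(turn.get("content", ""))  # extract only the content, not tool metadata
            if is_workflow_tool and turn.get("relevant_files"):
                # "relevant_files" is surfaced for workflow tools only
                parts.append("Relevant files: " + ", ".join(turn["relevant_files"]))
        return "\n".join(parts)

    print(build_history([{"content": "Step 1 findings", "relevant_files": ["/api/session_manager.py"]}], True))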

* WIP: fixed get_completion_next_steps_message missing param

* Fixed tests
Request files consistently

* Fixed tests

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys
- Fix collaboration tests to properly mock workflow tool expert analysis calls
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI (minimal sketch below)
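
One plausible shape for the exclusion, assuming pytest and an API-key environment variable; the marker and variable names are assumptions, not necessarily what this repo uses:

    import os

    import pytest

    # Skip simulation tests whenever no real API key is configured (e.g. in CI).
    requires_api_key = pytest.mark.skipif(
        not os.environ.get("GEMINI_API_KEY"),
        reason="simulation test needs a live API key; excluded from CI runs",
    )

    @requires_api_key
    def test_end_to_end_workflow_with_live_model():
        ...  # would exercise a real provider; unit tests stay independent of it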

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

---------

Co-authored-by: Claude <noreply@anthropic.com>
Beehive Innovations authored 2025-06-21 00:08:11 +04:00; committed by GitHub
parent 4dae6e457e
commit 69a3121452
76 changed files with 17111 additions and 7725 deletions

@@ -1,17 +1,13 @@
"""
Tests for the debug tool using new WorkflowTool architecture.
"""
from unittest.mock import patch
import pytest
from tools.debug import DebugInvestigationRequest, DebugIssueTool
from tools.models import ToolModelCategory
class TestDebugTool:
"""Test suite for DebugIssueTool."""
"""Test suite for DebugIssueTool using new WorkflowTool architecture."""
def test_tool_metadata(self):
"""Test basic tool metadata and configuration."""
@@ -21,7 +17,7 @@ class TestDebugTool:
assert "DEBUG & ROOT CAUSE ANALYSIS" in tool.get_description()
assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL
assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
assert tool.requires_model() is True # Requires model resolution for expert analysis
def test_request_validation(self):
"""Test Pydantic request model validation."""
@@ -29,622 +25,62 @@ class TestDebugTool:
step_request = DebugInvestigationRequest(
step="Investigating null pointer exception in UserService",
step_number=1,
total_steps=3,
next_step_required=True,
findings="Found that UserService.getUser() is called with null ID",
)
assert step_request.step == "Investigating null pointer exception in UserService"
assert step_request.step_number == 1
assert step_request.next_step_required is True
assert step_request.confidence == "low" # default
# Request with optional fields
detailed_request = DebugInvestigationRequest(
step="Deep dive into getUser method implementation",
step_number=2,
total_steps=5,
next_step_required=True,
findings="Method doesn't validate input parameters",
files_checked=["/src/UserService.java", "/src/UserController.java"],
findings="Found potential null reference in user authentication flow",
files_checked=["/src/UserService.java"],
relevant_files=["/src/UserService.java"],
relevant_methods=["UserService.getUser", "UserController.handleRequest"],
hypothesis="Null ID passed from controller without validation",
relevant_methods=["authenticate", "validateUser"],
confidence="medium",
hypothesis="Null pointer occurs when user object is not properly validated",
)
assert len(detailed_request.files_checked) == 2
assert len(detailed_request.relevant_files) == 1
assert detailed_request.confidence == "medium"
# Missing required fields should fail
with pytest.raises(ValueError):
DebugInvestigationRequest() # Missing all required fields
with pytest.raises(ValueError):
DebugInvestigationRequest(step="test") # Missing other required fields
assert step_request.step_number == 1
assert step_request.confidence == "medium"
assert len(step_request.relevant_methods) == 2
assert len(step_request.relevant_context) == 2 # Should be mapped from relevant_methods
def test_input_schema_generation(self):
"""Test JSON schema generation for MCP client."""
"""Test that input schema is generated correctly."""
tool = DebugIssueTool()
schema = tool.get_input_schema()
assert schema["type"] == "object"
# Verify required investigation fields are present
assert "step" in schema["properties"]
assert "step_number" in schema["properties"]
assert "total_steps" in schema["properties"]
assert "next_step_required" in schema["properties"]
assert "findings" in schema["properties"]
assert "files_checked" in schema["properties"]
assert "relevant_files" in schema["properties"]
assert "relevant_methods" in schema["properties"]
assert "hypothesis" in schema["properties"]
assert "confidence" in schema["properties"]
assert "backtrack_from_step" in schema["properties"]
assert "continuation_id" in schema["properties"]
assert "images" in schema["properties"] # Now supported for visual debugging
# Check model field is present (fixed from previous bug)
assert "model" in schema["properties"]
# Check excluded fields are NOT present
assert "temperature" not in schema["properties"]
assert "thinking_mode" not in schema["properties"]
assert "use_websearch" not in schema["properties"]
# Check required fields
assert "step" in schema["required"]
assert "step_number" in schema["required"]
assert "total_steps" in schema["required"]
assert "next_step_required" in schema["required"]
assert "findings" in schema["required"]
# Verify field types
assert schema["properties"]["step"]["type"] == "string"
assert schema["properties"]["step_number"]["type"] == "integer"
assert schema["properties"]["next_step_required"]["type"] == "boolean"
assert schema["properties"]["relevant_methods"]["type"] == "array"
def test_model_category_for_debugging(self):
"""Test that debug uses extended reasoning category."""
"""Test that debug tool correctly identifies as extended reasoning category."""
tool = DebugIssueTool()
# Debugging needs deep thinking
assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
def test_field_mapping_relevant_methods_to_context(self):
"""Test that relevant_methods maps to relevant_context internally."""
request = DebugInvestigationRequest(
step="Test investigation",
step_number=1,
total_steps=2,
next_step_required=True,
findings="Test findings",
relevant_methods=["method1", "method2"],
)
@pytest.mark.asyncio
async def test_execute_first_investigation_step(self):
"""Test execute method for first investigation step."""
# External API should have relevant_methods
assert request.relevant_methods == ["method1", "method2"]
# Internal processing should map to relevant_context
assert request.relevant_context == ["method1", "method2"]
# Test step data preparation
tool = DebugIssueTool()
arguments = {
"step": "Investigating intermittent session validation failures in production",
"step_number": 1,
"total_steps": 5,
"next_step_required": True,
"findings": "Users report random session invalidation, occurs more during high traffic",
"files_checked": ["/api/session_manager.py"],
"relevant_files": ["/api/session_manager.py"],
}
# Mock conversation memory functions
with patch("utils.conversation_memory.create_thread", return_value="debug-uuid-123"):
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
assert result[0].type == "text"
# Parse the JSON response
import json
parsed_response = json.loads(result[0].text)
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1
assert parsed_response["total_steps"] == 5
assert parsed_response["next_step_required"] is True
assert parsed_response["continuation_id"] == "debug-uuid-123"
assert parsed_response["investigation_status"]["files_checked"] == 1
assert parsed_response["investigation_status"]["relevant_files"] == 1
assert parsed_response["investigation_required"] is True
assert "required_actions" in parsed_response
@pytest.mark.asyncio
async def test_execute_subsequent_investigation_step(self):
"""Test execute method for subsequent investigation step."""
tool = DebugIssueTool()
# Set up initial state
tool.initial_issue = "Session validation failures"
tool.consolidated_findings["files_checked"].add("/api/session_manager.py")
arguments = {
"step": "Examining session cleanup method for concurrent modification issues",
"step_number": 2,
"total_steps": 5,
"next_step_required": True,
"findings": "Found dictionary modification during iteration in cleanup_expired_sessions",
"files_checked": ["/api/session_manager.py", "/api/utils.py"],
"relevant_files": ["/api/session_manager.py"],
"relevant_methods": ["SessionManager.cleanup_expired_sessions"],
"hypothesis": "Dictionary modified during iteration causing RuntimeError",
"confidence": "high",
"continuation_id": "debug-uuid-123",
}
# Mock conversation memory functions
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
assert result[0].type == "text"
# Parse the JSON response
import json
parsed_response = json.loads(result[0].text)
assert parsed_response["step_number"] == 2
assert parsed_response["next_step_required"] is True
assert parsed_response["continuation_id"] == "debug-uuid-123"
assert parsed_response["investigation_status"]["files_checked"] == 2 # Cumulative
assert parsed_response["investigation_status"]["relevant_methods"] == 1
assert parsed_response["investigation_status"]["current_confidence"] == "high"
@pytest.mark.asyncio
async def test_execute_final_investigation_step(self):
"""Test execute method for final investigation step with expert analysis."""
tool = DebugIssueTool()
# Set up investigation history
tool.initial_issue = "Session validation failures"
tool.investigation_history = [
{
"step_number": 1,
"step": "Initial investigation of session validation failures",
"findings": "Initial investigation",
"files_checked": ["/api/utils.py"],
},
{
"step_number": 2,
"step": "Deeper analysis of session manager",
"findings": "Found dictionary issue",
"files_checked": ["/api/session_manager.py"],
},
]
tool.consolidated_findings = {
"files_checked": {"/api/session_manager.py", "/api/utils.py"},
"relevant_files": {"/api/session_manager.py"},
"relevant_methods": {"SessionManager.cleanup_expired_sessions"},
"findings": ["Step 1: Initial investigation", "Step 2: Found dictionary issue"],
"hypotheses": [{"step": 2, "hypothesis": "Dictionary modified during iteration", "confidence": "high"}],
"images": [],
}
arguments = {
"step": "Confirmed the root cause and identified fix",
"step_number": 3,
"total_steps": 3,
"next_step_required": False, # Final step
"findings": "Root cause confirmed: dictionary modification during iteration in cleanup method",
"files_checked": ["/api/session_manager.py"],
"relevant_files": ["/api/session_manager.py"],
"relevant_methods": ["SessionManager.cleanup_expired_sessions"],
"hypothesis": "Dictionary modification during iteration causes intermittent RuntimeError",
"confidence": "high",
"continuation_id": "debug-uuid-123",
}
# Mock the expert analysis call
mock_expert_response = {
"status": "analysis_complete",
"summary": "Dictionary modification during iteration bug identified",
"hypotheses": [
{
"name": "CONCURRENT_MODIFICATION",
"confidence": "High",
"root_cause": "Modifying dictionary while iterating",
"minimal_fix": "Create list of keys to delete first",
}
],
}
# Mock conversation memory and file reading
with patch("utils.conversation_memory.add_turn"):
with patch.object(tool, "_call_expert_analysis", return_value=mock_expert_response):
with patch.object(tool, "_prepare_file_content_for_prompt", return_value=("file content", 100)):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
# Check final step structure
assert parsed_response["status"] == "calling_expert_analysis"
assert parsed_response["investigation_complete"] is True
assert parsed_response["expert_analysis"]["status"] == "analysis_complete"
assert "complete_investigation" in parsed_response
assert parsed_response["complete_investigation"]["steps_taken"] == 3 # All steps including current
@pytest.mark.asyncio
async def test_execute_with_backtracking(self):
"""Test execute method with backtracking to revise findings."""
tool = DebugIssueTool()
# Set up some investigation history with all required fields
tool.investigation_history = [
{
"step": "Initial investigation",
"step_number": 1,
"findings": "Initial findings",
"files_checked": ["file1.py"],
"relevant_files": [],
"relevant_methods": [],
"hypothesis": None,
"confidence": "low",
},
{
"step": "Wrong direction",
"step_number": 2,
"findings": "Wrong path",
"files_checked": ["file2.py"],
"relevant_files": [],
"relevant_methods": [],
"hypothesis": None,
"confidence": "low",
},
]
tool.consolidated_findings = {
"files_checked": {"file1.py", "file2.py"},
"relevant_files": set(),
"relevant_methods": set(),
"findings": ["Step 1: Initial findings", "Step 2: Wrong path"],
"hypotheses": [],
"images": [],
}
arguments = {
"step": "Backtracking to revise approach",
"step_number": 3,
"total_steps": 5,
"next_step_required": True,
"findings": "Taking a different investigation approach",
"files_checked": ["file3.py"],
"backtrack_from_step": 2, # Backtrack from step 2
"continuation_id": "debug-uuid-123",
}
# Mock conversation memory functions
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "pause_for_investigation"
# After backtracking from step 2, history should have step 1 plus the new step
assert len(tool.investigation_history) == 2 # Step 1 + new step 3
assert tool.investigation_history[0]["step_number"] == 1
assert tool.investigation_history[1]["step_number"] == 3 # The new step that triggered backtrack
@pytest.mark.asyncio
async def test_execute_adjusts_total_steps(self):
"""Test execute method adjusts total steps when current step exceeds estimate."""
tool = DebugIssueTool()
arguments = {
"step": "Additional investigation needed",
"step_number": 8,
"total_steps": 5, # Current step exceeds total
"next_step_required": True,
"findings": "More complexity discovered",
"continuation_id": "debug-uuid-123",
}
# Mock conversation memory functions
with patch("utils.conversation_memory.add_turn"):
result = await tool.execute(arguments)
# Should return a list with TextContent
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
# Total steps should be adjusted to match current step
assert parsed_response["total_steps"] == 8
assert parsed_response["step_number"] == 8
@pytest.mark.asyncio
async def test_execute_error_handling(self):
"""Test execute method error handling."""
tool = DebugIssueTool()
# Invalid arguments - missing required fields
arguments = {
"step": "Invalid request"
# Missing required fields
}
result = await tool.execute(arguments)
# Should return error response
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "investigation_failed"
assert "error" in parsed_response
@pytest.mark.asyncio
async def test_execute_with_string_instead_of_list_fields(self):
"""Test execute method handles string inputs for list fields gracefully."""
tool = DebugIssueTool()
arguments = {
"step": "Investigating issue with string inputs",
"step_number": 1,
"total_steps": 3,
"next_step_required": True,
"findings": "Testing string input handling",
# These should be lists but passing strings to test the fix
"files_checked": "relevant_files", # String instead of list
"relevant_files": "some_string", # String instead of list
"relevant_methods": "another_string", # String instead of list
}
# Mock conversation memory functions
with patch("utils.conversation_memory.create_thread", return_value="debug-string-test"):
with patch("utils.conversation_memory.add_turn"):
# Should handle gracefully without crashing
result = await tool.execute(arguments)
# Should return a valid response
assert len(result) == 1
assert result[0].type == "text"
# Parse the JSON response
import json
parsed_response = json.loads(result[0].text)
# Should complete successfully with empty lists
assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1
assert parsed_response["investigation_status"]["files_checked"] == 0 # Empty due to string conversion
assert parsed_response["investigation_status"]["relevant_files"] == 0
assert parsed_response["investigation_status"]["relevant_methods"] == 0
# Verify internal state - should have empty sets, not individual characters
assert tool.consolidated_findings["files_checked"] == set()
assert tool.consolidated_findings["relevant_files"] == set()
assert tool.consolidated_findings["relevant_methods"] == set()
# Should NOT have individual characters like {'r', 'e', 'l', 'e', 'v', 'a', 'n', 't', '_', 'f', 'i', 'l', 'e', 's'}
def test_prepare_investigation_summary(self):
"""Test investigation summary preparation."""
tool = DebugIssueTool()
tool.consolidated_findings = {
"files_checked": {"file1.py", "file2.py", "file3.py"},
"relevant_files": {"file1.py", "file2.py"},
"relevant_methods": {"Class1.method1", "Class2.method2"},
"findings": [
"Step 1: Initial investigation findings",
"Step 2: Discovered potential issue",
"Step 3: Confirmed root cause",
],
"hypotheses": [
{"step": 1, "hypothesis": "Initial hypothesis", "confidence": "low"},
{"step": 2, "hypothesis": "Refined hypothesis", "confidence": "medium"},
{"step": 3, "hypothesis": "Final hypothesis", "confidence": "high"},
],
"images": [],
}
summary = tool._prepare_investigation_summary()
assert "SYSTEMATIC INVESTIGATION SUMMARY" in summary
assert "Files examined: 3" in summary
assert "Relevant files identified: 2" in summary
assert "Methods/functions involved: 2" in summary
assert "INVESTIGATION PROGRESSION" in summary
assert "Step 1:" in summary
assert "Step 2:" in summary
assert "Step 3:" in summary
assert "HYPOTHESIS EVOLUTION" in summary
assert "low confidence" in summary
assert "medium confidence" in summary
assert "high confidence" in summary
def test_extract_error_context(self):
"""Test error context extraction from findings."""
tool = DebugIssueTool()
tool.consolidated_findings = {
"findings": [
"Step 1: Found no issues initially",
"Step 2: Discovered ERROR: Dictionary size changed during iteration",
"Step 3: Stack trace shows RuntimeError in cleanup method",
"Step 4: Exception occurs intermittently",
],
}
error_context = tool._extract_error_context()
assert error_context is not None
assert "ERROR: Dictionary size changed" in error_context
assert "Stack trace shows RuntimeError" in error_context
assert "Exception occurs intermittently" in error_context
assert "Found no issues initially" not in error_context # Should not include non-error findings
def test_reprocess_consolidated_findings(self):
"""Test reprocessing of consolidated findings after backtracking."""
tool = DebugIssueTool()
tool.investigation_history = [
{
"step_number": 1,
"findings": "Initial findings",
"files_checked": ["file1.py"],
"relevant_files": ["file1.py"],
"relevant_methods": ["method1"],
"hypothesis": "Initial hypothesis",
"confidence": "low",
},
{
"step_number": 2,
"findings": "Second findings",
"files_checked": ["file2.py"],
"relevant_files": [],
"relevant_methods": ["method2"],
},
]
tool._reprocess_consolidated_findings()
assert tool.consolidated_findings["files_checked"] == {"file1.py", "file2.py"}
assert tool.consolidated_findings["relevant_files"] == {"file1.py"}
assert tool.consolidated_findings["relevant_methods"] == {"method1", "method2"}
assert len(tool.consolidated_findings["findings"]) == 2
assert len(tool.consolidated_findings["hypotheses"]) == 1
assert tool.consolidated_findings["hypotheses"][0]["hypothesis"] == "Initial hypothesis"
# Integration test
class TestDebugToolIntegration:
"""Integration tests for debug tool."""
def setup_method(self):
"""Set up model context for integration tests."""
from utils.model_context import ModelContext
self.tool = DebugIssueTool()
self.tool._model_context = ModelContext("flash") # Test model
@pytest.mark.asyncio
async def test_complete_investigation_flow(self):
"""Test complete investigation flow from start to expert analysis."""
# Step 1: Initial investigation
arguments = {
"step": "Investigating memory leak in data processing pipeline",
"step_number": 1,
"total_steps": 3,
"next_step_required": True,
"findings": "High memory usage observed during batch processing",
"files_checked": ["/processor/main.py"],
}
# Mock conversation memory and expert analysis
with patch("utils.conversation_memory.create_thread", return_value="debug-flow-uuid"):
with patch("utils.conversation_memory.add_turn"):
result = await self.tool.execute(arguments)
# Verify response structure
# Debug tool now returns "pause_for_investigation" for ongoing steps
assert len(result) == 1
response_text = result[0].text
# Parse the JSON response
import json
parsed_response = json.loads(response_text)
assert parsed_response["status"] == "pause_for_investigation"
assert parsed_response["step_number"] == 1
assert parsed_response["continuation_id"] == "debug-flow-uuid"
@pytest.mark.asyncio
async def test_model_context_initialization_in_expert_analysis(self):
"""Real integration test that model context is properly initialized when expert analysis is called."""
tool = DebugIssueTool()
# Do NOT manually set up model context - let the method do it itself
# Set up investigation state for final step
tool.initial_issue = "Memory leak investigation"
tool.investigation_history = [
{
"step_number": 1,
"step": "Initial investigation",
"findings": "Found memory issues",
"files_checked": [],
}
]
tool.consolidated_findings = {
"files_checked": set(),
"relevant_files": set(), # No files to avoid file I/O in this test
"relevant_methods": {"process_data"},
"findings": ["Step 1: Found memory issues"],
"hypotheses": [],
"images": [],
}
# Test the _call_expert_analysis method directly to verify ModelContext is properly handled
# This is the real test - we're testing that the method can be called without the ModelContext error
try:
# Only mock the API call itself, not the model resolution infrastructure
from unittest.mock import MagicMock
mock_provider = MagicMock()
mock_response = MagicMock()
mock_response.content = '{"status": "analysis_complete", "summary": "Test completed"}'
mock_provider.generate_content.return_value = mock_response
# Use the real get_model_provider method but override its result to avoid API calls
original_get_provider = tool.get_model_provider
tool.get_model_provider = lambda model_name: mock_provider
try:
# Create mock arguments and request for model resolution
from tools.debug import DebugInvestigationRequest
mock_arguments = {"model": None} # No model specified, should fall back to DEFAULT_MODEL
mock_request = DebugInvestigationRequest(
step="Test step", step_number=1, total_steps=1, next_step_required=False, findings="Test findings"
)
# This should NOT raise a ModelContext error - the method should set up context itself
result = await tool._call_expert_analysis(
initial_issue="Test issue",
investigation_summary="Test summary",
relevant_files=[], # Empty to avoid file operations
relevant_methods=["test_method"],
final_hypothesis="Test hypothesis",
error_context=None,
images=[],
model_info=None, # No pre-resolved model info
arguments=mock_arguments, # Provide arguments for model resolution
request=mock_request, # Provide request for model resolution
)
# Should complete without ModelContext error
assert "error" not in result
assert result["status"] == "analysis_complete"
# Verify the model context was actually set up
assert hasattr(tool, "_model_context")
assert hasattr(tool, "_current_model_name")
# Should use DEFAULT_MODEL when no model specified
from config import DEFAULT_MODEL
assert tool._current_model_name == DEFAULT_MODEL
finally:
# Restore original method
tool.get_model_provider = original_get_provider
except RuntimeError as e:
if "ModelContext not initialized" in str(e):
pytest.fail("ModelContext error still occurs - the fix is not working properly")
else:
raise # Re-raise other RuntimeErrors
step_data = tool.prepare_step_data(request)
assert step_data["relevant_context"] == ["method1", "method2"]