Add DocGen tool with comprehensive documentation generation capabilities (#109)

* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docs, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool working as a workflow instead of a basic tool
Support for passing use_assistant_model=False to skip external models entirely and use Claude only
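For instance, a precommit step could opt out of the external model like this (a rough sketch only; the surrounding workflow fields are illustrative and the exact argument schema may differ):

```python
# Hypothetical tool arguments; only "use_assistant_model" is the point here.
arguments = {
    "step": "Validate the staged changes before committing",
    "step_number": 1,
    "total_steps": 1,
    "next_step_required": False,
    "findings": "Reviewed the staged diff locally",
    "use_assistant_model": False,  # skip external models entirely; Claude completes the step alone
}
```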

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: fixed get_completion_next_steps_message missing param

* Fixed tests
Request files consistently

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys
- Fix collaboration tests to properly mock workflow tool expert analysis calls (see the sketch below)
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI
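A minimal sketch of that mocking pattern (the tool import path and the patched method name are assumptions for illustration, not the exact ones in the suite):

```python
from unittest.mock import patch

from tools.codereview import CodeReviewTool  # assumed import path, for illustration only


def test_codereview_metadata_without_api_calls():
    # Patch the assumed expert-analysis hook so no provider API key is needed in CI.
    with patch.object(CodeReviewTool, "_call_expert_analysis", return_value={"status": "analysis_complete"}):
        tool = CodeReviewTool()
        assert tool.get_name() == "codereview"
```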

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

* Should help with https://github.com/BeehiveInnovations/zen-mcp-server/issues/97
Clear the Python cache when running the script (see the sketch below): https://github.com/BeehiveInnovations/zen-mcp-server/issues/96
Improved retry error logging
Cleanup
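A minimal sketch of the cache-clearing step, assuming it simply walks the checkout and deletes bytecode caches (the run script may implement this differently):

```python
import shutil
from pathlib import Path

# Remove stale bytecode caches under the project root so a fresh run never
# imports modules compiled from an older checkout.
for cache_dir in Path(".").rglob("__pycache__"):
    shutil.rmtree(cache_dir, ignore_errors=True)
```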

* WIP - chat tool using new architecture and improved code sharing

* Removed todo

* Cleanup old name

* Tweak wordings
Migrate old tests

* Support for Flash 2.0 and Flash Lite 2.0
Fixed test

* Improved consensus to use the workflow base class

* Allow images

* Replaced old consensus tool

* Cleanup tests

* Tests for prompt size

* New tool: docgen
Tests for prompt size
Fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/107
Use available token size limits: https://github.com/BeehiveInnovations/zen-mcp-server/issues/105
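The token-limit change is roughly in this spirit (a sketch only: the helper names, the ~4-characters-per-token heuristic, and the reserved response budget are assumptions, not the server's actual code):

```python
def estimate_tokens(text: str) -> int:
    # Rough heuristic: roughly 4 characters per token.
    return max(1, len(text) // 4)


def fit_to_budget(content: str, max_context_tokens: int, reserved_for_response: int = 8000) -> str:
    # Trim embedded content to the model's advertised context window instead of a fixed cap.
    budget = max(0, max_context_tokens - reserved_for_response)
    estimated = estimate_tokens(content)
    if estimated <= budget:
        return content
    # Truncate proportionally; a real implementation would cut on file boundaries.
    keep_chars = int(len(content) * (budget / estimated))
    return content[:keep_chars]
```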

* Improved docgen prompt
Exclude TestGen from pytest inclusion
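The pytest exclusion likely boils down to marking the tool class as non-collectable, since its name matches pytest's default Test* pattern (a sketch using the standard `__test__` attribute; the real fix may instead adjust the pytest configuration):

```python
class TestGenTool:
    """Test-generation tool; the class name matches pytest's default Test* pattern."""

    __test__ = False  # tell pytest this is a production class, not a test case
```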

* Updated errors

* Lint

* DocGen instructed not to fix bugs but to surface them and stick to documentation

* WIP

* Stop Claude from being lazy and documenting only a small handful of functions

* More style rules

---------

Co-authored-by: Claude <noreply@anthropic.com>
Commit: c960bcb720 (parent: 0655590a51)
Author: Beehive Innovations
Date: 2025-06-21 23:21:19 -07:00
Committed by: GitHub
58 changed files with 5492 additions and 5558 deletions

@@ -1,220 +1,401 @@
"""
Tests for the Consensus tool
Tests for the Consensus tool using WorkflowTool architecture.
"""
import json
from unittest.mock import patch
from unittest.mock import Mock, patch
import pytest
from tools.consensus import ConsensusTool, ModelConfig
from tools.consensus import ConsensusRequest, ConsensusTool
from tools.models import ToolModelCategory
class TestConsensusTool:
"""Test cases for the Consensus tool"""
def setup_method(self):
"""Set up test fixtures"""
self.tool = ConsensusTool()
"""Test suite for ConsensusTool using WorkflowTool architecture."""
def test_tool_metadata(self):
"""Test tool metadata is correct"""
assert self.tool.get_name() == "consensus"
assert "MULTI-MODEL CONSENSUS" in self.tool.get_description()
assert self.tool.get_default_temperature() == 0.2
"""Test basic tool metadata and configuration."""
tool = ConsensusTool()
def test_input_schema(self):
"""Test input schema is properly defined"""
schema = self.tool.get_input_schema()
assert schema["type"] == "object"
assert "prompt" in schema["properties"]
assert tool.get_name() == "consensus"
assert "COMPREHENSIVE CONSENSUS WORKFLOW" in tool.get_description()
assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL
assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
assert tool.requires_model() is True

    def test_request_validation_step1(self):
        """Test Pydantic request model validation for step 1."""
        # Valid step 1 request with models
        step1_request = ConsensusRequest(
            step="Analyzing the real-time collaboration proposal",
            step_number=1,
            total_steps=4,  # 1 (Claude) + 2 models + 1 (synthesis)
            next_step_required=True,
            findings="Initial assessment shows strong value but technical complexity",
            confidence="medium",
            models=[{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
            relevant_files=["/proposal.md"],
        )

        assert step1_request.step_number == 1
        assert step1_request.confidence == "medium"
        assert len(step1_request.models) == 2
        assert step1_request.models[0]["model"] == "flash"

    def test_request_validation_missing_models_step1(self):
        """Test that step 1 requires models field."""
        with pytest.raises(ValueError, match="Step 1 requires 'models' field"):
            ConsensusRequest(
                step="Test step",
                step_number=1,
                total_steps=3,
                next_step_required=True,
                findings="Test findings",
                # Missing models field
            )

    def test_request_validation_later_steps(self):
        """Test request validation for steps 2+."""
        # Step 2+ doesn't require models field
        step2_request = ConsensusRequest(
            step="Processing first model response",
            step_number=2,
            total_steps=4,
            next_step_required=True,
            findings="Model provided supportive perspective",
            confidence="medium",
            continuation_id="test-id",
            current_model_index=1,
        )

        assert step2_request.step_number == 2
        assert step2_request.models is None  # Not required after step 1

    def test_request_validation_duplicate_model_stance(self):
        """Test that duplicate model+stance combinations are rejected."""
        # Valid: same model with different stances
        valid_request = ConsensusRequest(
            step="Analyze this proposal",
            step_number=1,
            total_steps=1,
            next_step_required=True,
            findings="Initial analysis",
            models=[
                {"model": "o3", "stance": "for"},
                {"model": "o3", "stance": "against"},
                {"model": "flash", "stance": "neutral"},
            ],
            continuation_id="test-id",
        )
        assert len(valid_request.models) == 3

        # Invalid: duplicate model+stance combination
        with pytest.raises(ValueError, match="Duplicate model \\+ stance combination"):
            ConsensusRequest(
                step="Analyze this proposal",
                step_number=1,
                total_steps=1,
                next_step_required=True,
                findings="Initial analysis",
                models=[
                    {"model": "o3", "stance": "for"},
                    {"model": "flash", "stance": "neutral"},
                    {"model": "o3", "stance": "for"},  # Duplicate!
                ],
                continuation_id="test-id",
            )

    def test_input_schema_generation(self):
        """Test that input schema is generated correctly."""
        tool = ConsensusTool()
        schema = tool.get_input_schema()

        # Verify consensus workflow fields are present
        assert "step" in schema["properties"]
        assert "step_number" in schema["properties"]
        assert "total_steps" in schema["properties"]
        assert "next_step_required" in schema["properties"]
        assert "findings" in schema["properties"]
        # confidence field should be excluded
        assert "confidence" not in schema["properties"]
        assert "models" in schema["properties"]
        # relevant_files should also be excluded
        assert "relevant_files" not in schema["properties"]

        # Check that schema includes model configuration information
        models_desc = schema["properties"]["models"]["description"]
        # Check description includes object format
        assert "model configurations" in models_desc
        assert "specific stance and custom instructions" in models_desc
        # Check example shows new format
        assert "'model': 'o3'" in models_desc
        assert "'stance': 'for'" in models_desc
        assert "'stance_prompt'" in models_desc

        # Verify workflow fields that should NOT be present
        assert "files_checked" not in schema["properties"]
        assert "hypothesis" not in schema["properties"]
        assert "issues_found" not in schema["properties"]
        assert "temperature" not in schema["properties"]
        assert "thinking_mode" not in schema["properties"]
        assert "use_websearch" not in schema["properties"]

        # Images should be present now
        assert "images" in schema["properties"]
        assert schema["properties"]["images"]["type"] == "array"
        assert schema["properties"]["images"]["items"]["type"] == "string"

        # Verify field types
        assert schema["properties"]["step"]["type"] == "string"
        assert schema["properties"]["step_number"]["type"] == "integer"
        assert schema["properties"]["models"]["type"] == "array"

        # Verify models array structure
        models_items = schema["properties"]["models"]["items"]
        assert models_items["type"] == "object"
        assert "model" in models_items["properties"]
        assert "stance" in models_items["properties"]
        assert "stance_prompt" in models_items["properties"]

    def test_get_required_actions(self):
        """Test required actions for different consensus phases."""
        tool = ConsensusTool()

        # Step 1: Claude's initial analysis
        actions = tool.get_required_actions(1, "exploring", "Initial findings", 4)
        assert any("initial analysis" in action for action in actions)
        assert any("consult other models" in action for action in actions)

        # Step 2-3: Model consultations
        actions = tool.get_required_actions(2, "medium", "Model findings", 4)
        assert any("Review the model response" in action for action in actions)

        # Final step: Synthesis
        actions = tool.get_required_actions(4, "high", "All findings", 4)
        assert any("All models have been consulted" in action for action in actions)
        assert any("Synthesize all perspectives" in action for action in actions)

    def test_prepare_step_data(self):
        """Test step data preparation for consensus workflow."""
        tool = ConsensusTool()
        request = ConsensusRequest(
            step="Test step",
            step_number=1,
            total_steps=3,
            next_step_required=True,
            findings="Test findings",
            confidence="medium",
            models=[{"model": "test"}],
            relevant_files=["/test.py"],
        )

        step_data = tool.prepare_step_data(request)

        # Verify consensus-specific fields
        assert step_data["step"] == "Test step"
        assert step_data["findings"] == "Test findings"
        assert step_data["relevant_files"] == ["/test.py"]

        # Verify unused workflow fields are empty
        assert step_data["files_checked"] == []
        assert step_data["relevant_context"] == []
        assert step_data["issues_found"] == []
        assert step_data["hypothesis"] is None

    def test_stance_enhanced_prompt_generation(self):
        """Test stance-enhanced prompt generation."""
        tool = ConsensusTool()

        # Test different stances
        for_prompt = tool._get_stance_enhanced_prompt("for")
        assert "SUPPORTIVE PERSPECTIVE" in for_prompt

        against_prompt = tool._get_stance_enhanced_prompt("against")
        assert "CRITICAL PERSPECTIVE" in against_prompt

        neutral_prompt = tool._get_stance_enhanced_prompt("neutral")
        assert "BALANCED PERSPECTIVE" in neutral_prompt

        # Test custom stance prompt
        custom = "Focus on specific aspects"
        custom_prompt = tool._get_stance_enhanced_prompt("for", custom)
        assert custom in custom_prompt
        assert "SUPPORTIVE PERSPECTIVE" not in custom_prompt

    def test_should_call_expert_analysis(self):
        """Test that consensus workflow doesn't use expert analysis."""
        tool = ConsensusTool()
        assert tool.should_call_expert_analysis({}) is False
        assert tool.requires_expert_analysis() is False

    @pytest.mark.asyncio
    async def test_execute_workflow_step1(self):
        """Test workflow execution for step 1."""
        tool = ConsensusTool()

        arguments = {
            "step": "Initial analysis of proposal",
            "step_number": 1,
            "total_steps": 4,
            "next_step_required": True,
            "findings": "Found pros and cons",
            "confidence": "medium",
            "models": [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
            "relevant_files": ["/proposal.md"],
        }

        with patch.object(tool, "is_effective_auto_mode", return_value=False):
            with patch.object(tool, "get_model_provider", return_value=Mock()):
                result = await tool.execute_workflow(arguments)

        assert len(result) == 1
        response_text = result[0].text
        response_data = json.loads(response_text)

        # Verify step 1 response structure
        assert response_data["status"] == "consulting_models"
        assert response_data["step_number"] == 1
        assert "continuation_id" in response_data

    @pytest.mark.asyncio
    async def test_execute_workflow_model_consultation(self):
        """Test workflow execution for model consultation steps."""
        tool = ConsensusTool()
        tool.models_to_consult = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]
        tool.initial_prompt = "Test prompt"

        arguments = {
            "step": "Processing model response",
            "step_number": 2,
            "total_steps": 4,
            "next_step_required": True,
            "findings": "Model provided perspective",
            "confidence": "medium",
            "continuation_id": "test-id",
            "current_model_index": 0,
        }

        # Mock the _consult_model method instead to return a proper dict
        mock_model_response = {
            "model": "flash",
            "stance": "neutral",
            "status": "success",
            "verdict": "Model analysis response",
            "metadata": {"provider": "gemini"},
        }

        with patch.object(tool, "_consult_model", return_value=mock_model_response):
            result = await tool.execute_workflow(arguments)

        assert len(result) == 1
        response_text = result[0].text
        response_data = json.loads(response_text)

        # Verify model consultation response
        assert response_data["status"] == "model_consulted"
        assert response_data["model_consulted"] == "flash"
        assert response_data["model_stance"] == "neutral"
        assert "model_response" in response_data
        assert response_data["model_response"]["status"] == "success"

    @pytest.mark.asyncio
    async def test_consult_model_error_handling(self):
        """Test error handling in model consultation."""
        tool = ConsensusTool()
        tool.initial_prompt = "Test prompt"

        # Mock provider to raise an error
        mock_provider = Mock()
        mock_provider.generate_content.side_effect = Exception("Model error")

        with patch.object(tool, "get_model_provider", return_value=mock_provider):
            result = await tool._consult_model(
                {"model": "test-model", "stance": "neutral"}, Mock(relevant_files=[], continuation_id=None, images=None)
            )

        assert result["status"] == "error"
        assert result["error"] == "Model error"
        assert result["model"] == "test-model"

    @pytest.mark.asyncio
    async def test_consult_model_with_images(self):
        """Test model consultation with images."""
        tool = ConsensusTool()
        tool.initial_prompt = "Test prompt"

        # Mock provider
        mock_provider = Mock()
        mock_response = Mock(content="Model response with image analysis")
        mock_provider.generate_content.return_value = mock_response
        mock_provider.get_provider_type.return_value = Mock(value="gemini")

        test_images = ["/path/to/image1.png", "/path/to/image2.jpg"]

        with patch.object(tool, "get_model_provider", return_value=mock_provider):
            result = await tool._consult_model(
                {"model": "test-model", "stance": "neutral"},
                Mock(relevant_files=[], continuation_id=None, images=test_images),
            )

        # Verify that images were passed to generate_content
        mock_provider.generate_content.assert_called_once()
        call_args = mock_provider.generate_content.call_args
        assert call_args.kwargs.get("images") == test_images

        assert result["status"] == "success"
        assert result["model"] == "test-model"

    @pytest.mark.asyncio
    async def test_handle_work_completion(self):
        """Test work completion handling for consensus workflow."""
        tool = ConsensusTool()
        tool.initial_prompt = "Test prompt"
        tool.accumulated_responses = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]

        request = Mock(confidence="high")
        response_data = {}

        result = await tool.handle_work_completion(response_data, request, {})

        assert result["consensus_complete"] is True
        assert result["status"] == "consensus_workflow_complete"
        assert "complete_consensus" in result
        assert result["complete_consensus"]["models_consulted"] == ["flash:neutral", "o3-mini:for"]
        assert result["complete_consensus"]["total_responses"] == 2

    def test_handle_work_continuation(self):
        """Test work continuation handling between steps."""
        tool = ConsensusTool()
        tool.models_to_consult = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]

        # Test after step 1
        request = Mock(step_number=1, current_model_index=0)
        response_data = {}
        result = tool.handle_work_continuation(response_data, request)
        assert result["status"] == "consulting_models"
        assert result["next_model"] == {"model": "flash", "stance": "neutral"}

        # Test between model consultations
        request = Mock(step_number=2, current_model_index=1)
        response_data = {}
        result = tool.handle_work_continuation(response_data, request)
        assert result["status"] == "consulting_next_model"
        assert result["next_model"] == {"model": "o3-mini", "stance": "for"}
        assert result["models_remaining"] == 1

    def test_customize_workflow_response(self):
        """Test response customization for consensus workflow."""
        tool = ConsensusTool()
        tool.accumulated_responses = [{"model": "test", "response": "data"}]

        # Test different step numbers
        request = Mock(step_number=1, total_steps=4)
        response_data = {}
        result = tool.customize_workflow_response(response_data, request)
        assert result["consensus_workflow_status"] == "initial_analysis_complete"

        request = Mock(step_number=2, total_steps=4)
        response_data = {}
        result = tool.customize_workflow_response(response_data, request)
        assert result["consensus_workflow_status"] == "consulting_models"

        request = Mock(step_number=4, total_steps=4)
        response_data = {}
        result = tool.customize_workflow_response(response_data, request)
        assert result["consensus_workflow_status"] == "ready_for_synthesis"

if __name__ == "__main__":