Add DocGen tool with comprehensive documentation generation capabilities (#109)

* WIP: new workflow architecture * WIP: further improvements and cleanup * WIP: cleanup and docs, replace old tool with new * WIP: cleanup and docs, replace old tool with new * WIP: new planner implementation using workflow * WIP: precommit tool working as a workflow instead of a basic tool Support for passing False to use_assistant_model to skip external models completely and use Claude only * WIP: precommit workflow version swapped with old * WIP: codereview * WIP: replaced codereview * WIP: replaced codereview * WIP: replaced refactor * WIP: workflow for thinkdeep * WIP: ensure files get embedded correctly * WIP: thinkdeep replaced with workflow version * WIP: improved messaging when an external model's response is received * WIP: analyze tool swapped * WIP: updated tests * Extract only the content when building history * Use "relevant_files" for workflow tools only * WIP: updated tests * Extract only the content when building history * Use "relevant_files" for workflow tools only * WIP: fixed get_completion_next_steps_message missing param * Fixed tests Request for files consistently * Fixed tests Request for files consistently * Fixed tests * New testgen workflow tool Updated docs * Swap testgen workflow * Fix CI test failures by excluding API-dependent tests - Update GitHub Actions workflow to exclude simulation tests that require API keys - Fix collaboration tests to properly mock workflow tool expert analysis calls - Update test assertions to handle new workflow tool response format - Ensure unit tests run without external API dependencies in CI 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * WIP - Update tests to match new tools * WIP - Update tests to match new tools * WIP - Update tests to match new tools * Should help with https://github.com/BeehiveInnovations/zen-mcp-server/issues/97 Clear python cache when running script: https://github.com/BeehiveInnovations/zen-mcp-server/issues/96 Improved 
retry error logging Cleanup * WIP - chat tool using new architecture and improved code sharing * Removed todo * Removed todo * Cleanup old name * Tweak wordings * Tweak wordings Migrate old tests * Support for Flash 2.0 and Flash Lite 2.0 * Support for Flash 2.0 and Flash Lite 2.0 * Support for Flash 2.0 and Flash Lite 2.0 Fixed test * Improved consensus to use the workflow base class * Improved consensus to use the workflow base class * Allow images * Allow images * Replaced old consensus tool * Cleanup tests * Tests for prompt size * New tool: docgen Tests for prompt size Fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/107 Use available token size limits: https://github.com/BeehiveInnovations/zen-mcp-server/issues/105 * Improved docgen prompt Exclude TestGen from pytest inclusion * Updated errors * Lint * DocGen instructed not to fix bugs, surface them and stick to d * WIP * Stop claude from being lazy and only documenting a small handful * More style rules --------- Co-authored-by: Claude <noreply@anthropic.com>
2025-06-21 23:21:19 -07:00
parent 0655590a51
commit c960bcb720
58 changed files with 5492 additions and 5558 deletions
--- a/tests/test_model_resolution_bug.py
+++ b/tests/test_model_resolution_bug.py
@@ -11,7 +11,7 @@ from unittest.mock import Mock, patch

 from providers.base import ProviderType
 from providers.openrouter import OpenRouterProvider
-from tools.consensus import ConsensusTool, ModelConfig
+from tools.consensus import ConsensusTool


 class TestModelResolutionBug:
@@ -41,7 +41,8 @@ class TestModelResolutionBug:

    @patch.dict("os.environ", {"OPENROUTER_API_KEY": "test_key"}, clear=False)
    def test_consensus_tool_model_resolution_bug_reproduction(self):
-        """Reproduce the actual bug: consensus tool with 'gemini' model should resolve correctly."""
+        """Test that the new consensus workflow tool properly handles OpenRouter model resolution."""
+        import asyncio

        # Create a mock OpenRouter provider that tracks what model names it receives
        mock_provider = Mock(spec=OpenRouterProvider)
@@ -64,39 +65,31 @@ class TestModelResolutionBug:

        # Mock the get_model_provider to return our mock
        with patch.object(self.consensus_tool, "get_model_provider", return_value=mock_provider):
-            # Mock the prepare_prompt method
-            with patch.object(self.consensus_tool, "prepare_prompt", return_value="test prompt"):
+            # Set initial prompt
+            self.consensus_tool.initial_prompt = "Test prompt"

-                # Create consensus request with 'gemini' model
-                model_config = ModelConfig(model="gemini", stance="neutral")
-                request = Mock()
-                request.models = [model_config]
-                request.prompt = "Test prompt"
-                request.temperature = 0.2
-                request.thinking_mode = "medium"
-                request.images = []
-                request.continuation_id = None
-                request.files = []
-                request.focus_areas = []
+            # Create a mock request
+            request = Mock()
+            request.relevant_files = []
+            request.continuation_id = None
+            request.images = None

-                # Mock the provider configs generation
-                provider_configs = [(mock_provider, model_config)]
+            # Test model consultation directly
+            result = asyncio.run(self.consensus_tool._consult_model({"model": "gemini", "stance": "neutral"}, request))

-                # Call the method that causes the bug
-                self.consensus_tool._get_consensus_responses(provider_configs, "test prompt", request)
+            # Verify that generate_content was called
+            assert len(received_model_names) == 1

-                # Verify that generate_content was called
-                assert len(received_model_names) == 1
+            # The consensus tool should pass the original alias "gemini"
+            # The OpenRouter provider should resolve it internally
+            received_model = received_model_names[0]
+            print(f"Model name passed to provider: {received_model}")

-                # THIS IS THE BUG: We expect the model name to still be "gemini"
-                # because the OpenRouter provider should handle resolution internally
-                # If this assertion fails, it means the bug is elsewhere
-                received_model = received_model_names[0]
-                print(f"Model name passed to provider: {received_model}")
+            assert received_model == "gemini", f"Expected 'gemini' to be passed to provider, got '{received_model}'"

-                # The consensus tool should pass the original alias "gemini"
-                # The OpenRouter provider should resolve it internally
-                assert received_model == "gemini", f"Expected 'gemini' to be passed to provider, got '{received_model}'"
+            # Verify the result structure
+            assert result["model"] == "gemini"
+            assert result["status"] == "success"

    def test_bug_reproduction_with_malformed_model_name(self):
        """Test what happens when 'gemini-2.5-pro' (malformed) is passed to OpenRouter."""