Add DocGen tool with comprehensive documentation generation capabilities (#109)
* WIP: new workflow architecture * WIP: further improvements and cleanup * WIP: cleanup and docs, replace old tool with new * WIP: cleanup and docs, replace old tool with new * WIP: new planner implementation using workflow * WIP: precommit tool working as a workflow instead of a basic tool Support for passing False to use_assistant_model to skip external models completely and use Claude only * WIP: precommit workflow version swapped with old * WIP: codereview * WIP: replaced codereview * WIP: replaced codereview * WIP: replaced refactor * WIP: workflow for thinkdeep * WIP: ensure files get embedded correctly * WIP: thinkdeep replaced with workflow version * WIP: improved messaging when an external model's response is received * WIP: analyze tool swapped * WIP: updated tests * Extract only the content when building history * Use "relevant_files" for workflow tools only * WIP: updated tests * Extract only the content when building history * Use "relevant_files" for workflow tools only * WIP: fixed get_completion_next_steps_message missing param * Fixed tests Request for files consistently * Fixed tests Request for files consistently * Fixed tests * New testgen workflow tool Updated docs * Swap testgen workflow * Fix CI test failures by excluding API-dependent tests - Update GitHub Actions workflow to exclude simulation tests that require API keys - Fix collaboration tests to properly mock workflow tool expert analysis calls - Update test assertions to handle new workflow tool response format - Ensure unit tests run without external API dependencies in CI 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * WIP - Update tests to match new tools * WIP - Update tests to match new tools * WIP - Update tests to match new tools * Should help with https://github.com/BeehiveInnovations/zen-mcp-server/issues/97 Clear Python cache when running script: https://github.com/BeehiveInnovations/zen-mcp-server/issues/96 Improved
retry error logging Cleanup * WIP - chat tool using new architecture and improved code sharing * Removed todo * Removed todo * Cleanup old name * Tweak wordings * Tweak wordings Migrate old tests * Support for Flash 2.0 and Flash Lite 2.0 * Support for Flash 2.0 and Flash Lite 2.0 * Support for Flash 2.0 and Flash Lite 2.0 Fixed test * Improved consensus to use the workflow base class * Improved consensus to use the workflow base class * Allow images * Allow images * Replaced old consensus tool * Cleanup tests * Tests for prompt size * New tool: docgen Tests for prompt size Fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/107 Use available token size limits: https://github.com/BeehiveInnovations/zen-mcp-server/issues/105 * Improved docgen prompt Exclude TestGen from pytest inclusion * Updated errors * Lint * DocGen instructed not to fix bugs, surface them and stick to documentation * WIP * Stop Claude from being lazy and only documenting a small handful * More style rules --------- Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
0655590a51
commit
c960bcb720
@@ -11,7 +11,7 @@ from unittest.mock import Mock, patch
|
||||
|
||||
from providers.base import ProviderType
|
||||
from providers.openrouter import OpenRouterProvider
|
||||
from tools.consensus import ConsensusTool, ModelConfig
|
||||
from tools.consensus import ConsensusTool
|
||||
|
||||
|
||||
class TestModelResolutionBug:
|
||||
@@ -41,7 +41,8 @@ class TestModelResolutionBug:
|
||||
|
||||
@patch.dict("os.environ", {"OPENROUTER_API_KEY": "test_key"}, clear=False)
|
||||
def test_consensus_tool_model_resolution_bug_reproduction(self):
|
||||
"""Reproduce the actual bug: consensus tool with 'gemini' model should resolve correctly."""
|
||||
"""Test that the new consensus workflow tool properly handles OpenRouter model resolution."""
|
||||
import asyncio
|
||||
|
||||
# Create a mock OpenRouter provider that tracks what model names it receives
|
||||
mock_provider = Mock(spec=OpenRouterProvider)
|
||||
@@ -64,39 +65,31 @@ class TestModelResolutionBug:
|
||||
|
||||
# Mock the get_model_provider to return our mock
|
||||
with patch.object(self.consensus_tool, "get_model_provider", return_value=mock_provider):
|
||||
# Mock the prepare_prompt method
|
||||
with patch.object(self.consensus_tool, "prepare_prompt", return_value="test prompt"):
|
||||
# Set initial prompt
|
||||
self.consensus_tool.initial_prompt = "Test prompt"
|
||||
|
||||
# Create consensus request with 'gemini' model
|
||||
model_config = ModelConfig(model="gemini", stance="neutral")
|
||||
request = Mock()
|
||||
request.models = [model_config]
|
||||
request.prompt = "Test prompt"
|
||||
request.temperature = 0.2
|
||||
request.thinking_mode = "medium"
|
||||
request.images = []
|
||||
request.continuation_id = None
|
||||
request.files = []
|
||||
request.focus_areas = []
|
||||
# Create a mock request
|
||||
request = Mock()
|
||||
request.relevant_files = []
|
||||
request.continuation_id = None
|
||||
request.images = None
|
||||
|
||||
# Mock the provider configs generation
|
||||
provider_configs = [(mock_provider, model_config)]
|
||||
# Test model consultation directly
|
||||
result = asyncio.run(self.consensus_tool._consult_model({"model": "gemini", "stance": "neutral"}, request))
|
||||
|
||||
# Call the method that causes the bug
|
||||
self.consensus_tool._get_consensus_responses(provider_configs, "test prompt", request)
|
||||
# Verify that generate_content was called
|
||||
assert len(received_model_names) == 1
|
||||
|
||||
# Verify that generate_content was called
|
||||
assert len(received_model_names) == 1
|
||||
# The consensus tool should pass the original alias "gemini"
|
||||
# The OpenRouter provider should resolve it internally
|
||||
received_model = received_model_names[0]
|
||||
print(f"Model name passed to provider: {received_model}")
|
||||
|
||||
# THIS IS THE BUG: We expect the model name to still be "gemini"
|
||||
# because the OpenRouter provider should handle resolution internally
|
||||
# If this assertion fails, it means the bug is elsewhere
|
||||
received_model = received_model_names[0]
|
||||
print(f"Model name passed to provider: {received_model}")
|
||||
assert received_model == "gemini", f"Expected 'gemini' to be passed to provider, got '{received_model}'"
|
||||
|
||||
# The consensus tool should pass the original alias "gemini"
|
||||
# The OpenRouter provider should resolve it internally
|
||||
assert received_model == "gemini", f"Expected 'gemini' to be passed to provider, got '{received_model}'"
|
||||
# Verify the result structure
|
||||
assert result["model"] == "gemini"
|
||||
assert result["status"] == "success"
|
||||
|
||||
def test_bug_reproduction_with_malformed_model_name(self):
|
||||
"""Test what happens when 'gemini-2.5-pro' (malformed) is passed to OpenRouter."""
|
||||
|
||||
Reference in New Issue
Block a user