Files
my-pal-mcp-server/tests/test_consensus.py
Beehive Innovations c960bcb720 Add DocGen tool with comprehensive documentation generation capabilities (#109)
* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docs, replace old tool with new

* WIP: cleanup and docs, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool working as a workflow instead of a basic tool
Support for passing False to use_assistant_model to skip external models completely and use Claude only

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: fixed get_completion_next_steps_message missing param

* Fixed tests
Request for files consistently

* Fixed tests
Request for files consistently

* Fixed tests

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys
- Fix collaboration tests to properly mock workflow tool expert analysis calls
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

* WIP - Update tests to match new tools

* WIP - Update tests to match new tools

* Should help with https://github.com/BeehiveInnovations/zen-mcp-server/issues/97
Clear python cache when running script: https://github.com/BeehiveInnovations/zen-mcp-server/issues/96
Improved retry error logging
Cleanup

* WIP - chat tool using new architecture and improved code sharing

* Removed todo

* Removed todo

* Cleanup old name

* Tweak wordings

* Tweak wordings
Migrate old tests

* Support for Flash 2.0 and Flash Lite 2.0

* Support for Flash 2.0 and Flash Lite 2.0

* Support for Flash 2.0 and Flash Lite 2.0
Fixed test

* Improved consensus to use the workflow base class

* Improved consensus to use the workflow base class

* Allow images

* Allow images

* Replaced old consensus tool

* Cleanup tests

* Tests for prompt size

* New tool: docgen
Tests for prompt size
Fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/107
Use available token size limits: https://github.com/BeehiveInnovations/zen-mcp-server/issues/105

* Improved docgen prompt
Exclude TestGen from pytest inclusion

* Updated errors

* Lint

* DocGen instructed not to fix bugs, surface them and stick to documentation

* WIP

* Stop claude from being lazy and only documenting a small handful

* More style rules

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-22 10:21:19 +04:00

405 lines
16 KiB
Python

"""
Tests for the Consensus tool using WorkflowTool architecture.
"""
import json
from unittest.mock import Mock, patch
import pytest
from tools.consensus import ConsensusRequest, ConsensusTool
from tools.models import ToolModelCategory
class TestConsensusTool:
    """Test suite for ConsensusTool using the WorkflowTool architecture.

    Covers metadata, Pydantic request validation, schema generation, the
    step-by-step workflow (initial analysis -> per-model consultation ->
    synthesis), stance-prompt handling, and error paths. Model providers
    are mocked throughout; no external API calls are made.
    """

    def test_tool_metadata(self):
        """Test basic tool metadata and configuration."""
        tool = ConsensusTool()

        assert tool.get_name() == "consensus"
        assert "COMPREHENSIVE CONSENSUS WORKFLOW" in tool.get_description()
        assert tool.get_default_temperature() == 0.2  # TEMPERATURE_ANALYTICAL
        assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
        assert tool.requires_model() is True

    def test_request_validation_step1(self):
        """Test Pydantic request model validation for step 1."""
        # Valid step 1 request with models
        step1_request = ConsensusRequest(
            step="Analyzing the real-time collaboration proposal",
            step_number=1,
            total_steps=4,  # 1 (Claude) + 2 models + 1 (synthesis)
            next_step_required=True,
            findings="Initial assessment shows strong value but technical complexity",
            confidence="medium",
            models=[{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
            relevant_files=["/proposal.md"],
        )

        assert step1_request.step_number == 1
        assert step1_request.confidence == "medium"
        assert len(step1_request.models) == 2
        assert step1_request.models[0]["model"] == "flash"

    def test_request_validation_missing_models_step1(self):
        """Test that step 1 requires models field."""
        with pytest.raises(ValueError, match="Step 1 requires 'models' field"):
            ConsensusRequest(
                step="Test step",
                step_number=1,
                total_steps=3,
                next_step_required=True,
                findings="Test findings",
                # Missing models field
            )

    def test_request_validation_later_steps(self):
        """Test request validation for steps 2+."""
        # Step 2+ doesn't require models field
        step2_request = ConsensusRequest(
            step="Processing first model response",
            step_number=2,
            total_steps=4,
            next_step_required=True,
            findings="Model provided supportive perspective",
            confidence="medium",
            continuation_id="test-id",
            current_model_index=1,
        )

        assert step2_request.step_number == 2
        assert step2_request.models is None  # Not required after step 1

    def test_request_validation_duplicate_model_stance(self):
        """Test that duplicate model+stance combinations are rejected."""
        # Valid: same model with different stances
        valid_request = ConsensusRequest(
            step="Analyze this proposal",
            step_number=1,
            total_steps=1,
            next_step_required=True,
            findings="Initial analysis",
            models=[
                {"model": "o3", "stance": "for"},
                {"model": "o3", "stance": "against"},
                {"model": "flash", "stance": "neutral"},
            ],
            continuation_id="test-id",
        )
        assert len(valid_request.models) == 3

        # Invalid: duplicate model+stance combination
        with pytest.raises(ValueError, match="Duplicate model \\+ stance combination"):
            ConsensusRequest(
                step="Analyze this proposal",
                step_number=1,
                total_steps=1,
                next_step_required=True,
                findings="Initial analysis",
                models=[
                    {"model": "o3", "stance": "for"},
                    {"model": "flash", "stance": "neutral"},
                    {"model": "o3", "stance": "for"},  # Duplicate!
                ],
                continuation_id="test-id",
            )

    def test_input_schema_generation(self):
        """Test that input schema is generated correctly."""
        tool = ConsensusTool()
        schema = tool.get_input_schema()

        # Verify consensus workflow fields are present
        assert "step" in schema["properties"]
        assert "step_number" in schema["properties"]
        assert "total_steps" in schema["properties"]
        assert "next_step_required" in schema["properties"]
        assert "findings" in schema["properties"]
        # confidence field should be excluded
        assert "confidence" not in schema["properties"]
        assert "models" in schema["properties"]
        # relevant_files should also be excluded
        assert "relevant_files" not in schema["properties"]

        # Verify workflow fields that should NOT be present
        assert "files_checked" not in schema["properties"]
        assert "hypothesis" not in schema["properties"]
        assert "issues_found" not in schema["properties"]
        assert "temperature" not in schema["properties"]
        assert "thinking_mode" not in schema["properties"]
        assert "use_websearch" not in schema["properties"]

        # Images should be present now
        assert "images" in schema["properties"]
        assert schema["properties"]["images"]["type"] == "array"
        assert schema["properties"]["images"]["items"]["type"] == "string"

        # Verify field types
        assert schema["properties"]["step"]["type"] == "string"
        assert schema["properties"]["step_number"]["type"] == "integer"
        assert schema["properties"]["models"]["type"] == "array"

        # Verify models array structure
        models_items = schema["properties"]["models"]["items"]
        assert models_items["type"] == "object"
        assert "model" in models_items["properties"]
        assert "stance" in models_items["properties"]
        assert "stance_prompt" in models_items["properties"]

    def test_get_required_actions(self):
        """Test required actions for different consensus phases."""
        tool = ConsensusTool()

        # Step 1: Claude's initial analysis
        actions = tool.get_required_actions(1, "exploring", "Initial findings", 4)
        assert any("initial analysis" in action for action in actions)
        assert any("consult other models" in action for action in actions)

        # Step 2-3: Model consultations
        actions = tool.get_required_actions(2, "medium", "Model findings", 4)
        assert any("Review the model response" in action for action in actions)

        # Final step: Synthesis
        actions = tool.get_required_actions(4, "high", "All findings", 4)
        assert any("All models have been consulted" in action for action in actions)
        assert any("Synthesize all perspectives" in action for action in actions)

    def test_prepare_step_data(self):
        """Test step data preparation for consensus workflow."""
        tool = ConsensusTool()
        request = ConsensusRequest(
            step="Test step",
            step_number=1,
            total_steps=3,
            next_step_required=True,
            findings="Test findings",
            confidence="medium",
            models=[{"model": "test"}],
            relevant_files=["/test.py"],
        )

        step_data = tool.prepare_step_data(request)

        # Verify consensus-specific fields
        assert step_data["step"] == "Test step"
        assert step_data["findings"] == "Test findings"
        assert step_data["relevant_files"] == ["/test.py"]

        # Verify unused workflow fields are empty
        assert step_data["files_checked"] == []
        assert step_data["relevant_context"] == []
        assert step_data["issues_found"] == []
        assert step_data["hypothesis"] is None

    def test_stance_enhanced_prompt_generation(self):
        """Test stance-enhanced prompt generation."""
        tool = ConsensusTool()

        # Test different stances
        for_prompt = tool._get_stance_enhanced_prompt("for")
        assert "SUPPORTIVE PERSPECTIVE" in for_prompt

        against_prompt = tool._get_stance_enhanced_prompt("against")
        assert "CRITICAL PERSPECTIVE" in against_prompt

        neutral_prompt = tool._get_stance_enhanced_prompt("neutral")
        assert "BALANCED PERSPECTIVE" in neutral_prompt

        # Test custom stance prompt: it should replace the built-in stance text
        custom = "Focus on specific aspects"
        custom_prompt = tool._get_stance_enhanced_prompt("for", custom)
        assert custom in custom_prompt
        assert "SUPPORTIVE PERSPECTIVE" not in custom_prompt

    def test_should_call_expert_analysis(self):
        """Test that consensus workflow doesn't use expert analysis."""
        tool = ConsensusTool()

        assert tool.should_call_expert_analysis({}) is False
        assert tool.requires_expert_analysis() is False

    @pytest.mark.asyncio
    async def test_execute_workflow_step1(self):
        """Test workflow execution for step 1."""
        tool = ConsensusTool()
        arguments = {
            "step": "Initial analysis of proposal",
            "step_number": 1,
            "total_steps": 4,
            "next_step_required": True,
            "findings": "Found pros and cons",
            "confidence": "medium",
            "models": [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
            "relevant_files": ["/proposal.md"],
        }

        # Disable auto mode and stub the provider so no real model is resolved
        with patch.object(tool, "is_effective_auto_mode", return_value=False):
            with patch.object(tool, "get_model_provider", return_value=Mock()):
                result = await tool.execute_workflow(arguments)

        assert len(result) == 1
        response_text = result[0].text
        response_data = json.loads(response_text)

        # Verify step 1 response structure
        assert response_data["status"] == "consulting_models"
        assert response_data["step_number"] == 1
        assert "continuation_id" in response_data

    @pytest.mark.asyncio
    async def test_execute_workflow_model_consultation(self):
        """Test workflow execution for model consultation steps."""
        tool = ConsensusTool()
        # Seed workflow state normally established by step 1
        tool.models_to_consult = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]
        tool.initial_prompt = "Test prompt"
        arguments = {
            "step": "Processing model response",
            "step_number": 2,
            "total_steps": 4,
            "next_step_required": True,
            "findings": "Model provided perspective",
            "confidence": "medium",
            "continuation_id": "test-id",
            "current_model_index": 0,
        }

        # Mock the _consult_model method instead to return a proper dict
        mock_model_response = {
            "model": "flash",
            "stance": "neutral",
            "status": "success",
            "verdict": "Model analysis response",
            "metadata": {"provider": "gemini"},
        }

        with patch.object(tool, "_consult_model", return_value=mock_model_response):
            result = await tool.execute_workflow(arguments)

        assert len(result) == 1
        response_text = result[0].text
        response_data = json.loads(response_text)

        # Verify model consultation response
        assert response_data["status"] == "model_consulted"
        assert response_data["model_consulted"] == "flash"
        assert response_data["model_stance"] == "neutral"
        assert "model_response" in response_data
        assert response_data["model_response"]["status"] == "success"

    @pytest.mark.asyncio
    async def test_consult_model_error_handling(self):
        """Test error handling in model consultation."""
        tool = ConsensusTool()
        tool.initial_prompt = "Test prompt"

        # Mock provider to raise an error
        mock_provider = Mock()
        mock_provider.generate_content.side_effect = Exception("Model error")

        with patch.object(tool, "get_model_provider", return_value=mock_provider):
            result = await tool._consult_model(
                {"model": "test-model", "stance": "neutral"}, Mock(relevant_files=[], continuation_id=None, images=None)
            )

        # The failure must be reported in-band, not raised
        assert result["status"] == "error"
        assert result["error"] == "Model error"
        assert result["model"] == "test-model"

    @pytest.mark.asyncio
    async def test_consult_model_with_images(self):
        """Test model consultation with images."""
        tool = ConsensusTool()
        tool.initial_prompt = "Test prompt"

        # Mock provider
        mock_provider = Mock()
        mock_response = Mock(content="Model response with image analysis")
        mock_provider.generate_content.return_value = mock_response
        mock_provider.get_provider_type.return_value = Mock(value="gemini")

        test_images = ["/path/to/image1.png", "/path/to/image2.jpg"]

        with patch.object(tool, "get_model_provider", return_value=mock_provider):
            result = await tool._consult_model(
                {"model": "test-model", "stance": "neutral"},
                Mock(relevant_files=[], continuation_id=None, images=test_images),
            )

        # Verify that images were passed to generate_content
        mock_provider.generate_content.assert_called_once()
        call_args = mock_provider.generate_content.call_args
        assert call_args.kwargs.get("images") == test_images

        assert result["status"] == "success"
        assert result["model"] == "test-model"

    @pytest.mark.asyncio
    async def test_handle_work_completion(self):
        """Test work completion handling for consensus workflow."""
        tool = ConsensusTool()
        tool.initial_prompt = "Test prompt"
        tool.accumulated_responses = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]

        request = Mock(confidence="high")
        response_data = {}

        result = await tool.handle_work_completion(response_data, request, {})

        assert result["consensus_complete"] is True
        assert result["status"] == "consensus_workflow_complete"
        assert "complete_consensus" in result
        assert result["complete_consensus"]["models_consulted"] == ["flash:neutral", "o3-mini:for"]
        assert result["complete_consensus"]["total_responses"] == 2

    def test_handle_work_continuation(self):
        """Test work continuation handling between steps."""
        tool = ConsensusTool()
        tool.models_to_consult = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]

        # Test after step 1: first model is queued for consultation
        request = Mock(step_number=1, current_model_index=0)
        response_data = {}
        result = tool.handle_work_continuation(response_data, request)
        assert result["status"] == "consulting_models"
        assert result["next_model"] == {"model": "flash", "stance": "neutral"}

        # Test between model consultations: next model queued, one remaining
        request = Mock(step_number=2, current_model_index=1)
        response_data = {}
        result = tool.handle_work_continuation(response_data, request)
        assert result["status"] == "consulting_next_model"
        assert result["next_model"] == {"model": "o3-mini", "stance": "for"}
        assert result["models_remaining"] == 1

    def test_customize_workflow_response(self):
        """Test response customization for consensus workflow."""
        tool = ConsensusTool()
        tool.accumulated_responses = [{"model": "test", "response": "data"}]

        # Test different step numbers and the workflow status each maps to
        request = Mock(step_number=1, total_steps=4)
        response_data = {}
        result = tool.customize_workflow_response(response_data, request)
        assert result["consensus_workflow_status"] == "initial_analysis_complete"

        request = Mock(step_number=2, total_steps=4)
        response_data = {}
        result = tool.customize_workflow_response(response_data, request)
        assert result["consensus_workflow_status"] == "consulting_models"

        request = Mock(step_number=4, total_steps=4)
        response_data = {}
        result = tool.customize_workflow_response(response_data, request)
        assert result["consensus_workflow_status"] == "ready_for_synthesis"
if __name__ == "__main__":
    # TestConsensusTool is a plain pytest-style class (it does not subclass
    # unittest.TestCase), so ``unittest.main()`` would discover zero tests.
    # The async tests also rely on the pytest-asyncio plugin. Run pytest on
    # this file directly and propagate its exit code.
    import sys

    sys.exit(pytest.main([__file__]))