Improved consensus so that each step is treated properly as both a request and a response, and the initial step includes Claude's own assessment.
Improved the prompt so it does not request code when the question is a general business decision.
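For orientation before the diff: a minimal sketch of the step accounting the revised tests exercise, assuming only the argument shapes visible in the tests below. ConsensusStep and normalize_total_steps are illustrative stand-ins, not the real ConsensusTool API. The idea: step 1 carries the agent's own findings plus the list of models to consult, and total_steps is expected to equal the number of models (one consultation per step).

# Illustrative sketch only: ConsensusStep and normalize_total_steps are hypothetical
# helpers, not part of ConsensusTool. They mirror the argument shape used in the tests.
from dataclasses import dataclass, field


@dataclass
class ConsensusStep:
    step: str             # free-form description of the current step
    step_number: int      # 1-based position in the workflow
    total_steps: int      # expected to equal the number of models to consult
    findings: str         # the calling agent's own assessment so far
    models: list = field(default_factory=list)  # e.g. [{"model": "flash", "stance": "neutral"}]


def normalize_total_steps(request: ConsensusStep) -> ConsensusStep:
    """One model consultation per step, so total_steps must equal len(models)."""
    request.total_steps = len(request.models)
    return request


step1 = ConsensusStep(
    step="Initial analysis of proposal",
    step_number=1,
    total_steps=4,  # deliberately wrong; a 2-model consensus should be corrected to 2 steps
    findings="Found pros and cons",
    models=[{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
)
assert normalize_total_steps(step1).total_steps == 2

The updated tests below check this same accounting against the real tool by building a request with tool.get_workflow_request_model()(**arguments).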
@@ -2,8 +2,7 @@
Tests for the Consensus tool using WorkflowTool architecture.
"""

import json
from unittest.mock import Mock, patch
from unittest.mock import Mock

import pytest

@@ -219,180 +218,112 @@ class TestConsensusTool:
assert tool.should_call_expert_analysis({}) is False
assert tool.requires_expert_analysis() is False

@pytest.mark.asyncio
async def test_execute_workflow_step1(self):
"""Test workflow execution for step 1."""
def test_execute_workflow_step1_basic(self):
"""Test basic workflow validation for step 1."""
tool = ConsensusTool()

# Test that step 1 sets up the workflow correctly
arguments = {
"step": "Initial analysis of proposal",
"step_number": 1,
"total_steps": 4,
"total_steps": 2,
"next_step_required": True,
"findings": "Found pros and cons",
"confidence": "medium",
"models": [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
"relevant_files": ["/proposal.md"],
}

with patch.object(tool, "is_effective_auto_mode", return_value=False):
with patch.object(tool, "get_model_provider", return_value=Mock()):
result = await tool.execute_workflow(arguments)
# Verify models_to_consult is set correctly from step 1
request = tool.get_workflow_request_model()(**arguments)
assert len(request.models) == 2
assert request.models[0]["model"] == "flash"
assert request.models[1]["model"] == "o3-mini"

assert len(result) == 1
response_text = result[0].text
response_data = json.loads(response_text)

# Verify step 1 response structure
assert response_data["status"] == "consulting_models"
assert response_data["step_number"] == 1
assert "continuation_id" in response_data

@pytest.mark.asyncio
async def test_execute_workflow_model_consultation(self):
"""Test workflow execution for model consultation steps."""
def test_execute_workflow_total_steps_calculation(self):
"""Test that total_steps is calculated correctly from models."""
tool = ConsensusTool()
tool.models_to_consult = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]
tool.initial_prompt = "Test prompt"

# Test with 2 models
arguments = {
"step": "Processing model response",
"step_number": 2,
"total_steps": 4,
"step": "Initial analysis",
"step_number": 1,
"total_steps": 4, # This should be corrected to 2
"next_step_required": True,
"findings": "Model provided perspective",
"confidence": "medium",
"continuation_id": "test-id",
"current_model_index": 0,
"findings": "Analysis complete",
"models": [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}],
}

# Mock the _consult_model method instead to return a proper dict
mock_model_response = {
"model": "flash",
"stance": "neutral",
"status": "success",
"verdict": "Model analysis response",
"metadata": {"provider": "gemini"},
request = tool.get_workflow_request_model()(**arguments)
# The tool should set total_steps = len(models) = 2
assert len(request.models) == 2

def test_consult_model_basic_structure(self):
"""Test basic model consultation structure."""
tool = ConsensusTool()

# Test that _get_stance_enhanced_prompt works
for_prompt = tool._get_stance_enhanced_prompt("for")
against_prompt = tool._get_stance_enhanced_prompt("against")
neutral_prompt = tool._get_stance_enhanced_prompt("neutral")

assert "SUPPORTIVE PERSPECTIVE" in for_prompt
assert "CRITICAL PERSPECTIVE" in against_prompt
assert "BALANCED PERSPECTIVE" in neutral_prompt

def test_model_configuration_validation(self):
"""Test model configuration validation."""
tool = ConsensusTool()

# Test single model config
models = [{"model": "flash", "stance": "neutral"}]
arguments = {
"step": "Test",
"step_number": 1,
"total_steps": 1,
"next_step_required": False,
"findings": "Test findings",
"models": models,
}

with patch.object(tool, "_consult_model", return_value=mock_model_response):
result = await tool.execute_workflow(arguments)

assert len(result) == 1
response_text = result[0].text
response_data = json.loads(response_text)

# Verify model consultation response
assert response_data["status"] == "model_consulted"
assert response_data["model_consulted"] == "flash"
assert response_data["model_stance"] == "neutral"
assert "model_response" in response_data
assert response_data["model_response"]["status"] == "success"

@pytest.mark.asyncio
async def test_consult_model_error_handling(self):
"""Test error handling in model consultation."""
tool = ConsensusTool()
tool.initial_prompt = "Test prompt"

# Mock provider to raise an error
mock_provider = Mock()
mock_provider.generate_content.side_effect = Exception("Model error")

with patch.object(tool, "get_model_provider", return_value=mock_provider):
result = await tool._consult_model(
{"model": "test-model", "stance": "neutral"}, Mock(relevant_files=[], continuation_id=None, images=None)
)

assert result["status"] == "error"
assert result["error"] == "Model error"
assert result["model"] == "test-model"

@pytest.mark.asyncio
async def test_consult_model_with_images(self):
"""Test model consultation with images."""
tool = ConsensusTool()
tool.initial_prompt = "Test prompt"

# Mock provider
mock_provider = Mock()
mock_response = Mock(content="Model response with image analysis")
mock_provider.generate_content.return_value = mock_response
mock_provider.get_provider_type.return_value = Mock(value="gemini")

test_images = ["/path/to/image1.png", "/path/to/image2.jpg"]

with patch.object(tool, "get_model_provider", return_value=mock_provider):
result = await tool._consult_model(
{"model": "test-model", "stance": "neutral"},
Mock(relevant_files=[], continuation_id=None, images=test_images),
)

# Verify that images were passed to generate_content
mock_provider.generate_content.assert_called_once()
call_args = mock_provider.generate_content.call_args
assert call_args.kwargs.get("images") == test_images

assert result["status"] == "success"
assert result["model"] == "test-model"

@pytest.mark.asyncio
async def test_handle_work_completion(self):
"""Test work completion handling for consensus workflow."""
tool = ConsensusTool()
tool.initial_prompt = "Test prompt"
tool.accumulated_responses = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]

request = Mock(confidence="high")
response_data = {}

result = await tool.handle_work_completion(response_data, request, {})

assert result["consensus_complete"] is True
assert result["status"] == "consensus_workflow_complete"
assert "complete_consensus" in result
assert result["complete_consensus"]["models_consulted"] == ["flash:neutral", "o3-mini:for"]
assert result["complete_consensus"]["total_responses"] == 2
request = tool.get_workflow_request_model()(**arguments)
assert len(request.models) == 1
assert request.models[0]["model"] == "flash"
assert request.models[0]["stance"] == "neutral"

def test_handle_work_continuation(self):
"""Test work continuation handling between steps."""
"""Test work continuation handling - legacy method for compatibility."""
tool = ConsensusTool()
tool.models_to_consult = [{"model": "flash", "stance": "neutral"}, {"model": "o3-mini", "stance": "for"}]

# Note: In the new workflow, model consultation happens DURING steps in execute_workflow
# This method is kept for compatibility but not actively used in the step-by-step flow

# Test after step 1
request = Mock(step_number=1, current_model_index=0)
response_data = {}

result = tool.handle_work_continuation(response_data, request)
assert result["status"] == "consulting_models"
assert result["next_model"] == {"model": "flash", "stance": "neutral"}
# The method still exists but returns legacy status for compatibility
assert "status" in result

# Test between model consultations
request = Mock(step_number=2, current_model_index=1)
response_data = {}

result = tool.handle_work_continuation(response_data, request)
assert result["status"] == "consulting_next_model"
assert result["next_model"] == {"model": "o3-mini", "stance": "for"}
assert result["models_remaining"] == 1
assert "status" in result

def test_customize_workflow_response(self):
"""Test response customization for consensus workflow."""
tool = ConsensusTool()
tool.accumulated_responses = [{"model": "test", "response": "data"}]

# Test different step numbers
request = Mock(step_number=1, total_steps=4)
# Test different step numbers (new workflow: 2 models = 2 steps)
request = Mock(step_number=1, total_steps=2)
response_data = {}
result = tool.customize_workflow_response(response_data, request)
assert result["consensus_workflow_status"] == "initial_analysis_complete"

request = Mock(step_number=2, total_steps=4)
response_data = {}
result = tool.customize_workflow_response(response_data, request)
assert result["consensus_workflow_status"] == "consulting_models"

request = Mock(step_number=4, total_steps=4)
request = Mock(step_number=2, total_steps=2)
response_data = {}
result = tool.customize_workflow_response(response_data, request)
assert result["consensus_workflow_status"] == "ready_for_synthesis"