Add Consensus Tool for Multi-Model Perspective Gathering (#67)

* WIP
  Refactor resolving model_names, should be done once at MCP call boundary
  Pass around model context instead
  Consensus tool allows one to get a consensus from multiple models, optionally assigning one a 'for' or 'against' stance to find nuanced responses.
* Deduplication of model resolution, model_context should be available before reaching deeper parts of the code
  Improved abstraction when building conversations
  Throw programmer errors early
* Guardrails
  Support for `model:option` format at MCP boundary so future tools can use additional options if needed instead of handling this only for consensus
  Model name now supports an optional ":option" for future use
* Simplified async flow
* Improved model for request to support natural language
  Simplified async flow
* Improved model for request to support natural language
  Simplified async flow
* Fix consensus tool async/sync patterns to match codebase standards
  CRITICAL FIXES:
  - Converted _get_consensus_responses from async to sync (matches other tools)
  - Converted store_conversation_turn from async to sync (add_turn is synchronous)
  - Removed unnecessary asyncio imports and sleep calls
  - Fixed ClosedResourceError in MCP protocol during long consensus operations
  PATTERN ALIGNMENT:
  - Consensus tool now follows same sync patterns as all other tools
  - Only execute() and prepare_prompt() are async (base class requirement)
  - All internal operations are synchronous like analyze, chat, debug, etc.
  TESTING:
  - MCP simulation test now passes: consensus_stance ✅
  - Two-model consensus works correctly in ~35 seconds
  - Unknown stance handling defaults to neutral with warnings
  - All 9 unit tests pass (100% success rate)
  The consensus tool async patterns were anomalous in the codebase. This fix aligns it with the established synchronous patterns used by all other tools while maintaining full functionality.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
* Fixed call order and added new test
* Cleanup dead comments
  Docs for the new tool
  Improved tests
---------
Co-authored-by: Claude <noreply@anthropic.com>
2025-06-17 10:53:17 +04:00
parent 9b98df650b
commit 95556ba9ea
31 changed files with 2643 additions and 324 deletions
--- a/tests/test_large_prompt_handling.py
+++ b/tests/test_large_prompt_handling.py
@@ -91,23 +91,36 @@ class TestLargePromptHandling:
    @pytest.mark.asyncio
    async def test_chat_prompt_file_handling(self, temp_prompt_file):
        """Test that chat tool correctly handles prompt.txt files with reasonable size."""
+        from tests.mock_helpers import create_mock_provider
+
        tool = ChatTool()
        # Use a smaller prompt that won't exceed limit when combined with system prompt
        reasonable_prompt = "This is a reasonable sized prompt for testing prompt.txt file handling."

-        # Mock the model
-        with patch.object(tool, "get_model_provider") as mock_get_provider:
-            mock_provider = MagicMock()
-            mock_provider.get_provider_type.return_value = MagicMock(value="google")
-            mock_provider.supports_thinking_mode.return_value = False
-            mock_provider.generate_content.return_value = MagicMock(
-                content="Processed prompt from file",
-                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
-                model_name="gemini-2.5-flash-preview-05-20",
-                metadata={"finish_reason": "STOP"},
-            )
+        # Mock the model with proper capabilities and ModelContext
+        with (
+            patch.object(tool, "get_model_provider") as mock_get_provider,
+            patch("utils.model_context.ModelContext") as mock_model_context_class,
+        ):
+
+            mock_provider = create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", context_window=1_048_576)
+            mock_provider.generate_content.return_value.content = "Processed prompt from file"
            mock_get_provider.return_value = mock_provider

+            # Mock ModelContext to avoid the comparison issue
+            from utils.model_context import TokenAllocation
+
+            mock_model_context = MagicMock()
+            mock_model_context.model_name = "gemini-2.5-flash-preview-05-20"
+            mock_model_context.calculate_token_allocation.return_value = TokenAllocation(
+                total_tokens=1_048_576,
+                content_tokens=838_861,
+                response_tokens=209_715,
+                file_tokens=335_544,
+                history_tokens=335_544,
+            )
+            mock_model_context_class.return_value = mock_model_context
+
            # Mock read_file_content to avoid security checks
            with patch("tools.base.read_file_content") as mock_read_file:
                mock_read_file.return_value = (
@@ -358,21 +371,34 @@ class TestLargePromptHandling:
    @pytest.mark.asyncio
    async def test_prompt_file_read_error(self):
        """Test handling when prompt.txt can't be read."""
+        from tests.mock_helpers import create_mock_provider
+
        tool = ChatTool()
        bad_file = "/nonexistent/prompt.txt"

-        with patch.object(tool, "get_model_provider") as mock_get_provider:
-            mock_provider = MagicMock()
-            mock_provider.get_provider_type.return_value = MagicMock(value="google")
-            mock_provider.supports_thinking_mode.return_value = False
-            mock_provider.generate_content.return_value = MagicMock(
-                content="Success",
-                usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
-                model_name="gemini-2.5-flash-preview-05-20",
-                metadata={"finish_reason": "STOP"},
-            )
+        with (
+            patch.object(tool, "get_model_provider") as mock_get_provider,
+            patch("utils.model_context.ModelContext") as mock_model_context_class,
+        ):
+
+            mock_provider = create_mock_provider(model_name="gemini-2.5-flash-preview-05-20", context_window=1_048_576)
+            mock_provider.generate_content.return_value.content = "Success"
            mock_get_provider.return_value = mock_provider

+            # Mock ModelContext to avoid the comparison issue
+            from utils.model_context import TokenAllocation
+
+            mock_model_context = MagicMock()
+            mock_model_context.model_name = "gemini-2.5-flash-preview-05-20"
+            mock_model_context.calculate_token_allocation.return_value = TokenAllocation(
+                total_tokens=1_048_576,
+                content_tokens=838_861,
+                response_tokens=209_715,
+                file_tokens=335_544,
+                history_tokens=335_544,
+            )
+            mock_model_context_class.return_value = mock_model_context
+
            # Should continue with empty prompt when file can't be read
            result = await tool.execute({"prompt": "", "files": [bad_file]})
            output = json.loads(result[0].text)