Add Consensus Tool for Multi-Model Perspective Gathering (#67)
* WIP: Refactor model name resolution so it happens once at the MCP call boundary; pass a model context around instead.
  The consensus tool lets you gather a consensus from multiple models, optionally assigning a model a 'for' or 'against' stance to surface nuanced responses.

* Deduplicate model resolution; model_context should be available before reaching the deeper parts of the code.
  Improve the abstraction used when building conversations. Throw programmer errors early.

* Guardrails: support the `model:option` format at the MCP boundary so future tools can pass additional options if needed, rather than handling this only for consensus.
  A model name now supports an optional ":option" suffix for future use.

* Simplified async flow

* Improved the request model to support natural language. Simplified async flow.

* Improved the request model to support natural language. Simplified async flow.

* Fix consensus tool async/sync patterns to match codebase standards

  CRITICAL FIXES:
  - Converted _get_consensus_responses from async to sync (matches other tools)
  - Converted store_conversation_turn from async to sync (add_turn is synchronous)
  - Removed unnecessary asyncio imports and sleep calls
  - Fixed ClosedResourceError in the MCP protocol during long consensus operations

  PATTERN ALIGNMENT:
  - The consensus tool now follows the same sync patterns as all other tools
  - Only execute() and prepare_prompt() are async (a base class requirement)
  - All internal operations are synchronous, like analyze, chat, debug, etc.

  TESTING:
  - MCP simulation test now passes: consensus_stance ✅
  - Two-model consensus works correctly in ~35 seconds
  - Unknown stance handling defaults to neutral with warnings
  - All 9 unit tests pass (100% success rate)

  The consensus tool's async patterns were anomalous in the codebase. This fix aligns it with the established synchronous patterns used by all other tools while maintaining full functionality.

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed call order and added a new test

* Clean up dead comments; document the new tool; improve tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
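As a rough illustration of the `model:option` guardrail mentioned in the commit message, the sketch below shows how such a spec could be split at the MCP boundary. This is a minimal sketch only: the helper name `parse_model_option` and the example model names are assumptions for illustration, not the project's actual API.

```python
# Hypothetical sketch only: parse_model_option is an assumed name, not the
# real helper in this codebase. It splits a "model" or "model:option" spec so
# a tool like consensus can read an optional stance (e.g. "for" / "against").
def parse_model_option(spec: str) -> tuple[str, str | None]:
    model, _, option = spec.partition(":")
    return model.strip(), (option.strip() or None)


if __name__ == "__main__":
    # Example: one model argues 'for', one 'against', one carries no option.
    for spec in ("o3:for", "flash:against", "gemini-2.5-pro"):
        model, option = parse_model_option(spec)
        print(f"model={model!r}, option={option!r}")
```

In this sketch a missing option is simply returned as None, in the spirit of the commit's note that an unrecognized stance falls back to neutral.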
commit 95556ba9ea (parent 9b98df650b)

tools/base.py | 326
@@ -31,6 +31,7 @@ from providers.base import ProviderType
 from utils import check_token_limit
 from utils.conversation_memory import (
     MAX_CONVERSATION_TURNS,
+    ConversationTurn,
     add_turn,
     create_thread,
     get_conversation_file_list,
@@ -643,6 +644,41 @@ class BaseTool(ABC):
             )
         return requested_files

+    def format_conversation_turn(self, turn: ConversationTurn) -> list[str]:
+        """
+        Format a conversation turn for display in conversation history.
+
+        Tools can override this to provide custom formatting for their responses
+        while maintaining the standard structure for cross-tool compatibility.
+
+        This method is called by build_conversation_history when reconstructing
+        conversation context, allowing each tool to control how its responses
+        appear in subsequent conversation turns.
+
+        Args:
+            turn: The conversation turn to format (from utils.conversation_memory)
+
+        Returns:
+            list[str]: Lines of formatted content for this turn
+
+        Example:
+            Default implementation returns:
+            ["Files used in this turn: file1.py, file2.py", "", "Response content..."]
+
+        Tools can override to add custom sections, formatting, or metadata display.
+        """
+        parts = []
+
+        # Add files context if present
+        if turn.files:
+            parts.append(f"Files used in this turn: {', '.join(turn.files)}")
+            parts.append("")  # Empty line for readability
+
+        # Add the actual content
+        parts.append(turn.content)
+
+        return parts
+
     def _prepare_file_content_for_prompt(
         self,
         request_files: list[str],
@@ -716,109 +752,35 @@ class BaseTool(ABC):
         elif max_tokens is not None:
             effective_max_tokens = max_tokens - reserve_tokens
         else:
-            # Get model-specific limits
-            # First check if model_context was passed from server.py
-            model_context = None
-            if arguments:
-                model_context = arguments.get("_model_context") or getattr(self, "_current_arguments", {}).get(
-                    "_model_context"
+            # The execute() method is responsible for setting self._model_context.
+            # A missing context is a programming error, not a fallback case.
+            if not hasattr(self, "_model_context") or not self._model_context:
+                logger.error(
+                    f"[FILES] {self.name}: _prepare_file_content_for_prompt called without a valid model context. "
+                    "This indicates an incorrect call sequence in the tool's implementation."
+                )
+                # Fail fast to reveal integration issues. A silent fallback with arbitrary
+                # limits can hide bugs and lead to unexpected token usage or silent failures.
+                raise RuntimeError("ModelContext not initialized before file preparation.")

-            if model_context:
-                # Use the passed model context
-                try:
-                    token_allocation = model_context.calculate_token_allocation()
-                    effective_max_tokens = token_allocation.file_tokens - reserve_tokens
-                    logger.debug(
-                        f"[FILES] {self.name}: Using passed model context for {model_context.model_name}: "
-                        f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total"
-                    )
-                except Exception as e:
-                    logger.warning(f"[FILES] {self.name}: Error using passed model context: {e}")
-                    # Fall through to manual calculation
-                    model_context = None
-
-            if not model_context:
-                # Manual calculation as fallback
-                from config import DEFAULT_MODEL
-
-                model_name = getattr(self, "_current_model_name", None) or DEFAULT_MODEL
-
-                # Handle auto mode gracefully
-                if model_name.lower() == "auto":
-                    from providers.registry import ModelProviderRegistry
-
-                    # Use tool-specific fallback model for capacity estimation
-                    # This properly handles different providers (OpenAI=200K, Gemini=1M)
-                    tool_category = self.get_model_category()
-                    fallback_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
-                    logger.debug(
-                        f"[FILES] {self.name}: Auto mode detected, using {fallback_model} "
-                        f"for {tool_category.value} tool capacity estimation"
-                    )
-
-                    try:
-                        provider = self.get_model_provider(fallback_model)
-                        capabilities = provider.get_capabilities(fallback_model)
-
-                        # Calculate content allocation based on model capacity
-                        if capabilities.context_window < 300_000:
-                            # Smaller context models: 60% content, 40% response
-                            model_content_tokens = int(capabilities.context_window * 0.6)
-                        else:
-                            # Larger context models: 80% content, 20% response
-                            model_content_tokens = int(capabilities.context_window * 0.8)
-
-                        effective_max_tokens = model_content_tokens - reserve_tokens
-                        logger.debug(
-                            f"[FILES] {self.name}: Using {fallback_model} capacity for auto mode: "
-                            f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
-                        )
-                    except (ValueError, AttributeError) as e:
-                        # Handle specific errors: provider not found, model not supported, missing attributes
-                        logger.warning(
-                            f"[FILES] {self.name}: Could not get capabilities for fallback model {fallback_model}: {type(e).__name__}: {e}"
-                        )
-                        # Fall back to conservative default for safety
-                        effective_max_tokens = 100_000 - reserve_tokens
-                    except Exception as e:
-                        # Catch any other unexpected errors
-                        logger.error(
-                            f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
-                        )
-                        effective_max_tokens = 100_000 - reserve_tokens
-                else:
-                    # Normal mode - use the specified model
-                    try:
-                        provider = self.get_model_provider(model_name)
-                        capabilities = provider.get_capabilities(model_name)
-
-                        # Calculate content allocation based on model capacity
-                        if capabilities.context_window < 300_000:
-                            # Smaller context models: 60% content, 40% response
-                            model_content_tokens = int(capabilities.context_window * 0.6)
-                        else:
-                            # Larger context models: 80% content, 20% response
-                            model_content_tokens = int(capabilities.context_window * 0.8)
-
-                        effective_max_tokens = model_content_tokens - reserve_tokens
-                        logger.debug(
-                            f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
-                            f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
-                        )
-                    except (ValueError, AttributeError) as e:
-                        # Handle specific errors: provider not found, model not supported, missing attributes
-                        logger.warning(
-                            f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
-                        )
-                        # Fall back to conservative default for safety
-                        effective_max_tokens = 100_000 - reserve_tokens
-                    except Exception as e:
-                        # Catch any other unexpected errors
-                        logger.error(
-                            f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
-                        )
-                        effective_max_tokens = 100_000 - reserve_tokens
+            # This is now the single source of truth for token allocation.
+            model_context = self._model_context
+            try:
+                token_allocation = model_context.calculate_token_allocation()
+                # Standardize on `file_tokens` for consistency and correctness.
+                # This fixes the bug where the old code incorrectly used content_tokens
+                effective_max_tokens = token_allocation.file_tokens - reserve_tokens
+                logger.debug(
+                    f"[FILES] {self.name}: Using model context for {model_context.model_name}: "
+                    f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total"
+                )
+            except Exception as e:
+                logger.error(
+                    f"[FILES] {self.name}: Failed to calculate token allocation from model context: {e}", exc_info=True
+                )
+                # If the context exists but calculation fails, we still need to prevent a crash.
+                # A loud error is logged, and we fall back to a safe default.
+                effective_max_tokens = 100_000 - reserve_tokens

         # Ensure we have a reasonable minimum budget
         effective_max_tokens = max(1000, effective_max_tokens)
@@ -1087,8 +1049,14 @@ When recommending searches, be specific about what information you need and why

         # Get model capabilities to check image support and size limits
         try:
-            provider = self.get_model_provider(model_name)
-            capabilities = provider.get_capabilities(model_name)
+            # Use the already-resolved provider from model context if available
+            if hasattr(self, "_model_context") and self._model_context:
+                provider = self._model_context.provider
+                capabilities = self._model_context.capabilities
+            else:
+                # Fallback for edge cases (e.g., direct test calls)
+                provider = self.get_model_provider(model_name)
+                capabilities = provider.get_capabilities(model_name)
         except Exception as e:
             logger.warning(f"Failed to get capabilities for model {model_name}: {e}")
             # Fall back to checking custom models configuration
@@ -1214,7 +1182,7 @@ When recommending searches, be specific about what information you need and why

         return estimate_file_tokens(file_path)

-    def check_total_file_size(self, files: list[str]) -> Optional[dict[str, Any]]:
+    def check_total_file_size(self, files: list[str], model_name: str) -> Optional[dict[str, Any]]:
         """
         Check if total file sizes would exceed token threshold before embedding.

@@ -1224,6 +1192,7 @@ When recommending searches, be specific about what information you need and why

         Args:
             files: List of file paths to check
+            model_name: The resolved model name to use for token limits

         Returns:
             Dict with `code_too_large` response if too large, None if acceptable
@@ -1231,13 +1200,6 @@ When recommending searches, be specific about what information you need and why
         if not files:
             return None

-        # Get current model name for context-aware thresholds
-        model_name = getattr(self, "_current_model_name", None)
-        if not model_name:
-            from config import DEFAULT_MODEL
-
-            model_name = DEFAULT_MODEL
-
         # Use centralized file size checking with model context
         from utils.file_utils import check_total_file_size as check_file_size_utility

@@ -1353,6 +1315,65 @@ When recommending searches, be specific about what information you need and why
             # Extract and validate images from request
             images = getattr(request, "images", None) or []

+            # MODEL RESOLUTION NOW HAPPENS AT MCP BOUNDARY
+            # Extract pre-resolved model context from server.py
+            model_context = self._current_arguments.get("_model_context")
+            resolved_model_name = self._current_arguments.get("_resolved_model_name")
+
+            if model_context and resolved_model_name:
+                # Model was already resolved at MCP boundary
+                model_name = resolved_model_name
+                logger.debug(f"Using pre-resolved model '{model_name}' from MCP boundary")
+            else:
+                # Fallback for direct execute calls
+                model_name = getattr(request, "model", None)
+                if not model_name:
+                    from config import DEFAULT_MODEL
+
+                    model_name = DEFAULT_MODEL
+                logger.debug(f"Using fallback model resolution for '{model_name}' (test mode)")
+
+                # For tests: Check if we should require model selection (auto mode)
+                if self._should_require_model_selection(model_name):
+                    # Get suggested model based on tool category
+                    from providers.registry import ModelProviderRegistry
+
+                    tool_category = self.get_model_category()
+                    suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
+
+                    # Build error message based on why selection is required
+                    if model_name.lower() == "auto":
+                        error_message = (
+                            f"Model parameter is required in auto mode. "
+                            f"Suggested model for {self.name}: '{suggested_model}' "
+                            f"(category: {tool_category.value})"
+                        )
+                    else:
+                        # Model was specified but not available
+                        available_models = self._get_available_models()
+
+                        error_message = (
+                            f"Model '{model_name}' is not available with current API keys. "
+                            f"Available models: {', '.join(available_models)}. "
+                            f"Suggested model for {self.name}: '{suggested_model}' "
+                            f"(category: {tool_category.value})"
+                        )
+                    error_output = ToolOutput(
+                        status="error",
+                        content=error_message,
+                        content_type="text",
+                    )
+                    return [TextContent(type="text", text=error_output.model_dump_json())]
+
+                # Create model context for tests
+                from utils.model_context import ModelContext
+
+                model_context = ModelContext(model_name)
+
+            # Store resolved model name for use by helper methods
+            self._current_model_name = model_name
+            self._model_context = model_context
+
             # Check if we have continuation_id - if so, conversation history is already embedded
             continuation_id = getattr(request, "continuation_id", None)

@@ -1389,57 +1410,11 @@ When recommending searches, be specific about what information you need and why
                 prompt = f"{prompt}\n\n{follow_up_instructions}"
                 logger.debug(f"Added follow-up instructions for new {self.name} conversation")

-            # Extract model configuration from request or use defaults
-            model_name = getattr(request, "model", None)
-            if not model_name:
-                from config import DEFAULT_MODEL
-
-                model_name = DEFAULT_MODEL
-
-            # Check if we need Claude to select a model
-            # This happens when:
-            # 1. The model is explicitly "auto"
-            # 2. The requested model is not available
-            if self._should_require_model_selection(model_name):
-                # Get suggested model based on tool category
-                from providers.registry import ModelProviderRegistry
-
-                tool_category = self.get_model_category()
-                suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
-
-                # Build error message based on why selection is required
-                if model_name.lower() == "auto":
-                    error_message = (
-                        f"Model parameter is required in auto mode. "
-                        f"Suggested model for {self.name}: '{suggested_model}' "
-                        f"(category: {tool_category.value})"
-                    )
-                else:
-                    # Model was specified but not available
-                    # Get list of available models
-                    available_models = self._get_available_models()
-
-                    error_message = (
-                        f"Model '{model_name}' is not available with current API keys. "
-                        f"Available models: {', '.join(available_models)}. "
-                        f"Suggested model for {self.name}: '{suggested_model}' "
-                        f"(category: {tool_category.value})"
-                    )
-
-                error_output = ToolOutput(
-                    status="error",
-                    content=error_message,
-                    content_type="text",
-                )
-                return [TextContent(type="text", text=error_output.model_dump_json())]
-
-            # Store model name for use by helper methods like _prepare_file_content_for_prompt
-            # Only set this after auto mode validation to prevent "auto" being used as a model name
-            self._current_model_name = model_name
+            # Model name already resolved and stored in self._current_model_name earlier

             # Validate images at MCP boundary if any were provided
             if images:
-                image_validation_error = self._validate_image_limits(images, model_name, continuation_id)
+                image_validation_error = self._validate_image_limits(images, self._current_model_name, continuation_id)
                 if image_validation_error:
                     return [TextContent(type="text", text=json.dumps(image_validation_error))]

@@ -1451,10 +1426,10 @@ When recommending searches, be specific about what information you need and why
                 thinking_mode = self.get_default_thinking_mode()

             # Get the appropriate model provider
-            provider = self.get_model_provider(model_name)
+            provider = self.get_model_provider(self._current_model_name)

             # Validate and correct temperature for this model
-            temperature, temp_warnings = self._validate_and_correct_temperature(model_name, temperature)
+            temperature, temp_warnings = self._validate_and_correct_temperature(self._current_model_name, temperature)

             # Log any temperature corrections
             for warning in temp_warnings:
@@ -1465,16 +1440,21 @@ When recommending searches, be specific about what information you need and why

             # Generate AI response using the provider
             logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.name}")
-            logger.info(f"Using model: {model_name} via {provider.get_provider_type().value} provider")
-            logger.debug(f"Prompt length: {len(prompt)} characters")
+            logger.info(f"Using model: {self._current_model_name} via {provider.get_provider_type().value} provider")
+
+            # Import token estimation utility
+            from utils.token_utils import estimate_tokens
+
+            estimated_tokens = estimate_tokens(prompt)
+            logger.debug(f"Prompt length: {len(prompt)} characters (~{estimated_tokens:,} tokens)")

             # Generate content with provider abstraction
             model_response = provider.generate_content(
                 prompt=prompt,
-                model_name=model_name,
+                model_name=self._current_model_name,
                 system_prompt=system_prompt,
                 temperature=temperature,
-                thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None,
+                thinking_mode=thinking_mode if provider.supports_thinking_mode(self._current_model_name) else None,
                 images=images if images else None,  # Pass images via kwargs
             )

@@ -1486,7 +1466,11 @@ When recommending searches, be specific about what information you need and why

             # Parse response to check for clarification requests or format output
             # Pass model info for conversation tracking
-            model_info = {"provider": provider, "model_name": model_name, "model_response": model_response}
+            model_info = {
+                "provider": provider,
+                "model_name": self._current_model_name,
+                "model_response": model_response,
+            }
             tool_output = self._parse_response(raw_text, request, model_info)
             logger.info(f"✅ {self.name} tool completed successfully")

@@ -1894,8 +1878,14 @@ When recommending searches, be specific about what information you need and why
            Tuple of (corrected_temperature, warning_messages)
        """
        try:
-            provider = self.get_model_provider(model_name)
-            capabilities = provider.get_capabilities(model_name)
+            # Use the already-resolved provider and capabilities from model context
+            if hasattr(self, "_model_context") and self._model_context:
+                capabilities = self._model_context.capabilities
+            else:
+                # Fallback for edge cases (e.g., direct test calls)
+                provider = self.get_model_provider(model_name)
+                capabilities = provider.get_capabilities(model_name)

            constraint = capabilities.temperature_constraint

            warnings = []
