my-pal-mcp-server/tools/consensus.py
Beehive Innovations 95556ba9ea Add Consensus Tool for Multi-Model Perspective Gathering (#67)
* WIP
Refactor model name resolution; it should be done once at the MCP call boundary
Pass the model context around instead
The consensus tool lets you gather a consensus from multiple models, optionally assigning each one a 'for' or 'against' stance to elicit more nuanced responses.

* Deduplication of model resolution; model_context should be available before reaching deeper parts of the code
Improved abstraction when building conversations
Throw programmer errors early

* Guardrails
Support for `model:option` format at the MCP boundary so future tools can use additional options if needed, instead of handling this only for consensus
Model names now support an optional `:option` suffix for future use
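Example: `flash:for` selects the flash model with a supportive stance, while `pro` alone defaults to a neutral stance.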

* Simplified async flow

* Improved the request model to support natural language
Simplified async flow

* Fix consensus tool async/sync patterns to match codebase standards

CRITICAL FIXES:
- Converted _get_consensus_responses from async to sync (matches other tools)
- Converted store_conversation_turn from async to sync (add_turn is synchronous)
- Removed unnecessary asyncio imports and sleep calls
- Fixed ClosedResourceError in MCP protocol during long consensus operations

PATTERN ALIGNMENT:
- Consensus tool now follows same sync patterns as all other tools
- Only execute() and prepare_prompt() are async (base class requirement)
- All internal operations are synchronous like analyze, chat, debug, etc.

TESTING:
- MCP simulation test now passes: consensus_stance 
- Two-model consensus works correctly in ~35 seconds
- Unknown stance handling defaults to neutral with warnings
- All 9 unit tests pass (100% success rate)

The consensus tool async patterns were anomalous in the codebase.
This fix aligns it with the established synchronous patterns used
by all other tools while maintaining full functionality.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed call order and added new test

* Clean up dead comments
Docs for the new tool
Improved tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-17 10:53:17 +04:00

"""
Consensus tool for multi-model perspective gathering and validation
"""
import json
import logging
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import BaseModel, Field, field_validator
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION
from systemprompts import CONSENSUS_PROMPT
from .base import BaseTool, ToolRequest
logger = logging.getLogger(__name__)
class ModelConfig(BaseModel):
"""Enhanced model configuration for consensus tool"""
model: str = Field(..., description="Model name to use (e.g., 'o3', 'flash', 'pro')")
stance: Optional[str] = Field(
default="neutral",
description=(
"Stance for this model. Supportive: 'for', 'support', 'favor'. "
"Critical: 'against', 'oppose', 'critical'. Neutral: 'neutral'. "
"Defaults to 'neutral'."
),
)
stance_prompt: Optional[str] = Field(
default=None,
description=(
"Custom stance-specific instructions for this model. "
"If provided, this will be used instead of the default stance prompt. "
"Should be clear, specific instructions about how this model should approach the analysis."
),
)
class ConsensusRequest(ToolRequest):
"""Request model for consensus tool"""
prompt: str = Field(
...,
description=(
"Description of what to get consensus on, testing objectives, and specific scope/focus areas. "
"Be as detailed as possible about the proposal, plan, or idea you want multiple perspectives on."
),
)
models: list[ModelConfig] = Field(
...,
description=(
"List of model configurations for consensus analysis. Each model can have a specific stance and custom instructions. "
"Example: [{'model': 'o3', 'stance': 'for', 'stance_prompt': 'Focus on benefits and opportunities...'}, "
"{'model': 'flash', 'stance': 'against', 'stance_prompt': 'Identify risks and challenges...'}]. "
"Maximum 2 instances per model+stance combination."
),
)
files: Optional[list[str]] = Field(
default_factory=list,
description="Optional files or directories for additional context (must be absolute paths)",
)
images: Optional[list[str]] = Field(
default_factory=list,
description=(
"Optional images showing expected UI changes, design requirements, "
"or visual references for the consensus analysis"
),
)
focus_areas: Optional[list[str]] = Field(
default_factory=list,
description="Specific aspects to focus on (e.g., 'performance', 'security', 'user experience')",
)
@field_validator("models")
@classmethod
def validate_models_not_empty(cls, v):
if not v:
raise ValueError("At least one model must be specified")
return v
class ConsensusTool(BaseTool):
"""Multi-model consensus tool for gathering diverse perspectives on technical proposals"""
def __init__(self):
super().__init__()
@staticmethod
def parse_structured_prompt_models(model_spec: str) -> list[dict[str, str]]:
"""
Parse consensus model specification from structured prompt format.
This method parses structured prompt specifications used in Claude Code shortcuts
like "/zen:consensus:flash:for,o3:against,pro:neutral" to extract model configurations
with their assigned stances.
Supported formats:
- "model:stance" - Explicit stance assignment (e.g., "flash:for", "o3:against")
- "model" - Defaults to neutral stance (e.g., "pro" becomes "pro:neutral")
Supported stances:
- Supportive: "for", "support", "favor"
- Critical: "against", "oppose", "critical"
- Neutral: "neutral" (default)
Args:
model_spec (str): Comma-separated model specification string.
Examples: "flash:for,o3:against,pro:neutral" or "flash:for,o3:against,pro"
Returns:
list[dict[str, str]]: List of model configuration dictionaries with keys:
- "model": The model name (e.g., "flash", "o3", "pro")
- "stance": The normalized stance (e.g., "for", "against", "neutral")
Examples:
>>> ConsensusTool.parse_structured_prompt_models("flash:for,o3:against,pro")
[{"model": "flash", "stance": "for"}, {"model": "o3", "stance": "against"}, {"model": "pro", "stance": "neutral"}]
>>> ConsensusTool.parse_structured_prompt_models("flash,o3,pro")
[{"model": "flash", "stance": "neutral"}, {"model": "o3", "stance": "neutral"}, {"model": "pro", "stance": "neutral"}]
"""
models = []
# Split by comma to get individual model specs
model_parts = model_spec.split(",")
for part in model_parts:
part = part.strip()
if ":" in part:
# Model with stance: "flash:for" or "o3:against"
model_name, stance = part.split(":", 1)
models.append({"model": model_name.strip(), "stance": stance.strip()})
else:
# Model without stance (defaults to neutral): "pro"
models.append({"model": part.strip(), "stance": "neutral"})
return models
def get_name(self) -> str:
return "consensus"
def get_description(self) -> str:
return (
"MULTI-MODEL CONSENSUS - Gather diverse perspectives from multiple AI models on technical proposals, "
"plans, and ideas. Perfect for validation, feasibility assessment, and getting comprehensive "
"viewpoints on complex decisions. Supports advanced stance steering with custom instructions for each model. "
"You can specify different stances (for/against/neutral) and provide custom stance prompts to guide each "
"model's analysis. Example: [{'model': 'o3', 'stance': 'for', 'stance_prompt': 'Focus on implementation "
"benefits and user value'}, {'model': 'flash', 'stance': 'against', 'stance_prompt': 'Identify potential "
"risks and technical challenges'}]. Use neutral stances by default unless structured debate would add value."
)
def get_input_schema(self) -> dict[str, Any]:
schema = {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": (
"Description of what to get consensus on, testing objectives, and specific scope/focus areas. "
"Be as detailed as possible about the proposal, plan, or idea you want multiple perspectives on."
),
},
"models": {
"type": "array",
"items": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "Model name to use (e.g., 'o3', 'flash', 'pro')",
},
"stance": {
"type": "string",
"enum": ["for", "support", "favor", "against", "oppose", "critical", "neutral"],
"description": "Stance for this model: supportive ('for', 'support', 'favor'), critical ('against', 'oppose', 'critical'), or 'neutral'",
"default": "neutral",
},
"stance_prompt": {
"type": "string",
"description": "Custom stance-specific instructions for this model. If provided, this will be used instead of the default stance prompt.",
},
},
"required": ["model"],
},
"description": (
"List of model configurations for consensus analysis. Each model can have a specific stance and custom instructions. "
"Example: [{'model': 'o3', 'stance': 'for', 'stance_prompt': 'Focus on benefits and opportunities...'}, "
"{'model': 'flash', 'stance': 'against', 'stance_prompt': 'Identify risks and challenges...'}]. "
"Maximum 2 instances per model+stance combination."
),
},
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Optional files or directories for additional context (must be absolute paths)",
},
"images": {
"type": "array",
"items": {"type": "string"},
"description": (
"Optional images showing expected UI changes, design requirements, "
"or visual references for the consensus analysis"
),
},
"focus_areas": {
"type": "array",
"items": {"type": "string"},
"description": "Specific aspects to focus on (e.g., 'performance', 'security', 'user experience')",
},
"temperature": {
"type": "number",
"description": "Temperature (0-1, default 0.2 for consistency)",
"minimum": 0,
"maximum": 1,
"default": self.get_default_temperature(),
},
"thinking_mode": {
"type": "string",
"enum": ["minimal", "low", "medium", "high", "max"],
"description": (
"Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), "
"high (67%), max (100% of model max)"
),
},
"use_websearch": {
"type": "boolean",
"description": (
"Enable web search for documentation, best practices, and current information. "
"Particularly useful for: brainstorming sessions, architectural design discussions, "
"exploring industry best practices, working with specific frameworks/technologies, "
"researching solutions to complex problems, or when current documentation and "
"community insights would enhance the analysis."
),
"default": True,
},
"continuation_id": {
"type": "string",
"description": (
"Thread continuation ID for multi-turn conversations. Can be used to continue "
"conversations across different tools. Only provide this if continuing a previous "
"conversation thread."
),
},
},
"required": ["prompt", "models"],
}
return schema
def get_system_prompt(self) -> str:
return CONSENSUS_PROMPT
def get_default_temperature(self) -> float:
return 0.2 # Lower temperature for more consistent consensus responses
def get_model_category(self) -> "ToolModelCategory":
"""Consensus uses extended reasoning models for deep analysis"""
from tools.models import ToolModelCategory
return ToolModelCategory.EXTENDED_REASONING
def get_request_model(self):
return ConsensusRequest
def format_conversation_turn(self, turn) -> list[str]:
"""
Format consensus turns with individual model responses for better readability.
This custom formatting shows the individual model responses that were
synthesized into the consensus, making it easier to understand the
reasoning behind the final recommendation.
"""
parts = []
# Add files context if present
if turn.files:
parts.append(f"Files used in this turn: {', '.join(turn.files)}")
parts.append("")
# Check if this is a consensus turn with individual responses
if turn.model_metadata and turn.model_metadata.get("individual_responses"):
individual_responses = turn.model_metadata["individual_responses"]
# Add consensus header
models_consulted = []
for resp in individual_responses:
model = resp["model"]
stance = resp.get("stance", "neutral")
if stance != "neutral":
models_consulted.append(f"{model}:{stance}")
else:
models_consulted.append(model)
parts.append(f"Models consulted: {', '.join(models_consulted)}")
parts.append("")
parts.append("=== INDIVIDUAL MODEL RESPONSES ===")
parts.append("")
# Add each successful model response
for i, response in enumerate(individual_responses):
model_name = response["model"]
stance = response.get("stance", "neutral")
verdict = response["verdict"]
stance_label = f"({stance.title()} Stance)" if stance != "neutral" else "(Neutral Analysis)"
parts.append(f"**{model_name.upper()} {stance_label}**:")
parts.append(verdict)
if i < len(individual_responses) - 1:
parts.append("")
parts.append("---")
parts.append("")
parts.append("=== END INDIVIDUAL RESPONSES ===")
parts.append("")
parts.append("Claude's Synthesis:")
# Add the actual content
parts.append(turn.content)
return parts
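# Illustrative stance normalization (hypothetical inputs; behavior follows
# _normalize_stance below):
#   "support" or "favor"    -> "for"
#   "oppose" or "critical"  -> "against"
#   None or "neutral"       -> "neutral"
#   any unknown value       -> "neutral" (a warning is logged)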
def _normalize_stance(self, stance: Optional[str]) -> str:
"""Normalize stance to canonical form."""
if not stance:
return "neutral"
stance = stance.lower()
# Define stance synonyms
supportive_stances = {"for", "support", "favor"}
critical_stances = {"against", "oppose", "critical"}
# Map synonyms to canonical stance
if stance in supportive_stances:
return "for"
elif stance in critical_stances:
return "against"
elif stance == "neutral":
return "neutral"
else:
# Unknown stances default to neutral for robustness
logger.warning(
f"Unknown stance '{stance}' provided, defaulting to 'neutral'. Valid stances: {', '.join(sorted(supportive_stances | critical_stances))}, or 'neutral'"
)
return "neutral"
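# Illustrative validation of model combinations (hypothetical input; assumes the
# default limit of 2 instances per model+stance combination):
#   input:   o3:for, o3:for, o3:for, flash:against
#   valid:   o3:for, o3:for, flash:against
#   skipped: ["o3:for (max 2 instances)"]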
def _validate_model_combinations(self, model_configs: list[ModelConfig]) -> tuple[list[ModelConfig], list[str]]:
"""Validate model configurations and enforce limits.
Returns:
tuple: (valid_configs, skipped_entries)
- Each model+stance combination is limited to DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION instances (2 by default)
"""
valid_configs = []
skipped_entries = []
combination_counts = {} # Track (model, stance) -> count
for config in model_configs:
try:
# Normalize stance
normalized_stance = self._normalize_stance(config.stance)
# Create normalized config
normalized_config = ModelConfig(
model=config.model, stance=normalized_stance, stance_prompt=config.stance_prompt
)
combination_key = (config.model, normalized_stance)
current_count = combination_counts.get(combination_key, 0)
if current_count >= DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION:
# Already have max instances of this model+stance combination
skipped_entries.append(
f"{config.model}:{normalized_stance} (max {DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION} instances)"
)
continue
combination_counts[combination_key] = current_count + 1
valid_configs.append(normalized_config)
except ValueError as e:
# Invalid stance or model
skipped_entries.append(f"{config.model} ({str(e)})")
continue
return valid_configs, skipped_entries
def _get_stance_enhanced_prompt(self, stance: str, custom_stance_prompt: Optional[str] = None) -> str:
"""Get the system prompt with stance injection based on the stance."""
base_prompt = self.get_system_prompt()
# If custom stance prompt is provided, use it instead of default
if custom_stance_prompt:
# Validate stance placeholder exists exactly once
if base_prompt.count("{stance_prompt}") != 1:
raise ValueError(
"System prompt must contain exactly one '{stance_prompt}' placeholder, "
f"found {base_prompt.count('{stance_prompt}')}"
)
return base_prompt.replace("{stance_prompt}", custom_stance_prompt)
stance_prompts = {
"for": """SUPPORTIVE PERSPECTIVE WITH INTEGRITY
You are tasked with advocating FOR this proposal, but with CRITICAL GUARDRAILS:
MANDATORY ETHICAL CONSTRAINTS:
- This is NOT a debate for entertainment. You MUST act in good faith and in the best interest of the questioner
- You MUST think deeply about whether supporting this idea is safe, sound, and passes essential requirements
- You MUST be direct and unequivocal in saying "this is a bad idea" when it truly is
- There must be at least ONE COMPELLING reason to be optimistic, otherwise DO NOT support it
WHEN TO REFUSE SUPPORT (MUST OVERRIDE STANCE):
- If the idea is fundamentally harmful to users, project, or stakeholders
- If implementation would violate security, privacy, or ethical standards
- If the proposal is technically infeasible within realistic constraints
- If costs/risks dramatically outweigh any potential benefits
YOUR SUPPORTIVE ANALYSIS SHOULD:
- Identify genuine strengths and opportunities
- Propose solutions to overcome legitimate challenges
- Highlight synergies with existing systems
- Suggest optimizations that enhance value
- Present realistic implementation pathways
Remember: Being "for" means finding the BEST possible version of the idea IF it has merit, not blindly supporting bad ideas.""",
"against": """CRITICAL PERSPECTIVE WITH RESPONSIBILITY
You are tasked with critiquing this proposal, but with ESSENTIAL BOUNDARIES:
MANDATORY FAIRNESS CONSTRAINTS:
- You MUST NOT oppose genuinely excellent, common-sense ideas just to be contrarian
- You MUST acknowledge when a proposal is fundamentally sound and well-conceived
- You CANNOT give harmful advice or recommend against beneficial changes
- If the idea is outstanding, say so clearly while offering constructive refinements
WHEN TO MODERATE CRITICISM (MUST OVERRIDE STANCE):
- If the proposal addresses critical user needs effectively
- If it follows established best practices with good reason
- If benefits clearly and substantially outweigh risks
- If it's the obvious right solution to the problem
YOUR CRITICAL ANALYSIS SHOULD:
- Identify legitimate risks and failure modes
- Point out overlooked complexities
- Suggest more efficient alternatives
- Highlight potential negative consequences
- Question assumptions that may be flawed
Remember: Being "against" means rigorous scrutiny to ensure quality, not undermining good ideas that deserve support.""",
"neutral": """BALANCED PERSPECTIVE
Provide objective analysis considering both positive and negative aspects. However, if there is overwhelming evidence
that the proposal clearly leans toward being exceptionally good or particularly problematic, you MUST accurately
reflect this reality. Being "balanced" means being truthful about the weight of evidence, not artificially creating
50/50 splits when the reality is 90/10.
Your analysis should:
- Present all significant pros and cons discovered
- Weight them according to actual impact and likelihood
- If evidence strongly favors one conclusion, clearly state this
- Provide proportional coverage based on the strength of arguments
- Help the questioner see the true balance of considerations
Remember: Artificial balance that misrepresents reality is not helpful. True balance means accurate representation
of the evidence, even when it strongly points in one direction.""",
}
stance_prompt = stance_prompts.get(stance, stance_prompts["neutral"])
# Validate stance placeholder exists exactly once
if base_prompt.count("{stance_prompt}") != 1:
raise ValueError(
"System prompt must contain exactly one '{stance_prompt}' placeholder, "
f"found {base_prompt.count('{stance_prompt}')}"
)
# Inject stance into the system prompt
return base_prompt.replace("{stance_prompt}", stance_prompt)
def _get_single_response(
self, provider, model_config: ModelConfig, prompt: str, request: ConsensusRequest
) -> dict[str, Any]:
"""Get response from a single model - synchronous method."""
logger.debug(f"Getting response from {model_config.model} with stance '{model_config.stance}'")
try:
# Provider.generate_content is synchronous, not async
response = provider.generate_content(
prompt=prompt,
model_name=model_config.model,
system_prompt=self._get_stance_enhanced_prompt(model_config.stance, model_config.stance_prompt),
temperature=getattr(request, "temperature", None) or self.get_default_temperature(),
thinking_mode=getattr(request, "thinking_mode", "medium"),
images=getattr(request, "images", None) or [],
)
return {
"model": model_config.model,
"stance": model_config.stance,
"status": "success",
"verdict": response.content, # Contains structured Markdown
"metadata": {
"provider": getattr(provider.get_provider_type(), "value", provider.get_provider_type()),
"usage": response.usage if hasattr(response, "usage") else None,
"custom_stance_prompt": bool(model_config.stance_prompt),
},
}
except Exception as e:
logger.error(f"Error getting response from {model_config.model}:{model_config.stance}: {str(e)}")
return {"model": model_config.model, "stance": model_config.stance, "status": "error", "error": str(e)}
def _get_consensus_responses(
self, provider_configs: list[tuple], prompt: str, request: ConsensusRequest
) -> list[dict[str, Any]]:
"""Execute all model requests sequentially - purely synchronous like other tools."""
logger.debug(f"Processing {len(provider_configs)} models sequentially")
responses = []
for i, (provider, model_config) in enumerate(provider_configs):
try:
logger.debug(
f"Processing {model_config.model}:{model_config.stance} sequentially ({i+1}/{len(provider_configs)})"
)
# Direct synchronous call - matches pattern of other tools
response = self._get_single_response(provider, model_config, prompt, request)
responses.append(response)
except Exception as e:
logger.error(f"Failed to get response from {model_config.model}:{model_config.stance}: {str(e)}")
responses.append(
{
"model": model_config.model,
"stance": model_config.stance,
"status": "error",
"error": f"Unhandled exception: {str(e)}",
}
)
logger.debug(f"Sequential processing completed for {len(responses)} models")
return responses
def _format_consensus_output(self, responses: list[dict[str, Any]], skipped_entries: list[str]) -> str:
"""Format the consensus responses into structured output for Claude."""
logger.debug(f"Formatting consensus output for {len(responses)} responses")
# Separate successful and failed responses
successful_responses = [r for r in responses if r["status"] == "success"]
failed_responses = [r for r in responses if r["status"] == "error"]
logger.debug(f"Successful responses: {len(successful_responses)}, Failed: {len(failed_responses)}")
# Prepare the structured output (minimize size for MCP stability)
models_used = [
f"{r['model']}:{r['stance']}" if r["stance"] != "neutral" else r["model"] for r in successful_responses
]
models_errored = [
f"{r['model']}:{r['stance']}" if r["stance"] != "neutral" else r["model"] for r in failed_responses
]
# Prepare clean responses without truncation
clean_responses = []
for r in responses:
if r["status"] == "success":
clean_responses.append(
{
"model": r["model"],
"stance": r["stance"],
"status": r["status"],
"verdict": r.get("verdict", ""),
"metadata": r.get("metadata", {}),
}
)
else:
clean_responses.append(
{
"model": r["model"],
"stance": r["stance"],
"status": r["status"],
"error": r.get("error", "Unknown error"),
}
)
output_data = {
"status": "consensus_success" if successful_responses else "consensus_failed",
"models_used": models_used,
"models_skipped": skipped_entries,
"models_errored": models_errored,
"responses": clean_responses,
"next_steps": self._get_synthesis_guidance(successful_responses, failed_responses),
}
return json.dumps(output_data, indent=2)
def _get_synthesis_guidance(
self, successful_responses: list[dict[str, Any]], failed_responses: list[dict[str, Any]]
) -> str:
"""Generate guidance for Claude on how to synthesize the consensus results."""
if not successful_responses:
return (
"No models provided successful responses. Please retry with different models or "
"check the error messages for guidance on resolving the issues."
)
if len(successful_responses) == 1:
return (
"Only one model provided a successful response. Synthesize based on the available "
"perspective and indicate areas where additional expert input would be valuable "
"due to the limited consensus data."
)
# Multiple successful responses - provide comprehensive synthesis guidance
guidance = (
"Claude, synthesize these perspectives by first identifying the key points of "
"**agreement** and **disagreement** between the models. Then provide your final, "
"consolidated recommendation, explaining how you weighed the different opinions and "
"why your proposed solution is the most balanced approach. Explicitly address the "
"most critical risks raised by each model and provide actionable next steps for implementation."
)
if failed_responses:
guidance += (
f" Note: {len(failed_responses)} model(s) failed to respond - consider this "
"partial consensus and indicate where additional expert input would strengthen the analysis."
)
return guidance
async def prepare_prompt(self, request: ConsensusRequest) -> str:
"""Prepare the consensus prompt with context files and focus areas."""
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# Use prompt.txt content if available, otherwise use the prompt field
user_content = prompt_content if prompt_content else request.prompt
# Check user input size at MCP transport boundary (before adding internal content)
size_check = self.check_prompt_size(user_content)
if size_check:
# Need to return error, but prepare_prompt returns str
# Use exception to handle this cleanly
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Update request files list
if updated_files is not None:
request.files = updated_files
# Add focus areas if specified
if request.focus_areas:
focus_areas_text = "\n\nSpecific focus areas for this analysis:\n" + "\n".join(
f"- {area}" for area in request.focus_areas
)
user_content += focus_areas_text
# Add context files if provided (using centralized file handling with filtering)
if request.files:
file_content, processed_files = self._prepare_file_content_for_prompt(
request.files, request.continuation_id, "Context files"
)
self._actually_processed_files = processed_files
if file_content:
user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ==="
# Check token limits
self._validate_token_limit(user_content, "Content")
return user_content
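# Illustrative `execute()` arguments (hypothetical values; the shape follows
# ConsensusRequest and get_input_schema above):
#   {
#       "prompt": "Should we adopt GraphQL for the public API?",
#       "models": [
#           {"model": "o3", "stance": "for"},
#           {"model": "flash", "stance": "against",
#            "stance_prompt": "Identify risks and challenges..."},
#       ],
#       "focus_areas": ["performance", "security"],
#   }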
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Execute consensus gathering from multiple models."""
# Store arguments for base class methods
self._current_arguments = arguments
# Validate and create request
request = ConsensusRequest(**arguments)
# Validate model configurations and enforce limits
valid_configs, skipped_entries = self._validate_model_combinations(request.models)
if not valid_configs:
error_output = {
"status": "consensus_failed",
"error": "No valid model configurations after validation",
"models_skipped": skipped_entries,
"next_steps": "Please provide valid model configurations with proper model names and stance values.",
}
return [TextContent(type="text", text=json.dumps(error_output, indent=2))]
# Set up a dummy model context for consensus since we handle multiple models
# This is needed for base class methods like prepare_prompt to work
if not hasattr(self, "_model_context") or not self._model_context:
from utils.model_context import ModelContext
# Use the first model as the representative for token calculations
first_model = valid_configs[0].model if valid_configs else "flash"
self._model_context = ModelContext(first_model)
# Handle conversation continuation if specified
if request.continuation_id:
from utils.conversation_memory import build_conversation_history, get_thread
thread_context = get_thread(request.continuation_id)
if thread_context:
# Build conversation history using the same pattern as other tools
conversation_context, _ = build_conversation_history(thread_context, self._model_context)
if conversation_context:
# Add conversation context to the beginning of the prompt
enhanced_prompt = f"{conversation_context}\n\n{request.prompt}"
request.prompt = enhanced_prompt
# Prepare the consensus prompt
consensus_prompt = await self.prepare_prompt(request)
# Get providers for valid model configurations with caching to avoid duplicate lookups
provider_configs = []
provider_cache = {} # Cache to avoid duplicate provider lookups
for model_config in valid_configs:
try:
# Check cache first
if model_config.model in provider_cache:
provider = provider_cache[model_config.model]
else:
# Look up provider and cache it
provider = self.get_model_provider(model_config.model)
provider_cache[model_config.model] = provider
provider_configs.append((provider, model_config))
except Exception as e:
# Track failed models
model_display = (
f"{model_config.model}:{model_config.stance}"
if model_config.stance != "neutral"
else model_config.model
)
skipped_entries.append(f"{model_display} (provider not available: {str(e)})")
if not provider_configs:
error_output = {
"status": "consensus_failed",
"error": "No model providers available",
"models_skipped": skipped_entries,
"next_steps": "Please check that the specified models have configured API keys and are available.",
}
return [TextContent(type="text", text=json.dumps(error_output, indent=2))]
# Send to all models sequentially (purely synchronous like other tools)
logger.debug(f"Sending consensus request to {len(provider_configs)} models")
responses = self._get_consensus_responses(provider_configs, consensus_prompt, request)
logger.debug(f"Received {len(responses)} responses from consensus models")
# Enforce minimum success requirement - must have at least 1 successful response
successful_responses = [r for r in responses if r["status"] == "success"]
if not successful_responses:
error_output = {
"status": "consensus_failed",
"error": "All model calls failed - no successful responses received",
"models_skipped": skipped_entries,
"models_errored": [
f"{r['model']}:{r['stance']}" if r["stance"] != "neutral" else r["model"]
for r in responses
if r["status"] == "error"
],
"next_steps": "Please retry with different models or check the error messages for guidance on resolving the issues.",
}
return [TextContent(type="text", text=json.dumps(error_output, indent=2))]
logger.debug("About to format consensus output for MCP response")
# Structure the output and store in conversation memory
consensus_output = self._format_consensus_output(responses, skipped_entries)
# Log response size for debugging
output_size = len(consensus_output)
logger.debug(f"Consensus output size: {output_size:,} characters")
# Store in conversation memory if continuation_id is provided
if request.continuation_id:
self.store_conversation_turn(
request.continuation_id,
consensus_output,
request.files,
request.images,
responses, # Store individual responses in metadata
skipped_entries,
)
return [TextContent(type="text", text=consensus_output)]
def store_conversation_turn(
self,
continuation_id: str,
output: str,
files: list[str],
images: list[str],
responses: list[dict[str, Any]],
skipped_entries: list[str],
):
"""Store consensus turn in conversation memory with special metadata."""
from utils.conversation_memory import add_turn
# Filter successful and failed responses
successful_responses = [r for r in responses if r["status"] == "success"]
failed_responses = [r for r in responses if r["status"] == "error"]
# Prepare metadata for conversation storage
metadata = {
"tool_type": "consensus",
"models_used": [r["model"] for r in successful_responses],
"models_skipped": skipped_entries,
"models_errored": [r["model"] for r in failed_responses],
"individual_responses": successful_responses, # Only store successful responses
}
# Store the turn with special consensus metadata - add_turn is synchronous
add_turn(
thread_id=continuation_id,
role="assistant",
content=output,
files=files or [],
images=images or [],
tool_name="consensus",
model_provider="consensus", # Special provider name
model_name="consensus", # Special model name
model_metadata=metadata,
)