Add Consensus Tool for Multi-Model Perspective Gathering (#67)
* WIP: refactor model-name resolution so it happens once at the MCP call boundary; pass a model context around instead. The consensus tool gathers a consensus from multiple models, optionally assigning each a 'for' or 'against' stance to elicit more nuanced responses.
* Deduplicate model resolution so model_context is available before reaching deeper parts of the code; improve the abstraction for building conversations; throw programmer errors early.
* Guardrails: support the `model:option` format at the MCP boundary so future tools can use additional options if needed, instead of handling this only for consensus. Model names now accept an optional ":option" suffix for future use.
* Simplified async flow.
* Improved the request model to support natural language; simplified async flow.
* Fix consensus tool async/sync patterns to match codebase standards.

  CRITICAL FIXES:
  - Converted _get_consensus_responses from async to sync (matches other tools)
  - Converted store_conversation_turn from async to sync (add_turn is synchronous)
  - Removed unnecessary asyncio imports and sleep calls
  - Fixed ClosedResourceError in the MCP protocol during long consensus operations

  PATTERN ALIGNMENT:
  - Consensus tool now follows the same sync patterns as all other tools
  - Only execute() and prepare_prompt() are async (base class requirement)
  - All internal operations are synchronous, like analyze, chat, debug, etc.

  TESTING:
  - MCP simulation test now passes: consensus_stance ✅
  - Two-model consensus works correctly in ~35 seconds
  - Unknown stance handling defaults to neutral with warnings
  - All 9 unit tests pass (100% success rate)

  The consensus tool's async patterns were anomalous in the codebase. This fix aligns it with the established synchronous patterns used by all other tools while maintaining full functionality.

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>
* Fixed call order and added a new test.
* Cleanup: removed dead comments, added docs for the new tool, improved tests.

---------

Co-authored-by: Claude <noreply@anthropic.com>
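For orientation, the structured prompt handling added below expands `consensus:flash:for,o3:against,pro:neutral` into a list of per-model stance dicts. A consensus invocation using that shape might look like the following sketch; the argument names mirror the prompt-expansion code further down, not an authoritative tool schema:

```python
# Hypothetical consensus tool arguments, mirroring the shape produced by the
# structured prompt expansion in handle_get_prompt below (illustrative only).
arguments = {
    "prompt": "debate this proposal",
    "models": [
        {"model": "flash", "stance": "for"},
        {"model": "o3", "stance": "against"},
        {"model": "pro", "stance": "neutral"},
    ],
}
```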
Committed by GitHub · parent 9b98df650b · commit 95556ba9ea

server.py (135 changed lines)
@@ -25,7 +25,7 @@ import sys
 import time
 from datetime import datetime
 from logging.handlers import RotatingFileHandler
-from typing import Any
+from typing import Any, Optional
 
 from mcp.server import Server
 from mcp.server.models import InitializationOptions
@@ -50,6 +50,7 @@ from tools import (
     AnalyzeTool,
     ChatTool,
     CodeReviewTool,
+    ConsensusTool,
     DebugIssueTool,
     ListModelsTool,
     Precommit,
@@ -157,6 +158,7 @@ TOOLS = {
     "debug": DebugIssueTool(),  # Root cause analysis and debugging assistance
     "analyze": AnalyzeTool(),  # General-purpose file and code analysis
     "chat": ChatTool(),  # Interactive development chat and brainstorming
+    "consensus": ConsensusTool(),  # Multi-model consensus for diverse perspectives on technical proposals
     "listmodels": ListModelsTool(),  # List all available AI models by provider
     "precommit": Precommit(),  # Pre-commit validation of git changes
     "testgen": TestGenerationTool(),  # Comprehensive test generation with edge case coverage
@@ -519,6 +521,78 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
     if name in TOOLS:
         logger.info(f"Executing tool '{name}' with {len(arguments)} parameter(s)")
         tool = TOOLS[name]
+
+        # EARLY MODEL RESOLUTION AT MCP BOUNDARY
+        # Resolve model before passing to tool - this ensures consistent model handling
+        # NOTE: Consensus tool is exempt as it handles multiple models internally
+        from providers.registry import ModelProviderRegistry
+        from utils.file_utils import check_total_file_size
+        from utils.model_context import ModelContext
+
+        # Get model from arguments or use default
+        model_name = arguments.get("model") or DEFAULT_MODEL
+        logger.debug(f"Initial model for {name}: {model_name}")
+
+        # Parse model:option format if present
+        model_name, model_option = parse_model_option(model_name)
+        if model_option:
+            logger.debug(f"Parsed model format - model: '{model_name}', option: '{model_option}'")
+
+        # Consensus tool handles its own model configuration validation
+        # No special handling needed at server level
+
+        # Handle auto mode at MCP boundary - resolve to specific model
+        if model_name.lower() == "auto":
+            # Get tool category to determine appropriate model
+            tool_category = tool.get_model_category()
+            resolved_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
+            logger.info(f"Auto mode resolved to {resolved_model} for {name} (category: {tool_category.value})")
+            model_name = resolved_model
+            # Update arguments with resolved model
+            arguments["model"] = model_name
+
+        # Validate model availability at MCP boundary
+        provider = ModelProviderRegistry.get_provider_for_model(model_name)
+        if not provider:
+            # Get list of available models for error message
+            available_models = list(ModelProviderRegistry.get_available_models(respect_restrictions=True).keys())
+            tool_category = tool.get_model_category()
+            suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
+
+            error_message = (
+                f"Model '{model_name}' is not available with current API keys. "
+                f"Available models: {', '.join(available_models)}. "
+                f"Suggested model for {name}: '{suggested_model}' "
+                f"(category: {tool_category.value})"
+            )
+            error_output = ToolOutput(
+                status="error",
+                content=error_message,
+                content_type="text",
+                metadata={"tool_name": name, "requested_model": model_name},
+            )
+            return [TextContent(type="text", text=error_output.model_dump_json())]
+
+        # Create model context with resolved model and option
+        model_context = ModelContext(model_name, model_option)
+        arguments["_model_context"] = model_context
+        arguments["_resolved_model_name"] = model_name
+        logger.debug(
+            f"Model context created for {model_name} with {model_context.capabilities.context_window} token capacity"
+        )
+        if model_option:
+            logger.debug(f"Model option stored in context: '{model_option}'")
+
+        # EARLY FILE SIZE VALIDATION AT MCP BOUNDARY
+        # Check file sizes before tool execution using resolved model
+        if "files" in arguments and arguments["files"]:
+            logger.debug(f"Checking file sizes for {len(arguments['files'])} files with model {model_name}")
+            file_size_check = check_total_file_size(arguments["files"], model_name)
+            if file_size_check:
+                logger.warning(f"File size check failed for {name} with model {model_name}")
+                return [TextContent(type="text", text=ToolOutput(**file_size_check).model_dump_json())]
+
+        # Execute tool with pre-resolved model context
         result = await tool.execute(arguments)
         logger.info(f"Tool '{name}' execution completed")
 
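To summarize the boundary flow above in one runnable toy: only the argument keys `model`, `_model_context`, and `_resolved_model_name` are taken from the diff; the context class and the hard-coded fallback model are placeholders.

```python
# Toy recap of the MCP-boundary resolution above. FakeModelContext and the
# hard-coded "flash" fallback are stand-ins; the real code uses ModelContext
# and ModelProviderRegistry.get_preferred_fallback_model().
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeModelContext:
    model_name: str
    model_option: Optional[str] = None


def resolve_at_boundary(arguments: dict) -> dict:
    model_name = arguments.get("model") or "auto"
    # model:option parsing (see parse_model_option below)
    if ":" in model_name and not model_name.startswith("http"):
        model_name, model_option = (p.strip() for p in model_name.split(":", 1))
    else:
        model_option = None
    if model_name.lower() == "auto":
        model_name = "flash"  # placeholder for the registry's preferred fallback model
    arguments["model"] = model_name
    arguments["_model_context"] = FakeModelContext(model_name, model_option)
    arguments["_resolved_model_name"] = model_name
    return arguments


print(resolve_at_boundary({"model": "auto"}))
print(resolve_at_boundary({"model": "o3:for"}))
```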
@@ -542,6 +616,24 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
     return [TextContent(type="text", text=f"Unknown tool: {name}")]
 
 
+def parse_model_option(model_string: str) -> tuple[str, Optional[str]]:
+    """
+    Parse model:option format into model name and option.
+
+    Args:
+        model_string: String that may contain "model:option" format
+
+    Returns:
+        tuple: (model_name, option) where option may be None
+    """
+    if ":" in model_string and not model_string.startswith("http"):  # Avoid parsing URLs
+        parts = model_string.split(":", 1)
+        model_name = parts[0].strip()
+        model_option = parts[1].strip() if len(parts) > 1 else None
+        return model_name, model_option
+    return model_string.strip(), None
+
+
 def get_follow_up_instructions(current_turn_count: int, max_turns: int = None) -> str:
     """
     Generate dynamic follow-up instructions based on conversation turn count.
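A quick usage check for the helper above; the expected tuples are read directly off the implementation shown, and the model names are just the ones used elsewhere in this PR:

```python
# Behavior of parse_model_option as implemented above.
assert parse_model_option("o3") == ("o3", None)
assert parse_model_option("flash:for") == ("flash", "for")
assert parse_model_option("pro: against ") == ("pro", "against")   # whitespace is stripped
assert parse_model_option("http://localhost:8080") == ("http://localhost:8080", None)  # URLs are not split
```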
@@ -708,7 +800,12 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     # Capture files referenced in this turn
     user_files = arguments.get("files", [])
     logger.debug(f"[CONVERSATION_DEBUG] Adding user turn to thread {continuation_id}")
-    logger.debug(f"[CONVERSATION_DEBUG] User prompt length: {len(user_prompt)} chars")
+    from utils.token_utils import estimate_tokens
+
+    user_prompt_tokens = estimate_tokens(user_prompt)
+    logger.debug(
+        f"[CONVERSATION_DEBUG] User prompt length: {len(user_prompt)} chars (~{user_prompt_tokens:,} tokens)"
+    )
     logger.debug(f"[CONVERSATION_DEBUG] User files: {user_files}")
     success = add_turn(continuation_id, "user", user_prompt, files=user_files)
     if not success:
@@ -728,7 +825,9 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     logger.debug(f"[CONVERSATION_DEBUG] Using model: {model_context.model_name}")
     conversation_history, conversation_tokens = build_conversation_history(context, model_context)
     logger.debug(f"[CONVERSATION_DEBUG] Conversation history built: {conversation_tokens:,} tokens")
-    logger.debug(f"[CONVERSATION_DEBUG] Conversation history length: {len(conversation_history)} chars")
+    logger.debug(
+        f"[CONVERSATION_DEBUG] Conversation history length: {len(conversation_history)} chars (~{conversation_tokens:,} tokens)"
+    )
 
     # Add dynamic follow-up instructions based on turn count
     follow_up_instructions = get_follow_up_instructions(len(context.turns))
@@ -737,7 +836,10 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     # All tools now use standardized 'prompt' field
     original_prompt = arguments.get("prompt", "")
     logger.debug("[CONVERSATION_DEBUG] Extracting user input from 'prompt' field")
-    logger.debug(f"[CONVERSATION_DEBUG] User input length: {len(original_prompt)} chars")
+    original_prompt_tokens = estimate_tokens(original_prompt) if original_prompt else 0
+    logger.debug(
+        f"[CONVERSATION_DEBUG] User input length: {len(original_prompt)} chars (~{original_prompt_tokens:,} tokens)"
+    )
 
     # Merge original context with new prompt and follow-up instructions
     if conversation_history:
@@ -963,9 +1065,10 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
     """
     logger.debug(f"MCP client requested prompt: {name} with args: {arguments}")
 
-    # Parse structured prompt names like "chat:o3" or "chat:continue"
+    # Parse structured prompt names like "chat:o3", "chat:continue", or "consensus:flash:for,o3:against,pro:neutral"
     parsed_model = None
     is_continuation = False
+    consensus_models = None
    base_name = name
 
     if ":" in name:
@@ -977,6 +1080,10 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
         if second_part.lower() == "continue":
             is_continuation = True
             logger.debug(f"Parsed continuation prompt: tool='{base_name}', continue=True")
+        elif base_name == "consensus" and "," in second_part:
+            # Handle consensus tool format: "consensus:flash:for,o3:against,pro:neutral"
+            consensus_models = ConsensusTool.parse_structured_prompt_models(second_part)
+            logger.debug(f"Parsed consensus prompt with models: {consensus_models}")
         else:
             parsed_model = second_part
             logger.debug(f"Parsed structured prompt: tool='{base_name}', model='{parsed_model}'")
@@ -1046,6 +1153,18 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
         else:
             # "/zen:chat:continue" case
             tool_instruction = f"Continue the previous conversation using the {tool_name} tool"
+    elif consensus_models:
+        # "/zen:consensus:flash:for,o3:against,pro:neutral" case
+        model_descriptions = []
+        for model_config in consensus_models:
+            if model_config["stance"] != "neutral":
+                model_descriptions.append(f"{model_config['model']} with {model_config['stance']} stance")
+            else:
+                model_descriptions.append(f"{model_config['model']} with neutral stance")
+
+        models_text = ", ".join(model_descriptions)
+        models_json = str(consensus_models).replace("'", '"')  # Convert to JSON-like format for Claude
+        tool_instruction = f"Use the {tool_name} tool with models: {models_text}. Call the consensus tool with prompt='debate this proposal' and models={models_json}"
     elif parsed_model:
         # "/zen:chat:o3" case
         tool_instruction = f"Use the {tool_name} tool with model '{parsed_model}'"
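ConsensusTool.parse_structured_prompt_models itself is not part of this diff; judging from the `model:stance` list format and the `{"model": ..., "stance": ...}` dicts consumed right above, it presumably behaves roughly like this hypothetical sketch:

```python
# Hypothetical sketch of what ConsensusTool.parse_structured_prompt_models likely does,
# inferred from the "model:stance,model:stance" prompt format and the
# {"model": ..., "stance": ...} dicts used above. Not the actual implementation.
def parse_structured_prompt_models(spec: str) -> list[dict[str, str]]:
    models = []
    for entry in spec.split(","):
        model, _, stance = entry.strip().partition(":")
        models.append({"model": model, "stance": stance or "neutral"})
    return models


# For "/zen:consensus:flash:for,o3:against,pro:neutral", the part after the tool name:
print(parse_structured_prompt_models("flash:for,o3:against,pro:neutral"))
# [{'model': 'flash', 'stance': 'for'}, {'model': 'o3', 'stance': 'against'}, {'model': 'pro', 'stance': 'neutral'}]
```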
@@ -1117,4 +1236,8 @@ async def main():
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        # Handle graceful shutdown
+        pass