Add Consensus Tool for Multi-Model Perspective Gathering (#67)

* WIP
Refactor model name resolution; it should happen once at the MCP call boundary, with the model context passed around instead.
The consensus tool gathers a consensus from multiple models, optionally assigning each a 'for' or 'against' stance to surface nuanced responses (see the example request below).
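
A consensus request supplies a prompt plus a list of per-model configurations. A minimal illustrative call (model names and values here are placeholders, not defaults) might look like:

    # Hypothetical example arguments for the consensus tool
    arguments = {
        "prompt": "Should we migrate the REST endpoints to async handlers?",
        "models": [
            {"model": "o3", "stance": "for"},        # argues in favour
            {"model": "flash", "stance": "against"}, # argues against
            {"model": "pro"},                        # stance defaults to neutral
        ],
        "focus_areas": ["performance", "maintainability"],
    }
    # result = await ConsensusTool().execute(arguments)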

* Deduplicate model resolution; model_context should be available before reaching deeper parts of the code.
Improve the abstraction used when building conversations.
Throw programmer errors early.

* Guardrails
Support the `model:option` format at the MCP boundary so that future tools can use additional options if needed, rather than handling this only for consensus (a parsing sketch follows below).
Model names now support an optional ":option" suffix reserved for future use.
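
A minimal sketch of that boundary parsing (the helper name is hypothetical; consensus currently uses the option slot for stances such as "for" or "against"):

    # Hypothetical helper: split a "model:option" spec, the option part being optional
    def parse_model_spec(spec: str) -> tuple[str, str | None]:
        model, _, option = spec.partition(":")
        return model.strip(), (option.strip() or None)

    parse_model_spec("flash:for")  # -> ("flash", "for")
    parse_model_spec("pro")        # -> ("pro", None)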

* Simplified async flow

* Improved the request model to support natural language
Simplified async flow

* Fix consensus tool async/sync patterns to match codebase standards

CRITICAL FIXES:
- Converted _get_consensus_responses from async to sync (matches other tools)
- Converted store_conversation_turn from async to sync (add_turn is synchronous)
- Removed unnecessary asyncio imports and sleep calls
- Fixed ClosedResourceError in MCP protocol during long consensus operations

PATTERN ALIGNMENT:
- Consensus tool now follows same sync patterns as all other tools
- Only execute() and prepare_prompt() are async (base class requirement)
- All internal operations are synchronous like analyze, chat, debug, etc.

TESTING:
- MCP simulation test now passes: consensus_stance 
- Two-model consensus works correctly in ~35 seconds
- Unknown stance handling defaults to neutral with warnings
- All 9 unit tests pass (100% success rate)

The consensus tool's async patterns were anomalous in the codebase.
This fix aligns it with the established synchronous patterns used
by all other tools (sketched below) while maintaining full functionality.
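
A rough sketch of the resulting pattern (class and helper names are illustrative; only execute() and prepare_prompt() mirror the actual base class contract):

    # Sketch of the sync-inside-async pattern described above
    class ExampleTool:
        async def execute(self, arguments: dict) -> list[str]:
            prompt = await self.prepare_prompt(arguments)
            responses = self._gather_responses(prompt)  # plain synchronous call
            self._store_turn(responses)                 # add_turn() is synchronous
            return responses

        async def prepare_prompt(self, arguments: dict) -> str:
            return arguments["prompt"]

        def _gather_responses(self, prompt: str) -> list[str]:
            # Providers are called directly, one after another; no asyncio needed
            return [prompt.upper()]

        def _store_turn(self, responses: list[str]) -> None:
            # Placeholder for conversation-memory storage
            pass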

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed call order and added a new test

* Cleaned up dead comments
Added docs for the new tool
Improved tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
Beehive Innovations authored on 2025-06-17 10:53:17 +04:00, committed by GitHub
parent 9b98df650b, commit 95556ba9ea
31 changed files with 2643 additions and 324 deletions

tools/__init__.py

@@ -5,6 +5,7 @@ Tool implementations for Zen MCP Server
from .analyze import AnalyzeTool
from .chat import ChatTool
from .codereview import CodeReviewTool
from .consensus import ConsensusTool
from .debug import DebugIssueTool
from .listmodels import ListModelsTool
from .precommit import Precommit
@@ -19,6 +20,7 @@ __all__ = [
"DebugIssueTool",
"AnalyzeTool",
"ChatTool",
"ConsensusTool",
"ListModelsTool",
"Precommit",
"RefactorTool",

tools/analyze.py

@@ -141,13 +141,7 @@ class AnalyzeTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# File size validation happens at MCP boundary in server.py
# Use centralized file processing logic
continuation_id = getattr(request, "continuation_id", None)

tools/base.py

@@ -31,6 +31,7 @@ from providers.base import ProviderType
from utils import check_token_limit
from utils.conversation_memory import (
MAX_CONVERSATION_TURNS,
ConversationTurn,
add_turn,
create_thread,
get_conversation_file_list,
@@ -643,6 +644,41 @@ class BaseTool(ABC):
)
return requested_files
def format_conversation_turn(self, turn: ConversationTurn) -> list[str]:
"""
Format a conversation turn for display in conversation history.
Tools can override this to provide custom formatting for their responses
while maintaining the standard structure for cross-tool compatibility.
This method is called by build_conversation_history when reconstructing
conversation context, allowing each tool to control how its responses
appear in subsequent conversation turns.
Args:
turn: The conversation turn to format (from utils.conversation_memory)
Returns:
list[str]: Lines of formatted content for this turn
Example:
Default implementation returns:
["Files used in this turn: file1.py, file2.py", "", "Response content..."]
Tools can override to add custom sections, formatting, or metadata display.
"""
parts = []
# Add files context if present
if turn.files:
parts.append(f"Files used in this turn: {', '.join(turn.files)}")
parts.append("") # Empty line for readability
# Add the actual content
parts.append(turn.content)
return parts
def _prepare_file_content_for_prompt(
self,
request_files: list[str],
@@ -716,109 +752,35 @@ class BaseTool(ABC):
elif max_tokens is not None:
effective_max_tokens = max_tokens - reserve_tokens
else:
# Get model-specific limits
# First check if model_context was passed from server.py
model_context = None
if arguments:
model_context = arguments.get("_model_context") or getattr(self, "_current_arguments", {}).get(
"_model_context"
# The execute() method is responsible for setting self._model_context.
# A missing context is a programming error, not a fallback case.
if not hasattr(self, "_model_context") or not self._model_context:
logger.error(
f"[FILES] {self.name}: _prepare_file_content_for_prompt called without a valid model context. "
"This indicates an incorrect call sequence in the tool's implementation."
)
# Fail fast to reveal integration issues. A silent fallback with arbitrary
# limits can hide bugs and lead to unexpected token usage or silent failures.
raise RuntimeError("ModelContext not initialized before file preparation.")
if model_context:
# Use the passed model context
try:
token_allocation = model_context.calculate_token_allocation()
effective_max_tokens = token_allocation.file_tokens - reserve_tokens
logger.debug(
f"[FILES] {self.name}: Using passed model context for {model_context.model_name}: "
f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total"
)
except Exception as e:
logger.warning(f"[FILES] {self.name}: Error using passed model context: {e}")
# Fall through to manual calculation
model_context = None
if not model_context:
# Manual calculation as fallback
from config import DEFAULT_MODEL
model_name = getattr(self, "_current_model_name", None) or DEFAULT_MODEL
# Handle auto mode gracefully
if model_name.lower() == "auto":
from providers.registry import ModelProviderRegistry
# Use tool-specific fallback model for capacity estimation
# This properly handles different providers (OpenAI=200K, Gemini=1M)
tool_category = self.get_model_category()
fallback_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
logger.debug(
f"[FILES] {self.name}: Auto mode detected, using {fallback_model} "
f"for {tool_category.value} tool capacity estimation"
)
try:
provider = self.get_model_provider(fallback_model)
capabilities = provider.get_capabilities(fallback_model)
# Calculate content allocation based on model capacity
if capabilities.context_window < 300_000:
# Smaller context models: 60% content, 40% response
model_content_tokens = int(capabilities.context_window * 0.6)
else:
# Larger context models: 80% content, 20% response
model_content_tokens = int(capabilities.context_window * 0.8)
effective_max_tokens = model_content_tokens - reserve_tokens
logger.debug(
f"[FILES] {self.name}: Using {fallback_model} capacity for auto mode: "
f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
)
except (ValueError, AttributeError) as e:
# Handle specific errors: provider not found, model not supported, missing attributes
logger.warning(
f"[FILES] {self.name}: Could not get capabilities for fallback model {fallback_model}: {type(e).__name__}: {e}"
)
# Fall back to conservative default for safety
effective_max_tokens = 100_000 - reserve_tokens
except Exception as e:
# Catch any other unexpected errors
logger.error(
f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
)
effective_max_tokens = 100_000 - reserve_tokens
else:
# Normal mode - use the specified model
try:
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
# Calculate content allocation based on model capacity
if capabilities.context_window < 300_000:
# Smaller context models: 60% content, 40% response
model_content_tokens = int(capabilities.context_window * 0.6)
else:
# Larger context models: 80% content, 20% response
model_content_tokens = int(capabilities.context_window * 0.8)
effective_max_tokens = model_content_tokens - reserve_tokens
logger.debug(
f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
f"{model_content_tokens:,} content tokens from {capabilities.context_window:,} total"
)
except (ValueError, AttributeError) as e:
# Handle specific errors: provider not found, model not supported, missing attributes
logger.warning(
f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}"
)
# Fall back to conservative default for safety
effective_max_tokens = 100_000 - reserve_tokens
except Exception as e:
# Catch any other unexpected errors
logger.error(
f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}"
)
effective_max_tokens = 100_000 - reserve_tokens
# This is now the single source of truth for token allocation.
model_context = self._model_context
try:
token_allocation = model_context.calculate_token_allocation()
# Standardize on `file_tokens` for consistency and correctness.
# This fixes the bug where the old code incorrectly used content_tokens
effective_max_tokens = token_allocation.file_tokens - reserve_tokens
logger.debug(
f"[FILES] {self.name}: Using model context for {model_context.model_name}: "
f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total"
)
except Exception as e:
logger.error(
f"[FILES] {self.name}: Failed to calculate token allocation from model context: {e}", exc_info=True
)
# If the context exists but calculation fails, we still need to prevent a crash.
# A loud error is logged, and we fall back to a safe default.
effective_max_tokens = 100_000 - reserve_tokens
# Ensure we have a reasonable minimum budget
effective_max_tokens = max(1000, effective_max_tokens)
@@ -1087,8 +1049,14 @@ When recommending searches, be specific about what information you need and why
# Get model capabilities to check image support and size limits
try:
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
# Use the already-resolved provider from model context if available
if hasattr(self, "_model_context") and self._model_context:
provider = self._model_context.provider
capabilities = self._model_context.capabilities
else:
# Fallback for edge cases (e.g., direct test calls)
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
except Exception as e:
logger.warning(f"Failed to get capabilities for model {model_name}: {e}")
# Fall back to checking custom models configuration
@@ -1214,7 +1182,7 @@ When recommending searches, be specific about what information you need and why
return estimate_file_tokens(file_path)
def check_total_file_size(self, files: list[str]) -> Optional[dict[str, Any]]:
def check_total_file_size(self, files: list[str], model_name: str) -> Optional[dict[str, Any]]:
"""
Check if total file sizes would exceed token threshold before embedding.
@@ -1224,6 +1192,7 @@ When recommending searches, be specific about what information you need and why
Args:
files: List of file paths to check
model_name: The resolved model name to use for token limits
Returns:
Dict with `code_too_large` response if too large, None if acceptable
@@ -1231,13 +1200,6 @@ When recommending searches, be specific about what information you need and why
if not files:
return None
# Get current model name for context-aware thresholds
model_name = getattr(self, "_current_model_name", None)
if not model_name:
from config import DEFAULT_MODEL
model_name = DEFAULT_MODEL
# Use centralized file size checking with model context
from utils.file_utils import check_total_file_size as check_file_size_utility
@@ -1353,6 +1315,65 @@ When recommending searches, be specific about what information you need and why
# Extract and validate images from request
images = getattr(request, "images", None) or []
# MODEL RESOLUTION NOW HAPPENS AT MCP BOUNDARY
# Extract pre-resolved model context from server.py
model_context = self._current_arguments.get("_model_context")
resolved_model_name = self._current_arguments.get("_resolved_model_name")
if model_context and resolved_model_name:
# Model was already resolved at MCP boundary
model_name = resolved_model_name
logger.debug(f"Using pre-resolved model '{model_name}' from MCP boundary")
else:
# Fallback for direct execute calls
model_name = getattr(request, "model", None)
if not model_name:
from config import DEFAULT_MODEL
model_name = DEFAULT_MODEL
logger.debug(f"Using fallback model resolution for '{model_name}' (test mode)")
# For tests: Check if we should require model selection (auto mode)
if self._should_require_model_selection(model_name):
# Get suggested model based on tool category
from providers.registry import ModelProviderRegistry
tool_category = self.get_model_category()
suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
# Build error message based on why selection is required
if model_name.lower() == "auto":
error_message = (
f"Model parameter is required in auto mode. "
f"Suggested model for {self.name}: '{suggested_model}' "
f"(category: {tool_category.value})"
)
else:
# Model was specified but not available
available_models = self._get_available_models()
error_message = (
f"Model '{model_name}' is not available with current API keys. "
f"Available models: {', '.join(available_models)}. "
f"Suggested model for {self.name}: '{suggested_model}' "
f"(category: {tool_category.value})"
)
error_output = ToolOutput(
status="error",
content=error_message,
content_type="text",
)
return [TextContent(type="text", text=error_output.model_dump_json())]
# Create model context for tests
from utils.model_context import ModelContext
model_context = ModelContext(model_name)
# Store resolved model name for use by helper methods
self._current_model_name = model_name
self._model_context = model_context
# Check if we have continuation_id - if so, conversation history is already embedded
continuation_id = getattr(request, "continuation_id", None)
@@ -1389,57 +1410,11 @@ When recommending searches, be specific about what information you need and why
prompt = f"{prompt}\n\n{follow_up_instructions}"
logger.debug(f"Added follow-up instructions for new {self.name} conversation")
# Extract model configuration from request or use defaults
model_name = getattr(request, "model", None)
if not model_name:
from config import DEFAULT_MODEL
model_name = DEFAULT_MODEL
# Check if we need Claude to select a model
# This happens when:
# 1. The model is explicitly "auto"
# 2. The requested model is not available
if self._should_require_model_selection(model_name):
# Get suggested model based on tool category
from providers.registry import ModelProviderRegistry
tool_category = self.get_model_category()
suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
# Build error message based on why selection is required
if model_name.lower() == "auto":
error_message = (
f"Model parameter is required in auto mode. "
f"Suggested model for {self.name}: '{suggested_model}' "
f"(category: {tool_category.value})"
)
else:
# Model was specified but not available
# Get list of available models
available_models = self._get_available_models()
error_message = (
f"Model '{model_name}' is not available with current API keys. "
f"Available models: {', '.join(available_models)}. "
f"Suggested model for {self.name}: '{suggested_model}' "
f"(category: {tool_category.value})"
)
error_output = ToolOutput(
status="error",
content=error_message,
content_type="text",
)
return [TextContent(type="text", text=error_output.model_dump_json())]
# Store model name for use by helper methods like _prepare_file_content_for_prompt
# Only set this after auto mode validation to prevent "auto" being used as a model name
self._current_model_name = model_name
# Model name already resolved and stored in self._current_model_name earlier
# Validate images at MCP boundary if any were provided
if images:
image_validation_error = self._validate_image_limits(images, model_name, continuation_id)
image_validation_error = self._validate_image_limits(images, self._current_model_name, continuation_id)
if image_validation_error:
return [TextContent(type="text", text=json.dumps(image_validation_error))]
@@ -1451,10 +1426,10 @@ When recommending searches, be specific about what information you need and why
thinking_mode = self.get_default_thinking_mode()
# Get the appropriate model provider
provider = self.get_model_provider(model_name)
provider = self.get_model_provider(self._current_model_name)
# Validate and correct temperature for this model
temperature, temp_warnings = self._validate_and_correct_temperature(model_name, temperature)
temperature, temp_warnings = self._validate_and_correct_temperature(self._current_model_name, temperature)
# Log any temperature corrections
for warning in temp_warnings:
@@ -1465,16 +1440,21 @@ When recommending searches, be specific about what information you need and why
# Generate AI response using the provider
logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.name}")
logger.info(f"Using model: {model_name} via {provider.get_provider_type().value} provider")
logger.debug(f"Prompt length: {len(prompt)} characters")
logger.info(f"Using model: {self._current_model_name} via {provider.get_provider_type().value} provider")
# Import token estimation utility
from utils.token_utils import estimate_tokens
estimated_tokens = estimate_tokens(prompt)
logger.debug(f"Prompt length: {len(prompt)} characters (~{estimated_tokens:,} tokens)")
# Generate content with provider abstraction
model_response = provider.generate_content(
prompt=prompt,
model_name=model_name,
model_name=self._current_model_name,
system_prompt=system_prompt,
temperature=temperature,
thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None,
thinking_mode=thinking_mode if provider.supports_thinking_mode(self._current_model_name) else None,
images=images if images else None, # Pass images via kwargs
)
@@ -1486,7 +1466,11 @@ When recommending searches, be specific about what information you need and why
# Parse response to check for clarification requests or format output
# Pass model info for conversation tracking
model_info = {"provider": provider, "model_name": model_name, "model_response": model_response}
model_info = {
"provider": provider,
"model_name": self._current_model_name,
"model_response": model_response,
}
tool_output = self._parse_response(raw_text, request, model_info)
logger.info(f"{self.name} tool completed successfully")
@@ -1894,8 +1878,14 @@ When recommending searches, be specific about what information you need and why
Tuple of (corrected_temperature, warning_messages)
"""
try:
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
# Use the already-resolved provider and capabilities from model context
if hasattr(self, "_model_context") and self._model_context:
capabilities = self._model_context.capabilities
else:
# Fallback for edge cases (e.g., direct test calls)
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
constraint = capabilities.temperature_constraint
warnings = []

tools/codereview.py

@@ -227,13 +227,7 @@ class CodeReviewTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# File size validation happens at MCP boundary in server.py
# Check user input size at MCP transport boundary (before adding internal content)
user_content = request.prompt

tools/consensus.py (new file, 846 lines)

@@ -0,0 +1,846 @@
"""
Consensus tool for multi-model perspective gathering and validation
"""
import json
import logging
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import BaseModel, Field, field_validator
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION
from systemprompts import CONSENSUS_PROMPT
from .base import BaseTool, ToolRequest
logger = logging.getLogger(__name__)
class ModelConfig(BaseModel):
"""Enhanced model configuration for consensus tool"""
model: str = Field(..., description="Model name to use (e.g., 'o3', 'flash', 'pro')")
stance: Optional[str] = Field(
default="neutral",
description=(
"Stance for this model. Supportive: 'for', 'support', 'favor'. "
"Critical: 'against', 'oppose', 'critical'. Neutral: 'neutral'. "
"Defaults to 'neutral'."
),
)
stance_prompt: Optional[str] = Field(
default=None,
description=(
"Custom stance-specific instructions for this model. "
"If provided, this will be used instead of the default stance prompt. "
"Should be clear, specific instructions about how this model should approach the analysis."
),
)
class ConsensusRequest(ToolRequest):
"""Request model for consensus tool"""
prompt: str = Field(
...,
description=(
"Description of what to get consensus on, testing objectives, and specific scope/focus areas. "
"Be as detailed as possible about the proposal, plan, or idea you want multiple perspectives on."
),
)
models: list[ModelConfig] = Field(
...,
description=(
"List of model configurations for consensus analysis. Each model can have a specific stance and custom instructions. "
"Example: [{'model': 'o3', 'stance': 'for', 'stance_prompt': 'Focus on benefits and opportunities...'}, "
"{'model': 'flash', 'stance': 'against', 'stance_prompt': 'Identify risks and challenges...'}]. "
"Maximum 2 instances per model+stance combination."
),
)
files: Optional[list[str]] = Field(
default_factory=list,
description="Optional files or directories for additional context (must be absolute paths)",
)
images: Optional[list[str]] = Field(
default_factory=list,
description=(
"Optional images showing expected UI changes, design requirements, "
"or visual references for the consensus analysis"
),
)
focus_areas: Optional[list[str]] = Field(
default_factory=list,
description="Specific aspects to focus on (e.g., 'performance', 'security', 'user experience')",
)
@field_validator("models")
@classmethod
def validate_models_not_empty(cls, v):
if not v:
raise ValueError("At least one model must be specified")
return v
class ConsensusTool(BaseTool):
"""Multi-model consensus tool for gathering diverse perspectives on technical proposals"""
def __init__(self):
super().__init__()
@staticmethod
def parse_structured_prompt_models(model_spec: str) -> list[dict[str, str]]:
"""
Parse consensus model specification from structured prompt format.
This method parses structured prompt specifications used in Claude Code shortcuts
like "/zen:consensus:flash:for,o3:against,pro:neutral" to extract model configurations
with their assigned stances.
Supported formats:
- "model:stance" - Explicit stance assignment (e.g., "flash:for", "o3:against")
- "model" - Defaults to neutral stance (e.g., "pro" becomes "pro:neutral")
Supported stances:
- Supportive: "for", "support", "favor"
- Critical: "against", "oppose", "critical"
- Neutral: "neutral" (default)
Args:
model_spec (str): Comma-separated model specification string.
Examples: "flash:for,o3:against,pro:neutral" or "flash:for,o3:against,pro"
Returns:
list[dict[str, str]]: List of model configuration dictionaries with keys:
- "model": The model name (e.g., "flash", "o3", "pro")
- "stance": The normalized stance (e.g., "for", "against", "neutral")
Examples:
>>> ConsensusTool.parse_structured_prompt_models("flash:for,o3:against,pro")
[{"model": "flash", "stance": "for"}, {"model": "o3", "stance": "against"}, {"model": "pro", "stance": "neutral"}]
>>> ConsensusTool.parse_structured_prompt_models("flash,o3,pro")
[{"model": "flash", "stance": "neutral"}, {"model": "o3", "stance": "neutral"}, {"model": "pro", "stance": "neutral"}]
"""
models = []
# Split by comma to get individual model specs
model_parts = model_spec.split(",")
for part in model_parts:
part = part.strip()
if ":" in part:
# Model with stance: "flash:for" or "o3:against"
model_name, stance = part.split(":", 1)
models.append({"model": model_name.strip(), "stance": stance.strip()})
else:
# Model without stance (defaults to neutral): "pro"
models.append({"model": part.strip(), "stance": "neutral"})
return models
def get_name(self) -> str:
return "consensus"
def get_description(self) -> str:
return (
"MULTI-MODEL CONSENSUS - Gather diverse perspectives from multiple AI models on technical proposals, "
"plans, and ideas. Perfect for validation, feasibility assessment, and getting comprehensive "
"viewpoints on complex decisions. Supports advanced stance steering with custom instructions for each model. "
"You can specify different stances (for/against/neutral) and provide custom stance prompts to guide each "
"model's analysis. Example: [{'model': 'o3', 'stance': 'for', 'stance_prompt': 'Focus on implementation "
"benefits and user value'}, {'model': 'flash', 'stance': 'against', 'stance_prompt': 'Identify potential "
"risks and technical challenges'}]. Use neutral stances by default unless structured debate would add value."
)
def get_input_schema(self) -> dict[str, Any]:
schema = {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": (
"Description of what to get consensus on, testing objectives, and specific scope/focus areas. "
"Be as detailed as possible about the proposal, plan, or idea you want multiple perspectives on."
),
},
"models": {
"type": "array",
"items": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "Model name to use (e.g., 'o3', 'flash', 'pro')",
},
"stance": {
"type": "string",
"enum": ["for", "support", "favor", "against", "oppose", "critical", "neutral"],
"description": "Stance for this model: supportive ('for', 'support', 'favor'), critical ('against', 'oppose', 'critical'), or 'neutral'",
"default": "neutral",
},
"stance_prompt": {
"type": "string",
"description": "Custom stance-specific instructions for this model. If provided, this will be used instead of the default stance prompt.",
},
},
"required": ["model"],
},
"description": (
"List of model configurations for consensus analysis. Each model can have a specific stance and custom instructions. "
"Example: [{'model': 'o3', 'stance': 'for', 'stance_prompt': 'Focus on benefits and opportunities...'}, "
"{'model': 'flash', 'stance': 'against', 'stance_prompt': 'Identify risks and challenges...'}]. "
"Maximum 2 instances per model+stance combination."
),
},
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Optional files or directories for additional context (must be absolute paths)",
},
"images": {
"type": "array",
"items": {"type": "string"},
"description": (
"Optional images showing expected UI changes, design requirements, "
"or visual references for the consensus analysis"
),
},
"focus_areas": {
"type": "array",
"items": {"type": "string"},
"description": "Specific aspects to focus on (e.g., 'performance', 'security', 'user experience')",
},
"temperature": {
"type": "number",
"description": "Temperature (0-1, default 0.2 for consistency)",
"minimum": 0,
"maximum": 1,
"default": self.get_default_temperature(),
},
"thinking_mode": {
"type": "string",
"enum": ["minimal", "low", "medium", "high", "max"],
"description": (
"Thinking depth: minimal (0.5% of model max), low (8%), medium (33%), "
"high (67%), max (100% of model max)"
),
},
"use_websearch": {
"type": "boolean",
"description": (
"Enable web search for documentation, best practices, and current information. "
"Particularly useful for: brainstorming sessions, architectural design discussions, "
"exploring industry best practices, working with specific frameworks/technologies, "
"researching solutions to complex problems, or when current documentation and "
"community insights would enhance the analysis."
),
"default": True,
},
"continuation_id": {
"type": "string",
"description": (
"Thread continuation ID for multi-turn conversations. Can be used to continue "
"conversations across different tools. Only provide this if continuing a previous "
"conversation thread."
),
},
},
"required": ["prompt", "models"],
}
return schema
def get_system_prompt(self) -> str:
return CONSENSUS_PROMPT
def get_default_temperature(self) -> float:
return 0.2 # Lower temperature for more consistent consensus responses
def get_model_category(self) -> "ToolModelCategory":
"""Consensus uses extended reasoning models for deep analysis"""
from tools.models import ToolModelCategory
return ToolModelCategory.EXTENDED_REASONING
def get_request_model(self):
return ConsensusRequest
def format_conversation_turn(self, turn) -> list[str]:
"""
Format consensus turns with individual model responses for better readability.
This custom formatting shows the individual model responses that were
synthesized into the consensus, making it easier to understand the
reasoning behind the final recommendation.
"""
parts = []
# Add files context if present
if turn.files:
parts.append(f"Files used in this turn: {', '.join(turn.files)}")
parts.append("")
# Check if this is a consensus turn with individual responses
if turn.model_metadata and turn.model_metadata.get("individual_responses"):
individual_responses = turn.model_metadata["individual_responses"]
# Add consensus header
models_consulted = []
for resp in individual_responses:
model = resp["model"]
stance = resp.get("stance", "neutral")
if stance != "neutral":
models_consulted.append(f"{model}:{stance}")
else:
models_consulted.append(model)
parts.append(f"Models consulted: {', '.join(models_consulted)}")
parts.append("")
parts.append("=== INDIVIDUAL MODEL RESPONSES ===")
parts.append("")
# Add each successful model response
for i, response in enumerate(individual_responses):
model_name = response["model"]
stance = response.get("stance", "neutral")
verdict = response["verdict"]
stance_label = f"({stance.title()} Stance)" if stance != "neutral" else "(Neutral Analysis)"
parts.append(f"**{model_name.upper()} {stance_label}**:")
parts.append(verdict)
if i < len(individual_responses) - 1:
parts.append("")
parts.append("---")
parts.append("")
parts.append("=== END INDIVIDUAL RESPONSES ===")
parts.append("")
parts.append("Claude's Synthesis:")
# Add the actual content
parts.append(turn.content)
return parts
def _normalize_stance(self, stance: Optional[str]) -> str:
"""Normalize stance to canonical form."""
if not stance:
return "neutral"
stance = stance.lower()
# Define stance synonyms
supportive_stances = {"for", "support", "favor"}
critical_stances = {"against", "oppose", "critical"}
# Map synonyms to canonical stance
if stance in supportive_stances:
return "for"
elif stance in critical_stances:
return "against"
elif stance == "neutral":
return "neutral"
else:
# Unknown stances default to neutral for robustness
logger.warning(
f"Unknown stance '{stance}' provided, defaulting to 'neutral'. Valid stances: {', '.join(sorted(supportive_stances | critical_stances))}, or 'neutral'"
)
return "neutral"
def _validate_model_combinations(self, model_configs: list[ModelConfig]) -> tuple[list[ModelConfig], list[str]]:
"""Validate model configurations and enforce limits.
Returns:
tuple: (valid_configs, skipped_entries)
- Each model+stance combination can appear max 2 times
- Same model+stance limited to 2 instances
"""
valid_configs = []
skipped_entries = []
combination_counts = {} # Track (model, stance) -> count
for config in model_configs:
try:
# Normalize stance
normalized_stance = self._normalize_stance(config.stance)
# Create normalized config
normalized_config = ModelConfig(
model=config.model, stance=normalized_stance, stance_prompt=config.stance_prompt
)
combination_key = (config.model, normalized_stance)
current_count = combination_counts.get(combination_key, 0)
if current_count >= DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION:
# Already have max instances of this model+stance combination
skipped_entries.append(
f"{config.model}:{normalized_stance} (max {DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION} instances)"
)
continue
combination_counts[combination_key] = current_count + 1
valid_configs.append(normalized_config)
except ValueError as e:
# Invalid stance or model
skipped_entries.append(f"{config.model} ({str(e)})")
continue
return valid_configs, skipped_entries
def _get_stance_enhanced_prompt(self, stance: str, custom_stance_prompt: Optional[str] = None) -> str:
"""Get the system prompt with stance injection based on the stance."""
base_prompt = self.get_system_prompt()
# If custom stance prompt is provided, use it instead of default
if custom_stance_prompt:
# Validate stance placeholder exists exactly once
if base_prompt.count("{stance_prompt}") != 1:
raise ValueError(
"System prompt must contain exactly one '{stance_prompt}' placeholder, "
f"found {base_prompt.count('{stance_prompt}')}"
)
return base_prompt.replace("{stance_prompt}", custom_stance_prompt)
stance_prompts = {
"for": """SUPPORTIVE PERSPECTIVE WITH INTEGRITY
You are tasked with advocating FOR this proposal, but with CRITICAL GUARDRAILS:
MANDATORY ETHICAL CONSTRAINTS:
- This is NOT a debate for entertainment. You MUST act in good faith and in the best interest of the questioner
- You MUST think deeply about whether supporting this idea is safe, sound, and passes essential requirements
- You MUST be direct and unequivocal in saying "this is a bad idea" when it truly is
- There must be at least ONE COMPELLING reason to be optimistic, otherwise DO NOT support it
WHEN TO REFUSE SUPPORT (MUST OVERRIDE STANCE):
- If the idea is fundamentally harmful to users, project, or stakeholders
- If implementation would violate security, privacy, or ethical standards
- If the proposal is technically infeasible within realistic constraints
- If costs/risks dramatically outweigh any potential benefits
YOUR SUPPORTIVE ANALYSIS SHOULD:
- Identify genuine strengths and opportunities
- Propose solutions to overcome legitimate challenges
- Highlight synergies with existing systems
- Suggest optimizations that enhance value
- Present realistic implementation pathways
Remember: Being "for" means finding the BEST possible version of the idea IF it has merit, not blindly supporting bad ideas.""",
"against": """CRITICAL PERSPECTIVE WITH RESPONSIBILITY
You are tasked with critiquing this proposal, but with ESSENTIAL BOUNDARIES:
MANDATORY FAIRNESS CONSTRAINTS:
- You MUST NOT oppose genuinely excellent, common-sense ideas just to be contrarian
- You MUST acknowledge when a proposal is fundamentally sound and well-conceived
- You CANNOT give harmful advice or recommend against beneficial changes
- If the idea is outstanding, say so clearly while offering constructive refinements
WHEN TO MODERATE CRITICISM (MUST OVERRIDE STANCE):
- If the proposal addresses critical user needs effectively
- If it follows established best practices with good reason
- If benefits clearly and substantially outweigh risks
- If it's the obvious right solution to the problem
YOUR CRITICAL ANALYSIS SHOULD:
- Identify legitimate risks and failure modes
- Point out overlooked complexities
- Suggest more efficient alternatives
- Highlight potential negative consequences
- Question assumptions that may be flawed
Remember: Being "against" means rigorous scrutiny to ensure quality, not undermining good ideas that deserve support.""",
"neutral": """BALANCED PERSPECTIVE
Provide objective analysis considering both positive and negative aspects. However, if there is overwhelming evidence
that the proposal clearly leans toward being exceptionally good or particularly problematic, you MUST accurately
reflect this reality. Being "balanced" means being truthful about the weight of evidence, not artificially creating
50/50 splits when the reality is 90/10.
Your analysis should:
- Present all significant pros and cons discovered
- Weight them according to actual impact and likelihood
- If evidence strongly favors one conclusion, clearly state this
- Provide proportional coverage based on the strength of arguments
- Help the questioner see the true balance of considerations
Remember: Artificial balance that misrepresents reality is not helpful. True balance means accurate representation
of the evidence, even when it strongly points in one direction.""",
}
stance_prompt = stance_prompts.get(stance, stance_prompts["neutral"])
# Validate stance placeholder exists exactly once
if base_prompt.count("{stance_prompt}") != 1:
raise ValueError(
"System prompt must contain exactly one '{stance_prompt}' placeholder, "
f"found {base_prompt.count('{stance_prompt}')}"
)
# Inject stance into the system prompt
return base_prompt.replace("{stance_prompt}", stance_prompt)
def _get_single_response(
self, provider, model_config: ModelConfig, prompt: str, request: ConsensusRequest
) -> dict[str, Any]:
"""Get response from a single model - synchronous method."""
logger.debug(f"Getting response from {model_config.model} with stance '{model_config.stance}'")
try:
# Provider.generate_content is synchronous, not async
response = provider.generate_content(
prompt=prompt,
model_name=model_config.model,
system_prompt=self._get_stance_enhanced_prompt(model_config.stance, model_config.stance_prompt),
temperature=getattr(request, "temperature", None) or self.get_default_temperature(),
thinking_mode=getattr(request, "thinking_mode", "medium"),
images=getattr(request, "images", None) or [],
)
return {
"model": model_config.model,
"stance": model_config.stance,
"status": "success",
"verdict": response.content, # Contains structured Markdown
"metadata": {
"provider": getattr(provider.get_provider_type(), "value", provider.get_provider_type()),
"usage": response.usage if hasattr(response, "usage") else None,
"custom_stance_prompt": bool(model_config.stance_prompt),
},
}
except Exception as e:
logger.error(f"Error getting response from {model_config.model}:{model_config.stance}: {str(e)}")
return {"model": model_config.model, "stance": model_config.stance, "status": "error", "error": str(e)}
def _get_consensus_responses(
self, provider_configs: list[tuple], prompt: str, request: ConsensusRequest
) -> list[dict[str, Any]]:
"""Execute all model requests sequentially - purely synchronous like other tools."""
logger.debug(f"Processing {len(provider_configs)} models sequentially")
responses = []
for i, (provider, model_config) in enumerate(provider_configs):
try:
logger.debug(
f"Processing {model_config.model}:{model_config.stance} sequentially ({i+1}/{len(provider_configs)})"
)
# Direct synchronous call - matches pattern of other tools
response = self._get_single_response(provider, model_config, prompt, request)
responses.append(response)
except Exception as e:
logger.error(f"Failed to get response from {model_config.model}:{model_config.stance}: {str(e)}")
responses.append(
{
"model": model_config.model,
"stance": model_config.stance,
"status": "error",
"error": f"Unhandled exception: {str(e)}",
}
)
logger.debug(f"Sequential processing completed for {len(responses)} models")
return responses
def _format_consensus_output(self, responses: list[dict[str, Any]], skipped_entries: list[str]) -> str:
"""Format the consensus responses into structured output for Claude."""
logger.debug(f"Formatting consensus output for {len(responses)} responses")
# Separate successful and failed responses
successful_responses = [r for r in responses if r["status"] == "success"]
failed_responses = [r for r in responses if r["status"] == "error"]
logger.debug(f"Successful responses: {len(successful_responses)}, Failed: {len(failed_responses)}")
# Prepare the structured output (minimize size for MCP stability)
models_used = [
f"{r['model']}:{r['stance']}" if r["stance"] != "neutral" else r["model"] for r in successful_responses
]
models_errored = [
f"{r['model']}:{r['stance']}" if r["stance"] != "neutral" else r["model"] for r in failed_responses
]
# Prepare clean responses without truncation
clean_responses = []
for r in responses:
if r["status"] == "success":
clean_responses.append(
{
"model": r["model"],
"stance": r["stance"],
"status": r["status"],
"verdict": r.get("verdict", ""),
"metadata": r.get("metadata", {}),
}
)
else:
clean_responses.append(
{
"model": r["model"],
"stance": r["stance"],
"status": r["status"],
"error": r.get("error", "Unknown error"),
}
)
output_data = {
"status": "consensus_success" if successful_responses else "consensus_failed",
"models_used": models_used,
"models_skipped": skipped_entries,
"models_errored": models_errored,
"responses": clean_responses,
"next_steps": self._get_synthesis_guidance(successful_responses, failed_responses),
}
return json.dumps(output_data, indent=2)
def _get_synthesis_guidance(
self, successful_responses: list[dict[str, Any]], failed_responses: list[dict[str, Any]]
) -> str:
"""Generate guidance for Claude on how to synthesize the consensus results."""
if not successful_responses:
return (
"No models provided successful responses. Please retry with different models or "
"check the error messages for guidance on resolving the issues."
)
if len(successful_responses) == 1:
return (
"Only one model provided a successful response. Synthesize based on the available "
"perspective and indicate areas where additional expert input would be valuable "
"due to the limited consensus data."
)
# Multiple successful responses - provide comprehensive synthesis guidance
stance_counts = {"for": 0, "against": 0, "neutral": 0}
for resp in successful_responses:
stance = resp.get("stance", "neutral")
stance_counts[stance] = stance_counts.get(stance, 0) + 1
guidance = (
"Claude, synthesize these perspectives by first identifying the key points of "
"**agreement** and **disagreement** between the models. Then provide your final, "
"consolidated recommendation, explaining how you weighed the different opinions and "
"why your proposed solution is the most balanced approach. Explicitly address the "
"most critical risks raised by each model and provide actionable next steps for implementation."
)
if failed_responses:
guidance += (
f" Note: {len(failed_responses)} model(s) failed to respond - consider this "
"partial consensus and indicate where additional expert input would strengthen the analysis."
)
return guidance
async def prepare_prompt(self, request: ConsensusRequest) -> str:
"""Prepare the consensus prompt with context files and focus areas."""
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# Use prompt.txt content if available, otherwise use the prompt field
user_content = prompt_content if prompt_content else request.prompt
# Check user input size at MCP transport boundary (before adding internal content)
size_check = self.check_prompt_size(user_content)
if size_check:
# Need to return error, but prepare_prompt returns str
# Use exception to handle this cleanly
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Update request files list
if updated_files is not None:
request.files = updated_files
# Add focus areas if specified
if request.focus_areas:
focus_areas_text = "\n\nSpecific focus areas for this analysis:\n" + "\n".join(
f"- {area}" for area in request.focus_areas
)
user_content += focus_areas_text
# Add context files if provided (using centralized file handling with filtering)
if request.files:
file_content, processed_files = self._prepare_file_content_for_prompt(
request.files, request.continuation_id, "Context files"
)
self._actually_processed_files = processed_files
if file_content:
user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="
# Check token limits
self._validate_token_limit(user_content, "Content")
return user_content
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Execute consensus gathering from multiple models."""
# Store arguments for base class methods
self._current_arguments = arguments
# Validate and create request
request = ConsensusRequest(**arguments)
# Validate model configurations and enforce limits
valid_configs, skipped_entries = self._validate_model_combinations(request.models)
if not valid_configs:
error_output = {
"status": "consensus_failed",
"error": "No valid model configurations after validation",
"models_skipped": skipped_entries,
"next_steps": "Please provide valid model configurations with proper model names and stance values.",
}
return [TextContent(type="text", text=json.dumps(error_output, indent=2))]
# Set up a dummy model context for consensus since we handle multiple models
# This is needed for base class methods like prepare_prompt to work
if not hasattr(self, "_model_context") or not self._model_context:
from utils.model_context import ModelContext
# Use the first model as the representative for token calculations
first_model = valid_configs[0].model if valid_configs else "flash"
self._model_context = ModelContext(first_model)
# Handle conversation continuation if specified
if request.continuation_id:
from utils.conversation_memory import build_conversation_history, get_thread
thread_context = get_thread(request.continuation_id)
if thread_context:
# Build conversation history using the same pattern as other tools
conversation_context, _ = build_conversation_history(thread_context, self._model_context)
if conversation_context:
# Add conversation context to the beginning of the prompt
enhanced_prompt = f"{conversation_context}\n\n{request.prompt}"
request.prompt = enhanced_prompt
# Prepare the consensus prompt
consensus_prompt = await self.prepare_prompt(request)
# Get providers for valid model configurations with caching to avoid duplicate lookups
provider_configs = []
provider_cache = {} # Cache to avoid duplicate provider lookups
for model_config in valid_configs:
try:
# Check cache first
if model_config.model in provider_cache:
provider = provider_cache[model_config.model]
else:
# Look up provider and cache it
provider = self.get_model_provider(model_config.model)
provider_cache[model_config.model] = provider
provider_configs.append((provider, model_config))
except Exception as e:
# Track failed models
model_display = (
f"{model_config.model}:{model_config.stance}"
if model_config.stance != "neutral"
else model_config.model
)
skipped_entries.append(f"{model_display} (provider not available: {str(e)})")
if not provider_configs:
error_output = {
"status": "consensus_failed",
"error": "No model providers available",
"models_skipped": skipped_entries,
"next_steps": "Please check that the specified models have configured API keys and are available.",
}
return [TextContent(type="text", text=json.dumps(error_output, indent=2))]
# Send to all models sequentially (purely synchronous like other tools)
logger.debug(f"Sending consensus request to {len(provider_configs)} models")
responses = self._get_consensus_responses(provider_configs, consensus_prompt, request)
logger.debug(f"Received {len(responses)} responses from consensus models")
# Enforce minimum success requirement - must have at least 1 successful response
successful_responses = [r for r in responses if r["status"] == "success"]
if not successful_responses:
error_output = {
"status": "consensus_failed",
"error": "All model calls failed - no successful responses received",
"models_skipped": skipped_entries,
"models_errored": [
f"{r['model']}:{r['stance']}" if r["stance"] != "neutral" else r["model"]
for r in responses
if r["status"] == "error"
],
"next_steps": "Please retry with different models or check the error messages for guidance on resolving the issues.",
}
return [TextContent(type="text", text=json.dumps(error_output, indent=2))]
logger.debug("About to format consensus output for MCP response")
# Structure the output and store in conversation memory
consensus_output = self._format_consensus_output(responses, skipped_entries)
# Log response size for debugging
output_size = len(consensus_output)
logger.debug(f"Consensus output size: {output_size:,} characters")
# Store in conversation memory if continuation_id is provided
if request.continuation_id:
self.store_conversation_turn(
request.continuation_id,
consensus_output,
request.files,
request.images,
responses, # Store individual responses in metadata
skipped_entries,
)
return [TextContent(type="text", text=consensus_output)]
def store_conversation_turn(
self,
continuation_id: str,
output: str,
files: list[str],
images: list[str],
responses: list[dict[str, Any]],
skipped_entries: list[str],
):
"""Store consensus turn in conversation memory with special metadata."""
from utils.conversation_memory import add_turn
# Filter successful and failed responses
successful_responses = [r for r in responses if r["status"] == "success"]
failed_responses = [r for r in responses if r["status"] == "error"]
# Prepare metadata for conversation storage
metadata = {
"tool_type": "consensus",
"models_used": [r["model"] for r in successful_responses],
"models_skipped": skipped_entries,
"models_errored": [r["model"] for r in failed_responses],
"individual_responses": successful_responses, # Only store successful responses
}
# Store the turn with special consensus metadata - add_turn is synchronous
add_turn(
thread_id=continuation_id,
role="assistant",
content=output,
files=files or [],
images=images or [],
tool_name="consensus",
model_provider="consensus", # Special provider name
model_name="consensus", # Special model name
model_metadata=metadata,
)

tools/debug.py

@@ -159,13 +159,7 @@ class DebugIssueTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# File size validation happens at MCP boundary in server.py
# Build context sections
context_parts = [f"=== ISSUE DESCRIPTION ===\n{request.prompt}\n=== END DESCRIPTION ==="]

tools/precommit.py

@@ -236,13 +236,7 @@ class Precommit(BaseTool):
translated_path = translate_path_for_environment(request.path)
translated_files = translate_file_paths(request.files)
# MCP boundary check - STRICT REJECTION (check original files before translation)
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# File size validation happens at MCP boundary in server.py
# Check if the path translation resulted in an error path
if translated_path.startswith("/inaccessible/"):

tools/refactor.py

@@ -409,23 +409,25 @@ class RefactorTool(BaseTool):
continuation_id = getattr(request, "continuation_id", None)
# Get model context for token budget calculation
model_name = getattr(self, "_current_model_name", None)
available_tokens = None
if model_name:
if hasattr(self, "_model_context") and self._model_context:
try:
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
capabilities = self._model_context.capabilities
# Use 75% of context for content (code + style examples), 25% for response
available_tokens = int(capabilities.context_window * 0.75)
logger.debug(
f"[REFACTOR] Token budget calculation: {available_tokens:,} tokens (75% of {capabilities.context_window:,}) for model {model_name}"
f"[REFACTOR] Token budget calculation: {available_tokens:,} tokens (75% of {capabilities.context_window:,}) for model {self._model_context.model_name}"
)
except Exception as e:
# Fallback to conservative estimate
logger.warning(f"[REFACTOR] Could not get model capabilities for {model_name}: {e}")
logger.warning(f"[REFACTOR] Could not get model capabilities: {e}")
available_tokens = 120000 # Conservative fallback
logger.debug(f"[REFACTOR] Using fallback token budget: {available_tokens:,} tokens")
else:
# No model context available (shouldn't happen in normal flow)
available_tokens = 120000 # Conservative fallback
logger.debug(f"[REFACTOR] No model context, using fallback token budget: {available_tokens:,} tokens")
# Process style guide examples first to determine token allocation
style_examples_content = ""

tools/testgen.py

@@ -290,23 +290,25 @@ class TestGenerationTool(BaseTool):
continuation_id = getattr(request, "continuation_id", None)
# Get model context for token budget calculation
model_name = getattr(self, "_current_model_name", None)
available_tokens = None
if model_name:
if hasattr(self, "_model_context") and self._model_context:
try:
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
capabilities = self._model_context.capabilities
# Use 75% of context for content (code + test examples), 25% for response
available_tokens = int(capabilities.context_window * 0.75)
logger.debug(
f"[TESTGEN] Token budget calculation: {available_tokens:,} tokens (75% of {capabilities.context_window:,}) for model {model_name}"
f"[TESTGEN] Token budget calculation: {available_tokens:,} tokens (75% of {capabilities.context_window:,}) for model {self._model_context.model_name}"
)
except Exception as e:
# Fallback to conservative estimate
logger.warning(f"[TESTGEN] Could not get model capabilities for {model_name}: {e}")
logger.warning(f"[TESTGEN] Could not get model capabilities: {e}")
available_tokens = 120000 # Conservative fallback
logger.debug(f"[TESTGEN] Using fallback token budget: {available_tokens:,} tokens")
else:
# No model context available (shouldn't happen in normal flow)
available_tokens = 120000 # Conservative fallback
logger.debug(f"[TESTGEN] No model context, using fallback token budget: {available_tokens:,} tokens")
# Process test examples first to determine token allocation
test_examples_content = ""

tools/thinkdeep.py

@@ -158,13 +158,7 @@ class ThinkDeepTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# File size validation happens at MCP boundary in server.py
# Build context parts
context_parts = [f"=== CLAUDE'S CURRENT ANALYSIS ===\n{current_analysis}\n=== END ANALYSIS ==="]