Categorize tools into 'model capabilities categories' to help determine which type of model to pick when in auto mode

Encourage Claude to pick the best model for the job automatically in auto mode
Lots of new tests to ensure automatic model picking works reliably based on user preference or when a matching model is not found or ambiguous
Improved error reporting when a bogus model is requested that is not configured or available
This commit is contained in:
Fahad
2025-06-14 02:17:06 +04:00
parent 7fc1186a7c
commit eb388ab2f2
13 changed files with 838 additions and 68 deletions

View File

@@ -2,11 +2,14 @@
Analyze tool - General-purpose code and file analysis
"""
from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import TEMPERATURE_ANALYTICAL
from prompts import ANALYZE_PROMPT
@@ -42,8 +45,6 @@ class AnalyzeTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import IS_AUTO_MODE
schema = {
"type": "object",
"properties": {
@@ -95,7 +96,7 @@ class AnalyzeTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["files", "prompt"] + (["model"] if IS_AUTO_MODE else []),
"required": ["files", "prompt"] + (["model"] if self.is_effective_auto_mode() else []),
}
return schema
@@ -106,6 +107,12 @@ class AnalyzeTool(BaseTool):
def get_default_temperature(self) -> float:
    """Return the default sampling temperature for this tool (analytical preset)."""
    return TEMPERATURE_ANALYTICAL
def get_model_category(self) -> "ToolModelCategory":
    """Analysis work needs deep understanding, so request an extended-reasoning model."""
    from tools.models import ToolModelCategory as Category

    return Category.EXTENDED_REASONING
def get_request_model(self):
    """Return the request model class used to validate this tool's arguments."""
    return AnalyzeRequest

View File

@@ -17,11 +17,14 @@ import json
import logging
import os
from abc import ABC, abstractmethod
from typing import Any, Literal, Optional
from typing import TYPE_CHECKING, Any, Literal, Optional
from mcp.types import TextContent
from pydantic import BaseModel, Field
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import MCP_PROMPT_SIZE_LIMIT
from providers import ModelProvider, ModelProviderRegistry
from utils import check_token_limit
@@ -156,6 +159,88 @@ class BaseTool(ABC):
"""
pass
def is_effective_auto_mode(self) -> bool:
    """
    Determine whether the model parameter must be required in the tool schema.

    Evaluated at initialization time when schemas are generated. The model
    parameter becomes required either when explicit auto mode is enabled, or
    when a concrete DEFAULT_MODEL is configured but no provider can serve it.

    Returns:
        bool: True if the model parameter should be required in the schema.
    """
    from config import DEFAULT_MODEL, IS_AUTO_MODE
    from providers.registry import ModelProviderRegistry

    # Explicit auto mode always requires a model choice from Claude.
    if IS_AUTO_MODE:
        return True

    # DEFAULT_MODEL set to "auto" without IS_AUTO_MODE: model stays optional.
    if DEFAULT_MODEL.lower() == "auto":
        return False

    # A concrete default model that no configured provider serves still
    # forces model selection at call time.
    return not ModelProviderRegistry.get_provider_for_model(DEFAULT_MODEL)
def _should_require_model_selection(self, model_name: str) -> bool:
    """
    Decide at request-execution time whether Claude must select a model.

    When this returns True the caller responds with an error asking Claude
    to supply an explicit model parameter.

    Args:
        model_name: The model name from the request or DEFAULT_MODEL.

    Returns:
        bool: True if model selection should be required.
    """
    # Explicit "auto" always defers the choice to Claude.
    if model_name.lower() == "auto":
        return True

    from providers.registry import ModelProviderRegistry

    if ModelProviderRegistry.get_provider_for_model(model_name):
        return False

    # The requested model has no provider under the current API keys.
    logging.getLogger(f"tools.{self.name}").warning(
        f"Model '{model_name}' is not available with current API keys. " f"Requiring model selection."
    )
    return True
def _get_available_models(self) -> list[str]:
    """
    List the model names usable with the currently configured API keys.

    Returns:
        list[str]: Available model names, or a single placeholder entry
        when no provider is configured.
    """
    from config import MODEL_CAPABILITIES_DESC
    from providers.base import ProviderType
    from providers.registry import ModelProviderRegistry

    # Keep only the capability-listed models that a provider can serve.
    available = [
        name
        for name in MODEL_CAPABILITIES_DESC
        if ModelProviderRegistry.get_provider_for_model(name)
    ]

    # OpenRouter accepts any model; surface it only when nothing else
    # matched so the suggestion stays specific.
    openrouter = ModelProviderRegistry.get_provider(ProviderType.OPENROUTER)
    if openrouter and not available:
        available.append("any model via OpenRouter")

    return available if available else ["none - please configure API keys"]
def get_model_field_schema(self) -> dict[str, Any]:
"""
Generate the model field schema based on auto mode configuration.
@@ -168,16 +253,20 @@ class BaseTool(ABC):
"""
import os
from config import DEFAULT_MODEL, IS_AUTO_MODE, MODEL_CAPABILITIES_DESC
from config import DEFAULT_MODEL, MODEL_CAPABILITIES_DESC
# Check if OpenRouter is configured
has_openrouter = bool(
os.getenv("OPENROUTER_API_KEY") and os.getenv("OPENROUTER_API_KEY") != "your_openrouter_api_key_here"
)
if IS_AUTO_MODE:
# Use the centralized effective auto mode check
if self.is_effective_auto_mode():
# In auto mode, model is required and we provide detailed descriptions
model_desc_parts = ["Choose the best model for this task based on these capabilities:"]
model_desc_parts = [
"IMPORTANT: Use the model specified by the user if provided, OR select the most suitable model "
"for this specific task based on the requirements and capabilities listed below:"
]
for model, desc in MODEL_CAPABILITIES_DESC.items():
model_desc_parts.append(f"- '{model}': {desc}")
@@ -302,6 +391,21 @@ class BaseTool(ABC):
"""
return "medium" # Default to medium thinking for better reasoning
def get_model_category(self) -> "ToolModelCategory":
    """
    Category used in auto mode to pick a suitable model for this tool.

    Subclasses override this to request extended reasoning or fast,
    cost-efficient responses; the base implementation asks for a
    balanced model.

    Returns:
        ToolModelCategory: Selection hint that influences auto-mode choice.
    """
    from tools.models import ToolModelCategory

    return ToolModelCategory.BALANCED
def get_conversation_embedded_files(self, continuation_id: Optional[str]) -> list[str]:
"""
Get list of files already embedded in conversation history.
@@ -474,11 +578,13 @@ class BaseTool(ABC):
if model_name.lower() == "auto":
from providers.registry import ModelProviderRegistry
# Use the preferred fallback model for capacity estimation
# Use tool-specific fallback model for capacity estimation
# This properly handles different providers (OpenAI=200K, Gemini=1M)
fallback_model = ModelProviderRegistry.get_preferred_fallback_model()
tool_category = self.get_model_category()
fallback_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
logger.debug(
f"[FILES] {self.name}: Auto mode detected, using {fallback_model} for capacity estimation"
f"[FILES] {self.name}: Auto mode detected, using {fallback_model} "
f"for {tool_category.value} tool capacity estimation"
)
try:
@@ -898,13 +1004,39 @@ When recommending searches, be specific about what information you need and why
model_name = DEFAULT_MODEL
# In auto mode, model parameter is required
from config import IS_AUTO_MODE
# Check if we need Claude to select a model
# This happens when:
# 1. The model is explicitly "auto"
# 2. The requested model is not available
if self._should_require_model_selection(model_name):
# Get suggested model based on tool category
from providers.registry import ModelProviderRegistry
tool_category = self.get_model_category()
suggested_model = ModelProviderRegistry.get_preferred_fallback_model(tool_category)
# Build error message based on why selection is required
if model_name.lower() == "auto":
error_message = (
f"Model parameter is required in auto mode. "
f"Suggested model for {self.name}: '{suggested_model}' "
f"(category: {tool_category.value})"
)
else:
# Model was specified but not available
# Get list of available models
available_models = self._get_available_models()
error_message = (
f"Model '{model_name}' is not available with current API keys. "
f"Available models: {', '.join(available_models)}. "
f"Suggested model for {self.name}: '{suggested_model}' "
f"(category: {tool_category.value})"
)
if IS_AUTO_MODE and model_name.lower() == "auto":
error_output = ToolOutput(
status="error",
content="Model parameter is required. Please specify which model to use for this task.",
content=error_message,
content_type="text",
)
return [TextContent(type="text", text=error_output.model_dump_json())]

View File

@@ -2,11 +2,14 @@
Chat tool - General development chat and collaborative thinking
"""
from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import TEMPERATURE_BALANCED
from prompts import CHAT_PROMPT
@@ -44,8 +47,6 @@ class ChatTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import IS_AUTO_MODE
schema = {
"type": "object",
"properties": {
@@ -80,7 +81,7 @@ class ChatTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["prompt"] + (["model"] if IS_AUTO_MODE else []),
"required": ["prompt"] + (["model"] if self.is_effective_auto_mode() else []),
}
return schema
@@ -91,6 +92,12 @@ class ChatTool(BaseTool):
def get_default_temperature(self) -> float:
    """Return the default sampling temperature for this tool (balanced preset)."""
    return TEMPERATURE_BALANCED
def get_model_category(self) -> "ToolModelCategory":
    """Chat favors speed and cost efficiency over deep reasoning."""
    from tools.models import ToolModelCategory as Category

    return Category.FAST_RESPONSE
def get_request_model(self):
    """Return the request model class used to validate this tool's arguments."""
    return ChatRequest

View File

@@ -82,8 +82,6 @@ class CodeReviewTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import IS_AUTO_MODE
schema = {
"type": "object",
"properties": {
@@ -138,7 +136,7 @@ class CodeReviewTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["files", "prompt"] + (["model"] if IS_AUTO_MODE else []),
"required": ["files", "prompt"] + (["model"] if self.is_effective_auto_mode() else []),
}
return schema

View File

@@ -2,11 +2,14 @@
Debug Issue tool - Root cause analysis and debugging assistance
"""
from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import TEMPERATURE_ANALYTICAL
from prompts import DEBUG_ISSUE_PROMPT
@@ -50,8 +53,6 @@ class DebugIssueTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import IS_AUTO_MODE
schema = {
"type": "object",
"properties": {
@@ -98,7 +99,7 @@ class DebugIssueTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["prompt"] + (["model"] if IS_AUTO_MODE else []),
"required": ["prompt"] + (["model"] if self.is_effective_auto_mode() else []),
}
return schema
@@ -109,6 +110,12 @@ class DebugIssueTool(BaseTool):
def get_default_temperature(self) -> float:
    """Return the default sampling temperature for this tool (analytical preset)."""
    return TEMPERATURE_ANALYTICAL
def get_model_category(self) -> "ToolModelCategory":
    """Root-cause debugging calls for deep analysis and reasoning."""
    from tools.models import ToolModelCategory as Category

    return Category.EXTENDED_REASONING
def get_request_model(self):
    """Return the request model class used to validate this tool's arguments."""
    return DebugIssueRequest

View File

@@ -2,11 +2,20 @@
Data models for tool responses and interactions
"""
from enum import Enum
from typing import Any, Literal, Optional
from pydantic import BaseModel, Field
class ToolModelCategory(Enum):
    """Capability buckets that steer auto-mode model selection per tool."""

    # Tools that need deep, multi-step thinking (analyze, debug, thinkdeep).
    EXTENDED_REASONING = "extended_reasoning"
    # Latency- and cost-sensitive tools (chat).
    FAST_RESPONSE = "fast_response"
    # Reasonable middle ground when neither extreme applies.
    BALANCED = "balanced"
class ContinuationOffer(BaseModel):
"""Offer for Claude to continue conversation when Gemini doesn't ask follow-up"""

View File

@@ -9,11 +9,14 @@ This provides comprehensive context for AI analysis - not a duplication bug.
"""
import os
from typing import Any, Literal, Optional
from typing import TYPE_CHECKING, Any, Literal, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from prompts.tool_prompts import PRECOMMIT_PROMPT
from utils.file_utils import translate_file_paths, translate_path_for_environment
from utils.git_utils import find_git_repositories, get_git_status, run_git_command
@@ -100,30 +103,83 @@ class Precommit(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import IS_AUTO_MODE
schema = self.get_request_model().model_json_schema()
# Ensure model parameter has enhanced description
if "properties" in schema and "model" in schema["properties"]:
schema["properties"]["model"] = self.get_model_field_schema()
# In auto mode, model is required
if IS_AUTO_MODE and "required" in schema:
if "model" not in schema["required"]:
schema["required"].append("model")
# Ensure use_websearch is in the schema with proper description
if "properties" in schema and "use_websearch" not in schema["properties"]:
schema["properties"]["use_websearch"] = {
"type": "boolean",
"description": "Enable web search for documentation, best practices, and current information. Particularly useful for: brainstorming sessions, architectural design discussions, exploring industry best practices, working with specific frameworks/technologies, researching solutions to complex problems, or when current documentation and community insights would enhance the analysis.",
"default": True,
}
# Add continuation_id parameter
if "properties" in schema and "continuation_id" not in schema["properties"]:
schema["properties"]["continuation_id"] = {
"type": "string",
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
}
schema = {
"type": "object",
"title": "PrecommitRequest",
"description": "Request model for precommit tool",
"properties": {
"path": {
"type": "string",
"description": "Starting directory to search for git repositories (must be absolute path).",
},
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": "The original user request description for the changes. Provides critical context for the review.",
},
"compare_to": {
"type": "string",
"description": "Optional: A git ref (branch, tag, commit hash) to compare against. If not provided, reviews local staged and unstaged changes.",
},
"include_staged": {
"type": "boolean",
"default": True,
"description": "Include staged changes in the review. Only applies if 'compare_to' is not set.",
},
"include_unstaged": {
"type": "boolean",
"default": True,
"description": "Include uncommitted (unstaged) changes in the review. Only applies if 'compare_to' is not set.",
},
"focus_on": {
"type": "string",
"description": "Specific aspects to focus on (e.g., 'logic for user authentication', 'database query efficiency').",
},
"review_type": {
"type": "string",
"enum": ["full", "security", "performance", "quick"],
"default": "full",
"description": "Type of review to perform on the changes.",
},
"severity_filter": {
"type": "string",
"enum": ["critical", "high", "medium", "all"],
"default": "all",
"description": "Minimum severity level to report on the changes.",
},
"max_depth": {
"type": "integer",
"default": 5,
"description": "Maximum depth to search for nested git repositories to prevent excessive recursion.",
},
"temperature": {
"type": "number",
"description": "Temperature for the response (0.0 to 1.0). Lower values are more focused and deterministic.",
"minimum": 0,
"maximum": 1,
},
"thinking_mode": {
"type": "string",
"enum": ["minimal", "low", "medium", "high", "max"],
"description": "Thinking depth mode for the assistant.",
},
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Optional files or directories to provide as context (must be absolute paths). These files are not part of the changes but provide helpful context like configs, docs, or related code.",
},
"use_websearch": {
"type": "boolean",
"description": "Enable web search for documentation, best practices, and current information. Particularly useful for: brainstorming sessions, architectural design discussions, exploring industry best practices, working with specific frameworks/technologies, researching solutions to complex problems, or when current documentation and community insights would enhance the analysis.",
"default": True,
},
"continuation_id": {
"type": "string",
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["path"] + (["model"] if self.is_effective_auto_mode() else []),
}
return schema
def get_system_prompt(self) -> str:
@@ -138,6 +194,12 @@ class Precommit(BaseTool):
return TEMPERATURE_ANALYTICAL
def get_model_category(self) -> "ToolModelCategory":
    """Pre-commit review demands thorough analysis and reasoning."""
    from tools.models import ToolModelCategory as Category

    return Category.EXTENDED_REASONING
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check original_request size before processing"""
# First validate request

View File

@@ -2,11 +2,14 @@
ThinkDeep tool - Extended reasoning and problem-solving
"""
from typing import Any, Optional
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
from tools.models import ToolModelCategory
from config import TEMPERATURE_CREATIVE
from prompts import THINKDEEP_PROMPT
@@ -48,8 +51,6 @@ class ThinkDeepTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import IS_AUTO_MODE
schema = {
"type": "object",
"properties": {
@@ -93,7 +94,7 @@ class ThinkDeepTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["prompt"] + (["model"] if IS_AUTO_MODE else []),
"required": ["prompt"] + (["model"] if self.is_effective_auto_mode() else []),
}
return schema
@@ -110,6 +111,12 @@ class ThinkDeepTool(BaseTool):
return DEFAULT_THINKING_MODE_THINKDEEP
def get_model_category(self) -> "ToolModelCategory":
    """ThinkDeep exists for extended reasoning, so request that capability."""
    from tools.models import ToolModelCategory as Category

    return Category.EXTENDED_REASONING
def get_request_model(self):
    """Return the request model class used to validate this tool's arguments."""
    return ThinkDeepRequest