feat: Add comprehensive dynamic configuration system v3.3.0
## Major Features Added

### 🎯 Dynamic Configuration System
- **Environment-aware model selection**: DEFAULT_MODEL with 'pro'/'flash' shortcuts
- **Configurable thinking modes**: DEFAULT_THINKING_MODE_THINKDEEP for extended reasoning
- **All tool schemas now dynamic**: Show actual current defaults instead of hardcoded values
- **Enhanced setup workflow**: Copy from .env.example with smart customization

### 🔧 Model & Thinking Configuration
- **Smart model resolution**: Support both shortcuts ('pro', 'flash') and full model names
- **Thinking mode optimization**: Only apply thinking budget to models that support it
- **Flash model compatibility**: Works without thinking config, still beneficial via system prompts
- **Dynamic schema descriptions**: Tool parameters show current environment values

### 🚀 Enhanced Developer Experience
- **Fail-fast Docker setup**: GEMINI_API_KEY required upfront in docker-compose
- **Comprehensive startup logging**: Shows current model and thinking mode defaults
- **Enhanced get_version tool**: Reports all dynamic configuration values
- **Better .env documentation**: Clear token consumption details and model options

### 🧪 Comprehensive Testing
- **Live model validation**: New simulator test validates Pro vs Flash thinking behavior
- **Dynamic configuration tests**: Verify environment variable overrides work correctly
- **Complete test coverage**: All 139 unit tests pass, including new model config tests

### 📋 Configuration Files Updated
- **docker-compose.yml**: Fail-fast API key validation, thinking mode support
- **setup-docker.sh**: Copy from .env.example instead of manual creation
- **.env.example**: Detailed documentation with token consumption per thinking mode
- **.gitignore**: Added test-setup/ for cleanup

### 🛠 Technical Improvements
- **Removed setup.py**: Fully Docker-based deployment (no longer needed)
- **REDIS_URL smart defaults**: Auto-configured for Docker, still configurable for dev
- **All tools updated**: Consistent dynamic model parameter descriptions
- **Enhanced error handling**: Better model resolution and validation

## Breaking Changes
- Removed setup.py (Docker-only deployment)
- Model parameter descriptions now show actual defaults (dynamic)

## Migration Guide
- Update .env files using the new .env.example format
- Use 'pro'/'flash' shortcuts or full model names
- Set DEFAULT_THINKING_MODE_THINKDEEP for custom thinking depth

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
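The two new knobs are plain environment variables surfaced through `config.py`. A minimal sketch of how that wiring might look — the variable names come from this commit, but the `os.getenv` implementation and fallback values shown here are assumptions:

```python
# config.py (sketch, not the actual file) -- environment-driven defaults.
import os

# Accepts the 'pro'/'flash' shortcuts or a full Gemini model name.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "pro")  # assumed fallback

# Thinking depth for the thinkdeep tool: minimal/low/medium/high/max.
DEFAULT_THINKING_MODE_THINKDEEP = os.getenv("DEFAULT_THINKING_MODE_THINKDEEP", "high")
```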
```diff
@@ -42,6 +42,8 @@ class AnalyzeTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -50,6 +52,10 @@ class AnalyzeTool(BaseTool):
                     "items": {"type": "string"},
                     "description": "Files or directories to analyze (must be absolute paths)",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "question": {
                     "type": "string",
                     "description": "What to analyze or look for",
```
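Because the `model` description is built with an f-string over `DEFAULT_MODEL`, the schema a client receives always reflects the environment at call time. A hypothetical usage check (the `tools.analyze` module path and no-argument constructor are assumptions):

```python
import os

# Must be set before config is first imported; config reads it at import time.
os.environ["DEFAULT_MODEL"] = "flash"

from tools.analyze import AnalyzeTool  # assumed module path

schema = AnalyzeTool().get_input_schema()
print(schema["properties"]["model"]["description"])
# -> "... Defaults to 'flash' if not specified."
```

Deferring `from config import DEFAULT_MODEL` into the method body, rather than importing at module top, appears to be what keeps the lookup out of module import order and avoids any import-cycle issues.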
```diff
@@ -25,7 +25,7 @@ from google.genai import types
 from mcp.types import TextContent
 from pydantic import BaseModel, Field
 
-from config import GEMINI_MODEL, MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
+from config import DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
 from utils import check_token_limit
 from utils.conversation_memory import (
     MAX_CONVERSATION_TURNS,
@@ -50,7 +50,10 @@ class ToolRequest(BaseModel):
    these common fields.
    """
 
-    model: Optional[str] = Field(None, description="Model to use (defaults to Gemini 2.5 Pro)")
+    model: Optional[str] = Field(
+        None,
+        description=f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+    )
     temperature: Optional[float] = Field(None, description="Temperature for response (tool-specific defaults)")
     # Thinking mode controls how much computational budget the model uses for reasoning
     # Higher values allow for more complex reasoning but increase latency and cost
@@ -625,7 +628,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
         # No need to rebuild it here - prompt already contains conversation history
 
         # Extract model configuration from request or use defaults
-        model_name = getattr(request, "model", None) or GEMINI_MODEL
+        model_name = getattr(request, "model", None) or DEFAULT_MODEL
         temperature = getattr(request, "temperature", None)
         if temperature is None:
             temperature = self.get_default_temperature()
```
```diff
@@ -1064,13 +1067,22 @@ If any of these would strengthen your analysis, specify what Claude should searc
         temperature and thinking budget configuration for models that support it.
 
         Args:
-            model_name: Name of the Gemini model to use
+            model_name: Name of the Gemini model to use (or shorthand like 'flash', 'pro')
             temperature: Temperature setting for response generation
             thinking_mode: Thinking depth mode (affects computational budget)
 
         Returns:
             Model instance configured and ready for generation
         """
+        # Define model shorthands for user convenience
+        model_shorthands = {
+            "pro": "gemini-2.5-pro-preview-06-05",
+            "flash": "gemini-2.0-flash-exp",
+        }
+
+        # Resolve shorthand to full model name
+        resolved_model_name = model_shorthands.get(model_name.lower(), model_name)
+
         # Map thinking modes to computational budget values
         # Higher budgets allow for more complex reasoning but increase latency
         thinking_budgets = {
@@ -1085,7 +1097,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
 
         # Gemini 2.5 models support thinking configuration for enhanced reasoning
         # Skip special handling in test environment to allow mocking
-        if "2.5" in model_name and not os.environ.get("PYTEST_CURRENT_TEST"):
+        if "2.5" in resolved_model_name and not os.environ.get("PYTEST_CURRENT_TEST"):
             try:
                 # Retrieve API key for Gemini client creation
                 api_key = os.environ.get("GEMINI_API_KEY")
@@ -1144,7 +1156,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
 
                         return ResponseWrapper(response.text)
 
-                return ModelWrapper(client, model_name, temperature, thinking_budget)
+                return ModelWrapper(client, resolved_model_name, temperature, thinking_budget)
 
             except Exception:
                 # Fall back to regular API if thinking configuration fails
@@ -1197,4 +1209,4 @@ If any of these would strengthen your analysis, specify what Claude should searc
 
                 return ResponseWrapper(response.text)
 
-        return SimpleModelWrapper(client, model_name, temperature)
+        return SimpleModelWrapper(client, resolved_model_name, temperature)
```
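Pulling the resolution logic out of `create_model` for illustration: shortcuts map to full model names, and a thinking budget applies only to 2.5 models, while Flash runs without one. The budget values come from the `thinking_mode` schema description later in this diff; this standalone helper is a sketch, not the server's actual API:

```python
MODEL_SHORTHANDS = {
    "pro": "gemini-2.5-pro-preview-06-05",
    "flash": "gemini-2.0-flash-exp",
}

# Token budgets per thinking mode, per the thinkdeep schema description.
THINKING_BUDGETS = {
    "minimal": 128,
    "low": 2048,
    "medium": 8192,
    "high": 16384,
    "max": 32768,
}


def resolve_model(model_name: str, thinking_mode: str) -> tuple[str, int | None]:
    """Return (resolved_model_name, thinking_budget or None)."""
    resolved = MODEL_SHORTHANDS.get(model_name.lower(), model_name)
    # Only Gemini 2.5 models accept a thinking budget; Flash runs without one
    # and still benefits from thinking-style system prompts.
    budget = THINKING_BUDGETS.get(thinking_mode) if "2.5" in resolved else None
    return resolved, budget


# resolve_model("flash", "high") -> ("gemini-2.0-flash-exp", None)
# resolve_model("pro", "max")    -> ("gemini-2.5-pro-preview-06-05", 32768)
```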
```diff
@@ -44,6 +44,8 @@ class ChatTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -56,6 +58,10 @@ class ChatTool(BaseTool):
                     "items": {"type": "string"},
                     "description": "Optional files for context (must be absolute paths)",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "temperature": {
                     "type": "number",
                     "description": "Response creativity (0-1, default 0.5)",
```
```diff
@@ -79,6 +79,8 @@ class CodeReviewTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -87,6 +89,10 @@ class CodeReviewTool(BaseTool):
                     "items": {"type": "string"},
                     "description": "Code files or directories to review (must be absolute paths)",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "context": {
                     "type": "string",
                     "description": "User's summary of what the code does, expected behavior, constraints, and review objectives",
```
```diff
@@ -50,6 +50,8 @@ class DebugIssueTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -57,6 +59,10 @@ class DebugIssueTool(BaseTool):
                     "type": "string",
                     "description": "Error message, symptoms, or issue description",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "error_context": {
                     "type": "string",
                     "description": "Stack trace, logs, or additional error context",
```
```diff
@@ -98,7 +98,15 @@ class Precommit(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         schema = self.get_request_model().model_json_schema()
+        # Ensure model parameter has enhanced description
+        if "properties" in schema and "model" in schema["properties"]:
+            schema["properties"]["model"] = {
+                "type": "string",
+                "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+            }
         # Ensure use_websearch is in the schema with proper description
         if "properties" in schema and "use_websearch" not in schema["properties"]:
             schema["properties"]["use_websearch"] = {
```
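Precommit differs from the other tools: its schema comes from pydantic's `model_json_schema()` and is patched afterwards rather than written by hand. A self-contained sketch of the same pattern (the `ExampleRequest` model and inline `DEFAULT_MODEL` are stand-ins for the real request model and config import):

```python
from typing import Optional

from pydantic import BaseModel, Field

DEFAULT_MODEL = "pro"  # stand-in; the real code imports this from config


class ExampleRequest(BaseModel):
    model: Optional[str] = Field(None)


schema = ExampleRequest.model_json_schema()
# Overwrite pydantic's generated property so the description shows live defaults.
if "properties" in schema and "model" in schema["properties"]:
    schema["properties"]["model"] = {
        "type": "string",
        "description": f"Model to use: 'pro' or 'flash'. Defaults to '{DEFAULT_MODEL}'.",
    }
```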
```diff
@@ -48,6 +48,8 @@ class ThinkDeepTool(BaseTool):
         )
 
     def get_input_schema(self) -> dict[str, Any]:
+        from config import DEFAULT_MODEL
+
         return {
             "type": "object",
             "properties": {
@@ -55,6 +57,10 @@ class ThinkDeepTool(BaseTool):
                     "type": "string",
                     "description": "Your current thinking/analysis to extend and validate",
                 },
+                "model": {
+                    "type": "string",
+                    "description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
+                },
                 "problem_context": {
                     "type": "string",
                     "description": "Additional context about the problem or goal",
@@ -78,8 +84,7 @@ class ThinkDeepTool(BaseTool):
                 "thinking_mode": {
                     "type": "string",
                     "enum": ["minimal", "low", "medium", "high", "max"],
-                    "description": "Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768)",
-                    "default": "high",
+                    "description": f"Thinking depth: minimal (128), low (2048), medium (8192), high (16384), max (32768). Defaults to '{self.get_default_thinking_mode()}' if not specified.",
                 },
                 "use_websearch": {
                     "type": "boolean",
@@ -101,8 +106,10 @@ class ThinkDeepTool(BaseTool):
         return TEMPERATURE_CREATIVE
 
     def get_default_thinking_mode(self) -> str:
-        """ThinkDeep uses high thinking by default"""
-        return "high"
+        """ThinkDeep uses configurable thinking mode, defaults to high"""
+        from config import DEFAULT_THINKING_MODE_THINKDEEP
+
+        return DEFAULT_THINKING_MODE_THINKDEEP
 
     def get_request_model(self):
         return ThinkDeepRequest
```
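With `get_default_thinking_mode()` now reading `DEFAULT_THINKING_MODE_THINKDEEP`, the thinkdeep tool's thinking depth can be raised or lowered per deployment without code changes. A hypothetical end-to-end check (the `tools.thinkdeep` module path is an assumption, and the env var must be set before `config` is first imported):

```python
import os

os.environ["DEFAULT_THINKING_MODE_THINKDEEP"] = "low"  # e.g. to cut token cost

from tools.thinkdeep import ThinkDeepTool  # assumed module path

assert ThinkDeepTool().get_default_thinking_mode() == "low"
```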