WIP major refactor and features

Fahad
2025-06-12 07:14:59 +04:00
parent e06a6fd1fc
commit 2a067a7f4e
46 changed files with 2960 additions and 1011 deletions

View File

@@ -18,7 +18,7 @@ class AnalyzeRequest(ToolRequest):
"""Request model for analyze tool"""
files: list[str] = Field(..., description="Files or directories to analyze (must be absolute paths)")
question: str = Field(..., description="What to analyze or look for")
prompt: str = Field(..., description="What to analyze or look for")
analysis_type: Optional[str] = Field(
None,
description="Type of analysis: architecture|performance|security|quality|general",
@@ -42,9 +42,9 @@ class AnalyzeTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import DEFAULT_MODEL
from config import IS_AUTO_MODE
return {
schema = {
"type": "object",
"properties": {
"files": {
@@ -52,11 +52,8 @@ class AnalyzeTool(BaseTool):
"items": {"type": "string"},
"description": "Files or directories to analyze (must be absolute paths)",
},
"model": {
"type": "string",
"description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
},
"question": {
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": "What to analyze or look for",
},
@@ -98,8 +95,10 @@ class AnalyzeTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["files", "question"],
"required": ["files", "prompt"] + (["model"] if IS_AUTO_MODE else []),
}
return schema
def get_system_prompt(self) -> str:
return ANALYZE_PROMPT
@@ -116,8 +115,8 @@ class AnalyzeTool(BaseTool):
request_model = self.get_request_model()
request = request_model(**arguments)
# Check question size
size_check = self.check_prompt_size(request.question)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
@@ -129,9 +128,9 @@ class AnalyzeTool(BaseTool):
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# If prompt.txt was found, use it as the question
# If prompt.txt was found, use it as the prompt
if prompt_content:
request.question = prompt_content
request.prompt = prompt_content
# Update request files list
if updated_files is not None:
@@ -177,7 +176,7 @@ class AnalyzeTool(BaseTool):
{focus_instruction}{websearch_instruction}
=== USER QUESTION ===
{request.question}
{request.prompt}
=== END QUESTION ===
=== FILES TO ANALYZE ===
@@ -188,12 +187,6 @@ Please analyze these files to answer the user's question."""
return full_prompt
def format_response(self, response: str, request: AnalyzeRequest) -> str:
def format_response(self, response: str, request: AnalyzeRequest, model_info: Optional[dict] = None) -> str:
"""Format the analysis response"""
header = f"Analysis: {request.question[:50]}..."
if request.analysis_type:
header = f"{request.analysis_type.upper()} Analysis"
summary_text = f"Analyzed {len(request.files)} file(s)"
return f"{header}\n{summary_text}\n{'=' * 50}\n\n{response}\n\n---\n\n**Next Steps:** Consider if this analysis reveals areas needing deeper investigation, additional context, or specific implementation details."
return f"{response}\n\n---\n\n**Next Steps:** Use this analysis to actively continue your task. Investigate deeper into any findings, implement solutions based on these insights, and carry out the necessary work. Only pause to ask the user if you need their explicit approval for major changes or if critical decisions require their input."

View File

@@ -20,13 +20,12 @@ import re
from abc import ABC, abstractmethod
from typing import Any, Literal, Optional
from google import genai
from google.genai import types
from mcp.types import TextContent
from pydantic import BaseModel, Field
from config import DEFAULT_MODEL, MAX_CONTEXT_TOKENS, MCP_PROMPT_SIZE_LIMIT
from utils import check_token_limit
from providers import ModelProviderRegistry, ModelProvider, ModelResponse
from utils.conversation_memory import (
MAX_CONVERSATION_TURNS,
add_turn,
@@ -52,7 +51,7 @@ class ToolRequest(BaseModel):
model: Optional[str] = Field(
None,
description=f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
description="Model to use. See tool's input schema for available models and their capabilities.",
)
temperature: Optional[float] = Field(None, description="Temperature for response (tool-specific defaults)")
# Thinking mode controls how much computational budget the model uses for reasoning
@@ -144,6 +143,38 @@ class BaseTool(ABC):
"""
pass
def get_model_field_schema(self) -> dict[str, Any]:
"""
Generate the model field schema based on auto mode configuration.
When auto mode is enabled, the model parameter becomes required
and includes detailed descriptions of each model's capabilities.
Returns:
Dict containing the model field JSON schema
"""
from config import DEFAULT_MODEL, IS_AUTO_MODE, MODEL_CAPABILITIES_DESC
if IS_AUTO_MODE:
# In auto mode, model is required and we provide detailed descriptions
model_desc_parts = ["Choose the best model for this task based on these capabilities:"]
for model, desc in MODEL_CAPABILITIES_DESC.items():
model_desc_parts.append(f"- '{model}': {desc}")
return {
"type": "string",
"description": "\n".join(model_desc_parts),
"enum": list(MODEL_CAPABILITIES_DESC.keys()),
}
else:
# Normal mode - model is optional with default
available_models = list(MODEL_CAPABILITIES_DESC.keys())
models_str = ', '.join(f"'{m}'" for m in available_models)
return {
"type": "string",
"description": f"Model to use. Available: {models_str}. Defaults to '{DEFAULT_MODEL}' if not specified.",
}
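For illustration, a self-contained sketch of the two schema shapes this helper produces; the capability map and default below are placeholder values, not the real entries in config:
MODEL_CAPABILITIES_DESC = {  # placeholder capability summaries
    "pro": "Extended reasoning and large context; best for deep analysis",
    "flash": "Fast responses; best for quick, well-scoped questions",
}
DEFAULT_MODEL = "pro"  # placeholder default

def model_field_schema(is_auto_mode: bool) -> dict:
    if is_auto_mode:
        # Auto mode: 'model' becomes a required enum with per-model guidance
        lines = ["Choose the best model for this task based on these capabilities:"]
        lines += [f"- '{name}': {desc}" for name, desc in MODEL_CAPABILITIES_DESC.items()]
        return {"type": "string", "description": "\n".join(lines), "enum": list(MODEL_CAPABILITIES_DESC)}
    # Normal mode: 'model' stays optional and falls back to the default
    models = ", ".join(f"'{name}'" for name in MODEL_CAPABILITIES_DESC)
    return {"type": "string", "description": f"Model to use. Available: {models}. Defaults to '{DEFAULT_MODEL}'."}

assert model_field_schema(True)["enum"] == ["pro", "flash"]
assert "Defaults to 'pro'" in model_field_schema(False)["description"]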
def get_default_temperature(self) -> float:
"""
Return the default temperature setting for this tool.
@@ -293,6 +324,11 @@ class BaseTool(ABC):
"""
if not request_files:
return ""
# If conversation history is already embedded, skip file processing
if hasattr(self, '_has_embedded_history') and self._has_embedded_history:
logger.debug(f"[FILES] {self.name}: Skipping file processing - conversation history already embedded")
return ""
# Extract remaining budget from arguments if available
if remaining_budget is None:
@@ -300,15 +336,59 @@ class BaseTool(ABC):
args_to_use = arguments or getattr(self, "_current_arguments", {})
remaining_budget = args_to_use.get("_remaining_tokens")
# Use remaining budget if provided, otherwise fall back to max_tokens or default
# Use remaining budget if provided, otherwise fall back to max_tokens or model-specific default
if remaining_budget is not None:
effective_max_tokens = remaining_budget - reserve_tokens
elif max_tokens is not None:
effective_max_tokens = max_tokens - reserve_tokens
else:
from config import MAX_CONTENT_TOKENS
effective_max_tokens = MAX_CONTENT_TOKENS - reserve_tokens
# Get model-specific limits
# First check if model_context was passed from server.py
model_context = None
if arguments:
model_context = arguments.get("_model_context") or getattr(self, "_current_arguments", {}).get("_model_context")
if model_context:
# Use the passed model context
try:
token_allocation = model_context.calculate_token_allocation()
effective_max_tokens = token_allocation.file_tokens - reserve_tokens
logger.debug(f"[FILES] {self.name}: Using passed model context for {model_context.model_name}: "
f"{token_allocation.file_tokens:,} file tokens from {token_allocation.total_tokens:,} total")
except Exception as e:
logger.warning(f"[FILES] {self.name}: Error using passed model context: {e}")
# Fall through to manual calculation
model_context = None
if not model_context:
# Manual calculation as fallback
model_name = getattr(self, "_current_model_name", None) or DEFAULT_MODEL
try:
provider = self.get_model_provider(model_name)
capabilities = provider.get_capabilities(model_name)
# Calculate content allocation based on model capacity
if capabilities.max_tokens < 300_000:
# Smaller context models: 60% content, 40% response
model_content_tokens = int(capabilities.max_tokens * 0.6)
else:
# Larger context models: 80% content, 20% response
model_content_tokens = int(capabilities.max_tokens * 0.8)
effective_max_tokens = model_content_tokens - reserve_tokens
logger.debug(f"[FILES] {self.name}: Using model-specific limit for {model_name}: "
f"{model_content_tokens:,} content tokens from {capabilities.max_tokens:,} total")
except (ValueError, AttributeError) as e:
# Handle specific errors: provider not found, model not supported, missing attributes
logger.warning(f"[FILES] {self.name}: Could not get model capabilities for {model_name}: {type(e).__name__}: {e}")
# Fall back to conservative default for safety
from config import MAX_CONTENT_TOKENS
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
except Exception as e:
# Catch any other unexpected errors
logger.error(f"[FILES] {self.name}: Unexpected error getting model capabilities: {type(e).__name__}: {e}")
from config import MAX_CONTENT_TOKENS
effective_max_tokens = min(MAX_CONTENT_TOKENS, 100_000) - reserve_tokens
# Ensure we have a reasonable minimum budget
effective_max_tokens = max(1000, effective_max_tokens)
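As a rough worked example of the fallback split above (the window sizes are illustrative, not real model limits):
def fallback_content_budget(context_window: int, reserve_tokens: int = 1000) -> int:
    # Models under 300K context keep 60% for content (40% for the response);
    # larger-context models keep 80% for content (20% for the response).
    ratio = 0.6 if context_window < 300_000 else 0.8
    return max(1000, int(context_window * ratio) - reserve_tokens)

print(fallback_content_budget(200_000))    # 119000  (60/40 split)
print(fallback_content_budget(1_000_000))  # 799000  (80/20 split)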
@@ -601,34 +681,59 @@ If any of these would strengthen your analysis, specify what Claude should searc
)
return [TextContent(type="text", text=error_output.model_dump_json())]
# Prepare the full prompt by combining system prompt with user request
# This is delegated to the tool implementation for customization
prompt = await self.prepare_prompt(request)
# Add follow-up instructions for new conversations (not threaded)
# Check if we have continuation_id - if so, conversation history is already embedded
continuation_id = getattr(request, "continuation_id", None)
if not continuation_id:
# Import here to avoid circular imports
if continuation_id:
# When continuation_id is present, server.py has already injected the
# conversation history into the appropriate field. We need to check if
# the prompt already contains conversation history marker.
logger.debug(f"Continuing {self.name} conversation with thread {continuation_id}")
# Store the original arguments to detect enhanced prompts
self._has_embedded_history = False
# Check if conversation history is already embedded in the prompt field
field_value = getattr(request, "prompt", "")
field_name = "prompt"
if "=== CONVERSATION HISTORY ===" in field_value:
# Conversation history is already embedded, use it directly
prompt = field_value
self._has_embedded_history = True
logger.debug(f"{self.name}: Using pre-embedded conversation history from {field_name}")
else:
# No embedded history, prepare prompt normally
prompt = await self.prepare_prompt(request)
logger.debug(f"{self.name}: No embedded history found, prepared prompt normally")
else:
# New conversation, prepare prompt normally
prompt = await self.prepare_prompt(request)
# Add follow-up instructions for new conversations
from server import get_follow_up_instructions
follow_up_instructions = get_follow_up_instructions(0) # New conversation, turn 0
prompt = f"{prompt}\n\n{follow_up_instructions}"
logger.debug(f"Added follow-up instructions for new {self.name} conversation")
# Also log to file for debugging MCP issues
try:
with open("/tmp/gemini_debug.log", "a") as f:
f.write(f"[{self.name}] Added follow-up instructions for new conversation\n")
except Exception:
pass
else:
logger.debug(f"Continuing {self.name} conversation with thread {continuation_id}")
# History reconstruction is handled by server.py:reconstruct_thread_context
# No need to rebuild it here - prompt already contains conversation history
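Condensed, the continuation branching above reduces to something like the sketch below, where prepared_prompt stands in for the output of prepare_prompt() and the follow-up instructions added for brand-new conversations are omitted:
HISTORY_MARKER = "=== CONVERSATION HISTORY ==="

def resolve_prompt(prompt_field: str, continuation_id: str | None, prepared_prompt: str) -> tuple[str, bool]:
    # Returns (prompt_to_send, has_embedded_history). When server.py has already
    # injected history into the prompt field, it is used verbatim and later file
    # embedding is skipped; otherwise the normally prepared prompt is used.
    if continuation_id and HISTORY_MARKER in prompt_field:
        return prompt_field, True
    return prepared_prompt, False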
# Extract model configuration from request or use defaults
model_name = getattr(request, "model", None) or DEFAULT_MODEL
model_name = getattr(request, "model", None)
if not model_name:
model_name = DEFAULT_MODEL
# In auto mode, model parameter is required
from config import IS_AUTO_MODE
if IS_AUTO_MODE and model_name.lower() == "auto":
error_output = ToolOutput(
status="error",
content="Model parameter is required. Please specify which model to use for this task.",
content_type="text",
)
return [TextContent(type="text", text=error_output.model_dump_json())]
# Store model name for use by helper methods like _prepare_file_content_for_prompt
self._current_model_name = model_name
temperature = getattr(request, "temperature", None)
if temperature is None:
temperature = self.get_default_temperature()
@@ -636,28 +741,45 @@ If any of these would strengthen your analysis, specify what Claude should searc
if thinking_mode is None:
thinking_mode = self.get_default_thinking_mode()
# Create model instance with appropriate configuration
# This handles both regular models and thinking-enabled models
model = self.create_model(model_name, temperature, thinking_mode)
# Get the appropriate model provider
provider = self.get_model_provider(model_name)
# Get system prompt for this tool
system_prompt = self.get_system_prompt()
# Generate AI response using the configured model
logger.info(f"Sending request to Gemini API for {self.name}")
# Generate AI response using the provider
logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.name}")
logger.debug(f"Prompt length: {len(prompt)} characters")
response = model.generate_content(prompt)
logger.info(f"Received response from Gemini API for {self.name}")
# Generate content with provider abstraction
model_response = provider.generate_content(
prompt=prompt,
model_name=model_name,
system_prompt=system_prompt,
temperature=temperature,
thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None
)
logger.info(f"Received response from {provider.get_provider_type().value} API for {self.name}")
# Process the model's response
if response.candidates and response.candidates[0].content.parts:
raw_text = response.candidates[0].content.parts[0].text
if model_response.content:
raw_text = model_response.content
# Parse response to check for clarification requests or format output
tool_output = self._parse_response(raw_text, request)
# Pass model info for conversation tracking
model_info = {
"provider": provider,
"model_name": model_name,
"model_response": model_response
}
tool_output = self._parse_response(raw_text, request, model_info)
logger.info(f"Successfully completed {self.name} tool execution")
else:
# Handle cases where the model couldn't generate a response
# This might happen due to safety filters or other constraints
finish_reason = response.candidates[0].finish_reason if response.candidates else "Unknown"
finish_reason = model_response.metadata.get("finish_reason", "Unknown")
logger.warning(f"Response blocked or incomplete for {self.name}. Finish reason: {finish_reason}")
tool_output = ToolOutput(
status="error",
@@ -678,13 +800,24 @@ If any of these would strengthen your analysis, specify what Claude should searc
if "500 INTERNAL" in error_msg and "Please retry" in error_msg:
logger.warning(f"500 INTERNAL error in {self.name} - attempting retry")
try:
# Single retry attempt
model = self._get_model_wrapper(request)
raw_response = await model.generate_content(prompt)
response = raw_response.text
# If successful, process normally
return [TextContent(type="text", text=self._process_response(response, request).model_dump_json())]
# Single retry attempt using provider
retry_response = provider.generate_content(
prompt=prompt,
model_name=model_name,
system_prompt=system_prompt,
temperature=temperature,
thinking_mode=thinking_mode if provider.supports_thinking_mode(model_name) else None
)
if retry_response.content:
# If successful, process normally
retry_model_info = {
"provider": provider,
"model_name": model_name,
"model_response": retry_response
}
tool_output = self._parse_response(retry_response.content, request, retry_model_info)
return [TextContent(type="text", text=tool_output.model_dump_json())]
except Exception as retry_e:
logger.error(f"Retry failed for {self.name} tool: {str(retry_e)}")
@@ -699,7 +832,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
)
return [TextContent(type="text", text=error_output.model_dump_json())]
def _parse_response(self, raw_text: str, request) -> ToolOutput:
def _parse_response(self, raw_text: str, request, model_info: Optional[dict] = None) -> ToolOutput:
"""
Parse the raw response and determine if it's a clarification request or follow-up.
@@ -745,11 +878,11 @@ If any of these would strengthen your analysis, specify what Claude should searc
pass
# Normal text response - format using tool-specific formatting
formatted_content = self.format_response(raw_text, request)
formatted_content = self.format_response(raw_text, request, model_info)
# If we found a follow-up question, prepare the threading response
if follow_up_question:
return self._create_follow_up_response(formatted_content, follow_up_question, request)
return self._create_follow_up_response(formatted_content, follow_up_question, request, model_info)
# Check if we should offer Claude a continuation opportunity
continuation_offer = self._check_continuation_opportunity(request)
@@ -758,7 +891,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
logger.debug(
f"Creating continuation offer for {self.name} with {continuation_offer['remaining_turns']} turns remaining"
)
return self._create_continuation_offer_response(formatted_content, continuation_offer, request)
return self._create_continuation_offer_response(formatted_content, continuation_offer, request, model_info)
else:
logger.debug(f"No continuation offer created for {self.name}")
@@ -766,12 +899,32 @@ If any of these would strengthen your analysis, specify what Claude should searc
continuation_id = getattr(request, "continuation_id", None)
if continuation_id:
request_files = getattr(request, "files", []) or []
# Extract model metadata for conversation tracking
model_provider = None
model_name = None
model_metadata = None
if model_info:
provider = model_info.get("provider")
if provider:
model_provider = provider.get_provider_type().value
model_name = model_info.get("model_name")
model_response = model_info.get("model_response")
if model_response:
model_metadata = {
"usage": model_response.usage,
"metadata": model_response.metadata
}
success = add_turn(
continuation_id,
"assistant",
formatted_content,
files=request_files,
tool_name=self.name,
model_provider=model_provider,
model_name=model_name,
model_metadata=model_metadata,
)
if not success:
logging.warning(f"Failed to add turn to thread {continuation_id} for {self.name}")
@@ -820,7 +973,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
return None
def _create_follow_up_response(self, content: str, follow_up_data: dict, request) -> ToolOutput:
def _create_follow_up_response(self, content: str, follow_up_data: dict, request, model_info: Optional[dict] = None) -> ToolOutput:
"""
Create a response with follow-up question for conversation threading.
@@ -832,56 +985,57 @@ If any of these would strengthen your analysis, specify what Claude should searc
Returns:
ToolOutput configured for conversation continuation
"""
# Create or get thread ID
# Always create a new thread (with parent linkage if continuation)
continuation_id = getattr(request, "continuation_id", None)
request_files = getattr(request, "files", []) or []
try:
# Create new thread with parent linkage if continuing
thread_id = create_thread(
tool_name=self.name,
initial_request=request.model_dump() if hasattr(request, "model_dump") else {},
parent_thread_id=continuation_id # Link to parent thread if continuing
)
if continuation_id:
# This is a continuation - add this turn to existing thread
request_files = getattr(request, "files", []) or []
success = add_turn(
continuation_id,
# Add the assistant's response with follow-up
# Extract model metadata
model_provider = None
model_name = None
model_metadata = None
if model_info:
provider = model_info.get("provider")
if provider:
model_provider = provider.get_provider_type().value
model_name = model_info.get("model_name")
model_response = model_info.get("model_response")
if model_response:
model_metadata = {
"usage": model_response.usage,
"metadata": model_response.metadata
}
add_turn(
thread_id, # Add to the new thread
"assistant",
content,
follow_up_question=follow_up_data.get("follow_up_question"),
files=request_files,
tool_name=self.name,
model_provider=model_provider,
model_name=model_name,
model_metadata=model_metadata,
)
except Exception as e:
# Threading failed, return normal response
logger = logging.getLogger(f"tools.{self.name}")
logger.warning(f"Follow-up threading failed in {self.name}: {str(e)}")
return ToolOutput(
status="success",
content=content,
content_type="markdown",
metadata={"tool_name": self.name, "follow_up_error": str(e)},
)
if not success:
# Thread not found or at limit, return normal response
return ToolOutput(
status="success",
content=content,
content_type="markdown",
metadata={"tool_name": self.name},
)
thread_id = continuation_id
else:
# Create new thread
try:
thread_id = create_thread(
tool_name=self.name, initial_request=request.model_dump() if hasattr(request, "model_dump") else {}
)
# Add the assistant's response with follow-up
request_files = getattr(request, "files", []) or []
add_turn(
thread_id,
"assistant",
content,
follow_up_question=follow_up_data.get("follow_up_question"),
files=request_files,
tool_name=self.name,
)
except Exception as e:
# Threading failed, return normal response
logger = logging.getLogger(f"tools.{self.name}")
logger.warning(f"Follow-up threading failed in {self.name}: {str(e)}")
return ToolOutput(
status="success",
content=content,
content_type="markdown",
metadata={"tool_name": self.name, "follow_up_error": str(e)},
)
# Create follow-up request
follow_up_request = FollowUpRequest(
@@ -925,13 +1079,14 @@ If any of these would strengthen your analysis, specify what Claude should searc
try:
if continuation_id:
# Check remaining turns in existing thread
from utils.conversation_memory import get_thread
# Check remaining turns in thread chain
from utils.conversation_memory import get_thread_chain
context = get_thread(continuation_id)
if context:
current_turns = len(context.turns)
remaining_turns = MAX_CONVERSATION_TURNS - current_turns - 1 # -1 for this response
chain = get_thread_chain(continuation_id)
if chain:
# Count total turns across all threads in chain
total_turns = sum(len(thread.turns) for thread in chain)
remaining_turns = MAX_CONVERSATION_TURNS - total_turns - 1 # -1 for this response
else:
# Thread not found, don't offer continuation
return None
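A small self-contained sketch of the chain-wide turn counting above; the thread store, parent walk, and turn limit are toy stand-ins for the real conversation memory:
from dataclasses import dataclass, field
from typing import Optional

MAX_CONVERSATION_TURNS = 10  # illustrative limit, not the real config value

@dataclass
class Thread:
    thread_id: str
    parent_thread_id: Optional[str] = None
    turns: list = field(default_factory=list)

THREADS: dict[str, Thread] = {}  # toy in-memory store

def get_thread_chain(thread_id: str) -> list[Thread]:
    # Walk parent links from the newest thread back to the root.
    chain, current = [], THREADS.get(thread_id)
    while current:
        chain.append(current)
        current = THREADS.get(current.parent_thread_id) if current.parent_thread_id else None
    return chain

def remaining_turns(thread_id: str) -> int:
    total = sum(len(t.turns) for t in get_thread_chain(thread_id))
    return MAX_CONVERSATION_TURNS - total - 1  # reserve one slot for this response

THREADS["root"] = Thread("root", turns=["q", "a", "q", "a"])
THREADS["child"] = Thread("child", parent_thread_id="root", turns=["q", "a"])
print(remaining_turns("child"))  # 10 - 6 - 1 = 3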
@@ -949,7 +1104,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
# If anything fails, don't offer continuation
return None
def _create_continuation_offer_response(self, content: str, continuation_data: dict, request) -> ToolOutput:
def _create_continuation_offer_response(self, content: str, continuation_data: dict, request, model_info: Optional[dict] = None) -> ToolOutput:
"""
Create a response offering Claude the opportunity to continue conversation.
@@ -962,14 +1117,43 @@ If any of these would strengthen your analysis, specify what Claude should searc
ToolOutput configured with continuation offer
"""
try:
# Create new thread for potential continuation
# Create new thread for potential continuation (with parent link if continuing)
continuation_id = getattr(request, "continuation_id", None)
thread_id = create_thread(
tool_name=self.name, initial_request=request.model_dump() if hasattr(request, "model_dump") else {}
tool_name=self.name,
initial_request=request.model_dump() if hasattr(request, "model_dump") else {},
parent_thread_id=continuation_id # Link to parent if this is a continuation
)
# Add this response as the first turn (assistant turn)
request_files = getattr(request, "files", []) or []
add_turn(thread_id, "assistant", content, files=request_files, tool_name=self.name)
# Extract model metadata
model_provider = None
model_name = None
model_metadata = None
if model_info:
provider = model_info.get("provider")
if provider:
model_provider = provider.get_provider_type().value
model_name = model_info.get("model_name")
model_response = model_info.get("model_response")
if model_response:
model_metadata = {
"usage": model_response.usage,
"metadata": model_response.metadata
}
add_turn(
thread_id,
"assistant",
content,
files=request_files,
tool_name=self.name,
model_provider=model_provider,
model_name=model_name,
model_metadata=model_metadata,
)
# Create continuation offer
remaining_turns = continuation_data["remaining_turns"]
@@ -1022,7 +1206,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
"""
pass
def format_response(self, response: str, request) -> str:
def format_response(self, response: str, request, model_info: Optional[dict] = None) -> str:
"""
Format the model's response for display.
@@ -1033,6 +1217,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
Args:
response: The raw response from the model
request: The original request for context
model_info: Optional dict with model metadata (provider, model_name, model_response)
Returns:
str: Formatted response
@@ -1059,154 +1244,41 @@ If any of these would strengthen your analysis, specify what Claude should searc
f"{context_type} too large (~{estimated_tokens:,} tokens). Maximum is {MAX_CONTEXT_TOKENS:,} tokens."
)
def create_model(self, model_name: str, temperature: float, thinking_mode: str = "medium"):
def get_model_provider(self, model_name: str) -> ModelProvider:
"""
Create a configured Gemini model instance.
This method handles model creation with appropriate settings including
temperature and thinking budget configuration for models that support it.
Get a model provider for the specified model.
Args:
model_name: Name of the Gemini model to use (or shorthand like 'flash', 'pro')
temperature: Temperature setting for response generation
thinking_mode: Thinking depth mode (affects computational budget)
model_name: Name of the model to use (can be provider-specific or generic)
Returns:
Model instance configured and ready for generation
ModelProvider instance configured for the model
Raises:
ValueError: If no provider supports the requested model
"""
# Define model shorthands for user convenience
model_shorthands = {
"pro": "gemini-2.5-pro-preview-06-05",
"flash": "gemini-2.0-flash-exp",
}
# Resolve shorthand to full model name
resolved_model_name = model_shorthands.get(model_name.lower(), model_name)
# Map thinking modes to computational budget values
# Higher budgets allow for more complex reasoning but increase latency
thinking_budgets = {
"minimal": 128, # Minimum for 2.5 Pro - fast responses
"low": 2048, # Light reasoning tasks
"medium": 8192, # Balanced reasoning (default)
"high": 16384, # Complex analysis
"max": 32768, # Maximum reasoning depth
}
thinking_budget = thinking_budgets.get(thinking_mode, 8192)
# Gemini 2.5 models support thinking configuration for enhanced reasoning
# Skip special handling in test environment to allow mocking
if "2.5" in resolved_model_name and not os.environ.get("PYTEST_CURRENT_TEST"):
try:
# Retrieve API key for Gemini client creation
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
raise ValueError("GEMINI_API_KEY environment variable is required")
client = genai.Client(api_key=api_key)
# Create a wrapper class to provide a consistent interface
# This abstracts the differences between API versions
class ModelWrapper:
def __init__(self, client, model_name, temperature, thinking_budget):
self.client = client
self.model_name = model_name
self.temperature = temperature
self.thinking_budget = thinking_budget
def generate_content(self, prompt):
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=types.GenerateContentConfig(
temperature=self.temperature,
candidate_count=1,
thinking_config=types.ThinkingConfig(thinking_budget=self.thinking_budget),
),
)
# Wrap the response to match the expected format
# This ensures compatibility across different API versions
class ResponseWrapper:
def __init__(self, text):
self.text = text
self.candidates = [
type(
"obj",
(object,),
{
"content": type(
"obj",
(object,),
{
"parts": [
type(
"obj",
(object,),
{"text": text},
)
]
},
)(),
"finish_reason": "STOP",
},
)
]
return ResponseWrapper(response.text)
return ModelWrapper(client, resolved_model_name, temperature, thinking_budget)
except Exception:
# Fall back to regular API if thinking configuration fails
# This ensures the tool remains functional even with API changes
pass
# For models that don't support thinking configuration, use standard API
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
raise ValueError("GEMINI_API_KEY environment variable is required")
client = genai.Client(api_key=api_key)
# Create a simple wrapper for models without thinking configuration
# This provides the same interface as the thinking-enabled wrapper
class SimpleModelWrapper:
def __init__(self, client, model_name, temperature):
self.client = client
self.model_name = model_name
self.temperature = temperature
def generate_content(self, prompt):
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=types.GenerateContentConfig(
temperature=self.temperature,
candidate_count=1,
),
)
# Convert to match expected format
class ResponseWrapper:
def __init__(self, text):
self.text = text
self.candidates = [
type(
"obj",
(object,),
{
"content": type(
"obj",
(object,),
{"parts": [type("obj", (object,), {"text": text})]},
)(),
"finish_reason": "STOP",
},
)
]
return ResponseWrapper(response.text)
return SimpleModelWrapper(client, resolved_model_name, temperature)
# Get provider from registry
provider = ModelProviderRegistry.get_provider_for_model(model_name)
if not provider:
# Try to determine provider from model name patterns
if "gemini" in model_name.lower() or model_name.lower() in ["flash", "pro"]:
# Register Gemini provider if not already registered
from providers.gemini import GeminiModelProvider
from providers.base import ProviderType
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
provider = ModelProviderRegistry.get_provider(ProviderType.GOOGLE)
elif "gpt" in model_name.lower() or "o3" in model_name.lower():
# Register OpenAI provider if not already registered
from providers.openai import OpenAIModelProvider
from providers.base import ProviderType
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
provider = ModelProviderRegistry.get_provider(ProviderType.OPENAI)
if not provider:
raise ValueError(
f"No provider found for model '{model_name}'. "
f"Ensure the appropriate API key is set and the model name is correct."
)
return provider
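Taken together, the provider surface this module now depends on roughly amounts to the interface below; this is a sketch inferred from the calls above, and the real definitions in the providers package may differ in detail:
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional

class ProviderType(Enum):
    GOOGLE = "google"  # .value appears in log lines; the actual strings may differ
    OPENAI = "openai"

@dataclass
class ModelCapabilities:
    max_tokens: int  # total context window, used for the 60/40 vs 80/20 split

@dataclass
class ModelResponse:
    content: str                      # generated text consumed by _parse_response
    friendly_name: str = ""           # shown in format_response attributions
    usage: dict = field(default_factory=dict)
    metadata: dict = field(default_factory=dict)  # e.g. {"finish_reason": "STOP"}

class ModelProvider(ABC):
    @abstractmethod
    def get_provider_type(self) -> ProviderType: ...

    @abstractmethod
    def get_capabilities(self, model_name: str) -> ModelCapabilities: ...

    @abstractmethod
    def supports_thinking_mode(self, model_name: str) -> bool: ...

    @abstractmethod
    def generate_content(
        self,
        prompt: str,
        model_name: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.5,
        thinking_mode: Optional[str] = None,
    ) -> ModelResponse: ...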

View File

@@ -19,7 +19,7 @@ class ChatRequest(ToolRequest):
prompt: str = Field(
...,
description="Your question, topic, or current thinking to discuss with Gemini",
description="Your question, topic, or current thinking to discuss",
)
files: Optional[list[str]] = Field(
default_factory=list,
@@ -35,33 +35,30 @@ class ChatTool(BaseTool):
def get_description(self) -> str:
return (
"GENERAL CHAT & COLLABORATIVE THINKING - Use Gemini as your thinking partner! "
"GENERAL CHAT & COLLABORATIVE THINKING - Use the AI model as your thinking partner! "
"Perfect for: bouncing ideas during your own analysis, getting second opinions on your plans, "
"collaborative brainstorming, validating your checklists and approaches, exploring alternatives. "
"Also great for: explanations, comparisons, general development questions. "
"Use this when you want to ask Gemini questions, brainstorm ideas, get opinions, discuss topics, "
"Use this when you want to ask questions, brainstorm ideas, get opinions, discuss topics, "
"share your thinking, or need explanations about concepts and approaches."
)
def get_input_schema(self) -> dict[str, Any]:
from config import DEFAULT_MODEL
from config import IS_AUTO_MODE
return {
schema = {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "Your question, topic, or current thinking to discuss with Gemini",
"description": "Your question, topic, or current thinking to discuss",
},
"files": {
"type": "array",
"items": {"type": "string"},
"description": "Optional files for context (must be absolute paths)",
},
"model": {
"type": "string",
"description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
},
"model": self.get_model_field_schema(),
"temperature": {
"type": "number",
"description": "Response creativity (0-1, default 0.5)",
@@ -83,8 +80,10 @@ class ChatTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["prompt"],
"required": ["prompt"] + (["model"] if IS_AUTO_MODE else []),
}
return schema
def get_system_prompt(self) -> str:
return CHAT_PROMPT
@@ -153,6 +152,6 @@ Please provide a thoughtful, comprehensive response:"""
return full_prompt
def format_response(self, response: str, request: ChatRequest) -> str:
"""Format the chat response with actionable guidance"""
def format_response(self, response: str, request: ChatRequest, model_info: Optional[dict] = None) -> str:
"""Format the chat response"""
return f"{response}\n\n---\n\n**Claude's Turn:** Evaluate this perspective alongside your analysis to form a comprehensive solution and continue with the user's request and task at hand."

View File

@@ -39,12 +39,12 @@ class CodeReviewRequest(ToolRequest):
...,
description="Code files or directories to review (must be absolute paths)",
)
context: str = Field(
prompt: str = Field(
...,
description="User's summary of what the code does, expected behavior, constraints, and review objectives",
)
review_type: str = Field("full", description="Type of review: full|security|performance|quick")
focus_on: Optional[str] = Field(None, description="Specific aspects to focus on during review")
focus_on: Optional[str] = Field(None, description="Specific aspects to focus on, or additional context that would help understand areas of concern")
standards: Optional[str] = Field(None, description="Coding standards or guidelines to enforce")
severity_filter: str = Field(
"all",
@@ -79,9 +79,9 @@ class CodeReviewTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import DEFAULT_MODEL
from config import IS_AUTO_MODE
return {
schema = {
"type": "object",
"properties": {
"files": {
@@ -89,11 +89,8 @@ class CodeReviewTool(BaseTool):
"items": {"type": "string"},
"description": "Code files or directories to review (must be absolute paths)",
},
"model": {
"type": "string",
"description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
},
"context": {
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": "User's summary of what the code does, expected behavior, constraints, and review objectives",
},
@@ -105,7 +102,7 @@ class CodeReviewTool(BaseTool):
},
"focus_on": {
"type": "string",
"description": "Specific aspects to focus on",
"description": "Specific aspects to focus on, or additional context that would help understand areas of concern",
},
"standards": {
"type": "string",
@@ -138,8 +135,10 @@ class CodeReviewTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["files", "context"],
"required": ["files", "prompt"] + (["model"] if IS_AUTO_MODE else []),
}
return schema
def get_system_prompt(self) -> str:
return CODEREVIEW_PROMPT
@@ -184,9 +183,9 @@ class CodeReviewTool(BaseTool):
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# If prompt.txt was found, use it as focus_on
# If prompt.txt was found, incorporate it into the prompt
if prompt_content:
request.focus_on = prompt_content
request.prompt = prompt_content + "\n\n" + request.prompt
# Update request files list
if updated_files is not None:
@@ -234,7 +233,7 @@ class CodeReviewTool(BaseTool):
full_prompt = f"""{self.get_system_prompt()}{websearch_instruction}
=== USER CONTEXT ===
{request.context}
{request.prompt}
=== END CONTEXT ===
{focus_instruction}
@@ -247,27 +246,19 @@ Please provide a code review aligned with the user's context and expectations, f
return full_prompt
def format_response(self, response: str, request: CodeReviewRequest) -> str:
def format_response(self, response: str, request: CodeReviewRequest, model_info: Optional[dict] = None) -> str:
"""
Format the review response with appropriate headers.
Adds context about the review type and focus area to help
users understand the scope of the review.
Format the review response.
Args:
response: The raw review from the model
request: The original request for context
model_info: Optional dict with model metadata
Returns:
str: Formatted response with headers
str: Formatted response with next steps
"""
header = f"Code Review ({request.review_type.upper()})"
if request.focus_on:
header += f" - Focus: {request.focus_on}"
return f"""{header}
{"=" * 50}
{response}
return f"""{response}
---

View File

@@ -17,7 +17,7 @@ from .models import ToolOutput
class DebugIssueRequest(ToolRequest):
"""Request model for debug tool"""
error_description: str = Field(..., description="Error message, symptoms, or issue description")
prompt: str = Field(..., description="Error message, symptoms, or issue description")
error_context: Optional[str] = Field(None, description="Stack trace, logs, or additional error context")
files: Optional[list[str]] = Field(
None,
@@ -38,7 +38,7 @@ class DebugIssueTool(BaseTool):
"DEBUG & ROOT CAUSE ANALYSIS - Expert debugging for complex issues with 1M token capacity. "
"Use this when you need to debug code, find out why something is failing, identify root causes, "
"trace errors, or diagnose issues. "
"IMPORTANT: Share diagnostic files liberally! Gemini can handle up to 1M tokens, so include: "
"IMPORTANT: Share diagnostic files liberally! The model can handle up to 1M tokens, so include: "
"large log files, full stack traces, memory dumps, diagnostic outputs, multiple related files, "
"entire modules, test results, configuration files - anything that might help debug the issue. "
"Claude should proactively use this tool whenever debugging is needed and share comprehensive "
@@ -50,19 +50,16 @@ class DebugIssueTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import DEFAULT_MODEL
from config import IS_AUTO_MODE
return {
schema = {
"type": "object",
"properties": {
"error_description": {
"prompt": {
"type": "string",
"description": "Error message, symptoms, or issue description",
},
"model": {
"type": "string",
"description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
},
"model": self.get_model_field_schema(),
"error_context": {
"type": "string",
"description": "Stack trace, logs, or additional error context",
@@ -101,8 +98,10 @@ class DebugIssueTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["error_description"],
"required": ["prompt"] + (["model"] if IS_AUTO_MODE else []),
}
return schema
def get_system_prompt(self) -> str:
return DEBUG_ISSUE_PROMPT
@@ -119,8 +118,8 @@ class DebugIssueTool(BaseTool):
request_model = self.get_request_model()
request = request_model(**arguments)
# Check error_description size
size_check = self.check_prompt_size(request.error_description)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
@@ -138,11 +137,10 @@ class DebugIssueTool(BaseTool):
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# If prompt.txt was found, use it as error_description or error_context
# Priority: if error_description is empty, use it there, otherwise use as error_context
# If prompt.txt was found, use it as prompt or error_context
if prompt_content:
if not request.error_description or request.error_description == "":
request.error_description = prompt_content
if not request.prompt or request.prompt == "":
request.prompt = prompt_content
else:
request.error_context = prompt_content
@@ -151,7 +149,7 @@ class DebugIssueTool(BaseTool):
request.files = updated_files
# Build context sections
context_parts = [f"=== ISSUE DESCRIPTION ===\n{request.error_description}\n=== END DESCRIPTION ==="]
context_parts = [f"=== ISSUE DESCRIPTION ===\n{request.prompt}\n=== END DESCRIPTION ==="]
if request.error_context:
context_parts.append(f"\n=== ERROR CONTEXT/STACK TRACE ===\n{request.error_context}\n=== END CONTEXT ===")
@@ -197,11 +195,15 @@ Focus on finding the root cause and providing actionable solutions."""
return full_prompt
def format_response(self, response: str, request: DebugIssueRequest) -> str:
def format_response(self, response: str, request: DebugIssueRequest, model_info: Optional[dict] = None) -> str:
"""Format the debugging response"""
return (
f"Debug Analysis\n{'=' * 50}\n\n{response}\n\n---\n\n"
"**Next Steps:** Evaluate Gemini's recommendations, synthesize the best fix considering potential "
"regressions, and if the root cause has been clearly identified, proceed with implementing the "
"potential fixes."
)
# Get the friendly model name
model_name = "the model"
if model_info and model_info.get("model_response"):
model_name = model_info["model_response"].friendly_name or "the model"
return f"""{response}
---
**Next Steps:** Evaluate {model_name}'s recommendations, synthesize the best fix considering potential regressions, and if the root cause has been clearly identified, proceed with implementing the potential fixes."""

View File

@@ -31,7 +31,7 @@ class PrecommitRequest(ToolRequest):
...,
description="Starting directory to search for git repositories (must be absolute path).",
)
original_request: Optional[str] = Field(
prompt: Optional[str] = Field(
None,
description="The original user request description for the changes. Provides critical context for the review.",
)
@@ -98,15 +98,17 @@ class Precommit(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import DEFAULT_MODEL
from config import IS_AUTO_MODE
schema = self.get_request_model().model_json_schema()
# Ensure model parameter has enhanced description
if "properties" in schema and "model" in schema["properties"]:
schema["properties"]["model"] = {
"type": "string",
"description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
}
schema["properties"]["model"] = self.get_model_field_schema()
# In auto mode, model is required
if IS_AUTO_MODE and "required" in schema:
if "model" not in schema["required"]:
schema["required"].append("model")
# Ensure use_websearch is in the schema with proper description
if "properties" in schema and "use_websearch" not in schema["properties"]:
schema["properties"]["use_websearch"] = {
@@ -140,9 +142,9 @@ class Precommit(BaseTool):
request_model = self.get_request_model()
request = request_model(**arguments)
# Check original_request size if provided
if request.original_request:
size_check = self.check_prompt_size(request.original_request)
# Check prompt size if provided
if request.prompt:
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
@@ -154,9 +156,9 @@ class Precommit(BaseTool):
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# If prompt.txt was found, use it as original_request
# If prompt.txt was found, use it as prompt
if prompt_content:
request.original_request = prompt_content
request.prompt = prompt_content
# Update request files list
if updated_files is not None:
@@ -338,8 +340,8 @@ class Precommit(BaseTool):
prompt_parts = []
# Add original request context if provided
if request.original_request:
prompt_parts.append(f"## Original Request\n\n{request.original_request}\n")
if request.prompt:
prompt_parts.append(f"## Original Request\n\n{request.prompt}\n")
# Add review parameters
prompt_parts.append("## Review Parameters\n")
@@ -443,6 +445,6 @@ class Precommit(BaseTool):
return full_prompt
def format_response(self, response: str, request: PrecommitRequest) -> str:
def format_response(self, response: str, request: PrecommitRequest, model_info: Optional[dict] = None) -> str:
"""Format the response with commit guidance"""
return f"{response}\n\n---\n\n**Commit Status:** If no critical issues found, changes are ready for commit. Otherwise, address issues first and re-run review. Check with user before proceeding with any commit."

View File

@@ -17,7 +17,7 @@ from .models import ToolOutput
class ThinkDeepRequest(ToolRequest):
"""Request model for thinkdeep tool"""
current_analysis: str = Field(..., description="Claude's current thinking/analysis to extend")
prompt: str = Field(..., description="Your current thinking/analysis to extend and validate")
problem_context: Optional[str] = Field(None, description="Additional context about the problem or goal")
focus_areas: Optional[list[str]] = Field(
None,
@@ -48,19 +48,16 @@ class ThinkDeepTool(BaseTool):
)
def get_input_schema(self) -> dict[str, Any]:
from config import DEFAULT_MODEL
from config import IS_AUTO_MODE
return {
schema = {
"type": "object",
"properties": {
"current_analysis": {
"prompt": {
"type": "string",
"description": "Your current thinking/analysis to extend and validate",
},
"model": {
"type": "string",
"description": f"Model to use: 'pro' (Gemini 2.5 Pro with extended thinking) or 'flash' (Gemini 2.0 Flash - faster). Defaults to '{DEFAULT_MODEL}' if not specified.",
},
"model": self.get_model_field_schema(),
"problem_context": {
"type": "string",
"description": "Additional context about the problem or goal",
@@ -96,8 +93,10 @@ class ThinkDeepTool(BaseTool):
"description": "Thread continuation ID for multi-turn conversations. Can be used to continue conversations across different tools. Only provide this if continuing a previous conversation thread.",
},
},
"required": ["current_analysis"],
"required": ["prompt"] + (["model"] if IS_AUTO_MODE else []),
}
return schema
def get_system_prompt(self) -> str:
return THINKDEEP_PROMPT
@@ -120,8 +119,8 @@ class ThinkDeepTool(BaseTool):
request_model = self.get_request_model()
request = request_model(**arguments)
# Check current_analysis size
size_check = self.check_prompt_size(request.current_analysis)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
@@ -133,8 +132,8 @@ class ThinkDeepTool(BaseTool):
# Check for prompt.txt in files
prompt_content, updated_files = self.handle_prompt_file(request.files)
# Use prompt.txt content if available, otherwise use the current_analysis field
current_analysis = prompt_content if prompt_content else request.current_analysis
# Use prompt.txt content if available, otherwise use the prompt field
current_analysis = prompt_content if prompt_content else request.prompt
# Update request files list
if updated_files is not None:
@@ -190,21 +189,24 @@ Please provide deep analysis that extends Claude's thinking with:
return full_prompt
def format_response(self, response: str, request: ThinkDeepRequest) -> str:
def format_response(self, response: str, request: ThinkDeepRequest, model_info: Optional[dict] = None) -> str:
"""Format the response with clear attribution and critical thinking prompt"""
return f"""## Extended Analysis by Gemini
{response}
# Get the friendly model name
model_name = "your fellow developer"
if model_info and model_info.get("model_response"):
model_name = model_info["model_response"].friendly_name or "your fellow developer"
return f"""{response}
---
## Critical Evaluation Required
Claude, please critically evaluate Gemini's analysis by considering:
Claude, please critically evaluate {model_name}'s analysis by thinking hard about the following:
1. **Technical merit** - Which suggestions are valuable vs. have limitations?
2. **Constraints** - Fit with codebase patterns, performance, security, architecture
3. **Risks** - Hidden complexities, edge cases, potential failure modes
4. **Final recommendation** - Synthesize both perspectives, then think deeply further to explore additional considerations and arrive at the best technical solution
Remember: Use Gemini's insights to enhance, not replace, your analysis."""
Remember: Use {model_name}'s insights to enhance, not replace, your analysis."""