Perform prompt size checks only at the MCP boundary

New test to confirm that history build-up and the system prompt do not affect prompt size checks
Also check for large prompts in focus_on
Fixed .env.example, which incorrectly left CUSTOM_API uncommented, causing the run-server script to think at least one key exists
Fahad
2025-06-15 10:37:08 +04:00
parent 3b03783ea7
commit 4becd70a82
14 changed files with 404 additions and 198 deletions
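
Taken together, the diffs below remove each tool's per-tool execute() override and move the size check into prepare_prompt(), signalling the result back to the base class via a tagged exception. A minimal sketch of the round trip follows; everything except check_prompt_size, ToolOutput, BaseTool, and the MCP_SIZE_CHECK: prefix is illustrative:

from mcp.types import TextContent
from tools.base import BaseTool
from tools.models import ToolOutput

class ExampleTool(BaseTool):
    async def prepare_prompt(self, request) -> str:
        # Check user input size at the MCP transport boundary,
        # BEFORE system prompt, files, or history are appended
        size_check = self.check_prompt_size(request.prompt)
        if size_check:
            # prepare_prompt() must return a str, so signal via a
            # tagged exception that the base execute() unwraps
            raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
        return self._build_full_prompt(request)  # illustrative helper

# In BaseTool.execute()'s error handler (see the base.py diff below):
#     if error_msg.startswith("MCP_SIZE_CHECK:"):
#         return [TextContent(type="text", text=error_msg[15:])]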

View File

@@ -4,7 +4,6 @@ Analyze tool - General-purpose code and file analysis
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
@@ -14,7 +13,6 @@ from config import TEMPERATURE_ANALYTICAL
from systemprompts import ANALYZE_PROMPT
from .base import BaseTool, ToolRequest
from .models import ToolOutput
class AnalyzeRequest(ToolRequest):
@@ -117,20 +115,6 @@ class AnalyzeTool(BaseTool):
def get_request_model(self):
return AnalyzeRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check question size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
async def prepare_prompt(self, request: AnalyzeRequest) -> str:
"""Prepare the analysis prompt"""
# Check for prompt.txt in files
@@ -140,6 +124,13 @@ class AnalyzeTool(BaseTool):
if prompt_content:
request.prompt = prompt_content
# Check user input size at MCP transport boundary (before adding internal content)
size_check = self.check_prompt_size(request.prompt)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Update request files list
if updated_files is not None:
request.files = updated_files

View File

@@ -862,16 +862,36 @@ When recommending searches, be specific about what information you need and why
def check_prompt_size(self, text: str) -> Optional[dict[str, Any]]:
"""
Check if a text field is too large for MCP's token limits.
Check if USER INPUT text is too large for MCP transport boundary.
IMPORTANT: This method should ONLY be used to validate user input that crosses
the Claude CLI ↔ MCP Server transport boundary. It should NOT be used to limit
internal MCP Server operations.
MCP Protocol Boundaries:
Claude CLI ←→ MCP Server ←→ External Model
           ↑             ↑
This limit applies here  This is NOT limited
The MCP protocol has a combined request+response limit of ~25K tokens.
To ensure adequate space for responses, we limit prompt input to a
configurable character limit (default 50K chars ~= 10-12K tokens).
Larger prompts are handled by having Claude save them to a file,
bypassing MCP's token constraints while preserving response capacity.
To ensure adequate space for MCP Server → Claude CLI responses, we limit
user input to 50K characters (roughly ~10-12K tokens). Larger user prompts
are handled by having Claude save them to prompt.txt files, bypassing MCP's
transport constraints while preserving response capacity.
What should be checked with this method:
- request.prompt field (user input from Claude CLI)
- prompt.txt file content (alternative user input)
- Other direct user input fields
What should NOT be checked with this method:
- System prompts added internally
- File content embedded by tools
- Conversation history from Redis
- Complete prompts sent to external models
Args:
text: The text to check
text: The user input text to check (NOT internal prompt content)
Returns:
Optional[Dict[str, Any]]: Response asking for file handling if too large, None otherwise
@@ -1153,6 +1173,12 @@ When recommending searches, be specific about what information you need and why
logger = logging.getLogger(f"tools.{self.name}")
error_msg = str(e)
# Check if this is an MCP size check error from prepare_prompt
if error_msg.startswith("MCP_SIZE_CHECK:"):
logger.info(f"MCP prompt size limit exceeded in {self.name}")
tool_output_json = error_msg[15:] # Remove "MCP_SIZE_CHECK:" prefix
return [TextContent(type="text", text=tool_output_json)]
# Check if this is a 500 INTERNAL error that asks for retry
if "500 INTERNAL" in error_msg and "Please retry" in error_msg:
logger.warning(f"500 INTERNAL error in {self.name} - attempting retry")
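
For context, a hedged sketch of what check_prompt_size() plausibly returns when the limit is exceeded, inferred from the docstring above and the ResendPromptRequest model added below; the constant name and message wording are assumptions:

from typing import Any, Optional

MCP_PROMPT_SIZE_LIMIT = 50_000  # characters, roughly 10-12K tokens (assumed name)

def check_prompt_size(self, text: str) -> Optional[dict[str, Any]]:
    """Return a resend_prompt payload if user input exceeds the boundary limit."""
    if text and len(text) > MCP_PROMPT_SIZE_LIMIT:
        return {
            "status": "resend_prompt",
            "content": (
                "The prompt is too large for MCP's transport limits. Please save it "
                "to a file named prompt.txt and resend the request with that file."
            ),
            "content_type": "text",
            "metadata": {"prompt_size": len(text), "limit": MCP_PROMPT_SIZE_LIMIT},
        }
    return None  # within limits; the caller proceeds normally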

View File

@@ -4,7 +4,6 @@ Chat tool - General development chat and collaborative thinking
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
@@ -14,7 +13,6 @@ from config import TEMPERATURE_BALANCED
from systemprompts import CHAT_PROMPT
from .base import BaseTool, ToolRequest
from .models import ToolOutput
class ChatRequest(ToolRequest):
@@ -102,20 +100,6 @@ class ChatTool(BaseTool):
def get_request_model(self):
return ChatRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check prompt size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
async def prepare_prompt(self, request: ChatRequest) -> str:
"""Prepare the chat prompt with optional context files"""
# Check for prompt.txt in files
@@ -124,6 +108,16 @@ class ChatTool(BaseTool):
# Use prompt.txt content if available, otherwise use the prompt field
user_content = prompt_content if prompt_content else request.prompt
# Check user input size at MCP transport boundary (before adding internal content)
size_check = self.check_prompt_size(user_content)
if size_check:
# Need to return error, but prepare_prompt returns str
# Use exception to handle this cleanly
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Update request files list
if updated_files is not None:
request.files = updated_files

View File

@@ -16,14 +16,12 @@ Key Features:
from typing import Any, Optional
from mcp.types import TextContent
from pydantic import Field
from config import TEMPERATURE_ANALYTICAL
from systemprompts import CODEREVIEW_PROMPT
from .base import BaseTool, ToolRequest
from .models import ToolOutput
class CodeReviewRequest(ToolRequest):
@@ -153,21 +151,6 @@ class CodeReviewTool(BaseTool):
def get_request_model(self):
return CodeReviewRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check focus_on size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check focus_on size if provided
if request.focus_on:
size_check = self.check_prompt_size(request.focus_on)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
async def prepare_prompt(self, request: CodeReviewRequest) -> str:
"""
Prepare the code review prompt with customized instructions.
@@ -195,6 +178,22 @@ class CodeReviewTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# Check user input size at MCP transport boundary (before adding internal content)
user_content = request.prompt
size_check = self.check_prompt_size(user_content)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Also check focus_on field if provided (user input)
if request.focus_on:
focus_size_check = self.check_prompt_size(request.focus_on)
if focus_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**focus_size_check).model_dump_json()}")
# Use centralized file processing logic
continuation_id = getattr(request, "continuation_id", None)
file_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code")

View File

@@ -4,7 +4,6 @@ Debug Issue tool - Root cause analysis and debugging assistance
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
@@ -14,7 +13,6 @@ from config import TEMPERATURE_ANALYTICAL
from systemprompts import DEBUG_ISSUE_PROMPT
from .base import BaseTool, ToolRequest
from .models import ToolOutput
class DebugIssueRequest(ToolRequest):
@@ -122,26 +120,6 @@ class DebugIssueTool(BaseTool):
def get_request_model(self):
return DebugIssueRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check error_description and error_context size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Check error_context size if provided
if request.error_context:
size_check = self.check_prompt_size(request.error_context)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
async def prepare_prompt(self, request: DebugIssueRequest) -> str:
"""Prepare the debugging prompt"""
# Check for prompt.txt in files
@@ -154,6 +132,20 @@ class DebugIssueTool(BaseTool):
else:
request.error_context = prompt_content
# Check user input sizes at MCP transport boundary (before adding internal content)
size_check = self.check_prompt_size(request.prompt)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
if request.error_context:
size_check = self.check_prompt_size(request.error_context)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Update request files list
if updated_files is not None:
request.files = updated_files

View File

@@ -141,6 +141,15 @@ class RefactorAnalysisComplete(BaseModel):
next_actions_for_claude: list[RefactorAction] = Field(..., description="Specific actions for Claude to implement")
class ResendPromptRequest(BaseModel):
"""Request to resend prompt via file due to size limits"""
status: Literal["resend_prompt"] = "resend_prompt"
content: str = Field(..., description="Instructions for handling large prompt")
content_type: Literal["text"] = "text"
metadata: dict[str, Any] = Field(default_factory=dict)
# Registry mapping status strings to their corresponding Pydantic models
SPECIAL_STATUS_MODELS = {
"clarification_required": ClarificationRequest,
@@ -149,6 +158,7 @@ SPECIAL_STATUS_MODELS = {
"test_sample_needed": TestSampleNeeded,
"more_tests_required": MoreTestsRequired,
"refactor_analysis_complete": RefactorAnalysisComplete,
"resend_prompt": ResendPromptRequest,
}
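
Registering "resend_prompt" lets callers treat an oversized prompt as a structured status rather than an opaque error. An illustrative round trip through the registry (a sketch, not code from this commit):

import json

def parse_special_status(raw: str):
    """Sketch: map a special-status JSON payload back to its Pydantic model."""
    payload = json.loads(raw)
    model_cls = SPECIAL_STATUS_MODELS.get(payload.get("status"))
    if model_cls is None:
        return None  # ordinary tool output, not a special status
    return model_cls(**payload)  # e.g. ResendPromptRequest for "resend_prompt"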

View File

@@ -11,7 +11,6 @@ This provides comprehensive context for AI analysis - not a duplication bug.
import os
from typing import TYPE_CHECKING, Any, Literal, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
@@ -23,7 +22,6 @@ from utils.git_utils import find_git_repositories, get_git_status, run_git_comma
from utils.token_utils import estimate_tokens
from .base import BaseTool, ToolRequest
from .models import ToolOutput
# Conservative fallback for token limits
DEFAULT_CONTEXT_WINDOW = 200_000
@@ -201,21 +199,6 @@ class Precommit(BaseTool):
return ToolModelCategory.EXTENDED_REASONING
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check original_request size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size if provided
if request.prompt:
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
async def prepare_prompt(self, request: PrecommitRequest) -> str:
"""Prepare the prompt with git diff information."""
# Check for prompt.txt in files
@@ -229,6 +212,14 @@ class Precommit(BaseTool):
if updated_files is not None:
request.files = updated_files
# Check user input size at MCP transport boundary (before adding internal content)
user_content = request.prompt if request.prompt else ""
size_check = self.check_prompt_size(user_content)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Translate the path and files if running in Docker
translated_path = translate_path_for_environment(request.path)
translated_files = translate_file_paths(request.files)

View File

@@ -19,7 +19,6 @@ import logging
import os
from typing import Any, Literal, Optional
from mcp.types import TextContent
from pydantic import Field
from config import TEMPERATURE_ANALYTICAL
@@ -27,7 +26,6 @@ from systemprompts import REFACTOR_PROMPT
from utils.file_utils import translate_file_paths
from .base import BaseTool, ToolRequest
from .models import ToolOutput
logger = logging.getLogger(__name__)
@@ -154,25 +152,6 @@ class RefactorTool(BaseTool):
def get_request_model(self):
return RefactorRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check prompt size before processing"""
logger.info(f"[REFACTOR] execute called with arguments: {list(arguments.keys())}")
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size if provided
if request.prompt:
size_check = self.check_prompt_size(request.prompt)
if size_check:
logger.info("[REFACTOR] Prompt size check triggered, returning early")
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
logger.info("[REFACTOR] Prompt size OK, calling super().execute()")
# Continue with normal execution
return await super().execute(arguments)
def detect_primary_language(self, file_paths: list[str]) -> str:
"""
Detect the primary programming language from file extensions.
@@ -417,6 +396,14 @@ class RefactorTool(BaseTool):
logger.debug(f"[REFACTOR] Updated files list after prompt.txt processing: {len(updated_files)} files")
request.files = updated_files
# Check user input size at MCP transport boundary (before adding internal content)
user_content = request.prompt
size_check = self.check_prompt_size(user_content)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Calculate available token budget for dynamic allocation
continuation_id = getattr(request, "continuation_id", None)

View File

@@ -17,7 +17,6 @@ import logging
import os
from typing import Any, Optional
from mcp.types import TextContent
from pydantic import Field
from config import TEMPERATURE_ANALYTICAL
@@ -25,7 +24,6 @@ from systemprompts import TESTGEN_PROMPT
from utils.file_utils import translate_file_paths
from .base import BaseTool, ToolRequest
from .models import ToolOutput
logger = logging.getLogger(__name__)
@@ -145,21 +143,6 @@ class TestGenTool(BaseTool):
def get_request_model(self):
return TestGenRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check prompt size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size if provided
if request.prompt:
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
def _process_test_examples(
self, test_examples: list[str], continuation_id: Optional[str], available_tokens: int = None
) -> tuple[str, str]:
@@ -294,6 +277,14 @@ class TestGenTool(BaseTool):
logger.debug(f"[TESTGEN] Updated files list after prompt.txt processing: {len(updated_files)} files")
request.files = updated_files
# Check user input size at MCP transport boundary (before adding internal content)
user_content = request.prompt
size_check = self.check_prompt_size(user_content)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Calculate available token budget for dynamic allocation
continuation_id = getattr(request, "continuation_id", None)

View File

@@ -4,7 +4,6 @@ ThinkDeep tool - Extended reasoning and problem-solving
from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent
from pydantic import Field
if TYPE_CHECKING:
@@ -14,7 +13,6 @@ from config import TEMPERATURE_CREATIVE
from systemprompts import THINKDEEP_PROMPT
from .base import BaseTool, ToolRequest
from .models import ToolOutput
class ThinkDeepRequest(ToolRequest):
@@ -121,20 +119,6 @@ class ThinkDeepTool(BaseTool):
def get_request_model(self):
return ThinkDeepRequest
async def execute(self, arguments: dict[str, Any]) -> list[TextContent]:
"""Override execute to check current_analysis size before processing"""
# First validate request
request_model = self.get_request_model()
request = request_model(**arguments)
# Check prompt size
size_check = self.check_prompt_size(request.prompt)
if size_check:
return [TextContent(type="text", text=ToolOutput(**size_check).model_dump_json())]
# Continue with normal execution
return await super().execute(arguments)
async def prepare_prompt(self, request: ThinkDeepRequest) -> str:
"""Prepare the full prompt for extended thinking"""
# Check for prompt.txt in files
@@ -143,6 +127,13 @@ class ThinkDeepTool(BaseTool):
# Use prompt.txt content if available, otherwise use the prompt field
current_analysis = prompt_content if prompt_content else request.prompt
# Check user input size at MCP transport boundary (before adding internal content)
size_check = self.check_prompt_size(current_analysis)
if size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}")
# Update request files list
if updated_files is not None:
request.files = updated_files
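
Finally, a sketch of the kind of regression test the commit message describes, assuming pytest and the 50K character limit; the tests actually added by this commit may be structured differently:

from tools.chat import ChatTool

def test_size_check_measures_only_user_input():
    tool = ChatTool()
    # Small user input always passes, no matter how large the internal
    # system prompt or conversation history later becomes
    assert tool.check_prompt_size("Hello") is None
    # Oversized user input trips the transport-boundary check
    result = tool.check_prompt_size("x" * 60_000)  # past the assumed 50K limit
    assert result is not None
    assert result["status"] == "resend_prompt"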