WIP - improvements to token usage tracking, simulator added for live testing, improvements to file loading

Author: Fahad
Date: 2025-06-11 13:24:59 +04:00
parent 5a94737516
commit 98eab46abf
13 changed files with 1383 additions and 64 deletions

View File

@@ -14,6 +14,13 @@ The ultimate development partner for Claude - a Model Context Protocol server th
 **Think of it as Claude Code _for_ Claude Code.**
+---
+> ⚠️ **Active Development Notice**
+> This project is under rapid development with frequent commits and changes over the past few days.
+> The goal is to expand support beyond Gemini to include additional AI models and providers.
+> **Watch this space** for new capabilities and potentially breaking changes in between updates!
 ## Quick Navigation
 - **Getting Started**

communication_simulator_test.py (new executable file, 1246 changed lines)

File diff suppressed because it is too large.

View File

@@ -23,11 +23,13 @@ __author__ = "Fahad Gilani" # Primary maintainer
 # This should be a stable, high-performance model suitable for code analysis
 GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"

-# MAX_CONTEXT_TOKENS: Maximum number of tokens that can be included in a single request
-# This limit includes both the prompt and expected response
-# Gemini Pro models support up to 1M tokens, but practical usage should reserve
-# space for the model's response (typically 50K-100K tokens reserved)
-MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
+# Token allocation for Gemini Pro (1M total capacity)
+# MAX_CONTEXT_TOKENS: Total model capacity
+# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
+# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
+MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens total capacity for Gemini Pro
+MAX_CONTENT_TOKENS = 800_000  # 800K tokens for content (prompts + files + history)
+RESPONSE_RESERVE_TOKENS = 200_000  # 200K tokens reserved for response generation

 # Temperature defaults for different tool types
 # Temperature controls the randomness/creativity of model responses
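For readers skimming the diff: a minimal sketch (not part of the commit) of how the new constants partition the 1M-token window. The names and values mirror the config.py lines above.

```python
# Sketch of the token budget split introduced above (values copied from config.py).
MAX_CONTEXT_TOKENS = 1_000_000     # total Gemini Pro capacity
MAX_CONTENT_TOKENS = 800_000       # prompts + files + conversation history
RESPONSE_RESERVE_TOKENS = 200_000  # held back for the model's response

# The content and reserve budgets are expected to add up to the total capacity.
assert MAX_CONTENT_TOKENS + RESPONSE_RESERVE_TOKENS == MAX_CONTEXT_TOKENS
```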

View File

@@ -328,8 +328,8 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     if not success:
         logger.warning(f"Failed to add user turn to thread {continuation_id}")

-    # Build conversation history
-    conversation_history = build_conversation_history(context)
+    # Build conversation history and track token usage
+    conversation_history, conversation_tokens = build_conversation_history(context)

     # Add dynamic follow-up instructions based on turn count
     follow_up_instructions = get_follow_up_instructions(len(context.turns))
@@ -343,10 +343,15 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     else:
         enhanced_prompt = f"{original_prompt}\n\n{follow_up_instructions}"

-    # Update arguments with enhanced context
+    # Update arguments with enhanced context and remaining token budget
     enhanced_arguments = arguments.copy()
     enhanced_arguments["prompt"] = enhanced_prompt

+    # Calculate remaining token budget for current request files/content
+    from config import MAX_CONTENT_TOKENS
+
+    remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens
+    enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens)  # Ensure non-negative
+
     # Merge original context parameters (files, etc.) with new request
     if context.initial_context:
         for key, value in context.initial_context.items():
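A self-contained sketch of the budget handoff shown in this hunk: the conversation history is charged against MAX_CONTENT_TOKENS first, and the remainder travels to the tool via the private `_remaining_tokens` argument. The standalone `remaining_budget` helper below is illustrative, not code from the commit.

```python
MAX_CONTENT_TOKENS = 800_000  # mirrors config.py above


def remaining_budget(conversation_tokens: int) -> int:
    """Tokens left for the current request's prompt and files after history is counted."""
    return max(0, MAX_CONTENT_TOKENS - conversation_tokens)


# e.g. a thread whose reconstructed history already costs ~150K tokens
enhanced_arguments = {"prompt": "...", "_remaining_tokens": remaining_budget(150_000)}
print(enhanced_arguments["_remaining_tokens"])  # 650000
```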

View File

@@ -166,7 +166,7 @@ class TestConversationMemory:
             initial_context={},
         )
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         # Test basic structure
         assert "CONVERSATION HISTORY" in history
@@ -207,8 +207,9 @@ class TestConversationMemory:
             initial_context={},
         )
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         assert history == ""
+        assert tokens == 0

 class TestConversationFlow:
@@ -373,7 +374,7 @@ class TestConversationFlow:
             initial_context={},
         )
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         expected_turn_text = f"Turn {test_max}/{MAX_CONVERSATION_TURNS}"
         assert expected_turn_text in history
@@ -595,7 +596,7 @@ class TestConversationFlow:
initial_context={"prompt": "Analyze this codebase", "files": ["/project/src/"]}, initial_context={"prompt": "Analyze this codebase", "files": ["/project/src/"]},
) )
history = build_conversation_history(final_context) history, tokens = build_conversation_history(final_context)
# Verify chronological order and speaker identification # Verify chronological order and speaker identification
assert "--- Turn 1 (Gemini using analyze) ---" in history assert "--- Turn 1 (Gemini using analyze) ---" in history
@@ -670,7 +671,7 @@ class TestConversationFlow:
         mock_client.get.return_value = context_with_followup.model_dump_json()

         # Build history to verify follow-up is preserved
-        history = build_conversation_history(context_with_followup)
+        history, tokens = build_conversation_history(context_with_followup)

         assert "Found potential issue in authentication" in history
         assert "[Gemini's Follow-up: Should I examine the authentication middleware?]" in history
@@ -762,7 +763,7 @@ class TestConversationFlow:
         )

         # Build conversation history (should handle token limits gracefully)
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         # Verify the history was built successfully
         assert "=== CONVERSATION HISTORY ===" in history

View File

@@ -247,7 +247,7 @@ class TestCrossToolContinuation:
         # Build conversation history
         from utils.conversation_memory import build_conversation_history

-        history = build_conversation_history(thread_context)
+        history, tokens = build_conversation_history(thread_context)

         # Verify tool names are included in the history
         assert "Turn 1 (Gemini using test_analysis)" in history

View File

@@ -214,15 +214,15 @@ class TestLargePromptHandling:
         mock_model.generate_content.return_value = mock_response
         mock_create_model.return_value = mock_model

-        # Mock read_files to avoid file system access
-        with patch("tools.chat.read_files") as mock_read_files:
-            mock_read_files.return_value = "File content"
+        # Mock the centralized file preparation method to avoid file system access
+        with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
+            mock_prepare_files.return_value = "File content"
             await tool.execute({"prompt": "", "files": [temp_prompt_file, other_file]})

             # Verify prompt.txt was removed from files list
-            mock_read_files.assert_called_once()
-            files_arg = mock_read_files.call_args[0][0]
+            mock_prepare_files.assert_called_once()
+            files_arg = mock_prepare_files.call_args[0][0]
             assert len(files_arg) == 1
             assert files_arg[0] == other_file

View File

@@ -228,10 +228,8 @@ class TestPrecommitTool:
@patch("tools.precommit.find_git_repositories") @patch("tools.precommit.find_git_repositories")
@patch("tools.precommit.get_git_status") @patch("tools.precommit.get_git_status")
@patch("tools.precommit.run_git_command") @patch("tools.precommit.run_git_command")
@patch("tools.precommit.read_files")
async def test_files_parameter_with_context( async def test_files_parameter_with_context(
self, self,
mock_read_files,
mock_run_git, mock_run_git,
mock_status, mock_status,
mock_find_repos, mock_find_repos,
@@ -254,14 +252,15 @@ class TestPrecommitTool:
(True, ""), # unstaged files list (empty) (True, ""), # unstaged files list (empty)
] ]
# Mock read_files # Mock the centralized file preparation method
mock_read_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ===" with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
mock_prepare_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ==="
request = PrecommitRequest( request = PrecommitRequest(
path="/absolute/repo/path", path="/absolute/repo/path",
files=["/absolute/repo/path/config.py"], files=["/absolute/repo/path/config.py"],
) )
result = await tool.prepare_prompt(request) result = await tool.prepare_prompt(request)
# Verify context files are included # Verify context files are included
assert "## Context Files Summary" in result assert "## Context Files Summary" in result
@@ -316,9 +315,9 @@ class TestPrecommitTool:
(True, ""), # unstaged files (empty) (True, ""), # unstaged files (empty)
] ]
# Mock read_files to return empty (file not found) # Mock the centralized file preparation method to return empty (file not found)
with patch("tools.precommit.read_files") as mock_read: with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
mock_read.return_value = "" mock_prepare_files.return_value = ""
result_with_files = await tool.prepare_prompt(request_with_files) result_with_files = await tool.prepare_prompt(request_with_files)
assert "If you need additional context files" not in result_with_files assert "If you need additional context files" not in result_with_files

View File

@@ -67,16 +67,16 @@ class TestPromptRegression:
         mock_model.generate_content.return_value = mock_model_response()
         mock_create_model.return_value = mock_model

-        # Mock file reading
-        with patch("tools.chat.read_files") as mock_read_files:
-            mock_read_files.return_value = "File content here"
+        # Mock file reading through the centralized method
+        with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
+            mock_prepare_files.return_value = "File content here"

             result = await tool.execute({"prompt": "Analyze this code", "files": ["/path/to/file.py"]})

             assert len(result) == 1
             output = json.loads(result[0].text)
             assert output["status"] == "success"
-            mock_read_files.assert_called_once_with(["/path/to/file.py"])
+            mock_prepare_files.assert_called_once_with(["/path/to/file.py"], None, "Context files")

     @pytest.mark.asyncio
     async def test_thinkdeep_normal_analysis(self, mock_model_response):

View File

@@ -195,9 +195,10 @@ class BaseTool(ABC):
""" """
Filter out files that are already embedded in conversation history. Filter out files that are already embedded in conversation history.
This method takes a list of requested files and removes any that have This method prevents duplicate file embeddings by filtering out files that have
already been embedded in the conversation history, preventing duplicate already been embedded in the conversation history. This optimizes token usage
file embeddings and optimizing token usage. while ensuring tools still have logical access to all requested files through
conversation history references.
Args: Args:
requested_files: List of files requested for current tool execution requested_files: List of files requested for current tool execution
@@ -210,15 +211,36 @@ class BaseTool(ABC):
             # New conversation, all files are new
             return requested_files

-        embedded_files = set(self.get_conversation_embedded_files(continuation_id))
-
-        # Return only files that haven't been embedded yet
-        new_files = [f for f in requested_files if f not in embedded_files]
-
-        return new_files
+        try:
+            embedded_files = set(self.get_conversation_embedded_files(continuation_id))
+
+            # Safety check: If no files are marked as embedded but we have a continuation_id,
+            # this might indicate an issue with conversation history. Be conservative.
+            if not embedded_files:
+                logger.debug(f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}")
+                return requested_files
+
+            # Return only files that haven't been embedded yet
+            new_files = [f for f in requested_files if f not in embedded_files]
+
+            # Log filtering results for debugging
+            if len(new_files) < len(requested_files):
+                skipped = [f for f in requested_files if f in embedded_files]
+                logger.debug(f"📁 {self.name} tool: Filtering {len(skipped)} files already in conversation history: {', '.join(skipped)}")
+
+            return new_files
+        except Exception as e:
+            # If there's any issue with conversation history lookup, be conservative
+            # and include all files rather than risk losing access to needed files
+            logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}")
+            logger.warning(f"📁 {self.name} tool: Including all requested files as fallback")
+            return requested_files

     def _prepare_file_content_for_prompt(
-        self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files"
+        self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files",
+        max_tokens: Optional[int] = None, reserve_tokens: int = 1_000, remaining_budget: Optional[int] = None,
+        arguments: Optional[dict] = None
     ) -> str:
         """
         Centralized file processing for tool prompts.
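The filtering behaviour added above can be summarized with a standalone sketch (hypothetical function, not the class method itself): when nothing is recorded as embedded for a continued thread, everything is kept; otherwise only files not yet seen in the conversation are embedded again.

```python
def filter_new_files(requested: list[str], embedded: set[str]) -> list[str]:
    """Illustrative stand-in for BaseTool.filter_new_files (logging and error handling omitted)."""
    # Conservative safety check: if nothing is recorded as embedded, keep everything.
    if not embedded:
        return requested
    return [f for f in requested if f not in embedded]


print(filter_new_files(["a.py", "b.py"], {"a.py"}))  # ['b.py']
print(filter_new_files(["a.py", "b.py"], set()))     # ['a.py', 'b.py']
```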
@@ -232,6 +254,10 @@ class BaseTool(ABC):
             request_files: List of files requested for current tool execution
             continuation_id: Thread continuation ID, or None for new conversations
             context_description: Description for token limit validation (e.g. "Code", "New files")
+            max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS)
+            reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
+            remaining_budget: Remaining token budget after conversation history (from server.py)
+            arguments: Original tool arguments (used to extract _remaining_tokens if available)

         Returns:
             str: Formatted file content string ready for prompt inclusion
@@ -239,6 +265,24 @@ class BaseTool(ABC):
         if not request_files:
             return ""

+        # Extract remaining budget from arguments if available
+        if remaining_budget is None:
+            # Use provided arguments or fall back to stored arguments from execute()
+            args_to_use = arguments or getattr(self, '_current_arguments', {})
+            remaining_budget = args_to_use.get("_remaining_tokens")
+
+        # Use remaining budget if provided, otherwise fall back to max_tokens or default
+        if remaining_budget is not None:
+            effective_max_tokens = remaining_budget - reserve_tokens
+        elif max_tokens is not None:
+            effective_max_tokens = max_tokens - reserve_tokens
+        else:
+            from config import MAX_CONTENT_TOKENS
+            effective_max_tokens = MAX_CONTENT_TOKENS - reserve_tokens
+
+        # Ensure we have a reasonable minimum budget
+        effective_max_tokens = max(1000, effective_max_tokens)
+
         files_to_embed = self.filter_new_files(request_files, continuation_id)
         content_parts = []
@@ -247,7 +291,7 @@ class BaseTool(ABC):
         if files_to_embed:
             logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
             try:
-                file_content = read_files(files_to_embed)
+                file_content = read_files(files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens)
                 self._validate_token_limit(file_content, context_description)
                 content_parts.append(file_content)
@@ -488,6 +532,9 @@ If any of these would strengthen your analysis, specify what Claude should searc
             List[TextContent]: Formatted response as MCP TextContent objects
         """
         try:
+            # Store arguments for access by helper methods (like _prepare_file_content_for_prompt)
+            self._current_arguments = arguments
+
             # Set up logger for this tool execution
             logger = logging.getLogger(f"tools.{self.name}")
             logger.info(f"Starting {self.name} tool execution with arguments: {list(arguments.keys())}")

View File

@@ -116,10 +116,15 @@ class ChatTool(BaseTool):
         if updated_files is not None:
             request.files = updated_files

-        # Add context files if provided
+        # Add context files if provided (using centralized file handling with filtering)
         if request.files:
-            file_content = read_files(request.files)
-            user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="
+            file_content = self._prepare_file_content_for_prompt(
+                request.files,
+                request.continuation_id,
+                "Context files"
+            )
+            if file_content:
+                user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="

         # Check token limits
         self._validate_token_limit(user_content, "Content")

View File

@@ -298,11 +298,13 @@ class Precommit(BaseTool):
         if translated_files:
             remaining_tokens = max_tokens - total_tokens

-            # Use standardized file reading with token budget
-            file_content = read_files(
+            # Use centralized file handling with filtering for duplicate prevention
+            file_content = self._prepare_file_content_for_prompt(
                 translated_files,
-                max_tokens=remaining_tokens,
-                reserve_tokens=1000,  # Small reserve for formatting
+                request.continuation_id,
+                "Context files",
+                max_tokens=remaining_tokens + 1000,  # Add back the reserve that was calculated
+                reserve_tokens=1000  # Small reserve for formatting
             )

             if file_content:

View File

@@ -312,7 +312,7 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
     return unique_files


-def build_conversation_history(context: ThreadContext, read_files_func=None) -> str:
+def build_conversation_history(context: ThreadContext, read_files_func=None) -> tuple[str, int]:
     """
     Build formatted conversation history for tool prompts with embedded file contents.
@@ -325,8 +325,8 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
         context: ThreadContext containing the complete conversation

     Returns:
-        str: Formatted conversation history with embedded files ready for inclusion in prompts
-             Empty string if no conversation turns exist
+        tuple[str, int]: (formatted_conversation_history, total_tokens_used)
+                         Returns ("", 0) if no conversation turns exist

     Format:
     - Header with thread metadata and turn count
@@ -341,7 +341,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     while preventing duplicate file embeddings.
     """
     if not context.turns:
-        return ""
+        return "", 0

     # Get all unique files referenced in this conversation
     all_files = get_conversation_file_list(context)
@@ -366,7 +366,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     )

     # Import required functions
-    from config import MAX_CONTEXT_TOKENS
+    from config import MAX_CONTENT_TOKENS

     if read_files_func is None:
         from utils.file_utils import read_file_content
@@ -384,7 +384,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
             if formatted_content:
                 # read_file_content already returns formatted content, use it directly
                 # Check if adding this file would exceed the limit
-                if total_tokens + content_tokens <= MAX_CONTEXT_TOKENS:
+                if total_tokens + content_tokens <= MAX_CONTENT_TOKENS:
                     file_contents.append(formatted_content)
                     total_tokens += content_tokens
                     files_included += 1
@@ -394,7 +394,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
                 else:
                     files_truncated += 1
                     logger.debug(
-                        f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTEXT_TOKENS:,} limit)"
+                        f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)"
                     )
                     # Stop processing more files
                     break
@@ -434,7 +434,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
             history_parts.append(files_content)
         else:
             # Handle token limit exceeded for conversation files
-            error_message = f"ERROR: The total size of files referenced in this conversation has exceeded the context limit and cannot be displayed.\nEstimated tokens: {estimated_tokens}, but limit is {MAX_CONTEXT_TOKENS}."
+            error_message = f"ERROR: The total size of files referenced in this conversation has exceeded the context limit and cannot be displayed.\nEstimated tokens: {estimated_tokens}, but limit is {MAX_CONTENT_TOKENS}."
             history_parts.append(error_message)
     else:
         history_parts.append("(No accessible files found)")
@@ -476,7 +476,12 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
["", "=== END CONVERSATION HISTORY ===", "", "Continue this conversation by building on the previous context."] ["", "=== END CONVERSATION HISTORY ===", "", "Continue this conversation by building on the previous context."]
) )
return "\n".join(history_parts) # Calculate total tokens for the complete conversation history
complete_history = "\n".join(history_parts)
from utils.token_utils import estimate_tokens
total_conversation_tokens = estimate_tokens(complete_history)
return complete_history, total_conversation_tokens
 def _is_valid_uuid(val: str) -> bool:
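Callers of `build_conversation_history` now receive a `(history, tokens)` tuple instead of a bare string. A rough sketch of the accounting: the full history is assembled first, then measured once. The `estimate_tokens` stub below uses a 4-characters-per-token approximation and merely stands in for `utils.token_utils.estimate_tokens`, whose implementation is not shown in this diff.

```python
def estimate_tokens(text: str) -> int:
    # Stand-in heuristic; the real utils.token_utils.estimate_tokens may differ.
    return len(text) // 4


history_parts = [
    "=== CONVERSATION HISTORY ===",
    "--- Turn 1 (Gemini using chat) ---",
    "Hello, can you review this file?",
    "=== END CONVERSATION HISTORY ===",
]
complete_history = "\n".join(history_parts)
total_conversation_tokens = estimate_tokens(complete_history)

# The tuple return lets server.py subtract this count from MAX_CONTENT_TOKENS.
print(total_conversation_tokens)
```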