WIP - improvements to token usage tracking, simulator added for live testing, improvements to file loading

Author: Fahad
Date: 2025-06-11 13:24:59 +04:00
parent 5a94737516
commit 98eab46abf
13 changed files with 1383 additions and 64 deletions

View File

@@ -14,6 +14,13 @@ The ultimate development partner for Claude - a Model Context Protocol server th
 **Think of it as Claude Code _for_ Claude Code.**
+---
+> ⚠️ **Active Development Notice**
+> This project is under rapid development with frequent commits and changes over the past few days.
+> The goal is to expand support beyond Gemini to include additional AI models and providers.
+> **Watch this space** for new capabilities and potentially breaking changes in between updates!
 ## Quick Navigation
 - **Getting Started**

communication_simulator_test.py (new executable file, 1246 changed lines)

File diff suppressed because it is too large.

View File

@@ -23,11 +23,13 @@ __author__ = "Fahad Gilani" # Primary maintainer
 # This should be a stable, high-performance model suitable for code analysis
 GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"

-# MAX_CONTEXT_TOKENS: Maximum number of tokens that can be included in a single request
-# This limit includes both the prompt and expected response
-# Gemini Pro models support up to 1M tokens, but practical usage should reserve
-# space for the model's response (typically 50K-100K tokens reserved)
-MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens for Gemini Pro
+# Token allocation for Gemini Pro (1M total capacity)
+# MAX_CONTEXT_TOKENS: Total model capacity
+# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
+# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
+MAX_CONTEXT_TOKENS = 1_000_000  # 1M tokens total capacity for Gemini Pro
+MAX_CONTENT_TOKENS = 800_000  # 800K tokens for content (prompts + files + history)
+RESPONSE_RESERVE_TOKENS = 200_000  # 200K tokens reserved for response generation

 # Temperature defaults for different tool types
 # Temperature controls the randomness/creativity of model responses
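For readers skimming the diff: a minimal sketch (not part of the commit) of how the new constants partition the 1M-token window. The names and values mirror the config.py lines above.

```python
# Sketch of the token budget split introduced above (values copied from config.py).
MAX_CONTEXT_TOKENS = 1_000_000     # total Gemini Pro capacity
MAX_CONTENT_TOKENS = 800_000       # prompts + files + conversation history
RESPONSE_RESERVE_TOKENS = 200_000  # held back for the model's response

# The content and reserve budgets are expected to add up to the total capacity.
assert MAX_CONTENT_TOKENS + RESPONSE_RESERVE_TOKENS == MAX_CONTEXT_TOKENS
```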

View File

@@ -328,8 +328,8 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     if not success:
         logger.warning(f"Failed to add user turn to thread {continuation_id}")

-    # Build conversation history
-    conversation_history = build_conversation_history(context)
+    # Build conversation history and track token usage
+    conversation_history, conversation_tokens = build_conversation_history(context)

     # Add dynamic follow-up instructions based on turn count
     follow_up_instructions = get_follow_up_instructions(len(context.turns))
@@ -343,10 +343,15 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
     else:
         enhanced_prompt = f"{original_prompt}\n\n{follow_up_instructions}"

-    # Update arguments with enhanced context
+    # Update arguments with enhanced context and remaining token budget
     enhanced_arguments = arguments.copy()
     enhanced_arguments["prompt"] = enhanced_prompt

+    # Calculate remaining token budget for current request files/content
+    from config import MAX_CONTENT_TOKENS
+
+    remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens
+    enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens)  # Ensure non-negative
+
     # Merge original context parameters (files, etc.) with new request
     if context.initial_context:
         for key, value in context.initial_context.items():
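A self-contained sketch of the budget handoff shown in this hunk: the conversation history is charged against MAX_CONTENT_TOKENS first, and the remainder travels to the tool via the private `_remaining_tokens` argument. The standalone `remaining_budget` helper below is illustrative, not code from the commit.

```python
MAX_CONTENT_TOKENS = 800_000  # mirrors config.py above


def remaining_budget(conversation_tokens: int) -> int:
    """Tokens left for the current request's prompt and files after history is counted."""
    return max(0, MAX_CONTENT_TOKENS - conversation_tokens)


# e.g. a thread whose reconstructed history already costs ~150K tokens
enhanced_arguments = {"prompt": "...", "_remaining_tokens": remaining_budget(150_000)}
print(enhanced_arguments["_remaining_tokens"])  # 650000
```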

View File

@@ -166,7 +166,7 @@ class TestConversationMemory:
             initial_context={},
         )
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         # Test basic structure
         assert "CONVERSATION HISTORY" in history
@@ -207,8 +207,9 @@ class TestConversationMemory:
             initial_context={},
         )
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         assert history == ""
+        assert tokens == 0

 class TestConversationFlow:
@@ -373,7 +374,7 @@ class TestConversationFlow:
             initial_context={},
         )
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         expected_turn_text = f"Turn {test_max}/{MAX_CONVERSATION_TURNS}"
         assert expected_turn_text in history
@@ -595,7 +596,7 @@ class TestConversationFlow:
initial_context={"prompt": "Analyze this codebase", "files": ["/project/src/"]}, initial_context={"prompt": "Analyze this codebase", "files": ["/project/src/"]},
) )
history = build_conversation_history(final_context) history, tokens = build_conversation_history(final_context)
# Verify chronological order and speaker identification # Verify chronological order and speaker identification
assert "--- Turn 1 (Gemini using analyze) ---" in history assert "--- Turn 1 (Gemini using analyze) ---" in history
@@ -670,7 +671,7 @@ class TestConversationFlow:
         mock_client.get.return_value = context_with_followup.model_dump_json()

         # Build history to verify follow-up is preserved
-        history = build_conversation_history(context_with_followup)
+        history, tokens = build_conversation_history(context_with_followup)

         assert "Found potential issue in authentication" in history
         assert "[Gemini's Follow-up: Should I examine the authentication middleware?]" in history
@@ -762,7 +763,7 @@ class TestConversationFlow:
         )

         # Build conversation history (should handle token limits gracefully)
-        history = build_conversation_history(context)
+        history, tokens = build_conversation_history(context)

         # Verify the history was built successfully
         assert "=== CONVERSATION HISTORY ===" in history

View File

@@ -247,7 +247,7 @@ class TestCrossToolContinuation:
         # Build conversation history
         from utils.conversation_memory import build_conversation_history

-        history = build_conversation_history(thread_context)
+        history, tokens = build_conversation_history(thread_context)

         # Verify tool names are included in the history
         assert "Turn 1 (Gemini using test_analysis)" in history

View File

@@ -214,15 +214,15 @@ class TestLargePromptHandling:
         mock_model.generate_content.return_value = mock_response
         mock_create_model.return_value = mock_model

-        # Mock read_files to avoid file system access
-        with patch("tools.chat.read_files") as mock_read_files:
-            mock_read_files.return_value = "File content"
+        # Mock the centralized file preparation method to avoid file system access
+        with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
+            mock_prepare_files.return_value = "File content"
             await tool.execute({"prompt": "", "files": [temp_prompt_file, other_file]})

             # Verify prompt.txt was removed from files list
-            mock_read_files.assert_called_once()
-            files_arg = mock_read_files.call_args[0][0]
+            mock_prepare_files.assert_called_once()
+            files_arg = mock_prepare_files.call_args[0][0]
             assert len(files_arg) == 1
             assert files_arg[0] == other_file

View File

@@ -228,10 +228,8 @@ class TestPrecommitTool:
@patch("tools.precommit.find_git_repositories") @patch("tools.precommit.find_git_repositories")
@patch("tools.precommit.get_git_status") @patch("tools.precommit.get_git_status")
@patch("tools.precommit.run_git_command") @patch("tools.precommit.run_git_command")
@patch("tools.precommit.read_files")
async def test_files_parameter_with_context( async def test_files_parameter_with_context(
self, self,
mock_read_files,
mock_run_git, mock_run_git,
mock_status, mock_status,
mock_find_repos, mock_find_repos,
@@ -254,14 +252,15 @@ class TestPrecommitTool:
(True, ""), # unstaged files list (empty) (True, ""), # unstaged files list (empty)
] ]
# Mock read_files # Mock the centralized file preparation method
mock_read_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ===" with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
mock_prepare_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ==="
request = PrecommitRequest( request = PrecommitRequest(
path="/absolute/repo/path", path="/absolute/repo/path",
files=["/absolute/repo/path/config.py"], files=["/absolute/repo/path/config.py"],
) )
result = await tool.prepare_prompt(request) result = await tool.prepare_prompt(request)
# Verify context files are included # Verify context files are included
assert "## Context Files Summary" in result assert "## Context Files Summary" in result
@@ -316,9 +315,9 @@ class TestPrecommitTool:
(True, ""), # unstaged files (empty) (True, ""), # unstaged files (empty)
] ]
# Mock read_files to return empty (file not found) # Mock the centralized file preparation method to return empty (file not found)
with patch("tools.precommit.read_files") as mock_read: with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
mock_read.return_value = "" mock_prepare_files.return_value = ""
result_with_files = await tool.prepare_prompt(request_with_files) result_with_files = await tool.prepare_prompt(request_with_files)
assert "If you need additional context files" not in result_with_files assert "If you need additional context files" not in result_with_files

View File

@@ -67,16 +67,16 @@ class TestPromptRegression:
         mock_model.generate_content.return_value = mock_model_response()
         mock_create_model.return_value = mock_model

-        # Mock file reading
-        with patch("tools.chat.read_files") as mock_read_files:
-            mock_read_files.return_value = "File content here"
+        # Mock file reading through the centralized method
+        with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
+            mock_prepare_files.return_value = "File content here"

             result = await tool.execute({"prompt": "Analyze this code", "files": ["/path/to/file.py"]})

             assert len(result) == 1
             output = json.loads(result[0].text)
             assert output["status"] == "success"
-            mock_read_files.assert_called_once_with(["/path/to/file.py"])
+            mock_prepare_files.assert_called_once_with(["/path/to/file.py"], None, "Context files")

     @pytest.mark.asyncio
     async def test_thinkdeep_normal_analysis(self, mock_model_response):

View File

@@ -195,9 +195,10 @@ class BaseTool(ABC):
""" """
Filter out files that are already embedded in conversation history. Filter out files that are already embedded in conversation history.
This method takes a list of requested files and removes any that have This method prevents duplicate file embeddings by filtering out files that have
already been embedded in the conversation history, preventing duplicate already been embedded in the conversation history. This optimizes token usage
file embeddings and optimizing token usage. while ensuring tools still have logical access to all requested files through
conversation history references.
Args: Args:
requested_files: List of files requested for current tool execution requested_files: List of files requested for current tool execution
@@ -210,15 +211,36 @@ class BaseTool(ABC):
             # New conversation, all files are new
             return requested_files

-        embedded_files = set(self.get_conversation_embedded_files(continuation_id))
-
-        # Return only files that haven't been embedded yet
-        new_files = [f for f in requested_files if f not in embedded_files]
-
-        return new_files
+        try:
+            embedded_files = set(self.get_conversation_embedded_files(continuation_id))
+
+            # Safety check: If no files are marked as embedded but we have a continuation_id,
+            # this might indicate an issue with conversation history. Be conservative.
+            if not embedded_files:
+                logger.debug(f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}")
+                return requested_files
+
+            # Return only files that haven't been embedded yet
+            new_files = [f for f in requested_files if f not in embedded_files]
+
+            # Log filtering results for debugging
+            if len(new_files) < len(requested_files):
+                skipped = [f for f in requested_files if f in embedded_files]
+                logger.debug(f"📁 {self.name} tool: Filtering {len(skipped)} files already in conversation history: {', '.join(skipped)}")
+
+            return new_files
+        except Exception as e:
+            # If there's any issue with conversation history lookup, be conservative
+            # and include all files rather than risk losing access to needed files
+            logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}")
+            logger.warning(f"📁 {self.name} tool: Including all requested files as fallback")
+            return requested_files

     def _prepare_file_content_for_prompt(
-        self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files"
+        self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files",
+        max_tokens: Optional[int] = None, reserve_tokens: int = 1_000, remaining_budget: Optional[int] = None,
+        arguments: Optional[dict] = None
     ) -> str:
         """
         Centralized file processing for tool prompts.
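The filtering behaviour added above can be summarized with a standalone sketch (hypothetical function, not the class method itself): when nothing is recorded as embedded for a continued thread, everything is kept; otherwise only files not yet seen in the conversation are embedded again.

```python
def filter_new_files(requested: list[str], embedded: set[str]) -> list[str]:
    """Illustrative stand-in for BaseTool.filter_new_files (logging and error handling omitted)."""
    # Conservative safety check: if nothing is recorded as embedded, keep everything.
    if not embedded:
        return requested
    return [f for f in requested if f not in embedded]


print(filter_new_files(["a.py", "b.py"], {"a.py"}))  # ['b.py']
print(filter_new_files(["a.py", "b.py"], set()))     # ['a.py', 'b.py']
```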
@@ -232,6 +254,10 @@ class BaseTool(ABC):
             request_files: List of files requested for current tool execution
             continuation_id: Thread continuation ID, or None for new conversations
             context_description: Description for token limit validation (e.g. "Code", "New files")
+            max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS)
+            reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
+            remaining_budget: Remaining token budget after conversation history (from server.py)
+            arguments: Original tool arguments (used to extract _remaining_tokens if available)

         Returns:
             str: Formatted file content string ready for prompt inclusion
@@ -239,6 +265,24 @@ class BaseTool(ABC):
         if not request_files:
             return ""

+        # Extract remaining budget from arguments if available
+        if remaining_budget is None:
+            # Use provided arguments or fall back to stored arguments from execute()
+            args_to_use = arguments or getattr(self, '_current_arguments', {})
+            remaining_budget = args_to_use.get("_remaining_tokens")
+
+        # Use remaining budget if provided, otherwise fall back to max_tokens or default
+        if remaining_budget is not None:
+            effective_max_tokens = remaining_budget - reserve_tokens
+        elif max_tokens is not None:
+            effective_max_tokens = max_tokens - reserve_tokens
+        else:
+            from config import MAX_CONTENT_TOKENS
+            effective_max_tokens = MAX_CONTENT_TOKENS - reserve_tokens
+
+        # Ensure we have a reasonable minimum budget
+        effective_max_tokens = max(1000, effective_max_tokens)
+
         files_to_embed = self.filter_new_files(request_files, continuation_id)
         content_parts = []
@@ -247,7 +291,7 @@ class BaseTool(ABC):
         if files_to_embed:
             logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
             try:
-                file_content = read_files(files_to_embed)
+                file_content = read_files(files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens)
                 self._validate_token_limit(file_content, context_description)
                 content_parts.append(file_content)
@@ -488,6 +532,9 @@ If any of these would strengthen your analysis, specify what Claude should searc
             List[TextContent]: Formatted response as MCP TextContent objects
         """
         try:
+            # Store arguments for access by helper methods (like _prepare_file_content_for_prompt)
+            self._current_arguments = arguments
+
             # Set up logger for this tool execution
             logger = logging.getLogger(f"tools.{self.name}")
             logger.info(f"Starting {self.name} tool execution with arguments: {list(arguments.keys())}")

View File

@@ -116,10 +116,15 @@ class ChatTool(BaseTool):
         if updated_files is not None:
             request.files = updated_files

-        # Add context files if provided
+        # Add context files if provided (using centralized file handling with filtering)
         if request.files:
-            file_content = read_files(request.files)
-            user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="
+            file_content = self._prepare_file_content_for_prompt(
+                request.files,
+                request.continuation_id,
+                "Context files"
+            )
+            if file_content:
+                user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="

         # Check token limits
         self._validate_token_limit(user_content, "Content")

View File

@@ -298,11 +298,13 @@ class Precommit(BaseTool):
         if translated_files:
             remaining_tokens = max_tokens - total_tokens

-            # Use standardized file reading with token budget
-            file_content = read_files(
+            # Use centralized file handling with filtering for duplicate prevention
+            file_content = self._prepare_file_content_for_prompt(
                 translated_files,
-                max_tokens=remaining_tokens,
-                reserve_tokens=1000,  # Small reserve for formatting
+                request.continuation_id,
+                "Context files",
+                max_tokens=remaining_tokens + 1000,  # Add back the reserve that was calculated
+                reserve_tokens=1000  # Small reserve for formatting
             )

             if file_content:

View File

@@ -312,7 +312,7 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
     return unique_files


-def build_conversation_history(context: ThreadContext, read_files_func=None) -> str:
+def build_conversation_history(context: ThreadContext, read_files_func=None) -> tuple[str, int]:
     """
     Build formatted conversation history for tool prompts with embedded file contents.
@@ -325,8 +325,8 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
         context: ThreadContext containing the complete conversation

     Returns:
-        str: Formatted conversation history with embedded files ready for inclusion in prompts
-             Empty string if no conversation turns exist
+        tuple[str, int]: (formatted_conversation_history, total_tokens_used)
+                         Returns ("", 0) if no conversation turns exist

     Format:
     - Header with thread metadata and turn count
@@ -341,7 +341,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     while preventing duplicate file embeddings.
     """
     if not context.turns:
-        return ""
+        return "", 0

     # Get all unique files referenced in this conversation
     all_files = get_conversation_file_list(context)
@@ -366,7 +366,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     )

     # Import required functions
-    from config import MAX_CONTEXT_TOKENS
+    from config import MAX_CONTENT_TOKENS

     if read_files_func is None:
         from utils.file_utils import read_file_content
@@ -384,7 +384,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
             if formatted_content:
                 # read_file_content already returns formatted content, use it directly
                 # Check if adding this file would exceed the limit
-                if total_tokens + content_tokens <= MAX_CONTEXT_TOKENS:
+                if total_tokens + content_tokens <= MAX_CONTENT_TOKENS:
                     file_contents.append(formatted_content)
                     total_tokens += content_tokens
                     files_included += 1
@@ -394,7 +394,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
                 else:
                     files_truncated += 1
                     logger.debug(
-                        f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTEXT_TOKENS:,} limit)"
+                        f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)"
                     )
                     # Stop processing more files
                     break
@@ -434,7 +434,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
             history_parts.append(files_content)
         else:
             # Handle token limit exceeded for conversation files
-            error_message = f"ERROR: The total size of files referenced in this conversation has exceeded the context limit and cannot be displayed.\nEstimated tokens: {estimated_tokens}, but limit is {MAX_CONTEXT_TOKENS}."
+            error_message = f"ERROR: The total size of files referenced in this conversation has exceeded the context limit and cannot be displayed.\nEstimated tokens: {estimated_tokens}, but limit is {MAX_CONTENT_TOKENS}."
             history_parts.append(error_message)
     else:
         history_parts.append("(No accessible files found)")
@@ -476,7 +476,12 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
["", "=== END CONVERSATION HISTORY ===", "", "Continue this conversation by building on the previous context."] ["", "=== END CONVERSATION HISTORY ===", "", "Continue this conversation by building on the previous context."]
) )
return "\n".join(history_parts) # Calculate total tokens for the complete conversation history
complete_history = "\n".join(history_parts)
from utils.token_utils import estimate_tokens
total_conversation_tokens = estimate_tokens(complete_history)
return complete_history, total_conversation_tokens
 def _is_valid_uuid(val: str) -> bool:
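Callers of `build_conversation_history` now receive a `(history, tokens)` tuple instead of a bare string. A rough sketch of the accounting: the full history is assembled first, then measured once. The `estimate_tokens` stub below uses a 4-characters-per-token approximation and merely stands in for `utils.token_utils.estimate_tokens`, whose implementation is not shown in this diff.

```python
def estimate_tokens(text: str) -> int:
    # Stand-in heuristic; the real utils.token_utils.estimate_tokens may differ.
    return len(text) // 4


history_parts = [
    "=== CONVERSATION HISTORY ===",
    "--- Turn 1 (Gemini using chat) ---",
    "Hello, can you review this file?",
    "=== END CONVERSATION HISTORY ===",
]
complete_history = "\n".join(history_parts)
total_conversation_tokens = estimate_tokens(complete_history)

# The tuple return lets server.py subtract this count from MAX_CONTENT_TOKENS.
print(total_conversation_tokens)
```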