WIP - improvements to token usage tracking, simulator added for live testing, improvements to file loading
This commit is contained in:
@@ -195,9 +195,10 @@ class BaseTool(ABC):
|
||||
"""
|
||||
Filter out files that are already embedded in conversation history.
|
||||
|
||||
This method takes a list of requested files and removes any that have
|
||||
already been embedded in the conversation history, preventing duplicate
|
||||
file embeddings and optimizing token usage.
|
||||
This method prevents duplicate file embeddings by filtering out files that have
|
||||
already been embedded in the conversation history. This optimizes token usage
|
||||
while ensuring tools still have logical access to all requested files through
|
||||
conversation history references.
|
||||
|
||||
Args:
|
||||
requested_files: List of files requested for current tool execution
|
||||
@@ -210,15 +211,36 @@ class BaseTool(ABC):
|
||||
# New conversation, all files are new
|
||||
return requested_files
|
||||
|
||||
embedded_files = set(self.get_conversation_embedded_files(continuation_id))
|
||||
|
||||
# Return only files that haven't been embedded yet
|
||||
new_files = [f for f in requested_files if f not in embedded_files]
|
||||
|
||||
return new_files
|
||||
try:
|
||||
embedded_files = set(self.get_conversation_embedded_files(continuation_id))
|
||||
|
||||
# Safety check: If no files are marked as embedded but we have a continuation_id,
|
||||
# this might indicate an issue with conversation history. Be conservative.
|
||||
if not embedded_files:
|
||||
logger.debug(f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}")
|
||||
return requested_files
|
||||
|
||||
# Return only files that haven't been embedded yet
|
||||
new_files = [f for f in requested_files if f not in embedded_files]
|
||||
|
||||
# Log filtering results for debugging
|
||||
if len(new_files) < len(requested_files):
|
||||
skipped = [f for f in requested_files if f in embedded_files]
|
||||
logger.debug(f"📁 {self.name} tool: Filtering {len(skipped)} files already in conversation history: {', '.join(skipped)}")
|
||||
|
||||
return new_files
|
||||
|
||||
except Exception as e:
|
||||
# If there's any issue with conversation history lookup, be conservative
|
||||
# and include all files rather than risk losing access to needed files
|
||||
logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}")
|
||||
logger.warning(f"📁 {self.name} tool: Including all requested files as fallback")
|
||||
return requested_files
|
||||
|
||||
def _prepare_file_content_for_prompt(
|
||||
self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files"
|
||||
self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files",
|
||||
max_tokens: Optional[int] = None, reserve_tokens: int = 1_000, remaining_budget: Optional[int] = None,
|
||||
arguments: Optional[dict] = None
|
||||
) -> str:
|
||||
"""
|
||||
Centralized file processing for tool prompts.
|
||||
@@ -232,6 +254,10 @@ class BaseTool(ABC):
|
||||
request_files: List of files requested for current tool execution
|
||||
continuation_id: Thread continuation ID, or None for new conversations
|
||||
context_description: Description for token limit validation (e.g. "Code", "New files")
|
||||
max_tokens: Maximum tokens to use (defaults to remaining budget or MAX_CONTENT_TOKENS)
|
||||
reserve_tokens: Tokens to reserve for additional prompt content (default 1K)
|
||||
remaining_budget: Remaining token budget after conversation history (from server.py)
|
||||
arguments: Original tool arguments (used to extract _remaining_tokens if available)
|
||||
|
||||
Returns:
|
||||
str: Formatted file content string ready for prompt inclusion
|
||||
@@ -239,6 +265,24 @@ class BaseTool(ABC):
|
||||
if not request_files:
|
||||
return ""
|
||||
|
||||
# Extract remaining budget from arguments if available
|
||||
if remaining_budget is None:
|
||||
# Use provided arguments or fall back to stored arguments from execute()
|
||||
args_to_use = arguments or getattr(self, '_current_arguments', {})
|
||||
remaining_budget = args_to_use.get("_remaining_tokens")
|
||||
|
||||
# Use remaining budget if provided, otherwise fall back to max_tokens or default
|
||||
if remaining_budget is not None:
|
||||
effective_max_tokens = remaining_budget - reserve_tokens
|
||||
elif max_tokens is not None:
|
||||
effective_max_tokens = max_tokens - reserve_tokens
|
||||
else:
|
||||
from config import MAX_CONTENT_TOKENS
|
||||
effective_max_tokens = MAX_CONTENT_TOKENS - reserve_tokens
|
||||
|
||||
# Ensure we have a reasonable minimum budget
|
||||
effective_max_tokens = max(1000, effective_max_tokens)
|
||||
|
||||
files_to_embed = self.filter_new_files(request_files, continuation_id)
|
||||
|
||||
content_parts = []
|
||||
@@ -247,7 +291,7 @@ class BaseTool(ABC):
|
||||
if files_to_embed:
|
||||
logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
|
||||
try:
|
||||
file_content = read_files(files_to_embed)
|
||||
file_content = read_files(files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens)
|
||||
self._validate_token_limit(file_content, context_description)
|
||||
content_parts.append(file_content)
|
||||
|
||||
@@ -488,6 +532,9 @@ If any of these would strengthen your analysis, specify what Claude should searc
|
||||
List[TextContent]: Formatted response as MCP TextContent objects
|
||||
"""
|
||||
try:
|
||||
# Store arguments for access by helper methods (like _prepare_file_content_for_prompt)
|
||||
self._current_arguments = arguments
|
||||
|
||||
# Set up logger for this tool execution
|
||||
logger = logging.getLogger(f"tools.{self.name}")
|
||||
logger.info(f"Starting {self.name} tool execution with arguments: {list(arguments.keys())}")
|
||||
|
||||
@@ -116,10 +116,15 @@ class ChatTool(BaseTool):
|
||||
if updated_files is not None:
|
||||
request.files = updated_files
|
||||
|
||||
# Add context files if provided
|
||||
# Add context files if provided (using centralized file handling with filtering)
|
||||
if request.files:
|
||||
file_content = read_files(request.files)
|
||||
user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="
|
||||
file_content = self._prepare_file_content_for_prompt(
|
||||
request.files,
|
||||
request.continuation_id,
|
||||
"Context files"
|
||||
)
|
||||
if file_content:
|
||||
user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="
|
||||
|
||||
# Check token limits
|
||||
self._validate_token_limit(user_content, "Content")
|
||||
|
||||
@@ -298,11 +298,13 @@ class Precommit(BaseTool):
|
||||
if translated_files:
|
||||
remaining_tokens = max_tokens - total_tokens
|
||||
|
||||
# Use standardized file reading with token budget
|
||||
file_content = read_files(
|
||||
translated_files,
|
||||
max_tokens=remaining_tokens,
|
||||
reserve_tokens=1000, # Small reserve for formatting
|
||||
# Use centralized file handling with filtering for duplicate prevention
|
||||
file_content = self._prepare_file_content_for_prompt(
|
||||
translated_files,
|
||||
request.continuation_id,
|
||||
"Context files",
|
||||
max_tokens=remaining_tokens + 1000, # Add back the reserve that was calculated
|
||||
reserve_tokens=1000 # Small reserve for formatting
|
||||
)
|
||||
|
||||
if file_content:
|
||||
|
||||
Reference in New Issue
Block a user