More tests

Fahad
2025-06-11 14:34:51 +04:00
parent 2e954b61ed
commit ac763e0213
9 changed files with 818 additions and 439 deletions

File diff suppressed because it is too large

View File

@@ -28,7 +28,7 @@ GEMINI_MODEL = "gemini-2.5-pro-preview-06-05"
# MAX_CONTENT_TOKENS: Available for prompts, conversation history, and files
# RESPONSE_RESERVE_TOKENS: Reserved for model response generation
MAX_CONTEXT_TOKENS = 1_000_000 # 1M tokens total capacity for Gemini Pro
-MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content (prompts + files + history)
+MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content (prompts + files + history)
RESPONSE_RESERVE_TOKENS = 200_000 # 200K tokens reserved for response generation
# Temperature defaults for different tool types
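Aside: the three constants form a fixed budget split, and everything downstream in this commit spends out of the content half. A minimal sketch of the invariant and of how a per-request file budget falls out of it (budget_for_files is a hypothetical helper, not part of this commit):

from config import MAX_CONTEXT_TOKENS, MAX_CONTENT_TOKENS, RESPONSE_RESERVE_TOKENS

# The content/response split must cover the whole context window exactly.
assert MAX_CONTENT_TOKENS + RESPONSE_RESERVE_TOKENS == MAX_CONTEXT_TOKENS  # 800K + 200K == 1M


def budget_for_files(history_tokens: int, prompt_tokens: int) -> int:
    """Hypothetical helper: tokens left for file content on this request."""
    return max(0, MAX_CONTENT_TOKENS - history_tokens - prompt_tokens)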

View File

@@ -346,9 +346,10 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any]:
    # Update arguments with enhanced context and remaining token budget
    enhanced_arguments = arguments.copy()
    enhanced_arguments["prompt"] = enhanced_prompt

    # Calculate remaining token budget for current request files/content
    from config import MAX_CONTENT_TOKENS

    remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens
    enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens)  # Ensure non-negative

View File

@@ -2,11 +2,13 @@
Sample Python module for testing MCP conversation continuity
"""
def fibonacci(n):
    """Calculate fibonacci number recursively"""
    if n <= 1:
        return n
-    return fibonacci(n-1) + fibonacci(n-2)
+    return fibonacci(n - 1) + fibonacci(n - 2)


def factorial(n):
    """Calculate factorial iteratively"""
@@ -15,17 +17,18 @@ def factorial(n):
        result *= i
    return result


class Calculator:
    """Simple calculator class"""

    def __init__(self):
        self.history = []

    def add(self, a, b):
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        return result

    def multiply(self, a, b):
        result = a * b
        self.history.append(f"{a} * {b} = {result}")

View File

@@ -252,7 +252,7 @@ class TestPrecommitTool:
(True, ""), # unstaged files list (empty)
]
# Mock the centralized file preparation method
# Mock the centralized file preparation method
with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
mock_prepare_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ==="
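For context, the patch.object pattern above stubs out file embedding entirely, so the test never touches the filesystem. A self-contained sketch under the assumption that Precommit is constructible without arguments (the test name is illustrative, not from the diff):

from unittest.mock import patch

from tools.precommit import Precommit


def test_file_preparation_is_stubbed():
    tool = Precommit()  # assumed no-arg construction
    with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
        mock_prepare_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ==="
        content = tool._prepare_file_content_for_prompt(["config.py"], None)
        assert "CONFIG_VALUE = 42" in content
        mock_prepare_files.assert_called_once_with(["config.py"], None)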

View File

@@ -213,23 +213,27 @@ class BaseTool(ABC):
        try:
            embedded_files = set(self.get_conversation_embedded_files(continuation_id))

            # Safety check: If no files are marked as embedded but we have a continuation_id,
            # this might indicate an issue with conversation history. Be conservative.
            if not embedded_files:
-                logger.debug(f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}")
+                logger.debug(
+                    f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}"
+                )
                return requested_files

            # Return only files that haven't been embedded yet
            new_files = [f for f in requested_files if f not in embedded_files]

            # Log filtering results for debugging
            if len(new_files) < len(requested_files):
                skipped = [f for f in requested_files if f in embedded_files]
-                logger.debug(f"📁 {self.name} tool: Filtering {len(skipped)} files already in conversation history: {', '.join(skipped)}")
+                logger.debug(
+                    f"📁 {self.name} tool: Filtering {len(skipped)} files already in conversation history: {', '.join(skipped)}"
+                )

            return new_files
        except Exception as e:
            # If there's any issue with conversation history lookup, be conservative
            # and include all files rather than risk losing access to needed files
@@ -238,9 +242,14 @@ class BaseTool(ABC):
            return requested_files

    def _prepare_file_content_for_prompt(
-        self, request_files: list[str], continuation_id: Optional[str], context_description: str = "New files",
-        max_tokens: Optional[int] = None, reserve_tokens: int = 1_000, remaining_budget: Optional[int] = None,
-        arguments: Optional[dict] = None
+        self,
+        request_files: list[str],
+        continuation_id: Optional[str],
+        context_description: str = "New files",
+        max_tokens: Optional[int] = None,
+        reserve_tokens: int = 1_000,
+        remaining_budget: Optional[int] = None,
+        arguments: Optional[dict] = None,
    ) -> str:
        """
        Centralized file processing for tool prompts.
@@ -268,7 +277,7 @@ class BaseTool(ABC):
        # Extract remaining budget from arguments if available
        if remaining_budget is None:
            # Use provided arguments or fall back to stored arguments from execute()
-            args_to_use = arguments or getattr(self, '_current_arguments', {})
+            args_to_use = arguments or getattr(self, "_current_arguments", {})
            remaining_budget = args_to_use.get("_remaining_tokens")

        # Use remaining budget if provided, otherwise fall back to max_tokens or default
@@ -278,8 +287,9 @@ class BaseTool(ABC):
            effective_max_tokens = max_tokens - reserve_tokens
        else:
            from config import MAX_CONTENT_TOKENS

            effective_max_tokens = MAX_CONTENT_TOKENS - reserve_tokens

        # Ensure we have a reasonable minimum budget
        effective_max_tokens = max(1000, effective_max_tokens)
@@ -291,7 +301,9 @@ class BaseTool(ABC):
        if files_to_embed:
            logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
            try:
-                file_content = read_files(files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens)
+                file_content = read_files(
+                    files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens
+                )
                self._validate_token_limit(file_content, context_description)
                content_parts.append(file_content)
@@ -534,7 +546,7 @@ If any of these would strengthen your analysis, specify what Claude should search
        try:
            # Store arguments for access by helper methods (like _prepare_file_content_for_prompt)
            self._current_arguments = arguments

            # Set up logger for this tool execution
            logger = logging.getLogger(f"tools.{self.name}")
            logger.info(f"Starting {self.name} tool execution with arguments: {list(arguments.keys())}")

View File

@@ -9,7 +9,6 @@ from pydantic import Field
from config import TEMPERATURE_BALANCED
from prompts import CHAT_PROMPT
-from utils import read_files

from .base import BaseTool, ToolRequest
from .models import ToolOutput
@@ -119,9 +118,7 @@ class ChatTool(BaseTool):
        # Add context files if provided (using centralized file handling with filtering)
        if request.files:
            file_content = self._prepare_file_content_for_prompt(
-                request.files,
-                request.continuation_id,
-                "Context files"
+                request.files, request.continuation_id, "Context files"
            )
            if file_content:
                user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ==="

View File

@@ -10,7 +10,7 @@ from pydantic import Field
from config import MAX_CONTEXT_TOKENS
from prompts.tool_prompts import PRECOMMIT_PROMPT
-from utils.file_utils import read_files, translate_file_paths, translate_path_for_environment
+from utils.file_utils import translate_file_paths, translate_path_for_environment
from utils.git_utils import find_git_repositories, get_git_status, run_git_command
from utils.token_utils import estimate_tokens
@@ -300,11 +300,11 @@ class Precommit(BaseTool):
        # Use centralized file handling with filtering for duplicate prevention
        file_content = self._prepare_file_content_for_prompt(
-            translated_files,
-            request.continuation_id,
+            translated_files,
+            request.continuation_id,
            "Context files",
            max_tokens=remaining_tokens + 1000,  # Add back the reserve that was calculated
-            reserve_tokens=1000  # Small reserve for formatting
+            reserve_tokens=1000,  # Small reserve for formatting
        )
        if file_content:
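The +1000 here is not arbitrary: _prepare_file_content_for_prompt subtracts reserve_tokens from max_tokens internally, so adding the reserve back means the embedded files get exactly remaining_tokens. A worked example, assuming the budget math shown in the BaseTool hunks:

remaining_tokens = 50_000  # illustrative figure left after the git diff text
max_tokens = remaining_tokens + 1000       # reserve added back by the caller
effective_max_tokens = max_tokens - 1000   # reserve subtracted again in BaseTool
assert effective_max_tokens == remaining_tokens  # files see the full remainder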

View File

@@ -479,6 +479,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
    # Calculate total tokens for the complete conversation history
    complete_history = "\n".join(history_parts)

    from utils.token_utils import estimate_tokens

    total_conversation_tokens = estimate_tokens(complete_history)

    return complete_history, total_conversation_tokens
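A rough sketch of how this return value might be exercised; estimate_tokens is assumed here to be a cheap characters-per-token heuristic, while the real implementation lives in utils/token_utils.py:

def estimate_tokens(text: str) -> int:
    """Assumed heuristic: roughly four characters per token."""
    return len(text) // 4


history_parts = ["--- Turn 1 (user) ---", "hello", "--- Turn 2 (assistant) ---", "hi there"]
complete_history = "\n".join(history_parts)
total_conversation_tokens = estimate_tokens(complete_history)  # small, as expected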