From 898373bc22218542fd69bf402bcd3b361ba013c8 Mon Sep 17 00:00:00 2001 From: Fahad Date: Wed, 11 Jun 2025 18:44:34 +0400 Subject: [PATCH] More tests --- communication_simulator_test.py | 7 +- server.py | 21 +- simulator_tests/__init__.py | 3 + simulator_tests/base_test.py | 9 +- simulator_tests/test_content_validation.py | 12 +- .../test_cross_tool_comprehensive.py | 306 ++++++++++++++++++ .../test_per_tool_deduplication.py | 133 ++++---- tools/base.py | 31 +- utils/conversation_memory.py | 26 +- utils/file_utils.py | 12 +- 10 files changed, 455 insertions(+), 105 deletions(-) create mode 100644 simulator_tests/test_cross_tool_comprehensive.py diff --git a/communication_simulator_test.py b/communication_simulator_test.py index ab5d4e3..c9b6592 100644 --- a/communication_simulator_test.py +++ b/communication_simulator_test.py @@ -366,7 +366,10 @@ def parse_arguments(): parser.add_argument("--list-tests", action="store_true", help="List available tests and exit") parser.add_argument("--individual", "-i", help="Run a single test individually") parser.add_argument( - "--skip-docker", action="store_true", default=True, help="Skip Docker setup (assumes containers are already running) - DEFAULT" + "--skip-docker", + action="store_true", + default=True, + help="Skip Docker setup (assumes containers are already running) - DEFAULT", ) parser.add_argument( "--rebuild-docker", action="store_true", help="Force rebuild Docker environment (overrides --skip-docker)" @@ -447,7 +450,7 @@ def main(): # Determine execution mode and run # Override skip_docker if rebuild_docker is specified skip_docker = args.skip_docker and not args.rebuild_docker - + if args.individual: exit_code = run_individual_test(simulator, args.individual, skip_docker) else: diff --git a/server.py b/server.py index 0ce7653..cd7ef42 100644 --- a/server.py +++ b/server.py @@ -22,6 +22,7 @@ import asyncio import logging import os import sys +import time from datetime import datetime from typing import Any @@ -52,7 +53,8 @@ from tools.models import ToolOutput log_level = os.getenv("LOG_LEVEL", "INFO").upper() # Create timezone-aware formatter -import time + + class LocalTimeFormatter(logging.Formatter): def formatTime(self, record, datefmt=None): """Override to use local timezone instead of UTC""" @@ -61,9 +63,10 @@ class LocalTimeFormatter(logging.Formatter): s = time.strftime(datefmt, ct) else: t = time.strftime("%Y-%m-%d %H:%M:%S", ct) - s = "%s,%03d" % (t, record.msecs) + s = f"{t},{record.msecs:03.0f}" return s + # Configure both console and file logging log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" logging.basicConfig( @@ -213,7 +216,9 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon if "continuation_id" in arguments and arguments["continuation_id"]: continuation_id = arguments["continuation_id"] logger.debug(f"Resuming conversation thread: {continuation_id}") - logger.debug(f"[CONVERSATION_DEBUG] Tool '{name}' resuming thread {continuation_id} with {len(arguments)} arguments") + logger.debug( + f"[CONVERSATION_DEBUG] Tool '{name}' resuming thread {continuation_id} with {len(arguments)} arguments" + ) logger.debug(f"[CONVERSATION_DEBUG] Original arguments keys: {list(arguments.keys())}") # Log to activity file for monitoring @@ -225,7 +230,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon arguments = await reconstruct_thread_context(arguments) logger.debug(f"[CONVERSATION_DEBUG] After thread reconstruction, arguments keys: 
{list(arguments.keys())}") - if '_remaining_tokens' in arguments: + if "_remaining_tokens" in arguments: logger.debug(f"[CONVERSATION_DEBUG] Remaining token budget: {arguments['_remaining_tokens']:,}") # Route to AI-powered tools that require Gemini API calls @@ -354,7 +359,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any success = add_turn(continuation_id, "user", user_prompt, files=user_files) if not success: logger.warning(f"Failed to add user turn to thread {continuation_id}") - logger.debug(f"[CONVERSATION_DEBUG] Failed to add user turn - thread may be at turn limit or expired") + logger.debug("[CONVERSATION_DEBUG] Failed to add user turn - thread may be at turn limit or expired") else: logger.debug(f"[CONVERSATION_DEBUG] Successfully added user turn to thread {continuation_id}") @@ -387,7 +392,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens) # Ensure non-negative - logger.debug(f"[CONVERSATION_DEBUG] Token budget calculation:") + logger.debug("[CONVERSATION_DEBUG] Token budget calculation:") logger.debug(f"[CONVERSATION_DEBUG] MAX_CONTENT_TOKENS: {MAX_CONTENT_TOKENS:,}") logger.debug(f"[CONVERSATION_DEBUG] Conversation tokens: {conversation_tokens:,}") logger.debug(f"[CONVERSATION_DEBUG] Remaining tokens: {remaining_tokens:,}") @@ -402,9 +407,9 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any logger.info(f"Reconstructed context for thread {continuation_id} (turn {len(context.turns)})") logger.debug(f"[CONVERSATION_DEBUG] Final enhanced arguments keys: {list(enhanced_arguments.keys())}") - + # Debug log files in the enhanced arguments for file tracking - if 'files' in enhanced_arguments: + if "files" in enhanced_arguments: logger.debug(f"[CONVERSATION_DEBUG] Final files in enhanced arguments: {enhanced_arguments['files']}") # Log to activity file for monitoring diff --git a/simulator_tests/__init__.py b/simulator_tests/__init__.py index e224a85..8150270 100644 --- a/simulator_tests/__init__.py +++ b/simulator_tests/__init__.py @@ -8,6 +8,7 @@ Each test is in its own file for better organization and maintainability. 
from .base_test import BaseSimulatorTest from .test_basic_conversation import BasicConversationTest from .test_content_validation import ContentValidationTest +from .test_cross_tool_comprehensive import CrossToolComprehensiveTest from .test_cross_tool_continuation import CrossToolContinuationTest from .test_logs_validation import LogsValidationTest from .test_per_tool_deduplication import PerToolDeduplicationTest @@ -19,6 +20,7 @@ TEST_REGISTRY = { "content_validation": ContentValidationTest, "per_tool_deduplication": PerToolDeduplicationTest, "cross_tool_continuation": CrossToolContinuationTest, + "cross_tool_comprehensive": CrossToolComprehensiveTest, "logs_validation": LogsValidationTest, "redis_validation": RedisValidationTest, } @@ -29,6 +31,7 @@ __all__ = [ "ContentValidationTest", "PerToolDeduplicationTest", "CrossToolContinuationTest", + "CrossToolComprehensiveTest", "LogsValidationTest", "RedisValidationTest", "TEST_REGISTRY", diff --git a/simulator_tests/base_test.py b/simulator_tests/base_test.py index 0dd2f58..7a3050c 100644 --- a/simulator_tests/base_test.py +++ b/simulator_tests/base_test.py @@ -96,10 +96,7 @@ class Calculator: f.write(config_content) # Ensure absolute paths for MCP server compatibility - self.test_files = { - "python": os.path.abspath(test_py), - "config": os.path.abspath(test_config) - } + self.test_files = {"python": os.path.abspath(test_py), "config": os.path.abspath(test_config)} self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}") def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: @@ -237,9 +234,9 @@ class Calculator: def create_additional_test_file(self, filename: str, content: str) -> str: """Create an additional test file for mixed scenario testing""" - if not hasattr(self, 'test_dir') or not self.test_dir: + if not hasattr(self, "test_dir") or not self.test_dir: raise RuntimeError("Test directory not initialized. Call setup_test_files() first.") - + file_path = os.path.join(self.test_dir, filename) with open(file_path, "w") as f: f.write(content) diff --git a/simulator_tests/test_content_validation.py b/simulator_tests/test_content_validation.py index da3116c..b9f6756 100644 --- a/simulator_tests/test_content_validation.py +++ b/simulator_tests/test_content_validation.py @@ -53,7 +53,7 @@ DATABASE_CONFIG = { validation_file = os.path.join(self.test_dir, "validation_config.py") with open(validation_file, "w") as f: f.write(validation_content) - + # Ensure absolute path for MCP server compatibility validation_file = os.path.abspath(validation_file) @@ -113,11 +113,17 @@ DATABASE_CONFIG = { tools_to_test = [ ( "chat", - {"prompt": "Please use low thinking mode. Analyze this config file", "files": [validation_file]}, # Using absolute path + { + "prompt": "Please use low thinking mode. Analyze this config file", + "files": [validation_file], + }, # Using absolute path ), ( "codereview", - {"files": [validation_file], "context": "Please use low thinking mode. Review this configuration"}, # Using absolute path + { + "files": [validation_file], + "context": "Please use low thinking mode. 
Review this configuration", + }, # Using absolute path ), ("analyze", {"files": [validation_file], "analysis_type": "code_quality"}), # Using absolute path ] diff --git a/simulator_tests/test_cross_tool_comprehensive.py b/simulator_tests/test_cross_tool_comprehensive.py new file mode 100644 index 0000000..cf8b344 --- /dev/null +++ b/simulator_tests/test_cross_tool_comprehensive.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +""" +Comprehensive Cross-Tool Test + +Tests file deduplication, conversation continuation, and file handling +across all available MCP tools using realistic workflows with low thinking mode. +Validates: +1. Cross-tool conversation continuation +2. File deduplication across different tools +3. Mixed file scenarios (old + new files) +4. Conversation history preservation +5. Proper tool chaining with context +""" + +import subprocess + +from .base_test import BaseSimulatorTest + + +class CrossToolComprehensiveTest(BaseSimulatorTest): + """Comprehensive test across all MCP tools""" + + @property + def test_name(self) -> str: + return "cross_tool_comprehensive" + + @property + def test_description(self) -> str: + return "Comprehensive cross-tool file deduplication and continuation" + + def get_docker_logs_since(self, since_time: str) -> str: + """Get docker logs since a specific timestamp""" + try: + # Check both main server and log monitor for comprehensive logs + cmd_server = ["docker", "logs", "--since", since_time, self.container_name] + cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"] + + result_server = subprocess.run(cmd_server, capture_output=True, text=True) + result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) + + # Combine logs from both containers + combined_logs = result_server.stdout + "\n" + result_monitor.stdout + return combined_logs + except Exception as e: + self.logger.error(f"Failed to get docker logs: {e}") + return "" + + def run_test(self) -> bool: + """Comprehensive cross-tool test with all MCP tools""" + try: + self.logger.info("📄 Test: Comprehensive cross-tool file deduplication and continuation") + + # Setup test files + self.setup_test_files() + + # Create short test files for quick testing + python_code = '''def login(user, pwd): + # Security issue: plain text password + if user == "admin" and pwd == "123": + return True + return False + +def hash_pwd(pwd): + # Weak hashing + return str(hash(pwd)) +''' + + config_file = """{ + "db_password": "weak123", + "debug": true, + "secret_key": "test" +}""" + + auth_file = self.create_additional_test_file("auth.py", python_code) + config_file_path = self.create_additional_test_file("config.json", config_file) + + # Get timestamp for log filtering + import datetime + + start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + + # Tool chain: chat → analyze → debug → codereview → precommit + # Each step builds on the previous with cross-tool continuation + + current_continuation_id = None + responses = [] + + # Step 1: Start with chat tool to understand the codebase + self.logger.info(" Step 1: chat tool - Initial codebase exploration") + chat_params = { + "prompt": "Please give me a quick one line reply. I have an authentication module that needs review. 
Can you help me understand potential issues?", + "files": [auth_file], + "thinking_mode": "low" + } + + response1, continuation_id1 = self.call_mcp_tool("chat", chat_params) + if not response1 or not continuation_id1: + self.logger.error(" ❌ Step 1: chat tool failed") + return False + + self.logger.info(f" ✅ Step 1: chat completed with continuation_id: {continuation_id1[:8]}...") + responses.append(("chat", response1, continuation_id1)) + current_continuation_id = continuation_id1 + + # Step 2: Use analyze tool to do deeper analysis (fresh conversation) + self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)") + analyze_params = { + "files": [auth_file], + "question": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?", + "thinking_mode": "low" + } + + response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params) + if not response2: + self.logger.error(" ❌ Step 2: analyze tool failed") + return False + + self.logger.info( + f" ✅ Step 2: analyze completed with continuation_id: {continuation_id2[:8] if continuation_id2 else 'None'}..." + ) + responses.append(("analyze", response2, continuation_id2)) + + # Step 3: Continue chat conversation with config file + self.logger.info(" Step 3: chat continuation - Add config file context") + chat_continue_params = { + "continuation_id": current_continuation_id, + "prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?", + "files": [auth_file, config_file_path], # Old + new file + "thinking_mode": "low" + } + + response3, _ = self.call_mcp_tool("chat", chat_continue_params) + if not response3: + self.logger.error(" ❌ Step 3: chat continuation failed") + return False + + self.logger.info(" ✅ Step 3: chat continuation completed") + responses.append(("chat_continue", response3, current_continuation_id)) + + # Step 4: Use debug tool to identify specific issues + self.logger.info(" Step 4: debug tool - Identify specific problems") + debug_params = { + "files": [auth_file, config_file_path], + "error_description": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.", + "thinking_mode": "low" + } + + response4, continuation_id4 = self.call_mcp_tool("debug", debug_params) + if not response4: + self.logger.error(" ❌ Step 4: debug tool failed") + return False + + self.logger.info( + f" ✅ Step 4: debug completed with continuation_id: {continuation_id4[:8] if continuation_id4 else 'None'}..." + ) + responses.append(("debug", response4, continuation_id4)) + + # Step 5: Cross-tool continuation - continue debug with chat context + if continuation_id4: + self.logger.info(" Step 5: debug continuation - Additional analysis") + debug_continue_params = { + "continuation_id": continuation_id4, + "files": [auth_file, config_file_path], + "error_description": "Please give me a quick one line reply. 
What specific code changes would you recommend to fix the password hashing vulnerability?", + "thinking_mode": "low" + } + + response5, _ = self.call_mcp_tool("debug", debug_continue_params) + if response5: + self.logger.info(" ✅ Step 5: debug continuation completed") + responses.append(("debug_continue", response5, continuation_id4)) + + # Step 6: Use codereview for comprehensive review + self.logger.info(" Step 6: codereview tool - Comprehensive code review") + codereview_params = { + "files": [auth_file, config_file_path], + "context": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness", + "thinking_mode": "low" + } + + response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params) + if not response6: + self.logger.error(" ❌ Step 6: codereview tool failed") + return False + + self.logger.info( + f" ✅ Step 6: codereview completed with continuation_id: {continuation_id6[:8] if continuation_id6 else 'None'}..." + ) + responses.append(("codereview", response6, continuation_id6)) + + # Step 7: Create improved version and use precommit + self.logger.info(" Step 7: precommit tool - Pre-commit validation") + + # Create a short improved version + improved_code = '''import hashlib + +def secure_login(user, pwd): + # Better: hashed password check + hashed = hashlib.sha256(pwd.encode()).hexdigest() + if user == "admin" and hashed == "expected_hash": + return True + return False +''' + + improved_file = self.create_additional_test_file("auth_improved.py", improved_code) + + precommit_params = { + "path": self.test_dir, + "files": [auth_file, config_file_path, improved_file], + "original_request": "Please give me a quick one line reply. Ready to commit security improvements to authentication module", + "thinking_mode": "low", + } + + response7, continuation_id7 = self.call_mcp_tool("precommit", precommit_params) + if not response7: + self.logger.error(" ❌ Step 7: precommit tool failed") + return False + + self.logger.info( + f" ✅ Step 7: precommit completed with continuation_id: {continuation_id7[:8] if continuation_id7 else 'None'}..." 
+ ) + responses.append(("precommit", response7, continuation_id7)) + + # Validate comprehensive results + self.logger.info(" 📋 Validating comprehensive cross-tool results...") + logs = self.get_docker_logs_since(start_time) + + # Validation criteria + tools_used = [r[0] for r in responses] + continuation_ids_created = [r[2] for r in responses if r[2]] + + # Check for various log patterns + conversation_logs = [ + line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower() + ] + embedding_logs = [ + line + for line in logs.split("\n") + if "📁" in line or "embedding" in line.lower() or "file" in line.lower() + ] + continuation_logs = [ + line for line in logs.split("\n") if "continuation" in line.lower() or "resuming" in line.lower() + ] + cross_tool_logs = [ + line + for line in logs.split("\n") + if any(tool in line.lower() for tool in ["chat", "analyze", "debug", "codereview", "precommit"]) + ] + + # File mentions + auth_file_mentioned = any("auth.py" in line for line in logs.split("\n")) + config_file_mentioned = any("config.json" in line for line in logs.split("\n")) + improved_file_mentioned = any("auth_improved.py" in line for line in logs.split("\n")) + + # Print comprehensive diagnostics + self.logger.info(f" 📊 Tools used: {len(tools_used)} ({', '.join(tools_used)})") + self.logger.info(f" 📊 Continuation IDs created: {len(continuation_ids_created)}") + self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}") + self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}") + self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}") + self.logger.info(f" 📊 Cross-tool activity logs: {len(cross_tool_logs)}") + self.logger.info(f" 📊 Auth file mentioned: {auth_file_mentioned}") + self.logger.info(f" 📊 Config file mentioned: {config_file_mentioned}") + self.logger.info(f" 📊 Improved file mentioned: {improved_file_mentioned}") + + if self.verbose: + self.logger.debug(" 📋 Sample tool activity logs:") + for log in cross_tool_logs[:10]: # Show first 10 + if log.strip(): + self.logger.debug(f" {log.strip()}") + + self.logger.debug(" 📋 Sample continuation logs:") + for log in continuation_logs[:5]: # Show first 5 + if log.strip(): + self.logger.debug(f" {log.strip()}") + + # Comprehensive success criteria + success_criteria = [ + len(tools_used) >= 5, # Used multiple tools + len(continuation_ids_created) >= 3, # Created multiple continuation threads + len(embedding_logs) > 10, # Significant file embedding activity + len(continuation_logs) > 0, # Evidence of continuation + auth_file_mentioned, # Original file processed + config_file_mentioned, # Additional file processed + improved_file_mentioned, # New file processed + len(conversation_logs) > 5, # Conversation history activity + ] + + passed_criteria = sum(success_criteria) + total_criteria = len(success_criteria) + + self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}") + + if passed_criteria >= 6: # At least 6 out of 8 criteria + self.logger.info(" ✅ Comprehensive cross-tool test: PASSED") + return True + else: + self.logger.warning(" ⚠️ Comprehensive cross-tool test: FAILED") + self.logger.warning(" 💡 Check logs for detailed cross-tool activity") + return False + + except Exception as e: + self.logger.error(f"Comprehensive cross-tool test failed: {e}") + return False + finally: + self.cleanup_test_files() diff --git a/simulator_tests/test_per_tool_deduplication.py b/simulator_tests/test_per_tool_deduplication.py index 32ce5d7..2057928 
100644 --- a/simulator_tests/test_per_tool_deduplication.py +++ b/simulator_tests/test_per_tool_deduplication.py @@ -11,10 +11,8 @@ Validates that: 4. Docker logs show deduplication behavior """ -import json -import os import subprocess -import tempfile + from .base_test import BaseSimulatorTest @@ -35,10 +33,10 @@ class PerToolDeduplicationTest(BaseSimulatorTest): # Check both main server and log monitor for comprehensive logs cmd_server = ["docker", "logs", "--since", since_time, self.container_name] cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"] - + result_server = subprocess.run(cmd_server, capture_output=True, text=True) result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) - + # Combine logs from both containers combined_logs = result_server.stdout + "\n" + result_monitor.stdout return combined_logs @@ -51,14 +49,20 @@ class PerToolDeduplicationTest(BaseSimulatorTest): def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool: """Validate that logs show file deduplication behavior""" # Look for file embedding messages - embedding_messages = [line for line in logs.split('\n') if '📁' in line and 'embedding' in line and tool_name in line] - - # Look for deduplication/filtering messages - filtering_messages = [line for line in logs.split('\n') if '📁' in line and 'Filtering' in line and tool_name in line] - skipping_messages = [line for line in logs.split('\n') if '📁' in line and 'skipping' in line and tool_name in line] - + embedding_messages = [ + line for line in logs.split("\n") if "📁" in line and "embedding" in line and tool_name in line + ] + + # Look for deduplication/filtering messages + filtering_messages = [ + line for line in logs.split("\n") if "📁" in line and "Filtering" in line and tool_name in line + ] + skipping_messages = [ + line for line in logs.split("\n") if "📁" in line and "skipping" in line and tool_name in line + ] + deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0 - + if deduplication_found: self.logger.info(f" ✅ {tool_name}: Found deduplication evidence in logs") for msg in filtering_messages + skipping_messages: @@ -66,7 +70,7 @@ class PerToolDeduplicationTest(BaseSimulatorTest): else: self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs") self.logger.debug(f" 📁 All embedding messages: {embedding_messages}") - + return deduplication_found def run_test(self) -> bool: @@ -76,21 +80,19 @@ class PerToolDeduplicationTest(BaseSimulatorTest): # Setup test files self.setup_test_files() - - # Create a dummy file for precommit testing - dummy_content = '''def hello_world(): - """A simple hello world function with a bug""" - print("Hello world!") - return "hello" -# TODO: Fix the inconsistent return type -def calculate_sum(a, b): + # Create a short dummy file for quick testing + dummy_content = '''def add(a, b): return a + b # Missing type hints + +def divide(x, y): + return x / y # No zero check ''' dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content) - + # Get timestamp for log filtering import datetime + start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") # Step 1: precommit tool with dummy file (low thinking mode) @@ -98,98 +100,105 @@ def calculate_sum(a, b): precommit_params = { "path": self.test_dir, # Required path parameter "files": [dummy_file_path], - "original_request": "Please use low thinking mode. 
Review this code for commit readiness", - "thinking_mode": "low" + "original_request": "Please give me a quick one line reply. Review this code for commit readiness", + "thinking_mode": "low", } - + response1, continuation_id = self.call_mcp_tool("precommit", precommit_params) if not response1: self.logger.error(" ❌ Step 1: precommit tool failed") return False - + if not continuation_id: self.logger.error(" ❌ Step 1: precommit tool didn't provide continuation_id") return False - + + # Validate continuation_id format (should be UUID) + if len(continuation_id) < 32: + self.logger.error(f" ❌ Step 1: Invalid continuation_id format: {continuation_id}") + return False + self.logger.info(f" ✅ Step 1: precommit completed with continuation_id: {continuation_id[:8]}...") # Step 2: codereview tool with same file (NO continuation - fresh conversation) self.logger.info(" Step 2: codereview tool with same file (fresh conversation)") codereview_params = { "files": [dummy_file_path], - "context": "Please use low thinking mode. General code review for quality and best practices" + "context": "Please give me a quick one line reply. General code review for quality and best practices", + "thinking_mode": "low" } - + response2, _ = self.call_mcp_tool("codereview", codereview_params) if not response2: self.logger.error(" ❌ Step 2: codereview tool failed") return False - + self.logger.info(" ✅ Step 2: codereview completed (fresh conversation)") # Step 3: Create new file and continue with precommit self.logger.info(" Step 3: precommit continuation with old + new file") - new_file_content = '''def new_feature(): - """A new feature function""" - return {"status": "implemented", "version": "1.0"} + new_file_content = '''def multiply(x, y): + return x * y -class NewUtility: - """A new utility class""" - - def __init__(self): - self.initialized = True - - def process_data(self, data): - return f"Processed: {data}" +def subtract(a, b): + return a - b ''' new_file_path = self.create_additional_test_file("new_feature.py", new_file_content) - + # Continue precommit with both files continue_params = { "continuation_id": continuation_id, "path": self.test_dir, # Required path parameter "files": [dummy_file_path, new_file_path], # Old + new file - "original_request": "Please use low thinking mode. Now also review the new feature file along with the previous one", - "thinking_mode": "low" + "original_request": "Please give me a quick one line reply. 
Now also review the new feature file along with the previous one", + "thinking_mode": "low", } - + response3, _ = self.call_mcp_tool("precommit", continue_params) if not response3: self.logger.error(" ❌ Step 3: precommit continuation failed") return False - + self.logger.info(" ✅ Step 3: precommit continuation completed") # Validate results in docker logs self.logger.info(" 📋 Validating conversation history and file deduplication...") logs = self.get_docker_logs_since(start_time) - + # Check for conversation history building - conversation_logs = [line for line in logs.split('\n') if 'conversation' in line.lower() or 'history' in line.lower()] - + conversation_logs = [ + line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower() + ] + # Check for file embedding/deduplication - embedding_logs = [line for line in logs.split('\n') if '📁' in line or 'embedding' in line.lower() or 'file' in line.lower()] - + embedding_logs = [ + line + for line in logs.split("\n") + if "📁" in line or "embedding" in line.lower() or "file" in line.lower() + ] + # Check for continuation evidence - continuation_logs = [line for line in logs.split('\n') if 'continuation' in line.lower() or continuation_id[:8] in line] - + continuation_logs = [ + line for line in logs.split("\n") if "continuation" in line.lower() or continuation_id[:8] in line + ] + # Check for both files mentioned - dummy_file_mentioned = any("dummy_code.py" in line for line in logs.split('\n')) - new_file_mentioned = any("new_feature.py" in line for line in logs.split('\n')) - + dummy_file_mentioned = any("dummy_code.py" in line for line in logs.split("\n")) + new_file_mentioned = any("new_feature.py" in line for line in logs.split("\n")) + # Print diagnostic information self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}") self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}") self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}") self.logger.info(f" 📊 Dummy file mentioned: {dummy_file_mentioned}") self.logger.info(f" 📊 New file mentioned: {new_file_mentioned}") - + if self.verbose: self.logger.debug(" 📋 Sample embedding logs:") for log in embedding_logs[:5]: # Show first 5 if log.strip(): self.logger.debug(f" {log.strip()}") - + self.logger.debug(" 📋 Sample continuation logs:") for log in continuation_logs[:3]: # Show first 3 if log.strip(): @@ -200,14 +209,14 @@ class NewUtility: len(embedding_logs) > 0, # File embedding occurred len(continuation_logs) > 0, # Continuation worked dummy_file_mentioned, # Original file processed - new_file_mentioned # New file processed + new_file_mentioned, # New file processed ] - + passed_criteria = sum(success_criteria) total_criteria = len(success_criteria) - + self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}") - + if passed_criteria >= 3: # At least 3 out of 4 criteria self.logger.info(" ✅ File deduplication workflow test: PASSED") return True diff --git a/tools/base.py b/tools/base.py index d7dd61c..3f06ffe 100644 --- a/tools/base.py +++ b/tools/base.py @@ -210,7 +210,7 @@ class BaseTool(ABC): list[str]: List of files that need to be embedded (not already in history) """ logger.debug(f"[FILES] {self.name}: Filtering {len(requested_files)} requested files") - + if not continuation_id: # New conversation, all files are new logger.debug(f"[FILES] {self.name}: New conversation, all {len(requested_files)} files are new") @@ -226,12 +226,16 @@ class BaseTool(ABC): logger.debug( f"📁 
{self.name} tool: No files found in conversation history for thread {continuation_id}" ) - logger.debug(f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files") + logger.debug( + f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files" + ) return requested_files # Return only files that haven't been embedded yet new_files = [f for f in requested_files if f not in embedded_files] - logger.debug(f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded") + logger.debug( + f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded" + ) logger.debug(f"[FILES] {self.name}: New files to embed: {new_files}") # Log filtering results for debugging @@ -249,7 +253,9 @@ class BaseTool(ABC): # and include all files rather than risk losing access to needed files logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}") logger.warning(f"📁 {self.name} tool: Including all requested files as fallback") - logger.debug(f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback") + logger.debug( + f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback" + ) return requested_files def _prepare_file_content_for_prompt( @@ -312,7 +318,9 @@ class BaseTool(ABC): # Read content of new files only if files_to_embed: logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}") - logger.debug(f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}") + logger.debug( + f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}" + ) try: file_content = read_files( files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens @@ -662,7 +670,7 @@ If any of these would strengthen your analysis, specify what Claude should searc # Return error information in standardized format logger = logging.getLogger(f"tools.{self.name}") error_msg = str(e) - + # Check if this is a 500 INTERNAL error that asks for retry if "500 INTERNAL" in error_msg and "Please retry" in error_msg: logger.warning(f"500 INTERNAL error in {self.name} - attempting retry") @@ -671,16 +679,16 @@ If any of these would strengthen your analysis, specify what Claude should searc model = self._get_model_wrapper(request) raw_response = await model.generate_content(prompt) response = raw_response.text - + # If successful, process normally return [TextContent(type="text", text=self._process_response(response, request).model_dump_json())] - + except Exception as retry_e: logger.error(f"Retry failed for {self.name} tool: {str(retry_e)}") error_msg = f"Tool failed after retry: {str(retry_e)}" - + logger.error(f"Error in {self.name} tool execution: {error_msg}", exc_info=True) - + error_output = ToolOutput( status="error", content=f"Error in {self.name}: {error_msg}", @@ -911,11 +919,12 @@ If any of these would strengthen your analysis, specify what Claude should searc Dict with continuation data if opportunity should be offered, None otherwise """ continuation_id = getattr(request, "continuation_id", None) - + try: if continuation_id: # Check remaining turns in existing thread from utils.conversation_memory import get_thread + 
context = get_thread(continuation_id) if context: current_turns = len(context.turns) diff --git a/utils/conversation_memory.py b/utils/conversation_memory.py index 289e08b..7b5388b 100644 --- a/utils/conversation_memory.py +++ b/utils/conversation_memory.py @@ -251,7 +251,7 @@ def add_turn( - File references are preserved for cross-tool access """ logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})") - + context = get_thread(thread_id) if not context: logger.debug(f"[FLOW] Thread {thread_id} not found for turn addition") @@ -301,13 +301,13 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]: list[str]: Deduplicated list of file paths referenced in the conversation """ if not context.turns: - logger.debug(f"[FILES] No turns found, returning empty file list") + logger.debug("[FILES] No turns found, returning empty file list") return [] # Collect all unique files from all turns, preserving order of first appearance seen_files = set() unique_files = [] - + logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns") for i, turn in enumerate(context.turns): @@ -322,7 +322,7 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]: logger.debug(f"[FILES] Duplicate file skipped: {file_path}") else: logger.debug(f"[FILES] Turn {i+1} has no files") - + logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}") return unique_files @@ -409,13 +409,17 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) -> logger.debug( f"📄 File embedded in conversation history: {file_path} ({content_tokens:,} tokens)" ) - logger.debug(f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})") + logger.debug( + f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})" + ) else: files_truncated += 1 logger.debug( f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)" ) - logger.debug(f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)") + logger.debug( + f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)" + ) # Stop processing more files break else: @@ -439,7 +443,9 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) -> logger.debug( f"📄 Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens" ) - logger.debug(f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total") + logger.debug( + f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total" + ) else: history_parts.append("(No accessible files found)") logger.debug( @@ -505,11 +511,13 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) -> from utils.token_utils import estimate_tokens total_conversation_tokens = estimate_tokens(complete_history) - + # Summary log of what was built user_turns = len([t for t in context.turns if t.role == "user"]) assistant_turns = len([t for t in context.turns if t.role == "assistant"]) - logger.debug(f"[FLOW] Built conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens") + logger.debug( + f"[FLOW] Built 
conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens" + ) return complete_history, total_conversation_tokens diff --git a/utils/file_utils.py b/utils/file_utils.py index 1757042..8d986c4 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -470,7 +470,7 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i logger.debug(f"[FILES] Reading file content for {file_path}") with open(path, encoding="utf-8", errors="replace") as f: file_content = f.read() - + logger.debug(f"[FILES] Successfully read {len(file_content)} characters from {file_path}") # Format with clear delimiters that help the AI understand file boundaries @@ -518,7 +518,9 @@ def read_files( max_tokens = MAX_CONTEXT_TOKENS logger.debug(f"[FILES] read_files called with {len(file_paths)} paths") - logger.debug(f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}") + logger.debug( + f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}" + ) content_parts = [] total_tokens = 0 @@ -546,7 +548,7 @@ def read_files( if not all_files and file_paths: # No files found but paths were provided - logger.debug(f"[FILES] No files found from provided paths") + logger.debug("[FILES] No files found from provided paths") content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n") else: # Read files sequentially until token limit is reached @@ -567,7 +569,9 @@ def read_files( logger.debug(f"[FILES] Added file {file_path}, total tokens: {total_tokens:,}") else: # File too large for remaining budget - logger.debug(f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)") + logger.debug( + f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)" + ) files_skipped.append(file_path) # Add informative note about skipped files to help users understand
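
--
Note on the behavior under test: the simulator tests above verify deduplication
indirectly, by grepping docker logs for the "Filtering"/"skipping" markers emitted
by tools/base.py. The filtering itself reduces to a membership check against the
files already embedded in the thread's conversation history. A minimal standalone
sketch, assuming the embedded-file set is precomputed (the real
BaseTool.filter_new_files derives it from conversation memory, logs each decision,
and returns all requested files as a fallback on error):

    def filter_new_files(requested_files, embedded_files=None):
        """Return only the files that still need to be embedded."""
        if embedded_files is None:
            # New conversation (no continuation_id): every requested file is new.
            return list(requested_files)
        # Continued conversation: skip files already embedded in the thread history.
        return [f for f in requested_files if f not in embedded_files]

    # Example: a continued turn re-sends auth.py alongside a new config.json;
    # only the new file should be embedded again.
    assert filter_new_files(["auth.py", "config.json"], {"auth.py"}) == ["config.json"]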