From 898373bc22218542fd69bf402bcd3b361ba013c8 Mon Sep 17 00:00:00 2001 From: Fahad Date: Wed, 11 Jun 2025 18:44:34 +0400 Subject: [PATCH] More tests --- communication_simulator_test.py | 7 +- server.py | 21 +- simulator_tests/__init__.py | 3 + simulator_tests/base_test.py | 9 +- simulator_tests/test_content_validation.py | 12 +- .../test_cross_tool_comprehensive.py | 306 ++++++++++++++++++ .../test_per_tool_deduplication.py | 133 ++++---- tools/base.py | 31 +- utils/conversation_memory.py | 26 +- utils/file_utils.py | 12 +- 10 files changed, 455 insertions(+), 105 deletions(-) create mode 100644 simulator_tests/test_cross_tool_comprehensive.py diff --git a/communication_simulator_test.py b/communication_simulator_test.py index ab5d4e3..c9b6592 100644 --- a/communication_simulator_test.py +++ b/communication_simulator_test.py @@ -366,7 +366,10 @@ def parse_arguments(): parser.add_argument("--list-tests", action="store_true", help="List available tests and exit") parser.add_argument("--individual", "-i", help="Run a single test individually") parser.add_argument( - "--skip-docker", action="store_true", default=True, help="Skip Docker setup (assumes containers are already running) - DEFAULT" + "--skip-docker", + action="store_true", + default=True, + help="Skip Docker setup (assumes containers are already running) - DEFAULT", ) parser.add_argument( "--rebuild-docker", action="store_true", help="Force rebuild Docker environment (overrides --skip-docker)" @@ -447,7 +450,7 @@ def main(): # Determine execution mode and run # Override skip_docker if rebuild_docker is specified skip_docker = args.skip_docker and not args.rebuild_docker - + if args.individual: exit_code = run_individual_test(simulator, args.individual, skip_docker) else: diff --git a/server.py b/server.py index 0ce7653..cd7ef42 100644 --- a/server.py +++ b/server.py @@ -22,6 +22,7 @@ import asyncio import logging import os import sys +import time from datetime import datetime from typing import Any @@ -52,7 +53,8 @@ from tools.models import ToolOutput log_level = os.getenv("LOG_LEVEL", "INFO").upper() # Create timezone-aware formatter -import time + + class LocalTimeFormatter(logging.Formatter): def formatTime(self, record, datefmt=None): """Override to use local timezone instead of UTC""" @@ -61,9 +63,10 @@ class LocalTimeFormatter(logging.Formatter): s = time.strftime(datefmt, ct) else: t = time.strftime("%Y-%m-%d %H:%M:%S", ct) - s = "%s,%03d" % (t, record.msecs) + s = f"{t},{record.msecs:03.0f}" return s + # Configure both console and file logging log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" logging.basicConfig( @@ -213,7 +216,9 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon if "continuation_id" in arguments and arguments["continuation_id"]: continuation_id = arguments["continuation_id"] logger.debug(f"Resuming conversation thread: {continuation_id}") - logger.debug(f"[CONVERSATION_DEBUG] Tool '{name}' resuming thread {continuation_id} with {len(arguments)} arguments") + logger.debug( + f"[CONVERSATION_DEBUG] Tool '{name}' resuming thread {continuation_id} with {len(arguments)} arguments" + ) logger.debug(f"[CONVERSATION_DEBUG] Original arguments keys: {list(arguments.keys())}") # Log to activity file for monitoring @@ -225,7 +230,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon arguments = await reconstruct_thread_context(arguments) logger.debug(f"[CONVERSATION_DEBUG] After thread reconstruction, arguments keys: 
{list(arguments.keys())}") - if '_remaining_tokens' in arguments: + if "_remaining_tokens" in arguments: logger.debug(f"[CONVERSATION_DEBUG] Remaining token budget: {arguments['_remaining_tokens']:,}") # Route to AI-powered tools that require Gemini API calls @@ -354,7 +359,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any success = add_turn(continuation_id, "user", user_prompt, files=user_files) if not success: logger.warning(f"Failed to add user turn to thread {continuation_id}") - logger.debug(f"[CONVERSATION_DEBUG] Failed to add user turn - thread may be at turn limit or expired") + logger.debug("[CONVERSATION_DEBUG] Failed to add user turn - thread may be at turn limit or expired") else: logger.debug(f"[CONVERSATION_DEBUG] Successfully added user turn to thread {continuation_id}") @@ -387,7 +392,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens) # Ensure non-negative - logger.debug(f"[CONVERSATION_DEBUG] Token budget calculation:") + logger.debug("[CONVERSATION_DEBUG] Token budget calculation:") logger.debug(f"[CONVERSATION_DEBUG] MAX_CONTENT_TOKENS: {MAX_CONTENT_TOKENS:,}") logger.debug(f"[CONVERSATION_DEBUG] Conversation tokens: {conversation_tokens:,}") logger.debug(f"[CONVERSATION_DEBUG] Remaining tokens: {remaining_tokens:,}") @@ -402,9 +407,9 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any logger.info(f"Reconstructed context for thread {continuation_id} (turn {len(context.turns)})") logger.debug(f"[CONVERSATION_DEBUG] Final enhanced arguments keys: {list(enhanced_arguments.keys())}") - + # Debug log files in the enhanced arguments for file tracking - if 'files' in enhanced_arguments: + if "files" in enhanced_arguments: logger.debug(f"[CONVERSATION_DEBUG] Final files in enhanced arguments: {enhanced_arguments['files']}") # Log to activity file for monitoring diff --git a/simulator_tests/__init__.py b/simulator_tests/__init__.py index e224a85..8150270 100644 --- a/simulator_tests/__init__.py +++ b/simulator_tests/__init__.py @@ -8,6 +8,7 @@ Each test is in its own file for better organization and maintainability. 
from .base_test import BaseSimulatorTest from .test_basic_conversation import BasicConversationTest from .test_content_validation import ContentValidationTest +from .test_cross_tool_comprehensive import CrossToolComprehensiveTest from .test_cross_tool_continuation import CrossToolContinuationTest from .test_logs_validation import LogsValidationTest from .test_per_tool_deduplication import PerToolDeduplicationTest @@ -19,6 +20,7 @@ TEST_REGISTRY = { "content_validation": ContentValidationTest, "per_tool_deduplication": PerToolDeduplicationTest, "cross_tool_continuation": CrossToolContinuationTest, + "cross_tool_comprehensive": CrossToolComprehensiveTest, "logs_validation": LogsValidationTest, "redis_validation": RedisValidationTest, } @@ -29,6 +31,7 @@ __all__ = [ "ContentValidationTest", "PerToolDeduplicationTest", "CrossToolContinuationTest", + "CrossToolComprehensiveTest", "LogsValidationTest", "RedisValidationTest", "TEST_REGISTRY", diff --git a/simulator_tests/base_test.py b/simulator_tests/base_test.py index 0dd2f58..7a3050c 100644 --- a/simulator_tests/base_test.py +++ b/simulator_tests/base_test.py @@ -96,10 +96,7 @@ class Calculator: f.write(config_content) # Ensure absolute paths for MCP server compatibility - self.test_files = { - "python": os.path.abspath(test_py), - "config": os.path.abspath(test_config) - } + self.test_files = {"python": os.path.abspath(test_py), "config": os.path.abspath(test_config)} self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}") def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: @@ -237,9 +234,9 @@ class Calculator: def create_additional_test_file(self, filename: str, content: str) -> str: """Create an additional test file for mixed scenario testing""" - if not hasattr(self, 'test_dir') or not self.test_dir: + if not hasattr(self, "test_dir") or not self.test_dir: raise RuntimeError("Test directory not initialized. Call setup_test_files() first.") - + file_path = os.path.join(self.test_dir, filename) with open(file_path, "w") as f: f.write(content) diff --git a/simulator_tests/test_content_validation.py b/simulator_tests/test_content_validation.py index da3116c..b9f6756 100644 --- a/simulator_tests/test_content_validation.py +++ b/simulator_tests/test_content_validation.py @@ -53,7 +53,7 @@ DATABASE_CONFIG = { validation_file = os.path.join(self.test_dir, "validation_config.py") with open(validation_file, "w") as f: f.write(validation_content) - + # Ensure absolute path for MCP server compatibility validation_file = os.path.abspath(validation_file) @@ -113,11 +113,17 @@ DATABASE_CONFIG = { tools_to_test = [ ( "chat", - {"prompt": "Please use low thinking mode. Analyze this config file", "files": [validation_file]}, # Using absolute path + { + "prompt": "Please use low thinking mode. Analyze this config file", + "files": [validation_file], + }, # Using absolute path ), ( "codereview", - {"files": [validation_file], "context": "Please use low thinking mode. Review this configuration"}, # Using absolute path + { + "files": [validation_file], + "context": "Please use low thinking mode. 
Review this configuration", + }, # Using absolute path ), ("analyze", {"files": [validation_file], "analysis_type": "code_quality"}), # Using absolute path ] diff --git a/simulator_tests/test_cross_tool_comprehensive.py b/simulator_tests/test_cross_tool_comprehensive.py new file mode 100644 index 0000000..cf8b344 --- /dev/null +++ b/simulator_tests/test_cross_tool_comprehensive.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +""" +Comprehensive Cross-Tool Test + +Tests file deduplication, conversation continuation, and file handling +across all available MCP tools using realistic workflows with low thinking mode. +Validates: +1. Cross-tool conversation continuation +2. File deduplication across different tools +3. Mixed file scenarios (old + new files) +4. Conversation history preservation +5. Proper tool chaining with context +""" + +import subprocess + +from .base_test import BaseSimulatorTest + + +class CrossToolComprehensiveTest(BaseSimulatorTest): + """Comprehensive test across all MCP tools""" + + @property + def test_name(self) -> str: + return "cross_tool_comprehensive" + + @property + def test_description(self) -> str: + return "Comprehensive cross-tool file deduplication and continuation" + + def get_docker_logs_since(self, since_time: str) -> str: + """Get docker logs since a specific timestamp""" + try: + # Check both main server and log monitor for comprehensive logs + cmd_server = ["docker", "logs", "--since", since_time, self.container_name] + cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"] + + result_server = subprocess.run(cmd_server, capture_output=True, text=True) + result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) + + # Combine logs from both containers + combined_logs = result_server.stdout + "\n" + result_monitor.stdout + return combined_logs + except Exception as e: + self.logger.error(f"Failed to get docker logs: {e}") + return "" + + def run_test(self) -> bool: + """Comprehensive cross-tool test with all MCP tools""" + try: + self.logger.info("📄 Test: Comprehensive cross-tool file deduplication and continuation") + + # Setup test files + self.setup_test_files() + + # Create short test files for quick testing + python_code = '''def login(user, pwd): + # Security issue: plain text password + if user == "admin" and pwd == "123": + return True + return False + +def hash_pwd(pwd): + # Weak hashing + return str(hash(pwd)) +''' + + config_file = """{ + "db_password": "weak123", + "debug": true, + "secret_key": "test" +}""" + + auth_file = self.create_additional_test_file("auth.py", python_code) + config_file_path = self.create_additional_test_file("config.json", config_file) + + # Get timestamp for log filtering + import datetime + + start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + + # Tool chain: chat → analyze → debug → codereview → precommit + # Each step builds on the previous with cross-tool continuation + + current_continuation_id = None + responses = [] + + # Step 1: Start with chat tool to understand the codebase + self.logger.info(" Step 1: chat tool - Initial codebase exploration") + chat_params = { + "prompt": "Please give me a quick one line reply. I have an authentication module that needs review. 
Can you help me understand potential issues?", + "files": [auth_file], + "thinking_mode": "low" + } + + response1, continuation_id1 = self.call_mcp_tool("chat", chat_params) + if not response1 or not continuation_id1: + self.logger.error(" ❌ Step 1: chat tool failed") + return False + + self.logger.info(f" ✅ Step 1: chat completed with continuation_id: {continuation_id1[:8]}...") + responses.append(("chat", response1, continuation_id1)) + current_continuation_id = continuation_id1 + + # Step 2: Use analyze tool to do deeper analysis (fresh conversation) + self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)") + analyze_params = { + "files": [auth_file], + "question": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?", + "thinking_mode": "low" + } + + response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params) + if not response2: + self.logger.error(" ❌ Step 2: analyze tool failed") + return False + + self.logger.info( + f" ✅ Step 2: analyze completed with continuation_id: {continuation_id2[:8] if continuation_id2 else 'None'}..." + ) + responses.append(("analyze", response2, continuation_id2)) + + # Step 3: Continue chat conversation with config file + self.logger.info(" Step 3: chat continuation - Add config file context") + chat_continue_params = { + "continuation_id": current_continuation_id, + "prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?", + "files": [auth_file, config_file_path], # Old + new file + "thinking_mode": "low" + } + + response3, _ = self.call_mcp_tool("chat", chat_continue_params) + if not response3: + self.logger.error(" ❌ Step 3: chat continuation failed") + return False + + self.logger.info(" ✅ Step 3: chat continuation completed") + responses.append(("chat_continue", response3, current_continuation_id)) + + # Step 4: Use debug tool to identify specific issues + self.logger.info(" Step 4: debug tool - Identify specific problems") + debug_params = { + "files": [auth_file, config_file_path], + "error_description": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.", + "thinking_mode": "low" + } + + response4, continuation_id4 = self.call_mcp_tool("debug", debug_params) + if not response4: + self.logger.error(" ❌ Step 4: debug tool failed") + return False + + self.logger.info( + f" ✅ Step 4: debug completed with continuation_id: {continuation_id4[:8] if continuation_id4 else 'None'}..." + ) + responses.append(("debug", response4, continuation_id4)) + + # Step 5: Cross-tool continuation - continue debug with chat context + if continuation_id4: + self.logger.info(" Step 5: debug continuation - Additional analysis") + debug_continue_params = { + "continuation_id": continuation_id4, + "files": [auth_file, config_file_path], + "error_description": "Please give me a quick one line reply. 
What specific code changes would you recommend to fix the password hashing vulnerability?", + "thinking_mode": "low" + } + + response5, _ = self.call_mcp_tool("debug", debug_continue_params) + if response5: + self.logger.info(" ✅ Step 5: debug continuation completed") + responses.append(("debug_continue", response5, continuation_id4)) + + # Step 6: Use codereview for comprehensive review + self.logger.info(" Step 6: codereview tool - Comprehensive code review") + codereview_params = { + "files": [auth_file, config_file_path], + "context": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness", + "thinking_mode": "low" + } + + response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params) + if not response6: + self.logger.error(" ❌ Step 6: codereview tool failed") + return False + + self.logger.info( + f" ✅ Step 6: codereview completed with continuation_id: {continuation_id6[:8] if continuation_id6 else 'None'}..." + ) + responses.append(("codereview", response6, continuation_id6)) + + # Step 7: Create improved version and use precommit + self.logger.info(" Step 7: precommit tool - Pre-commit validation") + + # Create a short improved version + improved_code = '''import hashlib + +def secure_login(user, pwd): + # Better: hashed password check + hashed = hashlib.sha256(pwd.encode()).hexdigest() + if user == "admin" and hashed == "expected_hash": + return True + return False +''' + + improved_file = self.create_additional_test_file("auth_improved.py", improved_code) + + precommit_params = { + "path": self.test_dir, + "files": [auth_file, config_file_path, improved_file], + "original_request": "Please give me a quick one line reply. Ready to commit security improvements to authentication module", + "thinking_mode": "low", + } + + response7, continuation_id7 = self.call_mcp_tool("precommit", precommit_params) + if not response7: + self.logger.error(" ❌ Step 7: precommit tool failed") + return False + + self.logger.info( + f" ✅ Step 7: precommit completed with continuation_id: {continuation_id7[:8] if continuation_id7 else 'None'}..." 
+ ) + responses.append(("precommit", response7, continuation_id7)) + + # Validate comprehensive results + self.logger.info(" 📋 Validating comprehensive cross-tool results...") + logs = self.get_docker_logs_since(start_time) + + # Validation criteria + tools_used = [r[0] for r in responses] + continuation_ids_created = [r[2] for r in responses if r[2]] + + # Check for various log patterns + conversation_logs = [ + line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower() + ] + embedding_logs = [ + line + for line in logs.split("\n") + if "📁" in line or "embedding" in line.lower() or "file" in line.lower() + ] + continuation_logs = [ + line for line in logs.split("\n") if "continuation" in line.lower() or "resuming" in line.lower() + ] + cross_tool_logs = [ + line + for line in logs.split("\n") + if any(tool in line.lower() for tool in ["chat", "analyze", "debug", "codereview", "precommit"]) + ] + + # File mentions + auth_file_mentioned = any("auth.py" in line for line in logs.split("\n")) + config_file_mentioned = any("config.json" in line for line in logs.split("\n")) + improved_file_mentioned = any("auth_improved.py" in line for line in logs.split("\n")) + + # Print comprehensive diagnostics + self.logger.info(f" 📊 Tools used: {len(tools_used)} ({', '.join(tools_used)})") + self.logger.info(f" 📊 Continuation IDs created: {len(continuation_ids_created)}") + self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}") + self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}") + self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}") + self.logger.info(f" 📊 Cross-tool activity logs: {len(cross_tool_logs)}") + self.logger.info(f" 📊 Auth file mentioned: {auth_file_mentioned}") + self.logger.info(f" 📊 Config file mentioned: {config_file_mentioned}") + self.logger.info(f" 📊 Improved file mentioned: {improved_file_mentioned}") + + if self.verbose: + self.logger.debug(" 📋 Sample tool activity logs:") + for log in cross_tool_logs[:10]: # Show first 10 + if log.strip(): + self.logger.debug(f" {log.strip()}") + + self.logger.debug(" 📋 Sample continuation logs:") + for log in continuation_logs[:5]: # Show first 5 + if log.strip(): + self.logger.debug(f" {log.strip()}") + + # Comprehensive success criteria + success_criteria = [ + len(tools_used) >= 5, # Used multiple tools + len(continuation_ids_created) >= 3, # Created multiple continuation threads + len(embedding_logs) > 10, # Significant file embedding activity + len(continuation_logs) > 0, # Evidence of continuation + auth_file_mentioned, # Original file processed + config_file_mentioned, # Additional file processed + improved_file_mentioned, # New file processed + len(conversation_logs) > 5, # Conversation history activity + ] + + passed_criteria = sum(success_criteria) + total_criteria = len(success_criteria) + + self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}") + + if passed_criteria >= 6: # At least 6 out of 8 criteria + self.logger.info(" ✅ Comprehensive cross-tool test: PASSED") + return True + else: + self.logger.warning(" ⚠️ Comprehensive cross-tool test: FAILED") + self.logger.warning(" 💡 Check logs for detailed cross-tool activity") + return False + + except Exception as e: + self.logger.error(f"Comprehensive cross-tool test failed: {e}") + return False + finally: + self.cleanup_test_files() diff --git a/simulator_tests/test_per_tool_deduplication.py b/simulator_tests/test_per_tool_deduplication.py index 32ce5d7..2057928 
100644 --- a/simulator_tests/test_per_tool_deduplication.py +++ b/simulator_tests/test_per_tool_deduplication.py @@ -11,10 +11,8 @@ Validates that: 4. Docker logs show deduplication behavior """ -import json -import os import subprocess -import tempfile + from .base_test import BaseSimulatorTest @@ -35,10 +33,10 @@ class PerToolDeduplicationTest(BaseSimulatorTest): # Check both main server and log monitor for comprehensive logs cmd_server = ["docker", "logs", "--since", since_time, self.container_name] cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"] - + result_server = subprocess.run(cmd_server, capture_output=True, text=True) result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) - + # Combine logs from both containers combined_logs = result_server.stdout + "\n" + result_monitor.stdout return combined_logs @@ -51,14 +49,20 @@ class PerToolDeduplicationTest(BaseSimulatorTest): def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool: """Validate that logs show file deduplication behavior""" # Look for file embedding messages - embedding_messages = [line for line in logs.split('\n') if '📁' in line and 'embedding' in line and tool_name in line] - - # Look for deduplication/filtering messages - filtering_messages = [line for line in logs.split('\n') if '📁' in line and 'Filtering' in line and tool_name in line] - skipping_messages = [line for line in logs.split('\n') if '📁' in line and 'skipping' in line and tool_name in line] - + embedding_messages = [ + line for line in logs.split("\n") if "📁" in line and "embedding" in line and tool_name in line + ] + + # Look for deduplication/filtering messages + filtering_messages = [ + line for line in logs.split("\n") if "📁" in line and "Filtering" in line and tool_name in line + ] + skipping_messages = [ + line for line in logs.split("\n") if "📁" in line and "skipping" in line and tool_name in line + ] + deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0 - + if deduplication_found: self.logger.info(f" ✅ {tool_name}: Found deduplication evidence in logs") for msg in filtering_messages + skipping_messages: @@ -66,7 +70,7 @@ class PerToolDeduplicationTest(BaseSimulatorTest): else: self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs") self.logger.debug(f" 📁 All embedding messages: {embedding_messages}") - + return deduplication_found def run_test(self) -> bool: @@ -76,21 +80,19 @@ class PerToolDeduplicationTest(BaseSimulatorTest): # Setup test files self.setup_test_files() - - # Create a dummy file for precommit testing - dummy_content = '''def hello_world(): - """A simple hello world function with a bug""" - print("Hello world!") - return "hello" -# TODO: Fix the inconsistent return type -def calculate_sum(a, b): + # Create a short dummy file for quick testing + dummy_content = '''def add(a, b): return a + b # Missing type hints + +def divide(x, y): + return x / y # No zero check ''' dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content) - + # Get timestamp for log filtering import datetime + start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") # Step 1: precommit tool with dummy file (low thinking mode) @@ -98,98 +100,105 @@ def calculate_sum(a, b): precommit_params = { "path": self.test_dir, # Required path parameter "files": [dummy_file_path], - "original_request": "Please use low thinking mode. 
Review this code for commit readiness", - "thinking_mode": "low" + "original_request": "Please give me a quick one line reply. Review this code for commit readiness", + "thinking_mode": "low", } - + response1, continuation_id = self.call_mcp_tool("precommit", precommit_params) if not response1: self.logger.error(" ❌ Step 1: precommit tool failed") return False - + if not continuation_id: self.logger.error(" ❌ Step 1: precommit tool didn't provide continuation_id") return False - + + # Validate continuation_id format (should be UUID) + if len(continuation_id) < 32: + self.logger.error(f" ❌ Step 1: Invalid continuation_id format: {continuation_id}") + return False + self.logger.info(f" ✅ Step 1: precommit completed with continuation_id: {continuation_id[:8]}...") # Step 2: codereview tool with same file (NO continuation - fresh conversation) self.logger.info(" Step 2: codereview tool with same file (fresh conversation)") codereview_params = { "files": [dummy_file_path], - "context": "Please use low thinking mode. General code review for quality and best practices" + "context": "Please give me a quick one line reply. General code review for quality and best practices", + "thinking_mode": "low" } - + response2, _ = self.call_mcp_tool("codereview", codereview_params) if not response2: self.logger.error(" ❌ Step 2: codereview tool failed") return False - + self.logger.info(" ✅ Step 2: codereview completed (fresh conversation)") # Step 3: Create new file and continue with precommit self.logger.info(" Step 3: precommit continuation with old + new file") - new_file_content = '''def new_feature(): - """A new feature function""" - return {"status": "implemented", "version": "1.0"} + new_file_content = '''def multiply(x, y): + return x * y -class NewUtility: - """A new utility class""" - - def __init__(self): - self.initialized = True - - def process_data(self, data): - return f"Processed: {data}" +def subtract(a, b): + return a - b ''' new_file_path = self.create_additional_test_file("new_feature.py", new_file_content) - + # Continue precommit with both files continue_params = { "continuation_id": continuation_id, "path": self.test_dir, # Required path parameter "files": [dummy_file_path, new_file_path], # Old + new file - "original_request": "Please use low thinking mode. Now also review the new feature file along with the previous one", - "thinking_mode": "low" + "original_request": "Please give me a quick one line reply. 
Now also review the new feature file along with the previous one", + "thinking_mode": "low", } - + response3, _ = self.call_mcp_tool("precommit", continue_params) if not response3: self.logger.error(" ❌ Step 3: precommit continuation failed") return False - + self.logger.info(" ✅ Step 3: precommit continuation completed") # Validate results in docker logs self.logger.info(" 📋 Validating conversation history and file deduplication...") logs = self.get_docker_logs_since(start_time) - + # Check for conversation history building - conversation_logs = [line for line in logs.split('\n') if 'conversation' in line.lower() or 'history' in line.lower()] - + conversation_logs = [ + line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower() + ] + # Check for file embedding/deduplication - embedding_logs = [line for line in logs.split('\n') if '📁' in line or 'embedding' in line.lower() or 'file' in line.lower()] - + embedding_logs = [ + line + for line in logs.split("\n") + if "📁" in line or "embedding" in line.lower() or "file" in line.lower() + ] + # Check for continuation evidence - continuation_logs = [line for line in logs.split('\n') if 'continuation' in line.lower() or continuation_id[:8] in line] - + continuation_logs = [ + line for line in logs.split("\n") if "continuation" in line.lower() or continuation_id[:8] in line + ] + # Check for both files mentioned - dummy_file_mentioned = any("dummy_code.py" in line for line in logs.split('\n')) - new_file_mentioned = any("new_feature.py" in line for line in logs.split('\n')) - + dummy_file_mentioned = any("dummy_code.py" in line for line in logs.split("\n")) + new_file_mentioned = any("new_feature.py" in line for line in logs.split("\n")) + # Print diagnostic information self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}") self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}") self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}") self.logger.info(f" 📊 Dummy file mentioned: {dummy_file_mentioned}") self.logger.info(f" 📊 New file mentioned: {new_file_mentioned}") - + if self.verbose: self.logger.debug(" 📋 Sample embedding logs:") for log in embedding_logs[:5]: # Show first 5 if log.strip(): self.logger.debug(f" {log.strip()}") - + self.logger.debug(" 📋 Sample continuation logs:") for log in continuation_logs[:3]: # Show first 3 if log.strip(): @@ -200,14 +209,14 @@ class NewUtility: len(embedding_logs) > 0, # File embedding occurred len(continuation_logs) > 0, # Continuation worked dummy_file_mentioned, # Original file processed - new_file_mentioned # New file processed + new_file_mentioned, # New file processed ] - + passed_criteria = sum(success_criteria) total_criteria = len(success_criteria) - + self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}") - + if passed_criteria >= 3: # At least 3 out of 4 criteria self.logger.info(" ✅ File deduplication workflow test: PASSED") return True diff --git a/tools/base.py b/tools/base.py index d7dd61c..3f06ffe 100644 --- a/tools/base.py +++ b/tools/base.py @@ -210,7 +210,7 @@ class BaseTool(ABC): list[str]: List of files that need to be embedded (not already in history) """ logger.debug(f"[FILES] {self.name}: Filtering {len(requested_files)} requested files") - + if not continuation_id: # New conversation, all files are new logger.debug(f"[FILES] {self.name}: New conversation, all {len(requested_files)} files are new") @@ -226,12 +226,16 @@ class BaseTool(ABC): logger.debug( f"📁 
{self.name} tool: No files found in conversation history for thread {continuation_id}" ) - logger.debug(f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files") + logger.debug( + f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files" + ) return requested_files # Return only files that haven't been embedded yet new_files = [f for f in requested_files if f not in embedded_files] - logger.debug(f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded") + logger.debug( + f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded" + ) logger.debug(f"[FILES] {self.name}: New files to embed: {new_files}") # Log filtering results for debugging @@ -249,7 +253,9 @@ class BaseTool(ABC): # and include all files rather than risk losing access to needed files logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}") logger.warning(f"📁 {self.name} tool: Including all requested files as fallback") - logger.debug(f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback") + logger.debug( + f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback" + ) return requested_files def _prepare_file_content_for_prompt( @@ -312,7 +318,9 @@ class BaseTool(ABC): # Read content of new files only if files_to_embed: logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}") - logger.debug(f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}") + logger.debug( + f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}" + ) try: file_content = read_files( files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens @@ -662,7 +670,7 @@ If any of these would strengthen your analysis, specify what Claude should searc # Return error information in standardized format logger = logging.getLogger(f"tools.{self.name}") error_msg = str(e) - + # Check if this is a 500 INTERNAL error that asks for retry if "500 INTERNAL" in error_msg and "Please retry" in error_msg: logger.warning(f"500 INTERNAL error in {self.name} - attempting retry") @@ -671,16 +679,16 @@ If any of these would strengthen your analysis, specify what Claude should searc model = self._get_model_wrapper(request) raw_response = await model.generate_content(prompt) response = raw_response.text - + # If successful, process normally return [TextContent(type="text", text=self._process_response(response, request).model_dump_json())] - + except Exception as retry_e: logger.error(f"Retry failed for {self.name} tool: {str(retry_e)}") error_msg = f"Tool failed after retry: {str(retry_e)}" - + logger.error(f"Error in {self.name} tool execution: {error_msg}", exc_info=True) - + error_output = ToolOutput( status="error", content=f"Error in {self.name}: {error_msg}", @@ -911,11 +919,12 @@ If any of these would strengthen your analysis, specify what Claude should searc Dict with continuation data if opportunity should be offered, None otherwise """ continuation_id = getattr(request, "continuation_id", None) - + try: if continuation_id: # Check remaining turns in existing thread from utils.conversation_memory import get_thread + 
context = get_thread(continuation_id) if context: current_turns = len(context.turns) diff --git a/utils/conversation_memory.py b/utils/conversation_memory.py index 289e08b..7b5388b 100644 --- a/utils/conversation_memory.py +++ b/utils/conversation_memory.py @@ -251,7 +251,7 @@ def add_turn( - File references are preserved for cross-tool access """ logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})") - + context = get_thread(thread_id) if not context: logger.debug(f"[FLOW] Thread {thread_id} not found for turn addition") @@ -301,13 +301,13 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]: list[str]: Deduplicated list of file paths referenced in the conversation """ if not context.turns: - logger.debug(f"[FILES] No turns found, returning empty file list") + logger.debug("[FILES] No turns found, returning empty file list") return [] # Collect all unique files from all turns, preserving order of first appearance seen_files = set() unique_files = [] - + logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns") for i, turn in enumerate(context.turns): @@ -322,7 +322,7 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]: logger.debug(f"[FILES] Duplicate file skipped: {file_path}") else: logger.debug(f"[FILES] Turn {i+1} has no files") - + logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}") return unique_files @@ -409,13 +409,17 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) -> logger.debug( f"📄 File embedded in conversation history: {file_path} ({content_tokens:,} tokens)" ) - logger.debug(f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})") + logger.debug( + f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})" + ) else: files_truncated += 1 logger.debug( f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)" ) - logger.debug(f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)") + logger.debug( + f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)" + ) # Stop processing more files break else: @@ -439,7 +443,9 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) -> logger.debug( f"📄 Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens" ) - logger.debug(f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total") + logger.debug( + f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total" + ) else: history_parts.append("(No accessible files found)") logger.debug( @@ -505,11 +511,13 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) -> from utils.token_utils import estimate_tokens total_conversation_tokens = estimate_tokens(complete_history) - + # Summary log of what was built user_turns = len([t for t in context.turns if t.role == "user"]) assistant_turns = len([t for t in context.turns if t.role == "assistant"]) - logger.debug(f"[FLOW] Built conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens") + logger.debug( + f"[FLOW] Built 
conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens" + ) return complete_history, total_conversation_tokens diff --git a/utils/file_utils.py b/utils/file_utils.py index 1757042..8d986c4 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -470,7 +470,7 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i logger.debug(f"[FILES] Reading file content for {file_path}") with open(path, encoding="utf-8", errors="replace") as f: file_content = f.read() - + logger.debug(f"[FILES] Successfully read {len(file_content)} characters from {file_path}") # Format with clear delimiters that help the AI understand file boundaries @@ -518,7 +518,9 @@ def read_files( max_tokens = MAX_CONTEXT_TOKENS logger.debug(f"[FILES] read_files called with {len(file_paths)} paths") - logger.debug(f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}") + logger.debug( + f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}" + ) content_parts = [] total_tokens = 0 @@ -546,7 +548,7 @@ def read_files( if not all_files and file_paths: # No files found but paths were provided - logger.debug(f"[FILES] No files found from provided paths") + logger.debug("[FILES] No files found from provided paths") content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n") else: # Read files sequentially until token limit is reached @@ -567,7 +569,9 @@ def read_files( logger.debug(f"[FILES] Added file {file_path}, total tokens: {total_tokens:,}") else: # File too large for remaining budget - logger.debug(f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)") + logger.debug( + f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)" + ) files_skipped.append(file_path) # Add informative note about skipped files to help users understand
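
--
Note on the behavior under test: the simulator tests above verify deduplication
indirectly, by grepping docker logs for the "Filtering"/"skipping" markers emitted
by tools/base.py. The filtering itself reduces to a membership check against the
files already embedded in the thread's conversation history. A minimal standalone
sketch, assuming the embedded-file set is precomputed (the real
BaseTool.filter_new_files derives it from conversation memory, logs each decision,
and returns all requested files as a fallback on error):

    def filter_new_files(requested_files, embedded_files=None):
        """Return only the files that still need to be embedded."""
        if embedded_files is None:
            # New conversation (no continuation_id): every requested file is new.
            return list(requested_files)
        # Continued conversation: skip files already embedded in the thread history.
        return [f for f in requested_files if f not in embedded_files]

    # Example: a continued turn re-sends auth.py alongside a new config.json;
    # only the new file should be embedded again.
    assert filter_new_files(["auth.py", "config.json"], {"auth.py"}) == ["config.json"]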