#!/usr/bin/env python3
"""
Comprehensive Cross-Tool Test

Tests file deduplication, conversation continuation, and file handling
across all available MCP tools using realistic workflows with low thinking mode.
Validates:
1. Cross-tool conversation continuation
2. File deduplication across different tools
3. Mixed file scenarios (old + new files)
4. Conversation history preservation
5. Proper tool chaining with context
"""

import datetime
import subprocess

from .base_test import BaseSimulatorTest

# Second container whose `docker logs` output is merged with the server's.
_LOG_MONITOR_CONTAINER = "gemini-mcp-log-monitor"

# Tool names searched for (case-insensitively) in the logs as evidence of tool activity.
_TOOL_NAMES = ("chat", "analyze", "debug", "codereview", "precommit")

# Minimum number of success criteria (out of 8) that must hold for the test to pass.
_MIN_PASSED_CRITERIA = 6

# Deliberately weak authentication module handed to the tools for review.
_AUTH_SOURCE = """def login(user, pwd):
    # Security issue: plain text password
    if user == "admin" and pwd == "123":
        return True
    return False

def hash_pwd(pwd):
    # Weak hashing
    return str(hash(pwd))
"""

# Deliberately weak configuration file added mid-conversation (the "new" file).
_CONFIG_SOURCE = """{
    "db_password": "weak123",
    "debug": true,
    "secret_key": "test"
}"""

# "Improved" module created just before the precommit step.
_IMPROVED_AUTH_SOURCE = """import hashlib

def secure_login(user, pwd):
    # Better: hashed password check
    hashed = hashlib.sha256(pwd.encode()).hexdigest()
    if user == "admin" and hashed == "expected_hash":
        return True
    return False
"""


def _short_id(continuation_id) -> str:
    """Return the first 8 characters of *continuation_id*, or ``"None"`` if it is falsy."""
    return continuation_id[:8] if continuation_id else "None"


class CrossToolComprehensiveTest(BaseSimulatorTest):
    """Comprehensive test across all MCP tools"""

    @property
    def test_name(self) -> str:
        """Identifier of this test."""
        return "cross_tool_comprehensive"

    @property
    def test_description(self) -> str:
        """One-line human-readable description of this test."""
        return "Comprehensive cross-tool file deduplication and continuation"

    def get_docker_logs_since(self, since_time: str) -> str:
        """Get docker logs since a specific timestamp.

        Runs ``docker logs --since <since_time>`` for ``self.container_name`` and
        for the log-monitor container, in that order.

        Args:
            since_time: Value passed verbatim to ``docker logs --since``.

        Returns:
            The captured stdout of both commands joined by a newline, or an
            empty string if running docker raised an exception.
        """
        try:
            outputs = []
            for container in (self.container_name, _LOG_MONITOR_CONTAINER):
                result = subprocess.run(
                    ["docker", "logs", "--since", since_time, container],
                    capture_output=True,
                    text=True,
                )
                if result.returncode != 0:
                    # Surface the failure instead of silently validating against empty
                    # logs; stdout is still used so the result stays best-effort.
                    self.logger.warning(
                        f"docker logs for {container} exited with code {result.returncode}: "
                        f"{result.stderr.strip()[:200]}"
                    )
                # NOTE(review): only stdout is used. `docker logs` replays the
                # container's stderr stream on stderr, which is captured but ignored
                # here - confirm that the lines validated below are written to stdout.
                outputs.append(result.stdout)

            # Combine logs from both containers
            return "\n".join(outputs)

        except Exception as e:
            self.logger.error(f"Failed to get docker logs: {e}")
            return ""

    def run_test(self) -> bool:
        """Comprehensive cross-tool test with all MCP tools.

        Workflow: chat -> analyze -> chat continuation -> debug -> debug
        continuation (optional) -> codereview -> precommit, followed by
        validation of the docker logs produced since the workflow started.

        NOTE(review): only steps 3 and 5 pass a ``continuation_id``, and each
        continues a thread started by the *same* tool; the other steps start
        fresh conversations. Confirm whether a continuation ID was meant to be
        handed from one tool to another.

        Returns:
            True when at least ``_MIN_PASSED_CRITERIA`` of the 8 success criteria
            hold. False when a required step returns no response, when too few
            criteria hold, or when any exception is raised. Test files are
            cleaned up in every case.
        """
        try:
            self.logger.info("📄 Test: Comprehensive cross-tool file deduplication and continuation")

            # Setup test files
            self.setup_test_files()

            # Create short test files for quick testing
            auth_file = self.create_additional_test_file("auth.py", _AUTH_SOURCE)
            config_file_path = self.create_additional_test_file("config.json", _CONFIG_SOURCE)

            # Get timestamp for log filtering (naive local time, second resolution)
            start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

            # Tool chain: chat → analyze → debug → codereview → precommit
            # Each entry is (label, response, continuation_id)
            responses = []

            # Step 1: Start with chat tool to understand the codebase
            self.logger.info(" Step 1: chat tool - Initial codebase exploration")
            chat_params = {
                "prompt": "Please give me a quick one line reply. I have an authentication module that needs review. Can you help me understand potential issues?",
                "files": [auth_file],
                "thinking_mode": "low",
                "model": "flash",
            }

            response1, continuation_id1 = self.call_mcp_tool("chat", chat_params)
            # The chat thread is continued in step 3, so its ID is mandatory.
            if not response1 or not continuation_id1:
                self.logger.error(" ❌ Step 1: chat tool failed")
                return False

            self.logger.info(f" ✅ Step 1: chat completed with continuation_id: {_short_id(continuation_id1)}...")
            responses.append(("chat", response1, continuation_id1))

            # Step 2: Use analyze tool to do deeper analysis (fresh conversation)
            self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)")
            analyze_params = {
                "files": [auth_file],
                "prompt": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
                "thinking_mode": "low",
                "model": "flash",
            }

            response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params)
            if not response2:
                self.logger.error(" ❌ Step 2: analyze tool failed")
                return False

            self.logger.info(
                f" ✅ Step 2: analyze completed with continuation_id: {_short_id(continuation_id2)}..."
            )
            responses.append(("analyze", response2, continuation_id2))

            # Step 3: Continue chat conversation with config file
            self.logger.info(" Step 3: chat continuation - Add config file context")
            chat_continue_params = {
                "continuation_id": continuation_id1,
                "prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?",
                "files": [auth_file, config_file_path],  # Old + new file
                "thinking_mode": "low",
                "model": "flash",
            }

            response3, _ = self.call_mcp_tool("chat", chat_continue_params)
            if not response3:
                self.logger.error(" ❌ Step 3: chat continuation failed")
                return False

            self.logger.info(" ✅ Step 3: chat continuation completed")
            responses.append(("chat_continue", response3, continuation_id1))

            # Step 4: Use debug tool to identify specific issues
            self.logger.info(" Step 4: debug tool - Identify specific problems")
            debug_params = {
                "files": [auth_file, config_file_path],
                "prompt": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
                "thinking_mode": "low",
                "model": "flash",
            }

            response4, continuation_id4 = self.call_mcp_tool("debug", debug_params)
            if not response4:
                self.logger.error(" ❌ Step 4: debug tool failed")
                return False

            self.logger.info(
                f" ✅ Step 4: debug completed with continuation_id: {_short_id(continuation_id4)}..."
            )
            responses.append(("debug", response4, continuation_id4))

            # Step 5: Continue the debug thread from step 4 (same files again).
            # Optional: skipped when step 4 returned no continuation ID, and a
            # missing response does not fail the test.
            if continuation_id4:
                self.logger.info(" Step 5: debug continuation - Additional analysis")
                debug_continue_params = {
                    "continuation_id": continuation_id4,
                    "files": [auth_file, config_file_path],
                    "prompt": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
                    "thinking_mode": "low",
                    "model": "flash",
                }

                response5, _ = self.call_mcp_tool("debug", debug_continue_params)
                if response5:
                    self.logger.info(" ✅ Step 5: debug continuation completed")
                    responses.append(("debug_continue", response5, continuation_id4))
                else:
                    # Make the skipped step visible; it lowers the tool/ID counts
                    # that feed the success criteria.
                    self.logger.warning(" ⚠️ Step 5: debug continuation returned no response")

            # Step 6: Use codereview for comprehensive review
            self.logger.info(" Step 6: codereview tool - Comprehensive code review")
            codereview_params = {
                "files": [auth_file, config_file_path],
                "prompt": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
                "thinking_mode": "low",
                "model": "flash",
            }

            response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params)
            if not response6:
                self.logger.error(" ❌ Step 6: codereview tool failed")
                return False

            self.logger.info(
                f" ✅ Step 6: codereview completed with continuation_id: {_short_id(continuation_id6)}..."
            )
            responses.append(("codereview", response6, continuation_id6))

            # Step 7: Create improved version and use precommit
            self.logger.info(" Step 7: precommit tool - Pre-commit validation")

            # Create a short improved version
            improved_file = self.create_additional_test_file("auth_improved.py", _IMPROVED_AUTH_SOURCE)

            precommit_params = {
                "path": self.test_dir,
                "files": [auth_file, config_file_path, improved_file],
                "prompt": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
                "thinking_mode": "low",
                "model": "flash",
            }

            response7, continuation_id7 = self.call_mcp_tool("precommit", precommit_params)
            if not response7:
                self.logger.error(" ❌ Step 7: precommit tool failed")
                return False

            self.logger.info(
                f" ✅ Step 7: precommit completed with continuation_id: {_short_id(continuation_id7)}..."
            )
            responses.append(("precommit", response7, continuation_id7))

            # Validate comprehensive results
            self.logger.info(" 📋 Validating comprehensive cross-tool results...")
            logs = self.get_docker_logs_since(start_time)
            return self._validate_cross_tool_results(responses, logs)

        except Exception as e:
            self.logger.error(f"Comprehensive cross-tool test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()

    def _validate_cross_tool_results(self, responses: list, logs: str) -> bool:
        """Score the collected responses and docker logs against the success criteria.

        Args:
            responses: ``(label, response, continuation_id)`` tuples, one per
                completed workflow step.
            logs: Combined docker log text to scan.

        Returns:
            True when at least ``_MIN_PASSED_CRITERIA`` of the 8 criteria hold.
        """
        # Split and lower-case each log line once; every filter below reuses them.
        lines = [(line, line.lower()) for line in logs.split("\n")]

        # Validation criteria
        tools_used = [label for label, _, _ in responses]
        continuation_ids_created = [cid for _, _, cid in responses if cid]

        # Check for various log patterns
        conversation_logs = [line for line, low in lines if "conversation" in low or "history" in low]
        embedding_logs = [line for line, low in lines if "📁" in line or "embedding" in low or "file" in low]
        continuation_logs = [line for line, low in lines if "continuation" in low or "resuming" in low]
        cross_tool_logs = [line for line, low in lines if any(tool in low for tool in _TOOL_NAMES)]

        # File mentions. The needles contain no newline, so searching the whole
        # text is equivalent to searching line by line.
        auth_file_mentioned = "auth.py" in logs
        config_file_mentioned = "config.json" in logs
        improved_file_mentioned = "auth_improved.py" in logs

        # Print comprehensive diagnostics
        self.logger.info(f" 📊 Tools used: {len(tools_used)} ({', '.join(tools_used)})")
        self.logger.info(f" 📊 Continuation IDs created: {len(continuation_ids_created)}")
        self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}")
        self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}")
        self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}")
        self.logger.info(f" 📊 Cross-tool activity logs: {len(cross_tool_logs)}")
        self.logger.info(f" 📊 Auth file mentioned: {auth_file_mentioned}")
        self.logger.info(f" 📊 Config file mentioned: {config_file_mentioned}")
        self.logger.info(f" 📊 Improved file mentioned: {improved_file_mentioned}")

        if self.verbose:
            self.logger.debug(" 📋 Sample tool activity logs:")
            for log in cross_tool_logs[:10]:  # Show first 10
                if log.strip():
                    self.logger.debug(f" {log.strip()}")

            self.logger.debug(" 📋 Sample continuation logs:")
            for log in continuation_logs[:5]:  # Show first 5
                if log.strip():
                    self.logger.debug(f" {log.strip()}")

        # Comprehensive success criteria
        success_criteria = [
            len(tools_used) >= 5,  # Used multiple tools
            len(continuation_ids_created) >= 3,  # Created multiple continuation threads
            len(embedding_logs) > 10,  # Significant file embedding activity
            len(continuation_logs) > 0,  # Evidence of continuation
            auth_file_mentioned,  # Original file processed
            config_file_mentioned,  # Additional file processed
            improved_file_mentioned,  # New file processed
            len(conversation_logs) > 5,  # Conversation history activity
        ]

        passed_criteria = sum(success_criteria)
        total_criteria = len(success_criteria)

        self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}")

        if passed_criteria >= _MIN_PASSED_CRITERIA:  # At least 6 out of 8 criteria
            self.logger.info(" ✅ Comprehensive cross-tool test: PASSED")
            return True

        self.logger.warning(" ⚠️ Comprehensive cross-tool test: FAILED")
        self.logger.warning(" 💡 Check logs for detailed cross-tool activity")
        return False