Add live-simulation tests to validate that conversation continuation and preservation work across requests

Fahad
2025-06-11 17:03:09 +04:00
parent ac763e0213
commit c90ac7561e
14 changed files with 3612 additions and 1420 deletions

View File

@@ -0,0 +1,35 @@
"""
Communication Simulator Tests Package
This package contains individual test modules for the Gemini MCP Communication Simulator.
Each test is in its own file for better organization and maintainability.
"""
from .base_test import BaseSimulatorTest
from .test_basic_conversation import BasicConversationTest
from .test_content_validation import ContentValidationTest
from .test_per_tool_deduplication import PerToolDeduplicationTest
from .test_cross_tool_continuation import CrossToolContinuationTest
from .test_logs_validation import LogsValidationTest
from .test_redis_validation import RedisValidationTest
# Test registry for dynamic loading
TEST_REGISTRY = {
"basic_conversation": BasicConversationTest,
"content_validation": ContentValidationTest,
"per_tool_deduplication": PerToolDeduplicationTest,
"cross_tool_continuation": CrossToolContinuationTest,
"logs_validation": LogsValidationTest,
"redis_validation": RedisValidationTest,
}
__all__ = [
'BaseSimulatorTest',
'BasicConversationTest',
'ContentValidationTest',
'PerToolDeduplicationTest',
'CrossToolContinuationTest',
'LogsValidationTest',
'RedisValidationTest',
'TEST_REGISTRY'
]
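
The registry maps test names to classes so a runner can select tests dynamically. A minimal sketch of how such a runner might consume it, not part of this commit; the package name simulator_tests and the run_selected helper are assumptions for illustration:

# Hypothetical runner sketch, not included in the commit.
import sys

from simulator_tests import TEST_REGISTRY  # package name assumed


def run_selected(names, verbose=False):
    """Instantiate and run the requested tests, returning a name -> passed mapping."""
    results = {}
    for name in names:
        test_cls = TEST_REGISTRY[name]  # raises KeyError for unknown names
        results[name] = test_cls(verbose=verbose).run_test()
    return results


if __name__ == "__main__":
    requested = sys.argv[1:] or list(TEST_REGISTRY)
    outcome = run_selected(requested, verbose=True)
    sys.exit(0 if all(outcome.values()) else 1)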

View File

@@ -0,0 +1,255 @@
#!/usr/bin/env python3
"""
Base Test Class for Communication Simulator Tests

Provides common functionality and utilities for all simulator tests.
"""

import json
import logging
import os
import subprocess
import tempfile
import time
from typing import Optional, Tuple


class BaseSimulatorTest:
    """Base class for all communication simulator tests"""

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.test_files = {}
        self.test_dir = None
        self.container_name = "gemini-mcp-server"
        self.redis_container = "gemini-mcp-redis"

        # Configure logging
        log_level = logging.DEBUG if verbose else logging.INFO
        logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
        self.logger = logging.getLogger(self.__class__.__name__)

    def setup_test_files(self):
        """Create test files for the simulation"""
        # Test Python file
        python_content = '''"""
Sample Python module for testing MCP conversation continuity
"""


def fibonacci(n):
    """Calculate fibonacci number recursively"""
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)


def factorial(n):
    """Calculate factorial iteratively"""
    result = 1
    for i in range(1, n + 1):
        result *= i
    return result


class Calculator:
    """Simple calculator class"""

    def __init__(self):
        self.history = []

    def add(self, a, b):
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        return result

    def multiply(self, a, b):
        result = a * b
        self.history.append(f"{a} * {b} = {result}")
        return result
'''

        # Test configuration file
        config_content = """{
    "database": {
        "host": "localhost",
        "port": 5432,
        "name": "testdb",
        "ssl": true
    },
    "cache": {
        "redis_url": "redis://localhost:6379",
        "ttl": 3600
    },
    "logging": {
        "level": "INFO",
        "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    }
}"""

        # Create files in the current project directory
        current_dir = os.getcwd()
        self.test_dir = os.path.join(current_dir, "test_simulation_files")
        os.makedirs(self.test_dir, exist_ok=True)

        test_py = os.path.join(self.test_dir, "test_module.py")
        test_config = os.path.join(self.test_dir, "config.json")

        with open(test_py, "w") as f:
            f.write(python_content)
        with open(test_config, "w") as f:
            f.write(config_content)

        self.test_files = {"python": test_py, "config": test_config}
        self.logger.debug(f"Created test files: {list(self.test_files.values())}")

    def call_mcp_tool(self, tool_name: str, params: dict) -> Tuple[Optional[str], Optional[str]]:
        """Call an MCP tool via Claude CLI (docker exec)"""
        try:
            # Prepare the MCP initialization and tool call sequence
            init_request = {
                "jsonrpc": "2.0",
                "id": 1,
                "method": "initialize",
                "params": {
                    "protocolVersion": "2024-11-05",
                    "capabilities": {"tools": {}},
                    "clientInfo": {"name": "communication-simulator", "version": "1.0.0"},
                },
            }

            # Send initialized notification
            initialized_notification = {"jsonrpc": "2.0", "method": "notifications/initialized"}

            # Prepare the tool call request
            tool_request = {
                "jsonrpc": "2.0",
                "id": 2,
                "method": "tools/call",
                "params": {"name": tool_name, "arguments": params},
            }

            # Combine all messages
            messages = [json.dumps(init_request), json.dumps(initialized_notification), json.dumps(tool_request)]

            # Join with newlines as MCP expects
            input_data = "\n".join(messages) + "\n"

            # Simulate Claude CLI calling the MCP server via docker exec
            docker_cmd = ["docker", "exec", "-i", self.container_name, "python", "server.py"]

            self.logger.debug(f"Calling MCP tool {tool_name} with proper initialization")

            # Execute the command
            result = subprocess.run(
                docker_cmd, input=input_data, text=True, capture_output=True, timeout=300  # 5 minute timeout
            )

            if result.returncode != 0:
                self.logger.error(f"Docker exec failed: {result.stderr}")
                return None, None

            # Parse the response - look for the tool call response
            response_data = self._parse_mcp_response(result.stdout, expected_id=2)
            if not response_data:
                return None, None

            # Extract continuation_id if present
            continuation_id = self._extract_continuation_id(response_data)

            return response_data, continuation_id

        except subprocess.TimeoutExpired:
            self.logger.error(f"MCP tool call timed out: {tool_name}")
            return None, None
        except Exception as e:
            self.logger.error(f"MCP tool call failed: {e}")
            return None, None

    def _parse_mcp_response(self, stdout: str, expected_id: int = 2) -> Optional[str]:
        """Parse MCP JSON-RPC response from stdout"""
        try:
            lines = stdout.strip().split("\n")
            for line in lines:
                if line.strip() and line.startswith("{"):
                    response = json.loads(line)
                    # Look for the tool call response with the expected ID
                    if response.get("id") == expected_id and "result" in response:
                        # Extract the actual content from the response
                        result = response["result"]
                        # Handle new response format with 'content' array
                        if isinstance(result, dict) and "content" in result:
                            content_array = result["content"]
                            if isinstance(content_array, list) and len(content_array) > 0:
                                return content_array[0].get("text", "")
                        # Handle legacy format
                        elif isinstance(result, list) and len(result) > 0:
                            return result[0].get("text", "")
                    elif response.get("id") == expected_id and "error" in response:
                        self.logger.error(f"MCP error: {response['error']}")
                        return None

            # If we get here, log all responses for debugging
            self.logger.warning(f"No valid tool call response found for ID {expected_id}")
            self.logger.debug(f"Full stdout: {stdout}")
            return None

        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse MCP response: {e}")
            self.logger.debug(f"Stdout that failed to parse: {stdout}")
            return None

    def _extract_continuation_id(self, response_text: str) -> Optional[str]:
        """Extract continuation_id from response metadata"""
        try:
            # Parse the response text as JSON to look for continuation metadata
            response_data = json.loads(response_text)

            # Look for continuation_id in various places
            if isinstance(response_data, dict):
                # Check metadata
                metadata = response_data.get("metadata", {})
                if "thread_id" in metadata:
                    return metadata["thread_id"]

                # Check follow_up_request
                follow_up = response_data.get("follow_up_request", {})
                if follow_up and "continuation_id" in follow_up:
                    return follow_up["continuation_id"]

                # Check continuation_offer
                continuation_offer = response_data.get("continuation_offer", {})
                if continuation_offer and "continuation_id" in continuation_offer:
                    return continuation_offer["continuation_id"]

            self.logger.debug(f"No continuation_id found in response: {response_data}")
            return None

        except json.JSONDecodeError as e:
            self.logger.debug(f"Failed to parse response for continuation_id: {e}")
            return None
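
    # For reference, the three lookups above correspond to response shapes like the
    # following (illustrative examples only; fields other than the three checked
    # here are not guaranteed by the server):
    #
    #   {"content": "...", "metadata": {"thread_id": "abc-123"}}
    #   {"content": "...", "follow_up_request": {"continuation_id": "abc-123"}}
    #   {"content": "...", "continuation_offer": {"continuation_id": "abc-123"}}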

    def run_command(self, cmd: list[str], check: bool = True, capture_output: bool = False, **kwargs):
        """Run a shell command with logging"""
        if self.verbose:
            self.logger.debug(f"Running: {' '.join(cmd)}")
        return subprocess.run(cmd, check=check, capture_output=capture_output, **kwargs)

    def cleanup_test_files(self):
        """Clean up test files"""
        if hasattr(self, "test_dir") and self.test_dir and os.path.exists(self.test_dir):
            import shutil

            shutil.rmtree(self.test_dir)
            self.logger.debug(f"Removed test files directory: {self.test_dir}")

    def run_test(self) -> bool:
        """Run the test - to be implemented by subclasses"""
        raise NotImplementedError("Subclasses must implement run_test()")

    @property
    def test_name(self) -> str:
        """Get the test name - to be implemented by subclasses"""
        raise NotImplementedError("Subclasses must implement test_name property")

    @property
    def test_description(self) -> str:
        """Get the test description - to be implemented by subclasses"""
        raise NotImplementedError("Subclasses must implement test_description property")
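
For orientation, a minimal sketch of the contract a concrete test must fulfil, not part of this commit; the NoopTest name and the simulator_tests package path are assumptions for illustration:

from simulator_tests.base_test import BaseSimulatorTest  # package name assumed


class NoopTest(BaseSimulatorTest):
    """Smallest possible simulator test: one chat call, pass if it answers."""

    @property
    def test_name(self) -> str:
        return "noop"

    @property
    def test_description(self) -> str:
        return "Minimal example test"

    def run_test(self) -> bool:
        response, _ = self.call_mcp_tool("chat", {"prompt": "Please use low thinking mode. Say hello"})
        return response is not None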

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""
Basic Conversation Flow Test

Tests basic conversation continuity with the chat tool, including:
- Initial chat with file analysis
- Continuing conversation with same file (deduplication)
- Adding additional files to ongoing conversation
"""

from .base_test import BaseSimulatorTest


class BasicConversationTest(BaseSimulatorTest):
    """Test basic conversation flow with chat tool"""

    @property
    def test_name(self) -> str:
        return "basic_conversation"

    @property
    def test_description(self) -> str:
        return "Basic conversation flow with chat tool"

    def run_test(self) -> bool:
        """Test basic conversation flow with chat tool"""
        try:
            self.logger.info("📝 Test: Basic conversation flow")

            # Setup test files
            self.setup_test_files()

            # Initial chat tool call with file
            self.logger.info(" 1.1: Initial chat with file analysis")
            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Analyze this Python code and explain what it does",
                    "files": [self.test_files["python"]],
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to get initial response with continuation_id")
                return False

            self.logger.info(f" ✅ Got continuation_id: {continuation_id}")

            # Continue conversation with same file (should be deduplicated)
            self.logger.info(" 1.2: Continue conversation with same file")
            response2, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Now focus on the Calculator class specifically. Are there any improvements you'd suggest?",
                    "files": [self.test_files["python"]],  # Same file - should be deduplicated
                    "continuation_id": continuation_id,
                },
            )

            if not response2:
                self.logger.error("Failed to continue conversation")
                return False

            # Continue with additional file
            self.logger.info(" 1.3: Continue conversation with additional file")
            response3, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Now also analyze this configuration file and see how it might relate to the Python code",
                    "files": [self.test_files["python"], self.test_files["config"]],
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
                self.logger.error("Failed to continue with additional file")
                return False

            self.logger.info(" ✅ Basic conversation flow working")
            return True

        except Exception as e:
            self.logger.error(f"Basic conversation flow test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()
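
Each test class can also be exercised on its own; a quick sketch, assuming the package is importable as simulator_tests and the Docker containers are already running:

from simulator_tests import BasicConversationTest

test = BasicConversationTest(verbose=True)
ok = test.run_test()
print("basic_conversation:", "PASS" if ok else "FAIL")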

View File

@@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""
Content Validation Test

Tests that tools don't duplicate file content in their responses.
This test is specifically designed to catch content duplication bugs.
"""

import json
import os

from .base_test import BaseSimulatorTest


class ContentValidationTest(BaseSimulatorTest):
    """Test that tools don't duplicate file content in their responses"""

    @property
    def test_name(self) -> str:
        return "content_validation"

    @property
    def test_description(self) -> str:
        return "Content validation and duplicate detection"

    def run_test(self) -> bool:
        """Test that tools don't duplicate file content in their responses"""
        try:
            self.logger.info("📄 Test: Content validation and duplicate detection")

            # Setup test files first
            self.setup_test_files()

            # Create a test file with distinctive content for validation
            validation_content = '''"""
Configuration file for content validation testing
This content should appear only ONCE in any tool response
"""

# Configuration constants
MAX_CONTENT_TOKENS = 800_000 # This line should appear exactly once
TEMPERATURE_ANALYTICAL = 0.2 # This should also appear exactly once
UNIQUE_VALIDATION_MARKER = "CONTENT_VALIDATION_TEST_12345"

# Database settings
DATABASE_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "name": "validation_test_db"
}
'''

            validation_file = os.path.join(self.test_dir, "validation_config.py")
            with open(validation_file, "w") as f:
                f.write(validation_content)

            # Test 1: Precommit tool with files parameter (where the bug occurred)
            self.logger.info(" 1: Testing precommit tool content duplication")

            # Call precommit tool with the validation file
            response1, thread_id = self.call_mcp_tool(
                "precommit",
                {
                    "path": os.getcwd(),
                    "files": [validation_file],
                    "original_request": "Test for content duplication in precommit tool"
                }
            )

            if response1:
                # Parse response and check for content duplication
                try:
                    response_data = json.loads(response1)
                    content = response_data.get("content", "")

                    # Count occurrences of distinctive markers
                    max_content_count = content.count("MAX_CONTENT_TOKENS = 800_000")
                    temp_analytical_count = content.count("TEMPERATURE_ANALYTICAL = 0.2")
                    unique_marker_count = content.count("UNIQUE_VALIDATION_MARKER")

                    # Validate no duplication
                    duplication_detected = False
                    issues = []

                    if max_content_count > 1:
                        issues.append(f"MAX_CONTENT_TOKENS appears {max_content_count} times")
                        duplication_detected = True
                    if temp_analytical_count > 1:
                        issues.append(f"TEMPERATURE_ANALYTICAL appears {temp_analytical_count} times")
                        duplication_detected = True
                    if unique_marker_count > 1:
                        issues.append(f"UNIQUE_VALIDATION_MARKER appears {unique_marker_count} times")
                        duplication_detected = True

                    if duplication_detected:
                        self.logger.error(f" ❌ Content duplication detected in precommit tool: {'; '.join(issues)}")
                        return False
                    else:
                        self.logger.info(" ✅ No content duplication in precommit tool")

                except json.JSONDecodeError:
                    self.logger.warning(" ⚠️ Could not parse precommit response as JSON")
            else:
                self.logger.warning(" ⚠️ Precommit tool failed to respond")

            # Test 2: Other tools that use files parameter
            tools_to_test = [
                ("chat", {"prompt": "Please use low thinking mode. Analyze this config file", "files": [validation_file]}),
                ("codereview", {"files": [validation_file], "context": "Please use low thinking mode. Review this configuration"}),
                ("analyze", {"files": [validation_file], "analysis_type": "code_quality"})
            ]

            for tool_name, params in tools_to_test:
                self.logger.info(f" 2.{tool_name}: Testing {tool_name} tool content duplication")

                response, _ = self.call_mcp_tool(tool_name, params)
                if response:
                    try:
                        response_data = json.loads(response)
                        content = response_data.get("content", "")

                        # Check for duplication
                        marker_count = content.count("UNIQUE_VALIDATION_MARKER")
                        if marker_count > 1:
                            self.logger.error(f" ❌ Content duplication in {tool_name}: marker appears {marker_count} times")
                            return False
                        else:
                            self.logger.info(f" ✅ No content duplication in {tool_name}")

                    except json.JSONDecodeError:
                        self.logger.warning(f" ⚠️ Could not parse {tool_name} response")
                else:
                    self.logger.warning(f" ⚠️ {tool_name} tool failed to respond")

            # Test 3: Cross-tool content validation with file deduplication
            self.logger.info(" 3: Testing cross-tool content consistency")

            if thread_id:
                # Continue conversation with same file - content should be deduplicated in conversation history
                response2, _ = self.call_mcp_tool(
                    "chat",
                    {
                        "prompt": "Please use low thinking mode. Continue analyzing this configuration file",
                        "files": [validation_file],  # Same file should be deduplicated
                        "continuation_id": thread_id,
                    },
                )

                if response2:
                    try:
                        response_data = json.loads(response2)
                        content = response_data.get("content", "")

                        # In continuation, the file content shouldn't be duplicated either
                        marker_count = content.count("UNIQUE_VALIDATION_MARKER")
                        if marker_count > 1:
                            self.logger.error(f" ❌ Content duplication in cross-tool continuation: marker appears {marker_count} times")
                            return False
                        else:
                            self.logger.info(" ✅ No content duplication in cross-tool continuation")

                    except json.JSONDecodeError:
                        self.logger.warning(" ⚠️ Could not parse continuation response")

            # Cleanup
            os.remove(validation_file)

            self.logger.info(" ✅ All content validation tests passed")
            return True

        except Exception as e:
            self.logger.error(f"Content validation test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()
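
The duplication check reduces to counting a distinctive marker in a tool response's content field; a condensed sketch of that check (the marker_count helper name is illustrative, not part of this commit):

import json


def marker_count(response_text: str, marker: str = "UNIQUE_VALIDATION_MARKER") -> int:
    """Return how many times a marker appears in a tool response's content field."""
    content = json.loads(response_text).get("content", "")
    return content.count(marker)


# A well-behaved tool response should contain the marker at most once:
# assert marker_count(response1) <= 1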

View File

@@ -0,0 +1,196 @@
#!/usr/bin/env python3
"""
Cross-Tool Continuation Test

Tests comprehensive cross-tool continuation scenarios to ensure
conversation context is maintained when switching between different tools.
"""

from .base_test import BaseSimulatorTest


class CrossToolContinuationTest(BaseSimulatorTest):
    """Test comprehensive cross-tool continuation scenarios"""

    @property
    def test_name(self) -> str:
        return "cross_tool_continuation"

    @property
    def test_description(self) -> str:
        return "Cross-tool conversation continuation scenarios"

    def run_test(self) -> bool:
        """Test comprehensive cross-tool continuation scenarios"""
        try:
            self.logger.info("🔧 Test: Cross-tool continuation scenarios")

            # Setup test files
            self.setup_test_files()

            success_count = 0
            total_scenarios = 3

            # Scenario 1: chat -> thinkdeep -> codereview
            if self._test_chat_thinkdeep_codereview():
                success_count += 1

            # Scenario 2: analyze -> debug -> thinkdeep
            if self._test_analyze_debug_thinkdeep():
                success_count += 1

            # Scenario 3: Multi-file cross-tool continuation
            if self._test_multi_file_continuation():
                success_count += 1

            self.logger.info(f" ✅ Cross-tool continuation scenarios completed: {success_count}/{total_scenarios} scenarios passed")

            # Consider successful if at least one scenario worked
            return success_count > 0

        except Exception as e:
            self.logger.error(f"Cross-tool continuation test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()

    def _test_chat_thinkdeep_codereview(self) -> bool:
        """Test chat -> thinkdeep -> codereview scenario"""
        try:
            self.logger.info(" 1: Testing chat -> thinkdeep -> codereview")

            # Start with chat
            chat_response, chat_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
                    "files": [self.test_files["python"]],
                },
            )

            if not chat_response or not chat_id:
                self.logger.error("Failed to start chat conversation")
                return False

            # Continue with thinkdeep
            thinkdeep_response, _ = self.call_mcp_tool(
                "thinkdeep",
                {
                    "prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": chat_id,
                },
            )

            if not thinkdeep_response:
                self.logger.error("Failed chat -> thinkdeep continuation")
                return False

            # Continue with codereview
            codereview_response, _ = self.call_mcp_tool(
                "codereview",
                {
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "context": "Building on our previous analysis, provide a comprehensive code review",
                    "continuation_id": chat_id,
                },
            )

            if not codereview_response:
                self.logger.error("Failed thinkdeep -> codereview continuation")
                return False

            self.logger.info(" ✅ chat -> thinkdeep -> codereview working")
            return True

        except Exception as e:
            self.logger.error(f"Chat -> thinkdeep -> codereview scenario failed: {e}")
            return False

    def _test_analyze_debug_thinkdeep(self) -> bool:
        """Test analyze -> debug -> thinkdeep scenario"""
        try:
            self.logger.info(" 2: Testing analyze -> debug -> thinkdeep")

            # Start with analyze
            analyze_response, analyze_id = self.call_mcp_tool(
                "analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality"}
            )

            if not analyze_response or not analyze_id:
                self.logger.warning("Failed to start analyze conversation, skipping scenario 2")
                return False

            # Continue with debug
            debug_response, _ = self.call_mcp_tool(
                "debug",
                {
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "issue_description": "Based on our analysis, help debug the performance issue in fibonacci",
                    "continuation_id": analyze_id,
                },
            )

            if not debug_response:
                self.logger.warning(" ⚠️ analyze -> debug continuation failed")
                return False

            # Continue with thinkdeep
            final_response, _ = self.call_mcp_tool(
                "thinkdeep",
                {
                    "prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": analyze_id,
                },
            )

            if not final_response:
                self.logger.warning(" ⚠️ debug -> thinkdeep continuation failed")
                return False

            self.logger.info(" ✅ analyze -> debug -> thinkdeep working")
            return True

        except Exception as e:
            self.logger.error(f"Analyze -> debug -> thinkdeep scenario failed: {e}")
            return False

    def _test_multi_file_continuation(self) -> bool:
        """Test multi-file cross-tool continuation"""
        try:
            self.logger.info(" 3: Testing multi-file cross-tool continuation")

            # Start with both files
            multi_response, multi_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
                    "files": [self.test_files["python"], self.test_files["config"]],
                },
            )

            if not multi_response or not multi_id:
                self.logger.warning("Failed to start multi-file conversation, skipping scenario 3")
                return False

            # Switch to codereview with same files (should use conversation history)
            multi_review, _ = self.call_mcp_tool(
                "codereview",
                {
                    "files": [self.test_files["python"], self.test_files["config"]],  # Same files
                    "context": "Review both files in the context of our previous discussion",
                    "continuation_id": multi_id,
                },
            )

            if not multi_review:
                self.logger.warning(" ⚠️ Multi-file cross-tool continuation failed")
                return False

            self.logger.info(" ✅ Multi-file cross-tool continuation working")
            return True

        except Exception as e:
            self.logger.error(f"Multi-file continuation scenario failed: {e}")
            return False
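
All three scenarios follow the same pattern: take the continuation_id from the first call and pass it unchanged to every subsequent tool. A condensed sketch of that pattern (the chain_tools helper is illustrative, not part of this commit):

def chain_tools(test, steps):
    """Run (tool_name, params) steps, threading one continuation_id through all of them."""
    continuation_id = None
    for tool_name, params in steps:
        if continuation_id:
            params = {**params, "continuation_id": continuation_id}
        response, new_id = test.call_mcp_tool(tool_name, params)
        if not response:
            return False
        continuation_id = continuation_id or new_id
    return True


# e.g. chain_tools(CrossToolContinuationTest(), [("chat", {...}), ("thinkdeep", {...}), ("codereview", {...})])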

View File

@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""
Docker Logs Validation Test

Validates Docker logs to confirm file deduplication behavior and
conversation threading is working properly.
"""

from .base_test import BaseSimulatorTest


class LogsValidationTest(BaseSimulatorTest):
    """Validate Docker logs to confirm file deduplication behavior"""

    @property
    def test_name(self) -> str:
        return "logs_validation"

    @property
    def test_description(self) -> str:
        return "Docker logs validation"

    def run_test(self) -> bool:
        """Validate Docker logs to confirm file deduplication behavior"""
        try:
            self.logger.info("📋 Test: Validating Docker logs for file deduplication...")

            # Get server logs from both main container and activity logs
            result = self.run_command(["docker", "logs", self.container_name], capture_output=True)
            if result.returncode != 0:
                self.logger.error(f"Failed to get Docker logs: {result.stderr}")
                return False

            main_logs = result.stdout.decode() + result.stderr.decode()

            # Also get activity logs for more detailed conversation tracking
            activity_result = self.run_command(
                ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True
            )

            activity_logs = ""
            if activity_result.returncode == 0:
                activity_logs = activity_result.stdout.decode()

            logs = main_logs + "\n" + activity_logs

            # Look for conversation threading patterns that indicate the system is working
            conversation_patterns = [
                "CONVERSATION_RESUME",
                "CONVERSATION_CONTEXT",
                "previous turns loaded",
                "tool embedding",
                "files included",
                "files truncated",
                "already in conversation history",
            ]

            conversation_lines = []
            for line in logs.split("\n"):
                for pattern in conversation_patterns:
                    if pattern.lower() in line.lower():
                        conversation_lines.append(line.strip())
                        break

            # Look for evidence of conversation threading and file handling
            conversation_threading_found = False
            multi_turn_conversations = False

            for line in conversation_lines:
                lower_line = line.lower()
                if "conversation_resume" in lower_line:
                    conversation_threading_found = True
                    self.logger.debug(f"📄 Conversation threading: {line}")
                elif "previous turns loaded" in lower_line:
                    multi_turn_conversations = True
                    self.logger.debug(f"📄 Multi-turn conversation: {line}")
                elif "already in conversation" in lower_line:
                    self.logger.info(f"✅ Found explicit deduplication: {line}")
                    return True

            # Conversation threading with multiple turns is evidence of file deduplication working
            if conversation_threading_found and multi_turn_conversations:
                self.logger.info("✅ Conversation threading with multi-turn context working")
                self.logger.info(
                    "✅ File deduplication working implicitly (files embedded once in conversation history)"
                )
                return True
            elif conversation_threading_found:
                self.logger.info("✅ Conversation threading detected")
                return True
            else:
                self.logger.warning("⚠️ No clear evidence of conversation threading in logs")
                self.logger.debug(f"Found {len(conversation_lines)} conversation-related log lines")
                return False

        except Exception as e:
            self.logger.error(f"Log validation failed: {e}")
            return False
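
When debugging a failing run it can help to stream the same server logs live while a test executes; a small sketch, assuming the default gemini-mcp-server container name used above and that the package is importable as simulator_tests:

import subprocess

# Follow the MCP server logs in the background while a test runs.
follower = subprocess.Popen(["docker", "logs", "-f", "gemini-mcp-server"])
try:
    from simulator_tests import LogsValidationTest  # package name assumed
    LogsValidationTest(verbose=True).run_test()
finally:
    follower.terminate()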

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Per-Tool File Deduplication Test

Tests file deduplication for each individual MCP tool to ensure
that files are properly deduplicated within single-tool conversations.
"""

from .base_test import BaseSimulatorTest


class PerToolDeduplicationTest(BaseSimulatorTest):
    """Test file deduplication for each individual tool"""

    @property
    def test_name(self) -> str:
        return "per_tool_deduplication"

    @property
    def test_description(self) -> str:
        return "File deduplication for individual tools"

    def run_test(self) -> bool:
        """Test file deduplication for each individual tool"""
        try:
            self.logger.info("📄 Test: Per-tool file deduplication")

            # Setup test files
            self.setup_test_files()

            tools_to_test = [
                (
                    "thinkdeep",
                    {
                        "prompt": "Please use low thinking mode. Think deeply about this Python code and identify potential architectural improvements",
                        "files": [self.test_files["python"]],
                    },
                ),
                ("analyze", {"files": [self.test_files["python"]], "analysis_type": "architecture"}),
                (
                    "debug",
                    {
                        "files": [self.test_files["python"]],
                        "issue_description": "The fibonacci function seems slow for large numbers",
                    },
                ),
                (
                    "codereview",
                    {
                        "files": [self.test_files["python"]],
                        "context": "General code review for quality and best practices",
                    },
                ),
            ]

            successful_tests = 0
            total_tests = len(tools_to_test)

            for tool_name, initial_params in tools_to_test:
                self.logger.info(f" {tool_name}: Testing {tool_name} tool file deduplication")

                # Initial call
                response1, continuation_id = self.call_mcp_tool(tool_name, initial_params)
                if not response1:
                    self.logger.warning(f" ⚠️ {tool_name} tool initial call failed, skipping")
                    continue

                if not continuation_id:
                    self.logger.warning(f" ⚠️ {tool_name} tool didn't provide continuation_id, skipping")
                    continue

                # Continue with same file - should be deduplicated
                continue_params = initial_params.copy()
                continue_params["continuation_id"] = continuation_id

                if tool_name == "thinkdeep":
                    continue_params["prompt"] = "Please use low thinking mode. Now focus specifically on the recursive fibonacci implementation"
                elif tool_name == "analyze":
                    continue_params["analysis_type"] = "performance"
                elif tool_name == "debug":
                    continue_params["issue_description"] = "How can we optimize the fibonacci function?"
                elif tool_name == "codereview":
                    continue_params["context"] = "Focus on the Calculator class implementation"

                response2, _ = self.call_mcp_tool(tool_name, continue_params)
                if response2:
                    self.logger.info(f"{tool_name} tool file deduplication working")
                    successful_tests += 1
                else:
                    self.logger.warning(f" ⚠️ {tool_name} tool continuation failed")

            self.logger.info(f" ✅ Per-tool file deduplication tests completed: {successful_tests}/{total_tests} tools passed")

            # Consider test successful if at least one tool worked
            return successful_tests > 0

        except Exception as e:
            self.logger.error(f"Per-tool file deduplication test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()

View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""
Redis Conversation Memory Validation Test

Validates that conversation memory is working via Redis by checking
for stored conversation threads and their content.
"""

import json

from .base_test import BaseSimulatorTest


class RedisValidationTest(BaseSimulatorTest):
    """Validate that conversation memory is working via Redis"""

    @property
    def test_name(self) -> str:
        return "redis_validation"

    @property
    def test_description(self) -> str:
        return "Redis conversation memory validation"

    def run_test(self) -> bool:
        """Validate that conversation memory is working via Redis"""
        try:
            self.logger.info("💾 Test: Validating conversation memory via Redis...")

            # First, test Redis connectivity
            ping_result = self.run_command(
                ["docker", "exec", self.redis_container, "redis-cli", "ping"], capture_output=True
            )

            if ping_result.returncode != 0:
                self.logger.error("Failed to connect to Redis")
                return False

            if "PONG" not in ping_result.stdout.decode():
                self.logger.error("Redis ping failed")
                return False

            self.logger.info("✅ Redis connectivity confirmed")

            # Check Redis for stored conversations
            result = self.run_command(
                ["docker", "exec", self.redis_container, "redis-cli", "KEYS", "thread:*"], capture_output=True
            )

            if result.returncode != 0:
                self.logger.error("Failed to query Redis")
                return False

            keys = result.stdout.decode().strip().split("\n")
            thread_keys = [k for k in keys if k.startswith("thread:") and k != "thread:*"]

            if thread_keys:
                self.logger.info(f"✅ Found {len(thread_keys)} conversation threads in Redis")

                # Get details of first thread
                thread_key = thread_keys[0]
                result = self.run_command(
                    ["docker", "exec", self.redis_container, "redis-cli", "GET", thread_key], capture_output=True
                )

                if result.returncode == 0:
                    thread_data = result.stdout.decode()
                    try:
                        parsed = json.loads(thread_data)
                        turns = parsed.get("turns", [])
                        self.logger.info(f"✅ Thread has {len(turns)} turns")
                        return True
                    except json.JSONDecodeError:
                        self.logger.warning("Could not parse thread data")
                        return True
            else:
                # If no existing threads, create a test thread to validate Redis functionality
                self.logger.info("📝 No existing threads found, creating test thread to validate Redis...")

                test_thread_id = "test_thread_validation"
                test_data = {
                    "thread_id": test_thread_id,
                    "turns": [
                        {
                            "tool": "chat",
                            "timestamp": "2025-06-11T16:30:00Z",
                            "prompt": "Test validation prompt"
                        }
                    ]
                }

                # Store test data
                store_result = self.run_command([
                    "docker", "exec", self.redis_container, "redis-cli",
                    "SET", f"thread:{test_thread_id}", json.dumps(test_data)
                ], capture_output=True)

                if store_result.returncode != 0:
                    self.logger.error("Failed to store test data in Redis")
                    return False

                # Retrieve test data
                retrieve_result = self.run_command([
                    "docker", "exec", self.redis_container, "redis-cli",
                    "GET", f"thread:{test_thread_id}"
                ], capture_output=True)

                if retrieve_result.returncode != 0:
                    self.logger.error("Failed to retrieve test data from Redis")
                    return False

                retrieved_data = retrieve_result.stdout.decode()
                try:
                    parsed = json.loads(retrieved_data)
                    if parsed.get("thread_id") == test_thread_id:
                        self.logger.info("✅ Redis read/write validation successful")

                        # Clean up test data
                        self.run_command([
                            "docker", "exec", self.redis_container, "redis-cli",
                            "DEL", f"thread:{test_thread_id}"
                        ], capture_output=True)

                        return True
                    else:
                        self.logger.error("Retrieved data doesn't match stored data")
                        return False
                except json.JSONDecodeError:
                    self.logger.error("Could not parse retrieved test data")
                    return False

        except Exception as e:
            self.logger.error(f"Conversation memory validation failed: {e}")
            return False
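
The same checks can be made from Python without shelling out to redis-cli, which is handy when iterating on this test locally; a sketch assuming the redis-py package is installed and the gemini-mcp-redis container publishes port 6379 on localhost:

import json

import redis  # assumes the redis-py package is available

# Assumes the Redis container exposes 6379 on localhost; adjust host/port as needed.
client = redis.Redis(host="localhost", port=6379, decode_responses=True)

assert client.ping(), "Redis is not reachable"

for key in client.scan_iter("thread:*"):
    thread = json.loads(client.get(key))
    print(key, "turns:", len(thread.get("turns", [])))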