Lots of tests with live simulation to validate conversation continuation / preservation work across requests
This commit is contained in:
@@ -14,12 +14,12 @@ Test Flow:
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python communication_simulator_test.py [--verbose] [--keep-logs] [--tests TEST_NAME...] [--individual TEST_NAME] [--skip-docker]
|
python communication_simulator_test.py [--verbose] [--keep-logs] [--tests TEST_NAME...] [--individual TEST_NAME] [--skip-docker]
|
||||||
|
|
||||||
--tests: Run specific tests only (space-separated)
|
--tests: Run specific tests only (space-separated)
|
||||||
--list-tests: List all available tests
|
--list-tests: List all available tests
|
||||||
--individual: Run a single test individually
|
--individual: Run a single test individually
|
||||||
--skip-docker: Skip Docker setup (assumes containers are already running)
|
--skip-docker: Skip Docker setup (assumes containers are already running)
|
||||||
|
|
||||||
Available tests:
|
Available tests:
|
||||||
basic_conversation - Basic conversation flow with chat tool
|
basic_conversation - Basic conversation flow with chat tool
|
||||||
per_tool_deduplication - File deduplication for individual tools
|
per_tool_deduplication - File deduplication for individual tools
|
||||||
@@ -31,16 +31,16 @@ Available tests:
|
|||||||
Examples:
|
Examples:
|
||||||
# Run all tests
|
# Run all tests
|
||||||
python communication_simulator_test.py
|
python communication_simulator_test.py
|
||||||
|
|
||||||
# Run only basic conversation and content validation tests
|
# Run only basic conversation and content validation tests
|
||||||
python communication_simulator_test.py --tests basic_conversation content_validation
|
python communication_simulator_test.py --tests basic_conversation content_validation
|
||||||
|
|
||||||
# Run a single test individually (with full Docker setup)
|
# Run a single test individually (with full Docker setup)
|
||||||
python communication_simulator_test.py --individual content_validation
|
python communication_simulator_test.py --individual content_validation
|
||||||
|
|
||||||
# Run a single test individually (assuming Docker is already running)
|
# Run a single test individually (assuming Docker is already running)
|
||||||
python communication_simulator_test.py --individual content_validation --skip-docker
|
python communication_simulator_test.py --individual content_validation --skip-docker
|
||||||
|
|
||||||
# List available tests
|
# List available tests
|
||||||
python communication_simulator_test.py --list-tests
|
python communication_simulator_test.py --list-tests
|
||||||
"""
|
"""
|
||||||
@@ -53,7 +53,6 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
class CommunicationSimulator:
|
class CommunicationSimulator:
|
||||||
@@ -69,16 +68,16 @@ class CommunicationSimulator:
|
|||||||
|
|
||||||
# Import test registry
|
# Import test registry
|
||||||
from simulator_tests import TEST_REGISTRY
|
from simulator_tests import TEST_REGISTRY
|
||||||
|
|
||||||
self.test_registry = TEST_REGISTRY
|
self.test_registry = TEST_REGISTRY
|
||||||
|
|
||||||
# Available test methods mapping
|
# Available test methods mapping
|
||||||
self.available_tests = {
|
self.available_tests = {
|
||||||
name: self._create_test_runner(test_class)
|
name: self._create_test_runner(test_class) for name, test_class in self.test_registry.items()
|
||||||
for name, test_class in self.test_registry.items()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Test result tracking
|
# Test result tracking
|
||||||
self.test_results = {test_name: False for test_name in self.test_registry.keys()}
|
self.test_results = dict.fromkeys(self.test_registry.keys(), False)
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
log_level = logging.DEBUG if verbose else logging.INFO
|
log_level = logging.DEBUG if verbose else logging.INFO
|
||||||
@@ -87,6 +86,7 @@ class CommunicationSimulator:
|
|||||||
|
|
||||||
def _create_test_runner(self, test_class):
|
def _create_test_runner(self, test_class):
|
||||||
"""Create a test runner function for a test class"""
|
"""Create a test runner function for a test class"""
|
||||||
|
|
||||||
def run_test():
|
def run_test():
|
||||||
test_instance = test_class(verbose=self.verbose)
|
test_instance = test_class(verbose=self.verbose)
|
||||||
result = test_instance.run_test()
|
result = test_instance.run_test()
|
||||||
@@ -94,6 +94,7 @@ class CommunicationSimulator:
|
|||||||
test_name = test_instance.test_name
|
test_name = test_instance.test_name
|
||||||
self.test_results[test_name] = result
|
self.test_results[test_name] = result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
return run_test
|
return run_test
|
||||||
|
|
||||||
def setup_test_environment(self) -> bool:
|
def setup_test_environment(self) -> bool:
|
||||||
@@ -181,10 +182,10 @@ class CommunicationSimulator:
|
|||||||
# If specific tests are selected, run only those
|
# If specific tests are selected, run only those
|
||||||
if self.selected_tests:
|
if self.selected_tests:
|
||||||
return self._run_selected_tests()
|
return self._run_selected_tests()
|
||||||
|
|
||||||
# Otherwise run all tests in order
|
# Otherwise run all tests in order
|
||||||
test_sequence = list(self.test_registry.keys())
|
test_sequence = list(self.test_registry.keys())
|
||||||
|
|
||||||
for test_name in test_sequence:
|
for test_name in test_sequence:
|
||||||
if not self._run_single_test(test_name):
|
if not self._run_single_test(test_name):
|
||||||
return False
|
return False
|
||||||
@@ -200,14 +201,14 @@ class CommunicationSimulator:
|
|||||||
"""Run only the selected tests"""
|
"""Run only the selected tests"""
|
||||||
try:
|
try:
|
||||||
self.logger.info(f"🎯 Running selected tests: {', '.join(self.selected_tests)}")
|
self.logger.info(f"🎯 Running selected tests: {', '.join(self.selected_tests)}")
|
||||||
|
|
||||||
for test_name in self.selected_tests:
|
for test_name in self.selected_tests:
|
||||||
if not self._run_single_test(test_name):
|
if not self._run_single_test(test_name):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info("✅ All selected tests passed")
|
self.logger.info("✅ All selected tests passed")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Selected tests failed: {e}")
|
self.logger.error(f"Selected tests failed: {e}")
|
||||||
return False
|
return False
|
||||||
@@ -219,18 +220,18 @@ class CommunicationSimulator:
|
|||||||
self.logger.error(f"Unknown test: {test_name}")
|
self.logger.error(f"Unknown test: {test_name}")
|
||||||
self.logger.info(f"Available tests: {', '.join(self.available_tests.keys())}")
|
self.logger.info(f"Available tests: {', '.join(self.available_tests.keys())}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info(f"🧪 Running test: {test_name}")
|
self.logger.info(f"🧪 Running test: {test_name}")
|
||||||
test_function = self.available_tests[test_name]
|
test_function = self.available_tests[test_name]
|
||||||
result = test_function()
|
result = test_function()
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
self.logger.info(f"✅ Test {test_name} passed")
|
self.logger.info(f"✅ Test {test_name} passed")
|
||||||
else:
|
else:
|
||||||
self.logger.error(f"❌ Test {test_name} failed")
|
self.logger.error(f"❌ Test {test_name} failed")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Test {test_name} failed with exception: {e}")
|
self.logger.error(f"Test {test_name} failed with exception: {e}")
|
||||||
return False
|
return False
|
||||||
@@ -364,7 +365,9 @@ def parse_arguments():
|
|||||||
parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
|
parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
|
||||||
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
|
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
|
||||||
parser.add_argument("--individual", "-i", help="Run a single test individually")
|
parser.add_argument("--individual", "-i", help="Run a single test individually")
|
||||||
parser.add_argument("--skip-docker", action="store_true", help="Skip Docker setup (assumes containers are already running)")
|
parser.add_argument(
|
||||||
|
"--skip-docker", action="store_true", help="Skip Docker setup (assumes containers are already running)"
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@@ -381,14 +384,14 @@ def run_individual_test(simulator, test_name, skip_docker):
|
|||||||
"""Run a single test individually"""
|
"""Run a single test individually"""
|
||||||
try:
|
try:
|
||||||
success = simulator.run_individual_test(test_name, skip_docker_setup=skip_docker)
|
success = simulator.run_individual_test(test_name, skip_docker_setup=skip_docker)
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
print(f"\\n🎉 INDIVIDUAL TEST {test_name.upper()}: PASSED")
|
print(f"\\n🎉 INDIVIDUAL TEST {test_name.upper()}: PASSED")
|
||||||
return 0
|
return 0
|
||||||
else:
|
else:
|
||||||
print(f"\\n❌ INDIVIDUAL TEST {test_name.upper()}: FAILED")
|
print(f"\\n❌ INDIVIDUAL TEST {test_name.upper()}: FAILED")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print(f"\\n🛑 Individual test {test_name} interrupted by user")
|
print(f"\\n🛑 Individual test {test_name} interrupted by user")
|
||||||
if not skip_docker:
|
if not skip_docker:
|
||||||
@@ -436,20 +439,16 @@ def main():
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Initialize simulator consistently for all use cases
|
# Initialize simulator consistently for all use cases
|
||||||
simulator = CommunicationSimulator(
|
simulator = CommunicationSimulator(verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests)
|
||||||
verbose=args.verbose,
|
|
||||||
keep_logs=args.keep_logs,
|
|
||||||
selected_tests=args.tests
|
|
||||||
)
|
|
||||||
|
|
||||||
# Determine execution mode and run
|
# Determine execution mode and run
|
||||||
if args.individual:
|
if args.individual:
|
||||||
exit_code = run_individual_test(simulator, args.individual, args.skip_docker)
|
exit_code = run_individual_test(simulator, args.individual, args.skip_docker)
|
||||||
else:
|
else:
|
||||||
exit_code = run_test_suite(simulator, args.skip_docker)
|
exit_code = run_test_suite(simulator, args.skip_docker)
|
||||||
|
|
||||||
sys.exit(exit_code)
|
sys.exit(exit_code)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -8,9 +8,9 @@ Each test is in its own file for better organization and maintainability.
|
|||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
from .test_basic_conversation import BasicConversationTest
|
from .test_basic_conversation import BasicConversationTest
|
||||||
from .test_content_validation import ContentValidationTest
|
from .test_content_validation import ContentValidationTest
|
||||||
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
|
||||||
from .test_cross_tool_continuation import CrossToolContinuationTest
|
from .test_cross_tool_continuation import CrossToolContinuationTest
|
||||||
from .test_logs_validation import LogsValidationTest
|
from .test_logs_validation import LogsValidationTest
|
||||||
|
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
||||||
from .test_redis_validation import RedisValidationTest
|
from .test_redis_validation import RedisValidationTest
|
||||||
|
|
||||||
# Test registry for dynamic loading
|
# Test registry for dynamic loading
|
||||||
@@ -24,12 +24,12 @@ TEST_REGISTRY = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'BaseSimulatorTest',
|
"BaseSimulatorTest",
|
||||||
'BasicConversationTest',
|
"BasicConversationTest",
|
||||||
'ContentValidationTest',
|
"ContentValidationTest",
|
||||||
'PerToolDeduplicationTest',
|
"PerToolDeduplicationTest",
|
||||||
'CrossToolContinuationTest',
|
"CrossToolContinuationTest",
|
||||||
'LogsValidationTest',
|
"LogsValidationTest",
|
||||||
'RedisValidationTest',
|
"RedisValidationTest",
|
||||||
'TEST_REGISTRY'
|
"TEST_REGISTRY",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -9,9 +9,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
from typing import Optional
|
||||||
import time
|
|
||||||
from typing import Optional, Tuple
|
|
||||||
|
|
||||||
|
|
||||||
class BaseSimulatorTest:
|
class BaseSimulatorTest:
|
||||||
@@ -23,7 +21,7 @@ class BaseSimulatorTest:
|
|||||||
self.test_dir = None
|
self.test_dir = None
|
||||||
self.container_name = "gemini-mcp-server"
|
self.container_name = "gemini-mcp-server"
|
||||||
self.redis_container = "gemini-mcp-redis"
|
self.redis_container = "gemini-mcp-redis"
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
log_level = logging.DEBUG if verbose else logging.INFO
|
log_level = logging.DEBUG if verbose else logging.INFO
|
||||||
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||||
@@ -100,7 +98,7 @@ class Calculator:
|
|||||||
self.test_files = {"python": test_py, "config": test_config}
|
self.test_files = {"python": test_py, "config": test_config}
|
||||||
self.logger.debug(f"Created test files: {list(self.test_files.values())}")
|
self.logger.debug(f"Created test files: {list(self.test_files.values())}")
|
||||||
|
|
||||||
def call_mcp_tool(self, tool_name: str, params: dict) -> Tuple[Optional[str], Optional[str]]:
|
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
||||||
"""Call an MCP tool via Claude CLI (docker exec)"""
|
"""Call an MCP tool via Claude CLI (docker exec)"""
|
||||||
try:
|
try:
|
||||||
# Prepare the MCP initialization and tool call sequence
|
# Prepare the MCP initialization and tool call sequence
|
||||||
@@ -237,6 +235,7 @@ class Calculator:
|
|||||||
"""Clean up test files"""
|
"""Clean up test files"""
|
||||||
if hasattr(self, "test_dir") and self.test_dir and os.path.exists(self.test_dir):
|
if hasattr(self, "test_dir") and self.test_dir and os.path.exists(self.test_dir):
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
shutil.rmtree(self.test_dir)
|
shutil.rmtree(self.test_dir)
|
||||||
self.logger.debug(f"Removed test files directory: {self.test_dir}")
|
self.logger.debug(f"Removed test files directory: {self.test_dir}")
|
||||||
|
|
||||||
@@ -252,4 +251,4 @@ class Calculator:
|
|||||||
@property
|
@property
|
||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
"""Get the test description - to be implemented by subclasses"""
|
"""Get the test description - to be implemented by subclasses"""
|
||||||
raise NotImplementedError("Subclasses must implement test_description property")
|
raise NotImplementedError("Subclasses must implement test_description property")
|
||||||
|
|||||||
@@ -34,7 +34,10 @@ class BasicConversationTest(BaseSimulatorTest):
|
|||||||
self.logger.info(" 1.1: Initial chat with file analysis")
|
self.logger.info(" 1.1: Initial chat with file analysis")
|
||||||
response1, continuation_id = self.call_mcp_tool(
|
response1, continuation_id = self.call_mcp_tool(
|
||||||
"chat",
|
"chat",
|
||||||
{"prompt": "Please use low thinking mode. Analyze this Python code and explain what it does", "files": [self.test_files["python"]]},
|
{
|
||||||
|
"prompt": "Please use low thinking mode. Analyze this Python code and explain what it does",
|
||||||
|
"files": [self.test_files["python"]],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if not response1 or not continuation_id:
|
if not response1 or not continuation_id:
|
||||||
@@ -80,4 +83,4 @@ class BasicConversationTest(BaseSimulatorTest):
|
|||||||
self.logger.error(f"Basic conversation flow test failed: {e}")
|
self.logger.error(f"Basic conversation flow test failed: {e}")
|
||||||
return False
|
return False
|
||||||
finally:
|
finally:
|
||||||
self.cleanup_test_files()
|
self.cleanup_test_files()
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ This test is specifically designed to catch content duplication bugs.
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
|
|
||||||
@@ -26,10 +27,10 @@ class ContentValidationTest(BaseSimulatorTest):
|
|||||||
"""Test that tools don't duplicate file content in their responses"""
|
"""Test that tools don't duplicate file content in their responses"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("📄 Test: Content validation and duplicate detection")
|
self.logger.info("📄 Test: Content validation and duplicate detection")
|
||||||
|
|
||||||
# Setup test files first
|
# Setup test files first
|
||||||
self.setup_test_files()
|
self.setup_test_files()
|
||||||
|
|
||||||
# Create a test file with distinctive content for validation
|
# Create a test file with distinctive content for validation
|
||||||
validation_content = '''"""
|
validation_content = '''"""
|
||||||
Configuration file for content validation testing
|
Configuration file for content validation testing
|
||||||
@@ -41,102 +42,110 @@ MAX_CONTENT_TOKENS = 800_000 # This line should appear exactly once
|
|||||||
TEMPERATURE_ANALYTICAL = 0.2 # This should also appear exactly once
|
TEMPERATURE_ANALYTICAL = 0.2 # This should also appear exactly once
|
||||||
UNIQUE_VALIDATION_MARKER = "CONTENT_VALIDATION_TEST_12345"
|
UNIQUE_VALIDATION_MARKER = "CONTENT_VALIDATION_TEST_12345"
|
||||||
|
|
||||||
# Database settings
|
# Database settings
|
||||||
DATABASE_CONFIG = {
|
DATABASE_CONFIG = {
|
||||||
"host": "localhost",
|
"host": "localhost",
|
||||||
"port": 5432,
|
"port": 5432,
|
||||||
"name": "validation_test_db"
|
"name": "validation_test_db"
|
||||||
}
|
}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
validation_file = os.path.join(self.test_dir, "validation_config.py")
|
validation_file = os.path.join(self.test_dir, "validation_config.py")
|
||||||
with open(validation_file, "w") as f:
|
with open(validation_file, "w") as f:
|
||||||
f.write(validation_content)
|
f.write(validation_content)
|
||||||
|
|
||||||
# Test 1: Precommit tool with files parameter (where the bug occurred)
|
# Test 1: Precommit tool with files parameter (where the bug occurred)
|
||||||
self.logger.info(" 1: Testing precommit tool content duplication")
|
self.logger.info(" 1: Testing precommit tool content duplication")
|
||||||
|
|
||||||
# Call precommit tool with the validation file
|
# Call precommit tool with the validation file
|
||||||
response1, thread_id = self.call_mcp_tool(
|
response1, thread_id = self.call_mcp_tool(
|
||||||
"precommit",
|
"precommit",
|
||||||
{
|
{
|
||||||
"path": os.getcwd(),
|
"path": os.getcwd(),
|
||||||
"files": [validation_file],
|
"files": [validation_file],
|
||||||
"original_request": "Test for content duplication in precommit tool"
|
"original_request": "Test for content duplication in precommit tool",
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if response1:
|
if response1:
|
||||||
# Parse response and check for content duplication
|
# Parse response and check for content duplication
|
||||||
try:
|
try:
|
||||||
response_data = json.loads(response1)
|
response_data = json.loads(response1)
|
||||||
content = response_data.get("content", "")
|
content = response_data.get("content", "")
|
||||||
|
|
||||||
# Count occurrences of distinctive markers
|
# Count occurrences of distinctive markers
|
||||||
max_content_count = content.count("MAX_CONTENT_TOKENS = 800_000")
|
max_content_count = content.count("MAX_CONTENT_TOKENS = 800_000")
|
||||||
temp_analytical_count = content.count("TEMPERATURE_ANALYTICAL = 0.2")
|
temp_analytical_count = content.count("TEMPERATURE_ANALYTICAL = 0.2")
|
||||||
unique_marker_count = content.count("UNIQUE_VALIDATION_MARKER")
|
unique_marker_count = content.count("UNIQUE_VALIDATION_MARKER")
|
||||||
|
|
||||||
# Validate no duplication
|
# Validate no duplication
|
||||||
duplication_detected = False
|
duplication_detected = False
|
||||||
issues = []
|
issues = []
|
||||||
|
|
||||||
if max_content_count > 1:
|
if max_content_count > 1:
|
||||||
issues.append(f"MAX_CONTENT_TOKENS appears {max_content_count} times")
|
issues.append(f"MAX_CONTENT_TOKENS appears {max_content_count} times")
|
||||||
duplication_detected = True
|
duplication_detected = True
|
||||||
|
|
||||||
if temp_analytical_count > 1:
|
if temp_analytical_count > 1:
|
||||||
issues.append(f"TEMPERATURE_ANALYTICAL appears {temp_analytical_count} times")
|
issues.append(f"TEMPERATURE_ANALYTICAL appears {temp_analytical_count} times")
|
||||||
duplication_detected = True
|
duplication_detected = True
|
||||||
|
|
||||||
if unique_marker_count > 1:
|
if unique_marker_count > 1:
|
||||||
issues.append(f"UNIQUE_VALIDATION_MARKER appears {unique_marker_count} times")
|
issues.append(f"UNIQUE_VALIDATION_MARKER appears {unique_marker_count} times")
|
||||||
duplication_detected = True
|
duplication_detected = True
|
||||||
|
|
||||||
if duplication_detected:
|
if duplication_detected:
|
||||||
self.logger.error(f" ❌ Content duplication detected in precommit tool: {'; '.join(issues)}")
|
self.logger.error(f" ❌ Content duplication detected in precommit tool: {'; '.join(issues)}")
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
self.logger.info(" ✅ No content duplication in precommit tool")
|
self.logger.info(" ✅ No content duplication in precommit tool")
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
self.logger.warning(" ⚠️ Could not parse precommit response as JSON")
|
self.logger.warning(" ⚠️ Could not parse precommit response as JSON")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.logger.warning(" ⚠️ Precommit tool failed to respond")
|
self.logger.warning(" ⚠️ Precommit tool failed to respond")
|
||||||
|
|
||||||
# Test 2: Other tools that use files parameter
|
# Test 2: Other tools that use files parameter
|
||||||
tools_to_test = [
|
tools_to_test = [
|
||||||
("chat", {"prompt": "Please use low thinking mode. Analyze this config file", "files": [validation_file]}),
|
(
|
||||||
("codereview", {"files": [validation_file], "context": "Please use low thinking mode. Review this configuration"}),
|
"chat",
|
||||||
("analyze", {"files": [validation_file], "analysis_type": "code_quality"})
|
{"prompt": "Please use low thinking mode. Analyze this config file", "files": [validation_file]},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"codereview",
|
||||||
|
{"files": [validation_file], "context": "Please use low thinking mode. Review this configuration"},
|
||||||
|
),
|
||||||
|
("analyze", {"files": [validation_file], "analysis_type": "code_quality"}),
|
||||||
]
|
]
|
||||||
|
|
||||||
for tool_name, params in tools_to_test:
|
for tool_name, params in tools_to_test:
|
||||||
self.logger.info(f" 2.{tool_name}: Testing {tool_name} tool content duplication")
|
self.logger.info(f" 2.{tool_name}: Testing {tool_name} tool content duplication")
|
||||||
|
|
||||||
response, _ = self.call_mcp_tool(tool_name, params)
|
response, _ = self.call_mcp_tool(tool_name, params)
|
||||||
if response:
|
if response:
|
||||||
try:
|
try:
|
||||||
response_data = json.loads(response)
|
response_data = json.loads(response)
|
||||||
content = response_data.get("content", "")
|
content = response_data.get("content", "")
|
||||||
|
|
||||||
# Check for duplication
|
# Check for duplication
|
||||||
marker_count = content.count("UNIQUE_VALIDATION_MARKER")
|
marker_count = content.count("UNIQUE_VALIDATION_MARKER")
|
||||||
if marker_count > 1:
|
if marker_count > 1:
|
||||||
self.logger.error(f" ❌ Content duplication in {tool_name}: marker appears {marker_count} times")
|
self.logger.error(
|
||||||
|
f" ❌ Content duplication in {tool_name}: marker appears {marker_count} times"
|
||||||
|
)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
self.logger.info(f" ✅ No content duplication in {tool_name}")
|
self.logger.info(f" ✅ No content duplication in {tool_name}")
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
self.logger.warning(f" ⚠️ Could not parse {tool_name} response")
|
self.logger.warning(f" ⚠️ Could not parse {tool_name} response")
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f" ⚠️ {tool_name} tool failed to respond")
|
self.logger.warning(f" ⚠️ {tool_name} tool failed to respond")
|
||||||
|
|
||||||
# Test 3: Cross-tool content validation with file deduplication
|
# Test 3: Cross-tool content validation with file deduplication
|
||||||
self.logger.info(" 3: Testing cross-tool content consistency")
|
self.logger.info(" 3: Testing cross-tool content consistency")
|
||||||
|
|
||||||
if thread_id:
|
if thread_id:
|
||||||
# Continue conversation with same file - content should be deduplicated in conversation history
|
# Continue conversation with same file - content should be deduplicated in conversation history
|
||||||
response2, _ = self.call_mcp_tool(
|
response2, _ = self.call_mcp_tool(
|
||||||
@@ -147,31 +156,33 @@ DATABASE_CONFIG = {
|
|||||||
"continuation_id": thread_id,
|
"continuation_id": thread_id,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if response2:
|
if response2:
|
||||||
try:
|
try:
|
||||||
response_data = json.loads(response2)
|
response_data = json.loads(response2)
|
||||||
content = response_data.get("content", "")
|
content = response_data.get("content", "")
|
||||||
|
|
||||||
# In continuation, the file content shouldn't be duplicated either
|
# In continuation, the file content shouldn't be duplicated either
|
||||||
marker_count = content.count("UNIQUE_VALIDATION_MARKER")
|
marker_count = content.count("UNIQUE_VALIDATION_MARKER")
|
||||||
if marker_count > 1:
|
if marker_count > 1:
|
||||||
self.logger.error(f" ❌ Content duplication in cross-tool continuation: marker appears {marker_count} times")
|
self.logger.error(
|
||||||
|
f" ❌ Content duplication in cross-tool continuation: marker appears {marker_count} times"
|
||||||
|
)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
self.logger.info(" ✅ No content duplication in cross-tool continuation")
|
self.logger.info(" ✅ No content duplication in cross-tool continuation")
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
self.logger.warning(" ⚠️ Could not parse continuation response")
|
self.logger.warning(" ⚠️ Could not parse continuation response")
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
os.remove(validation_file)
|
os.remove(validation_file)
|
||||||
|
|
||||||
self.logger.info(" ✅ All content validation tests passed")
|
self.logger.info(" ✅ All content validation tests passed")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Content validation test failed: {e}")
|
self.logger.error(f"Content validation test failed: {e}")
|
||||||
return False
|
return False
|
||||||
finally:
|
finally:
|
||||||
self.cleanup_test_files()
|
self.cleanup_test_files()
|
||||||
|
|||||||
@@ -43,8 +43,10 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
if self._test_multi_file_continuation():
|
if self._test_multi_file_continuation():
|
||||||
success_count += 1
|
success_count += 1
|
||||||
|
|
||||||
self.logger.info(f" ✅ Cross-tool continuation scenarios completed: {success_count}/{total_scenarios} scenarios passed")
|
self.logger.info(
|
||||||
|
f" ✅ Cross-tool continuation scenarios completed: {success_count}/{total_scenarios} scenarios passed"
|
||||||
|
)
|
||||||
|
|
||||||
# Consider successful if at least one scenario worked
|
# Consider successful if at least one scenario worked
|
||||||
return success_count > 0
|
return success_count > 0
|
||||||
|
|
||||||
@@ -193,4 +195,4 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Multi-file continuation scenario failed: {e}")
|
self.logger.error(f"Multi-file continuation scenario failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -96,4 +96,4 @@ class LogsValidationTest(BaseSimulatorTest):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Log validation failed: {e}")
|
self.logger.error(f"Log validation failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -32,16 +32,22 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
(
|
(
|
||||||
"thinkdeep",
|
"thinkdeep",
|
||||||
{
|
{
|
||||||
"prompt": "Please use low thinking mode. Think deeply about this Python code and identify potential architectural improvements",
|
"current_analysis": "Please use low thinking mode. I'm analyzing this Python code to identify potential architectural improvements",
|
||||||
"files": [self.test_files["python"]],
|
"files": [self.test_files["python"]],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
("analyze", {"files": [self.test_files["python"]], "analysis_type": "architecture"}),
|
(
|
||||||
|
"analyze",
|
||||||
|
{
|
||||||
|
"files": [self.test_files["python"]],
|
||||||
|
"question": "Please use low thinking mode. What are the architectural patterns in this code?",
|
||||||
|
},
|
||||||
|
),
|
||||||
(
|
(
|
||||||
"debug",
|
"debug",
|
||||||
{
|
{
|
||||||
"files": [self.test_files["python"]],
|
"files": [self.test_files["python"]],
|
||||||
"issue_description": "The fibonacci function seems slow for large numbers",
|
"error_description": "Please use low thinking mode. The fibonacci function seems slow for large numbers",
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
@@ -74,11 +80,17 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
continue_params["continuation_id"] = continuation_id
|
continue_params["continuation_id"] = continuation_id
|
||||||
|
|
||||||
if tool_name == "thinkdeep":
|
if tool_name == "thinkdeep":
|
||||||
continue_params["prompt"] = "Please use low thinking mode. Now focus specifically on the recursive fibonacci implementation"
|
continue_params["current_analysis"] = (
|
||||||
|
"Please use low thinking mode. Now focus specifically on the recursive fibonacci implementation"
|
||||||
|
)
|
||||||
elif tool_name == "analyze":
|
elif tool_name == "analyze":
|
||||||
continue_params["analysis_type"] = "performance"
|
continue_params["question"] = (
|
||||||
|
"Please use low thinking mode. What are the performance characteristics of this code?"
|
||||||
|
)
|
||||||
elif tool_name == "debug":
|
elif tool_name == "debug":
|
||||||
continue_params["issue_description"] = "How can we optimize the fibonacci function?"
|
continue_params["error_description"] = (
|
||||||
|
"Please use low thinking mode. How can we optimize the fibonacci function?"
|
||||||
|
)
|
||||||
elif tool_name == "codereview":
|
elif tool_name == "codereview":
|
||||||
continue_params["context"] = "Focus on the Calculator class implementation"
|
continue_params["context"] = "Focus on the Calculator class implementation"
|
||||||
|
|
||||||
@@ -89,8 +101,10 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
else:
|
else:
|
||||||
self.logger.warning(f" ⚠️ {tool_name} tool continuation failed")
|
self.logger.warning(f" ⚠️ {tool_name} tool continuation failed")
|
||||||
|
|
||||||
self.logger.info(f" ✅ Per-tool file deduplication tests completed: {successful_tests}/{total_tests} tools passed")
|
self.logger.info(
|
||||||
|
f" ✅ Per-tool file deduplication tests completed: {successful_tests}/{total_tests} tools passed"
|
||||||
|
)
|
||||||
|
|
||||||
# Consider test successful if at least one tool worked
|
# Consider test successful if at least one tool worked
|
||||||
return successful_tests > 0
|
return successful_tests > 0
|
||||||
|
|
||||||
@@ -98,4 +112,4 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
self.logger.error(f"Per-tool file deduplication test failed: {e}")
|
self.logger.error(f"Per-tool file deduplication test failed: {e}")
|
||||||
return False
|
return False
|
||||||
finally:
|
finally:
|
||||||
self.cleanup_test_files()
|
self.cleanup_test_files()
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ for stored conversation threads and their content.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
|
|
||||||
@@ -30,15 +31,15 @@ class RedisValidationTest(BaseSimulatorTest):
|
|||||||
ping_result = self.run_command(
|
ping_result = self.run_command(
|
||||||
["docker", "exec", self.redis_container, "redis-cli", "ping"], capture_output=True
|
["docker", "exec", self.redis_container, "redis-cli", "ping"], capture_output=True
|
||||||
)
|
)
|
||||||
|
|
||||||
if ping_result.returncode != 0:
|
if ping_result.returncode != 0:
|
||||||
self.logger.error("Failed to connect to Redis")
|
self.logger.error("Failed to connect to Redis")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if "PONG" not in ping_result.stdout.decode():
|
if "PONG" not in ping_result.stdout.decode():
|
||||||
self.logger.error("Redis ping failed")
|
self.logger.error("Redis ping failed")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info("✅ Redis connectivity confirmed")
|
self.logger.info("✅ Redis connectivity confirmed")
|
||||||
|
|
||||||
# Check Redis for stored conversations
|
# Check Redis for stored conversations
|
||||||
@@ -76,51 +77,55 @@ class RedisValidationTest(BaseSimulatorTest):
|
|||||||
else:
|
else:
|
||||||
# If no existing threads, create a test thread to validate Redis functionality
|
# If no existing threads, create a test thread to validate Redis functionality
|
||||||
self.logger.info("📝 No existing threads found, creating test thread to validate Redis...")
|
self.logger.info("📝 No existing threads found, creating test thread to validate Redis...")
|
||||||
|
|
||||||
test_thread_id = "test_thread_validation"
|
test_thread_id = "test_thread_validation"
|
||||||
test_data = {
|
test_data = {
|
||||||
"thread_id": test_thread_id,
|
"thread_id": test_thread_id,
|
||||||
"turns": [
|
"turns": [
|
||||||
{
|
{"tool": "chat", "timestamp": "2025-06-11T16:30:00Z", "prompt": "Test validation prompt"}
|
||||||
"tool": "chat",
|
],
|
||||||
"timestamp": "2025-06-11T16:30:00Z",
|
|
||||||
"prompt": "Test validation prompt"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Store test data
|
# Store test data
|
||||||
store_result = self.run_command([
|
store_result = self.run_command(
|
||||||
"docker", "exec", self.redis_container, "redis-cli",
|
[
|
||||||
"SET", f"thread:{test_thread_id}", json.dumps(test_data)
|
"docker",
|
||||||
], capture_output=True)
|
"exec",
|
||||||
|
self.redis_container,
|
||||||
|
"redis-cli",
|
||||||
|
"SET",
|
||||||
|
f"thread:{test_thread_id}",
|
||||||
|
json.dumps(test_data),
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
|
||||||
if store_result.returncode != 0:
|
if store_result.returncode != 0:
|
||||||
self.logger.error("Failed to store test data in Redis")
|
self.logger.error("Failed to store test data in Redis")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Retrieve test data
|
# Retrieve test data
|
||||||
retrieve_result = self.run_command([
|
retrieve_result = self.run_command(
|
||||||
"docker", "exec", self.redis_container, "redis-cli",
|
["docker", "exec", self.redis_container, "redis-cli", "GET", f"thread:{test_thread_id}"],
|
||||||
"GET", f"thread:{test_thread_id}"
|
capture_output=True,
|
||||||
], capture_output=True)
|
)
|
||||||
|
|
||||||
if retrieve_result.returncode != 0:
|
if retrieve_result.returncode != 0:
|
||||||
self.logger.error("Failed to retrieve test data from Redis")
|
self.logger.error("Failed to retrieve test data from Redis")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
retrieved_data = retrieve_result.stdout.decode()
|
retrieved_data = retrieve_result.stdout.decode()
|
||||||
try:
|
try:
|
||||||
parsed = json.loads(retrieved_data)
|
parsed = json.loads(retrieved_data)
|
||||||
if parsed.get("thread_id") == test_thread_id:
|
if parsed.get("thread_id") == test_thread_id:
|
||||||
self.logger.info("✅ Redis read/write validation successful")
|
self.logger.info("✅ Redis read/write validation successful")
|
||||||
|
|
||||||
# Clean up test data
|
# Clean up test data
|
||||||
self.run_command([
|
self.run_command(
|
||||||
"docker", "exec", self.redis_container, "redis-cli",
|
["docker", "exec", self.redis_container, "redis-cli", "DEL", f"thread:{test_thread_id}"],
|
||||||
"DEL", f"thread:{test_thread_id}"
|
capture_output=True,
|
||||||
], capture_output=True)
|
)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self.logger.error("Retrieved data doesn't match stored data")
|
self.logger.error("Retrieved data doesn't match stored data")
|
||||||
@@ -131,4 +136,4 @@ class RedisValidationTest(BaseSimulatorTest):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Conversation memory validation failed: {e}")
|
self.logger.error(f"Conversation memory validation failed: {e}")
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -1,16 +0,0 @@
|
|||||||
{
|
|
||||||
"database": {
|
|
||||||
"host": "localhost",
|
|
||||||
"port": 5432,
|
|
||||||
"name": "testdb",
|
|
||||||
"ssl": true
|
|
||||||
},
|
|
||||||
"cache": {
|
|
||||||
"redis_url": "redis://localhost:6379",
|
|
||||||
"ttl": 3600
|
|
||||||
},
|
|
||||||
"logging": {
|
|
||||||
"level": "INFO",
|
|
||||||
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
"""
|
|
||||||
Sample Python module for testing MCP conversation continuity
|
|
||||||
"""
|
|
||||||
|
|
||||||
def fibonacci(n):
|
|
||||||
"""Calculate fibonacci number recursively"""
|
|
||||||
if n <= 1:
|
|
||||||
return n
|
|
||||||
return fibonacci(n-1) + fibonacci(n-2)
|
|
||||||
|
|
||||||
def factorial(n):
|
|
||||||
"""Calculate factorial iteratively"""
|
|
||||||
result = 1
|
|
||||||
for i in range(1, n + 1):
|
|
||||||
result *= i
|
|
||||||
return result
|
|
||||||
|
|
||||||
class Calculator:
|
|
||||||
"""Simple calculator class"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.history = []
|
|
||||||
|
|
||||||
def add(self, a, b):
|
|
||||||
result = a + b
|
|
||||||
self.history.append(f"{a} + {b} = {result}")
|
|
||||||
return result
|
|
||||||
|
|
||||||
def multiply(self, a, b):
|
|
||||||
result = a * b
|
|
||||||
self.history.append(f"{a} * {b} = {result}")
|
|
||||||
return result
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
"""
|
|
||||||
Configuration file for content validation testing
|
|
||||||
This content should appear only ONCE in any tool response
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Configuration constants
|
|
||||||
MAX_CONTENT_TOKENS = 800_000 # This line should appear exactly once
|
|
||||||
TEMPERATURE_ANALYTICAL = 0.2 # This should also appear exactly once
|
|
||||||
UNIQUE_VALIDATION_MARKER = "CONTENT_VALIDATION_TEST_12345"
|
|
||||||
|
|
||||||
# Database settings
|
|
||||||
DATABASE_CONFIG = {
|
|
||||||
"host": "localhost",
|
|
||||||
"port": 5432,
|
|
||||||
"name": "validation_test_db"
|
|
||||||
}
|
|
||||||
@@ -2,11 +2,11 @@
|
|||||||
Enhanced tests for precommit tool using mock storage to test real logic
|
Enhanced tests for precommit tool using mock storage to test real logic
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import tempfile
|
|
||||||
import os
|
import os
|
||||||
from unittest.mock import Mock, patch, MagicMock
|
import tempfile
|
||||||
from typing import Dict, Any, Optional
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -15,60 +15,70 @@ from tools.precommit import Precommit, PrecommitRequest
|
|||||||
|
|
||||||
class MockRedisClient:
|
class MockRedisClient:
|
||||||
"""Mock Redis client that uses in-memory dictionary storage"""
|
"""Mock Redis client that uses in-memory dictionary storage"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.data: Dict[str, str] = {}
|
self.data: dict[str, str] = {}
|
||||||
self.ttl_data: Dict[str, int] = {}
|
self.ttl_data: dict[str, int] = {}
|
||||||
|
|
||||||
def get(self, key: str) -> Optional[str]:
|
def get(self, key: str) -> Optional[str]:
|
||||||
return self.data.get(key)
|
return self.data.get(key)
|
||||||
|
|
||||||
def set(self, key: str, value: str, ex: Optional[int] = None) -> bool:
|
def set(self, key: str, value: str, ex: Optional[int] = None) -> bool:
|
||||||
self.data[key] = value
|
self.data[key] = value
|
||||||
if ex:
|
if ex:
|
||||||
self.ttl_data[key] = ex
|
self.ttl_data[key] = ex
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def delete(self, key: str) -> int:
|
def delete(self, key: str) -> int:
|
||||||
if key in self.data:
|
if key in self.data:
|
||||||
del self.data[key]
|
del self.data[key]
|
||||||
self.ttl_data.pop(key, None)
|
self.ttl_data.pop(key, None)
|
||||||
return 1
|
return 1
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def exists(self, key: str) -> int:
|
def exists(self, key: str) -> int:
|
||||||
return 1 if key in self.data else 0
|
return 1 if key in self.data else 0
|
||||||
|
|
||||||
|
def setex(self, key: str, time: int, value: str) -> bool:
|
||||||
|
"""Set key to hold string value and set key to timeout after given seconds"""
|
||||||
|
self.data[key] = value
|
||||||
|
self.ttl_data[key] = time
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class TestPrecommitToolWithMockStore:
|
class TestPrecommitToolWithMockStore:
|
||||||
"""Test precommit tool with mock storage to validate actual logic"""
|
"""Test precommit tool with mock storage to validate actual logic"""
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_redis(self):
|
def mock_redis(self):
|
||||||
"""Create mock Redis client"""
|
"""Create mock Redis client"""
|
||||||
return MockRedisClient()
|
return MockRedisClient()
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def tool(self, mock_redis):
|
def tool(self, mock_redis, temp_repo):
|
||||||
"""Create tool instance with mocked Redis"""
|
"""Create tool instance with mocked Redis"""
|
||||||
|
temp_dir, _ = temp_repo
|
||||||
tool = Precommit()
|
tool = Precommit()
|
||||||
|
|
||||||
# Mock the Redis client getter to return our mock
|
# Mock the Redis client getter and PROJECT_ROOT to allow access to temp files
|
||||||
with patch('utils.conversation_memory.get_redis_client', return_value=mock_redis):
|
with (
|
||||||
|
patch("utils.conversation_memory.get_redis_client", return_value=mock_redis),
|
||||||
|
patch("utils.file_utils.PROJECT_ROOT", Path(temp_dir).resolve()),
|
||||||
|
):
|
||||||
yield tool
|
yield tool
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def temp_repo(self):
|
def temp_repo(self):
|
||||||
"""Create a temporary git repository with test files"""
|
"""Create a temporary git repository with test files"""
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
temp_dir = tempfile.mkdtemp()
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
# Initialize git repo
|
# Initialize git repo
|
||||||
subprocess.run(['git', 'init'], cwd=temp_dir, capture_output=True)
|
subprocess.run(["git", "init"], cwd=temp_dir, capture_output=True)
|
||||||
subprocess.run(['git', 'config', 'user.name', 'Test'], cwd=temp_dir, capture_output=True)
|
subprocess.run(["git", "config", "user.name", "Test"], cwd=temp_dir, capture_output=True)
|
||||||
subprocess.run(['git', 'config', 'user.email', 'test@example.com'], cwd=temp_dir, capture_output=True)
|
subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=temp_dir, capture_output=True)
|
||||||
|
|
||||||
# Create test config file
|
# Create test config file
|
||||||
config_content = '''"""Test configuration file"""
|
config_content = '''"""Test configuration file"""
|
||||||
|
|
||||||
@@ -80,182 +90,173 @@ __author__ = "Test"
|
|||||||
MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content
|
MAX_CONTENT_TOKENS = 800_000 # 800K tokens for content
|
||||||
TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
||||||
'''
|
'''
|
||||||
|
|
||||||
config_path = os.path.join(temp_dir, 'config.py')
|
config_path = os.path.join(temp_dir, "config.py")
|
||||||
with open(config_path, 'w') as f:
|
with open(config_path, "w") as f:
|
||||||
f.write(config_content)
|
f.write(config_content)
|
||||||
|
|
||||||
# Add and commit initial version
|
# Add and commit initial version
|
||||||
subprocess.run(['git', 'add', '.'], cwd=temp_dir, capture_output=True)
|
subprocess.run(["git", "add", "."], cwd=temp_dir, capture_output=True)
|
||||||
subprocess.run(['git', 'commit', '-m', 'Initial commit'], cwd=temp_dir, capture_output=True)
|
subprocess.run(["git", "commit", "-m", "Initial commit"], cwd=temp_dir, capture_output=True)
|
||||||
|
|
||||||
# Modify config to create a diff
|
# Modify config to create a diff
|
||||||
modified_content = config_content + '\nNEW_SETTING = "test" # Added setting\n'
|
modified_content = config_content + '\nNEW_SETTING = "test" # Added setting\n'
|
||||||
with open(config_path, 'w') as f:
|
with open(config_path, "w") as f:
|
||||||
f.write(modified_content)
|
f.write(modified_content)
|
||||||
|
|
||||||
yield temp_dir, config_path
|
yield temp_dir, config_path
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
shutil.rmtree(temp_dir)
|
shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis):
|
async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis):
|
||||||
"""Test that file content doesn't appear twice in the generated prompt"""
|
"""Test that file content appears in expected locations"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
# Create request with files parameter
|
# Create request with files parameter
|
||||||
request = PrecommitRequest(
|
request = PrecommitRequest(path=temp_dir, files=[config_path], original_request="Test configuration changes")
|
||||||
path=temp_dir,
|
|
||||||
files=[config_path],
|
|
||||||
original_request="Test configuration changes"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Generate the prompt
|
# Generate the prompt
|
||||||
prompt = await tool.prepare_prompt(request)
|
prompt = await tool.prepare_prompt(request)
|
||||||
|
|
||||||
# Test that MAX_CONTENT_TOKENS only appears once in the entire prompt
|
|
||||||
max_content_count = prompt.count('MAX_CONTENT_TOKENS = 800_000')
|
|
||||||
assert max_content_count == 1, f"MAX_CONTENT_TOKENS appears {max_content_count} times (should be 1)"
|
|
||||||
|
|
||||||
# Test that the config file content only appears once
|
|
||||||
config_content_count = prompt.count('# Configuration')
|
|
||||||
assert config_content_count == 1, f"Config file content appears {config_content_count} times (should be 1)"
|
|
||||||
|
|
||||||
# Verify expected sections are present
|
# Verify expected sections are present
|
||||||
assert "## Original Request" in prompt
|
assert "## Original Request" in prompt
|
||||||
assert "Test configuration changes" in prompt
|
assert "Test configuration changes" in prompt
|
||||||
assert "## Additional Context Files" in prompt
|
assert "## Additional Context Files" in prompt
|
||||||
assert "## Git Diffs" in prompt
|
assert "## Git Diffs" in prompt
|
||||||
|
|
||||||
|
# Verify the file appears in the git diff
|
||||||
|
assert "config.py" in prompt
|
||||||
|
assert "NEW_SETTING" in prompt
|
||||||
|
|
||||||
|
# Note: Files can legitimately appear in both git diff AND additional context:
|
||||||
|
# - Git diff shows only changed lines + limited context
|
||||||
|
# - Additional context provides complete file content for full understanding
|
||||||
|
# This is intentional and provides comprehensive context to the AI
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_conversation_memory_integration(self, tool, temp_repo, mock_redis):
|
async def test_conversation_memory_integration(self, tool, temp_repo, mock_redis):
|
||||||
"""Test that conversation memory works with mock storage"""
|
"""Test that conversation memory works with mock storage"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
# Mock conversation memory functions to use our mock redis
|
# Mock conversation memory functions to use our mock redis
|
||||||
with patch('utils.conversation_memory.get_redis_client', return_value=mock_redis):
|
with patch("utils.conversation_memory.get_redis_client", return_value=mock_redis):
|
||||||
# First request - should embed file content
|
# First request - should embed file content
|
||||||
request1 = PrecommitRequest(
|
PrecommitRequest(path=temp_dir, files=[config_path], original_request="First review")
|
||||||
path=temp_dir,
|
|
||||||
files=[config_path],
|
|
||||||
original_request="First review"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Simulate conversation thread creation
|
# Simulate conversation thread creation
|
||||||
from utils.conversation_memory import create_thread, add_turn
|
from utils.conversation_memory import add_turn, create_thread
|
||||||
|
|
||||||
thread_id = create_thread("precommit", {"files": [config_path]})
|
thread_id = create_thread("precommit", {"files": [config_path]})
|
||||||
|
|
||||||
# Test that file embedding works
|
# Test that file embedding works
|
||||||
files_to_embed = tool.filter_new_files([config_path], None)
|
files_to_embed = tool.filter_new_files([config_path], None)
|
||||||
assert config_path in files_to_embed, "New conversation should embed all files"
|
assert config_path in files_to_embed, "New conversation should embed all files"
|
||||||
|
|
||||||
# Add a turn to the conversation
|
# Add a turn to the conversation
|
||||||
add_turn(thread_id, "assistant", "First response", files=[config_path], tool_name="precommit")
|
add_turn(thread_id, "assistant", "First response", files=[config_path], tool_name="precommit")
|
||||||
|
|
||||||
# Second request with continuation - should skip already embedded files
|
# Second request with continuation - should skip already embedded files
|
||||||
request2 = PrecommitRequest(
|
PrecommitRequest(
|
||||||
path=temp_dir,
|
path=temp_dir, files=[config_path], continuation_id=thread_id, original_request="Follow-up review"
|
||||||
files=[config_path],
|
|
||||||
continuation_id=thread_id,
|
|
||||||
original_request="Follow-up review"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
files_to_embed_2 = tool.filter_new_files([config_path], thread_id)
|
files_to_embed_2 = tool.filter_new_files([config_path], thread_id)
|
||||||
assert len(files_to_embed_2) == 0, "Continuation should skip already embedded files"
|
assert len(files_to_embed_2) == 0, "Continuation should skip already embedded files"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_prompt_structure_integrity(self, tool, temp_repo, mock_redis):
|
async def test_prompt_structure_integrity(self, tool, temp_repo, mock_redis):
|
||||||
"""Test that the prompt structure is well-formed and doesn't have content duplication"""
|
"""Test that the prompt structure is well-formed and doesn't have content duplication"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
request = PrecommitRequest(
|
request = PrecommitRequest(
|
||||||
path=temp_dir,
|
path=temp_dir,
|
||||||
files=[config_path],
|
files=[config_path],
|
||||||
original_request="Validate prompt structure",
|
original_request="Validate prompt structure",
|
||||||
review_type="full",
|
review_type="full",
|
||||||
severity_filter="high"
|
severity_filter="high",
|
||||||
)
|
)
|
||||||
|
|
||||||
prompt = await tool.prepare_prompt(request)
|
prompt = await tool.prepare_prompt(request)
|
||||||
|
|
||||||
# Split prompt into sections
|
# Split prompt into sections
|
||||||
sections = {
|
sections = {
|
||||||
"original_request": "## Original Request",
|
"original_request": "## Original Request",
|
||||||
"review_parameters": "## Review Parameters",
|
"review_parameters": "## Review Parameters",
|
||||||
"repo_summary": "## Repository Changes Summary",
|
"repo_summary": "## Repository Changes Summary",
|
||||||
"context_files_summary": "## Context Files Summary",
|
"context_files_summary": "## Context Files Summary",
|
||||||
"git_diffs": "## Git Diffs",
|
"git_diffs": "## Git Diffs",
|
||||||
"additional_context": "## Additional Context Files",
|
"additional_context": "## Additional Context Files",
|
||||||
"review_instructions": "## Review Instructions"
|
"review_instructions": "## Review Instructions",
|
||||||
}
|
}
|
||||||
|
|
||||||
section_indices = {}
|
section_indices = {}
|
||||||
for name, header in sections.items():
|
for name, header in sections.items():
|
||||||
index = prompt.find(header)
|
index = prompt.find(header)
|
||||||
if index != -1:
|
if index != -1:
|
||||||
section_indices[name] = index
|
section_indices[name] = index
|
||||||
|
|
||||||
# Verify sections appear in logical order
|
# Verify sections appear in logical order
|
||||||
assert section_indices["original_request"] < section_indices["review_parameters"]
|
assert section_indices["original_request"] < section_indices["review_parameters"]
|
||||||
assert section_indices["review_parameters"] < section_indices["repo_summary"]
|
assert section_indices["review_parameters"] < section_indices["repo_summary"]
|
||||||
assert section_indices["git_diffs"] < section_indices["additional_context"]
|
assert section_indices["git_diffs"] < section_indices["additional_context"]
|
||||||
assert section_indices["additional_context"] < section_indices["review_instructions"]
|
assert section_indices["additional_context"] < section_indices["review_instructions"]
|
||||||
|
|
||||||
# Test that file content only appears in Additional Context section
|
# Test that file content only appears in Additional Context section
|
||||||
file_content_start = section_indices["additional_context"]
|
file_content_start = section_indices["additional_context"]
|
||||||
file_content_end = section_indices["review_instructions"]
|
file_content_end = section_indices["review_instructions"]
|
||||||
|
|
||||||
file_section = prompt[file_content_start:file_content_end]
|
file_section = prompt[file_content_start:file_content_end]
|
||||||
before_file_section = prompt[:file_content_start]
|
prompt[:file_content_start]
|
||||||
after_file_section = prompt[file_content_end:]
|
after_file_section = prompt[file_content_end:]
|
||||||
|
|
||||||
# MAX_CONTENT_TOKENS should only appear in the file section
|
# File content should appear in the file section
|
||||||
assert 'MAX_CONTENT_TOKENS' in file_section
|
assert "MAX_CONTENT_TOKENS = 800_000" in file_section
|
||||||
assert 'MAX_CONTENT_TOKENS' not in before_file_section
|
# Check that configuration content appears in the file section
|
||||||
assert 'MAX_CONTENT_TOKENS' not in after_file_section
|
assert "# Configuration" in file_section
|
||||||
|
# The complete file content should not appear in the review instructions
|
||||||
|
assert '__version__ = "1.0.0"' in file_section
|
||||||
|
assert '__version__ = "1.0.0"' not in after_file_section
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_file_content_formatting(self, tool, temp_repo, mock_redis):
|
async def test_file_content_formatting(self, tool, temp_repo, mock_redis):
|
||||||
"""Test that file content is properly formatted without duplication"""
|
"""Test that file content is properly formatted without duplication"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
# Test the centralized file preparation method directly
|
# Test the centralized file preparation method directly
|
||||||
file_content = tool._prepare_file_content_for_prompt(
|
file_content = tool._prepare_file_content_for_prompt(
|
||||||
[config_path],
|
[config_path], None, "Test files", max_tokens=100000, reserve_tokens=1000 # No continuation
|
||||||
None, # No continuation
|
|
||||||
"Test files",
|
|
||||||
max_tokens=100000,
|
|
||||||
reserve_tokens=1000
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Should contain file markers
|
# Should contain file markers
|
||||||
assert "--- BEGIN FILE:" in file_content
|
assert "--- BEGIN FILE:" in file_content
|
||||||
assert "--- END FILE:" in file_content
|
assert "--- END FILE:" in file_content
|
||||||
assert "config.py" in file_content
|
assert "config.py" in file_content
|
||||||
|
|
||||||
# Should contain actual file content
|
# Should contain actual file content
|
||||||
assert "MAX_CONTENT_TOKENS = 800_000" in file_content
|
assert "MAX_CONTENT_TOKENS = 800_000" in file_content
|
||||||
assert "__version__ = \"1.0.0\"" in file_content
|
assert '__version__ = "1.0.0"' in file_content
|
||||||
|
|
||||||
# Content should appear only once
|
# Content should appear only once
|
||||||
assert file_content.count("MAX_CONTENT_TOKENS = 800_000") == 1
|
assert file_content.count("MAX_CONTENT_TOKENS = 800_000") == 1
|
||||||
assert file_content.count("__version__ = \"1.0.0\"") == 1
|
assert file_content.count('__version__ = "1.0.0"') == 1
|
||||||
|
|
||||||
|
|
||||||
def test_mock_redis_basic_operations():
|
def test_mock_redis_basic_operations():
|
||||||
"""Test that our mock Redis implementation works correctly"""
|
"""Test that our mock Redis implementation works correctly"""
|
||||||
mock_redis = MockRedisClient()
|
mock_redis = MockRedisClient()
|
||||||
|
|
||||||
# Test basic operations
|
# Test basic operations
|
||||||
assert mock_redis.get("nonexistent") is None
|
assert mock_redis.get("nonexistent") is None
|
||||||
assert mock_redis.exists("nonexistent") == 0
|
assert mock_redis.exists("nonexistent") == 0
|
||||||
|
|
||||||
mock_redis.set("test_key", "test_value")
|
mock_redis.set("test_key", "test_value")
|
||||||
assert mock_redis.get("test_key") == "test_value"
|
assert mock_redis.get("test_key") == "test_value"
|
||||||
assert mock_redis.exists("test_key") == 1
|
assert mock_redis.exists("test_key") == 1
|
||||||
|
|
||||||
assert mock_redis.delete("test_key") == 1
|
assert mock_redis.delete("test_key") == 1
|
||||||
assert mock_redis.get("test_key") is None
|
assert mock_redis.get("test_key") is None
|
||||||
assert mock_redis.delete("test_key") == 0 # Already deleted
|
assert mock_redis.delete("test_key") == 0 # Already deleted
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from pydantic import Field
|
|||||||
|
|
||||||
from config import MAX_CONTEXT_TOKENS
|
from config import MAX_CONTEXT_TOKENS
|
||||||
from prompts.tool_prompts import PRECOMMIT_PROMPT
|
from prompts.tool_prompts import PRECOMMIT_PROMPT
|
||||||
from utils.file_utils import read_files, translate_file_paths, translate_path_for_environment
|
from utils.file_utils import translate_file_paths, translate_path_for_environment
|
||||||
from utils.git_utils import find_git_repositories, get_git_status, run_git_command
|
from utils.git_utils import find_git_repositories, get_git_status, run_git_command
|
||||||
from utils.token_utils import estimate_tokens
|
from utils.token_utils import estimate_tokens
|
||||||
|
|
||||||
@@ -300,11 +300,11 @@ class Precommit(BaseTool):
|
|||||||
|
|
||||||
# Use centralized file handling with filtering for duplicate prevention
|
# Use centralized file handling with filtering for duplicate prevention
|
||||||
file_content = self._prepare_file_content_for_prompt(
|
file_content = self._prepare_file_content_for_prompt(
|
||||||
translated_files,
|
translated_files,
|
||||||
request.continuation_id,
|
request.continuation_id,
|
||||||
"Context files",
|
"Context files",
|
||||||
max_tokens=remaining_tokens + 1000, # Add back the reserve that was calculated
|
max_tokens=remaining_tokens + 1000, # Add back the reserve that was calculated
|
||||||
reserve_tokens=1000 # Small reserve for formatting
|
reserve_tokens=1000, # Small reserve for formatting
|
||||||
)
|
)
|
||||||
|
|
||||||
if file_content:
|
if file_content:
|
||||||
|
|||||||
Reference in New Issue
Block a user