Files
my-pal-mcp-server/simulator_tests/test_content_validation.py
Beehive Innovations 4151c3c3a5 Migration from Docker to Standalone Python Server (#73)
* Migration from docker to standalone server
Migration handling
Fixed tests
Use simpler in-memory storage
Support for concurrent logging to disk
Simplified direct connections to localhost

* Migration from docker / redis to standalone script
Updated tests
Updated run script
Fixed requirements
Use dotenv
Ask if user would like to install MCP in Claude Desktop once
Updated docs

* More cleanup and references to docker removed

* Cleanup

* Comments

* Fixed tests

* Fix GitHub Actions workflow for standalone Python architecture

- Install requirements-dev.txt for pytest and testing dependencies
- Remove Docker setup from simulation tests (now standalone)
- Simplify linting job to use requirements-dev.txt
- Update simulation tests to run directly without Docker

Fixes unit test failures in CI due to missing pytest dependency.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Remove simulation tests from GitHub Actions

- Removed simulation-tests job that makes real API calls
- Keep only unit tests (mocked, no API costs) and linting
- Simulation tests should be run manually with real API keys
- Reduces CI costs and complexity

GitHub Actions now only runs:
- Unit tests (569 tests, all mocked)
- Code quality checks (ruff, black)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed tests

* Fixed tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-18 23:41:22 +04:00

185 lines
6.6 KiB
Python

#!/usr/bin/env python3
"""
Content Validation Test
Tests that tools don't duplicate file content in their responses.
This test is specifically designed to catch content duplication bugs.
"""
import os
from .base_test import BaseSimulatorTest
class ContentValidationTest(BaseSimulatorTest):
"""Test that tools don't duplicate file content in their responses"""
@property
def test_name(self) -> str:
return "content_validation"
@property
def test_description(self) -> str:
return "Content validation and duplicate detection"
def run_test(self) -> bool:
"""Test that file processing system properly handles file deduplication"""
try:
self.logger.info("📄 Test: Content validation and file processing deduplication")
# Setup test files first
self.setup_test_files()
# Create a test file for validation
validation_content = '''"""
Configuration file for content validation testing
"""
# Configuration constants
MAX_CONTENT_TOKENS = 800_000
TEMPERATURE_ANALYTICAL = 0.2
UNIQUE_VALIDATION_MARKER = "CONTENT_VALIDATION_TEST_12345"
# Database settings
DATABASE_CONFIG = {
"host": "localhost",
"port": 5432,
"name": "validation_test_db"
}
'''
validation_file = os.path.join(self.test_dir, "validation_config.py")
with open(validation_file, "w") as f:
f.write(validation_content)
# Ensure absolute path for MCP server compatibility
validation_file = os.path.abspath(validation_file)
# Get timestamp for log filtering
import datetime
start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
# Test 1: Initial tool call with validation file
self.logger.info(" 1: Testing initial tool call with file")
# Call chat tool with the validation file
response1, thread_id = self.call_mcp_tool(
"chat",
{
"prompt": "Analyze this configuration file briefly",
"files": [validation_file],
"model": "flash",
},
)
if not response1:
self.logger.error(" ❌ Initial tool call failed")
return False
self.logger.info(" ✅ Initial tool call completed")
# Test 2: Continuation with same file (should be deduplicated)
self.logger.info(" 2: Testing continuation with same file")
if thread_id:
response2, _ = self.call_mcp_tool(
"chat",
{
"prompt": "Continue analyzing this configuration file",
"files": [validation_file], # Same file should be deduplicated
"continuation_id": thread_id,
"model": "flash",
},
)
if response2:
self.logger.info(" ✅ Continuation with same file completed")
else:
self.logger.warning(" ⚠️ Continuation failed")
# Test 3: Different tool with same file (new conversation)
self.logger.info(" 3: Testing different tool with same file")
response3, _ = self.call_mcp_tool(
"codereview",
{
"files": [validation_file],
"prompt": "Review this configuration file",
"model": "flash",
},
)
if response3:
self.logger.info(" ✅ Different tool with same file completed")
else:
self.logger.warning(" ⚠️ Different tool failed")
# Validate file processing behavior from server logs
self.logger.info(" 4: Validating file processing logs")
logs = self.get_server_logs_since(start_time)
# Check for proper file embedding logs
embedding_logs = [
line
for line in logs.split("\n")
if "[FILE_PROCESSING]" in line or "embedding" in line.lower() or "[FILES]" in line
]
# Check for deduplication evidence
deduplication_logs = [
line
for line in logs.split("\n")
if ("skipping" in line.lower() and "already in conversation" in line.lower())
or "No new files to embed" in line
]
# Check for file processing patterns
new_file_logs = [
line
for line in logs.split("\n")
if "will embed new files" in line or "New conversation" in line or "[FILE_PROCESSING]" in line
]
# Validation criteria
validation_file_mentioned = any("validation_config.py" in line for line in logs.split("\n"))
embedding_found = len(embedding_logs) > 0
(len(deduplication_logs) > 0 or len(new_file_logs) >= 2) # Should see new conversation patterns
self.logger.info(f" Embedding logs found: {len(embedding_logs)}")
self.logger.info(f" Deduplication evidence: {len(deduplication_logs)}")
self.logger.info(f" New conversation patterns: {len(new_file_logs)}")
self.logger.info(f" Validation file mentioned: {validation_file_mentioned}")
# Log sample evidence for debugging
if self.verbose and embedding_logs:
self.logger.debug(" 📋 Sample embedding logs:")
for log in embedding_logs[:5]:
self.logger.debug(f" {log}")
# Success criteria
success_criteria = [
("Embedding logs found", embedding_found),
("File processing evidence", validation_file_mentioned),
("Multiple tool calls", len(new_file_logs) >= 2),
]
passed_criteria = sum(1 for _, passed in success_criteria if passed)
self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
# Cleanup
os.remove(validation_file)
if passed_criteria >= 2: # At least 2 out of 3 criteria
self.logger.info(" ✅ File processing validation passed")
return True
else:
self.logger.error(" ❌ File processing validation failed")
return False
except Exception as e:
self.logger.error(f"Content validation test failed: {e}")
return False
finally:
self.cleanup_test_files()