Migration from Docker to Standalone Python Server (#73)
* Migration from docker to standalone server Migration handling Fixed tests Use simpler in-memory storage Support for concurrent logging to disk Simplified direct connections to localhost * Migration from docker / redis to standalone script Updated tests Updated run script Fixed requirements Use dotenv Ask if user would like to install MCP in Claude Desktop once Updated docs * More cleanup and references to docker removed * Cleanup * Comments * Fixed tests * Fix GitHub Actions workflow for standalone Python architecture - Install requirements-dev.txt for pytest and testing dependencies - Remove Docker setup from simulation tests (now standalone) - Simplify linting job to use requirements-dev.txt - Update simulation tests to run directly without Docker Fixes unit test failures in CI due to missing pytest dependency. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Remove simulation tests from GitHub Actions - Removed simulation-tests job that makes real API calls - Keep only unit tests (mocked, no API costs) and linting - Simulation tests should be run manually with real API keys - Reduces CI costs and complexity GitHub Actions now only runs: - Unit tests (569 tests, all mocked) - Code quality checks (ruff, black) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Fixed tests * Fixed tests --------- Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
9d72545ecd
commit
4151c3c3a5
@@ -8,16 +8,15 @@ Validates that:
|
||||
1. Files are embedded only once in conversation history
|
||||
2. Continuation calls don't re-read existing files
|
||||
3. New files are still properly embedded
|
||||
4. Docker logs show deduplication behavior
|
||||
4. Server logs show deduplication behavior
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .base_test import BaseSimulatorTest
|
||||
from .conversation_base_test import ConversationBaseTest
|
||||
|
||||
|
||||
class PerToolDeduplicationTest(BaseSimulatorTest):
|
||||
class PerToolDeduplicationTest(ConversationBaseTest):
|
||||
"""Test file deduplication for each individual tool"""
|
||||
|
||||
@property
|
||||
@@ -28,74 +27,16 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
||||
def test_description(self) -> str:
|
||||
return "File deduplication for individual tools"
|
||||
|
||||
def get_docker_logs_since(self, since_time: str) -> str:
|
||||
"""Get docker logs since a specific timestamp"""
|
||||
try:
|
||||
# Check both main server and log monitor for comprehensive logs
|
||||
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
||||
cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"]
|
||||
|
||||
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
||||
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
||||
|
||||
# Get the internal log files which have more detailed logging
|
||||
server_log_result = subprocess.run(
|
||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True
|
||||
)
|
||||
|
||||
activity_log_result = subprocess.run(
|
||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True
|
||||
)
|
||||
|
||||
# Combine all logs
|
||||
combined_logs = (
|
||||
result_server.stdout
|
||||
+ "\n"
|
||||
+ result_monitor.stdout
|
||||
+ "\n"
|
||||
+ server_log_result.stdout
|
||||
+ "\n"
|
||||
+ activity_log_result.stdout
|
||||
)
|
||||
return combined_logs
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to get docker logs: {e}")
|
||||
return ""
|
||||
|
||||
# create_additional_test_file method now inherited from base class
|
||||
|
||||
def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
|
||||
"""Validate that logs show file deduplication behavior"""
|
||||
# Look for file embedding messages
|
||||
embedding_messages = [
|
||||
line for line in logs.split("\n") if "📁" in line and "embedding" in line and tool_name in line
|
||||
]
|
||||
|
||||
# Look for deduplication/filtering messages
|
||||
filtering_messages = [
|
||||
line for line in logs.split("\n") if "📁" in line and "Filtering" in line and tool_name in line
|
||||
]
|
||||
skipping_messages = [
|
||||
line for line in logs.split("\n") if "📁" in line and "skipping" in line and tool_name in line
|
||||
]
|
||||
|
||||
deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0
|
||||
|
||||
if deduplication_found:
|
||||
self.logger.info(f" ✅ {tool_name}: Found deduplication evidence in logs")
|
||||
for msg in filtering_messages + skipping_messages:
|
||||
self.logger.debug(f" 📁 {msg.strip()}")
|
||||
else:
|
||||
self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs")
|
||||
self.logger.debug(f" 📁 All embedding messages: {embedding_messages}")
|
||||
|
||||
return deduplication_found
|
||||
|
||||
def run_test(self) -> bool:
|
||||
"""Test file deduplication with realistic precommit/codereview workflow"""
|
||||
try:
|
||||
self.logger.info("📄 Test: Simplified file deduplication with precommit/codereview workflow")
|
||||
|
||||
# Setup test environment for conversation testing
|
||||
self.setUp()
|
||||
|
||||
# Setup test files
|
||||
self.setup_test_files()
|
||||
|
||||
@@ -126,7 +67,7 @@ def divide(x, y):
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
|
||||
response1, continuation_id = self.call_mcp_tool_direct("precommit", precommit_params)
|
||||
if not response1:
|
||||
self.logger.error(" ❌ Step 1: precommit tool failed")
|
||||
return False
|
||||
@@ -151,7 +92,7 @@ def divide(x, y):
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response2, _ = self.call_mcp_tool("codereview", codereview_params)
|
||||
response2, _ = self.call_mcp_tool_direct("codereview", codereview_params)
|
||||
if not response2:
|
||||
self.logger.error(" ❌ Step 2: codereview tool failed")
|
||||
return False
|
||||
@@ -181,16 +122,16 @@ def subtract(a, b):
|
||||
"model": "flash",
|
||||
}
|
||||
|
||||
response3, _ = self.call_mcp_tool("precommit", continue_params)
|
||||
response3, _ = self.call_mcp_tool_direct("precommit", continue_params)
|
||||
if not response3:
|
||||
self.logger.error(" ❌ Step 3: precommit continuation failed")
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Step 3: precommit continuation completed")
|
||||
|
||||
# Validate results in docker logs
|
||||
# Validate results in server logs
|
||||
self.logger.info(" 📋 Validating conversation history and file deduplication...")
|
||||
logs = self.get_docker_logs_since(start_time)
|
||||
logs = self.get_server_logs_since(start_time)
|
||||
|
||||
# Check for conversation history building
|
||||
conversation_logs = [
|
||||
@@ -249,7 +190,7 @@ def subtract(a, b):
|
||||
return True
|
||||
else:
|
||||
self.logger.warning(" ⚠️ File deduplication workflow test: FAILED")
|
||||
self.logger.warning(" 💡 Check docker logs for detailed file embedding and continuation activity")
|
||||
self.logger.warning(" 💡 Check server logs for detailed file embedding and continuation activity")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user