Migration from Docker to Standalone Python Server (#73)

* Migration from docker to standalone server
Migration handling
Fixed tests
Use simpler in-memory storage
Support for concurrent logging to disk
Simplified direct connections to localhost

* Migration from docker / redis to standalone script
Updated tests
Updated run script
Fixed requirements
Use dotenv
Ask only once whether the user would like to install MCP in Claude Desktop
Updated docs

* More cleanup; removed remaining references to Docker

* Cleanup

* Comments

* Fixed tests

* Fix GitHub Actions workflow for standalone Python architecture

- Install requirements-dev.txt for pytest and testing dependencies
- Remove Docker setup from simulation tests (now standalone)
- Simplify linting job to use requirements-dev.txt
- Update simulation tests to run directly without Docker

Fixes unit test failures in CI due to missing pytest dependency.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Remove simulation tests from GitHub Actions

- Removed simulation-tests job that makes real API calls
- Keep only unit tests (mocked, no API costs) and linting
- Simulation tests should be run manually with real API keys
- Reduces CI costs and complexity

GitHub Actions now only runs:
- Unit tests (569 tests, all mocked)
- Code quality checks (ruff, black)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed tests

* Fixed tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Beehive Innovations
2025-06-18 23:41:22 +04:00
committed by GitHub
parent 9d72545ecd
commit 4151c3c3a5
121 changed files with 2842 additions and 3168 deletions

View File

@@ -8,16 +8,15 @@ Validates that:
1. Files are embedded only once in conversation history
2. Continuation calls don't re-read existing files
3. New files are still properly embedded
4. Docker logs show deduplication behavior
4. Server logs show deduplication behavior
"""
import os
import subprocess
from .base_test import BaseSimulatorTest
from .conversation_base_test import ConversationBaseTest
class PerToolDeduplicationTest(BaseSimulatorTest):
class PerToolDeduplicationTest(ConversationBaseTest):
"""Test file deduplication for each individual tool"""
@property
@@ -28,74 +27,16 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
def test_description(self) -> str:
return "File deduplication for individual tools"
def get_docker_logs_since(self, since_time: str) -> str:
    """Collect log text from the docker containers and their internal log files.

    Four commands are run in order and their standard output is joined with
    newlines:

    1. ``docker logs --since <since_time>`` for ``self.container_name``
    2. ``docker logs --since <since_time>`` for ``zen-mcp-log-monitor``
    3. ``cat /tmp/mcp_server.log`` inside ``self.container_name``
    4. ``cat /tmp/mcp_activity.log`` inside ``self.container_name``

    Note that ``since_time`` only limits the two ``docker logs`` calls; the
    two log files are read in full.

    Args:
        since_time: Value passed to the ``--since`` option of ``docker logs``.

    Returns:
        The combined stdout of all four commands, or an empty string if any
        exception is raised while collecting them (the error is logged).
    """
    try:
        # Order matters: it determines the order of sections in the result.
        commands = (
            # Container-level logs from the main server and the log monitor
            ["docker", "logs", "--since", since_time, self.container_name],
            ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"],
            # Internal log files, which carry more detailed logging
            ["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"],
            ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"],
        )

        # Only stdout is kept; stderr is captured but deliberately not returned.
        outputs = [subprocess.run(command, capture_output=True, text=True).stdout for command in commands]
        return "\n".join(outputs)
    except Exception as e:
        self.logger.error(f"Failed to get docker logs: {e}")
        return ""
# create_additional_test_file method now inherited from base class
def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
    """Check whether the logs contain deduplication evidence for a tool.

    A log line is considered only if it contains both the "📁" marker and
    ``tool_name``. Such a line counts as evidence when it also contains
    "Filtering" or "skipping".

    Args:
        logs: Raw log text; it is split on newlines.
        tool_name: Tool name that must appear in a line for it to count.
        test_file: Accepted for interface compatibility; not used here.

    Returns:
        True if at least one "Filtering" or "skipping" line was found for
        the tool, otherwise False.
    """
    # Narrow down once to the file-marker lines that mention this tool.
    tool_entries = [entry for entry in logs.split("\n") if "📁" in entry and tool_name in entry]

    def _containing(keyword):
        return [entry for entry in tool_entries if keyword in entry]

    # "Filtering" lines are reported before "skipping" lines.
    evidence = _containing("Filtering") + _containing("skipping")

    if not evidence:
        self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs")
        # Dump the embedding lines to help diagnose why nothing was filtered.
        self.logger.debug(f" 📁 All embedding messages: {_containing('embedding')}")
        return False

    self.logger.info(f"{tool_name}: Found deduplication evidence in logs")
    for entry in evidence:
        self.logger.debug(f" 📁 {entry.strip()}")
    return True
def run_test(self) -> bool:
"""Test file deduplication with realistic precommit/codereview workflow"""
try:
self.logger.info("📄 Test: Simplified file deduplication with precommit/codereview workflow")
# Setup test environment for conversation testing
self.setUp()
# Setup test files
self.setup_test_files()
@@ -126,7 +67,7 @@ def divide(x, y):
"model": "flash",
}
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
response1, continuation_id = self.call_mcp_tool_direct("precommit", precommit_params)
if not response1:
self.logger.error(" ❌ Step 1: precommit tool failed")
return False
@@ -151,7 +92,7 @@ def divide(x, y):
"model": "flash",
}
response2, _ = self.call_mcp_tool("codereview", codereview_params)
response2, _ = self.call_mcp_tool_direct("codereview", codereview_params)
if not response2:
self.logger.error(" ❌ Step 2: codereview tool failed")
return False
@@ -181,16 +122,16 @@ def subtract(a, b):
"model": "flash",
}
response3, _ = self.call_mcp_tool("precommit", continue_params)
response3, _ = self.call_mcp_tool_direct("precommit", continue_params)
if not response3:
self.logger.error(" ❌ Step 3: precommit continuation failed")
return False
self.logger.info(" ✅ Step 3: precommit continuation completed")
# Validate results in docker logs
# Validate results in server logs
self.logger.info(" 📋 Validating conversation history and file deduplication...")
logs = self.get_docker_logs_since(start_time)
logs = self.get_server_logs_since(start_time)
# Check for conversation history building
conversation_logs = [
@@ -249,7 +190,7 @@ def subtract(a, b):
return True
else:
self.logger.warning(" ⚠️ File deduplication workflow test: FAILED")
self.logger.warning(" 💡 Check docker logs for detailed file embedding and continuation activity")
self.logger.warning(" 💡 Check server logs for detailed file embedding and continuation activity")
return False
except Exception as e: