Migration from Docker to Standalone Python Server (#73)

* Migration from docker to standalone server
  Migration handling
  Fixed tests
  Use simpler in-memory storage
  Support for concurrent logging to disk
  Simplified direct connections to localhost

* Migration from docker / redis to standalone script
  Updated tests
  Updated run script
  Fixed requirements
  Use dotenv
  Ask if user would like to install MCP in Claude Desktop once
  Updated docs

* More cleanup and references to docker removed

* Cleanup

* Comments

* Fixed tests

* Fix GitHub Actions workflow for standalone Python architecture
  - Install requirements-dev.txt for pytest and testing dependencies
  - Remove Docker setup from simulation tests (now standalone)
  - Simplify linting job to use requirements-dev.txt
  - Update simulation tests to run directly without Docker

  Fixes unit test failures in CI due to missing pytest dependency.

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* Remove simulation tests from GitHub Actions
  - Removed simulation-tests job that makes real API calls
  - Keep only unit tests (mocked, no API costs) and linting
  - Simulation tests should be run manually with real API keys
  - Reduces CI costs and complexity

  GitHub Actions now only runs:
  - Unit tests (569 tests, all mocked)
  - Code quality checks (ruff, black)

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed tests

* Fixed tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-18 23:41:22 +04:00
parent 9d72545ecd
commit 4151c3c3a5
121 changed files with 2842 additions and 3168 deletions
--- a/simulator_tests/test_per_tool_deduplication.py
+++ b/simulator_tests/test_per_tool_deduplication.py
@@ -8,16 +8,15 @@ Validates that:
 1. Files are embedded only once in conversation history
 2. Continuation calls don't re-read existing files
 3. New files are still properly embedded
-4. Docker logs show deduplication behavior
+4. Server logs show deduplication behavior
 """

 import os
-import subprocess

-from .base_test import BaseSimulatorTest
+from .conversation_base_test import ConversationBaseTest


-class PerToolDeduplicationTest(BaseSimulatorTest):
+class PerToolDeduplicationTest(ConversationBaseTest):
    """Test file deduplication for each individual tool"""

    @property
@@ -28,74 +27,16 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
    def test_description(self) -> str:
        return "File deduplication for individual tools"

-    def get_docker_logs_since(self, since_time: str) -> str:
-        """Get docker logs since a specific timestamp"""
-        try:
-            # Check both main server and log monitor for comprehensive logs
-            cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
-            cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"]
-
-            result_server = subprocess.run(cmd_server, capture_output=True, text=True)
-            result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
-
-            # Get the internal log files which have more detailed logging
-            server_log_result = subprocess.run(
-                ["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True
-            )
-
-            activity_log_result = subprocess.run(
-                ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True
-            )
-
-            # Combine all logs
-            combined_logs = (
-                result_server.stdout
-                + "\n"
-                + result_monitor.stdout
-                + "\n"
-                + server_log_result.stdout
-                + "\n"
-                + activity_log_result.stdout
-            )
-            return combined_logs
-        except Exception as e:
-            self.logger.error(f"Failed to get docker logs: {e}")
-            return ""
-
    # create_additional_test_file method now inherited from base class

-    def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
-        """Validate that logs show file deduplication behavior"""
-        # Look for file embedding messages
-        embedding_messages = [
-            line for line in logs.split("\n") if "📁" in line and "embedding" in line and tool_name in line
-        ]
-
-        # Look for deduplication/filtering messages
-        filtering_messages = [
-            line for line in logs.split("\n") if "📁" in line and "Filtering" in line and tool_name in line
-        ]
-        skipping_messages = [
-            line for line in logs.split("\n") if "📁" in line and "skipping" in line and tool_name in line
-        ]
-
-        deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0
-
-        if deduplication_found:
-            self.logger.info(f"  ✅ {tool_name}: Found deduplication evidence in logs")
-            for msg in filtering_messages + skipping_messages:
-                self.logger.debug(f"    📁 {msg.strip()}")
-        else:
-            self.logger.warning(f"  ⚠️ {tool_name}: No deduplication evidence found in logs")
-            self.logger.debug(f"  📁 All embedding messages: {embedding_messages}")
-
-        return deduplication_found
-
    def run_test(self) -> bool:
        """Test file deduplication with realistic precommit/codereview workflow"""
        try:
            self.logger.info("📄 Test: Simplified file deduplication with precommit/codereview workflow")

+            # Setup test environment for conversation testing
+            self.setUp()
+
            # Setup test files
            self.setup_test_files()

@@ -126,7 +67,7 @@ def divide(x, y):
                "model": "flash",
            }

-            response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
+            response1, continuation_id = self.call_mcp_tool_direct("precommit", precommit_params)
            if not response1:
                self.logger.error("  ❌ Step 1: precommit tool failed")
                return False
@@ -151,7 +92,7 @@ def divide(x, y):
                "model": "flash",
            }

-            response2, _ = self.call_mcp_tool("codereview", codereview_params)
+            response2, _ = self.call_mcp_tool_direct("codereview", codereview_params)
            if not response2:
                self.logger.error("  ❌ Step 2: codereview tool failed")
                return False
@@ -181,16 +122,16 @@ def subtract(a, b):
                "model": "flash",
            }

-            response3, _ = self.call_mcp_tool("precommit", continue_params)
+            response3, _ = self.call_mcp_tool_direct("precommit", continue_params)
            if not response3:
                self.logger.error("  ❌ Step 3: precommit continuation failed")
                return False

            self.logger.info("  ✅ Step 3: precommit continuation completed")

-            # Validate results in docker logs
+            # Validate results in server logs
            self.logger.info("  📋 Validating conversation history and file deduplication...")
-            logs = self.get_docker_logs_since(start_time)
+            logs = self.get_server_logs_since(start_time)

            # Check for conversation history building
            conversation_logs = [
@@ -249,7 +190,7 @@ def subtract(a, b):
                return True
            else:
                self.logger.warning("  ⚠️ File deduplication workflow test: FAILED")
-                self.logger.warning("  💡 Check docker logs for detailed file embedding and continuation activity")
+                self.logger.warning("  💡 Check server logs for detailed file embedding and continuation activity")
                return False

        except Exception as e: