Migration from Docker to Standalone Python Server (#73)

* Migration from docker to standalone server Migration handling Fixed tests Use simpler in-memory storage Support for concurrent logging to disk Simplified direct connections to localhost * Migration from docker / redis to standalone script Updated tests Updated run script Fixed requirements Use dotenv Ask if user would like to install MCP in Claude Desktop once Updated docs * More cleanup and references to docker removed * Cleanup * Comments * Fixed tests * Fix GitHub Actions workflow for standalone Python architecture - Install requirements-dev.txt for pytest and testing dependencies - Remove Docker setup from simulation tests (now standalone) - Simplify linting job to use requirements-dev.txt - Update simulation tests to run directly without Docker Fixes unit test failures in CI due to missing pytest dependency. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Remove simulation tests from GitHub Actions - Removed simulation-tests job that makes real API calls - Keep only unit tests (mocked, no API costs) and linting - Simulation tests should be run manually with real API keys - Reduces CI costs and complexity GitHub Actions now only runs: - Unit tests (569 tests, all mocked) - Code quality checks (ruff, black) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Fixed tests * Fixed tests --------- Co-authored-by: Claude <noreply@anthropic.com>
2025-06-18 23:41:22 +04:00
parent 9d72545ecd
commit 4151c3c3a5
121 changed files with 2842 additions and 3168 deletions
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@@ -45,7 +45,7 @@ from pathlib import Path
 from typing import Callable, Optional

 from .file_types import BINARY_EXTENSIONS, CODE_EXTENSIONS, IMAGE_EXTENSIONS, TEXT_EXTENSIONS
-from .security_config import CONTAINER_WORKSPACE, EXCLUDED_DIRS, MCP_SIGNATURE_FILES, SECURITY_ROOT, WORKSPACE_ROOT
+from .security_config import EXCLUDED_DIRS, is_dangerous_path
 from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens


@@ -92,44 +92,32 @@ def is_mcp_directory(path: Path) -> bool:
        path: Directory path to check

    Returns:
-        True if this appears to be the MCP directory
+        True if this is the MCP server directory or a subdirectory
    """
    if not path.is_dir():
        return False

-    # Check for multiple signature files to be sure
-    matches = 0
-    for sig_file in MCP_SIGNATURE_FILES:
-        if (path / sig_file).exists():
-            matches += 1
-            if matches >= 3:  # Require at least 3 matches to be certain
-                logger.info(f"Detected MCP directory at {path}, will exclude from scanning")
-                return True
-    return False
+    # Get the directory where the MCP server is running from
+    # __file__ is utils/file_utils.py, so parent.parent is the MCP root
+    mcp_server_dir = Path(__file__).parent.parent.resolve()
+
+    # Check if the given path is the MCP server directory or a subdirectory
+    try:
+        path.resolve().relative_to(mcp_server_dir)
+        logger.info(f"Detected MCP server directory at {path}, will exclude from scanning")
+        return True
+    except ValueError:
+        # Not a subdirectory of MCP server
+        return False


 def get_user_home_directory() -> Optional[Path]:
    """
-    Get the user's home directory based on environment variables.
-
-    In Docker, USER_HOME should be set to the mounted home path.
-    Outside Docker, we use Path.home() or environment variables.
+    Get the user's home directory.

    Returns:
-        User's home directory path or None if not determinable
+        User's home directory path
    """
-    # Check for explicit USER_HOME env var (set in docker-compose.yml)
-    user_home = os.environ.get("USER_HOME")
-    if user_home:
-        return Path(user_home).resolve()
-
-    # In container, check if we're running in Docker
-    if CONTAINER_WORKSPACE.exists():
-        # We're in Docker but USER_HOME not set - use WORKSPACE_ROOT as fallback
-        if WORKSPACE_ROOT:
-            return Path(WORKSPACE_ROOT).resolve()
-
-    # Outside Docker, use system home
    return Path.home()


@@ -291,155 +279,51 @@ def _add_line_numbers(content: str) -> str:
    return "\n".join(numbered_lines)


-def translate_path_for_environment(path_str: str) -> str:
-    """
-    Translate paths between host and container environments as needed.
-
-    This is the unified path translation function that should be used by all
-    tools and utilities throughout the codebase. It handles:
-    1. Docker host-to-container path translation (host paths -> /workspace/...)
-    2. Direct mode (no translation needed)
-    3. Internal server files (conf/custom_models.json)
-    4. Security validation and error handling
-
-    Docker Path Translation Logic:
-    - Input: /Users/john/project/src/file.py (host path from Claude)
-    - WORKSPACE_ROOT: /Users/john/project (host path in env var)
-    - Output: /workspace/src/file.py (container path for file operations)
-
-    Args:
-        path_str: Original path string from the client (absolute host path)
-
-    Returns:
-        Translated path appropriate for the current environment
-    """
-    # Handle built-in server config file - no translation needed
-    if _is_builtin_custom_models_config(path_str):
-        return path_str
-    if not WORKSPACE_ROOT or not WORKSPACE_ROOT.strip() or not CONTAINER_WORKSPACE.exists():
-        if path_str.startswith("/app/"):
-            # Convert Docker internal paths to local relative paths for standalone mode
-            relative_path = path_str[5:]  # Remove "/app/" prefix
-            if relative_path.startswith("/"):
-                relative_path = relative_path[1:]  # Remove leading slash if present
-            return "./" + relative_path
-        # No other translation needed for standalone mode
-        return path_str
-
-    # Check if the path is already a container path (starts with /workspace)
-    if path_str.startswith(str(CONTAINER_WORKSPACE) + "/") or path_str == str(CONTAINER_WORKSPACE):
-        # Path is already translated to container format, return as-is
-        return path_str
-
-    try:
-        # Use os.path.realpath for security - it resolves symlinks completely
-        # This prevents symlink attacks that could escape the workspace
-        real_workspace_root = Path(os.path.realpath(WORKSPACE_ROOT))
-        # For the host path, we can't use realpath if it doesn't exist in the container
-        # So we'll use Path().resolve(strict=False) instead
-        real_host_path = Path(path_str).resolve(strict=False)
-
-        # Security check: ensure the path is within the mounted workspace
-        # This prevents path traversal attacks (e.g., ../../../etc/passwd)
-        relative_path = real_host_path.relative_to(real_workspace_root)
-
-        # Construct the container path
-        container_path = CONTAINER_WORKSPACE / relative_path
-
-        # Log the translation for debugging (but not sensitive paths)
-        if str(container_path) != path_str:
-            logger.info(f"Translated host path to container: {path_str} -> {container_path}")
-
-        return str(container_path)
-
-    except ValueError:
-        # Path is not within the host's WORKSPACE_ROOT
-        # In Docker, we cannot access files outside the mounted volume
-        logger.warning(
-            f"Path '{path_str}' is outside the mounted workspace '{WORKSPACE_ROOT}'. "
-            f"Docker containers can only access files within the mounted directory."
-        )
-        # Return a clear error path that will fail gracefully
-        return f"/inaccessible/outside/mounted/volume{path_str}"
-    except Exception as e:
-        # Log unexpected errors but don't expose internal details to clients
-        logger.warning(f"Path translation failed for '{path_str}': {type(e).__name__}")
-        # Return a clear error path that will fail gracefully
-        return f"/inaccessible/translation/error{path_str}"
-
-
 def resolve_and_validate_path(path_str: str) -> Path:
    """
-    Resolves, translates, and validates a path against security policies.
+    Resolves and validates a path against security policies.

-    This is the primary security function that ensures all file access
-    is properly sandboxed. It enforces three critical policies:
-    1. Translate host paths to container paths if applicable (Docker environment)
-    2. All paths must be absolute (no ambiguity)
-    3. All paths must resolve to within PROJECT_ROOT (sandboxing)
+    This function ensures safe file access by:
+    1. Requiring absolute paths (no ambiguity)
+    2. Resolving symlinks to prevent deception
+    3. Blocking access to dangerous system directories

    Args:
        path_str: Path string (must be absolute)

    Returns:
-        Resolved Path object that is guaranteed to be within PROJECT_ROOT
+        Resolved Path object that is safe to access

    Raises:
        ValueError: If path is not absolute or otherwise invalid
-        PermissionError: If path is outside allowed directory
+        PermissionError: If path is in a dangerous location
    """
-    # Step 1: Translate Docker paths first (if applicable)
-    # This must happen before any other validation
-    translated_path_str = translate_path_for_environment(path_str)
+    # Step 1: Create a Path object
+    user_path = Path(path_str)

-    # Step 2: Create a Path object from the (potentially translated) path
-    user_path = Path(translated_path_str)
-
-    # Step 3: Security Policy - Require absolute paths
+    # Step 2: Security Policy - Require absolute paths
    # Relative paths could be interpreted differently depending on working directory
    if not user_path.is_absolute():
        raise ValueError(f"Relative paths are not supported. Please provide an absolute path.\nReceived: {path_str}")

-    # Step 4: Resolve the absolute path (follows symlinks, removes .. and .)
+    # Step 3: Resolve the absolute path (follows symlinks, removes .. and .)
    # This is critical for security as it reveals the true destination of symlinks
    resolved_path = user_path.resolve()

-    # Step 5: Security Policy - Ensure the resolved path is within PROJECT_ROOT
-    # This prevents directory traversal attacks (e.g., /project/../../../etc/passwd)
-    try:
-        resolved_path.relative_to(SECURITY_ROOT)
-    except ValueError:
-        # Provide detailed error for debugging while avoiding information disclosure
-        logger.warning(
-            f"Access denied - path outside workspace. "
-            f"Requested: {path_str}, Resolved: {resolved_path}, Workspace: {SECURITY_ROOT}"
-        )
+    # Step 4: Check against dangerous paths
+    if is_dangerous_path(resolved_path):
+        logger.warning(f"Access denied - dangerous path: {resolved_path}")
+        raise PermissionError(f"Access to system directory denied: {path_str}")
+
+    # Step 5: Check if it's the home directory root
+    if is_home_directory_root(resolved_path):
        raise PermissionError(
-            f"Path outside workspace: {path_str}\nWorkspace: {SECURITY_ROOT}\nResolved path: {resolved_path}"
+            f"Cannot scan entire home directory: {path_str}\n" f"Please specify a subdirectory within your home folder."
        )

    return resolved_path


-def translate_file_paths(file_paths: Optional[list[str]]) -> Optional[list[str]]:
-    """
-    Translate a list of file paths for the current environment.
-
-    This function should be used by all tools to consistently handle path translation
-    for file lists. It applies the unified path translation to each path in the list.
-
-    Args:
-        file_paths: List of file paths to translate, or None
-
-    Returns:
-        List of translated paths, or None if input was None
-    """
-    if not file_paths:
-        return file_paths
-
-    return [translate_path_for_environment(path) for path in file_paths]
-
-
 def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> list[str]:
    """
    Expand paths to individual files, handling both files and directories.
@@ -474,23 +358,12 @@ def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> lis

        # Safety checks for directory scanning
        if path_obj.is_dir():
-            resolved_workspace = SECURITY_ROOT.resolve()
-            resolved_path = path_obj.resolve()
-
-            # Check 1: Prevent reading entire workspace root
-            if resolved_path == resolved_workspace:
-                logger.warning(
-                    f"Ignoring request to read entire workspace directory: {path}. "
-                    f"Please specify individual files or subdirectories instead."
-                )
-                continue
-
-            # Check 2: Prevent scanning user's home directory root
+            # Check 1: Prevent scanning user's home directory root
            if is_home_directory_root(path_obj):
                logger.warning(f"Skipping home directory root: {path}. Please specify a project subdirectory instead.")
                continue

-            # Check 3: Skip if this is the MCP's own directory
+            # Check 2: Skip if this is the MCP's own directory
            if is_mcp_directory(path_obj):
                logger.info(
                    f"Skipping MCP server directory: {path}. The MCP server code is excluded from project scans."
@@ -573,15 +446,6 @@ def read_file_content(
        # Return error in a format that provides context to the AI
        logger.debug(f"[FILES] Path validation failed for {file_path}: {type(e).__name__}: {e}")
        error_msg = str(e)
-        # Add Docker-specific help if we're in Docker and path is inaccessible
-        if WORKSPACE_ROOT and CONTAINER_WORKSPACE.exists():
-            # We're in Docker
-            error_msg = (
-                f"File is outside the Docker mounted directory. "
-                f"When running in Docker, only files within the mounted workspace are accessible. "
-                f"Current mounted directory: {WORKSPACE_ROOT}. "
-                f"To access files in a different directory, please run Claude from that directory."
-            )
        content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {error_msg}\n--- END FILE ---\n"
        tokens = estimate_tokens(content)
        logger.debug(f"[FILES] Returning error content for {file_path}: {tokens} tokens")
@@ -761,12 +625,10 @@ def estimate_file_tokens(file_path: str) -> int:
        Estimated token count for the file
    """
    try:
-        translated_path = translate_path_for_environment(file_path)
-
-        if not os.path.exists(translated_path) or not os.path.isfile(translated_path):
+        if not os.path.exists(file_path) or not os.path.isfile(file_path):
            return 0

-        file_size = os.path.getsize(translated_path)
+        file_size = os.path.getsize(file_path)

        # Get the appropriate ratio for this file type
        from .file_types import get_token_estimation_ratio
@@ -911,11 +773,10 @@ def read_json_file(file_path: str) -> Optional[dict]:
        Parsed JSON data as dict, or None if file doesn't exist or invalid
    """
    try:
-        translated_path = translate_path_for_environment(file_path)
-        if not os.path.exists(translated_path):
+        if not os.path.exists(file_path):
            return None

-        with open(translated_path, encoding="utf-8") as f:
+        with open(file_path, encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError):
        return None
@@ -934,10 +795,9 @@ def write_json_file(file_path: str, data: dict, indent: int = 2) -> bool:
        True if successful, False otherwise
    """
    try:
-        translated_path = translate_path_for_environment(file_path)
-        os.makedirs(os.path.dirname(translated_path), exist_ok=True)
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)

-        with open(translated_path, "w", encoding="utf-8") as f:
+        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=indent, ensure_ascii=False)
        return True
    except (OSError, TypeError):
@@ -955,9 +815,8 @@ def get_file_size(file_path: str) -> int:
        File size in bytes, or 0 if file doesn't exist or error
    """
    try:
-        translated_path = translate_path_for_environment(file_path)
-        if os.path.exists(translated_path) and os.path.isfile(translated_path):
-            return os.path.getsize(translated_path)
+        if os.path.exists(file_path) and os.path.isfile(file_path):
+            return os.path.getsize(file_path)
        return 0
    except OSError:
        return 0
@@ -974,8 +833,7 @@ def ensure_directory_exists(file_path: str) -> bool:
        True if directory exists or was created, False on error
    """
    try:
-        translated_path = translate_path_for_environment(file_path)
-        directory = os.path.dirname(translated_path)
+        directory = os.path.dirname(file_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        return True
@@ -1010,15 +868,14 @@ def read_file_safely(file_path: str, max_size: int = 10 * 1024 * 1024) -> Option
        File content as string, or None if file too large or unreadable
    """
    try:
-        translated_path = translate_path_for_environment(file_path)
-        if not os.path.exists(translated_path) or not os.path.isfile(translated_path):
+        if not os.path.exists(file_path) or not os.path.isfile(file_path):
            return None

-        file_size = os.path.getsize(translated_path)
+        file_size = os.path.getsize(file_path)
        if file_size > max_size:
            return None

-        with open(translated_path, encoding="utf-8", errors="ignore") as f:
+        with open(file_path, encoding="utf-8", errors="ignore") as f:
            return f.read()
    except OSError:
        return None