feat: add review_pending_changes tool and enforce absolute path security

- Add new review_pending_changes tool for comprehensive pre-commit reviews
- Implement filesystem sandboxing with MCP_PROJECT_ROOT
- Enforce absolute paths for all file/directory operations
- Add comprehensive git utilities for repository management
- Update all tools to use centralized path validation
- Add extensive test coverage for new features and security model
- Update documentation with new tool and path requirements
- Remove obsolete demo and guide files

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-09 12:42:18 +04:00
parent 00b365f108
commit 7ee610938b
23 changed files with 1369 additions and 486 deletions
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@@ -5,9 +5,21 @@ File reading utilities with directory support and token management
 import os
 from pathlib import Path
 from typing import List, Optional, Tuple, Set
+import sys

 from .token_utils import estimate_tokens, MAX_CONTEXT_TOKENS

+# Sandbox root: read from MCP_PROJECT_ROOT, falling back to the current working
+# directory, and resolved once at import time. File access is allowed only inside it.
+PROJECT_ROOT = Path(os.environ.get("MCP_PROJECT_ROOT", os.getcwd())).resolve()
+
+# Security: refuse a sandbox rooted at "/" (NOTE(review): only this literal is caught; a Windows drive root would pass)
+if str(PROJECT_ROOT) == "/":
+    raise RuntimeError(
+        "Security Error: MCP_PROJECT_ROOT cannot be set to '/'. "
+        "This would give access to the entire filesystem."
+    )
+

 # Common code file extensions
 CODE_EXTENSIONS = {
@@ -60,6 +72,46 @@ CODE_EXTENSIONS = {
 }


+def resolve_and_validate_path(path_str: str) -> Path:
+    """
+    Resolve an absolute path and confirm it lies inside PROJECT_ROOT.
+
+    Args:
+        path_str: Path string supplied by the caller (must be absolute)
+
+    Returns:
+        The fully resolved Path object
+
+    Raises:
+        ValueError: If path_str is a relative path
+        PermissionError: If the resolved path is not under PROJECT_ROOT
+    """
+    candidate = Path(path_str)
+    if not candidate.is_absolute():
+        # Relative input is rejected outright instead of being joined to
+        # the current working directory
+        message = (
+            f"Relative paths are not supported. Please provide an absolute path.\n"
+            f"Received: {path_str}"
+        )
+        raise ValueError(message)
+
+    # Containment is checked on the resolved path, not on the raw input string
+    resolved = candidate.resolve()
+    try:
+        resolved.relative_to(PROJECT_ROOT)
+    except ValueError:
+        # relative_to() raises ValueError when PROJECT_ROOT is not a prefix
+        denial = (
+            f"Path outside project root: {path_str}\n"
+            f"Project root: {PROJECT_ROOT}\n"
+            f"Resolved path: {resolved}"
+        )
+        raise PermissionError(denial)
+
+    return resolved
+
+
 def expand_paths(paths: List[str], extensions: Optional[Set[str]] = None) -> List[str]:
    """
    Expand paths to individual files, handling both files and directories.
@@ -78,7 +130,11 @@ def expand_paths(paths: List[str], extensions: Optional[Set[str]] = None) -> Lis
    seen = set()

    for path in paths:
-        path_obj = Path(path)
+        try:
+            path_obj = resolve_and_validate_path(path)
+        except (ValueError, PermissionError):
+            # Skip invalid paths
+            continue

        if not path_obj.exists():
            continue
@@ -121,13 +177,17 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> Tuple[str, i
    Read a single file and format it for Gemini.

    Args:
-        file_path: Path to file
+        file_path: Path to file (must be absolute)
        max_size: Maximum file size to read

    Returns:
        (formatted_content, estimated_tokens)
    """
-    path = Path(file_path)
+    try:
+        path = resolve_and_validate_path(file_path)
+    except (ValueError, PermissionError) as e:
+        content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {str(e)}\n--- END FILE ---\n"
+        return content, estimate_tokens(content)

    try:
        # Check if path exists and is a file
--- a/utils/git_utils.py
+++ b/utils/git_utils.py
@@ -0,0 +1,164 @@
+"""
+Git utilities for finding repositories and generating diffs.
+"""
+
+import os
+import subprocess
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+
+# Directory names that find_git_repositories() never descends into
+IGNORED_DIRS = {
+    "node_modules",
+    "__pycache__",
+    "venv",
+    "env",
+    "build",
+    "dist",
+    "target",
+    ".tox",
+    ".pytest_cache",
+}
+
+
+def find_git_repositories(start_path: str, max_depth: int = 5) -> List[str]:
+    """
+    Collect every git repository located at or below the given directory.
+
+    A directory counts as a repository when it contains a ".git" directory;
+    the search does not descend into a repository once it has been found.
+    Hidden directories and names listed in IGNORED_DIRS are skipped.
+
+    Args:
+        start_path: Directory to start searching from
+        max_depth: Deepest directory level to visit (start_path is level 0)
+
+    Returns:
+        Sorted list of absolute paths to git repositories
+    """
+    found = []
+    # Explicit work list of (directory, depth) pairs instead of recursion
+    pending = [(Path(start_path).resolve(), 0)]
+    while pending:
+        directory, depth = pending.pop()
+        if depth > max_depth:
+            continue
+
+        try:
+            marker = directory / ".git"
+            if marker.exists() and marker.is_dir():
+                # Record the repository and leave its contents unexplored
+                found.append(str(directory))
+                continue
+
+            for child in directory.iterdir():
+                if not child.is_dir() or child.name.startswith("."):
+                    continue
+                if child.name not in IGNORED_DIRS:
+                    pending.append((child, depth + 1))
+        except PermissionError:
+            # Skip directories we can't access
+            continue
+
+    return sorted(found)
+
+
+def run_git_command(repo_path: str, command: List[str]) -> Tuple[bool, str]:
+    """
+    Execute "git <command...>" with repo_path as the working directory.
+
+    Args:
+        repo_path: Path to the git repository
+        command: Git arguments, without the leading "git"
+
+    Returns:
+        (True, stdout) on exit code 0, otherwise (False, stderr); a timeout
+        or any other exception yields (False, description of the failure)
+    """
+    try:
+        # Capture text output and give up after 30 seconds
+        completed = subprocess.run(
+            ["git"] + command, cwd=repo_path, capture_output=True, text=True, timeout=30
+        )
+    except subprocess.TimeoutExpired:
+        return False, "Command timed out"
+    except Exception as e:
+        return False, str(e)
+
+    # Exit code 0 selects stdout as the payload, anything else selects stderr
+    succeeded = completed.returncode == 0
+    return succeeded, completed.stdout if succeeded else completed.stderr
+
+
+def get_git_status(repo_path: str) -> Dict[str, Any]:
+    """
+    Get the current git status of a repository.
+
+    Args:
+        repo_path: Path to the git repository
+
+    Returns:
+        Dictionary with the keys "branch", "ahead", "behind", "staged_files",
+        "unstaged_files" and "untracked_files"; a field keeps its empty
+        default when the git command that would fill it fails
+    """
+    status = {
+        "branch": "",
+        "ahead": 0,
+        "behind": 0,
+        "staged_files": [],
+        "unstaged_files": [],
+        "untracked_files": [],
+    }
+
+    # Get current branch
+    success, branch = run_git_command(repo_path, ["branch", "--show-current"])
+    if success:
+        status["branch"] = branch.strip()
+
+    # Get ahead/behind info
+    if status["branch"]:
+        success, ahead_behind = run_git_command(
+            repo_path,
+            [
+                "rev-list",
+                "--count",
+                "--left-right",
+                f'{status["branch"]}@{{upstream}}...HEAD',
+            ],
+        )
+        # On failure (branch may not have an upstream) both counters stay 0
+        parts = ahead_behind.split() if success else []
+        if len(parts) == 2:
+            status["behind"] = int(parts[0])
+            status["ahead"] = int(parts[1])
+
+    # Get file status
+    success, status_output = run_git_command(repo_path, ["status", "--porcelain"])
+    if success:
+        # Split without strip(): the first column of a porcelain line may be
+        # a space, and stripping the output would shift the first entry
+        for line in status_output.splitlines():
+            if len(line) < 4:
+                continue
+
+            status_code = line[:2]
+            path_info = line[3:]
+
+            # Renames and copies are listed as "old_path -> new_path";
+            # report only the new path in every list
+            if ("R" in status_code or "C" in status_code) and " -> " in path_info:
+                path_info = path_info.split(" -> ", 1)[1]
+
+            # Handle staged changes
+            if status_code[0] in ["M", "A", "D", "R", "C"]:
+                status["staged_files"].append(path_info)
+
+            # Handle unstaged changes
+            if status_code[1] in ["M", "D"]:
+                status["unstaged_files"].append(path_info)
+            elif status_code == "??":
+                status["untracked_files"].append(path_info)
+
+    return status