From 545338ca2303ced9fab429b05c52e79a74eaf7ab Mon Sep 17 00:00:00 2001 From: Fahad Date: Mon, 9 Jun 2025 06:00:25 +0400 Subject: [PATCH] feat: add full directory support and smart file handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major improvements to file handling capabilities: - Add directory traversal support to all file-processing tools - Tools now accept both individual files and entire directories - Automatically expand directories and discover code files recursively - Smart filtering: skip hidden files, __pycache__, and non-code files - Progressive token loading: read as many files as possible within limits - Clear file separation markers with full paths for Gemini Key changes: - Rewrite file_utils.py with expand_paths() and improved read_files() - Update all tool descriptions to indicate directory support - Add comprehensive tests for directory handling and token limits - Document tool parameters and examples in README - Bump version to 2.4.2 All tools (analyze, review_code, debug_issue, think_deeper) now support: - Single files: "analyze main.py" - Directories: "review src/" - Mixed paths: "analyze config.py, src/, tests/" This enables analyzing entire projects or specific subsystems efficiently while respecting token limits and providing clear file boundaries. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- README.md | 62 ++++++++++++ config.py | 2 +- tests/test_utils.py | 126 +++++++++++++++++++++++- tools/analyze.py | 5 +- tools/debug_issue.py | 2 +- tools/review_code.py | 5 +- tools/think_deeper.py | 2 +- utils/__init__.py | 4 +- utils/file_utils.py | 222 +++++++++++++++++++++++++++++++++++------- 9 files changed, 384 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 7a90454..cc3eb4d 100644 --- a/README.md +++ b/README.md @@ -257,6 +257,68 @@ Just ask Claude naturally: "Get gemini to show server configuration" ``` +## Tool Parameters + +All tools that work with files now support **both individual files and entire directories**. The server automatically expands directories, filters for relevant code files, and manages token limits. 
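+
+Under the hood, a tool's file arguments are assembled by the `read_files()` helper added in this patch. A minimal sketch of that flow (assuming the tools call `read_files()` as defined in `utils/file_utils.py`; the printed summary is illustrative):
+
+```python
+from utils.file_utils import read_files
+
+# Directories are expanded recursively; hidden files and __pycache__ are
+# skipped, and files are read in sorted order until the token budget runs out.
+content, summary = read_files(["config.py", "src/"])
+print(summary)  # e.g. "Processed 1 dir(s) | Read 12 file(s) | ~8,400 tokens used"
+```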
+
+### File-Processing Tools
+
+**`analyze`** - Analyze files or directories
+- `files`: List of file paths or directories (required)
+- `question`: What to analyze (required)
+- `analysis_type`: architecture|performance|security|quality|general
+- `output_format`: summary|detailed|actionable
+
+```
+"Use gemini to analyze the src/ directory for architectural patterns"
+"Get gemini to analyze main.py and tests/ to understand test coverage"
+```
+
+**`review_code`** - Review code files or directories
+- `files`: List of file paths or directories (required)
+- `review_type`: full|security|performance|quick
+- `focus_on`: Specific aspects to focus on
+- `standards`: Coding standards to enforce
+- `severity_filter`: critical|high|medium|all
+
+```
+"Use gemini to review the entire api/ directory for security issues"
+"Get gemini to review src/ with focus on performance, only show critical issues"
+```
+
+**`debug_issue`** - Debug with file context
+- `error_description`: Description of the issue (required)
+- `error_context`: Stack trace or logs
+- `relevant_files`: Files or directories related to the issue
+- `runtime_info`: Environment details
+- `previous_attempts`: What you've tried
+
+```
+"Use gemini to debug this error with context from the entire backend/ directory"
+```
+
+**`think_deeper`** - Extended analysis with file context
+- `current_analysis`: Your current thinking (required)
+- `problem_context`: Additional context
+- `focus_areas`: Specific aspects to focus on
+- `reference_files`: Files or directories for context
+
+```
+"Use gemini to think deeper about my design with reference to the src/models/ directory"
+```
+
+### Directory Support Features
+
+- **Automatic Expansion**: Directories are recursively scanned for code files
+- **Smart Filtering**: Hidden files, caches, and non-code files are automatically excluded
+- **Token Management**: Loads as many files as possible within token limits
+- **Clear Markers**: Each file is wrapped in begin/end markers carrying its full path, so Gemini can tell files apart
+
+Example with mixed paths:
+```
+"Use gemini to analyze config.py, src/, and tests/unit/ to understand the testing strategy"
+```
+
 ## Collaborative Workflows
 
 ### Design → Review → Implement
diff --git a/config.py b/config.py
index bd13be8..dacbc2d 100644
--- a/config.py
+++ b/config.py
@@ -3,7 +3,7 @@ Configuration and constants for Gemini MCP Server
 """
 
 # Version and metadata
-__version__ = "2.4.1"
+__version__ = "2.4.2"
 __updated__ = "2025-06-09"
 __author__ = "Fahad Gilani"
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9bede78..8bd2c12 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -16,23 +16,26 @@ class TestFileUtils:
             "def hello():\n return 'world'", encoding="utf-8"
         )
 
-        content = read_file_content(str(test_file))
+        content, tokens = read_file_content(str(test_file))
 
         assert "--- BEGIN FILE:" in content
         assert "--- END FILE:" in content
         assert "def hello():" in content
         assert "return 'world'" in content
+        assert tokens > 0  # Should have estimated tokens
 
     def test_read_file_content_not_found(self):
         """Test reading non-existent file"""
-        content = read_file_content("/nonexistent/file.py")
+        content, tokens = read_file_content("/nonexistent/file.py")
 
         assert "--- FILE NOT FOUND:" in content
         assert "Error: File does not exist" in content
+        assert tokens > 0
 
     def test_read_file_content_directory(self, tmp_path):
         """Test reading a directory"""
-        content = read_file_content(str(tmp_path))
+        content, tokens = read_file_content(str(tmp_path))
 
         assert "--- NOT A FILE:" in content
assert "Error: Path is not a file" in content + assert tokens > 0 def test_read_files_multiple(self, tmp_path): """Test reading multiple files""" @@ -49,7 +52,7 @@ class TestFileUtils: assert "print('file1')" in content assert "print('file2')" in content - assert "Reading 2 file(s)" in summary + assert "Read 2 file(s)" in summary def test_read_files_with_code(self): """Test reading with direct code""" @@ -62,6 +65,121 @@ class TestFileUtils: assert "Direct code:" in summary + def test_read_files_directory_support(self, tmp_path): + """Test reading all files from a directory""" + # Create directory structure + (tmp_path / "file1.py").write_text("print('file1')", encoding="utf-8") + (tmp_path / "file2.js").write_text("console.log('file2')", encoding="utf-8") + (tmp_path / "readme.md").write_text("# README", encoding="utf-8") + + # Create subdirectory + subdir = tmp_path / "src" + subdir.mkdir() + (subdir / "module.py").write_text("class Module: pass", encoding="utf-8") + + # Create hidden file (should be skipped) + (tmp_path / ".hidden").write_text("secret", encoding="utf-8") + + # Read the directory + content, summary = read_files([str(tmp_path)]) + + # Check files are included + assert "file1.py" in content + assert "file2.js" in content + assert "readme.md" in content + assert "src/module.py" in content + + # Check content + assert "print('file1')" in content + assert "console.log('file2')" in content + assert "# README" in content + assert "class Module: pass" in content + + # Hidden file should not be included + assert ".hidden" not in content + assert "secret" not in content + + # Check summary + assert "Processed 1 dir(s)" in summary + assert "Read 4 file(s)" in summary + + def test_read_files_mixed_paths(self, tmp_path): + """Test reading mix of files and directories""" + # Create files + file1 = tmp_path / "direct.py" + file1.write_text("# Direct file", encoding="utf-8") + + # Create directory with files + subdir = tmp_path / "subdir" + subdir.mkdir() + (subdir / "sub1.py").write_text("# Sub file 1", encoding="utf-8") + (subdir / "sub2.py").write_text("# Sub file 2", encoding="utf-8") + + # Read mix of direct file and directory + content, summary = read_files([str(file1), str(subdir)]) + + assert "direct.py" in content + assert "sub1.py" in content + assert "sub2.py" in content + assert "# Direct file" in content + assert "# Sub file 1" in content + assert "# Sub file 2" in content + + assert "Processed 1 dir(s)" in summary + assert "Read 3 file(s)" in summary + + def test_read_files_token_limit(self, tmp_path): + """Test token limit handling""" + # Create files with known token counts + # ~250 tokens each (1000 chars) + large_content = "x" * 1000 + + for i in range(5): + (tmp_path / f"file{i}.txt").write_text(large_content, encoding="utf-8") + + # Read with small token limit (should skip some files) + # Reserve 50k tokens, limit to 51k total = 1k available + # Each file ~250 tokens, so should read ~3-4 files + content, summary = read_files([str(tmp_path)], max_tokens=51_000) + + assert "Skipped" in summary + assert "token limit" in summary + assert "--- SKIPPED FILES (TOKEN LIMIT) ---" in content + + # Count how many files were read + read_count = content.count("--- BEGIN FILE:") + assert 2 <= read_count <= 4 # Should read some but not all + + def test_read_files_large_file(self, tmp_path): + """Test handling of large files""" + # Create a file larger than max_size (1MB) + large_file = tmp_path / "large.txt" + large_file.write_text("x" * 2_000_000, encoding="utf-8") # 2MB + + 
content, summary = read_files([str(large_file)]) + + assert "--- FILE TOO LARGE:" in content + assert "2,000,000 bytes" in content + assert "Read 1 file(s)" in summary # File is counted but shows error message + + def test_read_files_file_extensions(self, tmp_path): + """Test file extension filtering""" + # Create various file types + (tmp_path / "code.py").write_text("python", encoding="utf-8") + (tmp_path / "style.css").write_text("css", encoding="utf-8") + (tmp_path / "binary.exe").write_text("exe", encoding="utf-8") + (tmp_path / "image.jpg").write_text("jpg", encoding="utf-8") + + content, summary = read_files([str(tmp_path)]) + + # Code files should be included + assert "code.py" in content + assert "style.css" in content + + # Binary files should not be included (not in CODE_EXTENSIONS) + assert "binary.exe" not in content + assert "image.jpg" not in content + class TestTokenUtils: """Test token counting utilities""" diff --git a/tools/analyze.py b/tools/analyze.py index 8af4526..1c11683 100644 --- a/tools/analyze.py +++ b/tools/analyze.py @@ -16,7 +16,7 @@ from .base import BaseTool, ToolRequest class AnalyzeRequest(ToolRequest): """Request model for analyze tool""" - files: List[str] = Field(..., description="Files to analyze") + files: List[str] = Field(..., description="Files or directories to analyze") question: str = Field(..., description="What to analyze or look for") analysis_type: Optional[str] = Field( None, @@ -36,6 +36,7 @@ class AnalyzeTool(BaseTool): def get_description(self) -> str: return ( "ANALYZE FILES & CODE - General-purpose analysis for understanding code. " + "Supports both individual files and entire directories. " "Use this for examining files, understanding architecture, or investigating specific aspects. " "Triggers: 'analyze these files', 'examine this code', 'understand this'. " "Perfect for: codebase exploration, dependency analysis, pattern detection. " @@ -49,7 +50,7 @@ class AnalyzeTool(BaseTool): "files": { "type": "array", "items": {"type": "string"}, - "description": "Files to analyze", + "description": "Files or directories to analyze", }, "question": { "type": "string", diff --git a/tools/debug_issue.py b/tools/debug_issue.py index 4cef14d..1566e14 100644 --- a/tools/debug_issue.py +++ b/tools/debug_issue.py @@ -23,7 +23,7 @@ class DebugIssueRequest(ToolRequest): None, description="Stack trace, logs, or additional error context" ) relevant_files: Optional[List[str]] = Field( - None, description="Files that might be related to the issue" + None, description="Files or directories that might be related to the issue" ) runtime_info: Optional[str] = Field( None, description="Environment, versions, or runtime information" diff --git a/tools/review_code.py b/tools/review_code.py index 0fca490..5b12280 100644 --- a/tools/review_code.py +++ b/tools/review_code.py @@ -16,7 +16,7 @@ from .base import BaseTool, ToolRequest class ReviewCodeRequest(ToolRequest): """Request model for review_code tool""" - files: List[str] = Field(..., description="Code files to review") + files: List[str] = Field(..., description="Code files or directories to review") review_type: str = Field( "full", description="Type of review: full|security|performance|quick" ) @@ -41,6 +41,7 @@ class ReviewCodeTool(BaseTool): def get_description(self) -> str: return ( "PROFESSIONAL CODE REVIEW - Comprehensive analysis for bugs, security, and quality. " + "Supports both individual files and entire directories/projects. " "Use this for thorough code review with actionable feedback. 
" "Triggers: 'review this code', 'check for issues', 'find bugs', 'security audit'. " "I'll identify issues by severity (Critical→High→Medium→Low) with specific fixes. " @@ -54,7 +55,7 @@ class ReviewCodeTool(BaseTool): "files": { "type": "array", "items": {"type": "string"}, - "description": "Code files to review", + "description": "Code files or directories to review", }, "review_type": { "type": "string", diff --git a/tools/think_deeper.py b/tools/think_deeper.py index 793be57..6e1e8c1 100644 --- a/tools/think_deeper.py +++ b/tools/think_deeper.py @@ -27,7 +27,7 @@ class ThinkDeeperRequest(ToolRequest): description="Specific aspects to focus on (architecture, performance, security, etc.)", ) reference_files: Optional[List[str]] = Field( - None, description="Optional file paths for additional context" + None, description="Optional file paths or directories for additional context" ) diff --git a/utils/__init__.py b/utils/__init__.py index c7dc6af..7fee1d5 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -2,12 +2,14 @@ Utility functions for Gemini MCP Server """ -from .file_utils import read_file_content, read_files +from .file_utils import read_file_content, read_files, expand_paths, CODE_EXTENSIONS from .token_utils import check_token_limit, estimate_tokens __all__ = [ "read_files", "read_file_content", + "expand_paths", + "CODE_EXTENSIONS", "estimate_tokens", "check_token_limit", ] diff --git a/utils/file_utils.py b/utils/file_utils.py index 9b79007..3f18ab0 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -1,63 +1,217 @@ """ -File reading utilities +File reading utilities with directory support and token management """ +import os from pathlib import Path -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Set + +from .token_utils import estimate_tokens, MAX_CONTEXT_TOKENS -def read_file_content(file_path: str) -> str: - """Read a single file and format it for Gemini""" +# Common code file extensions +CODE_EXTENSIONS = { + '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h', '.hpp', + '.cs', '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.r', '.m', + '.mm', '.sql', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd', + '.yml', '.yaml', '.json', '.xml', '.toml', '.ini', '.cfg', '.conf', + '.txt', '.md', '.rst', '.tex', '.html', '.css', '.scss', '.sass', '.less' +} + + +def expand_paths(paths: List[str], extensions: Optional[Set[str]] = None) -> List[str]: + """ + Expand paths to individual files, handling both files and directories. 
+ + Args: + paths: List of file or directory paths + extensions: Optional set of file extensions to include + + Returns: + List of individual file paths + """ + if extensions is None: + extensions = CODE_EXTENSIONS + + expanded_files = [] + seen = set() + + for path in paths: + path_obj = Path(path) + + if not path_obj.exists(): + continue + + if path_obj.is_file(): + # Add file directly + if str(path_obj) not in seen: + expanded_files.append(str(path_obj)) + seen.add(str(path_obj)) + + elif path_obj.is_dir(): + # Walk directory recursively + for root, dirs, files in os.walk(path_obj): + # Skip hidden directories and __pycache__ + dirs[:] = [d for d in dirs if not d.startswith('.') and d != '__pycache__'] + + for file in files: + # Skip hidden files + if file.startswith('.'): + continue + + file_path = Path(root) / file + + # Check extension + if not extensions or file_path.suffix.lower() in extensions: + full_path = str(file_path) + if full_path not in seen: + expanded_files.append(full_path) + seen.add(full_path) + + # Sort for consistent ordering + expanded_files.sort() + return expanded_files + + +def read_file_content(file_path: str, max_size: int = 1_000_000) -> Tuple[str, int]: + """ + Read a single file and format it for Gemini. + + Args: + file_path: Path to file + max_size: Maximum file size to read + + Returns: + (formatted_content, estimated_tokens) + """ path = Path(file_path) try: # Check if path exists and is a file if not path.exists(): - return f"\n--- FILE NOT FOUND: {file_path} ---\nError: File does not exist\n--- END FILE ---\n" + content = f"\n--- FILE NOT FOUND: {file_path} ---\nError: File does not exist\n--- END FILE ---\n" + return content, estimate_tokens(content) if not path.is_file(): - return f"\n--- NOT A FILE: {file_path} ---\nError: Path is not a file\n--- END FILE ---\n" + content = f"\n--- NOT A FILE: {file_path} ---\nError: Path is not a file\n--- END FILE ---\n" + return content, estimate_tokens(content) + + # Check file size + file_size = path.stat().st_size + if file_size > max_size: + content = f"\n--- FILE TOO LARGE: {file_path} ---\nFile size: {file_size:,} bytes (max: {max_size:,})\n--- END FILE ---\n" + return content, estimate_tokens(content) # Read the file - with open(path, "r", encoding="utf-8") as f: - content = f.read() + with open(path, "r", encoding="utf-8", errors="replace") as f: + file_content = f.read() # Format with clear delimiters for Gemini - return f"\n--- BEGIN FILE: {file_path} ---\n{content}\n--- END FILE: {file_path} ---\n" + formatted = f"\n--- BEGIN FILE: {file_path} ---\n{file_content}\n--- END FILE: {file_path} ---\n" + return formatted, estimate_tokens(formatted) except Exception as e: - return f"\n--- ERROR READING FILE: {file_path} ---\nError: {str(e)}\n--- END FILE ---\n" + content = f"\n--- ERROR READING FILE: {file_path} ---\nError: {str(e)}\n--- END FILE ---\n" + return content, estimate_tokens(content) def read_files( - file_paths: List[str], code: Optional[str] = None + file_paths: List[str], + code: Optional[str] = None, + max_tokens: Optional[int] = None, + reserve_tokens: int = 50_000 ) -> Tuple[str, str]: """ - Read multiple files and optional direct code. - Returns: (full_content, brief_summary) + Read multiple files and optional direct code with smart token management. 
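+    Direct code is counted against the token budget first; files (with
+    directories expanded via expand_paths) are then read in sorted order
+    until max_tokens - reserve_tokens is exhausted, and anything that does
+    not fit is listed in a SKIPPED FILES note.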
+
+    Args:
+        file_paths: List of file or directory paths
+        code: Optional direct code to include
+        max_tokens: Maximum tokens to use (defaults to MAX_CONTEXT_TOKENS)
+        reserve_tokens: Tokens to reserve for prompt and response
+
+    Returns:
+        (full_content, brief_summary)
     """
+    if max_tokens is None:
+        max_tokens = MAX_CONTEXT_TOKENS
+
     content_parts = []
     summary_parts = []
-
-    # Process files
-    if file_paths:
-        summary_parts.append(f"Reading {len(file_paths)} file(s)")
-        for file_path in file_paths:
-            content = read_file_content(file_path)
-            content_parts.append(content)
-
-    # Add direct code if provided
     if code:
-        formatted_code = (
-            f"\n--- BEGIN DIRECT CODE ---\n{code}\n--- END DIRECT CODE ---\n"
-        )
-        content_parts.append(formatted_code)
-        code_preview = code[:50] + "..." if len(code) > 50 else code
-        summary_parts.append(f"Direct code: {code_preview}")
-
-    full_content = "\n\n".join(content_parts)
-    summary = (
-        " | ".join(summary_parts) if summary_parts else "No input provided"
-    )
-
+    total_tokens = 0
+    available_tokens = max_tokens - reserve_tokens
+
+    files_read = []
+    files_skipped = []
+    dirs_processed = []
+
+    # First, handle direct code if provided
+    if code:
+        formatted_code = f"\n--- BEGIN DIRECT CODE ---\n{code}\n--- END DIRECT CODE ---\n"
+        code_tokens = estimate_tokens(formatted_code)
+
+        if code_tokens <= available_tokens:
+            content_parts.append(formatted_code)
+            # The code shares the budget with files: total_tokens is compared
+            # against available_tokens below, so it is counted exactly once here.
+            total_tokens += code_tokens
+            code_preview = code[:50] + "..." if len(code) > 50 else code
+            summary_parts.append(f"Direct code: {code_preview}")
+        else:
+            summary_parts.append("Direct code skipped (too large)")
+
+    # Expand all paths to get individual files
+    if file_paths:
+        # Track which paths are directories
+        for path in file_paths:
+            if Path(path).is_dir():
+                dirs_processed.append(path)
+
+        # Expand to get all files
+        all_files = expand_paths(file_paths)
+
+        if not all_files:
+            # Paths were provided but no readable files were found
+            content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n")
+        else:
+            # Read files up to the token limit
+            for file_path in all_files:
+                if total_tokens >= available_tokens:
+                    files_skipped.append(file_path)
+                    continue
+
+                file_content, file_tokens = read_file_content(file_path)
+
+                # Check if adding this file would exceed the limit
+                if total_tokens + file_tokens <= available_tokens:
+                    content_parts.append(file_content)
+                    total_tokens += file_tokens
+                    files_read.append(file_path)
+                else:
+                    files_skipped.append(file_path)
+
+    # Build summary
+    if dirs_processed:
+        summary_parts.append(f"Processed {len(dirs_processed)} dir(s)")
+    if files_read:
+        summary_parts.append(f"Read {len(files_read)} file(s)")
+    if files_skipped:
+        summary_parts.append(f"Skipped {len(files_skipped)} file(s) (token limit)")
+    if total_tokens > 0:
+        summary_parts.append(f"~{total_tokens:,} tokens used")
+
+    # Add a note listing skipped files, if any
+    if files_skipped:
+        skip_note = "\n\n--- SKIPPED FILES (TOKEN LIMIT) ---\n"
+        skip_note += f"Total skipped: {len(files_skipped)}\n"
+        # Show the first 10 skipped files
+        for file_path in files_skipped[:10]:
+            skip_note += f"  - {file_path}\n"
+        if len(files_skipped) > 10:
+            skip_note += f"  ... and {len(files_skipped) - 10} more\n"
+        skip_note += "--- END SKIPPED FILES ---\n"
+        content_parts.append(skip_note)
+
+    full_content = "\n\n".join(content_parts) if content_parts else ""
+    summary = " | ".join(summary_parts) if summary_parts else "No input provided"
+
     return full_content, summary