feat: add full directory support and smart file handling
Major improvements to file handling capabilities:
- Add directory traversal support to all file-processing tools
- Tools now accept both individual files and entire directories
- Automatically expand directories and discover code files recursively
- Smart filtering: skip hidden files, __pycache__, and non-code files
- Progressive token loading: read as many files as possible within limits
- Clear file separation markers with full paths for Gemini

Key changes:
- Rewrite file_utils.py with expand_paths() and improved read_files()
- Update all tool descriptions to indicate directory support
- Add comprehensive tests for directory handling and token limits
- Document tool parameters and examples in README
- Bump version to 2.4.2

All tools (analyze, review_code, debug_issue, think_deeper) now support:
- Single files: "analyze main.py"
- Directories: "review src/"
- Mixed paths: "analyze config.py, src/, tests/"

This enables analyzing entire projects or specific subsystems efficiently while respecting token limits and providing clear file boundaries.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -16,23 +16,26 @@ class TestFileUtils:
|
||||
"def hello():\n return 'world'", encoding="utf-8"
|
||||
)
|
||||
|
||||
content = read_file_content(str(test_file))
|
||||
content, tokens = read_file_content(str(test_file))
|
||||
assert "--- BEGIN FILE:" in content
|
||||
assert "--- END FILE:" in content
|
||||
assert "def hello():" in content
|
||||
assert "return 'world'" in content
|
||||
assert tokens > 0 # Should have estimated tokens
|
||||
|
||||
def test_read_file_content_not_found(self):
|
||||
"""Test reading non-existent file"""
|
||||
content = read_file_content("/nonexistent/file.py")
|
||||
content, tokens = read_file_content("/nonexistent/file.py")
|
||||
assert "--- FILE NOT FOUND:" in content
|
||||
assert "Error: File does not exist" in content
|
||||
assert tokens > 0
|
||||
|
||||
def test_read_file_content_directory(self, tmp_path):
|
||||
"""Test reading a directory"""
|
||||
content = read_file_content(str(tmp_path))
|
||||
content, tokens = read_file_content(str(tmp_path))
|
||||
assert "--- NOT A FILE:" in content
|
||||
assert "Error: Path is not a file" in content
|
||||
assert tokens > 0
|
||||
|
||||
def test_read_files_multiple(self, tmp_path):
|
||||
"""Test reading multiple files"""
|
||||
@@ -49,7 +52,7 @@ class TestFileUtils:
|
||||
assert "print('file1')" in content
|
||||
assert "print('file2')" in content
|
||||
|
||||
assert "Reading 2 file(s)" in summary
|
||||
assert "Read 2 file(s)" in summary
|
||||
|
||||
def test_read_files_with_code(self):
|
||||
"""Test reading with direct code"""
|
||||
@@ -62,6 +65,121 @@ class TestFileUtils:
|
||||
|
||||
assert "Direct code:" in summary
|
||||
|
||||
def test_read_files_directory_support(self, tmp_path):
|
||||
"""Test reading all files from a directory"""
|
||||
# Create directory structure
|
||||
(tmp_path / "file1.py").write_text("print('file1')", encoding="utf-8")
|
||||
(tmp_path / "file2.js").write_text("console.log('file2')", encoding="utf-8")
|
||||
(tmp_path / "readme.md").write_text("# README", encoding="utf-8")
|
||||
|
||||
# Create subdirectory
|
||||
subdir = tmp_path / "src"
|
||||
subdir.mkdir()
|
||||
(subdir / "module.py").write_text("class Module: pass", encoding="utf-8")
|
||||
|
||||
# Create hidden file (should be skipped)
|
||||
(tmp_path / ".hidden").write_text("secret", encoding="utf-8")
|
||||
|
||||
# Read the directory
|
||||
content, summary = read_files([str(tmp_path)])
|
||||
|
||||
# Check files are included
|
||||
assert "file1.py" in content
|
||||
assert "file2.js" in content
|
||||
assert "readme.md" in content
|
||||
assert "src/module.py" in content
|
||||
|
||||
# Check content
|
||||
assert "print('file1')" in content
|
||||
assert "console.log('file2')" in content
|
||||
assert "# README" in content
|
||||
assert "class Module: pass" in content
|
||||
|
||||
# Hidden file should not be included
|
||||
assert ".hidden" not in content
|
||||
assert "secret" not in content
|
||||
|
||||
# Check summary
|
||||
assert "Processed 1 dir(s)" in summary
|
||||
assert "Read 4 file(s)" in summary
|
||||
|
||||
def test_read_files_mixed_paths(self, tmp_path):
|
||||
"""Test reading mix of files and directories"""
|
||||
# Create files
|
||||
file1 = tmp_path / "direct.py"
|
||||
file1.write_text("# Direct file", encoding="utf-8")
|
||||
|
||||
# Create directory with files
|
||||
subdir = tmp_path / "subdir"
|
||||
subdir.mkdir()
|
||||
(subdir / "sub1.py").write_text("# Sub file 1", encoding="utf-8")
|
||||
(subdir / "sub2.py").write_text("# Sub file 2", encoding="utf-8")
|
||||
|
||||
# Read mix of direct file and directory
|
||||
content, summary = read_files([str(file1), str(subdir)])
|
||||
|
||||
assert "direct.py" in content
|
||||
assert "sub1.py" in content
|
||||
assert "sub2.py" in content
|
||||
assert "# Direct file" in content
|
||||
assert "# Sub file 1" in content
|
||||
assert "# Sub file 2" in content
|
||||
|
||||
assert "Processed 1 dir(s)" in summary
|
||||
assert "Read 3 file(s)" in summary
|
||||
|
||||
def test_read_files_token_limit(self, tmp_path):
|
||||
"""Test token limit handling"""
|
||||
# Create files with known token counts
|
||||
# ~250 tokens each (1000 chars)
|
||||
large_content = "x" * 1000
|
||||
|
||||
for i in range(5):
|
||||
(tmp_path / f"file{i}.txt").write_text(large_content, encoding="utf-8")
|
||||
|
||||
# Read with small token limit (should skip some files)
|
||||
# Reserve 50k tokens, limit to 51k total = 1k available
|
||||
# Each file ~250 tokens, so should read ~3-4 files
|
||||
content, summary = read_files([str(tmp_path)], max_tokens=51_000)
|
||||
|
||||
assert "Skipped" in summary
|
||||
assert "token limit" in summary
|
||||
assert "--- SKIPPED FILES (TOKEN LIMIT) ---" in content
|
||||
|
||||
# Count how many files were read
|
||||
read_count = content.count("--- BEGIN FILE:")
|
||||
assert 2 <= read_count <= 4 # Should read some but not all
|
||||
|
||||
def test_read_files_large_file(self, tmp_path):
|
||||
"""Test handling of large files"""
|
||||
# Create a file larger than max_size (1MB)
|
||||
large_file = tmp_path / "large.txt"
|
||||
large_file.write_text("x" * 2_000_000, encoding="utf-8") # 2MB
|
||||
|
||||
content, summary = read_files([str(large_file)])
|
||||
|
||||
assert "--- FILE TOO LARGE:" in content
|
||||
assert "2,000,000 bytes" in content
|
||||
assert "Read 1 file(s)" in summary # File is counted but shows error message
|
||||
|
||||
def test_read_files_file_extensions(self, tmp_path):
|
||||
"""Test file extension filtering"""
|
||||
# Create various file types
|
||||
(tmp_path / "code.py").write_text("python", encoding="utf-8")
|
||||
(tmp_path / "style.css").write_text("css", encoding="utf-8")
|
||||
(tmp_path / "binary.exe").write_text("exe", encoding="utf-8")
|
||||
(tmp_path / "image.jpg").write_text("jpg", encoding="utf-8")
|
||||
|
||||
content, summary = read_files([str(tmp_path)])
|
||||
|
||||
# Code files should be included
|
||||
assert "code.py" in content
|
||||
assert "style.css" in content
|
||||
|
||||
# Binary files should not be included (not in CODE_EXTENSIONS)
|
||||
assert "binary.exe" not in content
|
||||
assert "image.jpg" not in content
|
||||
|
||||
|
||||
class TestTokenUtils:
|
||||
"""Test token counting utilities"""
|
||||
|
||||
Reference in New Issue
Block a user