From e8df6a7a31f5355ca8f5cfc7c600682d0362b3ea Mon Sep 17 00:00:00 2001 From: Fahad Date: Wed, 11 Jun 2025 17:18:40 +0400 Subject: [PATCH] Comments --- tests/test_precommit_with_mock_store.py | 9 ++++++++- tools/precommit.py | 18 +++++++++++++++++- utils/file_utils.py | 3 +++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/test_precommit_with_mock_store.py b/tests/test_precommit_with_mock_store.py index 5cb6e1f..4788ee4 100644 --- a/tests/test_precommit_with_mock_store.py +++ b/tests/test_precommit_with_mock_store.py @@ -113,7 +113,14 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging @pytest.mark.asyncio async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis): - """Test that file content appears in expected locations""" + """Test that file content appears in expected locations + + This test validates our design decision that files can legitimately appear in both: + 1. Git Diffs section: Shows only changed lines + limited context (wrapped with BEGIN DIFF markers) + 2. Additional Context section: Shows complete file content (wrapped with BEGIN FILE markers) + + This is intentional, not a bug - the AI needs both perspectives for comprehensive analysis. + """ temp_dir, config_path = temp_repo # Create request with files parameter diff --git a/tools/precommit.py b/tools/precommit.py index 1fd1498..7ffc45f 100644 --- a/tools/precommit.py +++ b/tools/precommit.py @@ -1,5 +1,11 @@ """ Tool for pre-commit validation of git changes across multiple repositories. + +Design Note - File Content in Multiple Sections: +Files may legitimately appear in both "Git Diffs" and "Additional Context Files" sections: +- Git Diffs: Shows changed lines + limited context (marked with "BEGIN DIFF" / "END DIFF") +- Additional Context: Shows complete file content (marked with "BEGIN FILE" / "END FILE") +This provides comprehensive context for AI analysis - not a duplication bug. """ import os @@ -239,9 +245,12 @@ class Precommit(BaseTool): staged_files = [f for f in files_output.strip().split("\n") if f] # Generate per-file diffs for staged changes + # Each diff is wrapped with clear markers to distinguish from full file content for file_path in staged_files: success, diff = run_git_command(repo_path, ["diff", "--cached", "--", file_path]) if success and diff.strip(): + # Use "BEGIN DIFF" markers (distinct from "BEGIN FILE" markers in utils/file_utils.py) + # This allows AI to distinguish between diff context vs complete file content diff_header = f"\n--- BEGIN DIFF: {repo_name} / {file_path} (staged) ---\n" diff_footer = f"\n--- END DIFF: {repo_name} / {file_path} ---\n" formatted_diff = diff_header + diff + diff_footer @@ -258,6 +267,7 @@ class Precommit(BaseTool): unstaged_files = [f for f in files_output.strip().split("\n") if f] # Generate per-file diffs for unstaged changes + # Same clear marker pattern as staged changes above for file_path in unstaged_files: success, diff = run_git_command(repo_path, ["diff", "--", file_path]) if success and diff.strip(): @@ -372,7 +382,8 @@ class Precommit(BaseTool): if total_tokens > 0: prompt_parts.append(f"\nTotal context tokens used: ~{total_tokens:,}") - # Add the diff contents + # Add the diff contents with clear section markers + # Each diff is wrapped with "--- BEGIN DIFF: ... ---" and "--- END DIFF: ... ---" prompt_parts.append("\n## Git Diffs\n") if all_diffs: prompt_parts.extend(all_diffs) @@ -380,6 +391,11 @@ class Precommit(BaseTool): prompt_parts.append("--- NO DIFFS FOUND ---") # Add context files content if provided + # IMPORTANT: Files may legitimately appear in BOTH sections: + # - Git Diffs: Show only changed lines + limited context (what changed) + # - Additional Context: Show complete file content (full understanding) + # This is intentional design for comprehensive AI analysis, not duplication bug. + # Each file in this section is wrapped with "--- BEGIN FILE: ... ---" and "--- END FILE: ... ---" if context_files_content: prompt_parts.append("\n## Additional Context Files") prompt_parts.append( diff --git a/utils/file_utils.py b/utils/file_utils.py index 4bc7928..6d92512 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -463,6 +463,9 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i # Format with clear delimiters that help the AI understand file boundaries # Using consistent markers makes it easier for the model to parse + # NOTE: These markers ("--- BEGIN FILE: ... ---") are distinct from git diff markers + # ("--- BEGIN DIFF: ... ---") to allow AI to distinguish between complete file content + # vs. partial diff content when files appear in both sections formatted = f"\n--- BEGIN FILE: {file_path} ---\n{file_content}\n--- END FILE: {file_path} ---\n" return formatted, estimate_tokens(formatted)