Comments
This commit is contained in:
@@ -113,7 +113,14 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis):
|
async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis):
|
||||||
"""Test that file content appears in expected locations"""
|
"""Test that file content appears in expected locations
|
||||||
|
|
||||||
|
This test validates our design decision that files can legitimately appear in both:
|
||||||
|
1. Git Diffs section: Shows only changed lines + limited context (wrapped with BEGIN DIFF markers)
|
||||||
|
2. Additional Context section: Shows complete file content (wrapped with BEGIN FILE markers)
|
||||||
|
|
||||||
|
This is intentional, not a bug - the AI needs both perspectives for comprehensive analysis.
|
||||||
|
"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
# Create request with files parameter
|
# Create request with files parameter
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
"""
|
"""
|
||||||
Tool for pre-commit validation of git changes across multiple repositories.
|
Tool for pre-commit validation of git changes across multiple repositories.
|
||||||
|
|
||||||
|
Design Note - File Content in Multiple Sections:
|
||||||
|
Files may legitimately appear in both "Git Diffs" and "Additional Context Files" sections:
|
||||||
|
- Git Diffs: Shows changed lines + limited context (marked with "BEGIN DIFF" / "END DIFF")
|
||||||
|
- Additional Context: Shows complete file content (marked with "BEGIN FILE" / "END FILE")
|
||||||
|
This provides comprehensive context for AI analysis - not a duplication bug.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -239,9 +245,12 @@ class Precommit(BaseTool):
|
|||||||
staged_files = [f for f in files_output.strip().split("\n") if f]
|
staged_files = [f for f in files_output.strip().split("\n") if f]
|
||||||
|
|
||||||
# Generate per-file diffs for staged changes
|
# Generate per-file diffs for staged changes
|
||||||
|
# Each diff is wrapped with clear markers to distinguish from full file content
|
||||||
for file_path in staged_files:
|
for file_path in staged_files:
|
||||||
success, diff = run_git_command(repo_path, ["diff", "--cached", "--", file_path])
|
success, diff = run_git_command(repo_path, ["diff", "--cached", "--", file_path])
|
||||||
if success and diff.strip():
|
if success and diff.strip():
|
||||||
|
# Use "BEGIN DIFF" markers (distinct from "BEGIN FILE" markers in utils/file_utils.py)
|
||||||
|
# This allows AI to distinguish between diff context vs complete file content
|
||||||
diff_header = f"\n--- BEGIN DIFF: {repo_name} / {file_path} (staged) ---\n"
|
diff_header = f"\n--- BEGIN DIFF: {repo_name} / {file_path} (staged) ---\n"
|
||||||
diff_footer = f"\n--- END DIFF: {repo_name} / {file_path} ---\n"
|
diff_footer = f"\n--- END DIFF: {repo_name} / {file_path} ---\n"
|
||||||
formatted_diff = diff_header + diff + diff_footer
|
formatted_diff = diff_header + diff + diff_footer
|
||||||
@@ -258,6 +267,7 @@ class Precommit(BaseTool):
|
|||||||
unstaged_files = [f for f in files_output.strip().split("\n") if f]
|
unstaged_files = [f for f in files_output.strip().split("\n") if f]
|
||||||
|
|
||||||
# Generate per-file diffs for unstaged changes
|
# Generate per-file diffs for unstaged changes
|
||||||
|
# Same clear marker pattern as staged changes above
|
||||||
for file_path in unstaged_files:
|
for file_path in unstaged_files:
|
||||||
success, diff = run_git_command(repo_path, ["diff", "--", file_path])
|
success, diff = run_git_command(repo_path, ["diff", "--", file_path])
|
||||||
if success and diff.strip():
|
if success and diff.strip():
|
||||||
@@ -372,7 +382,8 @@ class Precommit(BaseTool):
|
|||||||
if total_tokens > 0:
|
if total_tokens > 0:
|
||||||
prompt_parts.append(f"\nTotal context tokens used: ~{total_tokens:,}")
|
prompt_parts.append(f"\nTotal context tokens used: ~{total_tokens:,}")
|
||||||
|
|
||||||
# Add the diff contents
|
# Add the diff contents with clear section markers
|
||||||
|
# Each diff is wrapped with "--- BEGIN DIFF: ... ---" and "--- END DIFF: ... ---"
|
||||||
prompt_parts.append("\n## Git Diffs\n")
|
prompt_parts.append("\n## Git Diffs\n")
|
||||||
if all_diffs:
|
if all_diffs:
|
||||||
prompt_parts.extend(all_diffs)
|
prompt_parts.extend(all_diffs)
|
||||||
@@ -380,6 +391,11 @@ class Precommit(BaseTool):
|
|||||||
prompt_parts.append("--- NO DIFFS FOUND ---")
|
prompt_parts.append("--- NO DIFFS FOUND ---")
|
||||||
|
|
||||||
# Add context files content if provided
|
# Add context files content if provided
|
||||||
|
# IMPORTANT: Files may legitimately appear in BOTH sections:
|
||||||
|
# - Git Diffs: Show only changed lines + limited context (what changed)
|
||||||
|
# - Additional Context: Show complete file content (full understanding)
|
||||||
|
# This is intentional design for comprehensive AI analysis, not duplication bug.
|
||||||
|
# Each file in this section is wrapped with "--- BEGIN FILE: ... ---" and "--- END FILE: ... ---"
|
||||||
if context_files_content:
|
if context_files_content:
|
||||||
prompt_parts.append("\n## Additional Context Files")
|
prompt_parts.append("\n## Additional Context Files")
|
||||||
prompt_parts.append(
|
prompt_parts.append(
|
||||||
|
|||||||
@@ -463,6 +463,9 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
|
|||||||
|
|
||||||
# Format with clear delimiters that help the AI understand file boundaries
|
# Format with clear delimiters that help the AI understand file boundaries
|
||||||
# Using consistent markers makes it easier for the model to parse
|
# Using consistent markers makes it easier for the model to parse
|
||||||
|
# NOTE: These markers ("--- BEGIN FILE: ... ---") are distinct from git diff markers
|
||||||
|
# ("--- BEGIN DIFF: ... ---") to allow AI to distinguish between complete file content
|
||||||
|
# vs. partial diff content when files appear in both sections
|
||||||
formatted = f"\n--- BEGIN FILE: {file_path} ---\n{file_content}\n--- END FILE: {file_path} ---\n"
|
formatted = f"\n--- BEGIN FILE: {file_path} ---\n{file_content}\n--- END FILE: {file_path} ---\n"
|
||||||
return formatted, estimate_tokens(formatted)
|
return formatted, estimate_tokens(formatted)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user