Fix directory expansion tracking in conversation memory
When directories were provided to tools, only the directory path was stored in conversation history instead of the individual expanded files. This caused file filtering to incorrectly skip files in continued conversations.

Changes:
- Modified _prepare_file_content_for_prompt to return (content, processed_files)
- Updated all tools to track actually processed files for conversation memory
- Ensures directories are tracked as their expanded individual files

Fixes an issue where a Swift directory with 46 files was not properly embedded in conversation continuations.
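To make the new contract concrete, here is a minimal, self-contained sketch of the calling convention. This is an illustration only, not the code in this commit: the real helper is BaseTool._prepare_file_content_for_prompt, takes additional arguments such as max_tokens and reserve_tokens, and uses utils.file_utils.expand_paths rather than the hand-rolled directory walk shown here.

import os

def prepare_file_content(paths: list[str]) -> tuple[str, list[str]]:
    """Return (formatted_content, actually_processed_files).

    Directories are expanded, so the second element always lists the
    individual files that were read and embedded.
    """
    expanded: list[str] = []
    for path in paths:
        if os.path.isdir(path):
            # Expand a directory into its individual files
            for root, _dirs, names in os.walk(path):
                expanded.extend(os.path.join(root, name) for name in names)
        else:
            expanded.append(path)

    parts: list[str] = []
    for file_path in expanded:
        with open(file_path, encoding="utf-8", errors="ignore") as f:
            parts.append(f"--- BEGIN {file_path} ---\n{f.read()}\n--- END {file_path} ---")
    return "\n".join(parts), expanded

# A tool then records the second element for conversation memory:
#     content, processed = prepare_file_content(request.files)
#     self._actually_processed_files = processed  # individual files, not a directory path

With the expanded list stored on the tool instance, the conversation turn records individual files, so filtering in continued conversations no longer skips files that were originally supplied as a directory.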
@@ -137,7 +137,8 @@ class AnalyzeTool(BaseTool):

         # Use centralized file processing logic
         continuation_id = getattr(request, "continuation_id", None)
-        file_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Files")
+        file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Files")
+        self._actually_processed_files = processed_files

         # Build analysis instructions
         analysis_focus = []
@@ -544,7 +544,7 @@ class BaseTool(ABC):
         reserve_tokens: int = 1_000,
         remaining_budget: Optional[int] = None,
         arguments: Optional[dict] = None,
-    ) -> str:
+    ) -> tuple[str, list[str]]:
         """
         Centralized file processing for tool prompts.

@@ -563,10 +563,13 @@ class BaseTool(ABC):
            arguments: Original tool arguments (used to extract _remaining_tokens if available)

        Returns:
-            str: Formatted file content string ready for prompt inclusion
+            tuple[str, list[str]]: (formatted_file_content, actually_processed_files)
+                - formatted_file_content: Formatted file content string ready for prompt inclusion
+                - actually_processed_files: List of individual file paths that were actually read and embedded
+                  (directories are expanded to individual files)
        """
        if not request_files:
-            return ""
+            return "", []

        # Note: Even if conversation history is already embedded, we still need to process
        # any NEW files that aren't in the conversation history yet. The filter_new_files
@@ -705,6 +708,7 @@ class BaseTool(ABC):
        )

        content_parts = []
+        actually_processed_files = []

        # Read content of new files only
        if files_to_embed:
@@ -713,6 +717,11 @@ class BaseTool(ABC):
                f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}"
            )
            try:
+                # Before calling read_files, expand directories to get individual file paths
+                from utils.file_utils import expand_paths
+
+                expanded_files = expand_paths(files_to_embed)
+                logger.debug(f"[FILES] {self.name}: Expanded {len(files_to_embed)} paths to {len(expanded_files)} individual files")
                file_content = read_files(
                    files_to_embed,
                    max_tokens=effective_max_tokens + reserve_tokens,
@@ -721,6 +730,9 @@ class BaseTool(ABC):
                )
                self._validate_token_limit(file_content, context_description)
                content_parts.append(file_content)

+                # Track the expanded files as actually processed
+                actually_processed_files.extend(expanded_files)
+
                # Estimate tokens for debug logging
                from utils.token_utils import estimate_tokens
@@ -730,6 +742,7 @@ class BaseTool(ABC):
                    f"{self.name} tool successfully embedded {len(files_to_embed)} files ({content_tokens:,} tokens)"
                )
                logger.debug(f"[FILES] {self.name}: Successfully embedded files - {content_tokens:,} tokens used")
+                logger.debug(f"[FILES] {self.name}: Actually processed {len(actually_processed_files)} individual files")
            except Exception as e:
                logger.error(f"{self.name} tool failed to embed files {files_to_embed}: {type(e).__name__}: {e}")
                logger.debug(f"[FILES] {self.name}: File embedding failed - {type(e).__name__}: {e}")
@@ -759,8 +772,8 @@ class BaseTool(ABC):
            logger.debug(f"[FILES] {self.name}: No skipped files to note")

        result = "".join(content_parts) if content_parts else ""
-        logger.debug(f"[FILES] {self.name}: _prepare_file_content_for_prompt returning {len(result)} chars")
-        return result
+        logger.debug(f"[FILES] {self.name}: _prepare_file_content_for_prompt returning {len(result)} chars, {len(actually_processed_files)} processed files")
+        return result, actually_processed_files

    def get_websearch_instruction(self, use_websearch: bool, tool_specific: Optional[str] = None) -> str:
        """
@@ -1408,7 +1421,9 @@ When recommending searches, be specific about what information you need and why
        )

        # Add this response as the first turn (assistant turn)
-        request_files = getattr(request, "files", []) or []
+        # Use actually processed files from file preparation instead of original request files
+        # This ensures directories are tracked as their individual expanded files
+        request_files = getattr(self, "_actually_processed_files", []) or getattr(request, "files", []) or []
        # Extract model metadata
        model_provider = None
        model_name = None
@@ -124,9 +124,10 @@ class ChatTool(BaseTool):

        # Add context files if provided (using centralized file handling with filtering)
        if request.files:
-            file_content = self._prepare_file_content_for_prompt(
+            file_content, processed_files = self._prepare_file_content_for_prompt(
                request.files, request.continuation_id, "Context files"
            )
+            self._actually_processed_files = processed_files
            if file_content:
                user_content = f"{user_content}\n\n=== CONTEXT FILES ===\n{file_content}\n=== END CONTEXT ===="

@@ -196,7 +196,8 @@ class CodeReviewTool(BaseTool):

        # Use centralized file processing logic
        continuation_id = getattr(request, "continuation_id", None)
-        file_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code")
+        file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code")
+        self._actually_processed_files = processed_files

        # Build customized review instructions based on review type
        review_focus = []
@@ -166,7 +166,8 @@ class DebugIssueTool(BaseTool):
        if request.files:
            # Use centralized file processing logic
            continuation_id = getattr(request, "continuation_id", None)
-            file_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code")
+            file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code")
+            self._actually_processed_files = processed_files

            if file_content:
                context_parts.append(f"\n=== RELEVANT CODE ===\n{file_content}\n=== END CODE ===")
@@ -408,13 +408,14 @@ class Precommit(BaseTool):
        remaining_tokens = max_tokens - total_tokens

        # Use centralized file handling with filtering for duplicate prevention
-        file_content = self._prepare_file_content_for_prompt(
+        file_content, processed_files = self._prepare_file_content_for_prompt(
            translated_files,
            request.continuation_id,
            "Context files",
            max_tokens=remaining_tokens + 1000,  # Add back the reserve that was calculated
            reserve_tokens=1000,  # Small reserve for formatting
        )
+        self._actually_processed_files = processed_files

        if file_content:
            context_tokens = estimate_tokens(file_content)
@@ -330,13 +330,14 @@ class RefactorTool(BaseTool):
        # Use standard file content preparation with dynamic token budget and line numbers
        try:
            logger.debug(f"[REFACTOR] Preparing file content for {len(examples_to_process)} style examples")
-            content = self._prepare_file_content_for_prompt(
+            content, processed_files = self._prepare_file_content_for_prompt(
                examples_to_process,
                continuation_id,
                "Style guide examples",
                max_tokens=style_examples_budget,
                reserve_tokens=1000,
            )
+            # Store processed files for tracking - style examples are tracked separately from main code files

            # Determine how many files were actually included
            if content:
@@ -478,9 +479,10 @@ class RefactorTool(BaseTool):

        # Use centralized file processing logic for main code files (with line numbers enabled)
        logger.debug(f"[REFACTOR] Preparing {len(code_files_to_process)} code files for analysis")
-        code_content = self._prepare_file_content_for_prompt(
+        code_content, processed_files = self._prepare_file_content_for_prompt(
            code_files_to_process, continuation_id, "Code to analyze", max_tokens=remaining_tokens, reserve_tokens=2000
        )
+        self._actually_processed_files = processed_files

        if code_content:
            from utils.token_utils import estimate_tokens
@@ -214,13 +214,14 @@ class TestGenTool(BaseTool):
        # Use standard file content preparation with dynamic token budget
        try:
            logger.debug(f"[TESTGEN] Preparing file content for {len(examples_to_process)} test examples")
-            content = self._prepare_file_content_for_prompt(
+            content, processed_files = self._prepare_file_content_for_prompt(
                examples_to_process,
                continuation_id,
                "Test examples",
                max_tokens=test_examples_budget,
                reserve_tokens=1000,
            )
+            # Store processed files for tracking - test examples are tracked separately from main code files

            # Determine how many files were actually included
            if content:
@@ -358,9 +359,10 @@ class TestGenTool(BaseTool):

        # Use centralized file processing logic for main code files (after deduplication)
        logger.debug(f"[TESTGEN] Preparing {len(code_files_to_process)} code files for analysis")
-        code_content = self._prepare_file_content_for_prompt(
+        code_content, processed_files = self._prepare_file_content_for_prompt(
            code_files_to_process, continuation_id, "Code to test", max_tokens=remaining_tokens, reserve_tokens=2000
        )
+        self._actually_processed_files = processed_files

        if code_content:
            from utils.token_utils import estimate_tokens
@@ -148,7 +148,8 @@ class ThinkDeepTool(BaseTool):
        if request.files:
            # Use centralized file processing logic
            continuation_id = getattr(request, "continuation_id", None)
-            file_content = self._prepare_file_content_for_prompt(request.files, continuation_id, "Reference files")
+            file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Reference files")
+            self._actually_processed_files = processed_files

            if file_content:
                context_parts.append(f"\n=== REFERENCE FILES ===\n{file_content}\n=== END FILES ===")