Extra logging and more tests

Author: Fahad
Date: 2025-06-11 18:26:13 +04:00
parent 3aef6e961b
commit 4974fbc725
10 changed files with 400 additions and 112 deletions
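
All of the new [FLOW] and [FILES] messages in this commit go through logger.debug, so they only appear when DEBUG logging is enabled. A minimal way to surface them during a local run (the format string is just an example, not the project's logging config):

import logging

# Enable DEBUG output for the whole process; narrow this to specific module
# loggers instead if the full firehose is too noisy.
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(name)s %(levelname)s %(message)s")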

View File

@@ -250,12 +250,16 @@ def add_turn(
     - Turn limits prevent runaway conversations
     - File references are preserved for cross-tool access
     """
+    logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
     context = get_thread(thread_id)
     if not context:
+        logger.debug(f"[FLOW] Thread {thread_id} not found for turn addition")
         return False

     # Check turn limit to prevent runaway conversations
     if len(context.turns) >= MAX_CONVERSATION_TURNS:
+        logger.debug(f"[FLOW] Thread {thread_id} at max turns ({MAX_CONVERSATION_TURNS})")
         return False

     # Create new turn with complete metadata
@@ -277,7 +281,8 @@ def add_turn(
         key = f"thread:{thread_id}"
         client.setex(key, 3600, context.model_dump_json())  # Refresh TTL to 1 hour
         return True
-    except Exception:
+    except Exception as e:
+        logger.debug(f"[FLOW] Failed to save turn to Redis: {type(e).__name__}")
         return False
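
The persistence path stores the serialized context under a thread:<id> key and refreshes a one-hour TTL on every saved turn; the change above also logs the exception class when the write fails. A minimal standalone sketch of the same setex-with-TTL pattern using redis-py (the connection settings and the save_context helper are assumptions, not the project's actual code):

import json
import logging

import redis  # redis-py

logger = logging.getLogger(__name__)
client = redis.Redis(host="localhost", port=6379, db=0)  # assumed connection settings

def save_context(thread_id: str, context_payload: dict) -> bool:
    key = f"thread:{thread_id}"
    try:
        # SETEX writes the value and (re)sets the 1-hour expiry in one call,
        # so every successful save refreshes the thread's TTL.
        client.setex(key, 3600, json.dumps(context_payload))
        return True
    except Exception as e:
        # As in the diff, log only the exception class to keep the line compact.
        logger.debug(f"[FLOW] Failed to save turn to Redis: {type(e).__name__}")
        return False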
@@ -296,19 +301,29 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
         list[str]: Deduplicated list of file paths referenced in the conversation
     """
     if not context.turns:
+        logger.debug(f"[FILES] No turns found, returning empty file list")
         return []

     # Collect all unique files from all turns, preserving order of first appearance
     seen_files = set()
     unique_files = []

+    logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns")
-    for turn in context.turns:
+    for i, turn in enumerate(context.turns):
         if turn.files:
+            logger.debug(f"[FILES] Turn {i+1} has {len(turn.files)} files: {turn.files}")
             for file_path in turn.files:
                 if file_path not in seen_files:
                     seen_files.add(file_path)
                     unique_files.append(file_path)
+                    logger.debug(f"[FILES] Added new file: {file_path}")
+                else:
+                    logger.debug(f"[FILES] Duplicate file skipped: {file_path}")
+        else:
+            logger.debug(f"[FILES] Turn {i+1} has no files")

+    logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}")
     return unique_files
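
The loop above de-duplicates paths across turns while preserving the order of first appearance. A self-contained sketch of that pattern (Turn here is a simplified stand-in for the real turn model):

from dataclasses import dataclass

@dataclass
class Turn:
    files: list[str] | None = None  # simplified stand-in for the real turn model

def collect_unique_files(turns: list[Turn]) -> list[str]:
    seen: set[str] = set()
    unique: list[str] = []
    for turn in turns:
        for path in turn.files or []:
            if path not in seen:  # first appearance wins; later duplicates are skipped
                seen.add(path)
                unique.append(path)
    return unique

turns = [Turn(["a.py", "b.py"]), Turn(None), Turn(["b.py", "c.py"])]
assert collect_unique_files(turns) == ["a.py", "b.py", "c.py"]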
@@ -345,6 +360,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     # Get all unique files referenced in this conversation
     all_files = get_conversation_file_list(context)
+    logger.debug(f"[FILES] Found {len(all_files)} unique files in conversation history")

     history_parts = [
         "=== CONVERSATION HISTORY ===",
@@ -356,6 +372,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     # Embed all files referenced in this conversation once at the start
     if all_files:
+        logger.debug(f"[FILES] Starting embedding for {len(all_files)} files")
         history_parts.extend(
             [
                 "=== FILES REFERENCED IN THIS CONVERSATION ===",
@@ -379,6 +396,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
             for file_path in all_files:
                 try:
+                    logger.debug(f"[FILES] Processing file {file_path}")
                     # Correctly unpack the tuple returned by read_file_content
                     formatted_content, content_tokens = read_file_content(file_path)
                     if formatted_content:
@@ -391,20 +409,24 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
                             logger.debug(
                                 f"📄 File embedded in conversation history: {file_path} ({content_tokens:,} tokens)"
                             )
+                            logger.debug(f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})")
                         else:
                             files_truncated += 1
                             logger.debug(
                                 f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)"
                             )
+                            logger.debug(f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)")
                             # Stop processing more files
                             break
                     else:
                         logger.debug(f"📄 File skipped (empty content): {file_path}")
+                        logger.debug(f"[FILES] File {file_path} has empty content - skipping")
                 except Exception as e:
                     # Skip files that can't be read but log the failure
                     logger.warning(
                         f"📄 Failed to embed file in conversation history: {file_path} - {type(e).__name__}: {e}"
                     )
+                    logger.debug(f"[FILES] Failed to read file {file_path} - {type(e).__name__}: {e}")
                     continue

             if file_contents:
@@ -417,11 +439,13 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
                 logger.debug(
                     f"📄 Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens"
                 )
+                logger.debug(f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total")
             else:
                 history_parts.append("(No accessible files found)")
                 logger.debug(
                     f"📄 Conversation history file embedding: no accessible files found from {len(all_files)} requested"
                 )
+                logger.debug(f"[FILES] No accessible files found from {len(all_files)} requested files")
         else:
             # Fallback to original read_files function for backward compatibility
             files_content = read_files_func(all_files)
@@ -481,6 +505,11 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     from utils.token_utils import estimate_tokens

     total_conversation_tokens = estimate_tokens(complete_history)

+    # Summary log of what was built
+    user_turns = len([t for t in context.turns if t.role == "user"])
+    assistant_turns = len([t for t in context.turns if t.role == "assistant"])
+    logger.debug(f"[FLOW] Built conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens")
     return complete_history, total_conversation_tokens
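
Taken together, these hunks instrument a single-pass token budget: files are embedded one by one until adding the next would exceed MAX_CONTENT_TOKENS, at which point embedding stops entirely. A rough standalone sketch of that loop, as read from the diff (the constant's value and the read_one callback are placeholders, not the project's real configuration):

MAX_CONTENT_TOKENS = 100_000  # placeholder value; the real constant lives in the module

def embed_files(paths, read_one):
    """read_one(path) -> (formatted_content, estimated_tokens), like read_file_content."""
    file_contents, total_tokens = [], 0
    files_included = files_truncated = 0
    for path in paths:
        formatted, tokens = read_one(path)
        if not formatted:
            continue  # empty content: skip this file, keep going
        if total_tokens + tokens > MAX_CONTENT_TOKENS:
            files_truncated += 1
            break  # over budget: stop embedding any further files
        file_contents.append(formatted)
        total_tokens += tokens
        files_included += 1
    return file_contents, total_tokens, files_included, files_truncated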

View File

@@ -422,11 +422,14 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
         Tuple of (formatted_content, estimated_tokens)
         Content is wrapped with clear delimiters for AI parsing
     """
+    logger.debug(f"[FILES] read_file_content called for: {file_path}")
     try:
         # Validate path security before any file operations
         path = resolve_and_validate_path(file_path)
+        logger.debug(f"[FILES] Path validated and resolved: {path}")
     except (ValueError, PermissionError) as e:
         # Return error in a format that provides context to the AI
+        logger.debug(f"[FILES] Path validation failed for {file_path}: {type(e).__name__}: {e}")
         error_msg = str(e)
         # Add Docker-specific help if we're in Docker and path is inaccessible
         if WORKSPACE_ROOT and CONTAINER_WORKSPACE.exists():
@@ -438,28 +441,37 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
                 f"To access files in a different directory, please run Claude from that directory."
             )
         content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {error_msg}\n--- END FILE ---\n"
-        return content, estimate_tokens(content)
+        tokens = estimate_tokens(content)
+        logger.debug(f"[FILES] Returning error content for {file_path}: {tokens} tokens")
+        return content, tokens

     try:
         # Validate file existence and type
         if not path.exists():
+            logger.debug(f"[FILES] File does not exist: {file_path}")
             content = f"\n--- FILE NOT FOUND: {file_path} ---\nError: File does not exist\n--- END FILE ---\n"
             return content, estimate_tokens(content)

         if not path.is_file():
+            logger.debug(f"[FILES] Path is not a file: {file_path}")
             content = f"\n--- NOT A FILE: {file_path} ---\nError: Path is not a file\n--- END FILE ---\n"
             return content, estimate_tokens(content)

         # Check file size to prevent memory exhaustion
         file_size = path.stat().st_size
+        logger.debug(f"[FILES] File size for {file_path}: {file_size:,} bytes")
         if file_size > max_size:
+            logger.debug(f"[FILES] File too large: {file_path} ({file_size:,} > {max_size:,} bytes)")
             content = f"\n--- FILE TOO LARGE: {file_path} ---\nFile size: {file_size:,} bytes (max: {max_size:,})\n--- END FILE ---\n"
             return content, estimate_tokens(content)

         # Read the file with UTF-8 encoding, replacing invalid characters
         # This ensures we can handle files with mixed encodings
+        logger.debug(f"[FILES] Reading file content for {file_path}")
         with open(path, encoding="utf-8", errors="replace") as f:
             file_content = f.read()
+        logger.debug(f"[FILES] Successfully read {len(file_content)} characters from {file_path}")

         # Format with clear delimiters that help the AI understand file boundaries
         # Using consistent markers makes it easier for the model to parse
@@ -467,11 +479,16 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
         # ("--- BEGIN DIFF: ... ---") to allow AI to distinguish between complete file content
         # vs. partial diff content when files appear in both sections
         formatted = f"\n--- BEGIN FILE: {file_path} ---\n{file_content}\n--- END FILE: {file_path} ---\n"
-        return formatted, estimate_tokens(formatted)
+        tokens = estimate_tokens(formatted)
+        logger.debug(f"[FILES] Formatted content for {file_path}: {len(formatted)} chars, {tokens} tokens")
+        return formatted, tokens

     except Exception as e:
+        logger.debug(f"[FILES] Exception reading file {file_path}: {type(e).__name__}: {e}")
         content = f"\n--- ERROR READING FILE: {file_path} ---\nError: {str(e)}\n--- END FILE ---\n"
-        return content, estimate_tokens(content)
+        tokens = estimate_tokens(content)
+        logger.debug(f"[FILES] Returning error content for {file_path}: {tokens} tokens")
+        return content, tokens


 def read_files(
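
Every branch above, success or failure, resolves to the same (text, token_estimate) tuple with delimiter-wrapped content rather than a raised exception. A hypothetical usage sketch of that contract (the import path and the file path are assumptions):

from utils.file_utils import read_file_content  # assumed module path

content, tokens = read_file_content("/workspace/example.py")  # hypothetical path
if content.startswith("\n--- BEGIN FILE:"):
    print(f"embedded, ~{tokens} estimated tokens")
else:
    # Error cases (not found, not a file, too large, unreadable) are still strings,
    # e.g. "--- FILE NOT FOUND: ... ---", so callers never need a try/except here.
    print(f"not embedded: {content.strip().splitlines()[0]}")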
@@ -500,6 +517,9 @@ def read_files(
     if max_tokens is None:
         max_tokens = MAX_CONTEXT_TOKENS

+    logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
+    logger.debug(f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}")
+
     content_parts = []
     total_tokens = 0
     available_tokens = max_tokens - reserve_tokens
@@ -520,31 +540,40 @@ def read_files(
     # Priority 2: Process file paths
     if file_paths:
         # Expand directories to get all individual files
+        logger.debug(f"[FILES] Expanding {len(file_paths)} file paths")
         all_files = expand_paths(file_paths)
+        logger.debug(f"[FILES] After expansion: {len(all_files)} individual files")

         if not all_files and file_paths:
             # No files found but paths were provided
+            logger.debug(f"[FILES] No files found from provided paths")
             content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n")
         else:
             # Read files sequentially until token limit is reached
-            for file_path in all_files:
+            logger.debug(f"[FILES] Reading {len(all_files)} files with token budget {available_tokens:,}")
+            for i, file_path in enumerate(all_files):
                 if total_tokens >= available_tokens:
-                    files_skipped.append(file_path)
-                    continue
+                    logger.debug(f"[FILES] Token budget exhausted, skipping remaining {len(all_files) - i} files")
+                    files_skipped.extend(all_files[i:])
+                    break

                 file_content, file_tokens = read_file_content(file_path)
+                logger.debug(f"[FILES] File {file_path}: {file_tokens:,} tokens")

                 # Check if adding this file would exceed limit
                 if total_tokens + file_tokens <= available_tokens:
                     content_parts.append(file_content)
                     total_tokens += file_tokens
+                    logger.debug(f"[FILES] Added file {file_path}, total tokens: {total_tokens:,}")
                 else:
                     # File too large for remaining budget
+                    logger.debug(f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)")
                     files_skipped.append(file_path)

     # Add informative note about skipped files to help users understand
     # what was omitted and why
     if files_skipped:
+        logger.debug(f"[FILES] {len(files_skipped)} files skipped due to token limits")
         skip_note = "\n\n--- SKIPPED FILES (TOKEN LIMIT) ---\n"
         skip_note += f"Total skipped: {len(files_skipped)}\n"
         # Show first 10 skipped files as examples
@@ -555,4 +584,6 @@ def read_files(
         skip_note += "--- END SKIPPED FILES ---\n"
         content_parts.append(skip_note)

-    return "\n\n".join(content_parts) if content_parts else ""
+    result = "\n\n".join(content_parts) if content_parts else ""
+    logger.debug(f"[FILES] read_files complete: {len(result)} chars, {total_tokens:,} tokens used")
+    return result
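
With these changes the debug log traces the whole budget: path expansion, per-file token cost, what was added, and what was skipped once max_tokens - reserve_tokens is spent. A hypothetical usage sketch showing how a caller notices that the budget ran out (the import path and the paths passed in are assumptions):

from utils.file_utils import read_files  # assumed module path

text = read_files(["src/", "README.md"])  # hypothetical paths
if "--- SKIPPED FILES (TOKEN LIMIT) ---" in text:
    # The skip note built above lists up to 10 of the omitted files,
    # so whoever reads the assembled prompt can see what was left out.
    print("token budget exhausted; some files were omitted")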