Extra logging and more tests

Author: Fahad
Date: 2025-06-11 18:26:13 +04:00
parent 3aef6e961b
commit 4974fbc725
10 changed files with 400 additions and 112 deletions
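
All of the new [FLOW] and [FILES] messages in this commit go through logger.debug, so they only appear when DEBUG logging is enabled. A minimal way to surface them during a local run (the format string is just an example, not the project's logging config):

import logging

# Enable DEBUG output for the whole process; narrow this to specific module
# loggers instead if the full firehose is too noisy.
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(name)s %(levelname)s %(message)s")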

View File

@@ -250,12 +250,16 @@ def add_turn(
     - Turn limits prevent runaway conversations
     - File references are preserved for cross-tool access
     """
+    logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
     context = get_thread(thread_id)
     if not context:
+        logger.debug(f"[FLOW] Thread {thread_id} not found for turn addition")
         return False

     # Check turn limit to prevent runaway conversations
     if len(context.turns) >= MAX_CONVERSATION_TURNS:
+        logger.debug(f"[FLOW] Thread {thread_id} at max turns ({MAX_CONVERSATION_TURNS})")
         return False

     # Create new turn with complete metadata
@@ -277,7 +281,8 @@ def add_turn(
         key = f"thread:{thread_id}"
         client.setex(key, 3600, context.model_dump_json())  # Refresh TTL to 1 hour
         return True
-    except Exception:
+    except Exception as e:
+        logger.debug(f"[FLOW] Failed to save turn to Redis: {type(e).__name__}")
         return False
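
The persistence path stores the serialized context under a thread:<id> key and refreshes a one-hour TTL on every saved turn; the change above also logs the exception class when the write fails. A minimal standalone sketch of the same setex-with-TTL pattern using redis-py (the connection settings and the save_context helper are assumptions, not the project's actual code):

import json
import logging

import redis  # redis-py

logger = logging.getLogger(__name__)
client = redis.Redis(host="localhost", port=6379, db=0)  # assumed connection settings

def save_context(thread_id: str, context_payload: dict) -> bool:
    key = f"thread:{thread_id}"
    try:
        # SETEX writes the value and (re)sets the 1-hour expiry in one call,
        # so every successful save refreshes the thread's TTL.
        client.setex(key, 3600, json.dumps(context_payload))
        return True
    except Exception as e:
        # As in the diff, log only the exception class to keep the line compact.
        logger.debug(f"[FLOW] Failed to save turn to Redis: {type(e).__name__}")
        return False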
@@ -296,19 +301,29 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
         list[str]: Deduplicated list of file paths referenced in the conversation
     """
     if not context.turns:
+        logger.debug(f"[FILES] No turns found, returning empty file list")
         return []

     # Collect all unique files from all turns, preserving order of first appearance
     seen_files = set()
     unique_files = []

+    logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns")
-    for turn in context.turns:
+    for i, turn in enumerate(context.turns):
         if turn.files:
+            logger.debug(f"[FILES] Turn {i+1} has {len(turn.files)} files: {turn.files}")
             for file_path in turn.files:
                 if file_path not in seen_files:
                     seen_files.add(file_path)
                     unique_files.append(file_path)
+                    logger.debug(f"[FILES] Added new file: {file_path}")
+                else:
+                    logger.debug(f"[FILES] Duplicate file skipped: {file_path}")
+        else:
+            logger.debug(f"[FILES] Turn {i+1} has no files")

+    logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}")
     return unique_files
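
The loop above de-duplicates paths across turns while preserving the order of first appearance. A self-contained sketch of that pattern (Turn here is a simplified stand-in for the real turn model):

from dataclasses import dataclass

@dataclass
class Turn:
    files: list[str] | None = None  # simplified stand-in for the real turn model

def collect_unique_files(turns: list[Turn]) -> list[str]:
    seen: set[str] = set()
    unique: list[str] = []
    for turn in turns:
        for path in turn.files or []:
            if path not in seen:  # first appearance wins; later duplicates are skipped
                seen.add(path)
                unique.append(path)
    return unique

turns = [Turn(["a.py", "b.py"]), Turn(None), Turn(["b.py", "c.py"])]
assert collect_unique_files(turns) == ["a.py", "b.py", "c.py"]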
@@ -345,6 +360,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     # Get all unique files referenced in this conversation
     all_files = get_conversation_file_list(context)
+    logger.debug(f"[FILES] Found {len(all_files)} unique files in conversation history")

     history_parts = [
         "=== CONVERSATION HISTORY ===",
@@ -356,6 +372,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     # Embed all files referenced in this conversation once at the start
     if all_files:
+        logger.debug(f"[FILES] Starting embedding for {len(all_files)} files")
         history_parts.extend(
             [
                 "=== FILES REFERENCED IN THIS CONVERSATION ===",
@@ -379,6 +396,7 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
             for file_path in all_files:
                 try:
+                    logger.debug(f"[FILES] Processing file {file_path}")
                     # Correctly unpack the tuple returned by read_file_content
                     formatted_content, content_tokens = read_file_content(file_path)
                     if formatted_content:
@@ -391,20 +409,24 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
                             logger.debug(
                                 f"📄 File embedded in conversation history: {file_path} ({content_tokens:,} tokens)"
                             )
+                            logger.debug(f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})")
                         else:
                             files_truncated += 1
                             logger.debug(
                                 f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)"
                             )
+                            logger.debug(f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)")
                             # Stop processing more files
                             break
                     else:
                         logger.debug(f"📄 File skipped (empty content): {file_path}")
+                        logger.debug(f"[FILES] File {file_path} has empty content - skipping")
                 except Exception as e:
                     # Skip files that can't be read but log the failure
                     logger.warning(
                         f"📄 Failed to embed file in conversation history: {file_path} - {type(e).__name__}: {e}"
                     )
+                    logger.debug(f"[FILES] Failed to read file {file_path} - {type(e).__name__}: {e}")
                     continue

             if file_contents:
@@ -417,11 +439,13 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
                 logger.debug(
                     f"📄 Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens"
                 )
+                logger.debug(f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total")
             else:
                 history_parts.append("(No accessible files found)")
                 logger.debug(
                     f"📄 Conversation history file embedding: no accessible files found from {len(all_files)} requested"
                 )
+                logger.debug(f"[FILES] No accessible files found from {len(all_files)} requested files")
         else:
             # Fallback to original read_files function for backward compatibility
             files_content = read_files_func(all_files)
@@ -481,6 +505,11 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
     from utils.token_utils import estimate_tokens

     total_conversation_tokens = estimate_tokens(complete_history)

+    # Summary log of what was built
+    user_turns = len([t for t in context.turns if t.role == "user"])
+    assistant_turns = len([t for t in context.turns if t.role == "assistant"])
+    logger.debug(f"[FLOW] Built conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens")
     return complete_history, total_conversation_tokens
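
Taken together, these hunks instrument a single-pass token budget: files are embedded one by one until adding the next would exceed MAX_CONTENT_TOKENS, at which point embedding stops entirely. A rough standalone sketch of that loop, as read from the diff (the constant's value and the read_one callback are placeholders, not the project's real configuration):

MAX_CONTENT_TOKENS = 100_000  # placeholder value; the real constant lives in the module

def embed_files(paths, read_one):
    """read_one(path) -> (formatted_content, estimated_tokens), like read_file_content."""
    file_contents, total_tokens = [], 0
    files_included = files_truncated = 0
    for path in paths:
        formatted, tokens = read_one(path)
        if not formatted:
            continue  # empty content: skip this file, keep going
        if total_tokens + tokens > MAX_CONTENT_TOKENS:
            files_truncated += 1
            break  # over budget: stop embedding any further files
        file_contents.append(formatted)
        total_tokens += tokens
        files_included += 1
    return file_contents, total_tokens, files_included, files_truncated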

View File

@@ -422,11 +422,14 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
         Tuple of (formatted_content, estimated_tokens)
         Content is wrapped with clear delimiters for AI parsing
     """
+    logger.debug(f"[FILES] read_file_content called for: {file_path}")
     try:
         # Validate path security before any file operations
         path = resolve_and_validate_path(file_path)
+        logger.debug(f"[FILES] Path validated and resolved: {path}")
     except (ValueError, PermissionError) as e:
         # Return error in a format that provides context to the AI
+        logger.debug(f"[FILES] Path validation failed for {file_path}: {type(e).__name__}: {e}")
         error_msg = str(e)
         # Add Docker-specific help if we're in Docker and path is inaccessible
         if WORKSPACE_ROOT and CONTAINER_WORKSPACE.exists():
@@ -438,28 +441,37 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
                 f"To access files in a different directory, please run Claude from that directory."
             )
         content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {error_msg}\n--- END FILE ---\n"
-        return content, estimate_tokens(content)
+        tokens = estimate_tokens(content)
+        logger.debug(f"[FILES] Returning error content for {file_path}: {tokens} tokens")
+        return content, tokens

     try:
         # Validate file existence and type
         if not path.exists():
+            logger.debug(f"[FILES] File does not exist: {file_path}")
             content = f"\n--- FILE NOT FOUND: {file_path} ---\nError: File does not exist\n--- END FILE ---\n"
             return content, estimate_tokens(content)

         if not path.is_file():
+            logger.debug(f"[FILES] Path is not a file: {file_path}")
             content = f"\n--- NOT A FILE: {file_path} ---\nError: Path is not a file\n--- END FILE ---\n"
             return content, estimate_tokens(content)

         # Check file size to prevent memory exhaustion
         file_size = path.stat().st_size
+        logger.debug(f"[FILES] File size for {file_path}: {file_size:,} bytes")
         if file_size > max_size:
+            logger.debug(f"[FILES] File too large: {file_path} ({file_size:,} > {max_size:,} bytes)")
             content = f"\n--- FILE TOO LARGE: {file_path} ---\nFile size: {file_size:,} bytes (max: {max_size:,})\n--- END FILE ---\n"
             return content, estimate_tokens(content)

         # Read the file with UTF-8 encoding, replacing invalid characters
         # This ensures we can handle files with mixed encodings
+        logger.debug(f"[FILES] Reading file content for {file_path}")
         with open(path, encoding="utf-8", errors="replace") as f:
             file_content = f.read()
+        logger.debug(f"[FILES] Successfully read {len(file_content)} characters from {file_path}")

         # Format with clear delimiters that help the AI understand file boundaries
         # Using consistent markers makes it easier for the model to parse
@@ -467,11 +479,16 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
         # ("--- BEGIN DIFF: ... ---") to allow AI to distinguish between complete file content
         # vs. partial diff content when files appear in both sections
         formatted = f"\n--- BEGIN FILE: {file_path} ---\n{file_content}\n--- END FILE: {file_path} ---\n"
-        return formatted, estimate_tokens(formatted)
+        tokens = estimate_tokens(formatted)
+        logger.debug(f"[FILES] Formatted content for {file_path}: {len(formatted)} chars, {tokens} tokens")
+        return formatted, tokens

     except Exception as e:
+        logger.debug(f"[FILES] Exception reading file {file_path}: {type(e).__name__}: {e}")
         content = f"\n--- ERROR READING FILE: {file_path} ---\nError: {str(e)}\n--- END FILE ---\n"
-        return content, estimate_tokens(content)
+        tokens = estimate_tokens(content)
+        logger.debug(f"[FILES] Returning error content for {file_path}: {tokens} tokens")
+        return content, tokens


 def read_files(
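
Every branch above, success or failure, resolves to the same (text, token_estimate) tuple with delimiter-wrapped content rather than a raised exception. A hypothetical usage sketch of that contract (the import path and the file path are assumptions):

from utils.file_utils import read_file_content  # assumed module path

content, tokens = read_file_content("/workspace/example.py")  # hypothetical path
if content.startswith("\n--- BEGIN FILE:"):
    print(f"embedded, ~{tokens} estimated tokens")
else:
    # Error cases (not found, not a file, too large, unreadable) are still strings,
    # e.g. "--- FILE NOT FOUND: ... ---", so callers never need a try/except here.
    print(f"not embedded: {content.strip().splitlines()[0]}")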
@@ -500,6 +517,9 @@ def read_files(
     if max_tokens is None:
         max_tokens = MAX_CONTEXT_TOKENS

+    logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
+    logger.debug(f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}")
+
     content_parts = []
     total_tokens = 0
     available_tokens = max_tokens - reserve_tokens
@@ -520,31 +540,40 @@ def read_files(
     # Priority 2: Process file paths
     if file_paths:
         # Expand directories to get all individual files
+        logger.debug(f"[FILES] Expanding {len(file_paths)} file paths")
         all_files = expand_paths(file_paths)
+        logger.debug(f"[FILES] After expansion: {len(all_files)} individual files")

         if not all_files and file_paths:
             # No files found but paths were provided
+            logger.debug(f"[FILES] No files found from provided paths")
             content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n")
         else:
             # Read files sequentially until token limit is reached
-            for file_path in all_files:
+            logger.debug(f"[FILES] Reading {len(all_files)} files with token budget {available_tokens:,}")
+            for i, file_path in enumerate(all_files):
                 if total_tokens >= available_tokens:
-                    files_skipped.append(file_path)
-                    continue
+                    logger.debug(f"[FILES] Token budget exhausted, skipping remaining {len(all_files) - i} files")
+                    files_skipped.extend(all_files[i:])
+                    break

                 file_content, file_tokens = read_file_content(file_path)
+                logger.debug(f"[FILES] File {file_path}: {file_tokens:,} tokens")

                 # Check if adding this file would exceed limit
                 if total_tokens + file_tokens <= available_tokens:
                     content_parts.append(file_content)
                     total_tokens += file_tokens
+                    logger.debug(f"[FILES] Added file {file_path}, total tokens: {total_tokens:,}")
                 else:
                     # File too large for remaining budget
+                    logger.debug(f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)")
                     files_skipped.append(file_path)

     # Add informative note about skipped files to help users understand
     # what was omitted and why
     if files_skipped:
+        logger.debug(f"[FILES] {len(files_skipped)} files skipped due to token limits")
         skip_note = "\n\n--- SKIPPED FILES (TOKEN LIMIT) ---\n"
         skip_note += f"Total skipped: {len(files_skipped)}\n"
         # Show first 10 skipped files as examples
@@ -555,4 +584,6 @@ def read_files(
         skip_note += "--- END SKIPPED FILES ---\n"
         content_parts.append(skip_note)

-    return "\n\n".join(content_parts) if content_parts else ""
+    result = "\n\n".join(content_parts) if content_parts else ""
+    logger.debug(f"[FILES] read_files complete: {len(result)} chars, {total_tokens:,} tokens used")
+    return result
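
With these changes the debug log traces the whole budget: path expansion, per-file token cost, what was added, and what was skipped once max_tokens - reserve_tokens is spent. A hypothetical usage sketch showing how a caller notices that the budget ran out (the import path and the paths passed in are assumptions):

from utils.file_utils import read_files  # assumed module path

text = read_files(["src/", "README.md"])  # hypothetical paths
if "--- SKIPPED FILES (TOKEN LIMIT) ---" in text:
    # The skip note built above lists up to 10 of the omitted files,
    # so whoever reads the assembled prompt can see what was left out.
    print("token budget exhausted; some files were omitted")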