Performance improvements when embedding files:

- Exit early at the MCP boundary if files won't fit within the context window of the chosen model
- Encourage Claude to re-run with a better-fitting context
- Check file sizes before embedding
- Drop files from older conversations when building continuations, giving priority to newer files
- List excluded files to Claude in the response
- Improved tests
- Improved precommit prompt
- Added a new Low severity level to precommit
- Improved documentation of the file embedding strategy
- Refactor
Fahad
2025-06-16 05:51:52 +04:00
parent 56333cbd86
commit 91077e3810
16 changed files with 1557 additions and 308 deletions

View File

@@ -1,185 +1,178 @@
#!/usr/bin/env python3
"""
Log monitor for MCP server - monitors and displays tool activity
This module provides a simplified log monitoring interface using the
centralized LogTailer class from utils.file_utils.
"""
import os
import time
from datetime import datetime
from pathlib import Path
from utils.file_utils import LogTailer
def create_line_handler(log_name: str, filter_func=None, format_func=None):
"""
Create a line handler function for log monitoring.
Args:
log_name: Name of the log for display purposes
filter_func: Optional function to filter lines (return True to include)
format_func: Optional function to format lines for display
Returns:
Function that handles log lines
"""
def handle_line(line: str):
if filter_func and not filter_func(line):
return
timestamp = datetime.now().strftime("%H:%M:%S")
if format_func:
formatted_line = format_func(line)
else:
formatted_line = line
print(f"[{timestamp}] {formatted_line}")
return handle_line
def monitor_mcp_activity():
"""Monitor MCP server activity by watching the log file"""
log_file = "/tmp/mcp_server.log"
activity_file = "/tmp/mcp_activity.log"
debug_file = "/tmp/gemini_debug.log"
overflow_file = "/tmp/mcp_server_overflow.log"
"""Monitor MCP server activity by watching multiple log files"""
log_files = {
"/tmp/mcp_server.log": "main",
"/tmp/mcp_activity.log": "activity",
"/tmp/gemini_debug.log": "debug",
"/tmp/mcp_server_overflow.log": "overflow",
}
print(f"[{datetime.now().strftime('%H:%M:%S')}] MCP Log Monitor started")
print(f"[{datetime.now().strftime('%H:%M:%S')}] Monitoring: {log_file}")
print(f"[{datetime.now().strftime('%H:%M:%S')}] Activity file: {activity_file}")
print(f"[{datetime.now().strftime('%H:%M:%S')}] Debug file: {debug_file}")
print(f"[{datetime.now().strftime('%H:%M:%S')}] Overflow file: {overflow_file}")
for file_path, name in log_files.items():
print(f"[{datetime.now().strftime('%H:%M:%S')}] Monitoring {name}: {file_path}")
print(f"[{datetime.now().strftime('%H:%M:%S')}] Note: Logs rotate daily at midnight, keeping 7 days of history")
print("-" * 60)
# Track file positions and sizes for rotation detection
log_pos = 0
activity_pos = 0
debug_pos = 0
overflow_pos = 0
# Create tailers for each log file
tailers = {}
# Track file sizes to detect rotation
log_size = 0
activity_size = 0
debug_size = 0
overflow_size = 0
# Activity log - most important for tool calls
def activity_filter(line: str) -> bool:
return any(
keyword in line
for keyword in [
"TOOL_CALL:",
"TOOL_COMPLETED:",
"CONVERSATION_RESUME:",
"CONVERSATION_CONTEXT:",
"CONVERSATION_ERROR:",
]
)
# Ensure files exist
Path(log_file).touch()
Path(activity_file).touch()
Path(debug_file).touch()
Path(overflow_file).touch()
def activity_formatter(line: str) -> str:
if "TOOL_CALL:" in line:
tool_info = line.split("TOOL_CALL:")[-1].strip()
return f"Tool called: {tool_info}"
elif "TOOL_COMPLETED:" in line:
tool_name = line.split("TOOL_COMPLETED:")[-1].strip()
return f"✓ Tool completed: {tool_name}"
elif "CONVERSATION_RESUME:" in line:
resume_info = line.split("CONVERSATION_RESUME:")[-1].strip()
return f"Resume: {resume_info}"
elif "CONVERSATION_CONTEXT:" in line:
context_info = line.split("CONVERSATION_CONTEXT:")[-1].strip()
return f"Context: {context_info}"
elif "CONVERSATION_ERROR:" in line:
error_info = line.split("CONVERSATION_ERROR:")[-1].strip()
return f"❌ Conversation error: {error_info}"
return line
# Initialize file sizes
if os.path.exists(log_file):
log_size = os.path.getsize(log_file)
log_pos = log_size # Start from end to avoid old logs
if os.path.exists(activity_file):
activity_size = os.path.getsize(activity_file)
activity_pos = activity_size # Start from end to avoid old logs
if os.path.exists(debug_file):
debug_size = os.path.getsize(debug_file)
debug_pos = debug_size # Start from end to avoid old logs
if os.path.exists(overflow_file):
overflow_size = os.path.getsize(overflow_file)
overflow_pos = overflow_size # Start from end to avoid old logs
tailers["activity"] = LogTailer("/tmp/mcp_activity.log")
while True:
try:
# Check activity file (most important for tool calls)
if os.path.exists(activity_file):
# Check for log rotation
current_activity_size = os.path.getsize(activity_file)
if current_activity_size < activity_size:
# File was rotated - start from beginning
activity_pos = 0
activity_size = current_activity_size
print(f"[{datetime.now().strftime('%H:%M:%S')}] Activity log rotated - restarting from beginning")
# Main log - errors and warnings
def main_filter(line: str) -> bool:
return any(keyword in line for keyword in ["ERROR", "WARNING", "DEBUG", "Gemini API"])
with open(activity_file) as f:
f.seek(activity_pos)
new_lines = f.readlines()
activity_pos = f.tell()
activity_size = current_activity_size
def main_formatter(line: str) -> str:
if "ERROR" in line:
return f"{line}"
elif "WARNING" in line:
return f"⚠️ {line}"
elif "DEBUG" in line:
if "📄" in line or "📁" in line:
return f"📂 FILE: {line}"
else:
return f"🔍 {line}"
elif "Gemini API" in line and ("Sending" in line or "Received" in line):
return f"API: {line}"
elif "INFO" in line and any(keyword in line for keyword in ["Gemini API", "Tool", "Conversation"]):
return f" {line}"
return line
for line in new_lines:
line = line.strip()
if line:
if "TOOL_CALL:" in line:
tool_info = line.split("TOOL_CALL:")[-1].strip()
print(f"[{datetime.now().strftime('%H:%M:%S')}] Tool called: {tool_info}")
elif "TOOL_COMPLETED:" in line:
tool_name = line.split("TOOL_COMPLETED:")[-1].strip()
print(f"[{datetime.now().strftime('%H:%M:%S')}] ✓ Tool completed: {tool_name}")
elif "CONVERSATION_RESUME:" in line:
resume_info = line.split("CONVERSATION_RESUME:")[-1].strip()
print(f"[{datetime.now().strftime('%H:%M:%S')}] Resume: {resume_info}")
elif "CONVERSATION_CONTEXT:" in line:
context_info = line.split("CONVERSATION_CONTEXT:")[-1].strip()
print(f"[{datetime.now().strftime('%H:%M:%S')}] Context: {context_info}")
elif "CONVERSATION_ERROR:" in line:
error_info = line.split("CONVERSATION_ERROR:")[-1].strip()
print(f"[{datetime.now().strftime('%H:%M:%S')}] ❌ Conversation error: {error_info}")
tailers["main"] = LogTailer("/tmp/mcp_server.log")
# Check main log file for errors and warnings
if os.path.exists(log_file):
# Check for log rotation
current_log_size = os.path.getsize(log_file)
if current_log_size < log_size:
# File was rotated - start from beginning
log_pos = 0
log_size = current_log_size
print(f"[{datetime.now().strftime('%H:%M:%S')}] Main log rotated - restarting from beginning")
# Debug log
def debug_formatter(line: str) -> str:
return f"DEBUG: {line}"
with open(log_file) as f:
f.seek(log_pos)
new_lines = f.readlines()
log_pos = f.tell()
log_size = current_log_size
tailers["debug"] = LogTailer("/tmp/gemini_debug.log")
for line in new_lines:
line = line.strip()
if line:
if "ERROR" in line:
print(f"[{datetime.now().strftime('%H:%M:%S')}] ❌ {line}")
elif "WARNING" in line:
print(f"[{datetime.now().strftime('%H:%M:%S')}] ⚠️ {line}")
elif "DEBUG" in line:
# Highlight file embedding debug logs
if "📄" in line or "📁" in line:
print(f"[{datetime.now().strftime('%H:%M:%S')}] 📂 FILE: {line}")
else:
print(f"[{datetime.now().strftime('%H:%M:%S')}] 🔍 {line}")
elif "INFO" in line and ("Gemini API" in line or "Tool" in line or "Conversation" in line):
print(f"[{datetime.now().strftime('%H:%M:%S')}] {line}")
elif "Gemini API" in line and ("Sending" in line or "Received" in line):
print(f"[{datetime.now().strftime('%H:%M:%S')}] API: {line}")
# Overflow log
def overflow_filter(line: str) -> bool:
return "ERROR" in line or "WARNING" in line
# Check debug file
if os.path.exists(debug_file):
# Check for log rotation
current_debug_size = os.path.getsize(debug_file)
if current_debug_size < debug_size:
# File was rotated - start from beginning
debug_pos = 0
debug_size = current_debug_size
print(f"[{datetime.now().strftime('%H:%M:%S')}] Debug log rotated - restarting from beginning")
def overflow_formatter(line: str) -> str:
if "ERROR" in line:
return f"🚨 OVERFLOW: {line}"
elif "WARNING" in line:
return f"⚠️ OVERFLOW: {line}"
return line
with open(debug_file) as f:
f.seek(debug_pos)
new_lines = f.readlines()
debug_pos = f.tell()
debug_size = current_debug_size
tailers["overflow"] = LogTailer("/tmp/mcp_server_overflow.log")
for line in new_lines:
line = line.strip()
if line:
print(f"[{datetime.now().strftime('%H:%M:%S')}] DEBUG: {line}")
# Monitor all files in a simple loop
try:
while True:
# Check activity log
activity_lines = tailers["activity"].read_new_lines()
for line in activity_lines:
if activity_filter(line):
timestamp = datetime.now().strftime("%H:%M:%S")
formatted = activity_formatter(line)
print(f"[{timestamp}] {formatted}")
# Check overflow file for warnings/errors when main log gets too large
if os.path.exists(overflow_file):
# Check for log rotation
current_overflow_size = os.path.getsize(overflow_file)
if current_overflow_size < overflow_size:
# File was rotated - start from beginning
overflow_pos = 0
overflow_size = current_overflow_size
print(f"[{datetime.now().strftime('%H:%M:%S')}] Overflow log rotated - restarting from beginning")
# Check main log
main_lines = tailers["main"].read_new_lines()
for line in main_lines:
if main_filter(line):
timestamp = datetime.now().strftime("%H:%M:%S")
formatted = main_formatter(line)
print(f"[{timestamp}] {formatted}")
with open(overflow_file) as f:
f.seek(overflow_pos)
new_lines = f.readlines()
overflow_pos = f.tell()
overflow_size = current_overflow_size
# Check debug log
debug_lines = tailers["debug"].read_new_lines()
for line in debug_lines:
timestamp = datetime.now().strftime("%H:%M:%S")
formatted = debug_formatter(line)
print(f"[{timestamp}] {formatted}")
for line in new_lines:
line = line.strip()
if line:
if "ERROR" in line:
print(f"[{datetime.now().strftime('%H:%M:%S')}] 🚨 OVERFLOW: {line}")
elif "WARNING" in line:
print(f"[{datetime.now().strftime('%H:%M:%S')}] ⚠️ OVERFLOW: {line}")
# Check overflow log
overflow_lines = tailers["overflow"].read_new_lines()
for line in overflow_lines:
if overflow_filter(line):
timestamp = datetime.now().strftime("%H:%M:%S")
formatted = overflow_formatter(line)
print(f"[{timestamp}] {formatted}")
time.sleep(0.5) # Check every 500ms
# Wait before next check
import time
except KeyboardInterrupt:
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Log monitor stopped")
break
except Exception as e:
print(f"[{datetime.now().strftime('%H:%M:%S')}] Monitor error: {e}")
time.sleep(1)
time.sleep(0.5)
except KeyboardInterrupt:
print(f"\n[{datetime.now().strftime('%H:%M:%S')}] Log monitor stopped")
if __name__ == "__main__":
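For context, a minimal sketch of the tailing pattern this refactor converges on, assuming LogTailer exposes only the read_new_lines() method shown in the diff; the single-file monitor and the error_filter/error_formatter helpers below are illustrative and not part of this commit.

import time
from datetime import datetime

from utils.file_utils import LogTailer  # import path taken from the diff above

def error_filter(line: str) -> bool:
    return "ERROR" in line or "WARNING" in line

def error_formatter(line: str) -> str:
    return f"⚠️ {line}" if "WARNING" in line else f"❌ {line}"

tailer = LogTailer("/tmp/mcp_server.log")  # hypothetical single-file monitor
try:
    while True:
        for line in tailer.read_new_lines():
            line = line.strip()
            if line and error_filter(line):
                print(f"[{datetime.now().strftime('%H:%M:%S')}] {error_formatter(line)}")
        time.sleep(0.5)  # poll every 500ms, as in the monitor above
except KeyboardInterrupt:
    pass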

View File

@@ -1,13 +1,12 @@
"""OpenRouter model registry for managing model configurations and aliases."""
import json
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
from utils.file_utils import translate_path_for_environment
from utils.file_utils import read_json_file, translate_path_for_environment
from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
@@ -130,8 +129,10 @@ class OpenRouterModelRegistry:
return []
try:
with open(self.config_path) as f:
data = json.load(f)
# Use centralized JSON reading utility
data = read_json_file(str(self.config_path))
if data is None:
raise ValueError(f"Could not read or parse JSON from {self.config_path}")
# Parse models
configs = []
@@ -140,8 +141,9 @@ class OpenRouterModelRegistry:
configs.append(config)
return configs
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON in {self.config_path}: {e}")
except ValueError:
# Re-raise ValueError for specific config errors
raise
except Exception as e:
raise ValueError(f"Error reading config from {self.config_path}: {e}")

View File

@@ -27,25 +27,25 @@ INPUTS PROVIDED
3. File names and related code
SCOPE & FOCUS
• Review **only** the changes in the diff and the given code
• Review ONLY the changes in the diff and the given code
• From the diff, infer what got changed and why, determine if the changes make logical sense
• Ensure they correctly implement the request, are secure (where applicable), efficient, and maintainable and do not
cause potential regressions
• Do **not** propose broad refactors or off-scope improvements.
• DO NOT propose broad refactors or off-scope improvements. Stick to the code and changes you have visibility into.
REVIEW METHOD
1. Identify tech stack, frameworks, and patterns present in the diff.
2. Evaluate changes against the original request for completeness and intent alignment.
3. Detect issues, prioritising by severity (**Critical → High → Medium → Low**).
4. Highlight incomplete changes, or changes that would cause bugs, crashes or data loss or race conditions
3. Detect issues, prioritising by severity (CRITICAL → HIGH → MEDIUM → LOW).
4. Highlight incomplete changes, or changes that would cause bugs, regressions, crashes or data loss or race conditions
5. Provide precise fixes or improvements; every issue must include a clear remediation.
6. Acknowledge good patterns to reinforce best practice.
CORE ANALYSIS (adapt to the diff and stack)
**Security** injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety
**Bugs & Logic Errors** off-by-one, null refs, race conditions, incorrect branching
**Performance** inefficient algorithms, resource leaks, blocking operations
**Code Quality** DRY violations, complexity, SOLID adherence
• Security injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety
• Bugs & Logic Errors off-by-one, null refs, race conditions, incorrect branching
• Performance inefficient algorithms, resource leaks, blocking operations
• Code Quality DRY violations, complexity, SOLID adherence
ADDITIONAL ANALYSIS (apply only when relevant)
• Language/runtime concerns memory management, concurrency, exception handling
@@ -66,7 +66,9 @@ OUTPUT FORMAT
- Files changed: X
- Overall assessment: brief statement with critical issue count
### Issues by Severity
MANDATORY: You must ONLY respond in the following format. List issues by severity and include ONLY the severities
that apply:
[CRITICAL] Short title
- File: path/to/file.py:line
- Description: what & why
@@ -74,8 +76,13 @@ OUTPUT FORMAT
[HIGH] ...
### Recommendations
- Top priority fixes before commit
[MEDIUM] ...
[LOW] ...
MAKE RECOMMENDATIONS:
Make a final, short, clear, to the point statement or list in a brief bullet point:
- Mention top priority fixes to be IMMEDIATELY made before commit
- Notable positives to keep
Be thorough yet actionable. Focus on the diff, map every issue to a concrete fix, and keep comments aligned

View File

@@ -171,7 +171,21 @@ class TestAutoMode:
# Return a mock provider for actually available models
from unittest.mock import MagicMock
return MagicMock()
from providers.base import ModelCapabilities
mock_provider = MagicMock()
# Set up proper capabilities to avoid MagicMock comparison errors
from providers.base import ProviderType
mock_capabilities = ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name=model_name,
friendly_name="Test Model",
context_window=1048576, # 1M tokens
supports_function_calling=True,
)
mock_provider.get_capabilities.return_value = mock_capabilities
return mock_provider
else:
# Other unknown models are not available
return None

View File

@@ -0,0 +1,541 @@
"""
Test suite for conversation memory file management features.
This module tests the enhanced conversation memory system including:
- File inclusion in conversation history
- Token-aware file inclusion planning
- Smart file size limiting for conversation history
- Cross-tool file context preservation
- MCP boundary vs conversation building separation
"""
import os
from unittest.mock import patch
from utils.conversation_memory import (
ConversationTurn,
ThreadContext,
_plan_file_inclusion_by_size,
build_conversation_history,
get_conversation_file_list,
)
class TestConversationFileList:
"""Test file list extraction from conversation turns"""
def test_get_conversation_file_list_basic(self):
"""Test that files are returned from conversation turns, newest first"""
turns = [
ConversationTurn(
role="user",
content="First turn (older)",
timestamp="2023-01-01T00:00:00Z",
files=["/project/file1.py", "/project/file2.py"],
),
ConversationTurn(
role="assistant",
content="Second turn (newer)",
timestamp="2023-01-01T00:01:00Z",
files=["/project/file3.py"],
),
]
context = ThreadContext(
thread_id="test",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:01:00Z",
tool_name="test",
turns=turns,
initial_context={},
)
files = get_conversation_file_list(context)
# Should contain all unique files, with newest turn files first
assert len(files) == 3
assert files[0] == "/project/file3.py" # From newest turn (turn 2)
assert "/project/file1.py" in files[1:] # From older turn (turn 1)
assert "/project/file2.py" in files[1:] # From older turn (turn 1)
def test_get_conversation_file_list_deduplication(self):
"""Test that duplicate files are removed, prioritizing newer turns"""
turns = [
ConversationTurn(
role="user",
content="First mention (older)",
timestamp="2023-01-01T00:00:00Z",
files=["/project/file1.py", "/project/shared.py"],
),
ConversationTurn(
role="assistant",
content="Duplicate mention (newer)",
timestamp="2023-01-01T00:01:00Z",
files=["/project/shared.py", "/project/file2.py"], # shared.py is duplicate
),
]
context = ThreadContext(
thread_id="test",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:01:00Z",
tool_name="test",
turns=turns,
initial_context={},
)
files = get_conversation_file_list(context)
# Should have unique files only, with newer turn files first
assert len(files) == 3
# Files from turn 2 (newer) should come first
assert files[0] == "/project/shared.py" # From newer turn (turn 2)
assert files[1] == "/project/file2.py" # From newer turn (turn 2)
# Files from turn 1 (older) that aren't duplicates
assert files[2] == "/project/file1.py" # From older turn (turn 1)
class TestFileInclusionPlanning:
"""Test token-aware file inclusion planning for conversation history"""
def test_plan_file_inclusion_within_budget(self, project_path):
"""Test file inclusion when all files fit within token budget"""
# Create small test files
small_file1 = os.path.join(project_path, "small1.py")
small_file2 = os.path.join(project_path, "small2.py")
with open(small_file1, "w") as f:
f.write("# Small file 1\nprint('hello')\n") # ~30 chars
with open(small_file2, "w") as f:
f.write("# Small file 2\nprint('world')\n") # ~30 chars
all_files = [small_file1, small_file2]
max_tokens = 1000 # Generous budget
included, skipped, total_tokens = _plan_file_inclusion_by_size(all_files, max_tokens)
assert included == all_files
assert skipped == []
assert total_tokens > 0 # Should have estimated some tokens
def test_plan_file_inclusion_exceeds_budget(self, project_path):
"""Test file inclusion when files exceed token budget"""
# Create files with different sizes
small_file = os.path.join(project_path, "small.py")
large_file = os.path.join(project_path, "large.py")
with open(small_file, "w") as f:
f.write("# Small file\nprint('hello')\n") # ~25 chars
with open(large_file, "w") as f:
f.write("# Large file\n" + "x = 1\n" * 1000) # Much larger
all_files = [small_file, large_file]
max_tokens = 50 # Very tight budget
included, skipped, total_tokens = _plan_file_inclusion_by_size(all_files, max_tokens)
# Should include some files, skip others when budget is tight
assert len(included) + len(skipped) == 2
assert total_tokens <= max_tokens
def test_plan_file_inclusion_empty_list(self):
"""Test file inclusion planning with empty file list"""
included, skipped, total_tokens = _plan_file_inclusion_by_size([], 1000)
assert included == []
assert skipped == []
assert total_tokens == 0
def test_plan_file_inclusion_nonexistent_files(self):
"""Test file inclusion planning with non-existent files"""
nonexistent_files = ["/does/not/exist1.py", "/does/not/exist2.py"]
included, skipped, total_tokens = _plan_file_inclusion_by_size(nonexistent_files, 1000)
assert included == []
assert skipped == nonexistent_files
assert total_tokens == 0
class TestConversationHistoryBuilding:
"""Test conversation history building with file content embedding"""
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_build_conversation_history_with_file_content(self, project_path):
"""Test that conversation history includes embedded file content"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
# Create test file with known content
test_file = os.path.join(project_path, "test.py")
test_content = "# Test file\ndef hello():\n print('Hello, world!')\n"
with open(test_file, "w") as f:
f.write(test_content)
# Create conversation with file reference
turns = [
ConversationTurn(
role="user",
content="Please analyze this file",
timestamp="2023-01-01T00:00:00Z",
files=[test_file],
)
]
context = ThreadContext(
thread_id="test-thread",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:00:00Z",
tool_name="analyze",
turns=turns,
initial_context={},
)
history, tokens = build_conversation_history(context)
# Verify structure
assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
assert "=== FILES REFERENCED IN THIS CONVERSATION ===" in history
assert "--- Turn 1 (Claude) ---" in history
# Verify file content is embedded
assert "--- BEGIN FILE:" in history
assert test_file in history
assert test_content in history
assert "--- END FILE:" in history
# Verify turn content
assert "Please analyze this file" in history
assert f"Files used in this turn: {test_file}" in history
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_build_conversation_history_file_deduplication(self, project_path):
"""Test that files are embedded only once even if referenced multiple times"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
test_file = os.path.join(project_path, "shared.py")
with open(test_file, "w") as f:
f.write("# Shared file\nshared_var = 42\n")
# Multiple turns referencing the same file
turns = [
ConversationTurn(
role="user",
content="First look at this file",
timestamp="2023-01-01T00:00:00Z",
files=[test_file],
),
ConversationTurn(
role="assistant",
content="Analysis complete",
timestamp="2023-01-01T00:01:00Z",
files=[test_file], # Same file referenced again
),
]
context = ThreadContext(
thread_id="test-thread",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:01:00Z",
tool_name="analyze",
turns=turns,
initial_context={},
)
history, tokens = build_conversation_history(context)
# File should appear in embedded section only once
file_begin_count = history.count("--- BEGIN FILE:")
file_end_count = history.count("--- END FILE:")
assert file_begin_count == 1, "File should be embedded exactly once"
assert file_end_count == 1, "File should be embedded exactly once"
# But should show in both turn references
turn_file_refs = history.count(f"Files used in this turn: {test_file}")
assert turn_file_refs == 2, "Both turns should show file usage"
def test_build_conversation_history_empty_turns(self):
"""Test conversation history building with no turns"""
context = ThreadContext(
thread_id="empty-thread",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:00:00Z",
tool_name="test",
turns=[],
initial_context={},
)
history, tokens = build_conversation_history(context)
assert history == ""
assert tokens == 0
class TestCrossToolFileContext:
"""Test cross-tool file context preservation in conversations"""
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_cross_tool_file_context_preservation(self, project_path):
"""Test that file context is preserved across different tools"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
src_file = os.path.join(project_path, "src.py")
test_file = os.path.join(project_path, "test.py")
with open(src_file, "w") as f:
f.write("def main():\n return 'hello'\n")
with open(test_file, "w") as f:
f.write("import src\nassert src.main() == 'hello'\n")
# Simulate cross-tool conversation with chronological timestamps
turns = [
ConversationTurn(
role="assistant",
content="I've analyzed the source code structure",
timestamp="2023-01-01T00:00:00Z", # First turn
files=[src_file],
tool_name="analyze",
),
ConversationTurn(
role="user",
content="Now generate tests for it",
timestamp="2023-01-01T00:01:00Z", # Second turn (1 minute later)
files=[test_file],
),
ConversationTurn(
role="assistant",
content="I've generated comprehensive tests",
timestamp="2023-01-01T00:02:00Z", # Third turn (2 minutes later)
files=[src_file, test_file], # References both files
tool_name="testgen",
),
]
context = ThreadContext(
thread_id="cross-tool-thread",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:02:00Z",
tool_name="testgen",
turns=turns,
initial_context={},
)
history, tokens = build_conversation_history(context)
# Verify cross-tool context
assert "--- Turn 1 (Gemini using analyze) ---" in history
assert "--- Turn 2 (Claude) ---" in history
assert "--- Turn 3 (Gemini using testgen) ---" in history
# Verify file context preservation
assert "Files used in this turn: " + src_file in history
assert "Files used in this turn: " + test_file in history
assert f"Files used in this turn: {src_file}, {test_file}" in history
# Verify both files are embedded
files_section_start = history.find("=== FILES REFERENCED IN THIS CONVERSATION ===")
first_file_pos = history.find(src_file, files_section_start)
second_file_pos = history.find(test_file, files_section_start)
assert first_file_pos > 0 and second_file_pos > 0
class TestLargeConversations:
"""Test behavior with large conversations, many files, and many turns"""
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_large_conversation_with_many_files(self, project_path):
"""Test conversation with many files across multiple turns"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
# Create 20 test files
test_files = []
for i in range(20):
test_file = os.path.join(project_path, f"file{i:02d}.py")
with open(test_file, "w") as f:
f.write(f"# File {i}\nclass Module{i}:\n def method(self):\n return {i}\n")
test_files.append(test_file)
# Create 15 conversation turns with files spread across them
turns = []
for turn_num in range(15):
# Distribute files across turns (some turns have multiple files)
if turn_num < 10:
turn_files = test_files[turn_num * 2 : (turn_num + 1) * 2] # 2 files per turn
else:
turn_files = [] # Some turns without files
turns.append(
ConversationTurn(
role="user" if turn_num % 2 == 0 else "assistant",
content=f"Turn {turn_num} content - working on modules",
timestamp=f"2023-01-01T{turn_num:02d}:00:00Z",
files=turn_files,
tool_name="analyze" if turn_num % 3 == 0 else None,
)
)
context = ThreadContext(
thread_id="large-conversation",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T14:00:00Z",
tool_name="analyze",
turns=turns,
initial_context={},
)
history, tokens = build_conversation_history(context)
# Verify structure
assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
assert "=== FILES REFERENCED IN THIS CONVERSATION ===" in history
# Should handle large conversation gracefully
assert len(history) > 1000 # Should have substantial content
assert tokens > 0
# Files from newer turns should be prioritized
file_list = get_conversation_file_list(context)
assert len(file_list) == 20 # All unique files
# Files from turn 9 (newest with files) should come first
newest_files = test_files[18:20] # Files from turn 9
assert file_list[0] in newest_files
assert file_list[1] in newest_files
class TestSmallAndNewConversations:
"""Test behavior with small/new conversations and edge cases"""
def test_empty_conversation(self):
"""Test completely empty conversation"""
context = ThreadContext(
thread_id="empty",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:00:00Z",
tool_name="test",
turns=[],
initial_context={},
)
history, tokens = build_conversation_history(context)
assert history == ""
assert tokens == 0
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_single_turn_conversation(self, project_path):
"""Test conversation with just one turn"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
test_file = os.path.join(project_path, "single.py")
with open(test_file, "w") as f:
f.write("# Single file\ndef hello():\n return 'world'\n")
turns = [
ConversationTurn(
role="user",
content="Quick question about this file",
timestamp="2023-01-01T00:00:00Z",
files=[test_file],
)
]
context = ThreadContext(
thread_id="single-turn",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:00:00Z",
tool_name="chat",
turns=turns,
initial_context={},
)
history, tokens = build_conversation_history(context)
# Should work correctly for single turn
assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
assert "=== FILES REFERENCED IN THIS CONVERSATION ===" in history
assert "--- Turn 1 (Claude) ---" in history
assert "Quick question about this file" in history
assert test_file in history
assert tokens > 0
class TestFailureScenarios:
"""Test failure scenarios and error handling"""
def test_file_list_with_missing_files(self):
"""Test conversation with references to missing files"""
turns = [
ConversationTurn(
role="user",
content="Analyze these files",
timestamp="2023-01-01T00:00:00Z",
files=["/does/not/exist.py", "/also/missing.py"],
)
]
context = ThreadContext(
thread_id="missing-files",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:00:00Z",
tool_name="analyze",
turns=turns,
initial_context={},
)
# Should handle missing files gracefully
files = get_conversation_file_list(context)
assert len(files) == 2 # Still returns file paths
assert "/does/not/exist.py" in files
assert "/also/missing.py" in files
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_conversation_with_unreadable_files(self, project_path):
"""Test conversation history building with unreadable files"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
# Create a file that will be treated as missing
missing_file = os.path.join(project_path, "nonexistent.py")
# Create a readable file for comparison
test_file = os.path.join(project_path, "readable.py")
with open(test_file, "w") as f:
f.write("# Test file\ndef test(): pass\n")
turns = [
ConversationTurn(
role="user",
content="Analyze these files",
timestamp="2023-01-01T00:00:00Z",
files=[test_file, missing_file],
)
]
context = ThreadContext(
thread_id="mixed-files",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:00:00Z",
tool_name="analyze",
turns=turns,
initial_context={},
)
history, tokens = build_conversation_history(context)
# Should handle gracefully - build history with accessible files
assert "=== CONVERSATION HISTORY (CONTINUATION) ===" in history
assert "--- Turn 1 (Claude) ---" in history
assert "Analyze these files" in history
assert tokens > 0

View File

@@ -139,12 +139,26 @@ class TestConversationMemory:
assert success is False
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
def test_build_conversation_history(self):
def test_build_conversation_history(self, project_path):
"""Test building conversation history format with files and speaker identification"""
from providers.registry import ModelProviderRegistry
ModelProviderRegistry.clear_cache()
# Create real test files to test actual file embedding functionality
main_file = project_path / "main.py"
readme_file = project_path / "docs" / "readme.md"
examples_dir = project_path / "examples"
examples_file = examples_dir / "example.py"
# Create directories and files
readme_file.parent.mkdir(parents=True, exist_ok=True)
examples_dir.mkdir(parents=True, exist_ok=True)
main_file.write_text("def main():\n print('Hello world')\n")
readme_file.write_text("# Project Documentation\nThis is a test project.\n")
examples_file.write_text("# Example code\nprint('Example')\n")
test_uuid = "12345678-1234-1234-1234-123456789012"
turns = [
@@ -152,13 +166,13 @@ class TestConversationMemory:
role="user",
content="What is Python?",
timestamp="2023-01-01T00:00:00Z",
files=["/home/user/main.py", "/home/user/docs/readme.md"],
files=[str(main_file), str(readme_file)],
),
ConversationTurn(
role="assistant",
content="Python is a programming language",
timestamp="2023-01-01T00:01:00Z",
files=["/home/user/examples/"],
files=[str(examples_dir)], # Directory will be expanded to files
tool_name="chat",
),
]
@@ -194,8 +208,13 @@ class TestConversationMemory:
assert "The following files have been shared and analyzed during our conversation." in history
# Check that file context from previous turns is included (now shows files used per turn)
assert "Files used in this turn: /home/user/main.py, /home/user/docs/readme.md" in history
assert "Files used in this turn: /home/user/examples/" in history
assert f"Files used in this turn: {main_file}, {readme_file}" in history
assert f"Files used in this turn: {examples_dir}" in history
# Verify actual file content is embedded
assert "def main():" in history
assert "Hello world" in history
assert "Project Documentation" in history
def test_build_conversation_history_empty(self):
"""Test building history with no turns"""

View File

@@ -135,6 +135,14 @@ class AnalyzeTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# Use centralized file processing logic
continuation_id = getattr(request, "continuation_id", None)
file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Files")
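A hedged sketch (not part of this diff) of how a caller might unwrap the MCP_SIZE_CHECK payload the tools above raise; the "MCP_SIZE_CHECK:" prefix and the serialized ToolOutput body come from the diff, while run_tool_safely and the prepare_prompt method name are assumptions for illustration.

import json

def run_tool_safely(tool, request):
    try:
        return tool.prepare_prompt(request)  # method name assumed for illustration
    except ValueError as exc:
        message = str(exc)
        if message.startswith("MCP_SIZE_CHECK:"):
            # Payload is a serialized ToolOutput with status "code_too_large"
            return json.loads(message[len("MCP_SIZE_CHECK:"):])
        raise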

View File

@@ -936,6 +936,49 @@ When recommending searches, be specific about what information you need and why
}
return None
def estimate_tokens_smart(self, file_path: str) -> int:
"""
Estimate tokens for a file using file-type aware ratios.
Args:
file_path: Path to the file
Returns:
int: Estimated token count
"""
from utils.file_utils import estimate_file_tokens
return estimate_file_tokens(file_path)
def check_total_file_size(self, files: list[str]) -> Optional[dict[str, Any]]:
"""
Check if total file sizes would exceed token threshold before embedding.
IMPORTANT: This performs STRICT REJECTION at MCP boundary.
No partial inclusion - either all files fit or request is rejected.
This forces Claude to make better file selection decisions.
Args:
files: List of file paths to check
Returns:
Dict with MCP_CODE_TOO_LARGE response if too large, None if acceptable
"""
if not files:
return None
# Get current model name for context-aware thresholds
model_name = getattr(self, "_current_model_name", None)
if not model_name:
from config import DEFAULT_MODEL
model_name = DEFAULT_MODEL
# Use centralized file size checking with model context
from utils.file_utils import check_total_file_size as check_file_size_utility
return check_file_size_utility(files, model_name)
def handle_prompt_file(self, files: Optional[list[str]]) -> tuple[Optional[str], Optional[list[str]]]:
"""
Check for and handle prompt.txt in the files list.

View File

@@ -178,6 +178,14 @@ class CodeReviewTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# Check user input size at MCP transport boundary (before adding internal content)
user_content = request.prompt
size_check = self.check_prompt_size(user_content)

View File

@@ -150,6 +150,14 @@ class DebugIssueTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# Build context sections
context_parts = [f"=== ISSUE DESCRIPTION ===\n{request.prompt}\n=== END DESCRIPTION ==="]

View File

@@ -43,6 +43,7 @@ class ToolOutput(BaseModel):
"refactor_analysis_complete",
"trace_complete",
"resend_prompt",
"code_too_large",
"continuation_available",
] = "success"
content: Optional[str] = Field(None, description="The main content/response from the tool")
@@ -142,6 +143,15 @@ class RefactorAnalysisComplete(BaseModel):
next_actions_for_claude: list[RefactorAction] = Field(..., description="Specific actions for Claude to implement")
class CodeTooLargeRequest(BaseModel):
"""Request to reduce file selection due to size constraints"""
status: Literal["code_too_large"] = "code_too_large"
content: str = Field(..., description="Message explaining the size constraint")
content_type: Literal["text"] = "text"
metadata: dict[str, Any] = Field(default_factory=dict)
class ResendPromptRequest(BaseModel):
"""Request to resend prompt via file due to size limits"""
@@ -284,6 +294,7 @@ SPECIAL_STATUS_MODELS = {
"refactor_analysis_complete": RefactorAnalysisComplete,
"trace_complete": TraceComplete,
"resend_prompt": ResendPromptRequest,
"code_too_large": CodeTooLargeRequest,
}
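A small sketch of how the new code_too_large status maps to CodeTooLargeRequest through SPECIAL_STATUS_MODELS; the parse_special_status wrapper and the example payload are hypothetical, only the model and the mapping come from this hunk.

from tools.models import SPECIAL_STATUS_MODELS

def parse_special_status(payload: dict):
    model_cls = SPECIAL_STATUS_MODELS.get(payload.get("status"))
    if model_cls is None:
        return None  # not a special status; treat as a normal tool response
    return model_cls(**payload)

# Example payload validating against the new model:
request = parse_special_status(
    {
        "status": "code_too_large",
        "content": "Selected files exceed the model's context budget.",
        "metadata": {"estimated_tokens": 120000},
    }
)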

View File

@@ -36,7 +36,7 @@ class PrecommitRequest(ToolRequest):
)
prompt: Optional[str] = Field(
None,
description="The original user request description for the changes. Provides critical context for the review. If original request is limited or not available, Claude MUST study the changes carefully, think deeply about the implementation intent, analyze patterns across all modifications, infer the logic and requirements from the code changes and provide a thorough starting point.",
description="The original user request description for the changes. Provides critical context for the review. If original request is limited or not available, you MUST study the changes carefully, think deeply about the implementation intent, analyze patterns across all modifications, infer the logic and requirements from the code changes and provide a thorough starting point.",
)
compare_to: Optional[str] = Field(
None,
@@ -57,7 +57,7 @@ class PrecommitRequest(ToolRequest):
review_type: Literal["full", "security", "performance", "quick"] = Field(
"full", description="Type of review to perform on the changes."
)
severity_filter: Literal["critical", "high", "medium", "all"] = Field(
severity_filter: Literal["critical", "high", "medium", "low", "all"] = Field(
"all",
description="Minimum severity level to report on the changes.",
)
@@ -117,7 +117,7 @@ class Precommit(BaseTool):
"model": self.get_model_field_schema(),
"prompt": {
"type": "string",
"description": "The original user request description for the changes. Provides critical context for the review. If original request is limited or not available, Claude MUST study the changes carefully, think deeply about the implementation intent, analyze patterns across all modifications, infer the logic and requirements from the code changes and provide a thorough starting point.",
"description": "The original user request description for the changes. Provides critical context for the review. If original request is limited or not available, you MUST study the changes carefully, think deeply about the implementation intent, analyze patterns across all modifications, infer the logic and requirements from the code changes and provide a thorough starting point.",
},
"compare_to": {
"type": "string",
@@ -145,7 +145,7 @@ class Precommit(BaseTool):
},
"severity_filter": {
"type": "string",
"enum": ["critical", "high", "medium", "all"],
"enum": ["critical", "high", "medium", "low", "all"],
"default": "all",
"description": "Minimum severity level to report on the changes.",
},
@@ -227,6 +227,14 @@ class Precommit(BaseTool):
translated_path = translate_path_for_environment(request.path)
translated_files = translate_file_paths(request.files)
# MCP boundary check - STRICT REJECTION (check original files before translation)
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# Check if the path translation resulted in an error path
if translated_path.startswith("/inaccessible/"):
raise ValueError(
@@ -540,4 +548,20 @@ class Precommit(BaseTool):
def format_response(self, response: str, request: PrecommitRequest, model_info: Optional[dict] = None) -> str:
"""Format the response with commit guidance"""
return f"{response}\n\n---\n\n**Commit Status:** If no critical issues found, changes are ready for commit. Otherwise, address issues first and re-run review. Check with user before proceeding with any commit."
# Base response
formatted_response = response
# Add footer separator
formatted_response += "\n\n---\n\n"
# Add commit status instruction
formatted_response += (
"COMMIT STATUS: You MUST provide a clear summary of ALL issues found to the user. "
"If no critical or high severity issues found, changes are ready for commit. "
"If critical issues are found, you MUST fix them first and then run the precommit tool again "
"to validate the fixes before proceeding. "
"Medium to low severity issues should be addressed but may not block commit. "
"You MUST always CONFIRM with user and show them a CLEAR summary of ALL issues before proceeding with any commit."
)
return formatted_response

View File

@@ -143,6 +143,14 @@ class ThinkDeepTool(BaseTool):
if updated_files is not None:
request.files = updated_files
# MCP boundary check - STRICT REJECTION
if request.files:
file_size_check = self.check_total_file_size(request.files)
if file_size_check:
from tools.models import ToolOutput
raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**file_size_check).model_dump_json()}")
# Build context parts
context_parts = [f"=== CLAUDE'S CURRENT ANALYSIS ===\n{current_analysis}\n=== END ANALYSIS ==="]

View File

@@ -30,12 +30,33 @@ Key Features:
- Turn-by-turn conversation history storage with tool attribution
- Cross-tool continuation support - switch tools while preserving context
- File context preservation - files shared in earlier turns remain accessible
- Automatic turn limiting (5 turns max) to prevent runaway conversations
- NEWEST-FIRST FILE PRIORITIZATION - when the same file appears in multiple turns,
references from newer turns take precedence over older ones. This ensures the
most recent file context is preserved when token limits require exclusions.
- Automatic turn limiting (20 turns max) to prevent runaway conversations
- Context reconstruction for stateless request continuity
- Redis-based persistence with automatic expiration (1 hour TTL)
- Redis-based persistence with automatic expiration (3 hour TTL)
- Thread-safe operations for concurrent access
- Graceful degradation when Redis is unavailable
FILE PRIORITIZATION STRATEGY:
The conversation memory system implements a sophisticated file prioritization algorithm
that ensures newer file references always take precedence over older ones:
1. When collecting files across conversation turns, the system walks BACKWARDS through
turns (newest to oldest) and builds a unique file list
2. If the same file path appears in multiple turns, only the reference from the
NEWEST turn is kept in the final list
3. This "newest-first" ordering is preserved throughout the entire pipeline:
- get_conversation_file_list() establishes the order
- build_conversation_history() maintains it during token budgeting
- When token limits are hit, OLDER files are excluded first
4. This strategy works across conversation chains - files from newer turns in ANY
thread take precedence over files from older turns in ANY thread
This approach ensures that when token limits force file exclusions, the most
recently referenced and contextually relevant files are preserved.
USAGE EXAMPLE:
1. Tool A creates thread: create_thread("analyze", request_data) → returns UUID
2. Tool A adds response: add_turn(UUID, "assistant", response, files=[...], tool_name="analyze")
@@ -262,11 +283,12 @@ def add_turn(
model_metadata: Optional[dict[str, Any]] = None,
) -> bool:
"""
Add turn to existing thread
Add turn to existing thread with atomic file ordering.
Appends a new conversation turn to an existing thread. This is the core
function for building conversation history and enabling cross-tool
continuation. Each turn preserves the tool and model that generated it.
continuation. Each turn preserves the tool and model that generated it,
and tracks file reception order using atomic Redis counters.
Args:
thread_id: UUID of the conversation thread
@@ -289,7 +311,7 @@ def add_turn(
Note:
- Refreshes thread TTL to configured timeout on successful update
- Turn limits prevent runaway conversations
- File references are preserved for cross-tool access
- File references are preserved for cross-tool access with atomic ordering
- Model information enables cross-provider conversations
"""
logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
@@ -374,77 +396,212 @@ def get_thread_chain(thread_id: str, max_depth: int = 20) -> list[ThreadContext]
def get_conversation_file_list(context: ThreadContext) -> list[str]:
"""
Get all unique files referenced across all turns in a conversation.
Extract all unique files from conversation turns with newest-first prioritization.
This function extracts and deduplicates file references from all conversation
turns to enable efficient file embedding - files are read once and shared
across all turns rather than being embedded multiple times.
This function implements the core file prioritization logic used throughout the
conversation memory system. It walks backwards through conversation turns
(from newest to oldest) and collects unique file references, ensuring that
when the same file appears in multiple turns, the reference from the NEWEST
turn takes precedence.
PRIORITIZATION ALGORITHM:
1. Iterate through turns in REVERSE order (index len-1 down to 0)
2. For each turn, process files in the order they appear in turn.files
3. Add file to result list only if not already seen (newest reference wins)
4. Skip duplicate files that were already added from newer turns
This ensures that:
- Files from newer conversation turns appear first in the result
- When the same file is referenced multiple times, only the newest reference is kept
- The order reflects the most recent conversation context
Example:
Turn 1: files = ["main.py", "utils.py"]
Turn 2: files = ["test.py"]
Turn 3: files = ["main.py", "config.py"] # main.py appears again
Result: ["main.py", "config.py", "test.py", "utils.py"]
(main.py from Turn 3 takes precedence over Turn 1)
Args:
context: ThreadContext containing the complete conversation
context: ThreadContext containing all conversation turns to process
Returns:
list[str]: Deduplicated list of file paths referenced in the conversation
list[str]: Unique file paths ordered by newest reference first.
Empty list if no turns exist or no files are referenced.
Performance:
- Time Complexity: O(n*m) where n=turns, m=avg files per turn
- Space Complexity: O(f) where f=total unique files
- Uses set for O(1) duplicate detection
"""
if not context.turns:
logger.debug("[FILES] No turns found, returning empty file list")
return []
# Collect all unique files from all turns, preserving order of first appearance
# Collect files by walking backwards (newest to oldest turns)
seen_files = set()
unique_files = []
file_list = []
logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns")
logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns (newest first)")
for i, turn in enumerate(context.turns):
# Process turns in reverse order (newest first) - this is the CORE of newest-first prioritization
# By iterating from len-1 down to 0, we encounter newer turns before older turns
# When we find a duplicate file, we skip it because the newer version is already in our list
for i in range(len(context.turns) - 1, -1, -1): # REVERSE: newest turn first
turn = context.turns[i]
if turn.files:
logger.debug(f"[FILES] Turn {i + 1} has {len(turn.files)} files: {turn.files}")
for file_path in turn.files:
if file_path not in seen_files:
# First time seeing this file - add it (this is the NEWEST reference)
seen_files.add(file_path)
unique_files.append(file_path)
logger.debug(f"[FILES] Added new file: {file_path}")
file_list.append(file_path)
logger.debug(f"[FILES] Added new file: {file_path} (from turn {i + 1})")
else:
logger.debug(f"[FILES] Duplicate file skipped: {file_path}")
else:
logger.debug(f"[FILES] Turn {i + 1} has no files")
# File already seen from a NEWER turn - skip this older reference
logger.debug(f"[FILES] Skipping duplicate file: {file_path} (newer version already included)")
logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}")
return unique_files
logger.debug(f"[FILES] Final file list ({len(file_list)}): {file_list}")
return file_list
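Self-contained sketch of the newest-first deduplication described above, using plain lists of file paths instead of ThreadContext/ConversationTurn objects; the final assertion mirrors the example from the docstring.

def newest_first_files(turn_files: list[list[str]]) -> list[str]:
    seen, ordered = set(), []
    for files in reversed(turn_files):  # walk turns newest to oldest
        for path in files:
            if path not in seen:
                seen.add(path)  # newest reference wins; older duplicates are skipped
                ordered.append(path)
    return ordered

# Turn 1: ["main.py", "utils.py"], Turn 2: ["test.py"], Turn 3: ["main.py", "config.py"]
assert newest_first_files(
    [["main.py", "utils.py"], ["test.py"], ["main.py", "config.py"]]
) == ["main.py", "config.py", "test.py", "utils.py"]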
def _plan_file_inclusion_by_size(all_files: list[str], max_file_tokens: int) -> tuple[list[str], list[str], int]:
"""
Plan which files to include based on size constraints.
This is ONLY used for conversation history building, not MCP boundary checks.
Args:
all_files: List of files to consider for inclusion
max_file_tokens: Maximum tokens available for file content
Returns:
Tuple of (files_to_include, files_to_skip, estimated_total_tokens)
"""
if not all_files:
return [], [], 0
files_to_include = []
files_to_skip = []
total_tokens = 0
logger.debug(f"[FILES] Planning inclusion for {len(all_files)} files with budget {max_file_tokens:,} tokens")
for file_path in all_files:
try:
from utils.file_utils import estimate_file_tokens, translate_path_for_environment
translated_path = translate_path_for_environment(file_path)
if os.path.exists(translated_path) and os.path.isfile(translated_path):
# Use centralized token estimation for consistency
estimated_tokens = estimate_file_tokens(file_path)
if total_tokens + estimated_tokens <= max_file_tokens:
files_to_include.append(file_path)
total_tokens += estimated_tokens
logger.debug(
f"[FILES] Including {file_path} - {estimated_tokens:,} tokens (total: {total_tokens:,})"
)
else:
files_to_skip.append(file_path)
logger.debug(
f"[FILES] Skipping {file_path} - would exceed budget (needs {estimated_tokens:,} tokens)"
)
else:
files_to_skip.append(file_path)
logger.debug(f"[FILES] Skipping {file_path} - file not accessible")
except Exception as e:
files_to_skip.append(file_path)
logger.debug(f"[FILES] Skipping {file_path} - error: {type(e).__name__}: {e}")
logger.debug(
f"[FILES] Inclusion plan: {len(files_to_include)} include, {len(files_to_skip)} skip, {total_tokens:,} tokens"
)
return files_to_include, files_to_skip, total_tokens
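Usage sketch for the planner above (paths and budget are hypothetical): because the input list is already ordered newest-first, older files are the ones dropped when the token budget runs out.

included, skipped, used_tokens = _plan_file_inclusion_by_size(
    ["/project/new_feature.py", "/project/old_module.py"],  # newest first
    max_file_tokens=50_000,
)
print(f"Embedding {len(included)} files (~{used_tokens:,} tokens); skipping {len(skipped)}")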
def build_conversation_history(context: ThreadContext, model_context=None, read_files_func=None) -> tuple[str, int]:
"""
Build formatted conversation history for tool prompts with embedded file contents.
Creates a formatted string representation of the conversation history that includes
full file contents from all referenced files. Files are embedded only ONCE at the
start, even if referenced in multiple turns, to prevent duplication and optimize
token usage.
Creates a comprehensive conversation history that includes both conversation turns and
file contents, with intelligent prioritization to maximize relevant context within
token limits. This function enables stateless tools to access complete conversation
context from previous interactions, including cross-tool continuations.
If the thread has a parent chain, this function traverses the entire chain to
include the complete conversation history.
FILE PRIORITIZATION BEHAVIOR:
Files from newer conversation turns are prioritized over files from older turns.
When the same file appears in multiple turns, the reference from the NEWEST turn
takes precedence. This ensures the most recent file context is preserved when
token limits require file exclusions.
CONVERSATION CHAIN HANDLING:
If the thread has a parent_thread_id, this function traverses the entire chain
to include complete conversation history across multiple linked threads. File
prioritization works across the entire chain, not just the current thread.
TOKEN MANAGEMENT:
- Uses model-specific token allocation (file_tokens + history_tokens)
- Files are embedded ONCE at the start to prevent duplication
- Conversation turns are processed newest-first but presented chronologically
- Stops adding turns when token budget would be exceeded
- Gracefully handles token limits with informative notes
Args:
context: ThreadContext containing the complete conversation
model_context: ModelContext for token allocation (optional, uses DEFAULT_MODEL if not provided)
read_files_func: Optional function to read files (for testing)
context: ThreadContext containing the conversation to format
model_context: ModelContext for token allocation (optional, uses DEFAULT_MODEL fallback)
read_files_func: Optional function to read files (primarily for testing)
Returns:
tuple[str, int]: (formatted_conversation_history, total_tokens_used)
Returns ("", 0) if no conversation turns exist
Returns ("", 0) if no conversation turns exist in the context
Format:
- Header with thread metadata and turn count
- All referenced files embedded once with full contents
- Each turn shows: role, tool used, which files were used, content
- Clear delimiters for AI parsing
- Continuation instruction at end
Output Format:
=== CONVERSATION HISTORY (CONTINUATION) ===
Thread: <thread_id>
Tool: <original_tool_name>
Turn <current>/<max_allowed>
You are continuing this conversation thread from where it left off.
Note:
This formatted history allows tools to "see" both conversation context AND
file contents from previous tools, enabling true cross-tool collaboration
while preventing duplicate file embeddings.
=== FILES REFERENCED IN THIS CONVERSATION ===
The following files have been shared and analyzed during our conversation.
[NOTE: X files omitted due to size constraints]
Refer to these when analyzing the context and requests below:
<embedded_file_contents_with_line_numbers>
=== END REFERENCED FILES ===
Previous conversation turns:
--- Turn 1 (Claude) ---
Files used in this turn: file1.py, file2.py
<turn_content>
--- Turn 2 (Gemini using analyze via google/gemini-2.5-flash) ---
Files used in this turn: file3.py
<turn_content>
=== END CONVERSATION HISTORY ===
IMPORTANT: You are continuing an existing conversation thread...
This is turn X of the conversation - use the conversation history above...
Cross-Tool Collaboration:
This formatted history allows any tool to "see" both conversation context AND
file contents from previous tools, enabling seamless handoffs between analyze,
codereview, debug, chat, and other tools while maintaining complete context.
Performance Characteristics:
- O(n) file collection with newest-first prioritization
- Intelligent token budgeting prevents context window overflow
- Redis-based persistence with automatic TTL management
- Graceful degradation when files are inaccessible or too large
"""
# Get the complete thread chain
if context.parent_thread_id:
@@ -453,19 +610,25 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
# Collect all turns from all threads in chain
all_turns = []
all_files_set = set()
total_turns = 0
for thread in chain:
all_turns.extend(thread.turns)
total_turns += len(thread.turns)
# Collect files from this thread
for turn in thread.turns:
if turn.files:
all_files_set.update(turn.files)
all_files = list(all_files_set)
# Use centralized file collection logic for consistency across the entire chain
# This ensures files from newer turns across ALL threads take precedence
# over files from older turns, maintaining the newest-first prioritization
# even when threads are chained together
temp_context = ThreadContext(
thread_id="merged_chain",
created_at=context.created_at,
last_updated_at=context.last_updated_at,
tool_name=context.tool_name,
turns=all_turns, # All turns from entire chain in chronological order
initial_context=context.initial_context,
)
all_files = get_conversation_file_list(temp_context) # Applies newest-first logic to entire chain
logger.debug(f"[THREAD] Built history from {len(chain)} threads with {total_turns} total turns")
else:
# Single thread, no parent chain
@@ -511,101 +674,91 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_
"",
]
# Embed all files referenced in this conversation once at the start
# Embed files referenced in this conversation with size-aware selection
if all_files:
logger.debug(f"[FILES] Starting embedding for {len(all_files)} files")
history_parts.extend(
[
"=== FILES REFERENCED IN THIS CONVERSATION ===",
"The following files have been shared and analyzed during our conversation.",
"Refer to these when analyzing the context and requests below:",
"",
]
)
if read_files_func is None:
from utils.file_utils import read_file_content
# Plan file inclusion based on size constraints
# CRITICAL: all_files is already ordered by newest-first prioritization from get_conversation_file_list()
# So when _plan_file_inclusion_by_size() hits token limits, it naturally excludes OLDER files first
# while preserving the most recent file references - exactly what we want!
files_to_include, files_to_skip, estimated_tokens = _plan_file_inclusion_by_size(all_files, max_file_tokens)
# Optimized: read files incrementally with token tracking
file_contents = []
total_tokens = 0
files_included = 0
files_truncated = 0
if files_to_skip:
logger.info(f"[FILES] Skipping {len(files_to_skip)} files due to size constraints: {files_to_skip}")
for file_path in all_files:
try:
logger.debug(f"[FILES] Processing file {file_path}")
# Correctly unpack the tuple returned by read_file_content
formatted_content, content_tokens = read_file_content(file_path)
if formatted_content:
# read_file_content already returns formatted content, use it directly
# Check if adding this file would exceed the limit
if total_tokens + content_tokens <= max_file_tokens:
if files_to_include:
history_parts.extend(
[
"=== FILES REFERENCED IN THIS CONVERSATION ===",
"The following files have been shared and analyzed during our conversation.",
(
""
if not files_to_skip
else f"[NOTE: {len(files_to_skip)} files omitted due to size constraints]"
),
"Refer to these when analyzing the context and requests below:",
"",
]
)
if read_files_func is None:
from utils.file_utils import read_file_content
# Process files for embedding
file_contents = []
total_tokens = 0
files_included = 0
for file_path in files_to_include:
try:
logger.debug(f"[FILES] Processing file {file_path}")
formatted_content, content_tokens = read_file_content(file_path)
if formatted_content:
file_contents.append(formatted_content)
total_tokens += content_tokens
files_included += 1
logger.debug(
f"File embedded in conversation history: {file_path} ({content_tokens:,} tokens)"
)
logger.debug(
f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})"
)
else:
files_truncated += 1
logger.debug(
f"File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {max_file_tokens:,} limit)"
)
logger.debug(
f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)"
)
# Stop processing more files
break
else:
logger.debug(f"File skipped (empty content): {file_path}")
logger.debug(f"[FILES] File {file_path} has empty content - skipping")
except Exception as e:
# Skip files that can't be read but log the failure
logger.warning(
f"Failed to embed file in conversation history: {file_path} - {type(e).__name__}: {e}"
)
logger.debug(f"[FILES] Failed to read file {file_path} - {type(e).__name__}: {e}")
continue
logger.debug(f"File skipped (empty content): {file_path}")
except Exception as e:
logger.warning(
f"Failed to embed file in conversation history: {file_path} - {type(e).__name__}: {e}"
)
continue
if file_contents:
files_content = "".join(file_contents)
if files_truncated > 0:
files_content += (
f"\n[NOTE: {files_truncated} additional file(s) were truncated due to token limit]\n"
)
history_parts.append(files_content)
logger.debug(
f"Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens"
)
logger.debug(
f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total"
)
else:
history_parts.append("(No accessible files found)")
logger.debug(
f"Conversation history file embedding: no accessible files found from {len(all_files)} requested"
)
logger.debug(f"[FILES] No accessible files found from {len(all_files)} requested files")
else:
# Fallback to original read_files function for backward compatibility
files_content = read_files_func(all_files)
if files_content:
# Add token validation for the combined file content
from utils.token_utils import check_token_limit
within_limit, estimated_tokens = check_token_limit(files_content)
if within_limit:
if file_contents:
files_content = "".join(file_contents)
if files_to_skip:
files_content += (
f"\n[NOTE: {len(files_to_skip)} additional file(s) were omitted due to size constraints. "
f"These were older files from earlier conversation turns.]\n"
)
history_parts.append(files_content)
logger.debug(
f"Conversation history file embedding complete: {files_included} files embedded, {len(files_to_skip)} omitted, {total_tokens:,} total tokens"
)
else:
# Handle token limit exceeded for conversation files
error_message = f"ERROR: The total size of files referenced in this conversation has exceeded the context limit and cannot be displayed.\nEstimated tokens: {estimated_tokens}, but limit is {max_file_tokens}."
history_parts.append(error_message)
history_parts.append("(No accessible files found)")
logger.debug(f"[FILES] No accessible files found from {len(files_to_include)} planned files")
else:
history_parts.append("(No accessible files found)")
# Fallback to original read_files function for backward compatibility
files_content = read_files_func(all_files)
if files_content:
# Add token validation for the combined file content
from utils.token_utils import check_token_limit
within_limit, estimated_tokens = check_token_limit(files_content)
if within_limit:
history_parts.append(files_content)
else:
# Handle token limit exceeded for conversation files
error_message = f"ERROR: The total size of files referenced in this conversation has exceeded the context limit and cannot be displayed.\nEstimated tokens: {estimated_tokens}, but limit is {max_file_tokens}."
history_parts.append(error_message)
else:
history_parts.append("(No accessible files found)")
history_parts.extend(
[

View File

@@ -178,3 +178,65 @@ def is_binary_file(file_path: str) -> bool:
from pathlib import Path
return Path(file_path).suffix.lower() in BINARY_EXTENSIONS
# File-type specific token-to-byte ratios for accurate token estimation
# Based on empirical analysis of file compression characteristics and tokenization patterns
TOKEN_ESTIMATION_RATIOS = {
# Programming languages
".py": 3.5, # Python - moderate verbosity
".js": 3.2, # JavaScript - compact syntax
".ts": 3.3, # TypeScript - type annotations add tokens
".jsx": 3.1, # React JSX - JSX tags are tokenized efficiently
".tsx": 3.0, # React TSX - combination of TypeScript + JSX
".java": 3.6, # Java - verbose syntax, long identifiers
".cpp": 3.7, # C++ - preprocessor directives, templates
".c": 3.8, # C - function definitions, struct declarations
".go": 3.9, # Go - explicit error handling, package names
".rs": 3.5, # Rust - similar to Python in verbosity
".php": 3.3, # PHP - mixed HTML/code, variable prefixes
".rb": 3.6, # Ruby - descriptive method names
".swift": 3.4, # Swift - modern syntax, type inference
".kt": 3.5, # Kotlin - similar to modern languages
".scala": 3.2, # Scala - functional programming, concise
# Scripts and configuration
".sh": 4.1, # Shell scripts - commands and paths
".bat": 4.0, # Batch files - similar to shell
".ps1": 3.8, # PowerShell - more structured than bash
".sql": 3.8, # SQL - keywords and table/column names
# Data and configuration formats
".json": 2.5, # JSON - lots of punctuation and quotes
".yaml": 3.0, # YAML - structured but readable
".yml": 3.0, # YAML (alternative extension)
".xml": 2.8, # XML - tags and attributes
".toml": 3.2, # TOML - similar to config files
# Documentation and text
".md": 4.2, # Markdown - natural language with formatting
".txt": 4.0, # Plain text - mostly natural language
".rst": 4.1, # reStructuredText - documentation format
# Web technologies
".html": 2.9, # HTML - tags and attributes
".css": 3.4, # CSS - properties and selectors
# Logs and data
".log": 4.5, # Log files - timestamps, messages, stack traces
".csv": 3.1, # CSV - data with delimiters
# Docker and infrastructure
".dockerfile": 3.7, # Dockerfile - commands and paths
".tf": 3.5, # Terraform - infrastructure as code
}
def get_token_estimation_ratio(file_path: str) -> float:
"""
Get the token estimation ratio for a file based on its extension.
Args:
file_path: Path to the file
Returns:
Token-to-byte ratio for the file type (default: 3.5 for unknown types)
"""
from pathlib import Path
extension = Path(file_path).suffix.lower()
return TOKEN_ESTIMATION_RATIOS.get(extension, 3.5) # Conservative default
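# Illustrative usage sketch (hypothetical file name and size, not taken from the diff above):
# the ratio table converts raw bytes into a rough token estimate, so a ~50 KB Python file
# at 3.5 bytes per token comes out to roughly 14,000 tokens.
def _example_ratio_estimate() -> int:
    size_bytes = 50_000  # assumed size of a hypothetical Python source file
    ratio = get_token_estimation_ratio("services/auth.py")  # ".py" -> 3.5
    return int(size_bytes / ratio)  # ~14,285 estimated tokens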

View File

@@ -18,10 +18,12 @@ Security Model:
- Symbolic links are resolved to ensure they stay within bounds
"""
import json
import logging
import os
import time
from pathlib import Path
from typing import Optional
from typing import Callable, Optional
from .file_types import BINARY_EXTENSIONS, CODE_EXTENSIONS, IMAGE_EXTENSIONS, TEXT_EXTENSIONS
from .security_config import CONTAINER_WORKSPACE, EXCLUDED_DIRS, MCP_SIGNATURE_FILES, SECURITY_ROOT, WORKSPACE_ROOT
@@ -689,3 +691,349 @@ def read_files(
result = "\n\n".join(content_parts) if content_parts else ""
logger.debug(f"[FILES] read_files complete: {len(result)} chars, {total_tokens:,} tokens used")
return result
def estimate_file_tokens(file_path: str) -> int:
"""
Estimate tokens for a file using file-type aware ratios.
Args:
file_path: Path to the file
Returns:
Estimated token count for the file
"""
try:
translated_path = translate_path_for_environment(file_path)
if not os.path.exists(translated_path) or not os.path.isfile(translated_path):
return 0
file_size = os.path.getsize(translated_path)
# Get the appropriate ratio for this file type
from .file_types import get_token_estimation_ratio
ratio = get_token_estimation_ratio(file_path)
return int(file_size / ratio)
except Exception:
return 0
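# Illustrative usage sketch (hypothetical cutoff): callers can gate verbatim embedding on the
# per-file estimate, skipping files that are inaccessible (estimate of 0) or over budget.
def _example_should_embed(file_path: str, budget_tokens: int = 10_000) -> bool:
    estimated = estimate_file_tokens(file_path)
    return 0 < estimated <= budget_tokens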
def check_files_size_limit(files: list[str], max_tokens: int, threshold_percent: float = 1.0) -> tuple[bool, int, int]:
"""
Check if a list of files would exceed token limits.
Args:
files: List of file paths to check
max_tokens: Maximum allowed tokens
threshold_percent: Fraction of max_tokens to use as the threshold (0.0-1.0)
Returns:
Tuple of (within_limit, total_estimated_tokens, file_count)
"""
if not files:
return True, 0, 0
total_estimated_tokens = 0
file_count = 0
threshold = int(max_tokens * threshold_percent)
for file_path in files:
try:
estimated_tokens = estimate_file_tokens(file_path)
total_estimated_tokens += estimated_tokens
if estimated_tokens > 0: # Only count accessible files
file_count += 1
except Exception:
# Skip files that can't be accessed for size check
continue
within_limit = total_estimated_tokens <= threshold
return within_limit, total_estimated_tokens, file_count
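# Illustrative usage sketch (hypothetical budget): pre-check a candidate file set against
# 80% of a 100,000-token file allocation before any file content is actually read.
def _example_precheck(files: list[str]) -> bool:
    within_limit, estimated, count = check_files_size_limit(files, max_tokens=100_000, threshold_percent=0.8)
    if not within_limit:
        logger.debug(f"[FILES] {count} files estimated at {estimated:,} tokens, over the 80,000-token threshold")
    return within_limit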
class LogTailer:
"""
General-purpose log file tailer with rotation detection.
This class provides a reusable way to monitor log files for new content,
automatically handling log rotation and maintaining position tracking.
"""
def __init__(self, file_path: str, initial_seek_end: bool = True):
"""
Initialize log tailer for a specific file.
Args:
file_path: Path to the log file to monitor
initial_seek_end: If True, start monitoring from end of file
"""
self.file_path = file_path
self.position = 0
self.last_size = 0
self.initial_seek_end = initial_seek_end
# Ensure file exists and initialize position
Path(self.file_path).touch()
if self.initial_seek_end and os.path.exists(self.file_path):
self.last_size = os.path.getsize(self.file_path)
self.position = self.last_size
def read_new_lines(self) -> list[str]:
"""
Read new lines since last call, handling rotation.
Returns:
List of new lines from the file
"""
if not os.path.exists(self.file_path):
return []
try:
current_size = os.path.getsize(self.file_path)
# Check for log rotation (file size decreased)
if current_size < self.last_size:
self.position = 0
self.last_size = current_size
with open(self.file_path, encoding="utf-8", errors="ignore") as f:
f.seek(self.position)
new_lines = f.readlines()
self.position = f.tell()
self.last_size = current_size
# Strip whitespace from each line
return [line.strip() for line in new_lines if line.strip()]
except OSError:
return []
def monitor_continuously(
self,
line_handler: Callable[[str], None],
check_interval: float = 0.5,
stop_condition: Optional[Callable[[], bool]] = None,
):
"""
Monitor file continuously and call handler for each new line.
Args:
line_handler: Function to call for each new line
check_interval: Seconds between file checks
stop_condition: Optional function that returns True to stop monitoring
"""
while True:
try:
if stop_condition and stop_condition():
break
new_lines = self.read_new_lines()
for line in new_lines:
line_handler(line)
time.sleep(check_interval)
except KeyboardInterrupt:
break
except Exception as e:
logger.warning(f"Error monitoring log file {self.file_path}: {e}")
time.sleep(1)
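# Illustrative usage sketch (hypothetical log path and keyword): tail a log file, surviving
# rotation, and print only lines containing "ERROR". Runs until interrupted.
def _example_tail_errors() -> None:
    tailer = LogTailer("/tmp/example_app.log", initial_seek_end=True)
    tailer.monitor_continuously(
        line_handler=lambda line: print(line) if "ERROR" in line else None,
        check_interval=0.5,
    )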
def read_json_file(file_path: str) -> Optional[dict]:
"""
Read and parse a JSON file with proper error handling.
Args:
file_path: Path to the JSON file
Returns:
Parsed JSON data as dict, or None if file doesn't exist or invalid
"""
try:
translated_path = translate_path_for_environment(file_path)
if not os.path.exists(translated_path):
return None
with open(translated_path, encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, OSError):
return None
def write_json_file(file_path: str, data: dict, indent: int = 2) -> bool:
"""
Write data to a JSON file with proper formatting.
Args:
file_path: Path to write the JSON file
data: Dictionary data to serialize
indent: JSON indentation level
Returns:
True if successful, False otherwise
"""
try:
translated_path = translate_path_for_environment(file_path)
os.makedirs(os.path.dirname(translated_path) or ".", exist_ok=True)  # "." guards against an empty dirname for bare filenames
with open(translated_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=indent, ensure_ascii=False)
return True
except (OSError, TypeError):
return False
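# Illustrative round-trip sketch (hypothetical path and keys): persist a small settings dict
# and read it back, relying on the helpers' return values instead of raised exceptions.
def _example_json_roundtrip() -> Optional[dict]:
    settings = {"model": "auto", "max_file_tokens": 100_000}
    if write_json_file("/tmp/example_settings.json", settings):
        return read_json_file("/tmp/example_settings.json")
    return None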
def get_file_size(file_path: str) -> int:
"""
Get file size in bytes with proper error handling.
Args:
file_path: Path to the file
Returns:
File size in bytes, or 0 if file doesn't exist or error
"""
try:
translated_path = translate_path_for_environment(file_path)
if os.path.exists(translated_path) and os.path.isfile(translated_path):
return os.path.getsize(translated_path)
return 0
except OSError:
return 0
def ensure_directory_exists(file_path: str) -> bool:
"""
Ensure the parent directory of a file path exists.
Args:
file_path: Path to file (directory will be created for parent)
Returns:
True if directory exists or was created, False on error
"""
try:
translated_path = translate_path_for_environment(file_path)
directory = os.path.dirname(translated_path)
if directory:
os.makedirs(directory, exist_ok=True)
return True
except OSError:
return False
def is_text_file(file_path: str) -> bool:
"""
Check if a file is likely a text file based on extension and content.
Args:
file_path: Path to the file
Returns:
True if file appears to be text, False otherwise
"""
from .file_types import is_text_file as check_text_type
return check_text_type(file_path)
def read_file_safely(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
"""
Read a file with size limits and encoding handling.
Args:
file_path: Path to the file
max_size: Maximum file size in bytes (default 10MB)
Returns:
File content as string, or None if file too large or unreadable
"""
try:
translated_path = translate_path_for_environment(file_path)
if not os.path.exists(translated_path) or not os.path.isfile(translated_path):
return None
file_size = os.path.getsize(translated_path)
if file_size > max_size:
return None
with open(translated_path, encoding="utf-8", errors="ignore") as f:
return f.read()
except OSError:
return None
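# Illustrative usage sketch (hypothetical 1 MB cap): load optional prompt text, falling back
# to an empty string when the file is missing, unreadable, or larger than the cap.
def _example_load_prompt(path: str) -> str:
    content = read_file_safely(path, max_size=1 * 1024 * 1024)
    return content if content is not None else ""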
def check_total_file_size(files: list[str], model_name: Optional[str] = None) -> Optional[dict]:
"""
Check if total file sizes would exceed token threshold before embedding.
IMPORTANT: This performs STRICT REJECTION at MCP boundary.
No partial inclusion - either all files fit or request is rejected.
This forces Claude to make better file selection decisions.
Args:
files: List of file paths to check
model_name: Model name for context-aware thresholds, or None for default
Returns:
Dict with MCP_CODE_TOO_LARGE response if too large, None if acceptable
"""
if not files:
return None
# Get model-specific token allocation (dynamic thresholds)
if not model_name:
from config import DEFAULT_MODEL
model_name = DEFAULT_MODEL
# Handle auto mode gracefully
if model_name.lower() == "auto":
from providers.registry import ModelProviderRegistry
model_name = ModelProviderRegistry.get_preferred_fallback_model()
from utils.model_context import ModelContext
model_context = ModelContext(model_name)
token_allocation = model_context.calculate_token_allocation()
# Dynamic threshold based on model capacity
context_window = token_allocation.total_tokens
if context_window >= 1_000_000: # Gemini-class models
threshold_percent = 0.8 # Can be more generous
elif context_window >= 500_000: # Mid-range models
threshold_percent = 0.7 # Moderate
else: # OpenAI-class models (200K)
threshold_percent = 0.6 # Conservative
max_file_tokens = int(token_allocation.file_tokens * threshold_percent)
# Use centralized file size checking (threshold already applied to max_file_tokens)
within_limit, total_estimated_tokens, file_count = check_files_size_limit(files, max_file_tokens)
if not within_limit:
return {
"status": "code_too_large",
"content": (
f"The selected files are too large for analysis "
f"(estimated {total_estimated_tokens:,} tokens, limit {max_file_tokens:,}). "
f"Please select fewer, more specific files that are most relevant "
f"to your question, then invoke the tool again."
),
"content_type": "text",
"metadata": {
"total_estimated_tokens": total_estimated_tokens,
"limit": max_file_tokens,
"file_count": file_count,
"threshold_percent": threshold_percent,
"model_context_window": context_window,
"instructions": "Reduce file selection and try again - all files must fit within budget",
},
}
return None # Proceed with ALL files
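# Illustrative boundary-check sketch (the success-path response shape here is hypothetical):
# a tool entry point applies the strict rejection before doing any other work, passing
# model_name=None so DEFAULT_MODEL (or its auto-mode fallback) determines the threshold.
def _example_tool_entry(files: list[str]) -> dict:
    rejection = check_total_file_size(files, model_name=None)
    if rejection is not None:
        return rejection  # code_too_large response is returned to the caller unchanged
    return {"status": "ok", "content": f"Proceeding with all {len(files)} files"}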