Fix conversation history duplication and optimize file embedding

This major refactoring addresses critical bugs in conversation history management and significantly improves token efficiency through intelligent file embedding: **Key Improvements:** • Fixed conversation history duplication bug by centralizing reconstruction in server.py • Added intelligent file filtering to prevent re-embedding files already in conversation history • Centralized file processing logic in BaseTool._prepare_file_content_for_prompt() • Enhanced log monitoring with better categorization and file embedding visibility • Updated comprehensive test suite to verify new architecture and edge cases **Architecture Changes:** • Removed duplicate conversation history reconstruction from tools/base.py • Conversation history now handled exclusively by server.py:reconstruct_thread_context • All tools now use centralized file processing with automatic deduplication • Improved token efficiency by embedding unique files only once per conversation **Performance Benefits:** • Reduced token usage through smart file filtering • Eliminated redundant file embeddings in continued conversations • Better observability with detailed debug logging for file operations 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-11 11:40:12 +04:00
parent 4466d1d1fe
commit 5a94737516
11 changed files with 501 additions and 84 deletions
--- a/tests/test_conversation_history_bug.py
+++ b/tests/test_conversation_history_bug.py
@@ -68,13 +68,12 @@ class TestConversationHistoryBugFix:
    def setup_method(self):
        self.tool = FileContextTool()

-    @patch("tools.base.get_thread")
    @patch("tools.base.add_turn")
-    async def test_conversation_history_included_with_continuation_id(self, mock_add_turn, mock_get_thread):
+    async def test_conversation_history_included_with_continuation_id(self, mock_add_turn):
        """Test that conversation history (including file context) is included when using continuation_id"""

        # Create a thread context with previous turns including files
-        thread_context = ThreadContext(
+        _thread_context = ThreadContext(
            thread_id="test-history-id",
            created_at="2023-01-01T00:00:00Z",
            last_updated_at="2023-01-01T00:02:00Z",
@@ -98,8 +97,6 @@ class TestConversationHistoryBugFix:
            initial_context={"question": "Analyze authentication security"},
        )

-        # Mock get_thread to return our test context
-        mock_get_thread.return_value = thread_context
        # Mock add_turn to return success
        mock_add_turn.return_value = True

@@ -125,6 +122,9 @@ class TestConversationHistoryBugFix:
            mock_create_model.return_value = mock_model

            # Execute tool with continuation_id
+            # In the corrected flow, server.py:reconstruct_thread_context
+            # would have already added conversation history to the prompt
+            # This test simulates that the prompt already contains conversation history
            arguments = {
                "prompt": "What should we fix first?",
                "continuation_id": "test-history-id",
@@ -136,38 +136,30 @@ class TestConversationHistoryBugFix:
            response_data = json.loads(response[0].text)
            assert response_data["status"] == "success"

-            # Verify get_thread was called for history reconstruction
-            mock_get_thread.assert_called_with("test-history-id")
+            # Note: After fixing the duplication bug, conversation history reconstruction
+            # now happens ONLY in server.py, not in tools/base.py
+            # This test verifies that tools/base.py no longer duplicates conversation history

-            # Verify the prompt includes conversation history
+            # Verify the prompt is captured
            assert captured_prompt is not None

-            # Check that conversation history is included
-            assert "=== CONVERSATION HISTORY ===" in captured_prompt
-            assert "Turn 1 (Gemini using analyze)" in captured_prompt
-            assert "Turn 2 (Gemini using codereview)" in captured_prompt
+            # The prompt should NOT contain conversation history (since we removed the duplicate code)
+            # In the real flow, server.py would add conversation history before calling tool.execute()
+            assert "=== CONVERSATION HISTORY ===" not in captured_prompt

-            # Check that file context from previous turns is included
-            assert "📁 Files referenced: /src/auth.py, /src/security.py" in captured_prompt
-            assert "📁 Files referenced: /src/auth.py, /tests/test_auth.py" in captured_prompt
-
-            # Check that previous turn content is included
-            assert "I've analyzed the authentication module and found several security issues." in captured_prompt
-            assert "The code review shows these files have critical vulnerabilities." in captured_prompt
-
-            # Check that continuation instruction is included
-            assert "Continue this conversation by building on the previous context." in captured_prompt
-
-            # Check that current request is still included
+            # The prompt should contain the current request
            assert "What should we fix first?" in captured_prompt
            assert "Files in current request: /src/utils.py" in captured_prompt

-    @patch("tools.base.get_thread")
-    async def test_no_history_when_thread_not_found(self, mock_get_thread):
+            # This test confirms the duplication bug is fixed - tools/base.py no longer
+            # redundantly adds conversation history that server.py already added
+
+    async def test_no_history_when_thread_not_found(self):
        """Test graceful handling when thread is not found"""

-        # Mock get_thread to return None (thread not found)
-        mock_get_thread.return_value = None
+        # Note: After fixing the duplication bug, thread not found handling
+        # happens in server.py:reconstruct_thread_context, not in tools/base.py
+        # This test verifies tools don't try to handle missing threads themselves

        captured_prompt = None

@@ -190,17 +182,16 @@ class TestConversationHistoryBugFix:
            mock_create_model.return_value = mock_model

            # Execute tool with continuation_id for non-existent thread
+            # In the real flow, server.py would have already handled the missing thread
            arguments = {"prompt": "Test without history", "continuation_id": "non-existent-thread-id"}
            response = await self.tool.execute(arguments)

-            # Should still succeed but without history
+            # Should succeed since tools/base.py no longer handles missing threads
            response_data = json.loads(response[0].text)
            assert response_data["status"] == "success"

-            # Verify get_thread was called for non-existent thread
-            mock_get_thread.assert_called_with("non-existent-thread-id")
-
            # Verify the prompt does NOT include conversation history
+            # (because tools/base.py no longer tries to add it)
            assert captured_prompt is not None
            assert "=== CONVERSATION HISTORY ===" not in captured_prompt
            assert "Test without history" in captured_prompt
@@ -246,6 +237,113 @@ class TestConversationHistoryBugFix:
            # (This is the existing behavior for new conversations)
            assert "If you'd like to ask a follow-up question" in captured_prompt

+    @patch("tools.base.get_thread")
+    @patch("tools.base.add_turn")
+    @patch("utils.file_utils.resolve_and_validate_path")
+    async def test_no_duplicate_file_embedding_during_continuation(
+        self, mock_resolve_path, mock_add_turn, mock_get_thread
+    ):
+        """Test that files already embedded in conversation history are not re-embedded"""
+
+        # Mock file resolution to allow our test files
+        def mock_resolve(path_str):
+            from pathlib import Path
+
+            return Path(path_str)  # Just return as-is for test files
+
+        mock_resolve_path.side_effect = mock_resolve
+
+        # Create a thread context with previous turns including files
+        _thread_context = ThreadContext(
+            thread_id="test-duplicate-files-id",
+            created_at="2023-01-01T00:00:00Z",
+            last_updated_at="2023-01-01T00:02:00Z",
+            tool_name="analyze",
+            turns=[
+                ConversationTurn(
+                    role="assistant",
+                    content="I've analyzed the authentication module.",
+                    timestamp="2023-01-01T00:01:00Z",
+                    tool_name="analyze",
+                    files=["/src/auth.py", "/src/security.py"],  # These files were already analyzed
+                ),
+                ConversationTurn(
+                    role="assistant",
+                    content="Found security issues in the auth system.",
+                    timestamp="2023-01-01T00:02:00Z",
+                    tool_name="codereview",
+                    files=["/src/auth.py", "/tests/test_auth.py"],  # auth.py referenced again + new file
+                ),
+            ],
+            initial_context={"question": "Analyze authentication security"},
+        )
+
+        # Mock get_thread to return our test context
+        mock_get_thread.return_value = _thread_context
+        mock_add_turn.return_value = True
+
+        # Mock the model to capture what prompt it receives
+        captured_prompt = None
+
+        with patch.object(self.tool, "create_model") as mock_create_model:
+            mock_model = Mock()
+            mock_response = Mock()
+            mock_response.candidates = [
+                Mock(
+                    content=Mock(parts=[Mock(text="Analysis of new files complete")]),
+                    finish_reason="STOP",
+                )
+            ]
+
+            def capture_prompt(prompt):
+                nonlocal captured_prompt
+                captured_prompt = prompt
+                return mock_response
+
+            mock_model.generate_content.side_effect = capture_prompt
+            mock_create_model.return_value = mock_model
+
+            # Mock read_files to simulate file existence and capture its calls
+            with patch("tools.base.read_files") as mock_read_files:
+                # When the tool processes the new files, it should only read '/src/utils.py'
+                mock_read_files.return_value = "--- /src/utils.py ---\ncontent of utils"
+
+                # Execute tool with continuation_id and mix of already-referenced and new files
+                arguments = {
+                    "prompt": "Now check the utility functions too",
+                    "continuation_id": "test-duplicate-files-id",
+                    "files": ["/src/auth.py", "/src/utils.py"],  # auth.py already in history, utils.py is new
+                }
+                response = await self.tool.execute(arguments)
+
+                # Verify response succeeded
+                response_data = json.loads(response[0].text)
+                assert response_data["status"] == "success"
+
+                # Verify the prompt structure
+                assert captured_prompt is not None
+
+                # After fixing the duplication bug, conversation history (including file embedding)
+                # is no longer added by tools/base.py - it's handled by server.py
+                # This test verifies the file filtering logic still works correctly
+
+                # The current request should still be processed normally
+                assert "Now check the utility functions too" in captured_prompt
+                assert "Files in current request: /src/auth.py, /src/utils.py" in captured_prompt
+
+                # Most importantly, verify that the file filtering logic works correctly
+                # even though conversation history isn't built by tools/base.py anymore
+                with patch.object(self.tool, "get_conversation_embedded_files") as mock_get_embedded:
+                    # Mock that certain files are already embedded
+                    mock_get_embedded.return_value = ["/src/auth.py", "/src/security.py", "/tests/test_auth.py"]
+
+                    # Test the filtering logic directly
+                    new_files = self.tool.filter_new_files(["/src/auth.py", "/src/utils.py"], "test-duplicate-files-id")
+                    assert new_files == ["/src/utils.py"]  # Only the new file should remain
+
+                    # Verify get_conversation_embedded_files was called correctly
+                    mock_get_embedded.assert_called_with("test-duplicate-files-id")
+

 if __name__ == "__main__":
    pytest.main([__file__])
--- a/tests/test_conversation_memory.py
+++ b/tests/test_conversation_memory.py
@@ -183,8 +183,13 @@ class TestConversationMemory:
        assert "Python is a programming language" in history

        # Test file tracking
-        assert "📁 Files referenced: /home/user/main.py, /home/user/docs/readme.md" in history
-        assert "📁 Files referenced: /home/user/examples/" in history
+        # Check that the new file embedding section is included
+        assert "=== FILES REFERENCED IN THIS CONVERSATION ===" in history
+        assert "The following files have been shared and analyzed during our conversation." in history
+
+        # Check that file context from previous turns is included (now shows files used per turn)
+        assert "📁 Files used in this turn: /home/user/main.py, /home/user/docs/readme.md" in history
+        assert "📁 Files used in this turn: /home/user/examples/" in history

        # Test follow-up attribution
        assert "[Gemini's Follow-up: Would you like examples?]" in history
@@ -598,9 +603,9 @@ class TestConversationFlow:
        assert "--- Turn 3 (Gemini using analyze) ---" in history

        # Verify all files are preserved in chronological order
-        turn_1_files = "📁 Files referenced: /project/src/main.py, /project/src/utils.py"
-        turn_2_files = "📁 Files referenced: /project/tests/, /project/test_main.py"
-        turn_3_files = "📁 Files referenced: /project/tests/test_utils.py, /project/coverage.html"
+        turn_1_files = "📁 Files used in this turn: /project/src/main.py, /project/src/utils.py"
+        turn_2_files = "📁 Files used in this turn: /project/tests/, /project/test_main.py"
+        turn_3_files = "📁 Files used in this turn: /project/tests/test_utils.py, /project/coverage.html"

        assert turn_1_files in history
        assert turn_2_files in history
@@ -718,6 +723,63 @@ class TestConversationFlow:
        assert len(retrieved_context.turns) == 1
        assert retrieved_context.turns[0].follow_up_question == "Want to explore scalability?"

+    def test_token_limit_optimization_in_conversation_history(self):
+        """Test that build_conversation_history efficiently handles token limits"""
+        import os
+        import tempfile
+
+        from utils.conversation_memory import build_conversation_history
+
+        # Create test files with known content sizes
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # Create small and large test files
+            small_file = os.path.join(temp_dir, "small.py")
+            large_file = os.path.join(temp_dir, "large.py")
+
+            small_content = "# Small file\nprint('hello')\n"
+            large_content = "# Large file\n" + "x = 1\n" * 10000  # Very large file
+
+            with open(small_file, "w") as f:
+                f.write(small_content)
+            with open(large_file, "w") as f:
+                f.write(large_content)
+
+            # Create context with files that would exceed token limit
+            context = ThreadContext(
+                thread_id="test-token-limit",
+                created_at="2023-01-01T00:00:00Z",
+                last_updated_at="2023-01-01T00:01:00Z",
+                tool_name="analyze",
+                turns=[
+                    ConversationTurn(
+                        role="user",
+                        content="Analyze these files",
+                        timestamp="2023-01-01T00:00:30Z",
+                        files=[small_file, large_file],  # Large file should be truncated
+                    )
+                ],
+                initial_context={"prompt": "Analyze code"},
+            )
+
+            # Build conversation history (should handle token limits gracefully)
+            history = build_conversation_history(context)
+
+            # Verify the history was built successfully
+            assert "=== CONVERSATION HISTORY ===" in history
+            assert "=== FILES REFERENCED IN THIS CONVERSATION ===" in history
+
+            # The small file should be included, but large file might be truncated
+            # At minimum, verify no crashes and history is generated
+            assert len(history) > 0
+
+            # If truncation occurred, there should be a note about it
+            if "additional file(s) were truncated due to token limit" in history:
+                assert small_file in history or large_file in history
+            else:
+                # Both files fit within limit
+                assert small_file in history
+                assert large_file in history
+

 if __name__ == "__main__":
    pytest.main([__file__])
--- a/tests/test_prompt_regression.py
+++ b/tests/test_prompt_regression.py
@@ -117,7 +117,7 @@ class TestPromptRegression:
            mock_create_model.return_value = mock_model

            # Mock file reading
-            with patch("tools.codereview.read_files") as mock_read_files:
+            with patch("tools.base.read_files") as mock_read_files:
                mock_read_files.return_value = "def main(): pass"

                result = await tool.execute(
@@ -205,7 +205,7 @@ class TestPromptRegression:
            mock_create_model.return_value = mock_model

            # Mock file reading
-            with patch("tools.analyze.read_files") as mock_read_files:
+            with patch("tools.base.read_files") as mock_read_files:
                mock_read_files.return_value = "class UserController: ..."

                result = await tool.execute(
@@ -287,7 +287,7 @@ class TestPromptRegression:
            mock_model.generate_content.return_value = mock_model_response()
            mock_create_model.return_value = mock_model

-            with patch("tools.analyze.read_files") as mock_read_files:
+            with patch("tools.base.read_files") as mock_read_files:
                mock_read_files.return_value = "Content"

                result = await tool.execute(