""" Test suite for conversation history bug fix This test verifies that the critical bug where conversation history (including file context) was not included when using continuation_id has been properly fixed. The bug was that tools with continuation_id would not see previous conversation turns, causing issues like Gemini not seeing files that Claude had shared in earlier turns. """ import json from unittest.mock import Mock, patch import pytest from pydantic import Field from tests.mock_helpers import create_mock_provider from tools.base import BaseTool, ToolRequest from utils.conversation_memory import ConversationTurn, ThreadContext class FileContextRequest(ToolRequest): """Test request with file support""" prompt: str = Field(..., description="Test prompt") files: list[str] = Field(default_factory=list, description="Optional files") class FileContextTool(BaseTool): """Test tool for file context verification""" def get_name(self) -> str: return "test_file_context" def get_description(self) -> str: return "Test tool for file context" def get_input_schema(self) -> dict: return { "type": "object", "properties": { "prompt": {"type": "string"}, "files": {"type": "array", "items": {"type": "string"}}, "continuation_id": {"type": "string", "required": False}, }, } def get_system_prompt(self) -> str: return "Test system prompt for file context" def get_request_model(self): return FileContextRequest async def prepare_prompt(self, request) -> str: # Simple prompt preparation that would normally read files # For this test, we're focusing on whether conversation history is included files_context = "" if request.files: files_context = f"\nFiles in current request: {', '.join(request.files)}" return f"System: {self.get_system_prompt()}\nUser: {request.prompt}{files_context}" class TestConversationHistoryBugFix: """Test that conversation history is properly included with continuation_id""" def setup_method(self): self.tool = FileContextTool() @patch("tools.base.add_turn") async def test_conversation_history_included_with_continuation_id(self, mock_add_turn): """Test that conversation history (including file context) is included when using continuation_id""" # Test setup note: This test simulates a conversation thread with previous turns # containing files from different tools (analyze -> codereview) # The continuation_id "test-history-id" references this implicit thread context # In the real flow, server.py would reconstruct this context and add it to the prompt # Mock add_turn to return success mock_add_turn.return_value = True # Mock the model to capture what prompt it receives captured_prompt = None with patch.object(self.tool, "get_model_provider") as mock_get_provider: mock_provider = create_mock_provider() mock_provider.get_provider_type.return_value = Mock(value="google") mock_provider.supports_thinking_mode.return_value = False def capture_prompt(prompt, **kwargs): nonlocal captured_prompt captured_prompt = prompt return Mock( content="Response with conversation context", usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, model_name="gemini-2.5-flash-preview-05-20", metadata={"finish_reason": "STOP"}, ) mock_provider.generate_content.side_effect = capture_prompt mock_get_provider.return_value = mock_provider # Execute tool with continuation_id # In the corrected flow, server.py:reconstruct_thread_context # would have already added conversation history to the prompt # This test simulates that the prompt already contains conversation history arguments = { "prompt": "What should 
we fix first?", "continuation_id": "test-history-id", "files": ["/src/utils.py"], # New file for this turn } response = await self.tool.execute(arguments) # Verify response succeeded response_data = json.loads(response[0].text) assert response_data["status"] == "success" # Note: After fixing the duplication bug, conversation history reconstruction # now happens ONLY in server.py, not in tools/base.py # This test verifies that tools/base.py no longer duplicates conversation history # Verify the prompt is captured assert captured_prompt is not None # The prompt should NOT contain conversation history (since we removed the duplicate code) # In the real flow, server.py would add conversation history before calling tool.execute() assert "=== CONVERSATION HISTORY ===" not in captured_prompt # The prompt should contain the current request assert "What should we fix first?" in captured_prompt assert "Files in current request: /src/utils.py" in captured_prompt # This test confirms the duplication bug is fixed - tools/base.py no longer # redundantly adds conversation history that server.py already added async def test_no_history_when_thread_not_found(self): """Test graceful handling when thread is not found""" # Note: After fixing the duplication bug, thread not found handling # happens in server.py:reconstruct_thread_context, not in tools/base.py # This test verifies tools don't try to handle missing threads themselves captured_prompt = None with patch.object(self.tool, "get_model_provider") as mock_get_provider: mock_provider = create_mock_provider() mock_provider.get_provider_type.return_value = Mock(value="google") mock_provider.supports_thinking_mode.return_value = False def capture_prompt(prompt, **kwargs): nonlocal captured_prompt captured_prompt = prompt return Mock( content="Response without history", usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, model_name="gemini-2.5-flash-preview-05-20", metadata={"finish_reason": "STOP"}, ) mock_provider.generate_content.side_effect = capture_prompt mock_get_provider.return_value = mock_provider # Execute tool with continuation_id for non-existent thread # In the real flow, server.py would have already handled the missing thread arguments = {"prompt": "Test without history", "continuation_id": "non-existent-thread-id"} response = await self.tool.execute(arguments) # Should succeed since tools/base.py no longer handles missing threads response_data = json.loads(response[0].text) assert response_data["status"] == "success" # Verify the prompt does NOT include conversation history # (because tools/base.py no longer tries to add it) assert captured_prompt is not None assert "=== CONVERSATION HISTORY ===" not in captured_prompt assert "Test without history" in captured_prompt async def test_no_history_for_new_conversations(self): """Test that new conversations (no continuation_id) don't get history""" captured_prompt = None with patch.object(self.tool, "get_model_provider") as mock_get_provider: mock_provider = create_mock_provider() mock_provider.get_provider_type.return_value = Mock(value="google") mock_provider.supports_thinking_mode.return_value = False def capture_prompt(prompt, **kwargs): nonlocal captured_prompt captured_prompt = prompt return Mock( content="New conversation response", usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, model_name="gemini-2.5-flash-preview-05-20", metadata={"finish_reason": "STOP"}, ) mock_provider.generate_content.side_effect = capture_prompt mock_get_provider.return_value = 
mock_provider # Execute tool without continuation_id (new conversation) arguments = {"prompt": "Start new conversation", "files": ["/src/new_file.py"]} response = await self.tool.execute(arguments) # Should succeed (may offer continuation for new conversations) response_data = json.loads(response[0].text) assert response_data["status"] in ["success", "continuation_available"] # Verify the prompt does NOT include conversation history assert captured_prompt is not None assert "=== CONVERSATION HISTORY ===" not in captured_prompt assert "Start new conversation" in captured_prompt assert "Files in current request: /src/new_file.py" in captured_prompt # Should include follow-up instructions for new conversation # (This is the existing behavior for new conversations) assert "CONVERSATION CONTINUATION" in captured_prompt @patch("tools.base.get_thread") @patch("tools.base.add_turn") @patch("utils.file_utils.resolve_and_validate_path") async def test_no_duplicate_file_embedding_during_continuation( self, mock_resolve_path, mock_add_turn, mock_get_thread ): """Test that files already embedded in conversation history are not re-embedded""" # Mock file resolution to allow our test files def mock_resolve(path_str): from pathlib import Path return Path(path_str) # Just return as-is for test files mock_resolve_path.side_effect = mock_resolve # Create a thread context with previous turns including files _thread_context = ThreadContext( thread_id="test-duplicate-files-id", created_at="2023-01-01T00:00:00Z", last_updated_at="2023-01-01T00:02:00Z", tool_name="analyze", turns=[ ConversationTurn( role="assistant", content="I've analyzed the authentication module.", timestamp="2023-01-01T00:01:00Z", tool_name="analyze", files=["/src/auth.py", "/src/security.py"], # These files were already analyzed ), ConversationTurn( role="assistant", content="Found security issues in the auth system.", timestamp="2023-01-01T00:02:00Z", tool_name="codereview", files=["/src/auth.py", "/tests/test_auth.py"], # auth.py referenced again + new file ), ], initial_context={"prompt": "Analyze authentication security"}, ) # Mock get_thread to return our test context mock_get_thread.return_value = _thread_context mock_add_turn.return_value = True # Mock the model to capture what prompt it receives captured_prompt = None with patch.object(self.tool, "get_model_provider") as mock_get_provider: mock_provider = create_mock_provider() mock_provider.get_provider_type.return_value = Mock(value="google") mock_provider.supports_thinking_mode.return_value = False def capture_prompt(prompt, **kwargs): nonlocal captured_prompt captured_prompt = prompt return Mock( content="Analysis of new files complete", usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, model_name="gemini-2.5-flash-preview-05-20", metadata={"finish_reason": "STOP"}, ) mock_provider.generate_content.side_effect = capture_prompt mock_get_provider.return_value = mock_provider # Mock read_files to simulate file existence and capture its calls with patch("tools.base.read_files") as mock_read_files: # When the tool processes the new files, it should only read '/src/utils.py' mock_read_files.return_value = "--- /src/utils.py ---\ncontent of utils" # Execute tool with continuation_id and mix of already-referenced and new files arguments = { "prompt": "Now check the utility functions too", "continuation_id": "test-duplicate-files-id", "files": ["/src/auth.py", "/src/utils.py"], # auth.py already in history, utils.py is new } response = await self.tool.execute(arguments) 
                # Verify response succeeded
                response_data = json.loads(response[0].text)
                assert response_data["status"] == "success"

                # Verify the prompt structure
                assert captured_prompt is not None

                # After fixing the duplication bug, conversation history (including file embedding)
                # is no longer added by tools/base.py - it's handled by server.py.
                # This test verifies the file filtering logic still works correctly.

                # The current request should still be processed normally
                assert "Now check the utility functions too" in captured_prompt
                assert "Files in current request: /src/auth.py, /src/utils.py" in captured_prompt

        # Most importantly, verify that the file filtering logic works correctly
        # even though conversation history isn't built by tools/base.py anymore
        with patch.object(self.tool, "get_conversation_embedded_files") as mock_get_embedded:
            # Mock that certain files are already embedded
            mock_get_embedded.return_value = ["/src/auth.py", "/src/security.py", "/tests/test_auth.py"]

            # Test the filtering logic directly
            new_files = self.tool.filter_new_files(["/src/auth.py", "/src/utils.py"], "test-duplicate-files-id")
            assert new_files == ["/src/utils.py"]  # Only the new file should remain

            # Verify get_conversation_embedded_files was called correctly
            mock_get_embedded.assert_called_with("test-duplicate-files-id")


if __name__ == "__main__":
    pytest.main([__file__])
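
# For reference only: the server-side flow that these tests assume, as a rough,
# hedged sketch. `get_thread` and `reconstruct_thread_context` are names referenced
# above; `format_turns` is a hypothetical helper used purely for illustration.
#
#     context = get_thread(continuation_id)          # look up the stored thread
#     if context is not None:
#         history = "=== CONVERSATION HISTORY ===\n" + format_turns(context.turns)
#         arguments["prompt"] = f"{history}\n\n{arguments['prompt']}"
#     response = await tool.execute(arguments)       # the tool sees the final prompt
#
# Because server.py prepends the history before execute() runs, tools/base.py must
# not add the "=== CONVERSATION HISTORY ===" marker itself; that is exactly what the
# assertions in this module check.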