# my-pal-mcp-server/tests/test_conversation_history_bug.py
"""
Test suite for conversation history bug fix
This test verifies that the critical bug where conversation history
(including file context) was not included when using continuation_id
has been properly fixed.
The bug was that tools with continuation_id would not see previous
conversation turns, causing issues like Gemini not seeing files that
Claude had shared in earlier turns.
"""

import json
from unittest.mock import Mock, patch

import pytest
from pydantic import Field

from tests.mock_helpers import create_mock_provider
from tools.base import BaseTool, ToolRequest
from utils.conversation_memory import ConversationTurn, ThreadContext


class FileContextRequest(ToolRequest):
    """Test request with file support"""

    prompt: str = Field(..., description="Test prompt")
    files: list[str] = Field(default_factory=list, description="Optional files")


class FileContextTool(BaseTool):
    """Test tool for file context verification"""

    def get_name(self) -> str:
        return "test_file_context"

    def get_description(self) -> str:
        return "Test tool for file context"

    def get_input_schema(self) -> dict:
        return {
            "type": "object",
            "properties": {
                "prompt": {"type": "string"},
                "files": {"type": "array", "items": {"type": "string"}},
                "continuation_id": {"type": "string", "required": False},
            },
        }

    def get_system_prompt(self) -> str:
        return "Test system prompt for file context"

    def get_request_model(self):
        return FileContextRequest

    async def prepare_prompt(self, request) -> str:
        # Simple prompt preparation that would normally read files.
        # For this test, we're focusing on whether conversation history is included.
        files_context = ""
        if request.files:
            files_context = f"\nFiles in current request: {', '.join(request.files)}"
        return f"System: {self.get_system_prompt()}\nUser: {request.prompt}{files_context}"


class TestConversationHistoryBugFix:
    """Test that conversation history with continuation_id is handled correctly:
    reconstructed by server.py and not duplicated by tools/base.py."""

    def setup_method(self):
        self.tool = FileContextTool()
@patch("tools.base.add_turn")
async def test_conversation_history_included_with_continuation_id(self, mock_add_turn):
"""Test that conversation history (including file context) is included when using continuation_id"""
# Test setup note: This test simulates a conversation thread with previous turns
# containing files from different tools (analyze -> codereview)
# The continuation_id "test-history-id" references this implicit thread context
# In the real flow, server.py would reconstruct this context and add it to the prompt
# Mock add_turn to return success
mock_add_turn.return_value = True
# Mock the model to capture what prompt it receives
captured_prompt = None
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
mock_provider = create_mock_provider()
mock_provider.get_provider_type.return_value = Mock(value="google")
mock_provider.supports_thinking_mode.return_value = False
def capture_prompt(prompt, **kwargs):
nonlocal captured_prompt
captured_prompt = prompt
return Mock(
content="Response with conversation context",
usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
model_name="gemini-2.5-flash",
metadata={"finish_reason": "STOP"},
)
mock_provider.generate_content.side_effect = capture_prompt
mock_get_provider.return_value = mock_provider
# Execute tool with continuation_id
# In the corrected flow, server.py:reconstruct_thread_context
# would have already added conversation history to the prompt
# This test simulates that the prompt already contains conversation history
arguments = {
"prompt": "What should we fix first?",
"continuation_id": "test-history-id",
"files": ["/src/utils.py"], # New file for this turn
}
response = await self.tool.execute(arguments)
# Verify response succeeded
response_data = json.loads(response[0].text)
assert response_data["status"] == "success"
# Note: After fixing the duplication bug, conversation history reconstruction
# now happens ONLY in server.py, not in tools/base.py
# This test verifies that tools/base.py no longer duplicates conversation history
# Verify the prompt is captured
assert captured_prompt is not None
# The prompt should NOT contain conversation history (since we removed the duplicate code)
# In the real flow, server.py would add conversation history before calling tool.execute()
assert "=== CONVERSATION HISTORY ===" not in captured_prompt
# The prompt should contain the current request
assert "What should we fix first?" in captured_prompt
assert "Files in current request: /src/utils.py" in captured_prompt
# This test confirms the duplication bug is fixed - tools/base.py no longer
# redundantly adds conversation history that server.py already added

    async def test_no_history_when_thread_not_found(self):
        """Test graceful handling when thread is not found"""
        # Note: after fixing the duplication bug, thread-not-found handling
        # happens in server.py:reconstruct_thread_context, not in tools/base.py.
        # This test verifies tools don't try to handle missing threads themselves.
        captured_prompt = None

        with patch.object(self.tool, "get_model_provider") as mock_get_provider:
            mock_provider = create_mock_provider()
            mock_provider.get_provider_type.return_value = Mock(value="google")
            mock_provider.supports_thinking_mode.return_value = False

            def capture_prompt(prompt, **kwargs):
                nonlocal captured_prompt
                captured_prompt = prompt
                return Mock(
                    content="Response without history",
                    usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                    model_name="gemini-2.5-flash",
                    metadata={"finish_reason": "STOP"},
                )

            mock_provider.generate_content.side_effect = capture_prompt
            mock_get_provider.return_value = mock_provider

            # Execute tool with continuation_id for a non-existent thread.
            # In the real flow, server.py would have already handled the missing thread.
            arguments = {"prompt": "Test without history", "continuation_id": "non-existent-thread-id"}

            response = await self.tool.execute(arguments)

            # Should succeed since tools/base.py no longer handles missing threads
            response_data = json.loads(response[0].text)
            assert response_data["status"] == "success"

            # Verify the prompt does NOT include conversation history
            # (because tools/base.py no longer tries to add it)
            assert captured_prompt is not None
            assert "=== CONVERSATION HISTORY ===" not in captured_prompt
            assert "Test without history" in captured_prompt

    async def test_no_history_for_new_conversations(self):
        """Test that new conversations (no continuation_id) don't get history"""
        captured_prompt = None

        with patch.object(self.tool, "get_model_provider") as mock_get_provider:
            mock_provider = create_mock_provider()
            mock_provider.get_provider_type.return_value = Mock(value="google")
            mock_provider.supports_thinking_mode.return_value = False

            def capture_prompt(prompt, **kwargs):
                nonlocal captured_prompt
                captured_prompt = prompt
                return Mock(
                    content="New conversation response",
                    usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
                    model_name="gemini-2.5-flash",
                    metadata={"finish_reason": "STOP"},
                )

            mock_provider.generate_content.side_effect = capture_prompt
            mock_get_provider.return_value = mock_provider

            # Execute tool without continuation_id (new conversation)
            arguments = {"prompt": "Start new conversation", "files": ["/src/new_file.py"]}

            response = await self.tool.execute(arguments)

            # Should succeed (may offer continuation for new conversations)
            response_data = json.loads(response[0].text)
            assert response_data["status"] in ["success", "continuation_available"]

            # Verify the prompt does NOT include conversation history
            assert captured_prompt is not None
            assert "=== CONVERSATION HISTORY ===" not in captured_prompt
            assert "Start new conversation" in captured_prompt
            assert "Files in current request: /src/new_file.py" in captured_prompt

            # Should include follow-up instructions for new conversation
            # (this is the existing behavior for new conversations)
            assert "CONVERSATION CONTINUATION" in captured_prompt
@patch("tools.base.get_thread")
@patch("tools.base.add_turn")
@patch("utils.file_utils.resolve_and_validate_path")
async def test_no_duplicate_file_embedding_during_continuation(
self, mock_resolve_path, mock_add_turn, mock_get_thread
):
"""Test that files already embedded in conversation history are not re-embedded"""
# Mock file resolution to allow our test files
def mock_resolve(path_str):
from pathlib import Path
return Path(path_str) # Just return as-is for test files
mock_resolve_path.side_effect = mock_resolve
# Create a thread context with previous turns including files
_thread_context = ThreadContext(
thread_id="test-duplicate-files-id",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:02:00Z",
tool_name="analyze",
turns=[
ConversationTurn(
role="assistant",
content="I've analyzed the authentication module.",
timestamp="2023-01-01T00:01:00Z",
tool_name="analyze",
files=["/src/auth.py", "/src/security.py"], # These files were already analyzed
),
ConversationTurn(
role="assistant",
content="Found security issues in the auth system.",
timestamp="2023-01-01T00:02:00Z",
tool_name="codereview",
files=["/src/auth.py", "/tests/test_auth.py"], # auth.py referenced again + new file
),
],
initial_context={"prompt": "Analyze authentication security"},
)
# Mock get_thread to return our test context
mock_get_thread.return_value = _thread_context
mock_add_turn.return_value = True
# Mock the model to capture what prompt it receives
captured_prompt = None
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
mock_provider = create_mock_provider()
mock_provider.get_provider_type.return_value = Mock(value="google")
mock_provider.supports_thinking_mode.return_value = False
def capture_prompt(prompt, **kwargs):
nonlocal captured_prompt
captured_prompt = prompt
return Mock(
content="Analysis of new files complete",
usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
model_name="gemini-2.5-flash",
metadata={"finish_reason": "STOP"},
)
mock_provider.generate_content.side_effect = capture_prompt
mock_get_provider.return_value = mock_provider
# Mock read_files to simulate file existence and capture its calls
with patch("tools.base.read_files") as mock_read_files:
# When the tool processes the new files, it should only read '/src/utils.py'
mock_read_files.return_value = "--- /src/utils.py ---\ncontent of utils"
# Execute tool with continuation_id and mix of already-referenced and new files
arguments = {
"prompt": "Now check the utility functions too",
"continuation_id": "test-duplicate-files-id",
"files": ["/src/auth.py", "/src/utils.py"], # auth.py already in history, utils.py is new
}
response = await self.tool.execute(arguments)
# Verify response succeeded
response_data = json.loads(response[0].text)
assert response_data["status"] == "success"
# Verify the prompt structure
assert captured_prompt is not None
# After fixing the duplication bug, conversation history (including file embedding)
# is no longer added by tools/base.py - it's handled by server.py
# This test verifies the file filtering logic still works correctly
# The current request should still be processed normally
assert "Now check the utility functions too" in captured_prompt
assert "Files in current request: /src/auth.py, /src/utils.py" in captured_prompt
# Most importantly, verify that the file filtering logic works correctly
# even though conversation history isn't built by tools/base.py anymore
with patch.object(self.tool, "get_conversation_embedded_files") as mock_get_embedded:
# Mock that certain files are already embedded
mock_get_embedded.return_value = ["/src/auth.py", "/src/security.py", "/tests/test_auth.py"]
# Test the filtering logic directly
new_files = self.tool.filter_new_files(["/src/auth.py", "/src/utils.py"], "test-duplicate-files-id")
assert new_files == ["/src/utils.py"] # Only the new file should remain
# Verify get_conversation_embedded_files was called correctly
mock_get_embedded.assert_called_with("test-duplicate-files-id")
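
            # Design note: on a continuation turn, files already present in the
            # reconstructed history are filtered out here, so only genuinely new
            # files (e.g. /src/utils.py above) are read and embedded again.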


if __name__ == "__main__":
    pytest.main([__file__])