"""
|
|
Test suite for conversation history bug fix
|
|
|
|
This test verifies that the critical bug where conversation history
|
|
(including file context) was not included when using continuation_id
|
|
has been properly fixed.
|
|
|
|
The bug was that tools with continuation_id would not see previous
|
|
conversation turns, causing issues like Gemini not seeing files that
|
|
Claude had shared in earlier turns.
|
|
"""
|
|
|
|
import json
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
from pydantic import Field
|
|
|
|
from tests.mock_helpers import create_mock_provider
|
|
from tools.base import BaseTool, ToolRequest
|
|
from utils.conversation_memory import ConversationTurn, ThreadContext
|
|
|
|
|
|
class FileContextRequest(ToolRequest):
|
|
"""Test request with file support"""
|
|
|
|
prompt: str = Field(..., description="Test prompt")
|
|
files: list[str] = Field(default_factory=list, description="Optional files")
|
|
|
|
|
|
class FileContextTool(BaseTool):
|
|
"""Test tool for file context verification"""
|
|
|
|
def get_name(self) -> str:
|
|
return "test_file_context"
|
|
|
|
def get_description(self) -> str:
|
|
return "Test tool for file context"
|
|
|
|
def get_input_schema(self) -> dict:
|
|
return {
|
|
"type": "object",
|
|
"properties": {
|
|
"prompt": {"type": "string"},
|
|
"files": {"type": "array", "items": {"type": "string"}},
|
|
"continuation_id": {"type": "string", "required": False},
|
|
},
|
|
}
|
|
|
|
def get_system_prompt(self) -> str:
|
|
return "Test system prompt for file context"
|
|
|
|
def get_request_model(self):
|
|
return FileContextRequest
|
|
|
|
async def prepare_prompt(self, request) -> str:
|
|
# Simple prompt preparation that would normally read files
|
|
# For this test, we're focusing on whether conversation history is included
|
|
files_context = ""
|
|
if request.files:
|
|
files_context = f"\nFiles in current request: {', '.join(request.files)}"
|
|
|
|
return f"System: {self.get_system_prompt()}\nUser: {request.prompt}{files_context}"
|
|
|
|
|
|
class TestConversationHistoryBugFix:
|
|
"""Test that conversation history is properly included with continuation_id"""
|
|
|
|
def setup_method(self):
|
|
self.tool = FileContextTool()
|
|
|
|
@patch("tools.base.add_turn")
|
|
async def test_conversation_history_included_with_continuation_id(self, mock_add_turn):
|
|
"""Test that conversation history (including file context) is included when using continuation_id"""
|
|
|
|
# Test setup note: This test simulates a conversation thread with previous turns
|
|
# containing files from different tools (analyze -> codereview)
|
|
# The continuation_id "test-history-id" references this implicit thread context
|
|
# In the real flow, server.py would reconstruct this context and add it to the prompt
|
|
|
|
# Mock add_turn to return success
|
|
mock_add_turn.return_value = True
|
|
|
|
# Mock the model to capture what prompt it receives
|
|
captured_prompt = None
|
|
|
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
|
mock_provider = create_mock_provider()
|
|
mock_provider.get_provider_type.return_value = Mock(value="google")
|
|
mock_provider.supports_thinking_mode.return_value = False
|
|
|
|
def capture_prompt(prompt, **kwargs):
|
|
nonlocal captured_prompt
|
|
captured_prompt = prompt
|
|
return Mock(
|
|
content="Response with conversation context",
|
|
usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
|
|
model_name="gemini-2.5-flash",
|
|
metadata={"finish_reason": "STOP"},
|
|
)
|
|
|
|
mock_provider.generate_content.side_effect = capture_prompt
|
|
mock_get_provider.return_value = mock_provider
|
|
|
|
# Execute tool with continuation_id
|
|
# In the corrected flow, server.py:reconstruct_thread_context
|
|
# would have already added conversation history to the prompt
|
|
# This test simulates that the prompt already contains conversation history
|
|
arguments = {
|
|
"prompt": "What should we fix first?",
|
|
"continuation_id": "test-history-id",
|
|
"files": ["/src/utils.py"], # New file for this turn
|
|
}
|
|
response = await self.tool.execute(arguments)
|
|
|
|
# Verify response succeeded
|
|
response_data = json.loads(response[0].text)
|
|
assert response_data["status"] == "success"
|
|
|
|
# Note: After fixing the duplication bug, conversation history reconstruction
|
|
# now happens ONLY in server.py, not in tools/base.py
|
|
# This test verifies that tools/base.py no longer duplicates conversation history
|
|
|
|
# Verify the prompt is captured
|
|
assert captured_prompt is not None
|
|
|
|
# The prompt should NOT contain conversation history (since we removed the duplicate code)
|
|
# In the real flow, server.py would add conversation history before calling tool.execute()
|
|
assert "=== CONVERSATION HISTORY ===" not in captured_prompt
|
|
|
|
# The prompt should contain the current request
|
|
assert "What should we fix first?" in captured_prompt
|
|
assert "Files in current request: /src/utils.py" in captured_prompt
|
|
|
|
# This test confirms the duplication bug is fixed - tools/base.py no longer
|
|
# redundantly adds conversation history that server.py already added
|
|
|
|
async def test_no_history_when_thread_not_found(self):
|
|
"""Test graceful handling when thread is not found"""
|
|
|
|
# Note: After fixing the duplication bug, thread not found handling
|
|
# happens in server.py:reconstruct_thread_context, not in tools/base.py
|
|
# This test verifies tools don't try to handle missing threads themselves
|
|
|
|
captured_prompt = None
|
|
|
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
|
mock_provider = create_mock_provider()
|
|
mock_provider.get_provider_type.return_value = Mock(value="google")
|
|
mock_provider.supports_thinking_mode.return_value = False
|
|
|
|
def capture_prompt(prompt, **kwargs):
|
|
nonlocal captured_prompt
|
|
captured_prompt = prompt
|
|
return Mock(
|
|
content="Response without history",
|
|
usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
|
|
model_name="gemini-2.5-flash",
|
|
metadata={"finish_reason": "STOP"},
|
|
)
|
|
|
|
mock_provider.generate_content.side_effect = capture_prompt
|
|
mock_get_provider.return_value = mock_provider
|
|
|
|
# Execute tool with continuation_id for non-existent thread
|
|
# In the real flow, server.py would have already handled the missing thread
|
|
arguments = {"prompt": "Test without history", "continuation_id": "non-existent-thread-id"}
|
|
response = await self.tool.execute(arguments)
|
|
|
|
# Should succeed since tools/base.py no longer handles missing threads
|
|
response_data = json.loads(response[0].text)
|
|
assert response_data["status"] == "success"
|
|
|
|
# Verify the prompt does NOT include conversation history
|
|
# (because tools/base.py no longer tries to add it)
|
|
assert captured_prompt is not None
|
|
assert "=== CONVERSATION HISTORY ===" not in captured_prompt
|
|
assert "Test without history" in captured_prompt
|
|
|
|
async def test_no_history_for_new_conversations(self):
|
|
"""Test that new conversations (no continuation_id) don't get history"""
|
|
|
|
captured_prompt = None
|
|
|
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
|
mock_provider = create_mock_provider()
|
|
mock_provider.get_provider_type.return_value = Mock(value="google")
|
|
mock_provider.supports_thinking_mode.return_value = False
|
|
|
|
def capture_prompt(prompt, **kwargs):
|
|
nonlocal captured_prompt
|
|
captured_prompt = prompt
|
|
return Mock(
|
|
content="New conversation response",
|
|
usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
|
|
model_name="gemini-2.5-flash",
|
|
metadata={"finish_reason": "STOP"},
|
|
)
|
|
|
|
mock_provider.generate_content.side_effect = capture_prompt
|
|
mock_get_provider.return_value = mock_provider
|
|
|
|
# Execute tool without continuation_id (new conversation)
|
|
arguments = {"prompt": "Start new conversation", "files": ["/src/new_file.py"]}
|
|
response = await self.tool.execute(arguments)
|
|
|
|
# Should succeed (may offer continuation for new conversations)
|
|
response_data = json.loads(response[0].text)
|
|
assert response_data["status"] in ["success", "continuation_available"]
|
|
|
|
# Verify the prompt does NOT include conversation history
|
|
assert captured_prompt is not None
|
|
assert "=== CONVERSATION HISTORY ===" not in captured_prompt
|
|
assert "Start new conversation" in captured_prompt
|
|
assert "Files in current request: /src/new_file.py" in captured_prompt
|
|
|
|
# Should include follow-up instructions for new conversation
|
|
# (This is the existing behavior for new conversations)
|
|
assert "CONVERSATION CONTINUATION" in captured_prompt
|
|
|
|
@patch("tools.base.get_thread")
|
|
@patch("tools.base.add_turn")
|
|
@patch("utils.file_utils.resolve_and_validate_path")
|
|
async def test_no_duplicate_file_embedding_during_continuation(
|
|
self, mock_resolve_path, mock_add_turn, mock_get_thread
|
|
):
|
|
"""Test that files already embedded in conversation history are not re-embedded"""
|
|
|
|
# Mock file resolution to allow our test files
|
|
def mock_resolve(path_str):
|
|
from pathlib import Path
|
|
|
|
return Path(path_str) # Just return as-is for test files
|
|
|
|
mock_resolve_path.side_effect = mock_resolve
|
|
|
|
# Create a thread context with previous turns including files
|
|
_thread_context = ThreadContext(
|
|
thread_id="test-duplicate-files-id",
|
|
created_at="2023-01-01T00:00:00Z",
|
|
last_updated_at="2023-01-01T00:02:00Z",
|
|
tool_name="analyze",
|
|
turns=[
|
|
ConversationTurn(
|
|
role="assistant",
|
|
content="I've analyzed the authentication module.",
|
|
timestamp="2023-01-01T00:01:00Z",
|
|
tool_name="analyze",
|
|
files=["/src/auth.py", "/src/security.py"], # These files were already analyzed
|
|
),
|
|
ConversationTurn(
|
|
role="assistant",
|
|
content="Found security issues in the auth system.",
|
|
timestamp="2023-01-01T00:02:00Z",
|
|
tool_name="codereview",
|
|
files=["/src/auth.py", "/tests/test_auth.py"], # auth.py referenced again + new file
|
|
),
|
|
],
|
|
initial_context={"prompt": "Analyze authentication security"},
|
|
)
|
|
|
|
# Mock get_thread to return our test context
|
|
mock_get_thread.return_value = _thread_context
|
|
mock_add_turn.return_value = True
|
|
|
|
# Mock the model to capture what prompt it receives
|
|
captured_prompt = None
|
|
|
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
|
mock_provider = create_mock_provider()
|
|
mock_provider.get_provider_type.return_value = Mock(value="google")
|
|
mock_provider.supports_thinking_mode.return_value = False
|
|
|
|
def capture_prompt(prompt, **kwargs):
|
|
nonlocal captured_prompt
|
|
captured_prompt = prompt
|
|
return Mock(
|
|
content="Analysis of new files complete",
|
|
usage={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30},
|
|
model_name="gemini-2.5-flash",
|
|
metadata={"finish_reason": "STOP"},
|
|
)
|
|
|
|
mock_provider.generate_content.side_effect = capture_prompt
|
|
mock_get_provider.return_value = mock_provider
|
|
|
|
# Mock read_files to simulate file existence and capture its calls
|
|
with patch("tools.base.read_files") as mock_read_files:
|
|
# When the tool processes the new files, it should only read '/src/utils.py'
|
|
mock_read_files.return_value = "--- /src/utils.py ---\ncontent of utils"
|
|
|
|
# Execute tool with continuation_id and mix of already-referenced and new files
|
|
arguments = {
|
|
"prompt": "Now check the utility functions too",
|
|
"continuation_id": "test-duplicate-files-id",
|
|
"files": ["/src/auth.py", "/src/utils.py"], # auth.py already in history, utils.py is new
|
|
}
|
|
response = await self.tool.execute(arguments)
|
|
|
|
# Verify response succeeded
|
|
response_data = json.loads(response[0].text)
|
|
assert response_data["status"] == "success"
|
|
|
|
# Verify the prompt structure
|
|
assert captured_prompt is not None
|
|
|
|
# After fixing the duplication bug, conversation history (including file embedding)
|
|
# is no longer added by tools/base.py - it's handled by server.py
|
|
# This test verifies the file filtering logic still works correctly
|
|
|
|
# The current request should still be processed normally
|
|
assert "Now check the utility functions too" in captured_prompt
|
|
assert "Files in current request: /src/auth.py, /src/utils.py" in captured_prompt
|
|
|
|
# Most importantly, verify that the file filtering logic works correctly
|
|
# even though conversation history isn't built by tools/base.py anymore
|
|
with patch.object(self.tool, "get_conversation_embedded_files") as mock_get_embedded:
|
|
# Mock that certain files are already embedded
|
|
mock_get_embedded.return_value = ["/src/auth.py", "/src/security.py", "/tests/test_auth.py"]
|
|
|
|
# Test the filtering logic directly
|
|
new_files = self.tool.filter_new_files(["/src/auth.py", "/src/utils.py"], "test-duplicate-files-id")
|
|
assert new_files == ["/src/utils.py"] # Only the new file should remain
|
|
|
|
# Verify get_conversation_embedded_files was called correctly
|
|
mock_get_embedded.assert_called_with("test-duplicate-files-id")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
pytest.main([__file__])
|