fix: critical conversation history bug and improve Docker integration

This commit addresses several critical issues and improvements:

🔧 Critical Fixes:
- Fixed conversation history not being included when using continuation_id in AI-to-AI conversations
- Fixed test mock targeting issues preventing proper conversation memory validation
- Fixed Docker debug logging functionality with Gemini tools

🐛 Bug Fixes:
- Fixed Docker compose configuration so container commands execute properly
- Retargeted test mock patches from utils.conversation_memory.* to tools.base.* (see the sketch after this list)
- Bumped version to 3.1.0 to reflect these improvements
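
The retargeting above follows the standard unittest.mock rule: a name must be patched in the module that looks it up, not in the module that defines it. A minimal sketch, assuming tools/base.py imports the helper directly (from utils.conversation_memory import get_thread):

    from unittest.mock import patch

    # Effective: patches the reference tools.base actually calls, assuming
    # tools/base.py does `from utils.conversation_memory import get_thread`
    with patch("tools.base.get_thread") as mock_get_thread:
        mock_get_thread.return_value = None
        ...  # exercise the tool; it now sees the mock

    # Ineffective for tools.base: rebinding the name in the defining module
    # leaves tools.base's already-imported reference untouched
    with patch("utils.conversation_memory.get_thread"):
        ...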

🚀 Improvements:
- Enhanced Docker environment configuration with comprehensive logging setup
- Added cross-tool continuation documentation and examples in README
- Improved error handling and validation across all tools
- Better logging configuration with LOG_LEVEL environment variable support (see the sketch after this list)
- Enhanced conversation memory system documentation
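
As an illustration of the LOG_LEVEL support called out above, the container can drive Python's logging setup from the environment; a minimal sketch only, as the server's actual configuration may differ:

    import logging
    import os

    # docker compose can set LOG_LEVEL=DEBUG to surface debug logs in the container
    level_name = os.getenv("LOG_LEVEL", "INFO").upper()
    logging.basicConfig(
        level=getattr(logging, level_name, logging.INFO),
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )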

🧪 Testing:
- Added comprehensive conversation history bug fix tests
- Added cross-tool continuation functionality tests
- All 132 tests now pass with proper conversation history validation
- Improved test coverage for AI-to-AI conversation threading

✨ Code Quality:
- Applied black, isort, and ruff formatting across entire codebase
- Enhanced inline documentation for conversation memory system
- Cleaned up temporary files and improved repository hygiene
- Better test descriptions and coverage for critical functionality

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: Fahad
Date: 2025-06-11 08:53:45 +04:00
Parent: 14ccbede43
Commit: 94f542c76a
20 changed files with 1012 additions and 103 deletions


@@ -241,7 +241,7 @@ class TestClaudeContinuationOffers:
     def test_max_turns_reached_no_continuation_offer(self):
         """Test that no continuation is offered when max turns would be exceeded"""
         # Mock MAX_CONVERSATION_TURNS to be 1 for this test
-        with patch("utils.conversation_memory.MAX_CONVERSATION_TURNS", 1):
+        with patch("tools.base.MAX_CONVERSATION_TURNS", 1):
             request = ContinuationRequest(prompt="Test prompt")
             # Check continuation opportunity


@@ -235,9 +235,9 @@ class TestCollaborationWorkflow:
         )
         response = json.loads(result[0].text)
-        assert response["status"] == "requires_clarification", (
-            "Should request clarification when asked about dependencies without package files"
-        )
+        assert (
+            response["status"] == "requires_clarification"
+        ), "Should request clarification when asked about dependencies without package files"
         clarification = json.loads(response["content"])
         assert "package.json" in str(clarification["files_needed"]), "Should specifically request package.json"


@@ -0,0 +1,251 @@
"""
Test suite for conversation history bug fix
This test verifies that the critical bug where conversation history
(including file context) was not included when using continuation_id
has been properly fixed.
The bug was that tools with continuation_id would not see previous
conversation turns, causing issues like Gemini not seeing files that
Claude had shared in earlier turns.
"""
import json
from unittest.mock import Mock, patch
import pytest
from pydantic import Field
from tools.base import BaseTool, ToolRequest
from utils.conversation_memory import ConversationTurn, ThreadContext
class FileContextRequest(ToolRequest):
"""Test request with file support"""
prompt: str = Field(..., description="Test prompt")
files: list[str] = Field(default_factory=list, description="Optional files")

class FileContextTool(BaseTool):
    """Test tool for file context verification"""

    def get_name(self) -> str:
        return "test_file_context"

    def get_description(self) -> str:
        return "Test tool for file context"

    def get_input_schema(self) -> dict:
        return {
            "type": "object",
            "properties": {
                "prompt": {"type": "string"},
                "files": {"type": "array", "items": {"type": "string"}},
                "continuation_id": {"type": "string", "required": False},
            },
        }

    def get_system_prompt(self) -> str:
        return "Test system prompt for file context"

    def get_request_model(self):
        return FileContextRequest

    async def prepare_prompt(self, request) -> str:
        # Simple prompt preparation that would normally read files
        # For this test, we're focusing on whether conversation history is included
        files_context = ""
        if request.files:
            files_context = f"\nFiles in current request: {', '.join(request.files)}"
        return f"System: {self.get_system_prompt()}\nUser: {request.prompt}{files_context}"

class TestConversationHistoryBugFix:
    """Test that conversation history is properly included with continuation_id"""

    def setup_method(self):
        self.tool = FileContextTool()

    @patch("tools.base.get_thread")
    @patch("tools.base.add_turn")
    async def test_conversation_history_included_with_continuation_id(self, mock_add_turn, mock_get_thread):
        """Test that conversation history (including file context) is included when using continuation_id"""
        # Create a thread context with previous turns including files
        thread_context = ThreadContext(
            thread_id="test-history-id",
            created_at="2023-01-01T00:00:00Z",
            last_updated_at="2023-01-01T00:02:00Z",
            tool_name="analyze",  # Started with analyze tool
            turns=[
                ConversationTurn(
                    role="assistant",
                    content="I've analyzed the authentication module and found several security issues.",
                    timestamp="2023-01-01T00:01:00Z",
                    tool_name="analyze",
                    files=["/src/auth.py", "/src/security.py"],  # Files from analyze tool
                ),
                ConversationTurn(
                    role="assistant",
                    content="The code review shows these files have critical vulnerabilities.",
                    timestamp="2023-01-01T00:02:00Z",
                    tool_name="codereview",
                    files=["/src/auth.py", "/tests/test_auth.py"],  # Files from codereview tool
                ),
            ],
            initial_context={"question": "Analyze authentication security"},
        )

        # Mock get_thread to return our test context
        mock_get_thread.return_value = thread_context
        # Mock add_turn to return success
        mock_add_turn.return_value = True

        # Mock the model to capture what prompt it receives
        captured_prompt = None

        with patch.object(self.tool, "create_model") as mock_create_model:
            mock_model = Mock()
            mock_response = Mock()
            mock_response.candidates = [
                Mock(
                    content=Mock(parts=[Mock(text="Response with conversation context")]),
                    finish_reason="STOP",
                )
            ]

            def capture_prompt(prompt):
                nonlocal captured_prompt
                captured_prompt = prompt
                return mock_response

            mock_model.generate_content.side_effect = capture_prompt
            mock_create_model.return_value = mock_model

            # Execute tool with continuation_id
            arguments = {
                "prompt": "What should we fix first?",
                "continuation_id": "test-history-id",
                "files": ["/src/utils.py"],  # New file for this turn
            }
            response = await self.tool.execute(arguments)

            # Verify response succeeded
            response_data = json.loads(response[0].text)
            assert response_data["status"] == "success"

            # Verify get_thread was called for history reconstruction
            mock_get_thread.assert_called_with("test-history-id")

            # Verify the prompt includes conversation history
            assert captured_prompt is not None

            # Check that conversation history is included
            assert "=== CONVERSATION HISTORY ===" in captured_prompt
            assert "Turn 1 (Gemini using analyze)" in captured_prompt
            assert "Turn 2 (Gemini using codereview)" in captured_prompt

            # Check that file context from previous turns is included
            assert "📁 Files referenced: /src/auth.py, /src/security.py" in captured_prompt
            assert "📁 Files referenced: /src/auth.py, /tests/test_auth.py" in captured_prompt

            # Check that previous turn content is included
            assert "I've analyzed the authentication module and found several security issues." in captured_prompt
            assert "The code review shows these files have critical vulnerabilities." in captured_prompt

            # Check that continuation instruction is included
            assert "Continue this conversation by building on the previous context." in captured_prompt

            # Check that current request is still included
            assert "What should we fix first?" in captured_prompt
            assert "Files in current request: /src/utils.py" in captured_prompt
@patch("tools.base.get_thread")
async def test_no_history_when_thread_not_found(self, mock_get_thread):
"""Test graceful handling when thread is not found"""
# Mock get_thread to return None (thread not found)
mock_get_thread.return_value = None
captured_prompt = None
with patch.object(self.tool, "create_model") as mock_create_model:
mock_model = Mock()
mock_response = Mock()
mock_response.candidates = [
Mock(
content=Mock(parts=[Mock(text="Response without history")]),
finish_reason="STOP",
)
]
def capture_prompt(prompt):
nonlocal captured_prompt
captured_prompt = prompt
return mock_response
mock_model.generate_content.side_effect = capture_prompt
mock_create_model.return_value = mock_model
# Execute tool with continuation_id for non-existent thread
arguments = {"prompt": "Test without history", "continuation_id": "non-existent-thread-id"}
response = await self.tool.execute(arguments)
# Should still succeed but without history
response_data = json.loads(response[0].text)
assert response_data["status"] == "success"
# Verify get_thread was called for non-existent thread
mock_get_thread.assert_called_with("non-existent-thread-id")
# Verify the prompt does NOT include conversation history
assert captured_prompt is not None
assert "=== CONVERSATION HISTORY ===" not in captured_prompt
assert "Test without history" in captured_prompt

    async def test_no_history_for_new_conversations(self):
        """Test that new conversations (no continuation_id) don't get history"""
        captured_prompt = None

        with patch.object(self.tool, "create_model") as mock_create_model:
            mock_model = Mock()
            mock_response = Mock()
            mock_response.candidates = [
                Mock(
                    content=Mock(parts=[Mock(text="New conversation response")]),
                    finish_reason="STOP",
                )
            ]

            def capture_prompt(prompt):
                nonlocal captured_prompt
                captured_prompt = prompt
                return mock_response

            mock_model.generate_content.side_effect = capture_prompt
            mock_create_model.return_value = mock_model

            # Execute tool without continuation_id (new conversation)
            arguments = {"prompt": "Start new conversation", "files": ["/src/new_file.py"]}
            response = await self.tool.execute(arguments)

            # Should succeed (may offer continuation for new conversations)
            response_data = json.loads(response[0].text)
            assert response_data["status"] in ["success", "continuation_available"]

            # Verify the prompt does NOT include conversation history
            assert captured_prompt is not None
            assert "=== CONVERSATION HISTORY ===" not in captured_prompt
            assert "Start new conversation" in captured_prompt
            assert "Files in current request: /src/new_file.py" in captured_prompt

            # Should include follow-up instructions for new conversation
            # (This is the existing behavior for new conversations)
            assert "If you'd like to ask a follow-up question" in captured_prompt


if __name__ == "__main__":
    pytest.main([__file__])
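
Taken together, the tests above pin down the shape of the fix in tools/base.py: when a continuation_id is supplied, the tool fetches the stored thread and prepends the reconstructed history to the prompt before calling the model. A simplified sketch inferred from the assertions; names other than get_thread, add_turn, and build_conversation_history are assumptions, not the actual implementation:

    from utils.conversation_memory import add_turn, build_conversation_history, get_thread

    class BaseToolSketch:
        async def execute(self, arguments: dict):
            request = self.get_request_model()(**arguments)
            prompt = await self.prepare_prompt(request)

            continuation_id = arguments.get("continuation_id")
            if continuation_id:
                context = get_thread(continuation_id)  # returns None for unknown threads
                if context:
                    # Prepend the "=== CONVERSATION HISTORY ===" block (including file
                    # references from earlier turns) so the model sees the whole thread
                    prompt = f"{build_conversation_history(context)}\n\n{prompt}"

            response = self.create_model().generate_content(prompt)
            # ... record this turn via add_turn and format the tool response ...
            return response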


@@ -0,0 +1,381 @@
"""
Test suite for cross-tool continuation functionality
Tests that continuation IDs work properly across different tools,
allowing multi-turn conversations to span multiple tool types.
"""
import json
from unittest.mock import Mock, patch
import pytest
from pydantic import Field
from tools.base import BaseTool, ToolRequest
from utils.conversation_memory import ConversationTurn, ThreadContext
class AnalysisRequest(ToolRequest):
"""Test request for analysis tool"""
code: str = Field(..., description="Code to analyze")
class ReviewRequest(ToolRequest):
"""Test request for review tool"""
findings: str = Field(..., description="Analysis findings to review")
files: list[str] = Field(default_factory=list, description="Optional files to review")

class MockAnalysisTool(BaseTool):
    """Mock analysis tool for cross-tool testing"""

    def get_name(self) -> str:
        return "test_analysis"

    def get_description(self) -> str:
        return "Test analysis tool"

    def get_input_schema(self) -> dict:
        return {
            "type": "object",
            "properties": {
                "code": {"type": "string"},
                "continuation_id": {"type": "string", "required": False},
            },
        }

    def get_system_prompt(self) -> str:
        return "Analyze the provided code"

    def get_request_model(self):
        return AnalysisRequest

    async def prepare_prompt(self, request) -> str:
        return f"System: {self.get_system_prompt()}\nCode: {request.code}"

class MockReviewTool(BaseTool):
    """Mock review tool for cross-tool testing"""

    def get_name(self) -> str:
        return "test_review"

    def get_description(self) -> str:
        return "Test review tool"

    def get_input_schema(self) -> dict:
        return {
            "type": "object",
            "properties": {
                "findings": {"type": "string"},
                "continuation_id": {"type": "string", "required": False},
            },
        }

    def get_system_prompt(self) -> str:
        return "Review the analysis findings"

    def get_request_model(self):
        return ReviewRequest

    async def prepare_prompt(self, request) -> str:
        return f"System: {self.get_system_prompt()}\nFindings: {request.findings}"

class TestCrossToolContinuation:
    """Test cross-tool continuation functionality"""

    def setup_method(self):
        self.analysis_tool = MockAnalysisTool()
        self.review_tool = MockReviewTool()

    @patch("utils.conversation_memory.get_redis_client")
    async def test_continuation_id_works_across_different_tools(self, mock_redis):
        """Test that a continuation_id from one tool can be used with another tool"""
        mock_client = Mock()
        mock_redis.return_value = mock_client

        # Step 1: Analysis tool creates a conversation with follow-up
        with patch.object(self.analysis_tool, "create_model") as mock_create_model:
            mock_model = Mock()
            mock_response = Mock()
            mock_response.candidates = [
                Mock(
                    content=Mock(
                        parts=[
                            Mock(
                                text="""Found potential security issues in authentication logic.

```json
{
"follow_up_question": "Would you like me to review these security findings in detail?",
"suggested_params": {"findings": "Authentication bypass vulnerability detected"},
"ui_hint": "Security review recommended"
}
```"""
                            )
                        ]
                    ),
                    finish_reason="STOP",
                )
            ]
            mock_model.generate_content.return_value = mock_response
            mock_create_model.return_value = mock_model

            # Execute analysis tool
            arguments = {"code": "function authenticate(user) { return true; }"}
            response = await self.analysis_tool.execute(arguments)
            response_data = json.loads(response[0].text)
            assert response_data["status"] == "requires_continuation"
            continuation_id = response_data["follow_up_request"]["continuation_id"]

        # Step 2: Mock the existing thread context for the review tool
        # The thread was created by analysis_tool but will be continued by review_tool
        existing_context = ThreadContext(
            thread_id=continuation_id,
            created_at="2023-01-01T00:00:00Z",
            last_updated_at="2023-01-01T00:01:00Z",
            tool_name="test_analysis",  # Original tool
            turns=[
                ConversationTurn(
                    role="assistant",
                    content="Found potential security issues in authentication logic.",
                    timestamp="2023-01-01T00:00:30Z",
                    tool_name="test_analysis",  # Original tool
                    follow_up_question="Would you like me to review these security findings in detail?",
                )
            ],
            initial_context={"code": "function authenticate(user) { return true; }"},
        )

        # Mock the get call to return existing context for add_turn to work
        def mock_get_side_effect(key):
            if key.startswith("thread:"):
                return existing_context.model_dump_json()
            return None

        mock_client.get.side_effect = mock_get_side_effect

        # Step 3: Review tool uses the same continuation_id
        with patch.object(self.review_tool, "create_model") as mock_create_model:
            mock_model = Mock()
            mock_response = Mock()
            mock_response.candidates = [
                Mock(
                    content=Mock(
                        parts=[
                            Mock(
                                text="Critical security vulnerability confirmed. The authentication function always returns true, bypassing all security checks."
                            )
                        ]
                    ),
                    finish_reason="STOP",
                )
            ]
            mock_model.generate_content.return_value = mock_response
            mock_create_model.return_value = mock_model

            # Execute review tool with the continuation_id from analysis tool
            arguments = {
                "findings": "Authentication bypass vulnerability detected",
                "continuation_id": continuation_id,
            }
            response = await self.review_tool.execute(arguments)
            response_data = json.loads(response[0].text)

            # Should successfully continue the conversation
            assert response_data["status"] == "success"
            assert "Critical security vulnerability confirmed" in response_data["content"]

        # Step 4: Verify the cross-tool continuation worked
        # Should have at least 2 setex calls: 1 from analysis tool follow-up, 1 from review tool add_turn
        setex_calls = mock_client.setex.call_args_list
        assert len(setex_calls) >= 2  # Analysis tool creates thread + review tool adds turn

        # Get the final thread state from the last setex call
        final_thread_data = setex_calls[-1][0][2]  # Last setex call's data
        final_context = json.loads(final_thread_data)

        assert final_context["thread_id"] == continuation_id
        assert final_context["tool_name"] == "test_analysis"  # Original tool name preserved
        assert len(final_context["turns"]) == 2  # Original + new turn

        # Verify the new turn has the review tool's name
        second_turn = final_context["turns"][1]
        assert second_turn["role"] == "assistant"
        assert second_turn["tool_name"] == "test_review"  # New tool name
        assert "Critical security vulnerability confirmed" in second_turn["content"]
@patch("utils.conversation_memory.get_redis_client")
def test_cross_tool_conversation_history_includes_tool_names(self, mock_redis):
"""Test that conversation history properly shows which tool was used for each turn"""
mock_client = Mock()
mock_redis.return_value = mock_client
# Create a thread context with turns from different tools
thread_context = ThreadContext(
thread_id="12345678-1234-1234-1234-123456789012",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:03:00Z",
tool_name="test_analysis", # Original tool
turns=[
ConversationTurn(
role="assistant",
content="Analysis complete: Found 3 issues",
timestamp="2023-01-01T00:01:00Z",
tool_name="test_analysis",
),
ConversationTurn(
role="assistant",
content="Review complete: 2 critical, 1 minor issue",
timestamp="2023-01-01T00:02:00Z",
tool_name="test_review",
),
ConversationTurn(
role="assistant",
content="Deep analysis: Root cause identified",
timestamp="2023-01-01T00:03:00Z",
tool_name="test_thinkdeep",
),
],
initial_context={"code": "test code"},
)
# Build conversation history
from utils.conversation_memory import build_conversation_history
history = build_conversation_history(thread_context)
# Verify tool names are included in the history
assert "Turn 1 (Gemini using test_analysis)" in history
assert "Turn 2 (Gemini using test_review)" in history
assert "Turn 3 (Gemini using test_thinkdeep)" in history
assert "Analysis complete: Found 3 issues" in history
assert "Review complete: 2 critical, 1 minor issue" in history
assert "Deep analysis: Root cause identified" in history
@patch("utils.conversation_memory.get_redis_client")
@patch("utils.conversation_memory.get_thread")
async def test_cross_tool_conversation_with_files_context(self, mock_get_thread, mock_redis):
"""Test that file context is preserved across tool switches"""
mock_client = Mock()
mock_redis.return_value = mock_client
# Create existing context with files from analysis tool
existing_context = ThreadContext(
thread_id="test-thread-id",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:01:00Z",
tool_name="test_analysis",
turns=[
ConversationTurn(
role="assistant",
content="Analysis of auth.py complete",
timestamp="2023-01-01T00:01:00Z",
tool_name="test_analysis",
files=["/src/auth.py", "/src/utils.py"],
)
],
initial_context={"code": "authentication code", "files": ["/src/auth.py"]},
)
# Mock get_thread to return the existing context
mock_get_thread.return_value = existing_context
# Mock review tool response
with patch.object(self.review_tool, "create_model") as mock_create_model:
mock_model = Mock()
mock_response = Mock()
mock_response.candidates = [
Mock(
content=Mock(parts=[Mock(text="Security review of auth.py shows vulnerabilities")]),
finish_reason="STOP",
)
]
mock_model.generate_content.return_value = mock_response
mock_create_model.return_value = mock_model
# Execute review tool with additional files
arguments = {
"findings": "Auth vulnerabilities found",
"continuation_id": "test-thread-id",
"files": ["/src/security.py"], # Additional file for review
}
response = await self.review_tool.execute(arguments)
response_data = json.loads(response[0].text)
assert response_data["status"] == "success"
# Verify files from both tools are tracked in Redis calls
setex_calls = mock_client.setex.call_args_list
assert len(setex_calls) >= 1 # At least the add_turn call from review tool
# Get the final thread state
final_thread_data = setex_calls[-1][0][2]
final_context = json.loads(final_thread_data)
# Check that the new turn includes the review tool's files
review_turn = final_context["turns"][1] # Second turn (review tool)
assert review_turn["tool_name"] == "test_review"
assert review_turn["files"] == ["/src/security.py"]
# Original turn's files should still be there
analysis_turn = final_context["turns"][0] # First turn (analysis tool)
assert analysis_turn["files"] == ["/src/auth.py", "/src/utils.py"]
@patch("utils.conversation_memory.get_redis_client")
@patch("utils.conversation_memory.get_thread")
def test_thread_preserves_original_tool_name(self, mock_get_thread, mock_redis):
"""Test that the thread's original tool_name is preserved even when other tools contribute"""
mock_client = Mock()
mock_redis.return_value = mock_client
# Create existing thread from analysis tool
existing_context = ThreadContext(
thread_id="test-thread-id",
created_at="2023-01-01T00:00:00Z",
last_updated_at="2023-01-01T00:01:00Z",
tool_name="test_analysis", # Original tool
turns=[
ConversationTurn(
role="assistant",
content="Initial analysis",
timestamp="2023-01-01T00:01:00Z",
tool_name="test_analysis",
)
],
initial_context={"code": "test"},
)
# Mock get_thread to return the existing context
mock_get_thread.return_value = existing_context
# Add turn from review tool
from utils.conversation_memory import add_turn
success = add_turn(
"test-thread-id",
"assistant",
"Review completed",
tool_name="test_review", # Different tool
)
# Verify the add_turn succeeded (basic cross-tool functionality test)
assert success
# Verify thread's original tool_name is preserved
setex_calls = mock_client.setex.call_args_list
updated_thread_data = setex_calls[-1][0][2]
updated_context = json.loads(updated_thread_data)
assert updated_context["tool_name"] == "test_analysis" # Original preserved
assert len(updated_context["turns"]) == 2
assert updated_context["turns"][0]["tool_name"] == "test_analysis"
assert updated_context["turns"][1]["tool_name"] == "test_review"
if __name__ == "__main__":
pytest.main([__file__])
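
For reference, the history format these tests assert can be approximated as follows; this is a reconstruction from the asserted substrings, not the actual build_conversation_history implementation in utils/conversation_memory.py:

    def build_conversation_history_sketch(context) -> str:
        # Header, one block per turn (tool name plus any referenced files),
        # then the continuation instruction the tests check for
        lines = ["=== CONVERSATION HISTORY ==="]
        for i, turn in enumerate(context.turns, start=1):
            lines.append(f"Turn {i} (Gemini using {turn.tool_name})")
            if turn.files:
                lines.append(f"📁 Files referenced: {', '.join(turn.files)}")
            lines.append(turn.content)
        lines.append("Continue this conversation by building on the previous context.")
        return "\n".join(lines)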


@@ -32,9 +32,9 @@ class TestThinkingModes:
         ]
         for tool, expected_default in tools:
-            assert tool.get_default_thinking_mode() == expected_default, (
-                f"{tool.__class__.__name__} should default to {expected_default}"
-            )
+            assert (
+                tool.get_default_thinking_mode() == expected_default
+            ), f"{tool.__class__.__name__} should default to {expected_default}"

     @pytest.mark.asyncio
     @patch("tools.base.BaseTool.create_model")