* WIP: new workflow architecture
* WIP: further improvements and cleanup
* WIP: cleanup and docs, replace old tool with new
* WIP: cleanup and docs, replace old tool with new
* WIP: new planner implementation using workflow
* WIP: precommit tool working as a workflow instead of a basic tool. Support for passing False to use_assistant_model to skip external models completely and use Claude only
* WIP: precommit workflow version swapped with old
* WIP: codereview
* WIP: replaced codereview
* WIP: replaced codereview
* WIP: replaced refactor
* WIP: workflow for thinkdeep
* WIP: ensure files get embedded correctly
* WIP: thinkdeep replaced with workflow version
* WIP: improved messaging when an external model's response is received
* WIP: analyze tool swapped
* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only
* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only
* WIP: fixed get_completion_next_steps_message missing param
* Fixed tests. Request for files consistently
* Fixed tests. Request for files consistently
* Fixed tests
* New testgen workflow tool. Updated docs
* Swap testgen workflow
* Fix CI test failures by excluding API-dependent tests
  - Update GitHub Actions workflow to exclude simulation tests that require API keys
  - Fix collaboration tests to properly mock workflow tool expert analysis calls
  - Update test assertions to handle new workflow tool response format
  - Ensure unit tests run without external API dependencies in CI

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>
* WIP - Update tests to match new tools
* WIP - Update tests to match new tools
* WIP - Update tests to match new tools
* Should help with https://github.com/BeehiveInnovations/zen-mcp-server/issues/97. Clear Python cache when running script: https://github.com/BeehiveInnovations/zen-mcp-server/issues/96. Improved retry error logging. Cleanup
* WIP - chat tool using new architecture and improved code sharing
* Removed todo
* Removed todo
* Cleanup old name
* Tweak wordings
* Tweak wordings. Migrate old tests
* Support for Flash 2.0 and Flash Lite 2.0
* Support for Flash 2.0 and Flash Lite 2.0
* Support for Flash 2.0 and Flash Lite 2.0. Fixed test
* Improved consensus to use the workflow base class
* Improved consensus to use the workflow base class
* Allow images
* Allow images
* Replaced old consensus tool
* Cleanup tests
* Tests for prompt size
* New tool: docgen. Tests for prompt size. Fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/107. Use available token size limits: https://github.com/BeehiveInnovations/zen-mcp-server/issues/105
* Improved docgen prompt. Exclude TestGen from pytest inclusion
* Updated errors
* Lint
* DocGen instructed not to fix bugs, surface them and stick to d
* WIP
* Stop Claude from being lazy and only documenting a small handful
* More style rules

---------

Co-authored-by: Claude <noreply@anthropic.com>
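One behavioral note from the log above is worth illustrating: per the commit notes, workflow tools accept a `use_assistant_model` parameter, and passing `False` skips the external-model expert-analysis step so only Claude is used. A minimal sketch of such a request, assuming the step-tracking argument shape exercised by the tests below (the field values and overall shape are illustrative, not the documented API):

```python
# Hypothetical workflow tool arguments. Only use_assistant_model=False comes
# from the commit notes; the remaining keys mirror the step-tracking fields
# the tests in this file exercise.
arguments = {
    "step": "Validate staged changes before commit",
    "step_number": 1,
    "total_steps": 2,
    "next_step_required": True,
    "findings": "Initial validation notes",
    "use_assistant_model": False,  # skip external models entirely; use Claude only
}
```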
201 lines
7.1 KiB
Python
"""
|
|
Test that conversation history is correctly mapped to tool-specific fields
|
|
"""
|
|
|
|
from datetime import datetime
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from server import reconstruct_thread_context
|
|
from utils.conversation_memory import ConversationTurn, ThreadContext
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.no_mock_provider
|
|
async def test_conversation_history_field_mapping():
|
|
"""Test that enhanced prompts are mapped to prompt field for all tools"""
|
|
|
|
# Test data for different tools - all use 'prompt' now
|
|
test_cases = [
|
|
{
|
|
"tool_name": "analyze",
|
|
"original_value": "What does this code do?",
|
|
},
|
|
{
|
|
"tool_name": "chat",
|
|
"original_value": "Explain this concept",
|
|
},
|
|
{
|
|
"tool_name": "debug",
|
|
"original_value": "Getting undefined error",
|
|
},
|
|
{
|
|
"tool_name": "codereview",
|
|
"original_value": "Review this implementation",
|
|
},
|
|
{
|
|
"tool_name": "thinkdeep",
|
|
"original_value": "My analysis so far",
|
|
},
|
|
]
|
|
|
|
for test_case in test_cases:
|
|
# Create real conversation context
|
|
mock_context = ThreadContext(
|
|
thread_id="test-thread-123",
|
|
tool_name=test_case["tool_name"],
|
|
created_at=datetime.now().isoformat(),
|
|
last_updated_at=datetime.now().isoformat(),
|
|
turns=[
|
|
ConversationTurn(
|
|
role="user",
|
|
content="Previous user message",
|
|
timestamp=datetime.now().isoformat(),
|
|
files=["/test/file1.py"],
|
|
),
|
|
ConversationTurn(
|
|
role="assistant",
|
|
content="Previous assistant response",
|
|
timestamp=datetime.now().isoformat(),
|
|
),
|
|
],
|
|
initial_context={},
|
|
)
|
|
|
|
# Mock get_thread to return our test context
|
|
with patch("utils.conversation_memory.get_thread", return_value=mock_context):
|
|
with patch("utils.conversation_memory.add_turn", return_value=True):
|
|
# Create arguments with continuation_id and use a test model
|
|
arguments = {
|
|
"continuation_id": "test-thread-123",
|
|
"prompt": test_case["original_value"],
|
|
"files": ["/test/file2.py"],
|
|
"model": "flash", # Use test model to avoid provider errors
|
|
}
|
|
|
|
# Call reconstruct_thread_context
|
|
enhanced_args = await reconstruct_thread_context(arguments)
|
|
|
|
# Verify the enhanced prompt is in the prompt field
|
|
assert "prompt" in enhanced_args
|
|
enhanced_value = enhanced_args["prompt"]
|
|
|
|
# Should contain conversation history
|
|
assert "=== CONVERSATION HISTORY" in enhanced_value # Allow for both formats
|
|
assert "Previous user message" in enhanced_value
|
|
assert "Previous assistant response" in enhanced_value
|
|
|
|
# Should contain the new user input
|
|
assert "=== NEW USER INPUT ===" in enhanced_value
|
|
assert test_case["original_value"] in enhanced_value
|
|
|
|
# Should have token budget
|
|
assert "_remaining_tokens" in enhanced_args
|
|
assert enhanced_args["_remaining_tokens"] > 0
|
|
|
|
|
|
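
# For illustration: a sketch of the enhanced prompt layout that the assertions
# above check for. Only the "=== CONVERSATION HISTORY" and "=== NEW USER INPUT ==="
# markers and the turn contents are asserted; the history header may carry a
# suffix ("Allow for both formats"), and the per-turn formatting shown here is
# an assumption, not part of the contract.
#
#     === CONVERSATION HISTORY ===
#     --- Turn 1 (user) ---
#     Previous user message
#     --- Turn 2 (assistant) ---
#     Previous assistant response
#
#     === NEW USER INPUT ===
#     What does this code do?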

@pytest.mark.asyncio
@pytest.mark.no_mock_provider
async def test_unknown_tool_defaults_to_prompt():
    """Test that unknown tools default to using 'prompt' field"""

    mock_context = ThreadContext(
        thread_id="test-thread-456",
        tool_name="unknown_tool",
        created_at=datetime.now().isoformat(),
        last_updated_at=datetime.now().isoformat(),
        turns=[
            ConversationTurn(
                role="user",
                content="First message",
                timestamp=datetime.now().isoformat(),
            ),
            ConversationTurn(
                role="assistant",
                content="First response",
                timestamp=datetime.now().isoformat(),
            ),
        ],
        initial_context={},
    )

    with patch("utils.conversation_memory.get_thread", return_value=mock_context):
        with patch("utils.conversation_memory.add_turn", return_value=True):
            arguments = {
                "continuation_id": "test-thread-456",
                "prompt": "User input",
                "model": "flash",  # Use test model for real integration
            }

            enhanced_args = await reconstruct_thread_context(arguments)

            # Should default to 'prompt' field
            assert "prompt" in enhanced_args
            assert "=== CONVERSATION HISTORY" in enhanced_args["prompt"]  # Allow for both formats
            assert "First message" in enhanced_args["prompt"]
            assert "First response" in enhanced_args["prompt"]
            assert "User input" in enhanced_args["prompt"]
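
# The workflow request models exercised below share a standardized
# step-tracking shape. A minimal sketch of that shared shape, inferred from
# the constructor calls in this test (the actual base class under tools/ is a
# pydantic model and may declare additional fields):
#
#     class WorkflowRequestSketch(BaseModel):  # pydantic.BaseModel
#         step: str
#         step_number: int
#         total_steps: int
#         next_step_required: bool
#         findings: str
#         relevant_files: list[str] = []  # used by some tools only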

@pytest.mark.asyncio
async def test_tool_parameter_standardization():
    """Test that workflow tools use standardized investigation pattern"""
    from tools.analyze import AnalyzeWorkflowRequest
    from tools.codereview import CodeReviewRequest
    from tools.debug import DebugInvestigationRequest
    from tools.precommit import PrecommitRequest
    from tools.thinkdeep import ThinkDeepWorkflowRequest

    # Test analyze tool uses workflow pattern
    analyze = AnalyzeWorkflowRequest(
        step="What does this do?",
        step_number=1,
        total_steps=1,
        next_step_required=False,
        findings="Initial analysis",
        relevant_files=["/test.py"],
    )
    assert analyze.step == "What does this do?"

    # Debug tool now uses self-investigation pattern with different fields
    debug = DebugInvestigationRequest(
        step="Investigating error",
        step_number=1,
        total_steps=3,
        next_step_required=True,
        findings="Initial error analysis",
    )
    assert debug.step == "Investigating error"
    assert debug.findings == "Initial error analysis"

    # Test codereview tool uses workflow fields
    review = CodeReviewRequest(
        step="Initial code review investigation",
        step_number=1,
        total_steps=2,
        next_step_required=True,
        findings="Initial review findings",
        relevant_files=["/test.py"],
    )
    assert review.step == "Initial code review investigation"
    assert review.findings == "Initial review findings"

    # Test thinkdeep tool uses workflow pattern
    think = ThinkDeepWorkflowRequest(
        step="My analysis",
        step_number=1,
        total_steps=1,
        next_step_required=False,
        findings="Initial thinking analysis",
    )
    assert think.step == "My analysis"

    # Test precommit tool uses workflow fields
    precommit = PrecommitRequest(
        step="Validating changes for commit",
        step_number=1,
        total_steps=2,
        next_step_required=True,
        findings="Initial validation findings",
        path="/repo",  # path only needed for step 1
    )
    assert precommit.step == "Validating changes for commit"
    assert precommit.findings == "Initial validation findings"
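
# These tests run under plain pytest: the asyncio marker comes from
# pytest-asyncio, and no_mock_provider is assumed to be a custom marker
# registered in this repository's pytest configuration. Example invocation
# (the file path is an assumption about the repo layout):
#
#     pytest tests/test_conversation_field_mapping.py -v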