"""
|
|
Tests for dynamic context request and collaboration features
|
|
"""
|
|
|
|
import json
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
|
|
from tests.mock_helpers import create_mock_provider
|
|
from tools.analyze import AnalyzeTool
|
|
from tools.debug import DebugIssueTool
|
|
from tools.models import FilesNeededRequest, ToolOutput
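# Note: the mocked model responses in these tests use the clarification payload shape
#     {"status": "files_required_to_continue", "mandatory_instructions": "...", "files_needed": [...]}
# Workflow tools may surface this either as the top-level response status or inside the
# "expert_analysis" section, so the assertions below accept both shapes.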


class TestDynamicContextRequests:
    """Test the dynamic context request mechanism"""

    @pytest.fixture
    def analyze_tool(self):
        return AnalyzeTool()

    @pytest.fixture
    def debug_tool(self):
        return DebugIssueTool()

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    async def test_clarification_request_parsing(self, mock_get_provider, analyze_tool):
        """Test that tools correctly parse clarification requests"""
        # Mock model to return a clarification request
        clarification_json = json.dumps(
            {
                "status": "files_required_to_continue",
                "mandatory_instructions": "I need to see the package.json file to understand dependencies",
                "files_needed": ["package.json", "package-lock.json"],
            },
            ensure_ascii=False,
        )

        mock_provider = create_mock_provider()
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
            content=clarification_json, usage={}, model_name="gemini-2.5-flash", metadata={}
        )
        mock_get_provider.return_value = mock_provider

        result = await analyze_tool.execute(
            {
                "step": "Analyze the dependencies used in this project",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Initial dependency analysis",
                "relevant_files": ["/absolute/path/src/index.js"],
            }
        )

        assert len(result) == 1

        # Parse the response - analyze tool now uses workflow architecture
        response_data = json.loads(result[0].text)
        # Workflow tools may handle provider errors differently than simple tools
        # They might return error, expert analysis, or clarification requests
        assert response_data["status"] in ["calling_expert_analysis", "error", "files_required_to_continue"]

        # Check that expert analysis was performed and contains the clarification
        if "expert_analysis" in response_data:
            expert_analysis = response_data["expert_analysis"]
            # The mock should have returned the clarification JSON
            if "raw_analysis" in expert_analysis:
                analysis_content = expert_analysis["raw_analysis"]
                assert "package.json" in analysis_content
                assert "dependencies" in analysis_content

        # For workflow tools, the files_needed logic is handled differently
        # The test validates that the mocked clarification content was processed
        assert "step_number" in response_data
        assert response_data["step_number"] == 1

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    @patch("utils.conversation_memory.create_thread", return_value="debug-test-uuid")
    @patch("utils.conversation_memory.add_turn")
    async def test_normal_response_not_parsed_as_clarification(
        self, mock_add_turn, mock_create_thread, mock_get_provider, debug_tool
    ):
        """Test that normal investigation responses work correctly with new debug tool"""
        # The new debug tool uses self-investigation pattern
        result = await debug_tool.execute(
            {
                "step": "Investigating NameError: name 'utils' is not defined",
                "step_number": 1,
                "total_steps": 3,
                "next_step_required": True,
                "findings": "The error indicates 'utils' module is not imported or defined",
                "files_checked": ["/code/main.py"],
                "relevant_files": ["/code/main.py"],
                "hypothesis": "Missing import statement for utils module",
                "confidence": "high",
            }
        )

        assert len(result) == 1

        # Parse the response - new debug tool returns structured JSON
        response_data = json.loads(result[0].text)
        # Debug tool now returns "pause_for_investigation" to force actual investigation
        assert response_data["status"] == "pause_for_investigation"
        assert response_data["step_number"] == 1
        assert response_data["next_step_required"] is True
        assert response_data["investigation_status"]["current_confidence"] == "high"
        assert response_data["investigation_required"] is True
        assert "required_actions" in response_data

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    async def test_malformed_clarification_request_treated_as_normal(self, mock_get_provider, analyze_tool):
        """Test that malformed JSON clarification requests are treated as normal responses"""
        malformed_json = '{"status": "files_required_to_continue", "prompt": "Missing closing brace"'

        mock_provider = create_mock_provider()
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
            content=malformed_json, usage={}, model_name="gemini-2.5-flash", metadata={}
        )
        mock_get_provider.return_value = mock_provider

        result = await analyze_tool.execute(
            {
                "step": "What does this do?",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Initial code analysis",
                "relevant_files": ["/absolute/path/test.py"],
            }
        )

        assert len(result) == 1

        # Should be treated as normal response due to JSON parse error
        response_data = json.loads(result[0].text)
        # Workflow tools may handle provider errors differently than simple tools
        # They might return error, expert analysis, or clarification requests
        assert response_data["status"] in ["calling_expert_analysis", "error", "files_required_to_continue"]

        # The malformed JSON should appear in the expert analysis content
        if "expert_analysis" in response_data:
            expert_analysis = response_data["expert_analysis"]
            if "raw_analysis" in expert_analysis:
                analysis_content = expert_analysis["raw_analysis"]
                # The malformed JSON should be included in the analysis
                assert "files_required_to_continue" in analysis_content or malformed_json in str(response_data)

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    async def test_clarification_with_suggested_action(self, mock_get_provider, analyze_tool):
        """Test clarification request with suggested next action"""
        clarification_json = json.dumps(
            {
                "status": "files_required_to_continue",
                "mandatory_instructions": "I need to see the database configuration to analyze the connection error",
                "files_needed": ["config/database.yml", "src/db.py"],
                "suggested_next_action": {
                    "tool": "analyze",
                    "args": {
                        "prompt": "Analyze database connection timeout issue",
                        "relevant_files": [
                            "/config/database.yml",
                            "/src/db.py",
                            "/logs/error.log",
                        ],
                    },
                },
            },
            ensure_ascii=False,
        )

        mock_provider = create_mock_provider()
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
            content=clarification_json, usage={}, model_name="gemini-2.5-flash", metadata={}
        )
        mock_get_provider.return_value = mock_provider

        result = await analyze_tool.execute(
            {
                "step": "Analyze database connection timeout issue",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Initial database timeout analysis",
                "relevant_files": ["/absolute/logs/error.log"],
            }
        )

        assert len(result) == 1

        response_data = json.loads(result[0].text)

        # Workflow tools should either promote clarification status or handle it in expert analysis
        if response_data["status"] == "files_required_to_continue":
            # Clarification was properly promoted to main status
            # Check if mandatory_instructions is at top level or in content
            if "mandatory_instructions" in response_data:
                assert "database configuration" in response_data["mandatory_instructions"]
                assert "files_needed" in response_data
                assert "config/database.yml" in response_data["files_needed"]
                assert "src/db.py" in response_data["files_needed"]
            elif "content" in response_data:
                # Parse content JSON for workflow tools
                try:
                    content_json = json.loads(response_data["content"])
                    assert "mandatory_instructions" in content_json
                    assert (
                        "database configuration" in content_json["mandatory_instructions"]
                        or "database" in content_json["mandatory_instructions"]
                    )
                    assert "files_needed" in content_json
                    files_needed_str = str(content_json["files_needed"])
                    assert (
                        "config/database.yml" in files_needed_str
                        or "config" in files_needed_str
                        or "database" in files_needed_str
                    )
                except json.JSONDecodeError:
                    # Content is not JSON, check if it contains required text
                    content = response_data["content"]
                    assert "database configuration" in content or "config" in content
        elif response_data["status"] == "calling_expert_analysis":
            # Clarification may be handled in expert analysis section
            if "expert_analysis" in response_data:
                expert_analysis = response_data["expert_analysis"]
                expert_content = str(expert_analysis)
                assert (
                    "database configuration" in expert_content
                    or "config/database.yml" in expert_content
                    or "files_required_to_continue" in expert_content
                )
        else:
            # Some other status - ensure it's a valid workflow response
            assert "step_number" in response_data

        # Check for suggested next action
        if "suggested_next_action" in response_data:
            action = response_data["suggested_next_action"]
            assert action["tool"] == "analyze"

    def test_tool_output_model_serialization(self):
        """Test ToolOutput model serialization"""
        output = ToolOutput(
            status="success",
            content="Test content",
            content_type="markdown",
            metadata={"tool_name": "test", "execution_time": 1.5},
        )

        json_str = output.model_dump_json()
        parsed = json.loads(json_str)

        assert parsed["status"] == "success"
        assert parsed["content"] == "Test content"
        assert parsed["content_type"] == "markdown"
        assert parsed["metadata"]["tool_name"] == "test"

    def test_clarification_request_model(self):
        """Test FilesNeededRequest model"""
        request = FilesNeededRequest(
            mandatory_instructions="Need more context",
            files_needed=["file1.py", "file2.py"],
            suggested_next_action={"tool": "analyze", "args": {}},
        )

        assert request.mandatory_instructions == "Need more context"
        assert len(request.files_needed) == 2
        assert request.suggested_next_action["tool"] == "analyze"

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    async def test_error_response_format(self, mock_get_provider, analyze_tool):
        """Test error response format"""
        mock_get_provider.side_effect = Exception("API connection failed")

        result = await analyze_tool.execute(
            {
                "step": "Analyze this",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Initial analysis",
                "relevant_files": ["/absolute/path/test.py"],
            }
        )

        assert len(result) == 1

        response_data = json.loads(result[0].text)
        # Workflow tools may handle provider errors differently than simple tools
        # They might return error, complete analysis, or even clarification requests
        assert response_data["status"] in ["error", "calling_expert_analysis", "files_required_to_continue"]

        # If expert analysis was attempted, it may succeed or fail
        if response_data["status"] == "calling_expert_analysis" and "expert_analysis" in response_data:
            expert_analysis = response_data["expert_analysis"]
            # Could be an error or a successful analysis that requests clarification
            analysis_status = expert_analysis.get("status", "")
            assert (
                analysis_status in ["analysis_error", "analysis_complete"]
                or "error" in expert_analysis
                or "files_required_to_continue" in str(expert_analysis)
            )
        elif response_data["status"] == "error":
            assert "content" in response_data
            assert response_data["content_type"] == "text"


class TestCollaborationWorkflow:
    """Test complete collaboration workflows"""

    def teardown_method(self):
        """Clean up after each test to prevent state pollution."""
        # Clear provider registry singleton
        from providers.registry import ModelProviderRegistry

        ModelProviderRegistry._instance = None

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    @patch("tools.workflow.workflow_mixin.BaseWorkflowMixin._call_expert_analysis")
    async def test_dependency_analysis_triggers_clarification(self, mock_expert_analysis, mock_get_provider):
        """Test that asking about dependencies without package files triggers clarification"""
        tool = AnalyzeTool()

        # Mock Gemini to request package.json when asked about dependencies
        clarification_json = json.dumps(
            {
                "status": "files_required_to_continue",
                "mandatory_instructions": "I need to see the package.json file to analyze npm dependencies",
                "files_needed": ["package.json", "package-lock.json"],
            },
            ensure_ascii=False,
        )

        mock_provider = create_mock_provider()
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
            content=clarification_json, usage={}, model_name="gemini-2.5-flash", metadata={}
        )
        mock_get_provider.return_value = mock_provider

        # Mock expert analysis to avoid actual API calls
        mock_expert_analysis.return_value = {
            "status": "analysis_complete",
            "raw_analysis": "I need to see the package.json file to analyze npm dependencies",
        }

        # Ask about dependencies with only source files (using new workflow format)
        result = await tool.execute(
            {
                "step": "What npm packages and versions does this project use?",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Initial dependency analysis",
                "relevant_files": ["/absolute/path/src/index.js"],
            }
        )

        response = json.loads(result[0].text)

        # Workflow tools should either promote clarification status or handle it in expert analysis
        if response["status"] == "files_required_to_continue":
            # Clarification was properly promoted to main status
            assert "mandatory_instructions" in response
            assert "package.json" in response["mandatory_instructions"]
            assert "files_needed" in response
            assert "package.json" in response["files_needed"]
            assert "package-lock.json" in response["files_needed"]
        elif response["status"] == "calling_expert_analysis":
            # Clarification may be handled in expert analysis section
            if "expert_analysis" in response:
                expert_analysis = response["expert_analysis"]
                expert_content = str(expert_analysis)
                assert (
                    "package.json" in expert_content
                    or "dependencies" in expert_content
                    or "files_required_to_continue" in expert_content
                )
        else:
            # Some other status - ensure it's a valid workflow response
            assert "step_number" in response

    @pytest.mark.asyncio
    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    @patch("tools.workflow.workflow_mixin.BaseWorkflowMixin._call_expert_analysis")
    async def test_multi_step_collaboration(self, mock_expert_analysis, mock_get_provider):
        """Test a multi-step collaboration workflow"""
        tool = AnalyzeTool()

        # Step 1: Initial request returns clarification needed
        clarification_json = json.dumps(
            {
                "status": "files_required_to_continue",
                "mandatory_instructions": "I need to see the configuration file to understand the connection settings",
                "files_needed": ["config.py"],
            },
            ensure_ascii=False,
        )

        mock_provider = create_mock_provider()
        mock_provider.get_provider_type.return_value = Mock(value="google")
        mock_provider.supports_thinking_mode.return_value = False
        mock_provider.generate_content.return_value = Mock(
            content=clarification_json, usage={}, model_name="gemini-2.5-flash", metadata={}
        )
        mock_get_provider.return_value = mock_provider

        # Mock expert analysis to avoid actual API calls
        mock_expert_analysis.return_value = {
            "status": "analysis_complete",
            "raw_analysis": "I need to see the configuration file to understand the database connection settings",
        }

        result1 = await tool.execute(
            {
                "step": "Analyze database connection timeout issue",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Initial database timeout analysis",
                "relevant_files": ["/logs/error.log"],
            }
        )

        response1 = json.loads(result1[0].text)

        # First call should either return clarification request or handle it in expert analysis
        if response1["status"] == "files_required_to_continue":
            # Clarification was properly promoted to main status
            pass  # This is the expected behavior
        elif response1["status"] == "calling_expert_analysis":
            # Clarification may be handled in expert analysis section
            if "expert_analysis" in response1:
                expert_analysis = response1["expert_analysis"]
                expert_content = str(expert_analysis)
                # Should contain some indication of clarification request
                assert (
                    "config" in expert_content
                    or "files_required_to_continue" in expert_content
                    or "database" in expert_content
                )
        else:
            # Some other status - ensure it's a valid workflow response
            assert "step_number" in response1

        # Step 2: Claude would provide additional context and re-invoke
        # This simulates the second call with more context
        final_response = """
        ## Summary
        The database connection timeout is caused by incorrect host configuration.

        ## Hypotheses (Ranked by Likelihood)

        ### 1. Incorrect Database Host (Confidence: High)
        **Root Cause:** The config.py file shows the database host is set to 'localhost' but the database is running on a different server.
        """

        mock_provider.generate_content.return_value = Mock(
            content=final_response, usage={}, model_name="gemini-2.5-flash", metadata={}
        )

        # Update expert analysis mock for second call
        mock_expert_analysis.return_value = {
            "status": "analysis_complete",
            "raw_analysis": final_response,
        }

        result2 = await tool.execute(
            {
                "step": "Analyze database connection timeout issue with config file",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Analysis with configuration context",
                "relevant_files": ["/absolute/path/config.py", "/logs/error.log"],  # Additional context provided
            }
        )

        response2 = json.loads(result2[0].text)

        # Workflow tools should either return expert analysis or handle clarification properly
        # Accept multiple valid statuses as the workflow can handle the additional context differently
        # Include 'error' status in case API calls fail in test environment
        assert response2["status"] in [
            "calling_expert_analysis",
            "files_required_to_continue",
            "pause_for_analysis",
            "error",
        ]

        # Check that the response contains the expected content regardless of status

        # If expert analysis was performed, verify content is there
        if "expert_analysis" in response2:
            expert_analysis = response2["expert_analysis"]
            if "raw_analysis" in expert_analysis:
                analysis_content = expert_analysis["raw_analysis"]
                assert (
                    "incorrect host configuration" in analysis_content.lower() or "database" in analysis_content.lower()
                )
        elif response2["status"] == "files_required_to_continue":
            # If clarification is still being requested, ensure it's reasonable
            # Since we provided config.py and error.log, workflow tool might still need more context
            assert "step_number" in response2  # Should be valid workflow response
        else:
            # For other statuses, ensure basic workflow structure is maintained
            assert "step_number" in response2