# my-pal-mcp-server/tests/test_prompt_regression.py
"""
Integration tests to ensure normal prompt handling works with real API calls.
This test module verifies that all tools continue to work correctly with
normal-sized prompts using real integration testing instead of mocks.
INTEGRATION TESTS:
These tests are marked with @pytest.mark.integration and make real API calls.
They use the local-llama model which is FREE and runs locally via Ollama.
Prerequisites:
- Ollama installed and running locally
- CUSTOM_API_URL environment variable set to your Ollama endpoint (e.g., http://localhost:11434)
- local-llama model available through custom provider configuration
- No API keys required - completely FREE to run unlimited times!
Running Tests:
- All tests (including integration): pytest tests/test_prompt_regression.py
- Unit tests only: pytest tests/test_prompt_regression.py -m "not integration"
- Integration tests only: pytest tests/test_prompt_regression.py -m "integration"
Note: Integration tests skip gracefully if CUSTOM_API_URL is not set.
They are excluded from CI/CD but run by default locally when Ollama is configured.
"""

import json
import os
import tempfile

import pytest

# Load environment variables from .env file
from dotenv import load_dotenv

from tools.analyze import AnalyzeTool
from tools.chat import ChatTool
from tools.codereview import CodeReviewTool
from tools.thinkdeep import ThinkDeepTool

load_dotenv()

# Check if CUSTOM_API_URL is available for local-llama
CUSTOM_API_AVAILABLE = os.getenv("CUSTOM_API_URL") is not None


def skip_if_no_custom_api():
    """Helper to skip integration tests if CUSTOM_API_URL is not available."""
    if not CUSTOM_API_AVAILABLE:
        pytest.skip(
            "CUSTOM_API_URL not set. To run integration tests with local-llama, "
            "ensure CUSTOM_API_URL is set in .env file (e.g., http://localhost:11434/v1)"
        )
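

# A minimal declarative alternative to the helper above (a sketch; the tests
# below keep the explicit call-site helper so their structure stays unchanged):
# the same gate expressed as a reusable pytest marker that could decorate
# individual tests or the whole class.
requires_custom_api = pytest.mark.skipif(
    not CUSTOM_API_AVAILABLE,
    reason="CUSTOM_API_URL not set. See the module docstring for setup instructions.",
)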


class TestPromptIntegration:
    """Integration test suite for normal prompt handling with real API calls."""

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_chat_normal_prompt(self):
        """Test chat tool with normal prompt using real API."""
        skip_if_no_custom_api()

        tool = ChatTool()
        result = await tool.execute(
            {
                "prompt": "Explain Python decorators in one sentence",
                "model": "local-llama",  # Use available model for integration tests
                "working_directory_absolute_path": tempfile.gettempdir(),
            }
        )

        assert len(result) == 1
        output = json.loads(result[0].text)
        assert output["status"] in ["success", "continuation_available"]
        assert "content" in output
        assert len(output["content"]) > 0

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_chat_with_files(self):
        """Test chat tool with absolute_file_paths parameter using real API."""
        skip_if_no_custom_api()

        tool = ChatTool()
        # Create a temporary Python file for testing
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(
                """
def hello_world():
    \"\"\"A simple hello world function.\"\"\"
    return "Hello, World!"

if __name__ == "__main__":
    print(hello_world())
"""
            )
            temp_file = f.name

        try:
            result = await tool.execute(
                {
                    "prompt": "What does this Python code do?",
                    "absolute_file_paths": [temp_file],
                    "model": "local-llama",
                    "working_directory_absolute_path": tempfile.gettempdir(),
                }
            )

            assert len(result) == 1
            output = json.loads(result[0].text)
            assert output["status"] in ["success", "continuation_available"]
            assert "content" in output
            # Should mention the hello world function
            assert "hello" in output["content"].lower() or "function" in output["content"].lower()
        finally:
            # Clean up temp file
            os.unlink(temp_file)

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_thinkdeep_normal_analysis(self):
        """Test thinkdeep tool with normal analysis using real API."""
        skip_if_no_custom_api()

        tool = ThinkDeepTool()
        result = await tool.execute(
            {
                "step": "I think we should use a cache for performance",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Building a high-traffic API - considering scalability and reliability",
                "problem_context": "Building a high-traffic API",
                "focus_areas": ["scalability", "reliability"],
                "model": "local-llama",
            }
        )

        assert len(result) == 1
        output = json.loads(result[0].text)
        # ThinkDeep workflow tool should process the analysis
        assert "status" in output
        assert output["status"] in ["calling_expert_analysis", "analysis_complete", "pause_for_investigation"]

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_codereview_normal_review(self):
        """Test codereview tool with workflow inputs using real API."""
        skip_if_no_custom_api()

        tool = CodeReviewTool()
        # Create a temporary Python file for testing
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(
                """
def process_user_input(user_input):
    # Potentially unsafe code for demonstration
    query = f"SELECT * FROM users WHERE name = '{user_input}'"
    return query

def main():
    user_name = input("Enter name: ")
    result = process_user_input(user_name)
    print(result)
"""
            )
            temp_file = f.name

        try:
            result = await tool.execute(
                {
                    "step": "Initial code review investigation - examining security vulnerabilities",
                    "step_number": 1,
                    "total_steps": 2,
                    "next_step_required": True,
                    "findings": "Found security issues in code",
                    "relevant_files": [temp_file],
                    "review_type": "security",
                    "focus_on": "Look for SQL injection vulnerabilities",
                    "model": "local-llama",
                }
            )

            assert len(result) == 1
            output = json.loads(result[0].text)
            assert "status" in output
            assert output["status"] in ["pause_for_code_review", "calling_expert_analysis"]
        finally:
            # Clean up temp file
            os.unlink(temp_file)

    # NOTE: The precommit test has been removed because the precommit tool has been
    # refactored to use a workflow-based pattern instead of accepting simple prompt/path fields.
    # The new precommit tool requires workflow fields like: step, step_number, total_steps,
    # next_step_required, findings, etc. See simulator_tests/test_precommitworkflow_validation.py
    # for comprehensive workflow testing; a rough sketch of the new shape follows below.
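
    # For reference, a workflow-style precommit invocation would look roughly like
    # the commented sketch below. The tool class name here is hypothetical and the
    # field names are taken from the note above; see the simulator tests for the
    # authoritative shape.
    #
    # result = await PrecommitTool().execute(
    #     {
    #         "step": "Initial pre-commit validation of staged changes",
    #         "step_number": 1,
    #         "total_steps": 1,
    #         "next_step_required": False,
    #         "findings": "Reviewing staged changes before commit",
    #         "model": "local-llama",
    #     }
    # )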

    # NOTE: The debug tool test has been commented out because the debug tool has been
    # refactored to use a self-investigation pattern instead of accepting prompt/error_context fields.
    # The new debug tool requires fields like: step, step_number, total_steps, next_step_required, findings.
    # A sketch of the new call shape follows the old mock-based test below.

    # @pytest.mark.asyncio
    # async def test_debug_normal_error(self, mock_model_response):
    #     """Test debug tool with normal error description."""
    #     tool = DebugIssueTool()
    #
    #     with patch.object(tool, "get_model_provider") as mock_get_provider:
    #         mock_provider = MagicMock()
    #         mock_provider.get_provider_type.return_value = MagicMock(value="google")
    #         mock_provider.supports_thinking_mode.return_value = False
    #         mock_provider.generate_content.return_value = mock_model_response(
    #             "Root cause: The variable is undefined. Fix: Initialize it..."
    #         )
    #         mock_get_provider.return_value = mock_provider
    #
    #         result = await tool.execute(
    #             {
    #                 "prompt": "TypeError: Cannot read property 'name' of undefined",
    #                 "error_context": "at line 42 in user.js\n console.log(user.name)",
    #                 "runtime_info": "Node.js v16.14.0",
    #             }
    #         )
    #
    #         assert len(result) == 1
    #         output = json.loads(result[0].text)
    #         assert output["status"] in ["success", "continuation_available"]
    #         assert "Next Steps:" in output["content"]
    #         assert "Root cause" in output["content"]

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_analyze_normal_question(self):
        """Test analyze tool with normal question using real API."""
        skip_if_no_custom_api()

        tool = AnalyzeTool()
        # Create a temporary Python file demonstrating MVC pattern
        with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
            f.write(
                """
# Model
class User:
    def __init__(self, name, email):
        self.name = name
        self.email = email

# View
class UserView:
    def display_user(self, user):
        return f"User: {user.name} ({user.email})"

# Controller
class UserController:
    def __init__(self, model, view):
        self.model = model
        self.view = view

    def get_user_display(self):
        return self.view.display_user(self.model)
"""
            )
            temp_file = f.name

        try:
            result = await tool.execute(
                {
                    "step": "What design patterns are used in this codebase?",
                    "step_number": 1,
                    "total_steps": 1,
                    "next_step_required": False,
                    "findings": "Initial architectural analysis",
                    "relevant_files": [temp_file],
                    "analysis_type": "architecture",
                    "model": "local-llama",
                }
            )

            assert len(result) == 1
            output = json.loads(result[0].text)
            assert "status" in output
            # Workflow analyze tool should process the analysis
            assert output["status"] in ["calling_expert_analysis", "pause_for_investigation"]
        finally:
            # Clean up temp file
            os.unlink(temp_file)

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_empty_optional_fields(self):
        """Test tools work with empty optional fields using real API."""
        skip_if_no_custom_api()

        tool = ChatTool()
        # Test with no absolute_file_paths parameter
        result = await tool.execute(
            {
                "prompt": "Hello",
                "model": "local-llama",
                "working_directory_absolute_path": tempfile.gettempdir(),
            }
        )

        assert len(result) == 1
        output = json.loads(result[0].text)
        assert output["status"] in ["success", "continuation_available"]
        assert "content" in output

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_thinking_modes_work(self):
        """Test that thinking modes are properly passed through using real API."""
        skip_if_no_custom_api()

        tool = ChatTool()
        result = await tool.execute(
            {
                "prompt": "Explain quantum computing briefly",
                "thinking_mode": "low",
                "temperature": 0.8,
                "model": "local-llama",
                "working_directory_absolute_path": tempfile.gettempdir(),
            }
        )

        assert len(result) == 1
        output = json.loads(result[0].text)
        assert output["status"] in ["success", "continuation_available"]
        assert "content" in output
        # Should contain some quantum-related content
        assert "quantum" in output["content"].lower() or "computing" in output["content"].lower()

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_special_characters_in_prompts(self):
        """Test prompts with special characters work correctly using real API."""
        skip_if_no_custom_api()

        tool = ChatTool()
        special_prompt = (
            'Test with "quotes" and\nnewlines\tand tabs. Please just respond with the number that is the answer to 1+1.'
        )
        result = await tool.execute(
            {
                "prompt": special_prompt,
                "model": "local-llama",
                "working_directory_absolute_path": tempfile.gettempdir(),
            }
        )

        assert len(result) == 1
        output = json.loads(result[0].text)
        assert output["status"] in ["success", "continuation_available"]
        assert "content" in output
        # Should handle the special characters without crashing; the exact content
        # matters less than not failing.
        assert len(output["content"]) > 0

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_mixed_file_paths(self):
        """Test handling of various file path formats using real API."""
        skip_if_no_custom_api()

        tool = AnalyzeTool()
        # Create multiple temporary files to test different path formats
        temp_files = []
        try:
            # Create first file
            with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
                f.write("def function_one(): pass")
                temp_files.append(f.name)

            # Create second file
            with tempfile.NamedTemporaryFile(mode="w", suffix=".js", delete=False) as f:
                f.write("function functionTwo() { return 'hello'; }")
                temp_files.append(f.name)

            result = await tool.execute(
                {
                    "step": "Analyze these files",
                    "step_number": 1,
                    "total_steps": 1,
                    "next_step_required": False,
                    "findings": "Initial file analysis",
                    "relevant_files": temp_files,
                    "model": "local-llama",
                }
            )

            assert len(result) == 1
            output = json.loads(result[0].text)
            assert "status" in output
            # Should process the files
            assert output["status"] in [
                "calling_expert_analysis",
                "pause_for_investigation",
                "files_required_to_continue",
            ]
        finally:
            # Clean up temp files
            for temp_file in temp_files:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_unicode_content(self):
        """Test handling of unicode content in prompts using real API."""
        skip_if_no_custom_api()

        tool = ChatTool()
        unicode_prompt = "Explain what these mean: 你好世界 (Chinese) and مرحبا بالعالم (Arabic)"
        result = await tool.execute(
            {
                "prompt": unicode_prompt,
                "model": "local-llama",
                "working_directory_absolute_path": tempfile.gettempdir(),
            }
        )

        assert len(result) == 1
        output = json.loads(result[0].text)
        assert output["status"] in ["success", "continuation_available"]
        assert "content" in output
        # Should mention hello or world or greeting in some form (including French equivalents)
        content_lower = output["content"].lower()
        assert (
            "hello" in content_lower
            or "world" in content_lower
            or "greeting" in content_lower
            or "bonjour" in content_lower  # French: hello
            or "monde" in content_lower  # French: world
            or "salut" in content_lower  # French: greeting
        )


if __name__ == "__main__":
    # Run integration tests by default when called directly
    pytest.main([__file__, "-v", "-m", "integration"])