New tool! Use "challenge" to push back with confidence and stop Claude from blindly agreeing with you and undoing the _correct_ strategy just because you said it was wrong
Fixed run script to ensure pip is installed
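For reviewers skimming the diff: the challenge tool never calls a model itself; it wraps the user's statement in a critical-thinking prompt and returns it as JSON. Below is a minimal, hypothetical sketch of the wrapping behaviour the new tests pin down — the phrasing is inferred from the test assertions, and the real implementation in tools/challenge.py may word it differently.

# Hypothetical sketch only - inferred from the assertions in tests/test_challenge.py,
# not copied from tools/challenge.py (the exact wording there may differ).
class ChallengeToolSketch:
    def _wrap_prompt_for_challenge(self, prompt: str) -> str:
        """Wrap a statement so the model critiques it instead of agreeing by default."""
        return (
            "CHALLENGE THIS STATEMENT - Do not automatically agree.\n\n"
            f'"{prompt}"\n\n'
            "Is this actually correct? Check carefully. If any part of it is wrong "
            "or misleading, you must say so. "
            "Provide your honest assessment, not automatic agreement."
        )


if __name__ == "__main__":
    print(ChallengeToolSketch()._wrap_prompt_for_challenge("The sky is green"))

Note that the tests below check for these exact phrases ("CHALLENGE THIS STATEMENT", "you must say so", and so on), so any rewording in the real tool has to be mirrored there.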
tests/test_challenge.py (new file, 200 lines added)
@@ -0,0 +1,200 @@
"""
Tests for Challenge tool - validating critical challenge prompt wrapper

This module contains unit tests to ensure that the Challenge tool
properly wraps statements to encourage critical thinking and avoid
automatic agreement patterns.
"""

import json
from unittest.mock import patch

import pytest

from tools.challenge import ChallengeRequest, ChallengeTool


class TestChallengeTool:
    """Test suite for Challenge tool"""

    def setup_method(self):
        """Set up test fixtures"""
        self.tool = ChallengeTool()

    def test_tool_metadata(self):
        """Test that tool metadata matches requirements"""
        assert self.tool.get_name() == "challenge"
        assert "CRITICAL CHALLENGE PROMPT" in self.tool.get_description()
        assert "challenge it thoughtfully" in self.tool.get_description()
        assert "agreeing by default" in self.tool.get_description()
        assert self.tool.get_default_temperature() == 0.2  # TEMPERATURE_ANALYTICAL

    def test_requires_model(self):
        """Test that challenge tool doesn't require a model"""
        assert self.tool.requires_model() is False

    def test_schema_structure(self):
        """Test that schema has correct structure and excludes model fields"""
        schema = self.tool.get_input_schema()

        # Basic schema structure
        assert schema["type"] == "object"
        assert "properties" in schema
        assert "required" in schema

        # Required fields
        assert "prompt" in schema["required"]
        assert len(schema["required"]) == 1  # Only prompt is required

        # Properties
        properties = schema["properties"]
        assert "prompt" in properties

        # Should NOT have model-related fields since it doesn't require a model
        assert "model" not in properties
        assert "temperature" not in properties
        assert "thinking_mode" not in properties
        assert "use_websearch" not in properties
        assert "continuation_id" not in properties

    def test_request_model_validation(self):
        """Test that the request model validates correctly"""
        # Test valid request
        request = ChallengeRequest(prompt="The sky is green")
        assert request.prompt == "The sky is green"

        # Test with longer prompt
        long_prompt = (
            "Machine learning models always produce accurate results and should be trusted without verification"
        )
        request = ChallengeRequest(prompt=long_prompt)
        assert request.prompt == long_prompt

    def test_required_fields(self):
        """Test that required fields are enforced"""
        from pydantic import ValidationError

        # Missing prompt should raise validation error
        with pytest.raises(ValidationError):
            ChallengeRequest()

    @pytest.mark.asyncio
    async def test_execute_success(self):
        """Test successful execution of challenge tool"""
        arguments = {"prompt": "All software bugs are caused by syntax errors"}

        result = await self.tool.execute(arguments)

        # Should return a list with TextContent
        assert len(result) == 1
        assert result[0].type == "text"

        # Parse the JSON response
        response_data = json.loads(result[0].text)

        # Check response structure
        assert response_data["status"] == "challenge_created"
        assert response_data["original_statement"] == "All software bugs are caused by syntax errors"
        assert "challenge_prompt" in response_data
        assert "instructions" in response_data

        # Check that the challenge prompt contains critical thinking instructions
        challenge_prompt = response_data["challenge_prompt"]
        assert "CHALLENGE THIS STATEMENT - Do not automatically agree" in challenge_prompt
        assert "Is this actually correct? Check carefully" in challenge_prompt
        assert response_data["original_statement"] in challenge_prompt
        assert "you must say so" in challenge_prompt
        assert "Provide your honest assessment, not automatic agreement" in challenge_prompt

    @pytest.mark.asyncio
    async def test_execute_error_handling(self):
        """Test error handling in execute method"""
        # Test with invalid arguments (non-dict)
        with patch.object(self.tool, "get_request_model", side_effect=Exception("Test error")):
            result = await self.tool.execute({"prompt": "test"})

        assert len(result) == 1
        response_data = json.loads(result[0].text)
        assert response_data["status"] == "error"
        assert "Test error" in response_data["error"]

    def test_wrap_prompt_for_challenge(self):
        """Test the prompt wrapping functionality"""
        original_prompt = "Python is the best programming language"
        wrapped = self.tool._wrap_prompt_for_challenge(original_prompt)

        # Check structure
        assert "CHALLENGE THIS STATEMENT - Do not automatically agree" in wrapped
        assert "Is this actually correct? Check carefully" in wrapped
        assert f'"{original_prompt}"' in wrapped
        assert "you must say so" in wrapped
        assert "Provide your honest assessment, not automatic agreement" in wrapped

    def test_multiple_prompts(self):
        """Test that tool handles various types of prompts correctly"""
        test_prompts = [
            "All code should be written in assembly for maximum performance",
            "Comments are unnecessary if code is self-documenting",
            "Testing is a waste of time for experienced developers",
            "Global variables make code easier to understand",
            "The more design patterns used, the better the code",
        ]

        for prompt in test_prompts:
            request = ChallengeRequest(prompt=prompt)
            wrapped = self.tool._wrap_prompt_for_challenge(request.prompt)

            # Each wrapped prompt should contain the original
            assert prompt in wrapped
            assert "CHALLENGE THIS STATEMENT" in wrapped

    def test_tool_fields(self):
        """Test tool-specific field definitions"""
        fields = self.tool.get_tool_fields()

        assert "prompt" in fields
        assert fields["prompt"]["type"] == "string"
        assert "statement" in fields["prompt"]["description"]
        assert "challenge" in fields["prompt"]["description"]

    def test_required_fields_list(self):
        """Test required fields list"""
        required = self.tool.get_required_fields()
        assert required == ["prompt"]

    @pytest.mark.asyncio
    async def test_not_used_methods(self):
        """Test that methods not used by challenge tool work correctly"""
        request = ChallengeRequest(prompt="test")

        # These methods aren't used since challenge doesn't call AI
        prompt = await self.tool.prepare_prompt(request)
        assert prompt == ""

        response = self.tool.format_response("test response", request)
        assert response == "test response"

    def test_special_characters_in_prompt(self):
        """Test handling of special characters in prompts"""
        special_prompt = 'The "best" way to handle errors is to use try/except: pass'
        request = ChallengeRequest(prompt=special_prompt)
        wrapped = self.tool._wrap_prompt_for_challenge(request.prompt)

        # Should handle quotes properly
        assert special_prompt in wrapped

    @pytest.mark.asyncio
    async def test_unicode_support(self):
        """Test that tool handles unicode characters correctly"""
        unicode_prompt = "软件开发中最重要的是写代码,测试不重要 🚀"
        arguments = {"prompt": unicode_prompt}

        result = await self.tool.execute(arguments)
        response_data = json.loads(result[0].text)

        assert response_data["original_statement"] == unicode_prompt
        assert unicode_prompt in response_data["challenge_prompt"]


if __name__ == "__main__":
    pytest.main([__file__])
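As a quick reference for anyone wiring the tool into a client, the response contract pinned down by test_execute_success looks roughly like the dict below. Only the keys and the "status" value are asserted by the tests; the other values are illustrative placeholders, not output copied from tools/challenge.py.

# Illustrative response shape only - keys and "status" come from the test assertions;
# the challenge_prompt / instructions text here is a placeholder, not the tool's real output.
example_response = {
    "status": "challenge_created",
    "original_statement": "All software bugs are caused by syntax errors",
    "challenge_prompt": "CHALLENGE THIS STATEMENT - Do not automatically agree. ...",
    "instructions": "Respond to the challenge prompt with an honest assessment.",
}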