feat: Add LOCALE variable support for responses with UTF-8 JSON encoding.
Description: This feature adds support for UTF-8 encoding in JSON responses, allowing for proper handling of special characters and emojis.
- Implement unit tests for UTF-8 encoding in various model providers including Gemini, OpenAI, and OpenAI Compatible.
- Validate UTF-8 support in token counting, content generation, and error handling.
- Introduce tests for JSON serialization ensuring proper handling of French characters and emojis.
- Create tests for language instruction generation based on locale settings.
- Validate UTF-8 handling in workflow tools including AnalyzeTool, CodereviewTool, and DebugIssueTool.
- Ensure that all tests check for correct UTF-8 character preservation and proper JSON formatting.
- Add integration tests to verify the interaction between locale settings and model responses.
This commit is contained in:
427
tests/test_utf8_localization.py
Normal file
427
tests/test_utf8_localization.py
Normal file
@@ -0,0 +1,427 @@
|
||||
"""
|
||||
Unit tests to validate UTF-8 localization and encoding
|
||||
of French characters.
|
||||
|
||||
These tests check:
|
||||
1. Language instruction generation according to LOCALE
|
||||
2. UTF-8 encoding with json.dumps(ensure_ascii=False)
|
||||
3. French characters and emojis are displayed correctly
|
||||
4. MCP tools return localized content
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.chat import ChatTool
|
||||
from tools.codereview import CodereviewTool
|
||||
from tools.shared.base_tool import BaseTool
|
||||
|
||||
|
||||
class TestUTF8Localization(unittest.TestCase):
    """Tests for UTF-8 localization and French character encoding.

    Exercises the LOCALE-driven language instruction helpers on ``BaseTool``
    and the ``json.dumps(..., ensure_ascii=False)`` round-trip of accented
    characters and emojis.
    """

    def setUp(self):
        """Remember the current LOCALE value so tearDown can restore it."""
        self.original_locale = os.getenv("LOCALE")

    def tearDown(self):
        """Restore LOCALE to its pre-test value, or unset it if it was unset."""
        if self.original_locale is not None:
            os.environ["LOCALE"] = self.original_locale
        else:
            os.environ.pop("LOCALE", None)

    def test_language_instruction_generation_french(self):
        """Test language instruction generation for French."""
        os.environ["LOCALE"] = "fr-FR"

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        self.assertIsInstance(instruction, str)
        self.assertIn("fr-FR", instruction)
        # The instruction is expected to end with a blank line.
        self.assertTrue(instruction.endswith("\n\n"))

    def test_language_instruction_generation_english(self):
        """Test language instruction generation for English."""
        os.environ["LOCALE"] = "en-US"

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        self.assertIsInstance(instruction, str)
        self.assertIn("en-US", instruction)
        self.assertTrue(instruction.endswith("\n\n"))

    def test_language_instruction_empty_locale(self):
        """Test that an empty LOCALE yields an empty instruction."""
        os.environ["LOCALE"] = ""

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        self.assertEqual(instruction, "")

    def test_language_instruction_no_locale(self):
        """Test that an unset LOCALE yields an empty instruction."""
        os.environ.pop("LOCALE", None)

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        self.assertEqual(instruction, "")

    def test_json_dumps_utf8_encoding(self):
        """Test that json.dumps with ensure_ascii=False keeps UTF-8 text verbatim."""
        # Test data with French characters and emojis
        test_data = {
            "status": "succès",
            "message": "Tâche terminée avec succès",
            "details": {
                "créé": "2024-01-01",
                "développeur": "Jean Dupont",
                "préférences": ["français", "développement"],
                "emojis": "🔴 🟠 🟡 🟢 ✅ ❌",
            },
        }

        json_correct = json.dumps(test_data, ensure_ascii=False, indent=2)

        # Every accented word and emoji must appear unescaped in the output.
        expected_fragments = (
            "succès",
            "terminée",
            "créé",
            "développeur",
            "préférences",
            "français",
            "développement",
            "🔴",
            "🟢",
            "✅",
        )
        for fragment in expected_fragments:
            with self.subTest(fragment=fragment):
                self.assertIn(fragment, json_correct)

        # No \uXXXX escapes (including surrogate-pair escapes) may be present.
        self.assertNotIn("\\u", json_correct)
        self.assertNotIn("\\ud83d", json_correct)

    def test_json_dumps_ascii_encoding_comparison(self):
        """Test comparison between ensure_ascii=True and ensure_ascii=False."""
        test_data = {"message": "Développement réussi! 🎉"}

        # ensure_ascii=True: non-ASCII characters become \uXXXX escapes.
        json_escaped = json.dumps(test_data, ensure_ascii=True)
        # ensure_ascii=False: non-ASCII characters are emitted as-is.
        json_utf8 = json.dumps(test_data, ensure_ascii=False)

        self.assertIn("\\u", json_escaped)
        self.assertNotIn("é", json_escaped)

        self.assertNotIn("\\u", json_utf8)
        self.assertIn("é", json_utf8)
        self.assertIn("🎉", json_utf8)

    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    def test_chat_tool_french_response(self, mock_get_provider):
        """Test that the chat tool passes the French language instruction to the model."""
        os.environ["LOCALE"] = "fr-FR"

        # Mock provider returning a canned French reply
        mock_provider = Mock()
        mock_provider.get_provider_type.return_value = Mock(value="test")
        mock_provider.generate_content.return_value = Mock(
            content="Bonjour! Je peux vous aider avec vos tâches de développement.",
            usage={},
            model_name="test-model",
            metadata={},
        )
        mock_get_provider.return_value = mock_provider

        chat_tool = ChatTool()
        result = chat_tool.execute({"prompt": "Peux-tu m'aider?", "model": "test-model"})

        self.assertIsNotNone(result)
        self.assertEqual(len(result), 1)

        # The tool output is expected to be a JSON document.
        response_data = json.loads(result[0].text)
        self.assertIn("status", response_data)
        self.assertIn("content", response_data)

        # The locale must have been injected into the system prompt.
        mock_provider.generate_content.assert_called_once()
        call_args = mock_provider.generate_content.call_args
        system_prompt = call_args.kwargs.get("system_prompt", "")
        self.assertIn("fr-FR", system_prompt)

    def test_french_characters_in_file_content(self):
        """Test that a UTF-8 file round-trips French characters and emojis."""
        # Fixture deliberately contains accented characters and emojis so the
        # write/read cycle actually exercises non-ASCII text.
        test_content = """
# Configuration du système
# Créé par : Développeur principal
# Date de création : 15 décembre 2024

def process_data(preferences, parameters):
    '''
    Processes data according to user preferences.

    Args:
        preferences: User preferences dictionary
        parameters: Configuration parameters

    Returns:
        Processing result
    '''
    return "Traitement terminé avec succès ! ✅"

# Helper functions
def generate_report():
    '''Generates a summary report.'''
    return {
        "status": "success",
        "data": "Rapport généré",
        "emojis": "📊 📈 📉"
    }
"""

        # The temporary directory (and the file in it) is removed even if
        # writing or reading fails part-way through.
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_file = os.path.join(tmp_dir, "utf8_sample.py")

            with open(temp_file, "w", encoding="utf-8") as f:
                f.write(test_content)

            with open(temp_file, encoding="utf-8") as f:
                read_content = f.read()

        self.assertEqual(read_content, test_content)

        expected_fragments = (
            "système",
            "Créé",
            "Développeur",
            "création",
            "décembre",
            "preferences",
            "parameters",
            "terminé",
            "succès",
            "généré",
            "✅",
            "📊",
        )
        for fragment in expected_fragments:
            with self.subTest(fragment=fragment):
                self.assertIn(fragment, read_content)

    def test_system_prompt_integration_french(self):
        """Test integration of the language instruction into system prompts."""
        os.environ["LOCALE"] = "fr-FR"

        tool = BaseTool(api_key="test")
        base_prompt = "You are a helpful assistant."

        enhanced_prompt = tool.add_language_instruction(base_prompt)

        self.assertIn("fr-FR", enhanced_prompt)
        self.assertIn(base_prompt, enhanced_prompt)
        # The instruction is expected to be prepended to the base prompt.
        self.assertTrue(enhanced_prompt.startswith("Always respond in fr-FR"))

    def test_system_prompt_integration_no_locale(self):
        """Test that the prompt is returned unchanged when LOCALE is not set."""
        os.environ.pop("LOCALE", None)

        tool = BaseTool(api_key="test")
        base_prompt = "You are a helpful assistant."

        enhanced_prompt = tool.add_language_instruction(base_prompt)

        self.assertEqual(enhanced_prompt, base_prompt)

    def test_unicode_normalization(self):
        """Test that JSON round-trips accented text without changing its Unicode form."""
        import unicodedata  # local import: only this test needs it

        # Explicit escapes make the two encodings of "café" unambiguous,
        # regardless of how an editor or tool normalizes the source file.
        decomposed = "cafe\u0301"  # e followed by U+0301 combining acute accent
        precomposed = "caf\u00e9"  # single precomposed U+00E9

        # Sanity-check the fixtures: different code points, same NFC form.
        self.assertNotEqual(decomposed, precomposed)
        self.assertEqual(unicodedata.normalize("NFC", decomposed), precomposed)

        test_cases = [
            decomposed,
            precomposed,
            "na\u00eff",  # naïf: i with diaeresis
            "c\u0153ur",  # cœur: oe ligature
            "\u00e9t\u00e9",  # été: e with acute accent
        ]

        for text in test_cases:
            with self.subTest(text=text):
                # json.dumps must emit the characters exactly as given
                json_output = json.dumps({"text": text}, ensure_ascii=False)
                self.assertIn(text, json_output)

                # ...and json.loads must give back the identical string
                parsed = json.loads(json_output)
                self.assertEqual(parsed["text"], text)

    def test_emoji_preservation(self):
        """Test emoji preservation in JSON encoding."""
        # Emojis used in Zen MCP tools
        emojis = [
            "🔴",  # Critical
            "🟠",  # High
            "🟡",  # Medium
            "🟢",  # Low
            "✅",  # Success
            "❌",  # Error
            "⚠️",  # Warning
            "📊",  # Charts
            "🎉",  # Celebration
            "🚀",  # Rocket
            "🇫🇷",  # French flag
        ]
        joined = " ".join(emojis)
        test_data = {"emojis": emojis, "message": joined}

        json_output = json.dumps(test_data, ensure_ascii=False)

        for emoji in emojis:
            with self.subTest(emoji=emoji):
                self.assertIn(emoji, json_output)

        # No escaped characters
        self.assertNotIn("\\u", json_output)

        # Round-trip through the parser
        parsed = json.loads(json_output)
        self.assertEqual(parsed["emojis"], emojis)
        self.assertEqual(parsed["message"], joined)
|
||||
|
||||
|
||||
class TestLocalizationIntegration(unittest.TestCase):
    """Integration tests for localization with real tools."""

    def setUp(self):
        """Remember the current LOCALE value so tearDown can restore it."""
        self.original_locale = os.getenv("LOCALE")

    def tearDown(self):
        """Restore LOCALE to its pre-test value, or unset it if it was unset."""
        if self.original_locale is not None:
            os.environ["LOCALE"] = self.original_locale
        else:
            os.environ.pop("LOCALE", None)

    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    def test_codereview_tool_french_locale(self, mock_get_provider):
        """Test that the codereview tool uses French localization."""
        os.environ["LOCALE"] = "fr-FR"

        # Mock provider with a French response (accented characters + emoji)
        mock_provider = Mock()
        mock_provider.get_provider_type.return_value = Mock(value="test")
        mock_provider.generate_content.return_value = Mock(
            content=json.dumps(
                {
                    "status": "analysis_complete",
                    "raw_analysis": "Revue de code terminée. Aucun problème critique trouvé. 🟢",
                },
                ensure_ascii=False,
            ),
            usage={},
            model_name="test-model",
            metadata={},
        )
        mock_get_provider.return_value = mock_provider

        codereview_tool = CodereviewTool()
        result = codereview_tool.execute(
            {
                "step": "Source code review",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Python code analysis",
                "relevant_files": ["/test/example.py"],
                "model": "test-model",
            }
        )

        self.assertIsNotNone(result)
        self.assertEqual(len(result), 1)

        # The tool output is expected to be a JSON document.
        response_text = result[0].text
        response_data = json.loads(response_text)

        # The locale must have been injected into the system prompt.
        mock_provider.generate_content.assert_called()
        call_args = mock_provider.generate_content.call_args
        system_prompt = call_args.kwargs.get("system_prompt", "")
        self.assertIn("fr-FR", system_prompt)

        # NOTE(review): this content check only runs when the response carries
        # expert_analysis.raw_analysis; otherwise it is silently skipped.
        # Confirm the response shape and make it unconditional if possible.
        if "expert_analysis" in response_data:
            expert_analysis = response_data["expert_analysis"]
            if "raw_analysis" in expert_analysis:
                analysis = expert_analysis["raw_analysis"]
                accented = ["é", "è", "à", "ç", "ê", "û", "î", "ô"]
                self.assertTrue(any(char in analysis for char in accented) or "🟢" in analysis)

    def test_multiple_locales_switching(self):
        """Test switching locales during execution on a single tool instance."""
        tool = BaseTool(api_key="test")

        # French, English, Spanish, Chinese - switched on the same instance.
        instructions = []
        for locale in ("fr-FR", "en-US", "es-ES", "zh-CN"):
            os.environ["LOCALE"] = locale
            instruction = tool.get_language_instruction()
            self.assertIn(locale, instruction)
            instructions.append(instruction)

        # All instructions must be pairwise different: a set drops duplicates,
        # so equal sizes means no two instructions are the same.
        self.assertEqual(len(set(instructions)), len(instructions))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this module's tests verbosely with short tracebacks, and exit with
    # pytest's return code so a failing run is visible to the shell / CI.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
|
||||
Reference in New Issue
Block a user