""" Unit tests to validate UTF-8 localization and encoding of French characters. These tests check: 1. Language instruction generation according to LOCALE 2. UTF-8 encoding with json.dumps(ensure_ascii=False) 3. French characters and emojis are displayed correctly 4. MCP tools return localized content """ import json import os import tempfile import unittest from unittest.mock import Mock, patch import pytest from tools.chat import ChatTool from tools.codereview import CodereviewTool from tools.shared.base_tool import BaseTool class TestUTF8Localization(unittest.TestCase): """Tests for UTF-8 localization and French character encoding.""" def setUp(self): """Test setup.""" self.original_locale = os.getenv("LOCALE") def tearDown(self): """Cleanup after tests.""" if self.original_locale is not None: os.environ["LOCALE"] = self.original_locale else: os.environ.pop("LOCALE", None) def test_language_instruction_generation_french(self): """Test language instruction generation for French.""" # Set LOCALE to French os.environ["LOCALE"] = "fr-FR" # Test get_language_instruction method tool = BaseTool(api_key="test") instruction = tool.get_language_instruction() # Checks self.assertIsInstance(instruction, str) self.assertIn("fr-FR", instruction) self.assertTrue(instruction.endswith("\n\n")) def test_language_instruction_generation_english(self): """Test language instruction generation for English.""" # Set LOCALE to English os.environ["LOCALE"] = "en-US" tool = BaseTool(api_key="test") instruction = tool.get_language_instruction() # Checks self.assertIsInstance(instruction, str) self.assertIn("en-US", instruction) self.assertTrue(instruction.endswith("\n\n")) def test_language_instruction_empty_locale(self): """Test with empty LOCALE.""" # Set LOCALE to empty os.environ["LOCALE"] = "" tool = BaseTool(api_key="test") instruction = tool.get_language_instruction() # Should return empty string self.assertEqual(instruction, "") def test_language_instruction_no_locale(self): """Test with no LOCALE variable set.""" # Remove LOCALE os.environ.pop("LOCALE", None) tool = BaseTool(api_key="test") instruction = tool.get_language_instruction() # Should return empty string self.assertEqual(instruction, "") def test_json_dumps_utf8_encoding(self): """Test that json.dumps uses ensure_ascii=False for UTF-8.""" # Test data with French characters and emojis test_data = { "status": "succès", "message": "Tâche terminée avec succès", "details": { "créé": "2024-01-01", "développeur": "Jean Dupont", "préférences": ["français", "développement"], "emojis": "🔴 🟠 🟡 🟢 ✅ ❌", }, } # Test with ensure_ascii=False (correct) json_correct = json.dumps(test_data, ensure_ascii=False, indent=2) # Check that UTF-8 characters are preserved self.assertIn("succès", json_correct) self.assertIn("terminée", json_correct) self.assertIn("créé", json_correct) self.assertIn("développeur", json_correct) self.assertIn("préférences", json_correct) self.assertIn("français", json_correct) self.assertIn("développement", json_correct) self.assertIn("🔴", json_correct) self.assertIn("🟢", json_correct) self.assertIn("✅", json_correct) # Check that characters are NOT escaped self.assertNotIn("\\u", json_correct) self.assertNotIn("\\ud83d", json_correct) def test_json_dumps_ascii_encoding_comparison(self): """Test comparison between ensure_ascii=True and False.""" test_data = {"message": "Développement réussi! 🎉"} # With ensure_ascii=True (old, incorrect behavior) json_escaped = json.dumps(test_data, ensure_ascii=True) # With ensure_ascii=False (new, correct behavior) json_utf8 = json.dumps(test_data, ensure_ascii=False) # Checks self.assertIn("\\u", json_escaped) # Characters are escaped self.assertNotIn("é", json_escaped) # UTF-8 characters are escaped self.assertNotIn("\\u", json_utf8) # No escaped characters self.assertIn("é", json_utf8) # UTF-8 characters preserved self.assertIn("🎉", json_utf8) # Emojis preserved @patch("tools.shared.base_tool.BaseTool.get_model_provider") def test_chat_tool_french_response(self, mock_get_provider): """Test that the chat tool returns a response in French.""" # Set to French os.environ["LOCALE"] = "fr-FR" # Mock provider mock_provider = Mock() mock_provider.get_provider_type.return_value = Mock(value="test") mock_provider.generate_content.return_value = Mock( content="Bonjour! Je peux vous aider avec vos tâches de développement.", usage={}, model_name="test-model", metadata={}, ) mock_get_provider.return_value = mock_provider # Test chat tool chat_tool = ChatTool() result = chat_tool.execute({"prompt": "Peux-tu m'aider?", "model": "test-model"}) # Checks self.assertIsNotNone(result) self.assertEqual(len(result), 1) # Parse JSON response response_data = json.loads(result[0].text) # Check that response contains French content self.assertIn("status", response_data) self.assertIn("content", response_data) # Check that language instruction was added mock_provider.generate_content.assert_called_once() call_args = mock_provider.generate_content.call_args system_prompt = call_args.kwargs.get("system_prompt", "") self.assertIn("fr-FR", system_prompt) def test_french_characters_in_file_content(self): """Test reading and writing files with French characters.""" # Test content with French characters test_content = """ # System configuration # Created by: Lead Developer # Creation date: December 15, 2024 def process_data(preferences, parameters): ''' Processes data according to user preferences. Args: preferences: User preferences dictionary parameters: Configuration parameters Returns: Processing result ''' return "Processing completed successfully! ✅" # Helper functions def generate_report(): '''Generates a summary report.''' return { "status": "success", "data": "Report generated", "emojis": "📊 📈 📉" } """ # Test writing and reading with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8", delete=False) as f: f.write(test_content) temp_file = f.name try: # Read file with open(temp_file, "r", encoding="utf-8") as f: read_content = f.read() # Checks self.assertEqual(read_content, test_content) self.assertIn("Lead Developer", read_content) self.assertIn("Creation", read_content) self.assertIn("data", read_content) self.assertIn("preferences", read_content) self.assertIn("parameters", read_content) self.assertIn("completed", read_content) self.assertIn("successfully", read_content) self.assertIn("✅", read_content) self.assertIn("success", read_content) self.assertIn("generated", read_content) self.assertIn("📊", read_content) finally: # Cleanup os.unlink(temp_file) def test_system_prompt_integration_french(self): """Test integration of language instruction in system prompts.""" # Set to French os.environ["LOCALE"] = "fr-FR" tool = BaseTool(api_key="test") base_prompt = "You are a helpful assistant." # Test adding language instruction enhanced_prompt = tool.add_language_instruction(base_prompt) # Checks self.assertIn("fr-FR", enhanced_prompt) self.assertIn(base_prompt, enhanced_prompt) self.assertTrue(enhanced_prompt.startswith("Always respond in fr-FR")) def test_system_prompt_integration_no_locale(self): """Test integration with no LOCALE set.""" # No LOCALE os.environ.pop("LOCALE", None) tool = BaseTool(api_key="test") base_prompt = "You are a helpful assistant." # Test adding language instruction enhanced_prompt = tool.add_language_instruction(base_prompt) # Should return original prompt unchanged self.assertEqual(enhanced_prompt, base_prompt) def test_unicode_normalization(self): """Test Unicode normalization for accented characters.""" # Test with different Unicode encodings test_cases = [ "café", # e + acute accent combined "café", # e with precomposed acute accent "naïf", # i + diaeresis "coeur", # oe ligature "été", # e + acute accent ] for text in test_cases: # Test that json.dumps preserves characters json_output = json.dumps({"text": text}, ensure_ascii=False) self.assertIn(text, json_output) # Parse and check parsed = json.loads(json_output) self.assertEqual(parsed["text"], text) def test_emoji_preservation(self): """Test emoji preservation in JSON encoding.""" # Emojis used in Zen MCP tools emojis = [ "🔴", # Critical "🟠", # High "🟡", # Medium "🟢", # Low "✅", # Success "❌", # Error "⚠️", # Warning "📊", # Charts "🎉", # Celebration "🚀", # Rocket "🇫🇷", # French flag ] test_data = {"emojis": emojis, "message": " ".join(emojis)} # Test with ensure_ascii=False json_output = json.dumps(test_data, ensure_ascii=False) # Checks for emoji in emojis: self.assertIn(emoji, json_output) # No escaped characters self.assertNotIn("\\u", json_output) # Test parsing parsed = json.loads(json_output) self.assertEqual(parsed["emojis"], emojis) self.assertEqual(parsed["message"], " ".join(emojis)) class TestLocalizationIntegration(unittest.TestCase): """Integration tests for localization with real tools.""" def setUp(self): """Integration test setup.""" self.original_locale = os.getenv("LOCALE") def tearDown(self): """Cleanup after integration tests.""" if self.original_locale is not None: os.environ["LOCALE"] = self.original_locale else: os.environ.pop("LOCALE", None) @patch("tools.shared.base_tool.BaseTool.get_model_provider") def test_codereview_tool_french_locale(self, mock_get_provider): """Test that the codereview tool uses French localization.""" # Set to French os.environ["LOCALE"] = "fr-FR" # Mock provider with French response mock_provider = Mock() mock_provider.get_provider_type.return_value = Mock(value="test") mock_provider.generate_content.return_value = Mock( content=json.dumps( {"status": "analysis_complete", "raw_analysis": "Code review completed. No critical issues found. 🟢"}, ensure_ascii=False, ), usage={}, model_name="test-model", metadata={}, ) mock_get_provider.return_value = mock_provider # Test codereview tool codereview_tool = CodereviewTool() result = codereview_tool.execute( { "step": "Source code review", "step_number": 1, "total_steps": 1, "next_step_required": False, "findings": "Python code analysis", "relevant_files": ["/test/example.py"], "model": "test-model", } ) # Checks self.assertIsNotNone(result) self.assertEqual(len(result), 1) # Parse JSON response - should be valid UTF-8 response_text = result[0].text response_data = json.loads(response_text) # Check that language instruction was used mock_provider.generate_content.assert_called() call_args = mock_provider.generate_content.call_args system_prompt = call_args.kwargs.get("system_prompt", "") self.assertIn("fr-FR", system_prompt) # Check that response contains UTF-8 characters if "expert_analysis" in response_data: expert_analysis = response_data["expert_analysis"] if "raw_analysis" in expert_analysis: analysis = expert_analysis["raw_analysis"] # Should contain French characters self.assertTrue( any(char in analysis for char in ["é", "è", "à", "ç", "ê", "û", "î", "ô"]) or "🟢" in analysis ) def test_multiple_locales_switching(self): """Test switching locales during execution.""" tool = BaseTool(api_key="test") # French os.environ["LOCALE"] = "fr-FR" instruction_fr = tool.get_language_instruction() self.assertIn("fr-FR", instruction_fr) # English os.environ["LOCALE"] = "en-US" instruction_en = tool.get_language_instruction() self.assertIn("en-US", instruction_en) # Spanish os.environ["LOCALE"] = "es-ES" instruction_es = tool.get_language_instruction() self.assertIn("es-ES", instruction_es) # Chinese os.environ["LOCALE"] = "zh-CN" instruction_zh = tool.get_language_instruction() self.assertIn("zh-CN", instruction_zh) # Check that all instructions are different instructions = [instruction_fr, instruction_en, instruction_es, instruction_zh] for i, inst1 in enumerate(instructions): for j, inst2 in enumerate(instructions): if i != j: self.assertNotEqual(inst1, inst2) if __name__ == "__main__": # Test configuration pytest.main([__file__, "-v", "--tb=short"])