feat: Add LOCALE variable support for responses with UTF-8 JSON encoding.

Description: This feature adds support for UTF-8 encoding in JSON responses, allowing for proper handling of special characters and emojis.
- Implement unit tests for UTF-8 encoding in various model providers including Gemini, OpenAI, and OpenAI Compatible.
- Validate UTF-8 support in token counting, content generation, and error handling.
- Introduce tests for JSON serialization ensuring proper handling of French characters and emojis.
- Create tests for language instruction generation based on locale settings.
- Validate UTF-8 handling in workflow tools including AnalyzeTool, CodereviewTool, and DebugIssueTool.
- Ensure that all tests check for correct UTF-8 character preservation and proper JSON formatting.
- Add integration tests to verify the interaction between locale settings and model responses.
2025-06-22 19:13:02 +02:00
parent 132c6ca025
commit e9c5662b3a
22 changed files with 1994 additions and 49 deletions
--- a/tests/test_utf8_localization.py
+++ b/tests/test_utf8_localization.py
@@ -0,0 +1,427 @@
+"""
+Unit tests to validate UTF-8 localization and encoding
+of French characters.
+
+These tests check:
+1. Language instruction generation according to LOCALE
+2. UTF-8 encoding with json.dumps(ensure_ascii=False)
+3. French characters and emojis are displayed correctly
+4. MCP tools return localized content
+"""
+
+import json
+import os
+import tempfile
+import unittest
+from unittest.mock import Mock, patch
+
+import pytest
+
+from tools.chat import ChatTool
+from tools.codereview import CodereviewTool
+from tools.shared.base_tool import BaseTool
+
+
+class TestUTF8Localization(unittest.TestCase):
+    """Tests for UTF-8 localization and French character encoding."""
+
+    def setUp(self):
+        """Test setup."""
+        self.original_locale = os.getenv("LOCALE")
+
+    def tearDown(self):
+        """Cleanup after tests."""
+        if self.original_locale is not None:
+            os.environ["LOCALE"] = self.original_locale
+        else:
+            os.environ.pop("LOCALE", None)
+
+    def test_language_instruction_generation_french(self):
+        """Test language instruction generation for French."""
+        # Set LOCALE to French
+        os.environ["LOCALE"] = "fr-FR"
+
+        # Build the tool and ask it for its language instruction
+        tool = BaseTool(api_key="test")
+        instruction = tool.get_language_instruction()
+
+        # The instruction must be a string that names the locale and ends with a blank line
+        self.assertIsInstance(instruction, str)
+        self.assertIn("fr-FR", instruction)
+        self.assertTrue(instruction.endswith("\n\n"))
+
+    def test_language_instruction_generation_english(self):
+        """Test language instruction generation for English."""
+        # Set LOCALE to English
+        os.environ["LOCALE"] = "en-US"
+
+        tool = BaseTool(api_key="test")
+        instruction = tool.get_language_instruction()
+
+        # The instruction must be a string that names the locale and ends with a blank line
+        self.assertIsInstance(instruction, str)
+        self.assertIn("en-US", instruction)
+        self.assertTrue(instruction.endswith("\n\n"))
+
+    def test_language_instruction_empty_locale(self):
+        """Test with empty LOCALE."""
+        # Set LOCALE to the empty string (variable present but blank)
+        os.environ["LOCALE"] = ""
+
+        tool = BaseTool(api_key="test")
+        instruction = tool.get_language_instruction()
+
+        # Should return empty string
+        self.assertEqual(instruction, "")
+
+    def test_language_instruction_no_locale(self):
+        """Test with no LOCALE variable set."""
+        # Remove LOCALE entirely; pop() with a default tolerates it already being unset
+        os.environ.pop("LOCALE", None)
+
+        tool = BaseTool(api_key="test")
+        instruction = tool.get_language_instruction()
+
+        # Should return empty string
+        self.assertEqual(instruction, "")
+
+    def test_json_dumps_utf8_encoding(self):
+        """Test that json.dumps uses ensure_ascii=False for UTF-8."""
+        # Test data with French characters and emojis
+        test_data = {
+            "status": "succès",
+            "message": "Tâche terminée avec succès",
+            "details": {
+                "créé": "2024-01-01",
+                "développeur": "Jean Dupont",
+                "préférences": ["français", "développement"],
+                "emojis": "🔴 🟠 🟡 🟢 ✅ ❌",
+            },
+        }
+
+        # Serialize with ensure_ascii=False so non-ASCII characters are emitted as-is
+        json_correct = json.dumps(test_data, ensure_ascii=False, indent=2)
+
+        # Check that UTF-8 characters are preserved
+        self.assertIn("succès", json_correct)
+        self.assertIn("terminée", json_correct)
+        self.assertIn("créé", json_correct)
+        self.assertIn("développeur", json_correct)
+        self.assertIn("préférences", json_correct)
+        self.assertIn("français", json_correct)
+        self.assertIn("développement", json_correct)
+        self.assertIn("🔴", json_correct)
+        self.assertIn("🟢", json_correct)
+        self.assertIn("✅", json_correct)
+
+        # Check that characters are NOT escaped
+        self.assertNotIn("\\u", json_correct)
+        self.assertNotIn("\\ud83d", json_correct)  # NOTE(review): already implied by the "\\u" check above
+
+    def test_json_dumps_ascii_encoding_comparison(self):
+        """Test comparison between ensure_ascii=True and False."""
+        test_data = {"message": "Développement réussi! 🎉"}
+
+        # With ensure_ascii=True (old, incorrect behavior)
+        json_escaped = json.dumps(test_data, ensure_ascii=True)
+
+        # With ensure_ascii=False (new, correct behavior)
+        json_utf8 = json.dumps(test_data, ensure_ascii=False)
+
+        # The two outputs must differ exactly in whether non-ASCII text is escaped
+        self.assertIn("\\u", json_escaped)  # Characters are escaped
+        self.assertNotIn("é", json_escaped)  # UTF-8 characters are escaped
+
+        self.assertNotIn("\\u", json_utf8)  # No escaped characters
+        self.assertIn("é", json_utf8)  # UTF-8 characters preserved
+        self.assertIn("🎉", json_utf8)  # Emojis preserved
+
+    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
+    def test_chat_tool_french_response(self, mock_get_provider):
+        """Test that the chat tool returns a response in French."""
+        # Set to French
+        os.environ["LOCALE"] = "fr-FR"
+
+        # Mock provider whose generate_content returns a canned French reply
+        mock_provider = Mock()
+        mock_provider.get_provider_type.return_value = Mock(value="test")
+        mock_provider.generate_content.return_value = Mock(
+            content="Bonjour! Je peux vous aider avec vos tâches de développement.",
+            usage={},
+            model_name="test-model",
+            metadata={},
+        )
+        mock_get_provider.return_value = mock_provider
+
+        # Run the chat tool (NOTE(review): execute() is called without await; confirm it is synchronous)
+        chat_tool = ChatTool()
+        result = chat_tool.execute({"prompt": "Peux-tu m'aider?", "model": "test-model"})
+
+        # Exactly one result item is expected
+        self.assertIsNotNone(result)
+        self.assertEqual(len(result), 1)
+
+        # Parse JSON response
+        response_data = json.loads(result[0].text)
+
+        # Check that the response envelope has the expected keys (the text itself is not inspected)
+        self.assertIn("status", response_data)
+        self.assertIn("content", response_data)
+
+        # Check that the locale reached the provider through the system_prompt keyword argument
+        mock_provider.generate_content.assert_called_once()
+        call_args = mock_provider.generate_content.call_args
+        system_prompt = call_args.kwargs.get("system_prompt", "")
+        self.assertIn("fr-FR", system_prompt)
+
+    def test_french_characters_in_file_content(self):
+        """Test reading and writing files with French characters."""
+        # Test content (NOTE(review): it contains emojis but no accented French characters)
+        test_content = """
+# System configuration
+# Created by: Lead Developer
+# Creation date: December 15, 2024
+
+def process_data(preferences, parameters):
+    '''
+    Processes data according to user preferences.
+    
+    Args:
+        preferences: User preferences dictionary
+        parameters: Configuration parameters
+        
+    Returns:
+        Processing result
+    '''
+    return "Processing completed successfully! ✅"
+
+# Helper functions
+def generate_report():
+    '''Generates a summary report.'''
+    return {
+        "status": "success",
+        "data": "Report generated",
+        "emojis": "📊 📈 📉"
+    }
+"""
+
+        # Write the content to a UTF-8 temp file that outlives the with-block (delete=False)
+        with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8", delete=False) as f:
+            f.write(test_content)
+            temp_file = f.name
+
+        try:
+            # Read the file back with the same encoding
+            with open(temp_file, "r", encoding="utf-8") as f:
+                read_content = f.read()
+
+            # The round trip must be lossless, including the emojis
+            self.assertEqual(read_content, test_content)
+            self.assertIn("Lead Developer", read_content)
+            self.assertIn("Creation", read_content)
+            self.assertIn("data", read_content)
+            self.assertIn("preferences", read_content)
+            self.assertIn("parameters", read_content)
+            self.assertIn("completed", read_content)
+            self.assertIn("successfully", read_content)
+            self.assertIn("✅", read_content)
+            self.assertIn("success", read_content)
+            self.assertIn("generated", read_content)
+            self.assertIn("📊", read_content)
+
+        finally:
+            # Cleanup: the file was created with delete=False, so remove it explicitly
+            os.unlink(temp_file)
+
+    def test_system_prompt_integration_french(self):
+        """Test integration of language instruction in system prompts."""
+        # Set to French
+        os.environ["LOCALE"] = "fr-FR"
+
+        tool = BaseTool(api_key="test")
+        base_prompt = "You are a helpful assistant."
+
+        # Test adding language instruction
+        enhanced_prompt = tool.add_language_instruction(base_prompt)
+
+        # The result must start with the instruction and still contain the original prompt
+        self.assertIn("fr-FR", enhanced_prompt)
+        self.assertIn(base_prompt, enhanced_prompt)
+        self.assertTrue(enhanced_prompt.startswith("Always respond in fr-FR"))
+
+    def test_system_prompt_integration_no_locale(self):
+        """Test integration with no LOCALE set."""
+        # No LOCALE
+        os.environ.pop("LOCALE", None)
+
+        tool = BaseTool(api_key="test")
+        base_prompt = "You are a helpful assistant."
+
+        # Test adding language instruction
+        enhanced_prompt = tool.add_language_instruction(base_prompt)
+
+        # Should return original prompt unchanged
+        self.assertEqual(enhanced_prompt, base_prompt)
+
+    def test_unicode_normalization(self):
+        """Test Unicode normalization for accented characters."""
+        # Sample accented words (NOTE(review): only a JSON round trip is tested; nothing is normalized here)
+        test_cases = [
+            "café",  # intended as e + combining acute accent; TODO confirm the literal is really decomposed
+            "café",  # e with precomposed acute accent
+            "naïf",  # i + diaeresis
+            "coeur",  # NOTE(review): plain "oe" here, not the œ ligature
+            "été",  # e + acute accent
+        ]
+
+        for text in test_cases:
+            # Test that json.dumps preserves characters
+            json_output = json.dumps({"text": text}, ensure_ascii=False)
+            self.assertIn(text, json_output)
+
+            # Parse and check
+            parsed = json.loads(json_output)
+            self.assertEqual(parsed["text"], text)
+
+    def test_emoji_preservation(self):
+        """Test emoji preservation in JSON encoding."""
+        # Emojis used in Zen MCP tools
+        emojis = [
+            "🔴",  # Critical
+            "🟠",  # High
+            "🟡",  # Medium
+            "🟢",  # Low
+            "✅",  # Success
+            "❌",  # Error
+            "⚠️",  # Warning
+            "📊",  # Charts
+            "🎉",  # Celebration
+            "🚀",  # Rocket
+            "🇫🇷",  # French flag
+        ]
+
+        test_data = {"emojis": emojis, "message": " ".join(emojis)}
+
+        # Test with ensure_ascii=False
+        json_output = json.dumps(test_data, ensure_ascii=False)
+
+        # Every emoji must appear verbatim in the serialized text
+        for emoji in emojis:
+            self.assertIn(emoji, json_output)
+
+        # No escaped characters
+        self.assertNotIn("\\u", json_output)
+
+        # Parsing must give back the original values
+        parsed = json.loads(json_output)
+        self.assertEqual(parsed["emojis"], emojis)
+        self.assertEqual(parsed["message"], " ".join(emojis))
+
+
+class TestLocalizationIntegration(unittest.TestCase):
+    """Integration tests for localization with real tools."""
+
+    def setUp(self):
+        """Integration test setup."""
+        self.original_locale = os.getenv("LOCALE")
+
+    def tearDown(self):
+        """Cleanup after integration tests."""
+        if self.original_locale is not None:
+            os.environ["LOCALE"] = self.original_locale
+        else:
+            os.environ.pop("LOCALE", None)
+
+    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
+    def test_codereview_tool_french_locale(self, mock_get_provider):
+        """Test that the codereview tool uses French localization."""
+        # Set to French
+        os.environ["LOCALE"] = "fr-FR"
+
+        # Mock provider returning a JSON payload (NOTE(review): the text is English plus an emoji, not French)
+        mock_provider = Mock()
+        mock_provider.get_provider_type.return_value = Mock(value="test")
+        mock_provider.generate_content.return_value = Mock(
+            content=json.dumps(
+                {"status": "analysis_complete", "raw_analysis": "Code review completed. No critical issues found. 🟢"},
+                ensure_ascii=False,
+            ),
+            usage={},
+            model_name="test-model",
+            metadata={},
+        )
+        mock_get_provider.return_value = mock_provider
+
+        # Run the codereview tool (NOTE(review): execute() is called without await; confirm it is synchronous)
+        codereview_tool = CodereviewTool()
+        result = codereview_tool.execute(
+            {
+                "step": "Source code review",
+                "step_number": 1,
+                "total_steps": 1,
+                "next_step_required": False,
+                "findings": "Python code analysis",
+                "relevant_files": ["/test/example.py"],
+                "model": "test-model",
+            }
+        )
+
+        # Exactly one result item is expected
+        self.assertIsNotNone(result)
+        self.assertEqual(len(result), 1)
+
+        # Parse JSON response - should be valid UTF-8
+        response_text = result[0].text
+        response_data = json.loads(response_text)
+
+        # Check that the locale reached the provider through the system_prompt keyword argument
+        mock_provider.generate_content.assert_called()
+        call_args = mock_provider.generate_content.call_args
+        system_prompt = call_args.kwargs.get("system_prompt", "")
+        self.assertIn("fr-FR", system_prompt)
+
+        # Only checked when the response has expert_analysis.raw_analysis; otherwise nothing is asserted
+        if "expert_analysis" in response_data:
+            expert_analysis = response_data["expert_analysis"]
+            if "raw_analysis" in expert_analysis:
+                analysis = expert_analysis["raw_analysis"]
+                # Accept any of the listed accented characters or the 🟢 emoji
+                self.assertTrue(
+                    any(char in analysis for char in ["é", "è", "à", "ç", "ê", "û", "î", "ô"]) or "🟢" in analysis
+                )
+
+    def test_multiple_locales_switching(self):
+        """Test switching locales during execution."""
+        tool = BaseTool(api_key="test")
+
+        # French
+        os.environ["LOCALE"] = "fr-FR"
+        instruction_fr = tool.get_language_instruction()
+        self.assertIn("fr-FR", instruction_fr)
+
+        # English
+        os.environ["LOCALE"] = "en-US"
+        instruction_en = tool.get_language_instruction()
+        self.assertIn("en-US", instruction_en)
+
+        # Spanish
+        os.environ["LOCALE"] = "es-ES"
+        instruction_es = tool.get_language_instruction()
+        self.assertIn("es-ES", instruction_es)
+
+        # Chinese
+        os.environ["LOCALE"] = "zh-CN"
+        instruction_zh = tool.get_language_instruction()
+        self.assertIn("zh-CN", instruction_zh)
+
+        # Check that all instructions are pairwise different (each ordered pair i != j is compared)
+        instructions = [instruction_fr, instruction_en, instruction_es, instruction_zh]
+        for i, inst1 in enumerate(instructions):
+            for j, inst2 in enumerate(instructions):
+                if i != j:
+                    self.assertNotEqual(inst1, inst2)
+
+if __name__ == "__main__":
+    # Run this file's tests through pytest, verbosely and with short tracebacks
+    pytest.main([__file__, "-v", "--tb=short"])