# my-pal-mcp-server/tests/test_utf8_localization.py

"""
Unit tests to validate UTF-8 localization and encoding
of French characters.

These tests check:
1. Language instruction generation according to LOCALE
2. UTF-8 encoding with json.dumps(ensure_ascii=False)
3. French characters and emojis are displayed correctly
4. MCP tools return localized content
"""

import json
import os
import tempfile
import unittest
from unittest.mock import Mock, patch

import pytest

from tools.chat import ChatTool
from tools.codereview import CodeReviewTool
from tools.shared.base_tool import BaseTool


class TestUTF8Localization(unittest.TestCase):
    """Tests for UTF-8 localization and French character encoding."""

    def setUp(self):
        """Test setup."""
        self.original_locale = os.getenv("LOCALE")

    def tearDown(self):
        """Cleanup after tests."""
        if self.original_locale is not None:
            os.environ["LOCALE"] = self.original_locale
        else:
            os.environ.pop("LOCALE", None)

    def test_language_instruction_generation_french(self):
        """Test language instruction generation for French."""
        # Set LOCALE to French
        os.environ["LOCALE"] = "fr-FR"

        # Test get_language_instruction method
        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        # Checks
        self.assertIsInstance(instruction, str)
        self.assertIn("fr-FR", instruction)
        self.assertTrue(instruction.endswith("\n\n"))

    def test_language_instruction_generation_english(self):
        """Test language instruction generation for English."""
        # Set LOCALE to English
        os.environ["LOCALE"] = "en-US"

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        # Checks
        self.assertIsInstance(instruction, str)
        self.assertIn("en-US", instruction)
        self.assertTrue(instruction.endswith("\n\n"))

    def test_language_instruction_empty_locale(self):
        """Test with empty LOCALE."""
        # Set LOCALE to empty
        os.environ["LOCALE"] = ""

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        # Should return empty string
        self.assertEqual(instruction, "")

    def test_language_instruction_no_locale(self):
        """Test with no LOCALE variable set."""
        # Remove LOCALE
        os.environ.pop("LOCALE", None)

        tool = BaseTool(api_key="test")
        instruction = tool.get_language_instruction()

        # Should return empty string
        self.assertEqual(instruction, "")

    def test_json_dumps_utf8_encoding(self):
        """Test that json.dumps uses ensure_ascii=False for UTF-8."""
        # Test data with French characters and emojis
        test_data = {
            "status": "succès",
            "message": "Tâche terminée avec succès",
            "details": {
                "créé": "2024-01-01",
                "développeur": "Jean Dupont",
                "préférences": ["français", "développement"],
                "emojis": "🔴 🟠 🟡 🟢 ✅ ❌",
            },
        }

        # Test with ensure_ascii=False (correct)
        json_correct = json.dumps(test_data, ensure_ascii=False, indent=2)

        # Check that UTF-8 characters are preserved
        self.assertIn("succès", json_correct)
        self.assertIn("terminée", json_correct)
        self.assertIn("créé", json_correct)
        self.assertIn("développeur", json_correct)
        self.assertIn("préférences", json_correct)
        self.assertIn("français", json_correct)
        self.assertIn("développement", json_correct)
        self.assertIn("🔴", json_correct)
        self.assertIn("🟢", json_correct)
        self.assertIn("✅", json_correct)

        # Check that characters are NOT escaped
        self.assertNotIn("\\u", json_correct)
        self.assertNotIn("\\ud83d", json_correct)

    def test_json_dumps_ascii_encoding_comparison(self):
        """Test comparison between ensure_ascii=True and False."""
        test_data = {"message": "Développement réussi! 🎉"}

        # With ensure_ascii=True (old, incorrect behavior)
        json_escaped = json.dumps(test_data, ensure_ascii=True)

        # With ensure_ascii=False (new, correct behavior)
        json_utf8 = json.dumps(test_data, ensure_ascii=False)

        # Checks
        self.assertIn("\\u", json_escaped)  # Characters are escaped
        self.assertNotIn("é", json_escaped)  # UTF-8 characters are escaped

        self.assertNotIn("\\u", json_utf8)  # No escaped characters
        self.assertIn("é", json_utf8)  # UTF-8 characters preserved
        self.assertIn("🎉", json_utf8)  # Emojis preserved

    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    def test_chat_tool_french_response(self, mock_get_provider):
        """Test that the chat tool returns a response in French."""
        # Set to French
        os.environ["LOCALE"] = "fr-FR"

        # Mock provider
        mock_provider = Mock()
        mock_provider.get_provider_type.return_value = Mock(value="test")
        mock_provider.generate_content.return_value = Mock(
            content="Bonjour! Je peux vous aider avec vos tâches de développement.",
            usage={},
            model_name="test-model",
            metadata={},
        )
        mock_get_provider.return_value = mock_provider

        # Test chat tool
        chat_tool = ChatTool()
        result = chat_tool.execute({"prompt": "Peux-tu m'aider?", "model": "test-model"})

        # Checks
        self.assertIsNotNone(result)
        self.assertEqual(len(result), 1)

        # Parse JSON response
        response_data = json.loads(result[0].text)

        # Check that response contains French content
        self.assertIn("status", response_data)
        self.assertIn("content", response_data)

        # Check that language instruction was added
        mock_provider.generate_content.assert_called_once()
        call_args = mock_provider.generate_content.call_args
        system_prompt = call_args.kwargs.get("system_prompt", "")
        self.assertIn("fr-FR", system_prompt)

    def test_french_characters_in_file_content(self):
        """Test reading and writing files with French characters."""
        # Test content with French characters
        test_content = """
# System configuration
# Created by: Lead Developer
# Creation date: December 15, 2024

def process_data(preferences, parameters):
    '''
    Processes data according to user preferences.

    Args:
        preferences: User preferences dictionary
        parameters: Configuration parameters

    Returns:
        Processing result
    '''
    return "Processing completed successfully! ✅"

# Helper functions
def generate_report():
    '''Generates a summary report.'''
    return {
        "status": "success",
        "data": "Report generated",
        "emojis": "📊 📈 📉"
    }
"""

        # Test writing and reading
        with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8", delete=False) as f:
            f.write(test_content)
            temp_file = f.name

        try:
            # Read file
            with open(temp_file, "r", encoding="utf-8") as f:
                read_content = f.read()

            # Checks
            self.assertEqual(read_content, test_content)
            self.assertIn("Lead Developer", read_content)
            self.assertIn("Creation", read_content)
            self.assertIn("data", read_content)
            self.assertIn("preferences", read_content)
            self.assertIn("parameters", read_content)
            self.assertIn("completed", read_content)
            self.assertIn("successfully", read_content)
            self.assertIn("✅", read_content)
            self.assertIn("success", read_content)
            self.assertIn("generated", read_content)
            self.assertIn("📊", read_content)

        finally:
            # Cleanup
            os.unlink(temp_file)

    def test_system_prompt_integration_french(self):
        """Test integration of language instruction in system prompts."""
        # Set to French
        os.environ["LOCALE"] = "fr-FR"

        tool = BaseTool(api_key="test")
        base_prompt = "You are a helpful assistant."

        # Test adding language instruction
        enhanced_prompt = tool.add_language_instruction(base_prompt)

        # Checks
        self.assertIn("fr-FR", enhanced_prompt)
        self.assertIn(base_prompt, enhanced_prompt)
        self.assertTrue(enhanced_prompt.startswith("Always respond in fr-FR"))

    def test_system_prompt_integration_no_locale(self):
        """Test integration with no LOCALE set."""
        # No LOCALE
        os.environ.pop("LOCALE", None)

        tool = BaseTool(api_key="test")
        base_prompt = "You are a helpful assistant."

        # Test adding language instruction
        enhanced_prompt = tool.add_language_instruction(base_prompt)

        # Should return original prompt unchanged
        self.assertEqual(enhanced_prompt, base_prompt)

    def test_unicode_normalization(self):
        """Test Unicode normalization for accented characters."""
        # Test with different Unicode encodings
        test_cases = [
            "café",  # e + acute accent combined
            "café",  # e with precomposed acute accent
            "naïf",  # i + diaeresis
            "coeur",  # oe ligature
            "été",  # e + acute accent
        ]

        for text in test_cases:
            # Test that json.dumps preserves characters
            json_output = json.dumps({"text": text}, ensure_ascii=False)
            self.assertIn(text, json_output)

            # Parse and check
            parsed = json.loads(json_output)
            self.assertEqual(parsed["text"], text)

    def test_emoji_preservation(self):
        """Test emoji preservation in JSON encoding."""
        # Emojis used in Zen MCP tools
        emojis = [
            "🔴",  # Critical
            "🟠",  # High
            "🟡",  # Medium
            "🟢",  # Low
            "✅",  # Success
            "❌",  # Error
            "⚠️",  # Warning
            "📊",  # Charts
            "🎉",  # Celebration
            "🚀",  # Rocket
            "🇫🇷",  # French flag
        ]

        test_data = {"emojis": emojis, "message": " ".join(emojis)}

        # Test with ensure_ascii=False
        json_output = json.dumps(test_data, ensure_ascii=False)

        # Checks
        for emoji in emojis:
            self.assertIn(emoji, json_output)

        # No escaped characters
        self.assertNotIn("\\u", json_output)

        # Test parsing
        parsed = json.loads(json_output)
        self.assertEqual(parsed["emojis"], emojis)
        self.assertEqual(parsed["message"], " ".join(emojis))


class TestLocalizationIntegration(unittest.TestCase):
    """Integration tests for localization with real tools.

    The model provider is mocked; the tool classes themselves are real.
    ``setUp``/``tearDown`` snapshot and restore ``LOCALE`` around each test.
    """

    def setUp(self):
        """Record the current LOCALE value (or its absence) for tearDown."""
        self.original_locale = os.getenv("LOCALE")

    def tearDown(self):
        """Restore LOCALE to the state recorded in setUp."""
        if self.original_locale is not None:
            os.environ["LOCALE"] = self.original_locale
        else:
            # LOCALE was unset before the test; make sure it is unset again.
            os.environ.pop("LOCALE", None)

    @patch("tools.shared.base_tool.BaseTool.get_model_provider")
    def test_codereview_tool_french_locale(self, mock_get_provider):
        """Test that the codereview tool uses French localization."""
        os.environ["LOCALE"] = "fr-FR"

        # Mock provider whose JSON payload really contains accented French
        # text, so the character check at the end can exercise its French
        # branch and not only the emoji fallback.
        mock_provider = Mock()
        mock_provider.get_provider_type.return_value = Mock(value="test")
        mock_provider.generate_content.return_value = Mock(
            content=json.dumps(
                {
                    "status": "analysis_complete",
                    "raw_analysis": "Revue de code terminée. Aucun problème critique détecté. 🟢",
                },
                ensure_ascii=False,
            ),
            usage={},
            model_name="test-model",
            metadata={},
        )
        mock_get_provider.return_value = mock_provider

        codereview_tool = CodeReviewTool()
        result = codereview_tool.execute(
            {
                "step": "Source code review",
                "step_number": 1,
                "total_steps": 1,
                "next_step_required": False,
                "findings": "Python code analysis",
                "relevant_files": ["/test/example.py"],
                "model": "test-model",
            }
        )

        self.assertIsNotNone(result)
        self.assertEqual(len(result), 1)

        # The single result item must carry parseable JSON.
        response_text = result[0].text
        response_data = json.loads(response_text)

        # The locale must have reached the provider via the system prompt.
        mock_provider.generate_content.assert_called()
        call_args = mock_provider.generate_content.call_args
        system_prompt = call_args.kwargs.get("system_prompt", "")
        self.assertIn("fr-FR", system_prompt)

        # NOTE(review): this check is skipped silently when the response has
        # no "expert_analysis"/"raw_analysis" entry - confirm the expected
        # response shape and consider asserting on it unconditionally.
        if "expert_analysis" in response_data:
            expert_analysis = response_data["expert_analysis"]
            if "raw_analysis" in expert_analysis:
                analysis = expert_analysis["raw_analysis"]
                # Accented characters or the emoji must have survived intact.
                self.assertTrue(
                    any(char in analysis for char in ["é", "è", "à", "ç", "ê", "û", "î", "ô"]) or "🟢" in analysis
                )

    def test_multiple_locales_switching(self):
        """Test switching locales during execution.

        One tool instance is reused across all locales: the instruction is
        expected to follow the LOCALE value current at call time.
        """
        tool = BaseTool(api_key="test")

        locale_codes = ("fr-FR", "en-US", "es-ES", "zh-CN")
        instructions = []
        for locale_code in locale_codes:
            with self.subTest(locale=locale_code):
                os.environ["LOCALE"] = locale_code
                instruction = tool.get_language_instruction()
                # Collect before asserting so the distinctness check below
                # still sees this instruction if the assertion fails.
                instructions.append(instruction)
                self.assertIn(locale_code, instruction)

        # All instructions must be pairwise different; a set collapses any
        # duplicates, so equal sizes mean no two instructions are the same.
        self.assertEqual(len(set(instructions)), len(instructions))


if __name__ == "__main__":
    # Run only this module, verbosely and with short tracebacks, and hand
    # pytest's exit code to the interpreter: without this, running the file
    # directly would exit 0 even when tests fail.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))