feat: Add LOCALE variable support for responses with UTF-8 JSON encoding
Description: This feature adds support for UTF-8 encoding in JSON responses, allowing for proper handling of special characters and emojis.

- Implement unit tests for UTF-8 encoding across model providers, including Gemini, OpenAI, and OpenAI Compatible.
- Validate UTF-8 support in token counting, content generation, and error handling.
- Introduce tests for JSON serialization, ensuring proper handling of French characters and emojis.
- Create tests for language instruction generation based on locale settings.
- Validate UTF-8 handling in workflow tools, including AnalyzeTool, CodereviewTool, and DebugIssueTool.
- Ensure that all tests check for correct UTF-8 character preservation and proper JSON formatting.
- Add integration tests to verify the interaction between locale settings and model responses.
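The mechanism these tests exercise is serializing with `ensure_ascii=False`. A minimal sketch of the difference, using only the stdlib `json` module (the response dict below is illustrative, not taken from the codebase):

    import json

    response = {"findings": "Analyse terminée ✅"}

    # Default serialization escapes every non-ASCII character:
    json.dumps(response)                      # '{"findings": "Analyse termin\\u00e9e \\u2705"}'

    # With ensure_ascii=False, UTF-8 characters are preserved verbatim:
    json.dumps(response, ensure_ascii=False)  # '{"findings": "Analyse terminée ✅"}'

The tests below assert both properties: the French/emoji characters appear literally in the output, and no "\u" escape sequences remain.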
tests/test_workflow_utf8.py (new file, 456 lines)
@@ -0,0 +1,456 @@
"""
Unit tests to validate UTF-8 encoding in workflow tools
and the generation of properly encoded JSON responses.
"""

import json
import os
import tempfile
import unittest
from unittest.mock import Mock, patch

from tools.analyze import AnalyzeTool
from tools.codereview import CodereviewTool
from tools.debug import DebugIssueTool

class TestWorkflowToolsUTF8(unittest.TestCase):
    """Tests for UTF-8 encoding in workflow tools."""

    def setUp(self):
        """Test setup."""
        self.original_locale = os.getenv("LOCALE")
        # Default to French for tests
        os.environ["LOCALE"] = "fr-FR"

    def tearDown(self):
        """Cleanup after tests."""
        if self.original_locale is not None:
            os.environ["LOCALE"] = self.original_locale
        else:
            os.environ.pop("LOCALE", None)

    def test_workflow_json_response_structure(self):
        """Test the structure of JSON responses from workflow tools."""
        # Test with the analysis tool
        analyze_tool = AnalyzeTool()

        # Mock response with UTF-8 characters
        test_response = {
            "status": "pause_for_analysis",
            "step_number": 1,
            "total_steps": 3,
            "next_step_required": True,
            "findings": "L'analyse du code révèle des problèmes de performance 🔍",
            "files_checked": ["/src/main.py"],
            "relevant_files": ["/src/main.py"],
            "issues_found": [
                {"severity": "high", "description": "Fonction trop complexe - refactorisation nécessaire"}
            ],
            "investigation_required": True,
            "required_actions": ["Examiner les dépendances du code", "Analyser les patterns architecturaux"],
        }

        # Test JSON serialization with ensure_ascii=False
        json_str = json.dumps(test_response, indent=2, ensure_ascii=False)

        # UTF-8 checks
        self.assertIn("révèle", json_str)
        self.assertIn("problèmes", json_str)
        self.assertIn("nécessaire", json_str)
        self.assertIn("dépendances", json_str)
        self.assertIn("🔍", json_str)

        # No escaped characters
        self.assertNotIn("\\u", json_str)

        # Test parsing
        parsed = json.loads(json_str)
        self.assertEqual(parsed["findings"], test_response["findings"])
        self.assertEqual(len(parsed["issues_found"]), 1)
        self.assertIn("nécessaire", parsed["issues_found"][0]["description"])

@patch("tools.shared.base_tool.BaseTool.get_model_provider")
|
||||
def test_analyze_tool_utf8_response(self, mock_get_provider):
|
||||
"""Test that the analyze tool returns correct UTF-8 responses."""
|
||||
# Mock provider
|
||||
mock_provider = Mock()
|
||||
mock_provider.get_provider_type.return_value = Mock(value="test")
|
||||
mock_provider.generate_content.return_value = Mock(
|
||||
content="Architectural analysis complete. Recommendations: improve modularity.",
|
||||
usage={},
|
||||
model_name="test-model",
|
||||
metadata={},
|
||||
)
|
||||
mock_get_provider.return_value = mock_provider
|
||||
|
||||
# Test the tool
|
||||
analyze_tool = AnalyzeTool()
|
||||
result = analyze_tool.execute(
|
||||
{
|
||||
"step": "Analyze system architecture to identify issues",
|
||||
"step_number": 1,
|
||||
"total_steps": 2,
|
||||
"next_step_required": True,
|
||||
"findings": "Starting architectural analysis of Python code",
|
||||
"relevant_files": ["/test/main.py"],
|
||||
"model": "test-model",
|
||||
}
|
||||
)
|
||||
|
||||
# Checks
|
||||
self.assertIsNotNone(result)
|
||||
self.assertEqual(len(result), 1)
|
||||
|
||||
# Parse the response - must be valid UTF-8 JSON
|
||||
response_text = result[0].text
|
||||
response_data = json.loads(response_text)
|
||||
|
||||
# Structure checks
|
||||
self.assertIn("status", response_data)
|
||||
self.assertIn("step_number", response_data)
|
||||
|
||||
# Check that the French instruction was added
|
||||
mock_provider.generate_content.assert_called()
|
||||
call_args = mock_provider.generate_content.call_args
|
||||
system_prompt = call_args.kwargs.get("system_prompt", "")
|
||||
self.assertIn("fr-FR", system_prompt)
|
||||
|
||||
@patch("tools.shared.base_tool.BaseTool.get_model_provider")
|
||||
def test_codereview_tool_french_findings(self, mock_get_provider):
|
||||
"""Test that the codereview tool produces findings in French."""
|
||||
# Mock with analysis in French
|
||||
mock_provider = Mock()
|
||||
mock_provider.get_provider_type.return_value = Mock(value="test")
|
||||
mock_provider.supports_thinking_mode.return_value = False
|
||||
mock_provider.generate_content.return_value = Mock(
|
||||
content=json.dumps(
|
||||
{
|
||||
"status": "analysis_complete",
|
||||
"raw_analysis": """
|
||||
🔴 CRITIQUE: Aucun problème critique trouvé.
|
||||
|
||||
🟠 ÉLEVÉ: Fichier example.py:42 - Fonction trop complexe
|
||||
→ Problème: La fonction process_data() contient trop de responsabilités
|
||||
→ Solution: Décomposer en fonctions plus petites et spécialisées
|
||||
|
||||
🟡 MOYEN: Gestion d'erreurs insuffisante
|
||||
→ Problème: Plusieurs fonctions n'ont pas de gestion d'erreurs appropriée
|
||||
→ Solution: Ajouter des try-catch et validation des paramètres
|
||||
|
||||
✅ Points positifs:
|
||||
• Code bien commenté et lisible
|
||||
• Nomenclature cohérente
|
||||
• Tests unitaires présents
|
||||
""",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
),
|
||||
usage={},
|
||||
model_name="test-model",
|
||||
metadata={},
|
||||
)
|
||||
mock_get_provider.return_value = mock_provider
|
||||
|
||||
# Test the tool
|
||||
codereview_tool = CodereviewTool()
|
||||
result = codereview_tool.execute(
|
||||
{
|
||||
"step": "Complete review of Python code",
|
||||
"step_number": 1,
|
||||
"total_steps": 1,
|
||||
"next_step_required": False,
|
||||
"findings": "Code review complete",
|
||||
"relevant_files": ["/test/example.py"],
|
||||
"model": "test-model",
|
||||
}
|
||||
)
|
||||
|
||||
# Checks
|
||||
self.assertIsNotNone(result)
|
||||
response_text = result[0].text
|
||||
response_data = json.loads(response_text)
|
||||
|
||||
# Check UTF-8 characters in analysis
|
||||
if "expert_analysis" in response_data:
|
||||
analysis = response_data["expert_analysis"]["raw_analysis"]
|
||||
# Vérification de caractères français
|
||||
# Check for French characters
|
||||
self.assertIn("ÉLEVÉ", analysis)is)
|
||||
self.assertIn("problème", analysis)sis)
|
||||
self.assertIn("spécialisées", analysis)
|
||||
self.assertIn("appropriée", analysis)
|
||||
self.assertIn("paramètres", analysis)
|
||||
self.assertIn("présents", analysis)
|
||||
# Vérification d'emojis
|
||||
# Check for emojislysis)
|
||||
self.assertIn("🔴", analysis)
|
||||
self.assertIn("🟠", analysis)
|
||||
self.assertIn("🟡", analysis)
|
||||
self.assertIn("✅", analysis)
|
||||
@patch("tools.shared.base_tool.BaseTool.get_model_provider")
|
||||
@patch("tools.shared.base_tool.BaseTool.get_model_provider")vider):
|
||||
def test_debug_tool_french_error_analysis(self, mock_get_provider):
|
||||
"""Test that the debug tool analyzes errors in French."""
|
||||
# Mock providerck()
|
||||
mock_provider = Mock()ider_type.return_value = Mock(value="test")
|
||||
mock_provider.get_provider_type.return_value = Mock(value="test")
|
||||
mock_provider.generate_content.return_value = Mock(n définie. Cause probable: import manquant.",
|
||||
content="Error analyzed: variable 'données' not defined. Probable cause: missing import.",
|
||||
usage={},e="test-model",
|
||||
model_name="test-model",
|
||||
metadata={},
|
||||
)ock_get_provider.return_value = mock_provider
|
||||
mock_get_provider.return_value = mock_provider
|
||||
# Test de l'outil debug
|
||||
# Test the debug toolTool()
|
||||
debug_tool = DebugIssueTool()
|
||||
result = debug_tool.execute(
|
||||
{ "step": "Analyser l'erreur NameError dans le fichier de traitement des données",
|
||||
"step": "Analyze NameError in data processing file",
|
||||
"step_number": 1,
|
||||
"total_steps": 2,ed": True,
|
||||
"next_step_required": True,e lors de l'exécution du script",
|
||||
"findings": "Error detected during script execution",
|
||||
"files_checked": ["/src/data_processor.py"],,
|
||||
"relevant_files": ["/src/data_processor.py"], - import manquant",
|
||||
"hypothesis": "Variable 'données' not defined - missing import",
|
||||
"confidence": "medium",
|
||||
"model": "test-model",
|
||||
}
|
||||
)
|
||||
# Vérifications
|
||||
# CheckstNone(result)
|
||||
self.assertIsNotNone(result)xt
|
||||
response_text = result[0].textponse_text)
|
||||
response_data = json.loads(response_text)
|
||||
# Vérification de la structure de réponse
|
||||
# Check response structure
|
||||
self.assertIn("status", response_data)response_data)
|
||||
self.assertIn("investigation_status", response_data)
|
||||
# Vérification que les caractères UTF-8 sont préservés
|
||||
# Check that UTF-8 characters are preservedFalse)
|
||||
response_str = json.dumps(response_data, ensure_ascii=False)
|
||||
self.assertIn("données", response_str))
|
||||
self.assertIn("détectée", response_str))
|
||||
self.assertIn("exécution", response_str)
|
||||
self.assertIn("définie", response_str)
|
||||
    def test_workflow_mixin_utf8_serialization(self):
        """Test UTF-8 serialization in BaseWorkflowMixin."""
        # Simulation of a workflow response with UTF-8 characters
        workflow_response = {
            "status": "calling_expert_analysis",
            "step_number": 2,
            "total_steps": 3,
            "next_step_required": True,
            "continuation_id": "test-id",
            "file_context": {
                "type": "fully_embedded",
                "files_embedded": 2,
                "context_optimization": "Contexte optimisé pour l'analyse experte",
            },
            "expert_analysis": {
                "status": "analysis_complete",
                "raw_analysis": """
L'analyse complète du système révèle:

🎯 **Objectif**: Améliorer les performances
🔍 **Méthodologie**: Analyse statique et dynamique
📊 **Résultats**:
• Performance générale: satisfaisante
• Optimisations possibles: 3 identifiées
• Complexité algorithmique: O(n²) → O(n log n)

**Recommandations prioritaires**:
1. Optimiser la fonction de tri des données
2. Implémenter un cache pour les requêtes fréquentes
3. Refactoriser le module de génération de rapports

🚀 **Impact attendu**: Amélioration de 40% des performances
""",
            },
            "investigation_summary": {
                "files_analyzed": ["/src/performance.py", "/src/cache.py"],
                "key_findings": "Optimisations identifiées dans le traitement des données",
                "recommendations": "Implémenter le cache et l'amélioration algorithmique",
            },
        }

        # Test serialization with ensure_ascii=False
        json_str = json.dumps(workflow_response, indent=2, ensure_ascii=False)

        # UTF-8 preservation checks
        utf8_chars = [
            "révèle",
            "Méthodologie",
            "générale",
            "identifiées",
            "prioritaires",
            "données",
            "fréquentes",
            "génération",
            "attendu",
            "Amélioration",
            "amélioration",
        ]
        for char_seq in utf8_chars:
            self.assertIn(char_seq, json_str)

        # Emoji checks
        emojis = ["🎯", "🔍", "📊", "🚀"]
        for emoji in emojis:
            self.assertIn(emoji, json_str)

        # No escaped characters
        self.assertNotIn("\\u", json_str)

        # Test parsing
        parsed = json.loads(json_str)
        self.assertEqual(
            parsed["expert_analysis"]["raw_analysis"], workflow_response["expert_analysis"]["raw_analysis"]
        )

    def test_file_context_utf8_handling(self):
        """Test UTF-8 handling in file context."""
        # Create a temporary file with UTF-8 content (French identifiers and docstrings)
        french_code = '''#!/usr/bin/env python3
"""
Module de traitement des données utilisateur.
Créé par: Équipe de développement
"""

class GestionnaireDonnées:
    """Gestionnaire pour le traitement des données utilisateur."""

    def __init__(self):
        self.données = {}
        self.préférences = {}

    def traiter_données(self, données_entrée):
        """
        Traite les données d'entrée selon les préférences.

        Args:
            données_entrée: Données à traiter

        Returns:
            Données traitées et formatées
        """
        résultat = {}
        for clé, valeur in données_entrée.items():
            if self._valider_données(valeur):
                résultat[clé] = self._formater_données(valeur)
        return résultat

    def _valider_données(self, données):
        """Valide la structure des données."""
        return données is not None and len(str(données)) > 0

    def _formater_données(self, données):
        """Formate les données selon les règles métier."""
        return f"Formaté: {données}"

# Configuration par défaut
DEFAULT_CONFIG = {
    "encoding": "utf-8",
    "language": "French",
    "timezone": "Europe/Paris"
}

def créer_gestionnaire():
    """Crée une instance du gestionnaire de données."""
    return GestionnaireDonnées()
'''

        with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", suffix=".py", delete=False) as f:
            f.write(french_code)
            temp_file = f.name

        try:
            # Test reading and processing UTF-8
            with open(temp_file, "r", encoding="utf-8") as f:
                content = f.read()

            # Simulate file context for a workflow
            file_context = {
                "file_path": temp_file,
                "content": content,
                "encoding": "utf-8",
                "analysis": "Fichier Python avec noms de variables en français",
                "metrics": {
                    "lines": len(content.split("\n")),
                    "classes": 1,
                    "methods": 4,
                    "special_characters": ["é", "è", "à", "ç", "ù"],
                },
            }

            # Test context serialization
            context_json = json.dumps(file_context, ensure_ascii=False, indent=2)

            # UTF-8 checks
            self.assertIn("GestionnaireDonnées", context_json)
            self.assertIn("données", context_json)
            self.assertIn("préférences", context_json)
            self.assertIn("données_entrée", context_json)
            self.assertIn("formater_données", context_json)
            self.assertIn("créer_gestionnaire", context_json)
            self.assertIn("français", context_json)

            # Test parsing
            parsed_context = json.loads(context_json)
            self.assertEqual(parsed_context["content"], content)
            self.assertIn("français", parsed_context["analysis"])
        finally:
            # Cleanup
            os.unlink(temp_file)

    def test_error_response_utf8_format(self):
        """Test UTF-8 error formatting in workflow responses."""
        # Simulation of an error response with UTF-8 characters
        error_response = {
            "status": "error",
            "error_type": "ValidationError",
            "error_message": "Données d'entrée invalides: caractères spéciaux non supportés",
            "error_details": {
                "file": "/src/données.py",
                "line": 42,
                "problème": "Encodage UTF-8 requis pour les noms de variables accentuées",
                "solution": "Vérifier l'encodage du fichier et les paramètres de l'IDE",
            },
            "suggestions": [
                "Utiliser # -*- coding: utf-8 -*- en en-tête",
                "Configurer l'IDE en UTF-8 par défaut",
                "Vérifier les paramètres de locale du système",
            ],
            "timestamp": "2024-01-01T12:00:00Z",
        }

        # Test error serialization
        error_json = json.dumps(error_response, ensure_ascii=False, indent=2)

        # UTF-8 checks
        self.assertIn("Données", error_json)
        self.assertIn("entrée", error_json)
        self.assertIn("spéciaux", error_json)
        self.assertIn("supportés", error_json)
        self.assertIn("données.py", error_json)
        self.assertIn("problème", error_json)
        self.assertIn("accentuées", error_json)
        self.assertIn("Vérifier", error_json)
        self.assertIn("paramètres", error_json)

        # Test parsing
        parsed_error = json.loads(error_json)
        self.assertEqual(parsed_error["error_type"], "ValidationError")
        self.assertIn("accentuées", parsed_error["error_details"]["problème"])


if __name__ == "__main__":
    unittest.main(verbosity=2)