New OpenRouter tests
Fixed flash aliases; added more models
@@ -14,6 +14,8 @@ from .test_cross_tool_continuation import CrossToolContinuationTest
from .test_logs_validation import LogsValidationTest
from .test_model_thinking_config import TestModelThinkingConfig
from .test_o3_model_selection import O3ModelSelectionTest
from .test_openrouter_fallback import OpenRouterFallbackTest
from .test_openrouter_models import OpenRouterModelsTest
from .test_per_tool_deduplication import PerToolDeduplicationTest
from .test_redis_validation import RedisValidationTest
from .test_token_allocation_validation import TokenAllocationValidationTest
@@ -29,6 +31,8 @@ TEST_REGISTRY = {
    "redis_validation": RedisValidationTest,
    "model_thinking_config": TestModelThinkingConfig,
    "o3_model_selection": O3ModelSelectionTest,
    "openrouter_fallback": OpenRouterFallbackTest,
    "openrouter_models": OpenRouterModelsTest,
    "token_allocation_validation": TokenAllocationValidationTest,
    "conversation_chain_validation": ConversationChainValidationTest,
}
@@ -44,6 +48,8 @@ __all__ = [
    "RedisValidationTest",
    "TestModelThinkingConfig",
    "O3ModelSelectionTest",
    "OpenRouterFallbackTest",
    "OpenRouterModelsTest",
    "TokenAllocationValidationTest",
    "ConversationChainValidationTest",
    "TEST_REGISTRY",
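The registry wiring above is how the simulator harness looks these tests up by name. As a minimal sketch (the `simulator_tests` import path and direct instantiation outside the harness are assumptions, not something this commit shows), a registered test can be driven programmatically:

from simulator_tests import TEST_REGISTRY

test_cls = TEST_REGISTRY["openrouter_fallback"]  # -> OpenRouterFallbackTest
test = test_cls(verbose=True)  # verbose mirrors the --verbose flag handled in main() below
raise SystemExit(0 if test.run_test() else 1)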
simulator_tests/test_openrouter_fallback.py (new file, 225 lines)
@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
OpenRouter Fallback Test

Tests that verify the system correctly falls back to OpenRouter when:
- Only OPENROUTER_API_KEY is configured
- Native models (flash, pro) are requested but map to OpenRouter equivalents
- Auto mode correctly selects OpenRouter models
"""

import json
import subprocess

from .base_test import BaseSimulatorTest


class OpenRouterFallbackTest(BaseSimulatorTest):
    """Test OpenRouter fallback behavior when it's the only provider"""

    @property
    def test_name(self) -> str:
        return "openrouter_fallback"

    @property
    def test_description(self) -> str:
        return "OpenRouter fallback behavior when only provider"

    def get_recent_server_logs(self) -> str:
        """Get recent server logs from the log file directly"""
        try:
            cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return result.stdout
            else:
                self.logger.warning(f"Failed to read server logs: {result.stderr}")
                return ""
        except Exception as e:
            self.logger.error(f"Failed to get server logs: {e}")
            return ""

    def run_test(self) -> bool:
        """Test OpenRouter fallback behavior"""
        try:
            self.logger.info("Test: OpenRouter fallback behavior when only provider available")

            # Setup test files
            self.setup_test_files()

            # Test 1: Auto mode should work with OpenRouter
            self.logger.info(" 1: Testing auto mode with OpenRouter as only provider")

            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What is 2 + 2? Give a brief answer.",
                    # No model specified - should use auto mode
                    "temperature": 0.1,
                },
            )

            if not response1:
                self.logger.error(" ❌ Auto mode with OpenRouter failed")
                return False

            self.logger.info(" ✅ Auto mode call completed with OpenRouter")

            # Test 2: Flash model should map to OpenRouter equivalent
            self.logger.info(" 2: Testing flash model mapping to OpenRouter")

            # Use codereview tool to test a different tool type
            test_code = """def calculate_sum(numbers):
    total = 0
    for num in numbers:
        total += num
    return total"""

            test_file = self.create_additional_test_file("sum_function.py", test_code)

            response2, _ = self.call_mcp_tool(
                "codereview",
                {
                    "files": [test_file],
                    "prompt": "Quick review of this sum function",
                    "model": "flash",
                    "temperature": 0.1,
                },
            )

            if not response2:
                self.logger.error(" ❌ Flash model mapping to OpenRouter failed")
                return False

            self.logger.info(" ✅ Flash model successfully mapped to OpenRouter")

            # Test 3: Pro model should map to OpenRouter equivalent
            self.logger.info(" 3: Testing pro model mapping to OpenRouter")

            response3, _ = self.call_mcp_tool(
                "analyze",
                {
                    "files": [self.test_files["python"]],
                    "prompt": "Analyze the structure of this Python code",
                    "model": "pro",
                    "temperature": 0.1,
                },
            )

            if not response3:
                self.logger.error(" ❌ Pro model mapping to OpenRouter failed")
                return False

            self.logger.info(" ✅ Pro model successfully mapped to OpenRouter")

            # Test 4: Debug tool with OpenRouter
            self.logger.info(" 4: Testing debug tool with OpenRouter")

            response4, _ = self.call_mcp_tool(
                "debug",
                {
                    "prompt": "Why might a function return None instead of a value?",
                    "model": "flash",  # Should map to OpenRouter
                    "temperature": 0.1,
                },
            )

            if not response4:
                self.logger.error(" ❌ Debug tool with OpenRouter failed")
                return False

            self.logger.info(" ✅ Debug tool working with OpenRouter")

            # Test 5: Validate logs show OpenRouter is being used
            self.logger.info(" 5: Validating OpenRouter is the active provider")
            logs = self.get_recent_server_logs()

            # Check for provider fallback logs
            fallback_logs = [
                line for line in logs.split("\n")
                if "No Gemini API key found" in line or
                "No OpenAI API key found" in line or
                "Only OpenRouter available" in line or
                "Using OpenRouter" in line
            ]

            # Check for OpenRouter provider initialization
            provider_logs = [
                line for line in logs.split("\n")
                if "OpenRouter provider" in line or
                "OpenRouterProvider" in line or
                "openrouter.ai/api/v1" in line
            ]

            # Check for model resolution through OpenRouter
            model_resolution_logs = [
                line for line in logs.split("\n")
                if ("Resolved model" in line and "via OpenRouter" in line) or
                ("Model alias" in line and "resolved to" in line) or
                ("flash" in line and "gemini-flash" in line) or
                ("pro" in line and "gemini-pro" in line)
            ]

            # Log findings
            self.logger.info(f" Fallback indication logs: {len(fallback_logs)}")
            self.logger.info(f" OpenRouter provider logs: {len(provider_logs)}")
            self.logger.info(f" Model resolution logs: {len(model_resolution_logs)}")

            # Sample logs for debugging
            if self.verbose:
                if fallback_logs:
                    self.logger.debug(" 📋 Sample fallback logs:")
                    for log in fallback_logs[:3]:
                        self.logger.debug(f" {log}")

                if provider_logs:
                    self.logger.debug(" 📋 Sample provider logs:")
                    for log in provider_logs[:3]:
                        self.logger.debug(f" {log}")

            # Success criteria
            openrouter_active = len(provider_logs) > 0
            models_resolved = len(model_resolution_logs) > 0
            all_tools_worked = True  # We checked this above

            success_criteria = [
                ("OpenRouter provider active", openrouter_active),
                ("Models resolved through OpenRouter", models_resolved),
                ("All tools worked with OpenRouter", all_tools_worked),
            ]

            passed_criteria = sum(1 for _, passed in success_criteria if passed)
            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")

            for criterion, passed in success_criteria:
                status = "✅" if passed else "❌"
                self.logger.info(f" {status} {criterion}")

            if passed_criteria >= 2:  # At least 2 out of 3 criteria
                self.logger.info(" ✅ OpenRouter fallback test passed")
                return True
            else:
                self.logger.error(" ❌ OpenRouter fallback test failed")
                return False

        except Exception as e:
            self.logger.error(f"OpenRouter fallback test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()


def main():
    """Run the OpenRouter fallback tests"""
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = OpenRouterFallbackTest(verbose=verbose)

    success = test.run_test()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
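The module above ships its own entry point, so it can be exercised outside the harness. A standalone-run sketch (assuming the repository root is the working directory so the package-relative import of BaseSimulatorTest resolves):

# Hypothetical standalone invocation; --verbose / -v enables the debug log samples above.
#   python -m simulator_tests.test_openrouter_fallback --verbose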
simulator_tests/test_openrouter_models.py (new file, 258 lines)
@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
OpenRouter Model Tests

Tests that verify OpenRouter functionality including:
- Model alias resolution (flash, pro, o3, etc. map to OpenRouter equivalents)
- Multiple OpenRouter models work correctly
- Conversation continuity works with OpenRouter models
- Error handling when models are not available
"""

import json
import subprocess

from .base_test import BaseSimulatorTest


class OpenRouterModelsTest(BaseSimulatorTest):
    """Test OpenRouter model functionality and alias mapping"""

    @property
    def test_name(self) -> str:
        return "openrouter_models"

    @property
    def test_description(self) -> str:
        return "OpenRouter model functionality and alias mapping"

    def get_recent_server_logs(self) -> str:
        """Get recent server logs from the log file directly"""
        try:
            # Read logs directly from the log file
            cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return result.stdout
            else:
                self.logger.warning(f"Failed to read server logs: {result.stderr}")
                return ""
        except Exception as e:
            self.logger.error(f"Failed to get server logs: {e}")
            return ""

    def run_test(self) -> bool:
        """Test OpenRouter model functionality"""
        try:
            self.logger.info("Test: OpenRouter model functionality and alias mapping")

            # Setup test files for later use
            self.setup_test_files()

            # Test 1: Flash alias mapping to OpenRouter
            self.logger.info(" 1: Testing 'flash' alias (should map to google/gemini-flash-1.5-8b)")

            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Flash model!' and nothing else.",
                    "model": "flash",
                    "temperature": 0.1,
                },
            )

            if not response1:
                self.logger.error(" ❌ Flash alias test failed")
                return False

            self.logger.info(" ✅ Flash alias call completed")
            if continuation_id:
                self.logger.info(f" ✅ Got continuation_id: {continuation_id}")

            # Test 2: Pro alias mapping to OpenRouter
            self.logger.info(" 2: Testing 'pro' alias (should map to google/gemini-pro-1.5)")

            response2, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Pro model!' and nothing else.",
                    "model": "pro",
                    "temperature": 0.1,
                },
            )

            if not response2:
                self.logger.error(" ❌ Pro alias test failed")
                return False

            self.logger.info(" ✅ Pro alias call completed")

            # Test 3: O3 alias mapping to OpenRouter (should map to openai/gpt-4o)
            self.logger.info(" 3: Testing 'o3' alias (should map to openai/gpt-4o)")

            response3, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from O3 model!' and nothing else.",
                    "model": "o3",
                    "temperature": 0.1,
                },
            )

            if not response3:
                self.logger.error(" ❌ O3 alias test failed")
                return False

            self.logger.info(" ✅ O3 alias call completed")

            # Test 4: Direct OpenRouter model name
            self.logger.info(" 4: Testing direct OpenRouter model name (anthropic/claude-3-haiku)")

            response4, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Claude Haiku!' and nothing else.",
                    "model": "anthropic/claude-3-haiku",
                    "temperature": 0.1,
                },
            )

            if not response4:
                self.logger.error(" ❌ Direct OpenRouter model test failed")
                return False

            self.logger.info(" ✅ Direct OpenRouter model call completed")

            # Test 5: OpenRouter alias from config
            self.logger.info(" 5: Testing OpenRouter alias from config ('opus' -> anthropic/claude-3-opus)")

            response5, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Opus!' and nothing else.",
                    "model": "opus",
                    "temperature": 0.1,
                },
            )

            if not response5:
                self.logger.error(" ❌ OpenRouter alias test failed")
                return False

            self.logger.info(" ✅ OpenRouter alias call completed")

            # Test 6: Conversation continuity with OpenRouter models
            self.logger.info(" 6: Testing conversation continuity with OpenRouter")

            response6, new_continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Remember this number: 42. What number did I just tell you?",
                    "model": "sonnet",  # Claude Sonnet via OpenRouter
                    "temperature": 0.1,
                },
            )

            if not response6 or not new_continuation_id:
                self.logger.error(" ❌ Failed to start conversation with continuation_id")
                return False

            # Continue the conversation
            response7, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What was the number I told you earlier?",
                    "model": "sonnet",
                    "continuation_id": new_continuation_id,
                    "temperature": 0.1,
                },
            )

            if not response7:
                self.logger.error(" ❌ Failed to continue conversation")
                return False

            # Check if the model remembered the number
            if "42" in response7:
                self.logger.info(" ✅ Conversation continuity working with OpenRouter")
            else:
                self.logger.warning(" ⚠️ Model may not have remembered the number")

            # Test 7: Validate OpenRouter API usage from logs
            self.logger.info(" 7: Validating OpenRouter API usage in logs")
            logs = self.get_recent_server_logs()

            # Check for OpenRouter API calls
            openrouter_logs = [line for line in logs.split("\n") if "openrouter" in line.lower()]
            openrouter_api_logs = [line for line in logs.split("\n") if "openrouter.ai/api/v1" in line]

            # Check for specific model mappings
            flash_mapping_logs = [
                line for line in logs.split("\n")
                if ("flash" in line and "google/gemini-flash" in line) or
                ("Resolved model" in line and "google/gemini-flash" in line)
            ]

            pro_mapping_logs = [
                line for line in logs.split("\n")
                if ("pro" in line and "google/gemini-pro" in line) or
                ("Resolved model" in line and "google/gemini-pro" in line)
            ]

            # Log findings
            self.logger.info(f" OpenRouter-related logs: {len(openrouter_logs)}")
            self.logger.info(f" OpenRouter API logs: {len(openrouter_api_logs)}")
            self.logger.info(f" Flash mapping logs: {len(flash_mapping_logs)}")
            self.logger.info(f" Pro mapping logs: {len(pro_mapping_logs)}")

            # Sample log output for debugging
            if self.verbose and openrouter_logs:
                self.logger.debug(" 📋 Sample OpenRouter logs:")
                for log in openrouter_logs[:5]:
                    self.logger.debug(f" {log}")

            # Success criteria
            openrouter_api_used = len(openrouter_api_logs) > 0
            models_mapped = len(flash_mapping_logs) > 0 or len(pro_mapping_logs) > 0

            success_criteria = [
                ("OpenRouter API calls made", openrouter_api_used),
                ("Model aliases mapped correctly", models_mapped),
                ("All model calls succeeded", True),  # We already checked this above
            ]

            passed_criteria = sum(1 for _, passed in success_criteria if passed)
            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")

            for criterion, passed in success_criteria:
                status = "✅" if passed else "❌"
                self.logger.info(f" {status} {criterion}")

            if passed_criteria >= 2:  # At least 2 out of 3 criteria
                self.logger.info(" ✅ OpenRouter model tests passed")
                return True
            else:
                self.logger.error(" ❌ OpenRouter model tests failed")
                return False

        except Exception as e:
            self.logger.error(f"OpenRouter model test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()


def main():
    """Run the OpenRouter model tests"""
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = OpenRouterModelsTest(verbose=verbose)

    success = test.run_test()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
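The two-step chat in Test 6 is the core continuity contract these tests rely on. A condensed sketch of that flow (assuming the class is importable as shown and that call_mcp_tool can be driven directly this way outside run_test):

from simulator_tests.test_openrouter_models import OpenRouterModelsTest

test = OpenRouterModelsTest(verbose=False)
# First turn establishes state and returns a continuation_id
reply, cont_id = test.call_mcp_tool(
    "chat", {"prompt": "Remember this number: 42.", "model": "sonnet", "temperature": 0.1}
)
# Second turn reuses the continuation_id to resume the same conversation
follow_up, _ = test.call_mcp_tool(
    "chat",
    {"prompt": "What number did I tell you?", "model": "sonnet", "continuation_id": cont_id, "temperature": 0.1},
)
assert follow_up and "42" in follow_up  # the same check the test above logs as a warning when it fails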