New openrouter tests

Fixed flash aliases More models
2025-06-13 07:00:53 +04:00
parent 2cdb92460b
commit 8cbbe94417
6 changed files with 659 additions and 9 deletions
--- a/simulator_tests/test_openrouter_models.py
+++ b/simulator_tests/test_openrouter_models.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+OpenRouter Model Tests
+
+Tests that verify OpenRouter functionality including:
+- Model alias resolution (flash, pro, o3, etc. map to OpenRouter equivalents)
+- Multiple OpenRouter models work correctly
+- Conversation continuity works with OpenRouter models
+- Error handling when models are not available
+"""
+
+import json
+import subprocess
+
+from .base_test import BaseSimulatorTest
+
+
+class OpenRouterModelsTest(BaseSimulatorTest):
+    """Test OpenRouter model functionality and alias mapping"""
+
+    @property
+    def test_name(self) -> str:
+        return "openrouter_models"
+
+    @property
+    def test_description(self) -> str:
+        return "OpenRouter model functionality and alias mapping"
+
+    def get_recent_server_logs(self) -> str:
+        """Get recent server logs from the log file directly"""
+        try:
+            # Read logs directly from the log file
+            cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+
+            if result.returncode == 0:
+                return result.stdout
+            else:
+                self.logger.warning(f"Failed to read server logs: {result.stderr}")
+                return ""
+        except Exception as e:
+            self.logger.error(f"Failed to get server logs: {e}")
+            return ""
+
+    def run_test(self) -> bool:
+        """Test OpenRouter model functionality"""
+        try:
+            self.logger.info("Test: OpenRouter model functionality and alias mapping")
+
+            # Setup test files for later use
+            self.setup_test_files()
+
+            # Test 1: Flash alias mapping to OpenRouter
+            self.logger.info("  1: Testing 'flash' alias (should map to google/gemini-flash-1.5-8b)")
+
+            response1, continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Flash model!' and nothing else.",
+                    "model": "flash",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response1:
+                self.logger.error("  ❌ Flash alias test failed")
+                return False
+
+            self.logger.info("  ✅ Flash alias call completed")
+            if continuation_id:
+                self.logger.info(f"  ✅ Got continuation_id: {continuation_id}")
+
+            # Test 2: Pro alias mapping to OpenRouter
+            self.logger.info("  2: Testing 'pro' alias (should map to google/gemini-pro-1.5)")
+
+            response2, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Pro model!' and nothing else.",
+                    "model": "pro",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response2:
+                self.logger.error("  ❌ Pro alias test failed")
+                return False
+
+            self.logger.info("  ✅ Pro alias call completed")
+
+            # Test 3: O3 alias mapping to OpenRouter (should map to openai/gpt-4o)
+            self.logger.info("  3: Testing 'o3' alias (should map to openai/gpt-4o)")
+
+            response3, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from O3 model!' and nothing else.",
+                    "model": "o3",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response3:
+                self.logger.error("  ❌ O3 alias test failed")
+                return False
+
+            self.logger.info("  ✅ O3 alias call completed")
+
+            # Test 4: Direct OpenRouter model name
+            self.logger.info("  4: Testing direct OpenRouter model name (anthropic/claude-3-haiku)")
+
+            response4, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Claude Haiku!' and nothing else.",
+                    "model": "anthropic/claude-3-haiku",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response4:
+                self.logger.error("  ❌ Direct OpenRouter model test failed")
+                return False
+
+            self.logger.info("  ✅ Direct OpenRouter model call completed")
+
+            # Test 5: OpenRouter alias from config
+            self.logger.info("  5: Testing OpenRouter alias from config ('opus' -> anthropic/claude-3-opus)")
+
+            response5, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Opus!' and nothing else.",
+                    "model": "opus",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response5:
+                self.logger.error("  ❌ OpenRouter alias test failed")
+                return False
+
+            self.logger.info("  ✅ OpenRouter alias call completed")
+
+            # Test 6: Conversation continuity with OpenRouter models
+            self.logger.info("  6: Testing conversation continuity with OpenRouter")
+
+            response6, new_continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Remember this number: 42. What number did I just tell you?",
+                    "model": "sonnet",  # Claude Sonnet via OpenRouter
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response6 or not new_continuation_id:
+                self.logger.error("  ❌ Failed to start conversation with continuation_id")
+                return False
+
+            # Continue the conversation
+            response7, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "What was the number I told you earlier?",
+                    "model": "sonnet",
+                    "continuation_id": new_continuation_id,
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response7:
+                self.logger.error("  ❌ Failed to continue conversation")
+                return False
+
+            # Check if the model remembered the number
+            if "42" in response7:
+                self.logger.info("  ✅ Conversation continuity working with OpenRouter")
+            else:
+                self.logger.warning("  ⚠️  Model may not have remembered the number")
+
+            # Test 7: Validate OpenRouter API usage from logs
+            self.logger.info("  7: Validating OpenRouter API usage in logs")
+            logs = self.get_recent_server_logs()
+
+            # Check for OpenRouter API calls
+            openrouter_logs = [line for line in logs.split("\n") if "openrouter" in line.lower()]
+            openrouter_api_logs = [line for line in logs.split("\n") if "openrouter.ai/api/v1" in line]
+            
+            # Check for specific model mappings
+            flash_mapping_logs = [
+                line for line in logs.split("\n") 
+                if ("flash" in line and "google/gemini-flash" in line) or
+                   ("Resolved model" in line and "google/gemini-flash" in line)
+            ]
+            
+            pro_mapping_logs = [
+                line for line in logs.split("\n") 
+                if ("pro" in line and "google/gemini-pro" in line) or
+                   ("Resolved model" in line and "google/gemini-pro" in line)
+            ]
+
+            # Log findings
+            self.logger.info(f"   OpenRouter-related logs: {len(openrouter_logs)}")
+            self.logger.info(f"   OpenRouter API logs: {len(openrouter_api_logs)}")
+            self.logger.info(f"   Flash mapping logs: {len(flash_mapping_logs)}")
+            self.logger.info(f"   Pro mapping logs: {len(pro_mapping_logs)}")
+
+            # Sample log output for debugging
+            if self.verbose and openrouter_logs:
+                self.logger.debug("  📋 Sample OpenRouter logs:")
+                for log in openrouter_logs[:5]:
+                    self.logger.debug(f"    {log}")
+
+            # Success criteria
+            openrouter_api_used = len(openrouter_api_logs) > 0
+            models_mapped = len(flash_mapping_logs) > 0 or len(pro_mapping_logs) > 0
+            
+            success_criteria = [
+                ("OpenRouter API calls made", openrouter_api_used),
+                ("Model aliases mapped correctly", models_mapped),
+                ("All model calls succeeded", True),  # We already checked this above
+            ]
+
+            passed_criteria = sum(1 for _, passed in success_criteria if passed)
+            self.logger.info(f"   Success criteria met: {passed_criteria}/{len(success_criteria)}")
+
+            for criterion, passed in success_criteria:
+                status = "✅" if passed else "❌"
+                self.logger.info(f"    {status} {criterion}")
+
+            if passed_criteria >= 2:  # At least 2 out of 3 criteria
+                self.logger.info("  ✅ OpenRouter model tests passed")
+                return True
+            else:
+                self.logger.error("  ❌ OpenRouter model tests failed")
+                return False
+
+        except Exception as e:
+            self.logger.error(f"OpenRouter model test failed: {e}")
+            return False
+        finally:
+            self.cleanup_test_files()
+
+
+def main():
+    """Run the OpenRouter model tests"""
+    import sys
+
+    verbose = "--verbose" in sys.argv or "-v" in sys.argv
+    test = OpenRouterModelsTest(verbose=verbose)
+
+    success = test.run_test()
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()