diff --git a/conf/openrouter_models.json b/conf/openrouter_models.json
index c88d3d2..1b97017 100644
--- a/conf/openrouter_models.json
+++ b/conf/openrouter_models.json
@@ -76,7 +76,7 @@
   },
   {
     "model_name": "google/gemini-pro-1.5",
-    "aliases": ["gemini-pro", "gemini", "pro-openrouter"],
+    "aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"],
     "context_window": 1048576,
     "supports_extended_thinking": false,
     "supports_json_mode": true,
@@ -85,7 +85,7 @@
   },
   {
     "model_name": "google/gemini-flash-1.5-8b",
-    "aliases": ["gemini-flash", "flash-openrouter", "flash-8b"],
+    "aliases": ["flash","gemini-flash", "flash-openrouter", "flash-8b"],
     "context_window": 1048576,
     "supports_extended_thinking": false,
     "supports_json_mode": true,
@@ -103,7 +103,7 @@
   },
   {
     "model_name": "meta-llama/llama-3-70b",
-    "aliases": ["llama3-70b", "llama-70b", "llama3"],
+    "aliases": ["llama","llama3-70b", "llama-70b", "llama3"],
     "context_window": 8192,
     "supports_extended_thinking": false,
     "supports_json_mode": false,
@@ -136,6 +136,33 @@
     "supports_json_mode": false,
     "supports_function_calling": false,
     "description": "Perplexity's online model with web search"
+  },
+  {
+    "model_name": "openai/o3",
+    "aliases": ["o3"],
+    "context_window": 200000,
+    "supports_extended_thinking": false,
+    "supports_json_mode": true,
+    "supports_function_calling": true,
+    "description": "OpenAI's o3 model - well-rounded and powerful across domains"
+  },
+  {
+    "model_name": "openai/o3-mini",
+    "aliases": ["o3-mini", "o3mini"],
+    "context_window": 200000,
+    "supports_extended_thinking": false,
+    "supports_json_mode": true,
+    "supports_function_calling": true,
+    "description": "OpenAI's o3-mini reasoning model - cost-efficient with STEM performance"
+  },
+  {
+    "model_name": "openai/o3-mini-high",
+    "aliases": ["o3-mini-high", "o3mini-high"],
+    "context_window": 200000,
+    "supports_extended_thinking": false,
+    "supports_json_mode": true,
+    "supports_function_calling": true,
+    "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems"
   }
 ]
}
\ No newline at end of file
diff --git a/setup-docker.sh b/setup-docker.sh
index a10aafa..e38543c 100755
--- a/setup-docker.sh
+++ b/setup-docker.sh
@@ -237,14 +237,14 @@ fi
 
 # Build and start services
 echo " - Building Zen MCP Server image..."
-if $COMPOSE_CMD build --no-cache >/dev/null 2>&1; then
+if $COMPOSE_CMD build >/dev/null 2>&1; then
     echo "✅ Docker image built successfully!"
 else
     echo "❌ Failed to build Docker image. Run '$COMPOSE_CMD build' manually to see errors."
     exit 1
 fi
 
-echo " - Starting Redis (needed for conversation memory)... please wait"
+echo " - Starting all services (Redis + Zen MCP Server)..."
 if $COMPOSE_CMD up -d >/dev/null 2>&1; then
     echo "✅ Services started successfully!"
 else
@@ -252,10 +252,6 @@ else
     exit 1
 fi
 
-# Wait for services to be healthy
-echo " - Waiting for Redis to be ready..."
-sleep 3
-
 # Check service status
 if $COMPOSE_CMD ps --format table | grep -q "Up" 2>/dev/null || false; then
     echo "✅ All services are running!"
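The new aliases only take effect if alias lookup resolves them before the request reaches OpenRouter. As a rough sketch of how this config can be checked offline (assuming the entries sit under a top-level "models" array, which is not visible in this hunk; the server's real resolution logic may differ):

    # Hypothetical offline check: build an alias -> model_name map from the config.
    import json

    with open("conf/openrouter_models.json") as f:
        entries = json.load(f)["models"]

    alias_map = {}
    for entry in entries:
        for alias in entry.get("aliases", []):
            alias_map[alias.lower()] = entry["model_name"]

    assert alias_map["flash"] == "google/gemini-flash-1.5-8b"
    assert alias_map["o3"] == "openai/o3"
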
diff --git a/simulator_tests/__init__.py b/simulator_tests/__init__.py
index 3b1bcac..58b76ec 100644
--- a/simulator_tests/__init__.py
+++ b/simulator_tests/__init__.py
@@ -14,6 +14,8 @@ from .test_cross_tool_continuation import CrossToolContinuationTest
 from .test_logs_validation import LogsValidationTest
 from .test_model_thinking_config import TestModelThinkingConfig
 from .test_o3_model_selection import O3ModelSelectionTest
+from .test_openrouter_fallback import OpenRouterFallbackTest
+from .test_openrouter_models import OpenRouterModelsTest
 from .test_per_tool_deduplication import PerToolDeduplicationTest
 from .test_redis_validation import RedisValidationTest
 from .test_token_allocation_validation import TokenAllocationValidationTest
@@ -29,6 +31,8 @@ TEST_REGISTRY = {
     "redis_validation": RedisValidationTest,
     "model_thinking_config": TestModelThinkingConfig,
     "o3_model_selection": O3ModelSelectionTest,
+    "openrouter_fallback": OpenRouterFallbackTest,
+    "openrouter_models": OpenRouterModelsTest,
     "token_allocation_validation": TokenAllocationValidationTest,
     "conversation_chain_validation": ConversationChainValidationTest,
 }
@@ -44,6 +48,8 @@ __all__ = [
     "RedisValidationTest",
     "TestModelThinkingConfig",
     "O3ModelSelectionTest",
+    "OpenRouterFallbackTest",
+    "OpenRouterModelsTest",
     "TokenAllocationValidationTest",
     "ConversationChainValidationTest",
     "TEST_REGISTRY",
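Registered tests are addressable by key, so the new entries can be driven programmatically as well as through the usual runner. A minimal sketch (the verbose kwarg mirrors what each test's own main() passes):

    from simulator_tests import TEST_REGISTRY

    test_cls = TEST_REGISTRY["openrouter_fallback"]  # OpenRouterFallbackTest
    ok = test_cls(verbose=True).run_test()           # returns True/False
    print("passed" if ok else "failed")
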
diff --git a/simulator_tests/test_openrouter_fallback.py b/simulator_tests/test_openrouter_fallback.py
new file mode 100644
index 0000000..48fc1d3
--- /dev/null
+++ b/simulator_tests/test_openrouter_fallback.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""
+OpenRouter Fallback Test
+
+Tests that verify the system correctly falls back to OpenRouter when:
+- Only OPENROUTER_API_KEY is configured
+- Native models (flash, pro) are requested but map to OpenRouter equivalents
+- Auto mode correctly selects OpenRouter models
+"""
+
+import json
+import subprocess
+
+from .base_test import BaseSimulatorTest
+
+
+class OpenRouterFallbackTest(BaseSimulatorTest):
+    """Test OpenRouter fallback behavior when it's the only provider"""
+
+    @property
+    def test_name(self) -> str:
+        return "openrouter_fallback"
+
+    @property
+    def test_description(self) -> str:
+        return "OpenRouter fallback behavior when only provider"
+
+    def get_recent_server_logs(self) -> str:
+        """Get recent server logs from the log file directly"""
+        try:
+            cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+
+            if result.returncode == 0:
+                return result.stdout
+            else:
+                self.logger.warning(f"Failed to read server logs: {result.stderr}")
+                return ""
+        except Exception as e:
+            self.logger.error(f"Failed to get server logs: {e}")
+            return ""
+
+    def run_test(self) -> bool:
+        """Test OpenRouter fallback behavior"""
+        try:
+            self.logger.info("Test: OpenRouter fallback behavior when only provider available")
+
+            # Setup test files
+            self.setup_test_files()
+
+            # Test 1: Auto mode should work with OpenRouter
+            self.logger.info(" 1: Testing auto mode with OpenRouter as only provider")
+
+            response1, continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "What is 2 + 2? Give a brief answer.",
+                    # No model specified - should use auto mode
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response1:
+                self.logger.error(" ❌ Auto mode with OpenRouter failed")
+                return False
+
+            self.logger.info(" ✅ Auto mode call completed with OpenRouter")
+
+            # Test 2: Flash model should map to OpenRouter equivalent
+            self.logger.info(" 2: Testing flash model mapping to OpenRouter")
+
+            # Use codereview tool to test a different tool type
+            test_code = """def calculate_sum(numbers):
+    total = 0
+    for num in numbers:
+        total += num
+    return total"""
+
+            test_file = self.create_additional_test_file("sum_function.py", test_code)
+
+            response2, _ = self.call_mcp_tool(
+                "codereview",
+                {
+                    "files": [test_file],
+                    "prompt": "Quick review of this sum function",
+                    "model": "flash",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response2:
+                self.logger.error(" ❌ Flash model mapping to OpenRouter failed")
+                return False
+
+            self.logger.info(" ✅ Flash model successfully mapped to OpenRouter")
+
+            # Test 3: Pro model should map to OpenRouter equivalent
+            self.logger.info(" 3: Testing pro model mapping to OpenRouter")
+
+            response3, _ = self.call_mcp_tool(
+                "analyze",
+                {
+                    "files": [self.test_files["python"]],
+                    "prompt": "Analyze the structure of this Python code",
+                    "model": "pro",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response3:
+                self.logger.error(" ❌ Pro model mapping to OpenRouter failed")
+                return False
+
+            self.logger.info(" ✅ Pro model successfully mapped to OpenRouter")
+
+            # Test 4: Debug tool with OpenRouter
+            self.logger.info(" 4: Testing debug tool with OpenRouter")
+
+            response4, _ = self.call_mcp_tool(
+                "debug",
+                {
+                    "prompt": "Why might a function return None instead of a value?",
+                    "model": "flash",  # Should map to OpenRouter
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response4:
+                self.logger.error(" ❌ Debug tool with OpenRouter failed")
+                return False
+
+            self.logger.info(" ✅ Debug tool working with OpenRouter")
+
+            # Test 5: Validate logs show OpenRouter is being used
+            self.logger.info(" 5: Validating OpenRouter is the active provider")
+            logs = self.get_recent_server_logs()
+
+            # Check for provider fallback logs
+            fallback_logs = [
+                line for line in logs.split("\n")
+                if "No Gemini API key found" in line or
+                "No OpenAI API key found" in line or
+                "Only OpenRouter available" in line or
+                "Using OpenRouter" in line
+            ]
+
+            # Check for OpenRouter provider initialization
+            provider_logs = [
+                line for line in logs.split("\n")
+                if "OpenRouter provider" in line or
+                "OpenRouterProvider" in line or
+                "openrouter.ai/api/v1" in line
+            ]
+
+            # Check for model resolution through OpenRouter
+            model_resolution_logs = [
+                line for line in logs.split("\n")
+                if ("Resolved model" in line and "via OpenRouter" in line) or
+                ("Model alias" in line and "resolved to" in line) or
+                ("flash" in line and "gemini-flash" in line) or
+                ("pro" in line and "gemini-pro" in line)
+            ]
+
+            # Log findings
+            self.logger.info(f" Fallback indication logs: {len(fallback_logs)}")
+            self.logger.info(f" OpenRouter provider logs: {len(provider_logs)}")
+            self.logger.info(f" Model resolution logs: {len(model_resolution_logs)}")
+
+            # Sample logs for debugging
+            if self.verbose:
+                if fallback_logs:
+                    self.logger.debug(" 📋 Sample fallback logs:")
+                    for log in fallback_logs[:3]:
+                        self.logger.debug(f" {log}")
+
+                if provider_logs:
+                    self.logger.debug(" 📋 Sample provider logs:")
+                    for log in provider_logs[:3]:
+                        self.logger.debug(f" {log}")
+
+            # Success criteria
+            openrouter_active = len(provider_logs) > 0
+            models_resolved = len(model_resolution_logs) > 0
+            all_tools_worked = True  # We checked this above
+
+            success_criteria = [
+                ("OpenRouter provider active", openrouter_active),
+                ("Models resolved through OpenRouter", models_resolved),
+                ("All tools worked with OpenRouter", all_tools_worked),
+            ]
+
+            passed_criteria = sum(1 for _, passed in success_criteria if passed)
+            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
+
+            for criterion, passed in success_criteria:
+                status = "✅" if passed else "❌"
+                self.logger.info(f" {status} {criterion}")
+
+            if passed_criteria >= 2:  # At least 2 out of 3 criteria
+                self.logger.info(" ✅ OpenRouter fallback test passed")
+                return True
+            else:
+                self.logger.error(" ❌ OpenRouter fallback test failed")
+                return False
+
+        except Exception as e:
+            self.logger.error(f"OpenRouter fallback test failed: {e}")
+            return False
+        finally:
+            self.cleanup_test_files()
+
+
+def main():
+    """Run the OpenRouter fallback tests"""
+    import sys
+
+    verbose = "--verbose" in sys.argv or "-v" in sys.argv
+    test = OpenRouterFallbackTest(verbose=verbose)
+
+    success = test.run_test()
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
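Test 5's validation is plain substring filtering over the container's log tail, so the same probe can be run by hand when debugging a failure. A sketch, assuming a container named "zen-mcp-server" (the test itself uses self.container_name) and the same hard-coded log path:

    # Hypothetical standalone version of the Test 5 provider check.
    import subprocess

    out = subprocess.run(
        ["docker", "exec", "zen-mcp-server", "tail", "-n", "300", "/tmp/mcp_server.log"],
        capture_output=True, text=True,
    ).stdout
    provider_lines = [line for line in out.split("\n")
                      if "OpenRouterProvider" in line or "openrouter.ai/api/v1" in line]
    print(f"OpenRouter provider log lines: {len(provider_lines)}")
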
diff --git a/simulator_tests/test_openrouter_models.py b/simulator_tests/test_openrouter_models.py
new file mode 100644
index 0000000..1dcc281
--- /dev/null
+++ b/simulator_tests/test_openrouter_models.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+OpenRouter Model Tests
+
+Tests that verify OpenRouter functionality including:
+- Model alias resolution (flash, pro, o3, etc. map to OpenRouter equivalents)
+- Multiple OpenRouter models work correctly
+- Conversation continuity works with OpenRouter models
+- Error handling when models are not available
+"""
+
+import json
+import subprocess
+
+from .base_test import BaseSimulatorTest
+
+
+class OpenRouterModelsTest(BaseSimulatorTest):
+    """Test OpenRouter model functionality and alias mapping"""
+
+    @property
+    def test_name(self) -> str:
+        return "openrouter_models"
+
+    @property
+    def test_description(self) -> str:
+        return "OpenRouter model functionality and alias mapping"
+
+    def get_recent_server_logs(self) -> str:
+        """Get recent server logs from the log file directly"""
+        try:
+            # Read logs directly from the log file
+            cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+
+            if result.returncode == 0:
+                return result.stdout
+            else:
+                self.logger.warning(f"Failed to read server logs: {result.stderr}")
+                return ""
+        except Exception as e:
+            self.logger.error(f"Failed to get server logs: {e}")
+            return ""
+
+    def run_test(self) -> bool:
+        """Test OpenRouter model functionality"""
+        try:
+            self.logger.info("Test: OpenRouter model functionality and alias mapping")
+
+            # Setup test files for later use
+            self.setup_test_files()
+
+            # Test 1: Flash alias mapping to OpenRouter
+            self.logger.info(" 1: Testing 'flash' alias (should map to google/gemini-flash-1.5-8b)")
+
+            response1, continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Flash model!' and nothing else.",
+                    "model": "flash",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response1:
+                self.logger.error(" ❌ Flash alias test failed")
+                return False
+
+            self.logger.info(" ✅ Flash alias call completed")
+            if continuation_id:
+                self.logger.info(f" ✅ Got continuation_id: {continuation_id}")
+
+            # Test 2: Pro alias mapping to OpenRouter
+            self.logger.info(" 2: Testing 'pro' alias (should map to google/gemini-pro-1.5)")
+
+            response2, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Pro model!' and nothing else.",
+                    "model": "pro",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response2:
+                self.logger.error(" ❌ Pro alias test failed")
+                return False
+
+            self.logger.info(" ✅ Pro alias call completed")
+
+            # Test 3: O3 alias mapping to OpenRouter (should map to openai/o3)
+            self.logger.info(" 3: Testing 'o3' alias (should map to openai/o3)")
+
+            response3, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from O3 model!' and nothing else.",
+                    "model": "o3",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response3:
+                self.logger.error(" ❌ O3 alias test failed")
+                return False
+
+            self.logger.info(" ✅ O3 alias call completed")
+
+            # Test 4: Direct OpenRouter model name
+            self.logger.info(" 4: Testing direct OpenRouter model name (anthropic/claude-3-haiku)")
+
+            response4, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Claude Haiku!' and nothing else.",
+                    "model": "anthropic/claude-3-haiku",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response4:
+                self.logger.error(" ❌ Direct OpenRouter model test failed")
+                return False
+
+            self.logger.info(" ✅ Direct OpenRouter model call completed")
+
+            # Test 5: OpenRouter alias from config
+            self.logger.info(" 5: Testing OpenRouter alias from config ('opus' -> anthropic/claude-3-opus)")
+
+            response5, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Say 'Hello from Opus!' and nothing else.",
+                    "model": "opus",
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response5:
+                self.logger.error(" ❌ OpenRouter alias test failed")
+                return False
+
+            self.logger.info(" ✅ OpenRouter alias call completed")
+
+            # Test 6: Conversation continuity with OpenRouter models
+            self.logger.info(" 6: Testing conversation continuity with OpenRouter")
+
+            response6, new_continuation_id = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "Remember this number: 42. What number did I just tell you?",
+                    "model": "sonnet",  # Claude Sonnet via OpenRouter
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response6 or not new_continuation_id:
+                self.logger.error(" ❌ Failed to start conversation with continuation_id")
+                return False
+
+            # Continue the conversation
+            response7, _ = self.call_mcp_tool(
+                "chat",
+                {
+                    "prompt": "What was the number I told you earlier?",
+                    "model": "sonnet",
+                    "continuation_id": new_continuation_id,
+                    "temperature": 0.1,
+                },
+            )
+
+            if not response7:
+                self.logger.error(" ❌ Failed to continue conversation")
+                return False
+
+            # Check if the model remembered the number
+            if "42" in response7:
+                self.logger.info(" ✅ Conversation continuity working with OpenRouter")
+            else:
+                self.logger.warning(" ⚠️ Model may not have remembered the number")
+
+            # Test 7: Validate OpenRouter API usage from logs
+            self.logger.info(" 7: Validating OpenRouter API usage in logs")
+            logs = self.get_recent_server_logs()
+
+            # Check for OpenRouter API calls
+            openrouter_logs = [line for line in logs.split("\n") if "openrouter" in line.lower()]
+            openrouter_api_logs = [line for line in logs.split("\n") if "openrouter.ai/api/v1" in line]
+
+            # Check for specific model mappings
+            flash_mapping_logs = [
+                line for line in logs.split("\n")
+                if ("flash" in line and "google/gemini-flash" in line) or
+                ("Resolved model" in line and "google/gemini-flash" in line)
+            ]
+
+            pro_mapping_logs = [
+                line for line in logs.split("\n")
+                if ("pro" in line and "google/gemini-pro" in line) or
+                ("Resolved model" in line and "google/gemini-pro" in line)
+            ]
+
+            # Log findings
+            self.logger.info(f" OpenRouter-related logs: {len(openrouter_logs)}")
+            self.logger.info(f" OpenRouter API logs: {len(openrouter_api_logs)}")
+            self.logger.info(f" Flash mapping logs: {len(flash_mapping_logs)}")
+            self.logger.info(f" Pro mapping logs: {len(pro_mapping_logs)}")
+
+            # Sample log output for debugging
+            if self.verbose and openrouter_logs:
+                self.logger.debug(" 📋 Sample OpenRouter logs:")
+                for log in openrouter_logs[:5]:
+                    self.logger.debug(f" {log}")
+
+            # Success criteria
+            openrouter_api_used = len(openrouter_api_logs) > 0
+            models_mapped = len(flash_mapping_logs) > 0 or len(pro_mapping_logs) > 0
+
+            success_criteria = [
+                ("OpenRouter API calls made", openrouter_api_used),
+                ("Model aliases mapped correctly", models_mapped),
+                ("All model calls succeeded", True),  # We already checked this above
+            ]
+
+            passed_criteria = sum(1 for _, passed in success_criteria if passed)
+            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
+
+            for criterion, passed in success_criteria:
+                status = "✅" if passed else "❌"
+                self.logger.info(f" {status} {criterion}")
+
+            if passed_criteria >= 2:  # At least 2 out of 3 criteria
+                self.logger.info(" ✅ OpenRouter model tests passed")
+                return True
+            else:
+                self.logger.error(" ❌ OpenRouter model tests failed")
+                return False
+
+        except Exception as e:
+            self.logger.error(f"OpenRouter model test failed: {e}")
+            return False
+        finally:
+            self.cleanup_test_files()
+
+
+def main():
+    """Run the OpenRouter model tests"""
+    import sys
+
+    verbose = "--verbose" in sys.argv or "-v" in sys.argv
+    test = OpenRouterModelsTest(verbose=verbose)
+
+    success = test.run_test()
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
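One pitfall the model tests do not cover: the short aliases introduced in conf/openrouter_models.json ("pro", "flash", "llama", "o3") are generic enough to collide if another entry ever reuses them. A small offline guard (same assumption as earlier about a top-level "models" array):

    # Hypothetical sanity check: no alias may map to two model_names.
    import json
    from collections import Counter

    with open("conf/openrouter_models.json") as f:
        entries = json.load(f)["models"]

    counts = Counter(alias.lower() for e in entries for alias in e.get("aliases", []))
    dupes = [alias for alias, n in counts.items() if n > 1]
    assert not dupes, f"duplicate aliases: {dupes}"
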
diff --git a/test_model_mapping.py b/test_model_mapping.py
new file mode 100755
index 0000000..5b6b585
--- /dev/null
+++ b/test_model_mapping.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""
+Simple test script to demonstrate model mapping through the MCP server.
+Tests how model aliases (flash, pro, o3) are mapped to OpenRouter models.
+"""
+
+import subprocess
+import json
+import sys
+from typing import Dict, Any
+
+def call_mcp_server(model: str, message: str = "Hello, which model are you?") -> Dict[str, Any]:
+    """Call the MCP server with a specific model and return the response."""
+
+    # Prepare the request
+    request = {
+        "jsonrpc": "2.0",
+        "method": "completion",
+        "params": {
+            "model": model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": message
+                }
+            ],
+            "max_tokens": 100
+        },
+        "id": 1
+    }
+
+    # Call the server
+    cmd = [sys.executable, "server.py"]
+
+    try:
+        # Send request to stdin and capture output
+        process = subprocess.Popen(
+            cmd,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True
+        )
+
+        stdout, stderr = process.communicate(input=json.dumps(request))
+
+        if process.returncode != 0:
+            return {
+                "error": f"Server returned non-zero exit code: {process.returncode}",
+                "stderr": stderr
+            }
+
+        # Parse the response
+        try:
+            response = json.loads(stdout)
+            return response
+        except json.JSONDecodeError:
+            return {
+                "error": "Failed to parse JSON response",
+                "stdout": stdout,
+                "stderr": stderr
+            }
+
+    except Exception as e:
+        return {
+            "error": f"Failed to call server: {str(e)}"
+        }
+
+def extract_model_info(response: Dict[str, Any]) -> Dict[str, str]:
+    """Extract model information from the response."""
+
+    if "error" in response:
+        return {
+            "status": "error",
+            "message": response.get("error", "Unknown error")
+        }
+
+    # Look for result in the response
+    result = response.get("result", {})
+
+    # Extract relevant information
+    info = {
+        "status": "success",
+        "provider": "unknown",
+        "model": "unknown"
+    }
+
+    # Try to find provider and model info in the response
+    # This might be in metadata or debug info depending on server implementation
+    if "metadata" in result:
+        metadata = result["metadata"]
+        info["provider"] = metadata.get("provider", "unknown")
+        info["model"] = metadata.get("model", "unknown")
+
+    # Also check if the model info is in the response content itself
+    if "content" in result:
+        content = result["content"]
+        # Simple heuristic to detect OpenRouter models
+        if "openrouter" in content.lower() or any(x in content.lower() for x in ["claude", "gpt", "gemini"]):
+            info["provider"] = "openrouter"
+
+    return info
+
+def main():
+    """Test model mapping for different aliases."""
+
+    print("Model Mapping Test for MCP Server")
+    print("=" * 50)
+    print()
+
+    # Test models
+    test_models = ["flash", "pro", "o3"]
+
+    for model in test_models:
+        print(f"Testing model: '{model}'")
+        print("-" * 30)
+
+        response = call_mcp_server(model)
+        model_info = extract_model_info(response)
+
+        if model_info["status"] == "error":
+            print(f" ❌ Error: {model_info['message']}")
+        else:
+            print(f" ✓ Provider: {model_info['provider']}")
+            print(f" ✓ Model: {model_info['model']}")
+
+        # Print raw response for debugging
+        if "--debug" in sys.argv:
+            print("\nDebug - Raw Response:")
+            print(json.dumps(response, indent=2))
+
+        print()
+
+    print("\nNote: This test assumes the MCP server is configured with OpenRouter.")
+    print("The actual model mappings depend on the server configuration.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
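extract_model_info can be exercised without spawning server.py by feeding it a canned response; the "metadata" shape below is the script's own assumption about the server, not a documented contract:

    from test_model_mapping import extract_model_info

    fake = {"result": {"metadata": {"provider": "openrouter",
                                    "model": "google/gemini-flash-1.5-8b"},
                       "content": "hi"}}
    print(extract_model_info(fake))
    # -> {'status': 'success', 'provider': 'openrouter', 'model': 'google/gemini-flash-1.5-8b'}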