diff --git a/simulator_tests/test_o3_model_selection.py b/simulator_tests/test_o3_model_selection.py
index 67fe13f..a70cda5 100644
--- a/simulator_tests/test_o3_model_selection.py
+++ b/simulator_tests/test_o3_model_selection.py
@@ -44,27 +44,33 @@ class O3ModelSelectionTest(BaseSimulatorTest):
         """Test O3 model selection and usage"""
         try:
             self.logger.info(" Test: O3 model selection and usage validation")
-
+
             # Check which API keys are configured
-            check_cmd = ["docker", "exec", self.container_name, "python", "-c",
-                         "import os; print(f'OPENAI_KEY:{bool(os.environ.get(\"OPENAI_API_KEY\"))}|OPENROUTER_KEY:{bool(os.environ.get(\"OPENROUTER_API_KEY\"))}')"]
+            check_cmd = [
+                "docker",
+                "exec",
+                self.container_name,
+                "python",
+                "-c",
+                'import os; print(f\'OPENAI_KEY:{bool(os.environ.get("OPENAI_API_KEY"))}|OPENROUTER_KEY:{bool(os.environ.get("OPENROUTER_API_KEY"))}\')',
+            ]
             result = subprocess.run(check_cmd, capture_output=True, text=True)
-
+
             has_openai = False
             has_openrouter = False
-
+
             if result.returncode == 0:
                 output = result.stdout.strip()
                 if "OPENAI_KEY:True" in output:
                     has_openai = True
                 if "OPENROUTER_KEY:True" in output:
                     has_openrouter = True
-
+
             # If only OpenRouter is configured, adjust test expectations
             if has_openrouter and not has_openai:
                 self.logger.info(" ℹ️ Only OpenRouter configured - O3 models will be routed through OpenRouter")
                 return self._run_openrouter_o3_test()
-
+
             # Original test for when OpenAI is configured
             self.logger.info(" ℹ️ OpenAI API configured - expecting direct OpenAI API calls")

@@ -220,10 +226,10 @@ def multiply(x, y):
         try:
             # Setup test files
             self.setup_test_files()
-
+
             # Test 1: O3 model via OpenRouter
             self.logger.info(" 1: Testing O3 model via OpenRouter")
-
+
             response1, _ = self.call_mcp_tool(
                 "chat",
                 {
@@ -232,16 +238,16 @@ def multiply(x, y):
                     "temperature": 1.0,
                 },
             )
-
+
             if not response1:
                 self.logger.error(" ❌ O3 model test via OpenRouter failed")
                 return False
-
+
             self.logger.info(" ✅ O3 model call via OpenRouter completed")
-
+
             # Test 2: O3-mini model via OpenRouter
             self.logger.info(" 2: Testing O3-mini model via OpenRouter")
-
+
             response2, _ = self.call_mcp_tool(
                 "chat",
                 {
@@ -250,16 +256,16 @@ def multiply(x, y):
                     "temperature": 1.0,
                 },
             )
-
+
             if not response2:
                 self.logger.error(" ❌ O3-mini model test via OpenRouter failed")
                 return False
-
+
             self.logger.info(" ✅ O3-mini model call via OpenRouter completed")
-
+
             # Test 3: Codereview with O3 via OpenRouter
             self.logger.info(" 3: Testing O3 with codereview tool via OpenRouter")
-
+
             test_code = """def add(a, b):
     return a + b

@@ -267,7 +273,7 @@ def multiply(x, y):
     return x * y
 """
             test_file = self.create_additional_test_file("simple_math.py", test_code)
-
+
             response3, _ = self.call_mcp_tool(
                 "codereview",
                 {
@@ -277,53 +283,61 @@ def multiply(x, y):
                     "temperature": 1.0,
                 },
             )
-
+
             if not response3:
                 self.logger.error(" ❌ O3 with codereview tool via OpenRouter failed")
                 return False
-
+
             self.logger.info(" ✅ O3 with codereview tool via OpenRouter completed")
-
+
             # Validate OpenRouter usage in logs
             self.logger.info(" 4: Validating OpenRouter usage in logs")
             logs = self.get_recent_server_logs()
-
+
             # Check for OpenRouter API calls
-            openrouter_api_logs = [line for line in logs.split("\n") if "openrouter" in line.lower() and ("API" in line or "request" in line)]
-
+            openrouter_api_logs = [
+                line
+                for line in logs.split("\n")
+                if "openrouter" in line.lower() and ("API" in line or "request" in line)
+            ]
+
             # Check for model resolution through OpenRouter
-            openrouter_model_logs = [line for line in logs.split("\n")
-                                     if "openrouter" in line.lower() and ("o3" in line or "model" in line)]
-
+            openrouter_model_logs = [
+                line for line in logs.split("\n") if "openrouter" in line.lower() and ("o3" in line or "model" in line)
+            ]
+
             # Check for successful responses
-            openrouter_response_logs = [line for line in logs.split("\n") if "openrouter" in line.lower() and "response" in line]
-
+            openrouter_response_logs = [
+                line for line in logs.split("\n") if "openrouter" in line.lower() and "response" in line
+            ]
+
             self.logger.info(f" OpenRouter API logs: {len(openrouter_api_logs)}")
             self.logger.info(f" OpenRouter model logs: {len(openrouter_model_logs)}")
             self.logger.info(f" OpenRouter response logs: {len(openrouter_response_logs)}")
-
+
             # Success criteria for OpenRouter
             openrouter_used = len(openrouter_api_logs) >= 3 or len(openrouter_model_logs) >= 3
             all_calls_succeeded = response1 and response2 and response3
-
+
             success_criteria = [
                 ("All O3 model calls succeeded", all_calls_succeeded),
                 ("OpenRouter provider was used", openrouter_used),
             ]
-
+
             passed_criteria = sum(1 for _, passed in success_criteria if passed)
             self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
-
+
             for criterion, passed in success_criteria:
                 status = "✅" if passed else "❌"
                 self.logger.info(f" {status} {criterion}")
-
+
             if passed_criteria == len(success_criteria):
                 self.logger.info(" ✅ O3 model selection via OpenRouter passed")
                 return True
             else:
                 self.logger.error(" ❌ O3 model selection via OpenRouter failed")
                 return False
-
+
         except Exception as e:
             self.logger.error(f"OpenRouter O3 test failed: {e}")
             return False
diff --git a/simulator_tests/test_openrouter_fallback.py b/simulator_tests/test_openrouter_fallback.py
index 48fc1d3..570e5df 100644
--- a/simulator_tests/test_openrouter_fallback.py
+++ b/simulator_tests/test_openrouter_fallback.py
@@ -8,7 +8,6 @@ Tests that verify the system correctly falls back to OpenRouter when:
 - Auto mode correctly selects OpenRouter models
 """

-import json
 import subprocess

 from .base_test import BaseSimulatorTest
@@ -45,6 +44,22 @@ class OpenRouterFallbackTest(BaseSimulatorTest):
         try:
             self.logger.info("Test: OpenRouter fallback behavior when only provider available")

+            # Check if OpenRouter API key is configured
+            check_cmd = [
+                "docker",
+                "exec",
+                self.container_name,
+                "python",
+                "-c",
+                'import os; print("OPENROUTER_KEY:" + str(bool(os.environ.get("OPENROUTER_API_KEY"))))',
+            ]
+            result = subprocess.run(check_cmd, capture_output=True, text=True)
+
+            if result.returncode == 0 and "OPENROUTER_KEY:False" in result.stdout:
+                self.logger.info(" ⚠️ OpenRouter API key not configured - skipping test")
+                self.logger.info(" ℹ️ This test requires OPENROUTER_API_KEY to be set in .env")
+                return True  # Return True to indicate test is skipped, not failed
+
             # Setup test files
             self.setup_test_files()

@@ -75,7 +90,7 @@ class OpenRouterFallbackTest(BaseSimulatorTest):
     for num in numbers:
         total += num
     return total"""
-
+
         test_file = self.create_additional_test_file("sum_function.py", test_code)

         response2, _ = self.call_mcp_tool(
@@ -137,28 +152,29 @@ class OpenRouterFallbackTest(BaseSimulatorTest):
             # Check for provider fallback logs
             fallback_logs = [
-                line for line in logs.split("\n")
-                if "No Gemini API key found" in line or
-                "No OpenAI API key found" in line or
-                "Only OpenRouter available" in line or
-                "Using OpenRouter" in line
+                line
+                for line in logs.split("\n")
+                if "No Gemini API key found" in line
+                or "No OpenAI API key found" in line
+                or "Only OpenRouter available" in line
+                or "Using OpenRouter" in line
             ]

             # Check for OpenRouter provider initialization
             provider_logs = [
-                line for line in logs.split("\n")
-                if "OpenRouter provider" in line or
-                "OpenRouterProvider" in line or
-                "openrouter.ai/api/v1" in line
+                line
+                for line in logs.split("\n")
+                if "OpenRouter provider" in line or "OpenRouterProvider" in line or "openrouter.ai/api/v1" in line
             ]

             # Check for model resolution through OpenRouter
             model_resolution_logs = [
-                line for line in logs.split("\n")
-                if ("Resolved model" in line and "via OpenRouter" in line) or
-                ("Model alias" in line and "resolved to" in line) or
-                ("flash" in line and "gemini-flash" in line) or
-                ("pro" in line and "gemini-pro" in line)
+                line
+                for line in logs.split("\n")
+                if ("Resolved model" in line and "via OpenRouter" in line)
+                or ("Model alias" in line and "resolved to" in line)
+                or ("flash" in line and "gemini-flash" in line)
+                or ("pro" in line and "gemini-pro" in line)
             ]

             # Log findings
@@ -172,7 +188,7 @@ class OpenRouterFallbackTest(BaseSimulatorTest):
                 self.logger.debug(" 📋 Sample fallback logs:")
                 for log in fallback_logs[:3]:
                     self.logger.debug(f" {log}")
-
+
             if provider_logs:
                 self.logger.debug(" 📋 Sample provider logs:")
                 for log in provider_logs[:3]:
@@ -222,4 +238,4 @@ def main():


 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/simulator_tests/test_openrouter_models.py b/simulator_tests/test_openrouter_models.py
index 1dcc281..1da13d4 100644
--- a/simulator_tests/test_openrouter_models.py
+++ b/simulator_tests/test_openrouter_models.py
@@ -9,7 +9,6 @@ Tests that verify OpenRouter functionality including:
 - Error handling when models are not available
 """

-import json
 import subprocess

 from .base_test import BaseSimulatorTest
@@ -47,6 +46,22 @@ class OpenRouterModelsTest(BaseSimulatorTest):
         try:
             self.logger.info("Test: OpenRouter model functionality and alias mapping")

+            # Check if OpenRouter API key is configured
+            check_cmd = [
+                "docker",
+                "exec",
+                self.container_name,
+                "python",
+                "-c",
+                'import os; print("OPENROUTER_KEY:" + str(bool(os.environ.get("OPENROUTER_API_KEY"))))',
+            ]
+            result = subprocess.run(check_cmd, capture_output=True, text=True)
+
+            if result.returncode == 0 and "OPENROUTER_KEY:False" in result.stdout:
+                self.logger.info(" ⚠️ OpenRouter API key not configured - skipping test")
+                self.logger.info(" ℹ️ This test requires OPENROUTER_API_KEY to be set in .env")
+                return True  # Return True to indicate test is skipped, not failed
+
             # Setup test files for later use
             self.setup_test_files()

@@ -186,18 +201,20 @@ class OpenRouterModelsTest(BaseSimulatorTest):
             # Check for OpenRouter API calls
             openrouter_logs = [line for line in logs.split("\n") if "openrouter" in line.lower()]
             openrouter_api_logs = [line for line in logs.split("\n") if "openrouter.ai/api/v1" in line]
-
+
             # Check for specific model mappings
             flash_mapping_logs = [
-                line for line in logs.split("\n")
-                if ("flash" in line and "google/gemini-flash" in line) or
-                ("Resolved model" in line and "google/gemini-flash" in line)
+                line
+                for line in logs.split("\n")
+                if ("flash" in line and "google/gemini-flash" in line)
+                or ("Resolved model" in line and "google/gemini-flash" in line)
             ]
-
+
             pro_mapping_logs = [
-                line for line in logs.split("\n")
-                if ("pro" in line and "google/gemini-pro" in line) or
-                ("Resolved model" in line and "google/gemini-pro" in line)
+                line
+                for line in logs.split("\n")
+                if ("pro" in line and "google/gemini-pro" in line)
+                or ("Resolved model" in line and "google/gemini-pro" in line)
             ]

             # Log findings
@@ -215,7 +232,7 @@ class OpenRouterModelsTest(BaseSimulatorTest):
             # Success criteria
             openrouter_api_used = len(openrouter_api_logs) > 0
             models_mapped = len(flash_mapping_logs) > 0 or len(pro_mapping_logs) > 0
-
+
             success_criteria = [
                 ("OpenRouter API calls made", openrouter_api_used),
                 ("Model aliases mapped correctly", models_mapped),
@@ -255,4 +272,4 @@ def main():


 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/test_model_mapping.py b/test_model_mapping.py
index 5b6b585..c3705ad 100755
--- a/test_model_mapping.py
+++ b/test_model_mapping.py
@@ -4,135 +4,110 @@ Simple test script to demonstrate model mapping through the MCP server.
 Tests how model aliases (flash, pro, o3) are mapped to OpenRouter models.
 """

-import subprocess
 import json
+import subprocess
 import sys
-from typing import Dict, Any
+from typing import Any

-def call_mcp_server(model: str, message: str = "Hello, which model are you?") -> Dict[str, Any]:
+
+def call_mcp_server(model: str, message: str = "Hello, which model are you?") -> dict[str, Any]:
     """Call the MCP server with a specific model and return the response."""
-
+
     # Prepare the request
     request = {
         "jsonrpc": "2.0",
         "method": "completion",
-        "params": {
-            "model": model,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": message
-                }
-            ],
-            "max_tokens": 100
-        },
-        "id": 1
+        "params": {"model": model, "messages": [{"role": "user", "content": message}], "max_tokens": 100},
+        "id": 1,
     }
-
+
     # Call the server
     cmd = [sys.executable, "server.py"]
-
+
     try:
         # Send request to stdin and capture output
         process = subprocess.Popen(
-            cmd,
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True
+            cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
         )
-
+
         stdout, stderr = process.communicate(input=json.dumps(request))
-
+
         if process.returncode != 0:
-            return {
-                "error": f"Server returned non-zero exit code: {process.returncode}",
-                "stderr": stderr
-            }
-
+            return {"error": f"Server returned non-zero exit code: {process.returncode}", "stderr": stderr}
+
         # Parse the response
         try:
             response = json.loads(stdout)
             return response
         except json.JSONDecodeError:
-            return {
-                "error": "Failed to parse JSON response",
-                "stdout": stdout,
-                "stderr": stderr
-            }
-
-    except Exception as e:
-        return {
-            "error": f"Failed to call server: {str(e)}"
-        }
+            return {"error": "Failed to parse JSON response", "stdout": stdout, "stderr": stderr}

-def extract_model_info(response: Dict[str, Any]) -> Dict[str, str]:
+    except Exception as e:
+        return {"error": f"Failed to call server: {str(e)}"}
+
+
+def extract_model_info(response: dict[str, Any]) -> dict[str, str]:
     """Extract model information from the response."""
-
+
     if "error" in response:
-        return {
-            "status": "error",
-            "message": response.get("error", "Unknown error")
-        }
-
+        return {"status": "error", "message": response.get("error", "Unknown error")}
+
     # Look for result in the response
     result = response.get("result", {})
-
+
     # Extract relevant information
-    info = {
-        "status": "success",
-        "provider": "unknown",
-        "model": "unknown"
-    }
-
+    info = {"status": "success", "provider": "unknown", "model": "unknown"}
+
     # Try to find provider and model info in the response
     # This might be in metadata or debug info depending on server implementation
     if "metadata" in result:
         metadata = result["metadata"]
         info["provider"] = metadata.get("provider", "unknown")
         info["model"] = metadata.get("model", "unknown")
-
+
     # Also check if the model info is in the response content itself
     if "content" in result:
         content = result["content"]
         # Simple heuristic to detect OpenRouter models
         if "openrouter" in content.lower() or any(x in content.lower() for x in ["claude", "gpt", "gemini"]):
             info["provider"] = "openrouter"
-
+
     return info

+
 def main():
     """Test model mapping for different aliases."""
-
+
     print("Model Mapping Test for MCP Server")
     print("=" * 50)
     print()
-
+
     # Test models
     test_models = ["flash", "pro", "o3"]
-
+
     for model in test_models:
         print(f"Testing model: '{model}'")
         print("-" * 30)
-
+
         response = call_mcp_server(model)
         model_info = extract_model_info(response)
-
+
         if model_info["status"] == "error":
             print(f" ❌ Error: {model_info['message']}")
         else:
             print(f" ✓ Provider: {model_info['provider']}")
             print(f" ✓ Model: {model_info['model']}")
-
+
         # Print raw response for debugging
         if "--debug" in sys.argv:
             print("\nDebug - Raw Response:")
             print(json.dumps(response, indent=2))
-
+
         print()
-
+
     print("\nNote: This test assumes the MCP server is configured with OpenRouter.")
     print("The actual model mappings depend on the server configuration.")

+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py
index a32d41a..600400d 100644
--- a/tests/test_openrouter_provider.py
+++ b/tests/test_openrouter_provider.py
@@ -193,5 +193,5 @@ class TestOpenRouterFunctionality:
         provider = OpenRouterProvider(api_key="test-key")

         # Registry should be initialized
-        assert hasattr(provider, '_registry')
+        assert hasattr(provider, "_registry")
         assert provider._registry is not None
diff --git a/tools/base.py b/tools/base.py
index 1fdcbf0..a6ea035 100644
--- a/tools/base.py
+++ b/tools/base.py
@@ -186,7 +186,7 @@ class BaseTool(ABC):
             # Import registry directly to show available aliases
             # This works even without an API key
             from providers.openrouter_registry import OpenRouterModelRegistry
-
+
             registry = OpenRouterModelRegistry()
             aliases = registry.list_aliases()

@@ -195,9 +195,7 @@ class BaseTool(ABC):
                 # Show all aliases so Claude knows every option available
                 all_aliases = sorted(aliases)
                 alias_list = ", ".join(f"'{a}'" for a in all_aliases)
-                model_desc_parts.append(
-                    f"\nOpenRouter models available via aliases: {alias_list}"
-                )
+                model_desc_parts.append(f"\nOpenRouter models available via aliases: {alias_list}")
             else:
                 model_desc_parts.append(
                     "\nOpenRouter models: If configured, you can also use ANY model available on OpenRouter."
@@ -225,7 +223,7 @@ class BaseTool(ABC):
             # Import registry directly to show available aliases
             # This works even without an API key
             from providers.openrouter_registry import OpenRouterModelRegistry
-
+
             registry = OpenRouterModelRegistry()
             aliases = registry.list_aliases()