New openrouter tests

Fixed flash aliases
More models
This commit is contained in:
Fahad
2025-06-13 07:00:53 +04:00
parent 2cdb92460b
commit 8cbbe94417
6 changed files with 659 additions and 9 deletions

View File

@@ -76,7 +76,7 @@
},
{
"model_name": "google/gemini-pro-1.5",
"aliases": ["gemini-pro", "gemini", "pro-openrouter"],
"aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"],
"context_window": 1048576,
"supports_extended_thinking": false,
"supports_json_mode": true,
@@ -85,7 +85,7 @@
},
{
"model_name": "google/gemini-flash-1.5-8b",
"aliases": ["gemini-flash", "flash-openrouter", "flash-8b"],
"aliases": ["flash","gemini-flash", "flash-openrouter", "flash-8b"],
"context_window": 1048576,
"supports_extended_thinking": false,
"supports_json_mode": true,
@@ -103,7 +103,7 @@
},
{
"model_name": "meta-llama/llama-3-70b",
"aliases": ["llama3-70b", "llama-70b", "llama3"],
"aliases": ["llama","llama3-70b", "llama-70b", "llama3"],
"context_window": 8192,
"supports_extended_thinking": false,
"supports_json_mode": false,
@@ -136,6 +136,33 @@
"supports_json_mode": false,
"supports_function_calling": false,
"description": "Perplexity's online model with web search"
},
{
"model_name": "openai/o3",
"aliases": ["o3"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o3 model - well-rounded and powerful across domains"
},
{
"model_name": "openai/o3-mini",
"aliases": ["o3-mini", "o3mini"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o3-mini reasoning model - cost-efficient with STEM performance"
},
{
"model_name": "openai/o3-mini-high",
"aliases": ["o3-mini-high", "o3mini-high"],
"context_window": 200000,
"supports_extended_thinking": false,
"supports_json_mode": true,
"supports_function_calling": true,
"description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems"
}
]
}
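Each entry pairs the full OpenRouter model_name with a list of aliases, so short names such as "flash", "pro", or "o3" can be resolved locally before a request is sent upstream. A minimal sketch of that lookup, assuming `entries` is the parsed list of model dicts from the config above (the resolve_alias helper is illustrative, not the server's actual API):

def resolve_alias(name: str, entries: list[dict]) -> str:
    """Return the full model_name for an alias, or the input unchanged (illustrative helper)."""
    wanted = name.lower()
    for entry in entries:
        if wanted == entry["model_name"].lower():
            return entry["model_name"]
        if wanted in (alias.lower() for alias in entry.get("aliases", [])):
            return entry["model_name"]
    return name

# resolve_alias("flash", entries)  -> "google/gemini-flash-1.5-8b"
# resolve_alias("o3mini", entries) -> "openai/o3-mini"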

View File

@@ -237,14 +237,14 @@ fi
# Build and start services
echo " - Building Zen MCP Server image..."
if $COMPOSE_CMD build --no-cache >/dev/null 2>&1; then
if $COMPOSE_CMD build >/dev/null 2>&1; then
echo "✅ Docker image built successfully!"
else
echo "❌ Failed to build Docker image. Run '$COMPOSE_CMD build' manually to see errors."
exit 1
fi
echo " - Starting Redis (needed for conversation memory)... please wait"
echo " - Starting all services (Redis + Zen MCP Server)..."
if $COMPOSE_CMD up -d >/dev/null 2>&1; then
echo "✅ Services started successfully!"
else
@@ -252,10 +252,6 @@ else
exit 1
fi
# Wait for services to be healthy
echo " - Waiting for Redis to be ready..."
sleep 3
# Check service status
if $COMPOSE_CMD ps --format table | grep -q "Up" 2>/dev/null || false; then
echo "✅ All services are running!"

View File

@@ -14,6 +14,8 @@ from .test_cross_tool_continuation import CrossToolContinuationTest
from .test_logs_validation import LogsValidationTest
from .test_model_thinking_config import TestModelThinkingConfig
from .test_o3_model_selection import O3ModelSelectionTest
from .test_openrouter_fallback import OpenRouterFallbackTest
from .test_openrouter_models import OpenRouterModelsTest
from .test_per_tool_deduplication import PerToolDeduplicationTest
from .test_redis_validation import RedisValidationTest
from .test_token_allocation_validation import TokenAllocationValidationTest
@@ -29,6 +31,8 @@ TEST_REGISTRY = {
"redis_validation": RedisValidationTest,
"model_thinking_config": TestModelThinkingConfig,
"o3_model_selection": O3ModelSelectionTest,
"openrouter_fallback": OpenRouterFallbackTest,
"openrouter_models": OpenRouterModelsTest,
"token_allocation_validation": TokenAllocationValidationTest,
"conversation_chain_validation": ConversationChainValidationTest,
}
@@ -44,6 +48,8 @@ __all__ = [
"RedisValidationTest",
"TestModelThinkingConfig",
"O3ModelSelectionTest",
"OpenRouterFallbackTest",
"OpenRouterModelsTest",
"TokenAllocationValidationTest",
"ConversationChainValidationTest",
"TEST_REGISTRY",

View File

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
OpenRouter Fallback Test
Tests that verify the system correctly falls back to OpenRouter when:
- Only OPENROUTER_API_KEY is configured
- Native models (flash, pro) are requested but map to OpenRouter equivalents
- Auto mode correctly selects OpenRouter models
"""
import json
import subprocess
from .base_test import BaseSimulatorTest
class OpenRouterFallbackTest(BaseSimulatorTest):
"""Test OpenRouter fallback behavior when it's the only provider"""
@property
def test_name(self) -> str:
return "openrouter_fallback"
@property
def test_description(self) -> str:
return "OpenRouter fallback behavior when only provider"
def get_recent_server_logs(self) -> str:
"""Get recent server logs from the log file directly"""
try:
cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
return result.stdout
else:
self.logger.warning(f"Failed to read server logs: {result.stderr}")
return ""
except Exception as e:
self.logger.error(f"Failed to get server logs: {e}")
return ""
def run_test(self) -> bool:
"""Test OpenRouter fallback behavior"""
try:
self.logger.info("Test: OpenRouter fallback behavior when only provider available")
# Setup test files
self.setup_test_files()
# Test 1: Auto mode should work with OpenRouter
self.logger.info(" 1: Testing auto mode with OpenRouter as only provider")
response1, continuation_id = self.call_mcp_tool(
"chat",
{
"prompt": "What is 2 + 2? Give a brief answer.",
# No model specified - should use auto mode
"temperature": 0.1,
},
)
if not response1:
self.logger.error(" ❌ Auto mode with OpenRouter failed")
return False
self.logger.info(" ✅ Auto mode call completed with OpenRouter")
# Test 2: Flash model should map to OpenRouter equivalent
self.logger.info(" 2: Testing flash model mapping to OpenRouter")
# Use codereview tool to test a different tool type
test_code = """def calculate_sum(numbers):
total = 0
for num in numbers:
total += num
return total"""
test_file = self.create_additional_test_file("sum_function.py", test_code)
response2, _ = self.call_mcp_tool(
"codereview",
{
"files": [test_file],
"prompt": "Quick review of this sum function",
"model": "flash",
"temperature": 0.1,
},
)
if not response2:
self.logger.error(" ❌ Flash model mapping to OpenRouter failed")
return False
self.logger.info(" ✅ Flash model successfully mapped to OpenRouter")
# Test 3: Pro model should map to OpenRouter equivalent
self.logger.info(" 3: Testing pro model mapping to OpenRouter")
response3, _ = self.call_mcp_tool(
"analyze",
{
"files": [self.test_files["python"]],
"prompt": "Analyze the structure of this Python code",
"model": "pro",
"temperature": 0.1,
},
)
if not response3:
self.logger.error(" ❌ Pro model mapping to OpenRouter failed")
return False
self.logger.info(" ✅ Pro model successfully mapped to OpenRouter")
# Test 4: Debug tool with OpenRouter
self.logger.info(" 4: Testing debug tool with OpenRouter")
response4, _ = self.call_mcp_tool(
"debug",
{
"prompt": "Why might a function return None instead of a value?",
"model": "flash", # Should map to OpenRouter
"temperature": 0.1,
},
)
if not response4:
self.logger.error(" ❌ Debug tool with OpenRouter failed")
return False
self.logger.info(" ✅ Debug tool working with OpenRouter")
# Test 5: Validate logs show OpenRouter is being used
self.logger.info(" 5: Validating OpenRouter is the active provider")
logs = self.get_recent_server_logs()
# Check for provider fallback logs
fallback_logs = [
line for line in logs.split("\n")
if "No Gemini API key found" in line or
"No OpenAI API key found" in line or
"Only OpenRouter available" in line or
"Using OpenRouter" in line
]
# Check for OpenRouter provider initialization
provider_logs = [
line for line in logs.split("\n")
if "OpenRouter provider" in line or
"OpenRouterProvider" in line or
"openrouter.ai/api/v1" in line
]
# Check for model resolution through OpenRouter
model_resolution_logs = [
line for line in logs.split("\n")
if ("Resolved model" in line and "via OpenRouter" in line) or
("Model alias" in line and "resolved to" in line) or
("flash" in line and "gemini-flash" in line) or
("pro" in line and "gemini-pro" in line)
]
# Log findings
self.logger.info(f" Fallback indication logs: {len(fallback_logs)}")
self.logger.info(f" OpenRouter provider logs: {len(provider_logs)}")
self.logger.info(f" Model resolution logs: {len(model_resolution_logs)}")
# Sample logs for debugging
if self.verbose:
if fallback_logs:
self.logger.debug(" 📋 Sample fallback logs:")
for log in fallback_logs[:3]:
self.logger.debug(f" {log}")
if provider_logs:
self.logger.debug(" 📋 Sample provider logs:")
for log in provider_logs[:3]:
self.logger.debug(f" {log}")
# Success criteria
openrouter_active = len(provider_logs) > 0
models_resolved = len(model_resolution_logs) > 0
all_tools_worked = True # We checked this above
success_criteria = [
("OpenRouter provider active", openrouter_active),
("Models resolved through OpenRouter", models_resolved),
("All tools worked with OpenRouter", all_tools_worked),
]
passed_criteria = sum(1 for _, passed in success_criteria if passed)
self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
for criterion, passed in success_criteria:
status = "" if passed else ""
self.logger.info(f" {status} {criterion}")
if passed_criteria >= 2: # At least 2 out of 3 criteria
self.logger.info(" ✅ OpenRouter fallback test passed")
return True
else:
self.logger.error(" ❌ OpenRouter fallback test failed")
return False
except Exception as e:
self.logger.error(f"OpenRouter fallback test failed: {e}")
return False
finally:
self.cleanup_test_files()
def main():
"""Run the OpenRouter fallback tests"""
import sys
verbose = "--verbose" in sys.argv or "-v" in sys.argv
test = OpenRouterFallbackTest(verbose=verbose)
success = test.run_test()
sys.exit(0 if success else 1)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
OpenRouter Model Tests
Tests that verify OpenRouter functionality including:
- Model alias resolution (flash, pro, o3, etc. map to OpenRouter equivalents)
- Multiple OpenRouter models work correctly
- Conversation continuity works with OpenRouter models
- Error handling when models are not available
"""
import json
import subprocess
from .base_test import BaseSimulatorTest
class OpenRouterModelsTest(BaseSimulatorTest):
"""Test OpenRouter model functionality and alias mapping"""
@property
def test_name(self) -> str:
return "openrouter_models"
@property
def test_description(self) -> str:
return "OpenRouter model functionality and alias mapping"
def get_recent_server_logs(self) -> str:
"""Get recent server logs from the log file directly"""
try:
# Read logs directly from the log file
cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
return result.stdout
else:
self.logger.warning(f"Failed to read server logs: {result.stderr}")
return ""
except Exception as e:
self.logger.error(f"Failed to get server logs: {e}")
return ""
def run_test(self) -> bool:
"""Test OpenRouter model functionality"""
try:
self.logger.info("Test: OpenRouter model functionality and alias mapping")
# Setup test files for later use
self.setup_test_files()
# Test 1: Flash alias mapping to OpenRouter
self.logger.info(" 1: Testing 'flash' alias (should map to google/gemini-flash-1.5-8b)")
response1, continuation_id = self.call_mcp_tool(
"chat",
{
"prompt": "Say 'Hello from Flash model!' and nothing else.",
"model": "flash",
"temperature": 0.1,
},
)
if not response1:
self.logger.error(" ❌ Flash alias test failed")
return False
self.logger.info(" ✅ Flash alias call completed")
if continuation_id:
self.logger.info(f" ✅ Got continuation_id: {continuation_id}")
# Test 2: Pro alias mapping to OpenRouter
self.logger.info(" 2: Testing 'pro' alias (should map to google/gemini-pro-1.5)")
response2, _ = self.call_mcp_tool(
"chat",
{
"prompt": "Say 'Hello from Pro model!' and nothing else.",
"model": "pro",
"temperature": 0.1,
},
)
if not response2:
self.logger.error(" ❌ Pro alias test failed")
return False
self.logger.info(" ✅ Pro alias call completed")
# Test 3: O3 alias mapping to OpenRouter (should map to openai/o3)
self.logger.info(" 3: Testing 'o3' alias (should map to openai/o3)")
response3, _ = self.call_mcp_tool(
"chat",
{
"prompt": "Say 'Hello from O3 model!' and nothing else.",
"model": "o3",
"temperature": 0.1,
},
)
if not response3:
self.logger.error(" ❌ O3 alias test failed")
return False
self.logger.info(" ✅ O3 alias call completed")
# Test 4: Direct OpenRouter model name
self.logger.info(" 4: Testing direct OpenRouter model name (anthropic/claude-3-haiku)")
response4, _ = self.call_mcp_tool(
"chat",
{
"prompt": "Say 'Hello from Claude Haiku!' and nothing else.",
"model": "anthropic/claude-3-haiku",
"temperature": 0.1,
},
)
if not response4:
self.logger.error(" ❌ Direct OpenRouter model test failed")
return False
self.logger.info(" ✅ Direct OpenRouter model call completed")
# Test 5: OpenRouter alias from config
self.logger.info(" 5: Testing OpenRouter alias from config ('opus' -> anthropic/claude-3-opus)")
response5, _ = self.call_mcp_tool(
"chat",
{
"prompt": "Say 'Hello from Opus!' and nothing else.",
"model": "opus",
"temperature": 0.1,
},
)
if not response5:
self.logger.error(" ❌ OpenRouter alias test failed")
return False
self.logger.info(" ✅ OpenRouter alias call completed")
# Test 6: Conversation continuity with OpenRouter models
self.logger.info(" 6: Testing conversation continuity with OpenRouter")
response6, new_continuation_id = self.call_mcp_tool(
"chat",
{
"prompt": "Remember this number: 42. What number did I just tell you?",
"model": "sonnet", # Claude Sonnet via OpenRouter
"temperature": 0.1,
},
)
if not response6 or not new_continuation_id:
self.logger.error(" ❌ Failed to start conversation with continuation_id")
return False
# Continue the conversation
response7, _ = self.call_mcp_tool(
"chat",
{
"prompt": "What was the number I told you earlier?",
"model": "sonnet",
"continuation_id": new_continuation_id,
"temperature": 0.1,
},
)
if not response7:
self.logger.error(" ❌ Failed to continue conversation")
return False
# Check if the model remembered the number
if "42" in response7:
self.logger.info(" ✅ Conversation continuity working with OpenRouter")
else:
self.logger.warning(" ⚠️ Model may not have remembered the number")
# Test 7: Validate OpenRouter API usage from logs
self.logger.info(" 7: Validating OpenRouter API usage in logs")
logs = self.get_recent_server_logs()
# Check for OpenRouter API calls
openrouter_logs = [line for line in logs.split("\n") if "openrouter" in line.lower()]
openrouter_api_logs = [line for line in logs.split("\n") if "openrouter.ai/api/v1" in line]
# Check for specific model mappings
flash_mapping_logs = [
line for line in logs.split("\n")
if ("flash" in line and "google/gemini-flash" in line) or
("Resolved model" in line and "google/gemini-flash" in line)
]
pro_mapping_logs = [
line for line in logs.split("\n")
if ("pro" in line and "google/gemini-pro" in line) or
("Resolved model" in line and "google/gemini-pro" in line)
]
# Log findings
self.logger.info(f" OpenRouter-related logs: {len(openrouter_logs)}")
self.logger.info(f" OpenRouter API logs: {len(openrouter_api_logs)}")
self.logger.info(f" Flash mapping logs: {len(flash_mapping_logs)}")
self.logger.info(f" Pro mapping logs: {len(pro_mapping_logs)}")
# Sample log output for debugging
if self.verbose and openrouter_logs:
self.logger.debug(" 📋 Sample OpenRouter logs:")
for log in openrouter_logs[:5]:
self.logger.debug(f" {log}")
# Success criteria
openrouter_api_used = len(openrouter_api_logs) > 0
models_mapped = len(flash_mapping_logs) > 0 or len(pro_mapping_logs) > 0
success_criteria = [
("OpenRouter API calls made", openrouter_api_used),
("Model aliases mapped correctly", models_mapped),
("All model calls succeeded", True), # We already checked this above
]
passed_criteria = sum(1 for _, passed in success_criteria if passed)
self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
for criterion, passed in success_criteria:
status = "" if passed else ""
self.logger.info(f" {status} {criterion}")
if passed_criteria >= 2: # At least 2 out of 3 criteria
self.logger.info(" ✅ OpenRouter model tests passed")
return True
else:
self.logger.error(" ❌ OpenRouter model tests failed")
return False
except Exception as e:
self.logger.error(f"OpenRouter model test failed: {e}")
return False
finally:
self.cleanup_test_files()
def main():
"""Run the OpenRouter model tests"""
import sys
verbose = "--verbose" in sys.argv or "-v" in sys.argv
test = OpenRouterModelsTest(verbose=verbose)
success = test.run_test()
sys.exit(0 if success else 1)
if __name__ == "__main__":
main()

test_model_mapping.py (executable file, 138 lines)
View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
Simple test script to demonstrate model mapping through the MCP server.
Tests how model aliases (flash, pro, o3) are mapped to OpenRouter models.
"""
import subprocess
import json
import sys
from typing import Dict, Any
def call_mcp_server(model: str, message: str = "Hello, which model are you?") -> Dict[str, Any]:
"""Call the MCP server with a specific model and return the response."""
# Prepare the request
request = {
"jsonrpc": "2.0",
"method": "completion",
"params": {
"model": model,
"messages": [
{
"role": "user",
"content": message
}
],
"max_tokens": 100
},
"id": 1
}
# Call the server
cmd = [sys.executable, "server.py"]
try:
# Send request to stdin and capture output
process = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
stdout, stderr = process.communicate(input=json.dumps(request))
if process.returncode != 0:
return {
"error": f"Server returned non-zero exit code: {process.returncode}",
"stderr": stderr
}
# Parse the response
try:
response = json.loads(stdout)
return response
except json.JSONDecodeError:
return {
"error": "Failed to parse JSON response",
"stdout": stdout,
"stderr": stderr
}
except Exception as e:
return {
"error": f"Failed to call server: {str(e)}"
}
def extract_model_info(response: Dict[str, Any]) -> Dict[str, str]:
"""Extract model information from the response."""
if "error" in response:
return {
"status": "error",
"message": response.get("error", "Unknown error")
}
# Look for result in the response
result = response.get("result", {})
# Extract relevant information
info = {
"status": "success",
"provider": "unknown",
"model": "unknown"
}
# Try to find provider and model info in the response
# This might be in metadata or debug info depending on server implementation
if "metadata" in result:
metadata = result["metadata"]
info["provider"] = metadata.get("provider", "unknown")
info["model"] = metadata.get("model", "unknown")
# Also check if the model info is in the response content itself
if "content" in result:
content = result["content"]
# Simple heuristic to detect OpenRouter models
if "openrouter" in content.lower() or any(x in content.lower() for x in ["claude", "gpt", "gemini"]):
info["provider"] = "openrouter"
return info
def main():
"""Test model mapping for different aliases."""
print("Model Mapping Test for MCP Server")
print("=" * 50)
print()
# Test models
test_models = ["flash", "pro", "o3"]
for model in test_models:
print(f"Testing model: '{model}'")
print("-" * 30)
response = call_mcp_server(model)
model_info = extract_model_info(response)
if model_info["status"] == "error":
print(f" ❌ Error: {model_info['message']}")
else:
print(f" ✓ Provider: {model_info['provider']}")
print(f" ✓ Model: {model_info['model']}")
# Print raw response for debugging
if "--debug" in sys.argv:
print("\nDebug - Raw Response:")
print(json.dumps(response, indent=2))
print()
print("\nNote: This test assumes the MCP server is configured with OpenRouter.")
print("The actual model mappings depend on the server configuration.")
if __name__ == "__main__":
main()