New OpenRouter tests
Fixed flash aliases; added more models
@@ -76,7 +76,7 @@
     },
     {
       "model_name": "google/gemini-pro-1.5",
-      "aliases": ["gemini-pro", "gemini", "pro-openrouter"],
+      "aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"],
       "context_window": 1048576,
       "supports_extended_thinking": false,
       "supports_json_mode": true,
@@ -85,7 +85,7 @@
     },
     {
       "model_name": "google/gemini-flash-1.5-8b",
-      "aliases": ["gemini-flash", "flash-openrouter", "flash-8b"],
+      "aliases": ["flash","gemini-flash", "flash-openrouter", "flash-8b"],
       "context_window": 1048576,
       "supports_extended_thinking": false,
       "supports_json_mode": true,
@@ -103,7 +103,7 @@
     },
     {
       "model_name": "meta-llama/llama-3-70b",
-      "aliases": ["llama3-70b", "llama-70b", "llama3"],
+      "aliases": ["llama","llama3-70b", "llama-70b", "llama3"],
       "context_window": 8192,
       "supports_extended_thinking": false,
       "supports_json_mode": false,
@@ -136,6 +136,33 @@
       "supports_json_mode": false,
       "supports_function_calling": false,
       "description": "Perplexity's online model with web search"
     },
+    {
+      "model_name": "openai/o3",
+      "aliases": ["o3"],
+      "context_window": 200000,
+      "supports_extended_thinking": false,
+      "supports_json_mode": true,
+      "supports_function_calling": true,
+      "description": "OpenAI's o3 model - well-rounded and powerful across domains"
+    },
+    {
+      "model_name": "openai/o3-mini",
+      "aliases": ["o3-mini", "o3mini"],
+      "context_window": 200000,
+      "supports_extended_thinking": false,
+      "supports_json_mode": true,
+      "supports_function_calling": true,
+      "description": "OpenAI's o3-mini reasoning model - cost-efficient with STEM performance"
+    },
+    {
+      "model_name": "openai/o3-mini-high",
+      "aliases": ["o3-mini-high", "o3mini-high"],
+      "context_window": 200000,
+      "supports_extended_thinking": false,
+      "supports_json_mode": true,
+      "supports_function_calling": true,
+      "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems"
+    }
   ]
 }
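The new leading aliases ("pro", "flash", "llama") plus the added o3 entries are what let the short names used elsewhere in this commit (model "flash", "pro", "o3") resolve to full OpenRouter model identifiers. As a rough sketch of the lookup such a registry enables (not the server's actual resolver; the config path and the top-level "models" key are assumptions for illustration):

import json


def resolve_alias(name: str, config_path: str = "conf/custom_models.json") -> str:
    """Resolve a short alias such as 'flash' to its full OpenRouter model name.

    Unknown names (including already-qualified ones like 'anthropic/claude-3-haiku')
    are returned unchanged. The path and top-level key are assumed, not confirmed.
    """
    with open(config_path) as f:
        registry = json.load(f)

    target = name.lower()
    for entry in registry.get("models", []):  # "models" key assumed from the closing "]"/"}" above
        if target == entry["model_name"].lower():
            return entry["model_name"]
        if target in (alias.lower() for alias in entry.get("aliases", [])):
            return entry["model_name"]
    return name


# With the entries shown above: resolve_alias("flash") -> "google/gemini-flash-1.5-8b",
# resolve_alias("o3") -> "openai/o3"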
@@ -237,14 +237,14 @@ fi

 # Build and start services
 echo " - Building Zen MCP Server image..."
-if $COMPOSE_CMD build --no-cache >/dev/null 2>&1; then
+if $COMPOSE_CMD build >/dev/null 2>&1; then
     echo "✅ Docker image built successfully!"
 else
     echo "❌ Failed to build Docker image. Run '$COMPOSE_CMD build' manually to see errors."
     exit 1
 fi

-echo " - Starting Redis (needed for conversation memory)... please wait"
+echo " - Starting all services (Redis + Zen MCP Server)..."
 if $COMPOSE_CMD up -d >/dev/null 2>&1; then
     echo "✅ Services started successfully!"
 else
@@ -252,10 +252,6 @@ else
     exit 1
 fi

-# Wait for services to be healthy
-echo " - Waiting for Redis to be ready..."
-sleep 3
-
 # Check service status
 if $COMPOSE_CMD ps --format table | grep -q "Up" 2>/dev/null || false; then
     echo "✅ All services are running!"
@@ -14,6 +14,8 @@ from .test_cross_tool_continuation import CrossToolContinuationTest
 from .test_logs_validation import LogsValidationTest
 from .test_model_thinking_config import TestModelThinkingConfig
 from .test_o3_model_selection import O3ModelSelectionTest
+from .test_openrouter_fallback import OpenRouterFallbackTest
+from .test_openrouter_models import OpenRouterModelsTest
 from .test_per_tool_deduplication import PerToolDeduplicationTest
 from .test_redis_validation import RedisValidationTest
 from .test_token_allocation_validation import TokenAllocationValidationTest
@@ -29,6 +31,8 @@ TEST_REGISTRY = {
     "redis_validation": RedisValidationTest,
     "model_thinking_config": TestModelThinkingConfig,
     "o3_model_selection": O3ModelSelectionTest,
+    "openrouter_fallback": OpenRouterFallbackTest,
+    "openrouter_models": OpenRouterModelsTest,
     "token_allocation_validation": TokenAllocationValidationTest,
     "conversation_chain_validation": ConversationChainValidationTest,
 }
@@ -44,6 +48,8 @@ __all__ = [
     "RedisValidationTest",
     "TestModelThinkingConfig",
     "O3ModelSelectionTest",
+    "OpenRouterFallbackTest",
+    "OpenRouterModelsTest",
     "TokenAllocationValidationTest",
     "ConversationChainValidationTest",
     "TEST_REGISTRY",
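With the imports and registry entries above, the two new OpenRouter tests are addressable by name like every other simulator test. A minimal sketch of driving them through TEST_REGISTRY (the runner below is illustrative only; the project's actual test runner may differ):

from simulator_tests import TEST_REGISTRY


def run_selected(names, verbose=False):
    """Instantiate and run the named registry entries; True only if all pass."""
    results = {}
    for name in names:
        test_cls = TEST_REGISTRY[name]  # e.g. OpenRouterFallbackTest
        results[name] = test_cls(verbose=verbose).run_test()
    return all(results.values())


# e.g. run_selected(["openrouter_fallback", "openrouter_models"], verbose=True)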
simulator_tests/test_openrouter_fallback.py (new file, 225 lines)
@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
OpenRouter Fallback Test

Tests that verify the system correctly falls back to OpenRouter when:
- Only OPENROUTER_API_KEY is configured
- Native models (flash, pro) are requested but map to OpenRouter equivalents
- Auto mode correctly selects OpenRouter models
"""

import json
import subprocess

from .base_test import BaseSimulatorTest


class OpenRouterFallbackTest(BaseSimulatorTest):
    """Test OpenRouter fallback behavior when it's the only provider"""

    @property
    def test_name(self) -> str:
        return "openrouter_fallback"

    @property
    def test_description(self) -> str:
        return "OpenRouter fallback behavior when only provider"

    def get_recent_server_logs(self) -> str:
        """Get recent server logs from the log file directly"""
        try:
            cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return result.stdout
            else:
                self.logger.warning(f"Failed to read server logs: {result.stderr}")
                return ""
        except Exception as e:
            self.logger.error(f"Failed to get server logs: {e}")
            return ""

    def run_test(self) -> bool:
        """Test OpenRouter fallback behavior"""
        try:
            self.logger.info("Test: OpenRouter fallback behavior when only provider available")

            # Setup test files
            self.setup_test_files()

            # Test 1: Auto mode should work with OpenRouter
            self.logger.info(" 1: Testing auto mode with OpenRouter as only provider")

            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What is 2 + 2? Give a brief answer.",
                    # No model specified - should use auto mode
                    "temperature": 0.1,
                },
            )

            if not response1:
                self.logger.error(" ❌ Auto mode with OpenRouter failed")
                return False

            self.logger.info(" ✅ Auto mode call completed with OpenRouter")

            # Test 2: Flash model should map to OpenRouter equivalent
            self.logger.info(" 2: Testing flash model mapping to OpenRouter")

            # Use codereview tool to test a different tool type
            test_code = """def calculate_sum(numbers):
    total = 0
    for num in numbers:
        total += num
    return total"""

            test_file = self.create_additional_test_file("sum_function.py", test_code)

            response2, _ = self.call_mcp_tool(
                "codereview",
                {
                    "files": [test_file],
                    "prompt": "Quick review of this sum function",
                    "model": "flash",
                    "temperature": 0.1,
                },
            )

            if not response2:
                self.logger.error(" ❌ Flash model mapping to OpenRouter failed")
                return False

            self.logger.info(" ✅ Flash model successfully mapped to OpenRouter")

            # Test 3: Pro model should map to OpenRouter equivalent
            self.logger.info(" 3: Testing pro model mapping to OpenRouter")

            response3, _ = self.call_mcp_tool(
                "analyze",
                {
                    "files": [self.test_files["python"]],
                    "prompt": "Analyze the structure of this Python code",
                    "model": "pro",
                    "temperature": 0.1,
                },
            )

            if not response3:
                self.logger.error(" ❌ Pro model mapping to OpenRouter failed")
                return False

            self.logger.info(" ✅ Pro model successfully mapped to OpenRouter")

            # Test 4: Debug tool with OpenRouter
            self.logger.info(" 4: Testing debug tool with OpenRouter")

            response4, _ = self.call_mcp_tool(
                "debug",
                {
                    "prompt": "Why might a function return None instead of a value?",
                    "model": "flash",  # Should map to OpenRouter
                    "temperature": 0.1,
                },
            )

            if not response4:
                self.logger.error(" ❌ Debug tool with OpenRouter failed")
                return False

            self.logger.info(" ✅ Debug tool working with OpenRouter")

            # Test 5: Validate logs show OpenRouter is being used
            self.logger.info(" 5: Validating OpenRouter is the active provider")
            logs = self.get_recent_server_logs()

            # Check for provider fallback logs
            fallback_logs = [
                line for line in logs.split("\n")
                if "No Gemini API key found" in line or
                "No OpenAI API key found" in line or
                "Only OpenRouter available" in line or
                "Using OpenRouter" in line
            ]

            # Check for OpenRouter provider initialization
            provider_logs = [
                line for line in logs.split("\n")
                if "OpenRouter provider" in line or
                "OpenRouterProvider" in line or
                "openrouter.ai/api/v1" in line
            ]

            # Check for model resolution through OpenRouter
            model_resolution_logs = [
                line for line in logs.split("\n")
                if ("Resolved model" in line and "via OpenRouter" in line) or
                ("Model alias" in line and "resolved to" in line) or
                ("flash" in line and "gemini-flash" in line) or
                ("pro" in line and "gemini-pro" in line)
            ]

            # Log findings
            self.logger.info(f" Fallback indication logs: {len(fallback_logs)}")
            self.logger.info(f" OpenRouter provider logs: {len(provider_logs)}")
            self.logger.info(f" Model resolution logs: {len(model_resolution_logs)}")

            # Sample logs for debugging
            if self.verbose:
                if fallback_logs:
                    self.logger.debug(" 📋 Sample fallback logs:")
                    for log in fallback_logs[:3]:
                        self.logger.debug(f" {log}")

                if provider_logs:
                    self.logger.debug(" 📋 Sample provider logs:")
                    for log in provider_logs[:3]:
                        self.logger.debug(f" {log}")

            # Success criteria
            openrouter_active = len(provider_logs) > 0
            models_resolved = len(model_resolution_logs) > 0
            all_tools_worked = True  # We checked this above

            success_criteria = [
                ("OpenRouter provider active", openrouter_active),
                ("Models resolved through OpenRouter", models_resolved),
                ("All tools worked with OpenRouter", all_tools_worked),
            ]

            passed_criteria = sum(1 for _, passed in success_criteria if passed)
            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")

            for criterion, passed in success_criteria:
                status = "✅" if passed else "❌"
                self.logger.info(f" {status} {criterion}")

            if passed_criteria >= 2:  # At least 2 out of 3 criteria
                self.logger.info(" ✅ OpenRouter fallback test passed")
                return True
            else:
                self.logger.error(" ❌ OpenRouter fallback test failed")
                return False

        except Exception as e:
            self.logger.error(f"OpenRouter fallback test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()


def main():
    """Run the OpenRouter fallback tests"""
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = OpenRouterFallbackTest(verbose=verbose)

    success = test.run_test()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
simulator_tests/test_openrouter_models.py (new file, 258 lines)
@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
OpenRouter Model Tests

Tests that verify OpenRouter functionality including:
- Model alias resolution (flash, pro, o3, etc. map to OpenRouter equivalents)
- Multiple OpenRouter models work correctly
- Conversation continuity works with OpenRouter models
- Error handling when models are not available
"""

import json
import subprocess

from .base_test import BaseSimulatorTest


class OpenRouterModelsTest(BaseSimulatorTest):
    """Test OpenRouter model functionality and alias mapping"""

    @property
    def test_name(self) -> str:
        return "openrouter_models"

    @property
    def test_description(self) -> str:
        return "OpenRouter model functionality and alias mapping"

    def get_recent_server_logs(self) -> str:
        """Get recent server logs from the log file directly"""
        try:
            # Read logs directly from the log file
            cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
            result = subprocess.run(cmd, capture_output=True, text=True)

            if result.returncode == 0:
                return result.stdout
            else:
                self.logger.warning(f"Failed to read server logs: {result.stderr}")
                return ""
        except Exception as e:
            self.logger.error(f"Failed to get server logs: {e}")
            return ""

    def run_test(self) -> bool:
        """Test OpenRouter model functionality"""
        try:
            self.logger.info("Test: OpenRouter model functionality and alias mapping")

            # Setup test files for later use
            self.setup_test_files()

            # Test 1: Flash alias mapping to OpenRouter
            self.logger.info(" 1: Testing 'flash' alias (should map to google/gemini-flash-1.5-8b)")

            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Flash model!' and nothing else.",
                    "model": "flash",
                    "temperature": 0.1,
                },
            )

            if not response1:
                self.logger.error(" ❌ Flash alias test failed")
                return False

            self.logger.info(" ✅ Flash alias call completed")
            if continuation_id:
                self.logger.info(f" ✅ Got continuation_id: {continuation_id}")

            # Test 2: Pro alias mapping to OpenRouter
            self.logger.info(" 2: Testing 'pro' alias (should map to google/gemini-pro-1.5)")

            response2, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Pro model!' and nothing else.",
                    "model": "pro",
                    "temperature": 0.1,
                },
            )

            if not response2:
                self.logger.error(" ❌ Pro alias test failed")
                return False

            self.logger.info(" ✅ Pro alias call completed")

            # Test 3: O3 alias mapping to OpenRouter (should map to openai/gpt-4o)
            self.logger.info(" 3: Testing 'o3' alias (should map to openai/gpt-4o)")

            response3, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from O3 model!' and nothing else.",
                    "model": "o3",
                    "temperature": 0.1,
                },
            )

            if not response3:
                self.logger.error(" ❌ O3 alias test failed")
                return False

            self.logger.info(" ✅ O3 alias call completed")

            # Test 4: Direct OpenRouter model name
            self.logger.info(" 4: Testing direct OpenRouter model name (anthropic/claude-3-haiku)")

            response4, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Claude Haiku!' and nothing else.",
                    "model": "anthropic/claude-3-haiku",
                    "temperature": 0.1,
                },
            )

            if not response4:
                self.logger.error(" ❌ Direct OpenRouter model test failed")
                return False

            self.logger.info(" ✅ Direct OpenRouter model call completed")

            # Test 5: OpenRouter alias from config
            self.logger.info(" 5: Testing OpenRouter alias from config ('opus' -> anthropic/claude-3-opus)")

            response5, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from Opus!' and nothing else.",
                    "model": "opus",
                    "temperature": 0.1,
                },
            )

            if not response5:
                self.logger.error(" ❌ OpenRouter alias test failed")
                return False

            self.logger.info(" ✅ OpenRouter alias call completed")

            # Test 6: Conversation continuity with OpenRouter models
            self.logger.info(" 6: Testing conversation continuity with OpenRouter")

            response6, new_continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Remember this number: 42. What number did I just tell you?",
                    "model": "sonnet",  # Claude Sonnet via OpenRouter
                    "temperature": 0.1,
                },
            )

            if not response6 or not new_continuation_id:
                self.logger.error(" ❌ Failed to start conversation with continuation_id")
                return False

            # Continue the conversation
            response7, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What was the number I told you earlier?",
                    "model": "sonnet",
                    "continuation_id": new_continuation_id,
                    "temperature": 0.1,
                },
            )

            if not response7:
                self.logger.error(" ❌ Failed to continue conversation")
                return False

            # Check if the model remembered the number
            if "42" in response7:
                self.logger.info(" ✅ Conversation continuity working with OpenRouter")
            else:
                self.logger.warning(" ⚠️ Model may not have remembered the number")

            # Test 7: Validate OpenRouter API usage from logs
            self.logger.info(" 7: Validating OpenRouter API usage in logs")
            logs = self.get_recent_server_logs()

            # Check for OpenRouter API calls
            openrouter_logs = [line for line in logs.split("\n") if "openrouter" in line.lower()]
            openrouter_api_logs = [line for line in logs.split("\n") if "openrouter.ai/api/v1" in line]

            # Check for specific model mappings
            flash_mapping_logs = [
                line for line in logs.split("\n")
                if ("flash" in line and "google/gemini-flash" in line) or
                ("Resolved model" in line and "google/gemini-flash" in line)
            ]

            pro_mapping_logs = [
                line for line in logs.split("\n")
                if ("pro" in line and "google/gemini-pro" in line) or
                ("Resolved model" in line and "google/gemini-pro" in line)
            ]

            # Log findings
            self.logger.info(f" OpenRouter-related logs: {len(openrouter_logs)}")
            self.logger.info(f" OpenRouter API logs: {len(openrouter_api_logs)}")
            self.logger.info(f" Flash mapping logs: {len(flash_mapping_logs)}")
            self.logger.info(f" Pro mapping logs: {len(pro_mapping_logs)}")

            # Sample log output for debugging
            if self.verbose and openrouter_logs:
                self.logger.debug(" 📋 Sample OpenRouter logs:")
                for log in openrouter_logs[:5]:
                    self.logger.debug(f" {log}")

            # Success criteria
            openrouter_api_used = len(openrouter_api_logs) > 0
            models_mapped = len(flash_mapping_logs) > 0 or len(pro_mapping_logs) > 0

            success_criteria = [
                ("OpenRouter API calls made", openrouter_api_used),
                ("Model aliases mapped correctly", models_mapped),
                ("All model calls succeeded", True),  # We already checked this above
            ]

            passed_criteria = sum(1 for _, passed in success_criteria if passed)
            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")

            for criterion, passed in success_criteria:
                status = "✅" if passed else "❌"
                self.logger.info(f" {status} {criterion}")

            if passed_criteria >= 2:  # At least 2 out of 3 criteria
                self.logger.info(" ✅ OpenRouter model tests passed")
                return True
            else:
                self.logger.error(" ❌ OpenRouter model tests failed")
                return False

        except Exception as e:
            self.logger.error(f"OpenRouter model test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()


def main():
    """Run the OpenRouter model tests"""
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = OpenRouterModelsTest(verbose=verbose)

    success = test.run_test()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
test_model_mapping.py (new executable file, 138 lines)
@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
Simple test script to demonstrate model mapping through the MCP server.
Tests how model aliases (flash, pro, o3) are mapped to OpenRouter models.
"""

import subprocess
import json
import sys
from typing import Dict, Any


def call_mcp_server(model: str, message: str = "Hello, which model are you?") -> Dict[str, Any]:
    """Call the MCP server with a specific model and return the response."""

    # Prepare the request
    request = {
        "jsonrpc": "2.0",
        "method": "completion",
        "params": {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": message
                }
            ],
            "max_tokens": 100
        },
        "id": 1
    }

    # Call the server
    cmd = [sys.executable, "server.py"]

    try:
        # Send request to stdin and capture output
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

        stdout, stderr = process.communicate(input=json.dumps(request))

        if process.returncode != 0:
            return {
                "error": f"Server returned non-zero exit code: {process.returncode}",
                "stderr": stderr
            }

        # Parse the response
        try:
            response = json.loads(stdout)
            return response
        except json.JSONDecodeError:
            return {
                "error": "Failed to parse JSON response",
                "stdout": stdout,
                "stderr": stderr
            }

    except Exception as e:
        return {
            "error": f"Failed to call server: {str(e)}"
        }


def extract_model_info(response: Dict[str, Any]) -> Dict[str, str]:
    """Extract model information from the response."""

    if "error" in response:
        return {
            "status": "error",
            "message": response.get("error", "Unknown error")
        }

    # Look for result in the response
    result = response.get("result", {})

    # Extract relevant information
    info = {
        "status": "success",
        "provider": "unknown",
        "model": "unknown"
    }

    # Try to find provider and model info in the response
    # This might be in metadata or debug info depending on server implementation
    if "metadata" in result:
        metadata = result["metadata"]
        info["provider"] = metadata.get("provider", "unknown")
        info["model"] = metadata.get("model", "unknown")

    # Also check if the model info is in the response content itself
    if "content" in result:
        content = result["content"]
        # Simple heuristic to detect OpenRouter models
        if "openrouter" in content.lower() or any(x in content.lower() for x in ["claude", "gpt", "gemini"]):
            info["provider"] = "openrouter"

    return info


def main():
    """Test model mapping for different aliases."""

    print("Model Mapping Test for MCP Server")
    print("=" * 50)
    print()

    # Test models
    test_models = ["flash", "pro", "o3"]

    for model in test_models:
        print(f"Testing model: '{model}'")
        print("-" * 30)

        response = call_mcp_server(model)
        model_info = extract_model_info(response)

        if model_info["status"] == "error":
            print(f"  ❌ Error: {model_info['message']}")
        else:
            print(f"  ✓ Provider: {model_info['provider']}")
            print(f"  ✓ Model: {model_info['model']}")

        # Print raw response for debugging
        if "--debug" in sys.argv:
            print("\nDebug - Raw Response:")
            print(json.dumps(response, indent=2))

        print()

    print("\nNote: This test assumes the MCP server is configured with OpenRouter.")
    print("The actual model mappings depend on the server configuration.")


if __name__ == "__main__":
    main()
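extract_model_info() above only reports a provider/model pair when the server response exposes them under result.metadata; otherwise it falls back to a keyword heuristic on result.content. A hand-built response showing the shape that parsing code expects (the field layout is read off the code above, not taken from the server's documented output):

from test_model_mapping import extract_model_info

sample_response = {
    "jsonrpc": "2.0",
    "id": 1,
    "result": {
        "metadata": {"provider": "openrouter", "model": "google/gemini-flash-1.5-8b"},
        "content": "Hello from Flash model!",
    },
}

print(extract_model_info(sample_response))
# -> {'status': 'success', 'provider': 'openrouter', 'model': 'google/gemini-flash-1.5-8b'}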