Breaking change: openrouter_models.json -> custom_models.json

* Support for custom URLs and custom models, including locally hosted models such as Ollama (see the endpoint sketch after this list)
* Support for native + OpenRouter + local models (i.e. dozens of models) means you can delegate sub-tasks to particular models, or offload routine work such as localizations to local models
* Several tests added
* Pre-commit checks now also include untracked (new) files
* Automatic log file rollover
* Improved logging
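
A minimal sketch of the custom-URL wiring described above, assuming the CUSTOM_API_URL / CUSTOM_API_KEY / CUSTOM_MODEL_NAME variable names from the project docs (check docs/custom_models.md for the authoritative names):

    # .env -- variable names are assumptions from the docs, not verified against this commit
    CUSTOM_API_URL=http://localhost:11434/v1   # Ollama exposes an OpenAI-compatible API here
    CUSTOM_API_KEY=                            # local Ollama typically needs no key
    CUSTOM_MODEL_NAME=llama3.2                 # default model at the custom endpoint

Models reachable through that endpoint are then described in custom_models.json, as shown in the diff below.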
Author: Fahad
Date: 2025-06-13 15:22:09 +04:00
Parent: f5fdf7b2ed
Commit: f44ca326ef
27 changed files with 1692 additions and 351 deletions

custom_models.json
@@ -1,19 +1,27 @@
 {
   "_README": {
-    "description": "OpenRouter model configuration for Zen MCP Server",
-    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/openrouter.md",
+    "description": "Unified model configuration for multiple AI providers and endpoints, including OpenRouter",
+    "providers_supported": [
+      "OpenRouter - Access to GPT-4, Claude, Mistral, etc. via unified API",
+      "Custom API endpoints - Local models (Ollama, vLLM, LM Studio, etc.)",
+      "Self-hosted APIs - Any OpenAI-compatible endpoint"
+    ],
+    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
+    "usage": "Models can be accessed via aliases (e.g., 'opus', 'local-llama') or full names (e.g., 'anthropic/claude-3-opus', 'llama3.2')",
     "instructions": [
       "Add new models by copying an existing entry and modifying it",
       "Aliases are case-insensitive and should be unique across all models",
       "context_window is the model's total context window size in tokens (input + output)",
       "Set supports_* flags based on the model's actual capabilities",
-      "Models not listed here will use generic defaults (32K context window, basic features)"
+      "Models not listed here will use generic defaults (32K context window, basic features)",
+      "For OpenRouter models: Use official OpenRouter model names (e.g., 'anthropic/claude-3-opus')",
+      "For local/custom models: Use model names as they appear in your API (e.g., 'llama3.2', 'gpt-3.5-turbo')"
     ],
     "field_descriptions": {
-      "model_name": "The official OpenRouter model identifier (e.g., 'anthropic/claude-3-opus')",
+      "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-3-opus') or custom model name (e.g., 'llama3.2')",
       "aliases": "Array of short names users can type instead of the full model name",
       "context_window": "Total number of tokens the model can process (input + output combined)",
-      "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter)",
+      "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
       "supports_json_mode": "Whether the model can guarantee valid JSON output",
       "supports_function_calling": "Whether the model supports function/tool calling",
       "description": "Human-readable description of the model"
@@ -103,7 +111,7 @@
     },
     {
       "model_name": "meta-llama/llama-3-70b",
-      "aliases": ["llama","llama3-70b", "llama-70b", "llama3"],
+      "aliases": ["llama", "llama3", "llama3-70b", "llama-70b", "llama3-openrouter"],
       "context_window": 8192,
       "supports_extended_thinking": false,
       "supports_json_mode": false,
@@ -163,6 +171,15 @@
       "supports_json_mode": true,
       "supports_function_calling": true,
       "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems"
+    },
+    {
+      "model_name": "llama3.2",
+      "aliases": ["local-llama", "local", "llama3.2", "ollama-llama"],
+      "context_window": 128000,
+      "supports_extended_thinking": false,
+      "supports_json_mode": false,
+      "supports_function_calling": false,
+      "description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window"
     }
   ]
 }
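
The _README's first instruction above ("Add new models by copying an existing entry and modifying it") is the intended extension point. A hypothetical sketch of such a copied-and-modified entry for a model served via LM Studio or vLLM; the model name, alias, and context size are placeholders, not part of this commit:

    {
      "model_name": "qwen2.5-coder",
      "aliases": ["local-coder"],
      "context_window": 32768,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "description": "Hypothetical local coding model served via LM Studio/vLLM"
    }

Any model not listed in the file falls back to the generic defaults noted in the _README (32K context window, basic features).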