feat: all native providers now read model metadata from catalog files, matching the OpenRouter / Custom configs. Allows greater control over each model's capabilities

Fahad
2025-10-07 12:17:47 +04:00
parent 7d7c74b5a3
commit 2a706d5720
13 changed files with 704 additions and 397 deletions


@@ -18,6 +18,7 @@
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
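The new default_reasoning_effort field is meant to be consumed at request-build time. A minimal sketch of how a provider could fall back to the catalog default when the caller does not supply an effort level, assuming a plain dict-based payload builder (the build_payload name and payload shape are illustrative assumptions, not the repository's actual API):

# Illustrative sketch only: function name and payload shape are assumptions.
from typing import Any

def build_payload(capability: dict[str, Any], prompt: str,
                  reasoning_effort: str | None = None) -> dict[str, Any]:
    payload: dict[str, Any] = {"model": capability["model_name"], "input": prompt}
    # Fall back to the catalog's default_reasoning_effort when no override is given.
    effort = reasoning_effort or capability.get("default_reasoning_effort")
    if effort:
        payload["reasoning"] = {"effort": effort}
    return payload

if __name__ == "__main__":
    gpt5_pro = {"model_name": "gpt-5-pro", "default_reasoning_effort": "high"}
    print(build_payload(gpt5_pro, "Summarize the design"))
    # {'model': 'gpt-5-pro', 'input': 'Summarize the design', 'reasoning': {'effort': 'high'}}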

conf/gemini_models.json (new file)

@@ -0,0 +1,111 @@
{
"_README": {
"description": "Model metadata for Google's Gemini API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the Gemini provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'gemini-2.5-pro', 'gemini-2.0-flash')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "gemini-2.5-pro",
"friendly_name": "Gemini (Pro 2.5)",
"aliases": [
"pro",
"gemini pro",
"gemini-pro"
],
"intelligence_score": 18,
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 32768,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 32.0
},
{
"model_name": "gemini-2.0-flash",
"friendly_name": "Gemini (Flash 2.0)",
"aliases": [
"flash-2.0",
"flash2"
],
"intelligence_score": 9,
"description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 24576,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "gemini-2.0-flash-lite",
"friendly_name": "Gemini (Flash Lite 2.0)",
"aliases": [
"flashlite",
"flash-lite"
],
"intelligence_score": 7,
"description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": false,
"supports_temperature": true
},
{
"model_name": "gemini-2.5-flash",
"friendly_name": "Gemini (Flash 2.5)",
"aliases": [
"flash",
"flash2.5"
],
"intelligence_score": 10,
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 24576,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
}
]
}
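Because aliases are case-insensitive and each native provider now owns its own catalog file, alias resolution can be a single lowercased lookup table built once at load time. A minimal sketch under that assumption (load_catalog, build_alias_index, and resolve_model are illustrative names, not the server's actual helpers):

# Minimal sketch: load a provider catalog and resolve case-insensitive aliases.
import json
from pathlib import Path

def load_catalog(path: str) -> list[dict]:
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    return data["models"]

def build_alias_index(models: list[dict]) -> dict[str, str]:
    index: dict[str, str] = {}
    for entry in models:
        canonical = entry["model_name"]
        index[canonical.lower()] = canonical
        for alias in entry.get("aliases", []):
            index[alias.lower()] = canonical
    return index

def resolve_model(name: str, index: dict[str, str]) -> str | None:
    return index.get(name.strip().lower())

if __name__ == "__main__":
    index = build_alias_index(load_catalog("conf/gemini_models.json"))
    print(resolve_model("Flash", index))       # gemini-2.5-flash
    print(resolve_model("GEMINI PRO", index))  # gemini-2.5-pro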

conf/openai_models.json (new file)

@@ -0,0 +1,235 @@
{
"_README": {
"description": "Model metadata for native OpenAI API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the OpenAI provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'gpt-5', 'o3-pro')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "gpt-5",
"friendly_name": "OpenAI (GPT-5)",
"aliases": [
"gpt5",
"gpt-5"
],
"intelligence_score": 16,
"description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-pro",
"friendly_name": "OpenAI (GPT-5 Pro)",
"aliases": [
"gpt5pro",
"gpt5-pro"
],
"intelligence_score": 18,
"description": "GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
"context_window": 400000,
"max_output_tokens": 272000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"use_openai_response_api": true,
"default_reasoning_effort": "high",
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-mini",
"friendly_name": "OpenAI (GPT-5-mini)",
"aliases": [
"gpt5-mini",
"gpt5mini",
"mini"
],
"intelligence_score": 15,
"description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-nano",
"friendly_name": "OpenAI (GPT-5 nano)",
"aliases": [
"gpt5nano",
"gpt5-nano",
"nano"
],
"intelligence_score": 13,
"description": "GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3",
"friendly_name": "OpenAI (O3)",
"intelligence_score": 14,
"description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3-mini",
"friendly_name": "OpenAI (O3-mini)",
"aliases": [
"o3mini"
],
"intelligence_score": 12,
"description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3-pro",
"friendly_name": "OpenAI (O3-Pro)",
"aliases": [
"o3pro"
],
"intelligence_score": 15,
"description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"use_openai_response_api": true,
"temperature_constraint": "fixed"
},
{
"model_name": "o4-mini",
"friendly_name": "OpenAI (O4-mini)",
"aliases": [
"o4mini"
],
"intelligence_score": 11,
"description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
"context_window": 200000,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-4.1",
"friendly_name": "OpenAI (GPT 4.1)",
"aliases": [
"gpt4.1"
],
"intelligence_score": 13,
"description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
"context_window": 1000000,
"max_output_tokens": 32768,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "gpt-5-codex",
"friendly_name": "OpenAI (GPT-5 Codex)",
"aliases": [
"gpt5-codex",
"codex",
"gpt-5-code",
"gpt5-code"
],
"intelligence_score": 17,
"description": "GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"use_openai_response_api": true
}
]
}
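Two of these fields drive request construction rather than model selection: use_openai_response_api decides whether a call goes to the /responses endpoint instead of chat completions, and supports_temperature / temperature_constraint decide whether a caller-supplied temperature is forwarded at all. A minimal sketch of that gating, with deliberately simplified handling of the 'fixed' constraint (function names and exact constraint semantics are assumptions, not the provider's real implementation):

# Illustrative sketch: catalog flags steering endpoint choice and temperature handling.
from typing import Any

def choose_endpoint(capability: dict[str, Any]) -> str:
    # Models flagged with use_openai_response_api use /responses; others use chat completions.
    return "/v1/responses" if capability.get("use_openai_response_api") else "/v1/chat/completions"

def apply_temperature(payload: dict[str, Any], capability: dict[str, Any],
                      requested: float | None) -> None:
    # Reasoning models (supports_temperature = false) never receive the parameter;
    # a 'fixed' constraint ignores caller overrides (simplified here).
    if not capability.get("supports_temperature", True):
        return
    if capability.get("temperature_constraint") == "fixed":
        return
    if requested is not None:
        payload["temperature"] = requested

if __name__ == "__main__":
    o3_pro = {"model_name": "o3-pro", "supports_temperature": False,
              "use_openai_response_api": True, "temperature_constraint": "fixed"}
    payload: dict[str, Any] = {"model": o3_pro["model_name"]}
    apply_temperature(payload, o3_pro, requested=0.7)
    print(choose_endpoint(o3_pro), payload)  # /v1/responses {'model': 'o3-pro'}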

conf/xai_models.json (new file)

@@ -0,0 +1,87 @@
{
"_README": {
"description": "Model metadata for X.AI (GROK) API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the X.AI provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'grok-4', 'grok-3-fast')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "grok-4",
"friendly_name": "X.AI (Grok 4)",
"aliases": [
"grok",
"grok4",
"grok-4"
],
"intelligence_score": 16,
"description": "GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
"context_window": 256000,
"max_output_tokens": 256000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "grok-3",
"friendly_name": "X.AI (Grok 3)",
"aliases": [
"grok3"
],
"intelligence_score": 13,
"description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
"context_window": 131072,
"max_output_tokens": 131072,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": false,
"supports_images": false,
"supports_temperature": true
},
{
"model_name": "grok-3-fast",
"friendly_name": "X.AI (Grok 3 Fast)",
"aliases": [
"grok3fast",
"grokfast",
"grok3-fast"
],
"intelligence_score": 12,
"description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
"context_window": 131072,
"max_output_tokens": 131072,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": false,
"supports_images": false,
"supports_temperature": true
}
]
}
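Across all three catalogs, intelligence_score is documented as the primary signal for auto-mode model ordering. A minimal sketch of how the merged catalogs could be ranked, higher scores first (the merge-and-sort helper is illustrative; the server's real auto-mode logic may weigh additional signals):

# Illustrative sketch: merge provider catalogs and rank models by intelligence_score.
import json
from pathlib import Path

CATALOGS = [
    "conf/openai_models.json",
    "conf/gemini_models.json",
    "conf/xai_models.json",
]

def ranked_models(paths: list[str]) -> list[tuple[str, int]]:
    entries: list[tuple[str, int]] = []
    for path in paths:
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        for model in data["models"]:
            entries.append((model["model_name"], model.get("intelligence_score", 0)))
    # Higher score first; ties keep catalog order because sorted() is stable.
    return sorted(entries, key=lambda item: item[1], reverse=True)

if __name__ == "__main__":
    for name, score in ranked_models(CATALOGS)[:5]:
        print(f"{score:>2}  {name}")
    # Top entries would include gpt-5-pro and gemini-2.5-pro (both scored 18).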