feat: all native providers now read model metadata from catalog files, matching the OpenRouter / Custom configs. Allows greater control over each model's capabilities

Fahad
2025-10-07 12:17:47 +04:00
parent 7d7c74b5a3
commit 2a706d5720
13 changed files with 704 additions and 397 deletions


@@ -18,6 +18,7 @@
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
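The new default_reasoning_effort field is meant to be consumed at request-build time. A minimal sketch of how a provider could fall back to the catalog default when the caller does not supply an effort level, assuming a plain dict-based payload builder (the build_payload name and payload shape are illustrative assumptions, not the repository's actual API):

# Illustrative sketch only: function name and payload shape are assumptions.
from typing import Any

def build_payload(capability: dict[str, Any], prompt: str,
                  reasoning_effort: str | None = None) -> dict[str, Any]:
    payload: dict[str, Any] = {"model": capability["model_name"], "input": prompt}
    # Fall back to the catalog's default_reasoning_effort when no override is given.
    effort = reasoning_effort or capability.get("default_reasoning_effort")
    if effort:
        payload["reasoning"] = {"effort": effort}
    return payload

if __name__ == "__main__":
    gpt5_pro = {"model_name": "gpt-5-pro", "default_reasoning_effort": "high"}
    print(build_payload(gpt5_pro, "Summarize the design"))
    # {'model': 'gpt-5-pro', 'input': 'Summarize the design', 'reasoning': {'effort': 'high'}}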

conf/gemini_models.json (new file)

@@ -0,0 +1,111 @@
{
"_README": {
"description": "Model metadata for Google's Gemini API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the Gemini provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'gemini-2.5-pro', 'gemini-2.0-flash')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "gemini-2.5-pro",
"friendly_name": "Gemini (Pro 2.5)",
"aliases": [
"pro",
"gemini pro",
"gemini-pro"
],
"intelligence_score": 18,
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 32768,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 32.0
},
{
"model_name": "gemini-2.0-flash",
"friendly_name": "Gemini (Flash 2.0)",
"aliases": [
"flash-2.0",
"flash2"
],
"intelligence_score": 9,
"description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 24576,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "gemini-2.0-flash-lite",
"friendly_name": "Gemini (Flash Lite 2.0)",
"aliases": [
"flashlite",
"flash-lite"
],
"intelligence_score": 7,
"description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": false,
"supports_temperature": true
},
{
"model_name": "gemini-2.5-flash",
"friendly_name": "Gemini (Flash 2.5)",
"aliases": [
"flash",
"flash2.5"
],
"intelligence_score": 10,
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 24576,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
}
]
}
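Because aliases are case-insensitive and each native provider now owns its own catalog file, alias resolution can be a single lowercased lookup table built once at load time. A minimal sketch under that assumption (load_catalog, build_alias_index, and resolve_model are illustrative names, not the server's actual helpers):

# Minimal sketch: load a provider catalog and resolve case-insensitive aliases.
import json
from pathlib import Path

def load_catalog(path: str) -> list[dict]:
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    return data["models"]

def build_alias_index(models: list[dict]) -> dict[str, str]:
    index: dict[str, str] = {}
    for entry in models:
        canonical = entry["model_name"]
        index[canonical.lower()] = canonical
        for alias in entry.get("aliases", []):
            index[alias.lower()] = canonical
    return index

def resolve_model(name: str, index: dict[str, str]) -> str | None:
    return index.get(name.strip().lower())

if __name__ == "__main__":
    index = build_alias_index(load_catalog("conf/gemini_models.json"))
    print(resolve_model("Flash", index))       # gemini-2.5-flash
    print(resolve_model("GEMINI PRO", index))  # gemini-2.5-pro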

conf/openai_models.json (new file)

@@ -0,0 +1,235 @@
{
"_README": {
"description": "Model metadata for native OpenAI API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the OpenAI provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'gpt-5', 'o3-pro')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "gpt-5",
"friendly_name": "OpenAI (GPT-5)",
"aliases": [
"gpt5",
"gpt-5"
],
"intelligence_score": 16,
"description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-pro",
"friendly_name": "OpenAI (GPT-5 Pro)",
"aliases": [
"gpt5pro",
"gpt5-pro"
],
"intelligence_score": 18,
"description": "GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
"context_window": 400000,
"max_output_tokens": 272000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"use_openai_response_api": true,
"default_reasoning_effort": "high",
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-mini",
"friendly_name": "OpenAI (GPT-5-mini)",
"aliases": [
"gpt5-mini",
"gpt5mini",
"mini"
],
"intelligence_score": 15,
"description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-nano",
"friendly_name": "OpenAI (GPT-5 nano)",
"aliases": [
"gpt5nano",
"gpt5-nano",
"nano"
],
"intelligence_score": 13,
"description": "GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3",
"friendly_name": "OpenAI (O3)",
"intelligence_score": 14,
"description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3-mini",
"friendly_name": "OpenAI (O3-mini)",
"aliases": [
"o3mini"
],
"intelligence_score": 12,
"description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3-pro",
"friendly_name": "OpenAI (O3-Pro)",
"aliases": [
"o3pro"
],
"intelligence_score": 15,
"description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"use_openai_response_api": true,
"temperature_constraint": "fixed"
},
{
"model_name": "o4-mini",
"friendly_name": "OpenAI (O4-mini)",
"aliases": [
"o4mini"
],
"intelligence_score": 11,
"description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
"context_window": 200000,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-4.1",
"friendly_name": "OpenAI (GPT 4.1)",
"aliases": [
"gpt4.1"
],
"intelligence_score": 13,
"description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
"context_window": 1000000,
"max_output_tokens": 32768,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "gpt-5-codex",
"friendly_name": "OpenAI (GPT-5 Codex)",
"aliases": [
"gpt5-codex",
"codex",
"gpt-5-code",
"gpt5-code"
],
"intelligence_score": 17,
"description": "GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"use_openai_response_api": true
}
]
}
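Two of these fields drive request construction rather than model selection: use_openai_response_api decides whether a call goes to the /responses endpoint instead of chat completions, and supports_temperature / temperature_constraint decide whether a caller-supplied temperature is forwarded at all. A minimal sketch of that gating, with deliberately simplified handling of the 'fixed' constraint (function names and exact constraint semantics are assumptions, not the provider's real implementation):

# Illustrative sketch: catalog flags steering endpoint choice and temperature handling.
from typing import Any

def choose_endpoint(capability: dict[str, Any]) -> str:
    # Models flagged with use_openai_response_api use /responses; others use chat completions.
    return "/v1/responses" if capability.get("use_openai_response_api") else "/v1/chat/completions"

def apply_temperature(payload: dict[str, Any], capability: dict[str, Any],
                      requested: float | None) -> None:
    # Reasoning models (supports_temperature = false) never receive the parameter;
    # a 'fixed' constraint ignores caller overrides (simplified here).
    if not capability.get("supports_temperature", True):
        return
    if capability.get("temperature_constraint") == "fixed":
        return
    if requested is not None:
        payload["temperature"] = requested

if __name__ == "__main__":
    o3_pro = {"model_name": "o3-pro", "supports_temperature": False,
              "use_openai_response_api": True, "temperature_constraint": "fixed"}
    payload: dict[str, Any] = {"model": o3_pro["model_name"]}
    apply_temperature(payload, o3_pro, requested=0.7)
    print(choose_endpoint(o3_pro), payload)  # /v1/responses {'model': 'o3-pro'}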

conf/xai_models.json (new file)

@@ -0,0 +1,87 @@
{
"_README": {
"description": "Model metadata for X.AI (GROK) API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the X.AI provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'grok-4', 'grok-3-fast')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "grok-4",
"friendly_name": "X.AI (Grok 4)",
"aliases": [
"grok",
"grok4",
"grok-4"
],
"intelligence_score": 16,
"description": "GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
"context_window": 256000,
"max_output_tokens": 256000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "grok-3",
"friendly_name": "X.AI (Grok 3)",
"aliases": [
"grok3"
],
"intelligence_score": 13,
"description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
"context_window": 131072,
"max_output_tokens": 131072,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": false,
"supports_images": false,
"supports_temperature": true
},
{
"model_name": "grok-3-fast",
"friendly_name": "X.AI (Grok 3 Fast)",
"aliases": [
"grok3fast",
"grokfast",
"grok3-fast"
],
"intelligence_score": 12,
"description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
"context_window": 131072,
"max_output_tokens": 131072,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": false,
"supports_images": false,
"supports_temperature": true
}
]
}
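Across all three catalogs, intelligence_score is documented as the primary signal for auto-mode model ordering. A minimal sketch of how the merged catalogs could be ranked, higher scores first (the merge-and-sort helper is illustrative; the server's real auto-mode logic may weigh additional signals):

# Illustrative sketch: merge provider catalogs and rank models by intelligence_score.
import json
from pathlib import Path

CATALOGS = [
    "conf/openai_models.json",
    "conf/gemini_models.json",
    "conf/xai_models.json",
]

def ranked_models(paths: list[str]) -> list[tuple[str, int]]:
    entries: list[tuple[str, int]] = []
    for path in paths:
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        for model in data["models"]:
            entries.append((model["model_name"], model.get("intelligence_score", 0)))
    # Higher score first; ties keep catalog order because sorted() is stable.
    return sorted(entries, key=lambda item: item[1], reverse=True)

if __name__ == "__main__":
    for name, score in ranked_models(CATALOGS)[:5]:
        print(f"{score:>2}  {name}")
    # Top entries would include gpt-5-pro and gemini-2.5-pro (both scored 18).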