feat!: breaking change - OpenRouter models are now read from conf/openrouter_models.json, while Custom / Self-hosted models are read from conf/custom_models.json

feat: Azure OpenAI / Azure AI Foundry support. Models should be defined in conf/azure_models.json (or a custom path). See .env.example or the README for the relevant environment variables. https://github.com/BeehiveInnovations/zen-mcp-server/issues/265

feat: OpenRouter / Custom Models / Azure can each also use a custom config path now (see .env.example).

refactor: the model registry class is now abstract; the OpenRouter, Custom Provider, and Azure OpenAI registries subclass it.

refactor: breaking change - the `is_custom` property has been removed from model_capabilities.py (and thus from custom_models.json), since each provider's models are now read from a separate configuration file.
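The per-provider config paths mentioned above are driven by environment variables documented in .env.example. As a rough illustration only (the variable name `OPENROUTER_MODELS_CONFIG_PATH` and the helper below are assumptions for this sketch, not the project's actual API; check .env.example for the real names), a provider-specific registry might resolve its config file like this:

```python
import os
from pathlib import Path

# Hypothetical variable name for illustration; the real names live in .env.example.
OPENROUTER_MODELS_ENV = "OPENROUTER_MODELS_CONFIG_PATH"
DEFAULT_OPENROUTER_CONFIG = Path("conf/openrouter_models.json")


def resolve_openrouter_config_path() -> Path:
    """Return the user-supplied config path if set, else the bundled default."""
    override = os.environ.get(OPENROUTER_MODELS_ENV)
    return Path(override) if override else DEFAULT_OPENROUTER_CONFIG
```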
conf/azure_models.json (new file, 45 lines)
@@ -0,0 +1,45 @@
{
  "_README": {
    "description": "Model metadata for Azure OpenAI / Azure AI Foundry-backed provider. The `models` definition can be copied from openrouter_models.json / custom_models.json",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/azure_models.md",
    "usage": "Models listed here are exposed through Azure AI Foundry. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier e.g., 'gpt-4'",
      "deployment": "Azure model deployment name",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "_example_models": [
    {
      "model_name": "gpt-4",
      "deployment": "gpt-4",
      "aliases": [
        "gpt4"
      ],
      "context_window": 128000,
      "max_output_tokens": 16384,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "GPT-4 (128K context, 16K output)",
      "intelligence_score": 10
    }
  ],
  "models": []
}
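For context, the sketch below shows one way a file like this could be consumed: load the JSON, ignore the `_README` / `_example_models` documentation keys, and index each entry by model name and case-insensitive alias so that the `deployment` field can be looked up later. This is a minimal illustration under assumed names, not the project's actual registry code.

```python
import json
from pathlib import Path


def load_azure_models(path: str = "conf/azure_models.json") -> dict[str, dict]:
    """Index model entries by model_name and by each lower-cased alias."""
    data = json.loads(Path(path).read_text())
    index: dict[str, dict] = {}
    for entry in data.get("models", []):  # "_README" and "_example_models" are ignored
        index[entry["model_name"].lower()] = entry
        for alias in entry.get("aliases", []):
            index[alias.lower()] = entry
    return index


# Usage: the "deployment" field tells the provider which Azure deployment to call.
# models = load_azure_models()
# deployment = models["gpt4"]["deployment"]  # works once a real entry is added to "models"
```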
conf/custom_models.json
@@ -1,383 +1,26 @@
{
  "_README": {
    "description": "Unified model configuration for multiple AI providers and endpoints, including OpenRouter",
    "providers_supported": [
      "OpenRouter - Access to GPT-4, Claude, Mistral, etc. via unified API",
      "Custom API endpoints - Local models (Ollama, vLLM, LM Studio, etc.)",
      "Self-hosted APIs - Any OpenAI-compatible endpoint"
    ],
    "description": "Model metadata for local/self-hosted OpenAI-compatible endpoints (Custom provider).",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
    "usage": "Models can be accessed via aliases (e.g., 'opus', 'local-llama') or full names (e.g., 'anthropic/claude-opus-4', 'llama3.2')",
    "instructions": [
      "Add new models by copying an existing entry and modifying it",
      "Aliases are case-insensitive and should be unique across all models",
      "context_window is the model's total context window size in tokens (input + output)",
      "Set supports_* flags based on the model's actual capabilities",
      "Set is_custom=true for models that should ONLY work with custom endpoints (Ollama, vLLM, etc.)",
      "Models not listed here will use generic defaults (32K context window, basic features)",
      "For OpenRouter models: Use official OpenRouter model names (e.g., 'anthropic/claude-opus-4')",
      "For local/custom models: Use model names as they appear in your API (e.g., 'llama3.2', 'gpt-3.5-turbo')"
    ],
    "usage": "Each entry will be advertised by the Custom provider. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')",
      "model_name": "The model identifier e.g., 'llama3.2'",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
      "supports_extended_thinking": "Whether the model supports extended reasoning tokens",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    },
    "example_custom_model": {
      "model_name": "my-local-model",
      "aliases": [
        "shortname",
        "nickname",
        "abbrev"
      ],
      "context_window": 128000,
      "max_output_tokens": 32768,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 10.0,
      "supports_temperature": true,
      "temperature_constraint": "range",
      "is_custom": true,
      "description": "Example custom/local model for Ollama, vLLM, etc.",
      "intelligence_score": 12
    }
  },
  "models": [
    {
      "model_name": "anthropic/claude-sonnet-4.5",
      "aliases": [
        "sonnet",
        "sonnet4.5"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency",
      "intelligence_score": 12
    },
    {
      "model_name": "anthropic/claude-opus-4.1",
      "aliases": [
        "opus",
        "claude-opus"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude Opus 4.1 - Our most capable and intelligent model yet",
      "intelligence_score": 14
    },
    {
      "model_name": "anthropic/claude-sonnet-4.1",
      "aliases": [
        "sonnet4.1"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency",
      "intelligence_score": 10
    },
    {
      "model_name": "anthropic/claude-3.5-haiku",
      "aliases": [
        "haiku"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude 3 Haiku - Fast and efficient with vision",
      "intelligence_score": 8
    },
    {
      "model_name": "google/gemini-2.5-pro",
      "aliases": [
        "pro",
        "gemini-pro",
        "gemini",
        "pro-openrouter"
      ],
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
      "intelligence_score": 18
    },
    {
      "model_name": "google/gemini-2.5-flash",
      "aliases": [
        "flash",
        "gemini-flash"
      ],
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 15.0,
      "description": "Google's Gemini 2.5 Flash via OpenRouter with vision",
      "intelligence_score": 10
    },
    {
      "model_name": "mistralai/mistral-large-2411",
      "aliases": [
        "mistral-large",
        "mistral"
      ],
      "context_window": 128000,
      "max_output_tokens": 32000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Mistral's largest model (text-only)",
      "intelligence_score": 11
    },
    {
      "model_name": "meta-llama/llama-3-70b",
      "aliases": [
        "llama",
        "llama3",
        "llama3-70b",
        "llama-70b",
        "llama3-openrouter"
      ],
      "context_window": 8192,
      "max_output_tokens": 8192,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Meta's Llama 3 70B model (text-only)",
      "intelligence_score": 9
    },
    {
      "model_name": "deepseek/deepseek-r1-0528",
      "aliases": [
        "deepseek-r1",
        "deepseek",
        "r1",
        "deepseek-thinking"
      ],
      "context_window": 65536,
      "max_output_tokens": 32768,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)",
      "intelligence_score": 15
    },
    {
      "model_name": "perplexity/llama-3-sonar-large-32k-online",
      "aliases": [
        "perplexity",
        "sonar",
        "perplexity-online"
      ],
      "context_window": 32768,
      "max_output_tokens": 32768,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Perplexity's online model with web search (text-only)",
      "intelligence_score": 9
    },
    {
      "model_name": "openai/o3",
      "aliases": [
        "o3"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision",
      "intelligence_score": 14
    },
    {
      "model_name": "openai/o3-mini",
      "aliases": [
        "o3-mini",
        "o3mini"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3-mini model - balanced performance and speed with vision",
      "intelligence_score": 12
    },
    {
      "model_name": "openai/o3-mini-high",
      "aliases": [
        "o3-mini-high",
        "o3mini-high"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision",
      "intelligence_score": 13
    },
    {
      "model_name": "openai/o3-pro",
      "aliases": [
        "o3pro"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision",
      "intelligence_score": 15
    },
    {
      "model_name": "openai/o4-mini",
      "aliases": [
        "o4-mini",
        "o4mini"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision",
      "intelligence_score": 11
    },
    {
      "model_name": "openai/gpt-5",
      "aliases": [
        "gpt5"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": true,
      "temperature_constraint": "range",
      "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
      "intelligence_score": 16
    },
    {
      "model_name": "openai/gpt-5-codex",
      "aliases": [
        "codex",
        "gpt5codex"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "is_custom": false,
      "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows",
      "intelligence_score": 17
    },
    {
      "model_name": "openai/gpt-5-mini",
      "aliases": [
        "gpt5mini"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "supports_temperature": true,
      "temperature_constraint": "fixed",
      "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
      "intelligence_score": 10
    },
    {
      "model_name": "openai/gpt-5-nano",
      "aliases": [
        "gpt5nano"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "supports_temperature": true,
      "temperature_constraint": "fixed",
      "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
      "intelligence_score": 8
    },
    {
      "model_name": "llama3.2",
      "aliases": [
@@ -391,7 +34,6 @@
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "is_custom": true,
      "description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
      "intelligence_score": 6
    }
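The README above stresses that aliases are case-insensitive and must be unique across all models. A minimal sketch of that rule follows (illustrative only; the function name and error handling are assumptions, not the project's actual code):

```python
def build_alias_map(models: list[dict]) -> dict[str, str]:
    """Map every lower-cased alias to its canonical model_name, rejecting duplicates."""
    alias_map: dict[str, str] = {}
    for entry in models:
        for alias in entry.get("aliases", []):
            key = alias.lower()
            if key in alias_map and alias_map[key] != entry["model_name"]:
                raise ValueError(f"Alias '{alias}' is defined for more than one model")
            alias_map[key] = entry["model_name"]
    return alias_map
```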
conf/openrouter_models.json (new file, 346 lines)
@@ -0,0 +1,346 @@
{
  "_README": {
    "description": "Model metadata for OpenRouter-backed providers.",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
    "usage": "Models listed here are exposed through OpenRouter. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "models": [
    {
      "model_name": "anthropic/claude-sonnet-4.5",
      "aliases": [
        "sonnet",
        "sonnet4.5"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency",
      "intelligence_score": 12
    },
    {
      "model_name": "anthropic/claude-opus-4.1",
      "aliases": [
        "opus",
        "claude-opus"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude Opus 4.1 - Our most capable and intelligent model yet",
      "intelligence_score": 14
    },
    {
      "model_name": "anthropic/claude-sonnet-4.1",
      "aliases": [
        "sonnet4.1"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency",
      "intelligence_score": 10
    },
    {
      "model_name": "anthropic/claude-3.5-haiku",
      "aliases": [
        "haiku"
      ],
      "context_window": 200000,
      "max_output_tokens": 64000,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": true,
      "max_image_size_mb": 5.0,
      "description": "Claude 3 Haiku - Fast and efficient with vision",
      "intelligence_score": 8
    },
    {
      "model_name": "google/gemini-2.5-pro",
      "aliases": [
        "pro",
        "gemini-pro",
        "gemini",
        "pro-openrouter"
      ],
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
      "intelligence_score": 18
    },
    {
      "model_name": "google/gemini-2.5-flash",
      "aliases": [
        "flash",
        "gemini-flash"
      ],
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 15.0,
      "description": "Google's Gemini 2.5 Flash via OpenRouter with vision",
      "intelligence_score": 10
    },
    {
      "model_name": "mistralai/mistral-large-2411",
      "aliases": [
        "mistral-large",
        "mistral"
      ],
      "context_window": 128000,
      "max_output_tokens": 32000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Mistral's largest model (text-only)",
      "intelligence_score": 11
    },
    {
      "model_name": "meta-llama/llama-3-70b",
      "aliases": [
        "llama",
        "llama3",
        "llama3-70b",
        "llama-70b",
        "llama3-openrouter"
      ],
      "context_window": 8192,
      "max_output_tokens": 8192,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Meta's Llama 3 70B model (text-only)",
      "intelligence_score": 9
    },
    {
      "model_name": "deepseek/deepseek-r1-0528",
      "aliases": [
        "deepseek-r1",
        "deepseek",
        "r1",
        "deepseek-thinking"
      ],
      "context_window": 65536,
      "max_output_tokens": 32768,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)",
      "intelligence_score": 15
    },
    {
      "model_name": "perplexity/llama-3-sonar-large-32k-online",
      "aliases": [
        "perplexity",
        "sonar",
        "perplexity-online"
      ],
      "context_window": 32768,
      "max_output_tokens": 32768,
      "supports_extended_thinking": false,
      "supports_json_mode": false,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "Perplexity's online model with web search (text-only)",
      "intelligence_score": 9
    },
    {
      "model_name": "openai/o3",
      "aliases": [
        "o3"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision",
      "intelligence_score": 14
    },
    {
      "model_name": "openai/o3-mini",
      "aliases": [
        "o3-mini",
        "o3mini"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3-mini model - balanced performance and speed with vision",
      "intelligence_score": 12
    },
    {
      "model_name": "openai/o3-mini-high",
      "aliases": [
        "o3-mini-high",
        "o3mini-high"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision",
      "intelligence_score": 13
    },
    {
      "model_name": "openai/o3-pro",
      "aliases": [
        "o3pro"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision",
      "intelligence_score": 15
    },
    {
      "model_name": "openai/o4-mini",
      "aliases": [
        "o4-mini",
        "o4mini"
      ],
      "context_window": 200000,
      "max_output_tokens": 100000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": false,
      "temperature_constraint": "fixed",
      "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision",
      "intelligence_score": 11
    },
    {
      "model_name": "openai/gpt-5",
      "aliases": [
        "gpt5"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": true,
      "supports_json_mode": true,
      "supports_function_calling": true,
      "supports_images": true,
      "max_image_size_mb": 20.0,
      "supports_temperature": true,
      "temperature_constraint": "range",
      "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
      "intelligence_score": 16
    },
    {
      "model_name": "openai/gpt-5-codex",
      "aliases": [
        "codex",
        "gpt5codex"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows",
      "intelligence_score": 17
    },
    {
      "model_name": "openai/gpt-5-mini",
      "aliases": [
        "gpt5mini"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "supports_temperature": true,
      "temperature_constraint": "fixed",
      "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
      "intelligence_score": 10
    },
    {
      "model_name": "openai/gpt-5-nano",
      "aliases": [
        "gpt5nano"
      ],
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": false,
      "supports_json_mode": true,
      "supports_function_calling": false,
      "supports_images": false,
      "max_image_size_mb": 0.0,
      "supports_temperature": true,
      "temperature_constraint": "fixed",
      "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
      "intelligence_score": 8
    }
  ]
}
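Since `intelligence_score` is described above as the primary signal for auto-mode model ordering, a rough illustration of that ordering might look like the following. This is a sketch under assumed function names, not the server's actual auto-mode logic.

```python
import json
from pathlib import Path


def rank_models_for_auto_mode(path: str = "conf/openrouter_models.json") -> list[str]:
    """Return model names sorted by intelligence_score, highest first."""
    data = json.loads(Path(path).read_text())
    models = data.get("models", [])
    ranked = sorted(models, key=lambda m: m.get("intelligence_score", 0), reverse=True)
    return [m["model_name"] for m in ranked]


# With the file above, the top of the list would be:
# ['google/gemini-2.5-pro', 'openai/gpt-5-codex', 'openai/gpt-5', ...]
```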