diff --git a/conf/azure_models.json b/conf/azure_models.json
index 35d6e74..898dfd1 100644
--- a/conf/azure_models.json
+++ b/conf/azure_models.json
@@ -18,6 +18,7 @@
       "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
       "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
       "use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.",
+      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
       "description": "Human-readable description of the model",
       "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
     }
diff --git a/conf/gemini_models.json b/conf/gemini_models.json
new file mode 100644
index 0000000..e8275e5
--- /dev/null
+++ b/conf/gemini_models.json
@@ -0,0 +1,111 @@
+{
+  "_README": {
+    "description": "Model metadata for Google's Gemini API access.",
+    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
+    "usage": "Models listed here are exposed directly through the Gemini provider. Aliases are case-insensitive.",
+    "field_notes": "Matches providers/shared/model_capabilities.py.",
+    "field_descriptions": {
+      "model_name": "The model identifier (e.g., 'gemini-2.5-pro', 'gemini-2.0-flash')",
+      "aliases": "Array of short names users can type instead of the full model name",
+      "context_window": "Total number of tokens the model can process (input + output combined)",
+      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
+      "max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
+      "supports_extended_thinking": "Whether the model supports extended reasoning/thinking tokens",
+      "supports_json_mode": "Whether the model can guarantee valid JSON output",
+      "supports_function_calling": "Whether the model supports function/tool calling",
+      "supports_images": "Whether the model can process images/visual input",
+      "max_image_size_mb": "Maximum total size in MB for all images combined",
+      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
+      "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
+      "description": "Human-readable description of the model",
+      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
+    }
+  },
+  "models": [
+    {
+      "model_name": "gemini-2.5-pro",
+      "friendly_name": "Gemini (Pro 2.5)",
+      "aliases": [
+        "pro",
+        "gemini pro",
+        "gemini-pro"
+      ],
+      "intelligence_score": 18,
+      "description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "max_thinking_tokens": 32768,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 32.0
+    },
+    {
+      "model_name": "gemini-2.0-flash",
+      "friendly_name": "Gemini (Flash 2.0)",
+      "aliases": [
+        "flash-2.0",
+        "flash2"
+      ],
+      "intelligence_score": 9,
+      "description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "max_thinking_tokens": 24576,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0
+    },
+    {
+      "model_name": "gemini-2.0-flash-lite",
+      "friendly_name": "Gemini (Flash Lite 2.0)",
+      "aliases": [
+        "flashlite",
+        "flash-lite"
+      ],
+      "intelligence_score": 7,
+      "description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": false,
+      "supports_temperature": true
+    },
+    {
+      "model_name": "gemini-2.5-flash",
+      "friendly_name": "Gemini (Flash 2.5)",
+      "aliases": [
+        "flash",
+        "flash2.5"
+      ],
+      "intelligence_score": 10,
+      "description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "max_thinking_tokens": 24576,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0
+    }
+  ]
+}
diff --git a/conf/openai_models.json b/conf/openai_models.json
new file mode 100644
index 0000000..e5aea7f
--- /dev/null
+++ b/conf/openai_models.json
@@ -0,0 +1,235 @@
+{
+  "_README": {
+    "description": "Model metadata for native OpenAI API access.",
+    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
+    "usage": "Models listed here are exposed directly through the OpenAI provider. Aliases are case-insensitive.",
+    "field_notes": "Matches providers/shared/model_capabilities.py.",
+    "field_descriptions": {
+      "model_name": "The model identifier (e.g., 'gpt-5', 'o3-pro')",
+      "aliases": "Array of short names users can type instead of the full model name",
+      "context_window": "Total number of tokens the model can process (input + output combined)",
+      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
+      "max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
+      "supports_extended_thinking": "Whether the model supports extended reasoning/thinking tokens",
+      "supports_json_mode": "Whether the model can guarantee valid JSON output",
+      "supports_function_calling": "Whether the model supports function/tool calling",
+      "supports_images": "Whether the model can process images/visual input",
+      "max_image_size_mb": "Maximum total size in MB for all images combined",
+      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
+      "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
+      "description": "Human-readable description of the model",
+      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
+    }
+  },
+  "models": [
+    {
+      "model_name": "gpt-5",
+      "friendly_name": "OpenAI (GPT-5)",
+      "aliases": [
+        "gpt5",
+        "gpt-5"
+      ],
+      "intelligence_score": 16,
+      "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
+      "context_window": 400000,
+      "max_output_tokens": 128000,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": false,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "gpt-5-pro",
+      "friendly_name": "OpenAI (GPT-5 Pro)",
+      "aliases": [
+        "gpt5pro",
+        "gpt5-pro"
+      ],
+      "intelligence_score": 18,
+      "description": "GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
+      "context_window": 400000,
+      "max_output_tokens": 272000,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": false,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0,
+      "use_openai_response_api": true,
+      "default_reasoning_effort": "high",
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "gpt-5-mini",
+      "friendly_name": "OpenAI (GPT-5-mini)",
+      "aliases": [
+        "gpt5-mini",
+        "gpt5mini",
+        "mini"
+      ],
+      "intelligence_score": 15,
+      "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
+      "context_window": 400000,
+      "max_output_tokens": 128000,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": false,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "gpt-5-nano",
+      "friendly_name": "OpenAI (GPT-5 nano)",
+      "aliases": [
+        "gpt5nano",
+        "gpt5-nano",
+        "nano"
+      ],
+      "intelligence_score": 13,
+      "description": "GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
+      "context_window": 400000,
+      "max_output_tokens": 128000,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "o3",
+      "friendly_name": "OpenAI (O3)",
+      "intelligence_score": 14,
+      "description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
+      "context_window": 200000,
+      "max_output_tokens": 65536,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": false,
+      "max_image_size_mb": 20.0,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "o3-mini",
+      "friendly_name": "OpenAI (O3-mini)",
+      "aliases": [
+        "o3mini"
+      ],
+      "intelligence_score": 12,
+      "description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
+      "context_window": 200000,
+      "max_output_tokens": 65536,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": false,
+      "max_image_size_mb": 20.0,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "o3-pro",
+      "friendly_name": "OpenAI (O3-Pro)",
+      "aliases": [
+        "o3pro"
+      ],
+      "intelligence_score": 15,
+      "description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
+      "context_window": 200000,
+      "max_output_tokens": 65536,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": false,
+      "max_image_size_mb": 20.0,
+      "use_openai_response_api": true,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "o4-mini",
+      "friendly_name": "OpenAI (O4-mini)",
+      "aliases": [
+        "o4mini"
+      ],
+      "intelligence_score": 11,
+      "description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
+      "context_window": 200000,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": false,
+      "max_image_size_mb": 20.0,
+      "temperature_constraint": "fixed"
+    },
+    {
+      "model_name": "gpt-4.1",
+      "friendly_name": "OpenAI (GPT 4.1)",
+      "aliases": [
+        "gpt4.1"
+      ],
+      "intelligence_score": 13,
+      "description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
+      "context_window": 1000000,
+      "max_output_tokens": 32768,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0
+    },
+    {
+      "model_name": "gpt-5-codex",
+      "friendly_name": "OpenAI (GPT-5 Codex)",
+      "aliases": [
+        "gpt5-codex",
+        "codex",
+        "gpt-5-code",
+        "gpt5-code"
+      ],
+      "intelligence_score": 17,
+      "description": "GPT-5 Codex (400K context) - Specialized for coding, refactoring, and software architecture.",
+      "context_window": 400000,
+      "max_output_tokens": 128000,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0,
+      "use_openai_response_api": true
+    }
+  ]
+}
diff --git a/conf/xai_models.json b/conf/xai_models.json
new file mode 100644
index 0000000..1d179d7
--- /dev/null
+++ b/conf/xai_models.json
@@ -0,0 +1,87 @@
+{
+  "_README": {
+    "description": "Model metadata for X.AI (GROK) API access.",
+    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
+    "usage": "Models listed here are exposed directly through the X.AI provider. Aliases are case-insensitive.",
+    "field_notes": "Matches providers/shared/model_capabilities.py.",
+    "field_descriptions": {
+      "model_name": "The model identifier (e.g., 'grok-4', 'grok-3-fast')",
+      "aliases": "Array of short names users can type instead of the full model name",
+      "context_window": "Total number of tokens the model can process (input + output combined)",
+      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
+      "max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
+      "supports_extended_thinking": "Whether the model supports extended reasoning/thinking tokens",
+      "supports_json_mode": "Whether the model can guarantee valid JSON output",
+      "supports_function_calling": "Whether the model supports function/tool calling",
+      "supports_images": "Whether the model can process images/visual input",
+      "max_image_size_mb": "Maximum total size in MB for all images combined",
+      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
+      "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
+      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
+      "description": "Human-readable description of the model",
+      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
+    }
+  },
+  "models": [
+    {
+      "model_name": "grok-4",
+      "friendly_name": "X.AI (Grok 4)",
+      "aliases": [
+        "grok",
+        "grok4",
+        "grok-4"
+      ],
+      "intelligence_score": 16,
+      "description": "GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
+      "context_window": 256000,
+      "max_output_tokens": 256000,
+      "supports_extended_thinking": true,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": true,
+      "supports_images": true,
+      "supports_temperature": true,
+      "max_image_size_mb": 20.0
+    },
+    {
+      "model_name": "grok-3",
+      "friendly_name": "X.AI (Grok 3)",
+      "aliases": [
+        "grok3"
+      ],
+      "intelligence_score": 13,
+      "description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
+      "context_window": 131072,
+      "max_output_tokens": 131072,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": false,
+      "supports_images": false,
+      "supports_temperature": true
+    },
+    {
+      "model_name": "grok-3-fast",
+      "friendly_name": "X.AI (Grok 3 Fast)",
+      "aliases": [
+        "grok3fast",
+        "grokfast",
+        "grok3-fast"
+      ],
+      "intelligence_score": 12,
+      "description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
+      "context_window": 131072,
+      "max_output_tokens": 131072,
+      "supports_extended_thinking": false,
+      "supports_system_prompts": true,
+      "supports_streaming": true,
+      "supports_function_calling": true,
+      "supports_json_mode": false,
+      "supports_images": false,
+      "supports_temperature": true
+    }
+  ]
+}
diff --git a/docs/configuration.md b/docs/configuration.md
index a489ec9..9b3885d 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -67,16 +67,26 @@ CUSTOM_MODEL_NAME=llama3.2
 # Default model
 DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
 ```
 
-**Available Models:**
-- **`auto`**: Claude automatically selects the optimal model
-- **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis
-- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses
-- **`o3`**: Strong logical reasoning (200K context)
-- **`o3-mini`**: Balanced speed/quality (200K context)
-- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
-- **`grok-3`**: GROK-3 advanced reasoning (131K context)
-- **`grok-4`**: GROK-4 flagship model (256K context)
-- **Custom models**: via OpenRouter or local APIs
+- **Available Models:** The canonical capability data for native providers lives in JSON manifests under `conf/`:
+  - `conf/openai_models.json` – OpenAI catalogue (can be overridden with `OPENAI_MODELS_CONFIG_PATH`)
+  - `conf/gemini_models.json` – Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`)
+  - `conf/xai_models.json` – X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`)
+  - `conf/openrouter_models.json` – OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`)
+  - `conf/custom_models.json` – Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`)
+
+  Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags. Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, or expose additional aliases without touching Python code.
+
+  The shipped defaults cover:
+
+  | Provider | Canonical Models | Notable Aliases |
+  |----------|-----------------|-----------------|
+  | OpenAI | `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
+  | Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` |
+  | X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` |
+  | OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
+  | Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
+
+  > **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support) without editing Python.
 
 ### Thinking Mode Configuration
@@ -114,28 +124,11 @@ XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4
 OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
 ```
 
-**Supported Model Names:**
+**Supported Model Names:** The names/aliases listed in the JSON manifests above are the authoritative source. Keep in mind:
 
-**OpenAI Models:**
-- `o3` (200K context, high reasoning)
-- `o3-mini` (200K context, balanced)
-- `o4-mini` (200K context, latest balanced)
-- `mini` (shorthand for o4-mini)
-
-**Gemini Models:**
-- `gemini-2.5-flash` (1M context, fast)
-- `gemini-2.5-pro` (1M context, powerful)
-- `flash` (shorthand for Flash model)
-- `pro` (shorthand for Pro model)
-
-**X.AI GROK Models:**
-- `grok-4` (256K context, flagship Grok model with reasoning, vision, and structured outputs)
-- `grok-3` (131K context, advanced reasoning)
-- `grok-3-fast` (131K context, higher performance)
-- `grok` (shorthand for grok-4)
-- `grok4` (shorthand for grok-4)
-- `grok3` (shorthand for grok-3)
-- `grokfast` (shorthand for grok-3-fast)
+- Aliases are case-insensitive and defined per entry (for example, `mini` maps to `gpt-5-mini` by default, while `flash` maps to `gemini-2.5-flash`).
+- When you override the manifest files you can add or remove aliases as needed; restriction policies (`*_ALLOWED_MODELS`) automatically pick up those changes.
+- Models omitted from a manifest fall back to generic capability detection (where supported) and may have limited feature metadata.
 
 **Example Configurations:**
 ```env
@@ -154,12 +147,14 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast
 ```
 
 ### Advanced Configuration
 
-**Custom Model Configuration:**
+**Custom Model Configuration & Manifest Overrides:**
 ```env
-# Override default location of custom_models.json
-CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
-# Override default location of openrouter_models.json
-OPENROUTER_MODELS_CONFIG_PATH=/path/to/your/openrouter_models.json
+# Override default location of built-in catalogues
+OPENAI_MODELS_CONFIG_PATH=/path/to/openai_models.json
+GEMINI_MODELS_CONFIG_PATH=/path/to/gemini_models.json
+XAI_MODELS_CONFIG_PATH=/path/to/xai_models.json
+OPENROUTER_MODELS_CONFIG_PATH=/path/to/openrouter_models.json
+CUSTOM_MODELS_CONFIG_PATH=/path/to/custom_models.json
 ```
 
 **Conversation Settings:**
diff --git a/docs/custom_models.md b/docs/custom_models.md
index 2db1694..c9df383 100644
--- a/docs/custom_models.md
+++ b/docs/custom_models.md
@@ -35,27 +35,33 @@ This guide covers setting up multiple AI model providers including OpenRouter, c
 
 ## Model Aliases
 
-Zen ships two registries:
+Zen ships multiple registries:
 
-- `conf/openrouter_models.json` – metadata for models routed through OpenRouter. Override with `OPENROUTER_MODELS_CONFIG_PATH` if you maintain a custom copy.
-- `conf/custom_models.json` – metadata for local or self-hosted OpenAI-compatible endpoints used by the Custom provider. Override with `CUSTOM_MODELS_CONFIG_PATH` if needed.
+- `conf/openai_models.json` – native OpenAI catalogue (override with `OPENAI_MODELS_CONFIG_PATH`)
+- `conf/gemini_models.json` – native Google Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`)
+- `conf/xai_models.json` – native X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`)
+- `conf/openrouter_models.json` – OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`)
+- `conf/custom_models.json` – local/self-hosted OpenAI-compatible catalogue (`CUSTOM_MODELS_CONFIG_PATH`)
 
 Copy whichever file you need into your project (or point the corresponding `*_MODELS_CONFIG_PATH` env var at your own copy) and edit it to advertise the models you want.
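+
+For instance, to expose an extra alias for a native model, the edited entry might look like the following (`enterprise-pro` is an illustrative alias, not a shipped default; other fields trimmed):
+
+```json
+{
+  "model_name": "gpt-5-pro",
+  "friendly_name": "OpenAI (GPT-5 Pro)",
+  "aliases": ["gpt5pro", "gpt5-pro", "enterprise-pro"],
+  "context_window": 400000,
+  "max_output_tokens": 272000,
+  "use_openai_response_api": true,
+  "default_reasoning_effort": "high"
+}
+```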
 
 ### OpenRouter Models (Cloud)
 
-| Alias | Maps to OpenRouter Model |
-|-------|-------------------------|
-| `opus` | `anthropic/claude-opus-4` |
-| `sonnet`, `claude` | `anthropic/claude-sonnet-4` |
-| `haiku` | `anthropic/claude-3.5-haiku` |
-| `gpt4o`, `4o` | `openai/gpt-4o` |
-| `gpt4o-mini`, `4o-mini` | `openai/gpt-4o-mini` |
-| `pro`, `gemini` | `google/gemini-2.5-pro` |
-| `flash` | `google/gemini-2.5-flash` |
-| `mistral` | `mistral/mistral-large` |
-| `deepseek`, `coder` | `deepseek/deepseek-coder` |
-| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` |
+The curated defaults in `conf/openrouter_models.json` include popular entries such as:
+
+| Alias | Canonical Model | Highlights |
+|-------|-----------------|------------|
+| `opus`, `claude-opus` | `anthropic/claude-opus-4.1` | Flagship Claude reasoning model with vision |
+| `sonnet`, `sonnet4.5` | `anthropic/claude-sonnet-4.5` | Balanced Claude with high context window |
+| `haiku` | `anthropic/claude-3.5-haiku` | Fast Claude option with vision |
+| `pro`, `gemini` | `google/gemini-2.5-pro` | Frontier Gemini with extended thinking |
+| `flash` | `google/gemini-2.5-flash` | Ultra-fast Gemini with vision |
+| `mistral` | `mistralai/mistral-large-2411` | Frontier Mistral (text only) |
+| `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
+| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
+| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
+
+Consult the JSON file for the full list, aliases, and capability flags. Add new entries as OpenRouter releases additional models.
 
 ### Custom/Local Models
 
@@ -65,6 +71,14 @@ Copy whichever file you need into your project (or point the corresponding `*_MO
 View the baseline OpenRouter catalogue in [`conf/openrouter_models.json`](conf/openrouter_models.json) and populate [`conf/custom_models.json`](conf/custom_models.json) with your local models.
 
+Native catalogues (`conf/openai_models.json`, `conf/gemini_models.json`, `conf/xai_models.json`) follow the same schema. Updating those files lets you:
+
+- Expose new aliases (e.g., map `enterprise-pro` to `gpt-5-pro`)
+- Advertise support for JSON mode or vision if the upstream provider adds it
+- Adjust token limits when providers increase context windows
+
+Because providers load the manifests on import, you can tweak capabilities without touching Python. Restart the server after editing the JSON files so changes are picked up.
+
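+If you need to pick up manifest changes without a full restart (for example in tests), each native provider also exposes `reload_registry()`. A minimal sketch, assuming the registry re-reads its `*_MODELS_CONFIG_PATH` variable when it is rebuilt:
+
+```python
+import os
+
+from providers.openai_provider import OpenAIModelProvider
+
+# Point the provider at a custom manifest, then rebuild the class-level
+# capability map from that file.
+os.environ["OPENAI_MODELS_CONFIG_PATH"] = "/path/to/my_openai_models.json"
+OpenAIModelProvider.reload_registry()
+
+print(sorted(OpenAIModelProvider.MODEL_CAPABILITIES))  # canonical model names
+```
+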
""" - # Model configurations using ModelCapabilities objects - MODEL_CAPABILITIES = { - "gemini-2.5-pro": ModelCapabilities( - provider=ProviderType.GOOGLE, - model_name="gemini-2.5-pro", - friendly_name="Gemini (Pro 2.5)", - intelligence_score=18, - context_window=1_048_576, # 1M tokens - max_output_tokens=65_536, - supports_extended_thinking=True, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # Vision capability - max_image_size_mb=32.0, # Higher limit for Pro model - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("range"), - max_thinking_tokens=32768, # Max thinking tokens for Pro model - description="Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis", - aliases=["pro", "gemini pro", "gemini-pro"], - ), - "gemini-2.0-flash": ModelCapabilities( - provider=ProviderType.GOOGLE, - model_name="gemini-2.0-flash", - friendly_name="Gemini (Flash 2.0)", - intelligence_score=9, - context_window=1_048_576, # 1M tokens - max_output_tokens=65_536, - supports_extended_thinking=True, # Experimental thinking mode - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # Vision capability - max_image_size_mb=20.0, # Conservative 20MB limit for reliability - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("range"), - max_thinking_tokens=24576, # Same as 2.5 flash for consistency - description="Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input", - aliases=["flash-2.0", "flash2"], - ), - "gemini-2.0-flash-lite": ModelCapabilities( - provider=ProviderType.GOOGLE, - model_name="gemini-2.0-flash-lite", - friendly_name="Gemin (Flash Lite 2.0)", - intelligence_score=7, - context_window=1_048_576, # 1M tokens - max_output_tokens=65_536, - supports_extended_thinking=False, # Not supported per user request - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=False, # Does not support images - max_image_size_mb=0.0, # No image support - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("range"), - description="Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only", - aliases=["flashlite", "flash-lite"], - ), - "gemini-2.5-flash": ModelCapabilities( - provider=ProviderType.GOOGLE, - model_name="gemini-2.5-flash", - friendly_name="Gemini (Flash 2.5)", - intelligence_score=10, - context_window=1_048_576, # 1M tokens - max_output_tokens=65_536, - supports_extended_thinking=True, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # Vision capability - max_image_size_mb=20.0, # Conservative 20MB limit for reliability - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("range"), - max_thinking_tokens=24576, # Flash 2.5 thinking budget limit - description="Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations", - aliases=["flash", "flash2.5"], - ), - } + MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {} + _registry: Optional[GeminiModelRegistry] = None # Thinking mode configurations - percentages of model's max_thinking_tokens # These percentages work across all models that support thinking @@ -130,11 
+51,50 @@ class GeminiModelProvider(ModelProvider): def __init__(self, api_key: str, **kwargs): """Initialize Gemini provider with API key and optional base URL.""" + self._ensure_registry() super().__init__(api_key, **kwargs) self._client = None self._token_counters = {} # Cache for token counting self._base_url = kwargs.get("base_url", None) # Optional custom endpoint self._timeout_override = self._resolve_http_timeout() + self._invalidate_capability_cache() + + # ------------------------------------------------------------------ + # Registry access + # ------------------------------------------------------------------ + + @classmethod + def _ensure_registry(cls, *, force_reload: bool = False) -> None: + """Load capability registry into MODEL_CAPABILITIES.""" + + if cls._registry is not None and not force_reload: + return + + try: + registry = GeminiModelRegistry() + except Exception as exc: # pragma: no cover - defensive logging + logger.warning("Unable to load Gemini model registry: %s", exc) + cls._registry = None + cls.MODEL_CAPABILITIES = {} + return + + cls._registry = registry + cls.MODEL_CAPABILITIES = dict(registry.model_map) + + @classmethod + def reload_registry(cls) -> None: + """Force registry reload (primarily for tests).""" + + cls._ensure_registry(force_reload=True) + + def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]: + self._ensure_registry() + return super().get_all_model_capabilities() + + def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]: + if self._registry is None: + return None + return dict(self._registry.model_map) # ------------------------------------------------------------------ # Capability surface @@ -225,6 +185,7 @@ class GeminiModelProvider(ModelProvider): # Validate parameters and fetch capabilities self.validate_parameters(model_name, temperature) capabilities = self.get_capabilities(model_name) + capability_map = self.get_all_model_capabilities() resolved_model_name = self._resolve_model_name(model_name) @@ -269,7 +230,7 @@ class GeminiModelProvider(ModelProvider): # Add thinking configuration for models that support it if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS: # Get model's max thinking tokens and calculate actual budget - model_config = self.MODEL_CAPABILITIES.get(resolved_model_name) + model_config = capability_map.get(resolved_model_name) if model_config and model_config.max_thinking_tokens > 0: max_thinking_tokens = model_config.max_thinking_tokens actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode]) @@ -542,6 +503,8 @@ class GeminiModelProvider(ModelProvider): if not allowed_models: return None + capability_map = self.get_all_model_capabilities() + # Helper to find best model from candidates def find_best(candidates: list[str]) -> Optional[str]: """Return best model from candidates (sorted for consistency).""" @@ -553,16 +516,14 @@ class GeminiModelProvider(ModelProvider): pro_thinking = [ m for m in allowed_models - if "pro" in m and m in self.MODEL_CAPABILITIES and self.MODEL_CAPABILITIES[m].supports_extended_thinking + if "pro" in m and m in capability_map and capability_map[m].supports_extended_thinking ] if pro_thinking: return find_best(pro_thinking) # Then any model that supports thinking any_thinking = [ - m - for m in allowed_models - if m in self.MODEL_CAPABILITIES and self.MODEL_CAPABILITIES[m].supports_extended_thinking + m for m in allowed_models if m in capability_map and 
+            m for m in allowed_models if m in capability_map and capability_map[m].supports_extended_thinking
         ]
         if any_thinking:
             return find_best(any_thinking)
@@ -590,3 +551,7 @@ class GeminiModelProvider(ModelProvider):
 
         # Ultimate fallback to best available model
         return find_best(allowed_models)
+
+
+# Load registry data at import time for registry consumers
+GeminiModelProvider._ensure_registry()
diff --git a/providers/gemini_registry.py b/providers/gemini_registry.py
new file mode 100644
index 0000000..dd9e8fb
--- /dev/null
+++ b/providers/gemini_registry.py
@@ -0,0 +1,19 @@
+"""Registry loader for Gemini model capabilities."""
+
+from __future__ import annotations
+
+from .model_registry_base import CapabilityModelRegistry
+from .shared import ProviderType
+
+
+class GeminiModelRegistry(CapabilityModelRegistry):
+    """Capability registry backed by `conf/gemini_models.json`."""
+
+    def __init__(self, config_path: str | None = None) -> None:
+        super().__init__(
+            env_var_name="GEMINI_MODELS_CONFIG_PATH",
+            default_filename="gemini_models.json",
+            provider=ProviderType.GOOGLE,
+            friendly_prefix="Gemini ({model})",
+            config_path=config_path,
+        )
diff --git a/providers/model_registry_base.py b/providers/model_registry_base.py
index 20f4dd9..c6dedec 100644
--- a/providers/model_registry_base.py
+++ b/providers/model_registry_base.py
@@ -85,6 +85,11 @@ class CustomModelRegistryBase:
     def get_entry(self, model_name: str) -> dict | None:
         return self._extras.get(model_name)
 
+    def get_model_config(self, model_name: str) -> ModelCapabilities | None:
+        """Backwards-compatible accessor for registries expecting this helper."""
+
+        return self.model_map.get(model_name) or self.resolve(model_name)
+
     def iter_entries(self) -> Iterable[tuple[str, ModelCapabilities, dict]]:
         for model_name, capability in self.model_map.items():
             yield model_name, capability, self._extras.get(model_name, {})
""" - # Model configurations using ModelCapabilities objects - MODEL_CAPABILITIES = { - "gpt-5": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="gpt-5", - friendly_name="OpenAI (GPT-5)", - intelligence_score=16, - context_window=400_000, # 400K tokens - max_output_tokens=128_000, # 128K max output tokens - supports_extended_thinking=True, # Supports reasoning tokens - supports_system_prompts=True, - supports_streaming=False, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # GPT-5 supports vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=True, # Regular models accept temperature parameter - temperature_constraint=TemperatureConstraint.create("fixed"), - description="GPT-5 (400K context, 128K output) - Advanced model with reasoning support", - aliases=["gpt5", "gpt-5"], - ), - "gpt-5-pro": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="gpt-5-pro", - friendly_name="OpenAI (GPT-5 Pro)", - intelligence_score=18, - use_openai_response_api=True, - context_window=400_000, - max_output_tokens=272_000, - supports_extended_thinking=True, - supports_system_prompts=True, - supports_streaming=False, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("fixed"), - default_reasoning_effort="high", - description="GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support", - aliases=["gpt5pro", "gpt5-pro"], - ), - "gpt-5-mini": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="gpt-5-mini", - friendly_name="OpenAI (GPT-5-mini)", - intelligence_score=15, - context_window=400_000, # 400K tokens - max_output_tokens=128_000, # 128K max output tokens - supports_extended_thinking=True, # Supports reasoning tokens - supports_system_prompts=True, - supports_streaming=False, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # GPT-5-mini supports vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("fixed"), - description="GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support", - aliases=["gpt5-mini", "gpt5mini", "mini"], - ), - "gpt-5-nano": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="gpt-5-nano", - friendly_name="OpenAI (GPT-5 nano)", - intelligence_score=13, - context_window=400_000, - max_output_tokens=128_000, - supports_extended_thinking=True, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("fixed"), - description="GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks", - aliases=["gpt5nano", "gpt5-nano", "nano"], - ), - "o3": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="o3", - friendly_name="OpenAI (O3)", - intelligence_score=14, - context_window=200_000, # 200K tokens - max_output_tokens=65536, # 64K max output tokens - supports_extended_thinking=False, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, # O3 models support vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=False, 
# O3 models don't accept temperature parameter - temperature_constraint=TemperatureConstraint.create("fixed"), - description="Strong reasoning (200K context) - Logical problems, code generation, systematic analysis", - aliases=[], - ), - "o3-mini": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="o3-mini", - friendly_name="OpenAI (O3-mini)", - intelligence_score=12, - context_window=200_000, - max_output_tokens=65536, - supports_extended_thinking=False, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=False, - temperature_constraint=TemperatureConstraint.create("fixed"), - description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity", - aliases=["o3mini"], - ), - "o3-pro": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="o3-pro", - friendly_name="OpenAI (O3-Pro)", - intelligence_score=15, - context_window=200_000, - max_output_tokens=65536, - supports_extended_thinking=False, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=False, - temperature_constraint=TemperatureConstraint.create("fixed"), - description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.", - aliases=["o3pro"], - use_openai_response_api=True, - ), - "o4-mini": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="o4-mini", - friendly_name="OpenAI (O4-mini)", - intelligence_score=11, - context_window=200_000, - supports_extended_thinking=False, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=False, - temperature_constraint=TemperatureConstraint.create("fixed"), - description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning", - aliases=["o4mini"], - ), - "gpt-4.1": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="gpt-4.1", - friendly_name="OpenAI (GPT 4.1)", - intelligence_score=13, - context_window=1_000_000, - max_output_tokens=32_768, - supports_extended_thinking=False, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("range"), - description="GPT-4.1 (1M context) - Advanced reasoning model with large context window", - aliases=["gpt4.1"], - ), - "gpt-5-codex": ModelCapabilities( - provider=ProviderType.OPENAI, - model_name="gpt-5-codex", - friendly_name="OpenAI (GPT-5 Codex)", - intelligence_score=17, - context_window=400_000, - max_output_tokens=128_000, - supports_extended_thinking=True, - supports_system_prompts=True, - supports_streaming=True, - supports_function_calling=True, - supports_json_mode=True, - supports_images=True, - max_image_size_mb=20.0, - supports_temperature=True, - temperature_constraint=TemperatureConstraint.create("range"), - description="GPT-5 Codex (400K context) Specialized for 
coding, refactoring, and software architecture.", - aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"], - use_openai_response_api=True, - ), - } + MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {} + _registry: Optional[OpenAIModelRegistry] = None def __init__(self, api_key: str, **kwargs): """Initialize OpenAI provider with API key.""" + self._ensure_registry() # Set default OpenAI base URL, allow override for regions/custom endpoints kwargs.setdefault("base_url", "https://api.openai.com/v1") super().__init__(api_key, **kwargs) + self._invalidate_capability_cache() + + # ------------------------------------------------------------------ + # Registry access + # ------------------------------------------------------------------ + + @classmethod + def _ensure_registry(cls, *, force_reload: bool = False) -> None: + """Load capability registry into MODEL_CAPABILITIES.""" + + if cls._registry is not None and not force_reload: + return + + try: + registry = OpenAIModelRegistry() + except Exception as exc: # pragma: no cover - defensive logging + logger.warning("Unable to load OpenAI model registry: %s", exc) + cls._registry = None + cls.MODEL_CAPABILITIES = {} + return + + cls._registry = registry + cls.MODEL_CAPABILITIES = dict(registry.model_map) + + @classmethod + def reload_registry(cls) -> None: + """Force registry reload (primarily for tests).""" + + cls._ensure_registry(force_reload=True) + + def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]: + self._ensure_registry() + return super().get_all_model_capabilities() + + def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]: + if self._registry is None: + return None + return dict(self._registry.model_map) # ------------------------------------------------------------------ # Capability surface @@ -234,6 +80,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): ) -> Optional[ModelCapabilities]: """Look up OpenAI capabilities from built-ins or the custom registry.""" + self._ensure_registry() builtin = super()._lookup_capabilities(canonical_name, requested_name) if builtin is not None: return builtin @@ -319,3 +166,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): # Include GPT-5-Codex for coding workflows preferred = find_first(["gpt-5", "gpt-5-codex", "gpt-5-pro", "gpt-5-mini", "o4-mini", "o3-mini"]) return preferred if preferred else allowed_models[0] + + +# Load registry data at import time so dependent providers (Azure) can reuse it +OpenAIModelProvider._ensure_registry() diff --git a/providers/openai_registry.py b/providers/openai_registry.py new file mode 100644 index 0000000..859547f --- /dev/null +++ b/providers/openai_registry.py @@ -0,0 +1,19 @@ +"""Registry loader for OpenAI model capabilities.""" + +from __future__ import annotations + +from .model_registry_base import CapabilityModelRegistry +from .shared import ProviderType + + +class OpenAIModelRegistry(CapabilityModelRegistry): + """Capability registry backed by `conf/openai_models.json`.""" + + def __init__(self, config_path: str | None = None) -> None: + super().__init__( + env_var_name="OPENAI_MODELS_CONFIG_PATH", + default_filename="openai_models.json", + provider=ProviderType.OPENAI, + friendly_prefix="OpenAI ({model})", + config_path=config_path, + ) diff --git a/providers/xai.py b/providers/xai.py index 4e29cd4..c51e2bb 100644 --- a/providers/xai.py +++ b/providers/xai.py @@ -7,7 +7,8 @@ if TYPE_CHECKING: from tools.models import ToolModelCategory from .openai_compatible import OpenAICompatibleProvider -from 
diff --git a/providers/xai.py b/providers/xai.py
index 4e29cd4..c51e2bb 100644
--- a/providers/xai.py
+++ b/providers/xai.py
@@ -7,7 +7,8 @@ if TYPE_CHECKING:
     from tools.models import ToolModelCategory
 
 from .openai_compatible import OpenAICompatibleProvider
-from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
+from .shared import ModelCapabilities, ProviderType
+from .xai_registry import XAIModelRegistry
 
 logger = logging.getLogger(__name__)
@@ -21,72 +22,53 @@ class XAIModelProvider(OpenAICompatibleProvider):
 
     FRIENDLY_NAME = "X.AI"
 
-    # Model configurations using ModelCapabilities objects
-    MODEL_CAPABILITIES = {
-        "grok-4": ModelCapabilities(
-            provider=ProviderType.XAI,
-            model_name="grok-4",
-            friendly_name="X.AI (Grok 4)",
-            intelligence_score=16,
-            context_window=256_000,  # 256K tokens
-            max_output_tokens=256_000,  # 256K tokens max output
-            supports_extended_thinking=True,  # Grok-4 supports reasoning mode
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,  # Function calling supported
-            supports_json_mode=True,  # Structured outputs supported
-            supports_images=True,  # Multimodal capabilities
-            max_image_size_mb=20.0,  # Standard image size limit
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
-            aliases=["grok", "grok4", "grok-4"],
-        ),
-        "grok-3": ModelCapabilities(
-            provider=ProviderType.XAI,
-            model_name="grok-3",
-            friendly_name="X.AI (Grok 3)",
-            intelligence_score=13,
-            context_window=131_072,  # 131K tokens
-            max_output_tokens=131072,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=False,  # Assuming GROK doesn't have JSON mode yet
-            supports_images=False,  # Assuming GROK is text-only for now
-            max_image_size_mb=0.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
-            aliases=["grok3"],
-        ),
-        "grok-3-fast": ModelCapabilities(
-            provider=ProviderType.XAI,
-            model_name="grok-3-fast",
-            friendly_name="X.AI (Grok 3 Fast)",
-            intelligence_score=12,
-            context_window=131_072,  # 131K tokens
-            max_output_tokens=131072,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=False,  # Assuming GROK doesn't have JSON mode yet
-            supports_images=False,  # Assuming GROK is text-only for now
-            max_image_size_mb=0.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
-            aliases=["grok3fast", "grokfast", "grok3-fast"],
-        ),
-    }
+    MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
+    _registry: Optional[XAIModelRegistry] = None
 
     def __init__(self, api_key: str, **kwargs):
         """Initialize X.AI provider with API key."""
         # Set X.AI base URL
         kwargs.setdefault("base_url", "https://api.x.ai/v1")
+        self._ensure_registry()
         super().__init__(api_key, **kwargs)
+        self._invalidate_capability_cache()
+
+    # ------------------------------------------------------------------
+    # Registry access
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def _ensure_registry(cls, *, force_reload: bool = False) -> None:
+        """Load capability registry into MODEL_CAPABILITIES."""
+
+        if cls._registry is not None and not force_reload:
+            return
+
+        try:
+            registry = XAIModelRegistry()
+        except Exception as exc:  # pragma: no cover - defensive logging
+            logger.warning("Unable to load X.AI model registry: %s", exc)
+            cls._registry = None
+            cls.MODEL_CAPABILITIES = {}
+            return
+
+        cls._registry = registry
+        cls.MODEL_CAPABILITIES = dict(registry.model_map)
+
+    @classmethod
+    def reload_registry(cls) -> None:
+        """Force registry reload (primarily for tests)."""
+
+        cls._ensure_registry(force_reload=True)
+
+    def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
+        self._ensure_registry()
+        return super().get_all_model_capabilities()
+
+    def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
+        if self._registry is None:
+            return None
+        return dict(self._registry.model_map)
 
     def get_provider_type(self) -> ProviderType:
         """Get the provider type."""
@@ -135,3 +117,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
             return "grok-3-fast"
 
         # Fall back to any available model
         return allowed_models[0]
+
+
+# Load registry data at import time
+XAIModelProvider._ensure_registry()
diff --git a/providers/xai_registry.py b/providers/xai_registry.py
new file mode 100644
index 0000000..80da85e
--- /dev/null
+++ b/providers/xai_registry.py
@@ -0,0 +1,19 @@
+"""Registry loader for X.AI (GROK) model capabilities."""
+
+from __future__ import annotations
+
+from .model_registry_base import CapabilityModelRegistry
+from .shared import ProviderType
+
+
+class XAIModelRegistry(CapabilityModelRegistry):
+    """Capability registry backed by `conf/xai_models.json`."""
+
+    def __init__(self, config_path: str | None = None) -> None:
+        super().__init__(
+            env_var_name="XAI_MODELS_CONFIG_PATH",
+            default_filename="xai_models.json",
+            provider=ProviderType.XAI,
+            friendly_prefix="X.AI ({model})",
+            config_path=config_path,
+        )