feat: all native providers now read from catalog files, like the OpenRouter / Custom configs, allowing greater control over model capabilities

Fahad
2025-10-07 12:17:47 +04:00
parent 7d7c74b5a3
commit 2a706d5720
13 changed files with 704 additions and 397 deletions


@@ -18,6 +18,7 @@
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}

conf/gemini_models.json Normal file (+111 lines)

@@ -0,0 +1,111 @@
{
"_README": {
"description": "Model metadata for Google's Gemini API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the Gemini provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'gemini-2.5-pro', 'gemini-2.0-flash')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "gemini-2.5-pro",
"friendly_name": "Gemini (Pro 2.5)",
"aliases": [
"pro",
"gemini pro",
"gemini-pro"
],
"intelligence_score": 18,
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 32768,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 32.0
},
{
"model_name": "gemini-2.0-flash",
"friendly_name": "Gemini (Flash 2.0)",
"aliases": [
"flash-2.0",
"flash2"
],
"intelligence_score": 9,
"description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 24576,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "gemini-2.0-flash-lite",
"friendly_name": "Gemini (Flash Lite 2.0)",
"aliases": [
"flashlite",
"flash-lite"
],
"intelligence_score": 7,
"description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": false,
"supports_temperature": true
},
{
"model_name": "gemini-2.5-flash",
"friendly_name": "Gemini (Flash 2.5)",
"aliases": [
"flash",
"flash2.5"
],
"intelligence_score": 10,
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
"context_window": 1048576,
"max_output_tokens": 65536,
"max_thinking_tokens": 24576,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
}
]
}

conf/openai_models.json Normal file (+235 lines)

@@ -0,0 +1,235 @@
{
"_README": {
"description": "Model metadata for native OpenAI API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the OpenAI provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'gpt-5', 'o3-pro')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "gpt-5",
"friendly_name": "OpenAI (GPT-5)",
"aliases": [
"gpt5",
"gpt-5"
],
"intelligence_score": 16,
"description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-pro",
"friendly_name": "OpenAI (GPT-5 Pro)",
"aliases": [
"gpt5pro",
"gpt5-pro"
],
"intelligence_score": 18,
"description": "GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
"context_window": 400000,
"max_output_tokens": 272000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"use_openai_response_api": true,
"default_reasoning_effort": "high",
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-mini",
"friendly_name": "OpenAI (GPT-5-mini)",
"aliases": [
"gpt5-mini",
"gpt5mini",
"mini"
],
"intelligence_score": 15,
"description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": false,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-5-nano",
"friendly_name": "OpenAI (GPT-5 nano)",
"aliases": [
"gpt5nano",
"gpt5-nano",
"nano"
],
"intelligence_score": 13,
"description": "GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3",
"friendly_name": "OpenAI (O3)",
"intelligence_score": 14,
"description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3-mini",
"friendly_name": "OpenAI (O3-mini)",
"aliases": [
"o3mini"
],
"intelligence_score": 12,
"description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "o3-pro",
"friendly_name": "OpenAI (O3-Pro)",
"aliases": [
"o3pro"
],
"intelligence_score": 15,
"description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
"context_window": 200000,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"use_openai_response_api": true,
"temperature_constraint": "fixed"
},
{
"model_name": "o4-mini",
"friendly_name": "OpenAI (O4-mini)",
"aliases": [
"o4mini"
],
"intelligence_score": 11,
"description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
"context_window": 200000,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": false,
"max_image_size_mb": 20.0,
"temperature_constraint": "fixed"
},
{
"model_name": "gpt-4.1",
"friendly_name": "OpenAI (GPT 4.1)",
"aliases": [
"gpt4.1"
],
"intelligence_score": 13,
"description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
"context_window": 1000000,
"max_output_tokens": 32768,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "gpt-5-codex",
"friendly_name": "OpenAI (GPT-5 Codex)",
"aliases": [
"gpt5-codex",
"codex",
"gpt-5-code",
"gpt5-code"
],
"intelligence_score": 17,
"description": "GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0,
"use_openai_response_api": true
}
]
}

conf/xai_models.json Normal file (+87 lines)

@@ -0,0 +1,87 @@
{
"_README": {
"description": "Model metadata for X.AI (GROK) API access.",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
"usage": "Models listed here are exposed directly through the X.AI provider. Aliases are case-insensitive.",
"field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
"model_name": "The model identifier (e.g., 'grok-4', 'grok-3-fast')",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
"max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
"supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
}
},
"models": [
{
"model_name": "grok-4",
"friendly_name": "X.AI (Grok 4)",
"aliases": [
"grok",
"grok4",
"grok-4"
],
"intelligence_score": 16,
"description": "GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
"context_window": 256000,
"max_output_tokens": 256000,
"supports_extended_thinking": true,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": true,
"supports_images": true,
"supports_temperature": true,
"max_image_size_mb": 20.0
},
{
"model_name": "grok-3",
"friendly_name": "X.AI (Grok 3)",
"aliases": [
"grok3"
],
"intelligence_score": 13,
"description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
"context_window": 131072,
"max_output_tokens": 131072,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": false,
"supports_images": false,
"supports_temperature": true
},
{
"model_name": "grok-3-fast",
"friendly_name": "X.AI (Grok 3 Fast)",
"aliases": [
"grok3fast",
"grokfast",
"grok3-fast"
],
"intelligence_score": 12,
"description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
"context_window": 131072,
"max_output_tokens": 131072,
"supports_extended_thinking": false,
"supports_system_prompts": true,
"supports_streaming": true,
"supports_function_calling": true,
"supports_json_mode": false,
"supports_images": false,
"supports_temperature": true
}
]
}


@@ -67,16 +67,26 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model
DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
```
**Available Models:**
- **`auto`**: Claude automatically selects the optimal model
- **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis
- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses
- **`o3`**: Strong logical reasoning (200K context)
- **`o3-mini`**: Balanced speed/quality (200K context)
- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
- **`grok-3`**: GROK-3 advanced reasoning (131K context)
- **`grok-4`**: GROK-4 flagship model (256K context)
- **Custom models**: via OpenRouter or local APIs
- **Available Models:** The canonical capability data for native providers lives in JSON manifests under `conf/`:
- `conf/openai_models.json` - OpenAI catalogue (can be overridden with `OPENAI_MODELS_CONFIG_PATH`)
- `conf/gemini_models.json` - Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`)
- `conf/xai_models.json` - X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`)
- `conf/openrouter_models.json` - OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`)
- `conf/custom_models.json` - Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`)
Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags. Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, or expose additional aliases without touching Python code.
The shipped defaults cover:
| Provider | Canonical Models | Notable Aliases |
|----------|-----------------|-----------------|
| OpenAI | `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
| Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` |
| X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` |
| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support) without editing Python.
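For example, the sketch below (the destination path and the `enterprise-mini` alias are made up for illustration) copies the shipped OpenAI catalogue, tweaks one entry, and points the provider at the copy:

```python
import json
from pathlib import Path

src = Path("conf/openai_models.json")        # shipped catalogue
dst = Path("/etc/zen/openai_models.json")    # hypothetical location for your copy

catalog = json.loads(src.read_text())
for model in catalog["models"]:
    if model["model_name"] == "gpt-5-mini":
        model["supports_json_mode"] = False                         # example capability tweak
        model.setdefault("aliases", []).append("enterprise-mini")   # example extra alias

dst.write_text(json.dumps(catalog, indent=2))
# Then set OPENAI_MODELS_CONFIG_PATH=/etc/zen/openai_models.json and restart the server.
```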
### Thinking Mode Configuration
@@ -114,28 +124,11 @@ XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4
OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
```
**Supported Model Names:**
**Supported Model Names:** The names/aliases listed in the JSON manifests above are the authoritative source. Keep in mind:
**OpenAI Models:**
- `o3` (200K context, high reasoning)
- `o3-mini` (200K context, balanced)
- `o4-mini` (200K context, latest balanced)
- `mini` (shorthand for o4-mini)
**Gemini Models:**
- `gemini-2.5-flash` (1M context, fast)
- `gemini-2.5-pro` (1M context, powerful)
- `flash` (shorthand for Flash model)
- `pro` (shorthand for Pro model)
**X.AI GROK Models:**
- `grok-4` (256K context, flagship Grok model with reasoning, vision, and structured outputs)
- `grok-3` (131K context, advanced reasoning)
- `grok-3-fast` (131K context, higher performance)
- `grok` (shorthand for grok-4)
- `grok4` (shorthand for grok-4)
- `grok3` (shorthand for grok-3)
- `grokfast` (shorthand for grok-3-fast)
- Aliases are case-insensitive and defined per entry (for example, `mini` maps to `gpt-5-mini` by default, while `flash` maps to `gemini-2.5-flash`).
- When you override the manifest files you can add or remove aliases as needed; restriction policies (`*_ALLOWED_MODELS`) automatically pick up those changes.
- Models omitted from a manifest fall back to generic capability detection (where supported) and may have limited feature metadata.
**Example Configurations:**
```env
@@ -154,12 +147,14 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast
### Advanced Configuration
**Custom Model Configuration:**
**Custom Model Configuration & Manifest Overrides:**
```env
# Override default location of custom_models.json
CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
# Override default location of openrouter_models.json
OPENROUTER_MODELS_CONFIG_PATH=/path/to/your/openrouter_models.json
# Override default location of built-in catalogues
OPENAI_MODELS_CONFIG_PATH=/path/to/openai_models.json
GEMINI_MODELS_CONFIG_PATH=/path/to/gemini_models.json
XAI_MODELS_CONFIG_PATH=/path/to/xai_models.json
OPENROUTER_MODELS_CONFIG_PATH=/path/to/openrouter_models.json
CUSTOM_MODELS_CONFIG_PATH=/path/to/custom_models.json
```
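To confirm which manifest a provider actually loaded (useful in tests), the provider classes in this commit expose a `reload_registry()` classmethod alongside the `MODEL_CAPABILITIES` map. A minimal sketch, assuming the provider module is importable as `providers.openai` and the override path exists:

```python
import os
from providers.openai import OpenAIModelProvider  # module path assumed; adjust to your checkout

os.environ["OPENAI_MODELS_CONFIG_PATH"] = "/etc/zen/openai_models.json"  # hypothetical override
OpenAIModelProvider.reload_registry()   # force a re-read of the manifest (primarily for tests)

caps = OpenAIModelProvider.MODEL_CAPABILITIES["gpt-5-mini"]
print(caps.context_window, caps.supports_json_mode)
```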
**Conversation Settings:**


@@ -35,27 +35,33 @@ This guide covers setting up multiple AI model providers including OpenRouter, c
## Model Aliases
Zen ships two registries:
Zen ships multiple registries:
- `conf/openrouter_models.json` - metadata for models routed through OpenRouter. Override with `OPENROUTER_MODELS_CONFIG_PATH` if you maintain a custom copy.
- `conf/custom_models.json` - metadata for local or self-hosted OpenAI-compatible endpoints used by the Custom provider. Override with `CUSTOM_MODELS_CONFIG_PATH` if needed.
- `conf/openai_models.json` - native OpenAI catalogue (override with `OPENAI_MODELS_CONFIG_PATH`)
- `conf/gemini_models.json` - native Google Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`)
- `conf/xai_models.json` - native X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`)
- `conf/openrouter_models.json` - OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`)
- `conf/custom_models.json` - local/self-hosted OpenAI-compatible catalogue (`CUSTOM_MODELS_CONFIG_PATH`)
Copy whichever file you need into your project (or point the corresponding `*_MODELS_CONFIG_PATH` env var at your own copy) and edit it to advertise the models you want.
### OpenRouter Models (Cloud)
| Alias | Maps to OpenRouter Model |
|-------|-------------------------|
| `opus` | `anthropic/claude-opus-4` |
| `sonnet`, `claude` | `anthropic/claude-sonnet-4` |
| `haiku` | `anthropic/claude-3.5-haiku` |
| `gpt4o`, `4o` | `openai/gpt-4o` |
| `gpt4o-mini`, `4o-mini` | `openai/gpt-4o-mini` |
| `pro`, `gemini` | `google/gemini-2.5-pro` |
| `flash` | `google/gemini-2.5-flash` |
| `mistral` | `mistral/mistral-large` |
| `deepseek`, `coder` | `deepseek/deepseek-coder` |
| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` |
The curated defaults in `conf/openrouter_models.json` include popular entries such as:
| Alias | Canonical Model | Highlights |
|-------|-----------------|------------|
| `opus`, `claude-opus` | `anthropic/claude-opus-4.1` | Flagship Claude reasoning model with vision |
| `sonnet`, `sonnet4.5` | `anthropic/claude-sonnet-4.5` | Balanced Claude with high context window |
| `haiku` | `anthropic/claude-3.5-haiku` | Fast Claude option with vision |
| `pro`, `gemini` | `google/gemini-2.5-pro` | Frontier Gemini with extended thinking |
| `flash` | `google/gemini-2.5-flash` | Ultra-fast Gemini with vision |
| `mistral` | `mistralai/mistral-large-2411` | Frontier Mistral (text only) |
| `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
Consult the JSON file for the full list, aliases, and capability flags. Add new entries as OpenRouter releases additional models.
### Custom/Local Models
@@ -65,6 +71,14 @@ Copy whichever file you need into your project (or point the corresponding `*_MO
View the baseline OpenRouter catalogue in [`conf/openrouter_models.json`](conf/openrouter_models.json) and populate [`conf/custom_models.json`](conf/custom_models.json) with your local models.
Native catalogues (`conf/openai_models.json`, `conf/gemini_models.json`, `conf/xai_models.json`) follow the same schema. Updating those files lets you:
- Expose new aliases (e.g., map `enterprise-pro` to `gpt-5-pro`)
- Advertise support for JSON mode or vision if the upstream provider adds it
- Adjust token limits when providers increase context windows
Because providers load the manifests on import, you can tweak capabilities without touching Python. Restart the server after editing the JSON files so changes are picked up.
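Since the maps are populated at import time, they can also be inspected without instantiating a provider (no API key needed). A small sketch, assuming the Gemini provider is importable as `providers.gemini`:

```python
from providers.gemini import GeminiModelProvider  # module path assumed

caps = GeminiModelProvider.MODEL_CAPABILITIES         # loaded from conf/gemini_models.json at import
print(sorted(caps))                                   # canonical model names
print(caps["gemini-2.5-flash"].max_thinking_tokens)   # 24576 in the shipped manifest
```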
To control ordering in auto mode or the `listmodels` summary, adjust the
[`intelligence_score`](model_ranking.md) for each entry (or rely on the automatic
heuristic described there).
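As a rough illustration of how that field drives ordering (the real heuristic is described in model_ranking.md; this just sorts the shipped OpenAI catalogue by score):

```python
import json

with open("conf/openai_models.json") as fh:
    catalog = json.load(fh)

ranked = sorted(catalog["models"], key=lambda m: m.get("intelligence_score", 0), reverse=True)
print([m["model_name"] for m in ranked[:3]])  # ['gpt-5-pro', 'gpt-5-codex', 'gpt-5'] with the shipped scores
```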


@@ -14,7 +14,8 @@ from utils.env import get_env
from utils.image_utils import validate_image
from .base import ModelProvider
from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint
from .gemini_registry import GeminiModelRegistry
from .shared import ModelCapabilities, ModelResponse, ProviderType
logger = logging.getLogger(__name__)
@@ -27,88 +28,8 @@ class GeminiModelProvider(ModelProvider):
request to the Gemini APIs.
"""
# Model configurations using ModelCapabilities objects
MODEL_CAPABILITIES = {
"gemini-2.5-pro": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-pro",
friendly_name="Gemini (Pro 2.5)",
intelligence_score=18,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # Vision capability
max_image_size_mb=32.0, # Higher limit for Pro model
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
max_thinking_tokens=32768, # Max thinking tokens for Pro model
description="Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
aliases=["pro", "gemini pro", "gemini-pro"],
),
"gemini-2.0-flash": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash",
friendly_name="Gemini (Flash 2.0)",
intelligence_score=9,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, # Experimental thinking mode
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # Vision capability
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
max_thinking_tokens=24576, # Same as 2.5 flash for consistency
description="Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
aliases=["flash-2.0", "flash2"],
),
"gemini-2.0-flash-lite": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash-lite",
friendly_name="Gemin (Flash Lite 2.0)",
intelligence_score=7,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=False, # Not supported per user request
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=False, # Does not support images
max_image_size_mb=0.0, # No image support
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
aliases=["flashlite", "flash-lite"],
),
"gemini-2.5-flash": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-flash",
friendly_name="Gemini (Flash 2.5)",
intelligence_score=10,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # Vision capability
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
max_thinking_tokens=24576, # Flash 2.5 thinking budget limit
description="Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
aliases=["flash", "flash2.5"],
),
}
MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
_registry: Optional[GeminiModelRegistry] = None
# Thinking mode configurations - percentages of model's max_thinking_tokens
# These percentages work across all models that support thinking
@@ -130,11 +51,50 @@ class GeminiModelProvider(ModelProvider):
def __init__(self, api_key: str, **kwargs):
"""Initialize Gemini provider with API key and optional base URL."""
self._ensure_registry()
super().__init__(api_key, **kwargs)
self._client = None
self._token_counters = {} # Cache for token counting
self._base_url = kwargs.get("base_url", None) # Optional custom endpoint
self._timeout_override = self._resolve_http_timeout()
self._invalidate_capability_cache()
# ------------------------------------------------------------------
# Registry access
# ------------------------------------------------------------------
@classmethod
def _ensure_registry(cls, *, force_reload: bool = False) -> None:
"""Load capability registry into MODEL_CAPABILITIES."""
if cls._registry is not None and not force_reload:
return
try:
registry = GeminiModelRegistry()
except Exception as exc: # pragma: no cover - defensive logging
logger.warning("Unable to load Gemini model registry: %s", exc)
cls._registry = None
cls.MODEL_CAPABILITIES = {}
return
cls._registry = registry
cls.MODEL_CAPABILITIES = dict(registry.model_map)
@classmethod
def reload_registry(cls) -> None:
"""Force registry reload (primarily for tests)."""
cls._ensure_registry(force_reload=True)
def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
self._ensure_registry()
return super().get_all_model_capabilities()
def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
if self._registry is None:
return None
return dict(self._registry.model_map)
# ------------------------------------------------------------------
# Capability surface
@@ -225,6 +185,7 @@ class GeminiModelProvider(ModelProvider):
# Validate parameters and fetch capabilities
self.validate_parameters(model_name, temperature)
capabilities = self.get_capabilities(model_name)
capability_map = self.get_all_model_capabilities()
resolved_model_name = self._resolve_model_name(model_name)
@@ -269,7 +230,7 @@ class GeminiModelProvider(ModelProvider):
# Add thinking configuration for models that support it
if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
# Get model's max thinking tokens and calculate actual budget
model_config = self.MODEL_CAPABILITIES.get(resolved_model_name)
model_config = capability_map.get(resolved_model_name)
if model_config and model_config.max_thinking_tokens > 0:
max_thinking_tokens = model_config.max_thinking_tokens
actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
@@ -542,6 +503,8 @@ class GeminiModelProvider(ModelProvider):
if not allowed_models:
return None
capability_map = self.get_all_model_capabilities()
# Helper to find best model from candidates
def find_best(candidates: list[str]) -> Optional[str]:
"""Return best model from candidates (sorted for consistency)."""
@@ -553,16 +516,14 @@ class GeminiModelProvider(ModelProvider):
pro_thinking = [
m
for m in allowed_models
if "pro" in m and m in self.MODEL_CAPABILITIES and self.MODEL_CAPABILITIES[m].supports_extended_thinking
if "pro" in m and m in capability_map and capability_map[m].supports_extended_thinking
]
if pro_thinking:
return find_best(pro_thinking)
# Then any model that supports thinking
any_thinking = [
m
for m in allowed_models
if m in self.MODEL_CAPABILITIES and self.MODEL_CAPABILITIES[m].supports_extended_thinking
m for m in allowed_models if m in capability_map and capability_map[m].supports_extended_thinking
]
if any_thinking:
return find_best(any_thinking)
@@ -590,3 +551,7 @@ class GeminiModelProvider(ModelProvider):
# Ultimate fallback to best available model
return find_best(allowed_models)
# Load registry data at import time for registry consumers
GeminiModelProvider._ensure_registry()

providers/gemini_registry.py Normal file (+19 lines)

@@ -0,0 +1,19 @@
"""Registry loader for Gemini model capabilities."""
from __future__ import annotations
from .model_registry_base import CapabilityModelRegistry
from .shared import ProviderType
class GeminiModelRegistry(CapabilityModelRegistry):
"""Capability registry backed by `conf/gemini_models.json`."""
def __init__(self, config_path: str | None = None) -> None:
super().__init__(
env_var_name="GEMINI_MODELS_CONFIG_PATH",
default_filename="gemini_models.json",
provider=ProviderType.GOOGLE,
friendly_prefix="Gemini ({model})",
config_path=config_path,
)


@@ -85,6 +85,11 @@ class CustomModelRegistryBase:
def get_entry(self, model_name: str) -> dict | None:
return self._extras.get(model_name)
def get_model_config(self, model_name: str) -> ModelCapabilities | None:
"""Backwards-compatible accessor for registries expecting this helper."""
return self.model_map.get(model_name) or self.resolve(model_name)
def iter_entries(self) -> Iterable[tuple[str, ModelCapabilities, dict]]:
for model_name, capability in self.model_map.items():
yield model_name, capability, self._extras.get(model_name, {})


@@ -7,7 +7,8 @@ if TYPE_CHECKING:
from tools.models import ToolModelCategory
from .openai_compatible import OpenAICompatibleProvider
from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
from .openai_registry import OpenAIModelRegistry
from .shared import ModelCapabilities, ProviderType
logger = logging.getLogger(__name__)
@@ -20,208 +21,53 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
OpenAI-compatible gateways) while still respecting restriction policies.
"""
# Model configurations using ModelCapabilities objects
MODEL_CAPABILITIES = {
"gpt-5": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-5",
friendly_name="OpenAI (GPT-5)",
intelligence_score=16,
context_window=400_000, # 400K tokens
max_output_tokens=128_000, # 128K max output tokens
supports_extended_thinking=True, # Supports reasoning tokens
supports_system_prompts=True,
supports_streaming=False,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # GPT-5 supports vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=True, # Regular models accept temperature parameter
temperature_constraint=TemperatureConstraint.create("fixed"),
description="GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
aliases=["gpt5", "gpt-5"],
),
"gpt-5-pro": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-5-pro",
friendly_name="OpenAI (GPT-5 Pro)",
intelligence_score=18,
use_openai_response_api=True,
context_window=400_000,
max_output_tokens=272_000,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=False,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("fixed"),
default_reasoning_effort="high",
description="GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
aliases=["gpt5pro", "gpt5-pro"],
),
"gpt-5-mini": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-5-mini",
friendly_name="OpenAI (GPT-5-mini)",
intelligence_score=15,
context_window=400_000, # 400K tokens
max_output_tokens=128_000, # 128K max output tokens
supports_extended_thinking=True, # Supports reasoning tokens
supports_system_prompts=True,
supports_streaming=False,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # GPT-5-mini supports vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
aliases=["gpt5-mini", "gpt5mini", "mini"],
),
"gpt-5-nano": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-5-nano",
friendly_name="OpenAI (GPT-5 nano)",
intelligence_score=13,
context_window=400_000,
max_output_tokens=128_000,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
aliases=["gpt5nano", "gpt5-nano", "nano"],
),
"o3": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o3",
friendly_name="OpenAI (O3)",
intelligence_score=14,
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # O3 models support vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=False, # O3 models don't accept temperature parameter
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
aliases=[],
),
"o3-mini": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o3-mini",
friendly_name="OpenAI (O3-mini)",
intelligence_score=12,
context_window=200_000,
max_output_tokens=65536,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=False,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
aliases=["o3mini"],
),
"o3-pro": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o3-pro",
friendly_name="OpenAI (O3-Pro)",
intelligence_score=15,
context_window=200_000,
max_output_tokens=65536,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=False,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
aliases=["o3pro"],
use_openai_response_api=True,
),
"o4-mini": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o4-mini",
friendly_name="OpenAI (O4-mini)",
intelligence_score=11,
context_window=200_000,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=False,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
aliases=["o4mini"],
),
"gpt-4.1": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-4.1",
friendly_name="OpenAI (GPT 4.1)",
intelligence_score=13,
context_window=1_000_000,
max_output_tokens=32_768,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
aliases=["gpt4.1"],
),
"gpt-5-codex": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-5-codex",
friendly_name="OpenAI (GPT-5 Codex)",
intelligence_score=17,
context_window=400_000,
max_output_tokens=128_000,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.",
aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"],
use_openai_response_api=True,
),
}
MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
_registry: Optional[OpenAIModelRegistry] = None
def __init__(self, api_key: str, **kwargs):
"""Initialize OpenAI provider with API key."""
self._ensure_registry()
# Set default OpenAI base URL, allow override for regions/custom endpoints
kwargs.setdefault("base_url", "https://api.openai.com/v1")
super().__init__(api_key, **kwargs)
self._invalidate_capability_cache()
# ------------------------------------------------------------------
# Registry access
# ------------------------------------------------------------------
@classmethod
def _ensure_registry(cls, *, force_reload: bool = False) -> None:
"""Load capability registry into MODEL_CAPABILITIES."""
if cls._registry is not None and not force_reload:
return
try:
registry = OpenAIModelRegistry()
except Exception as exc: # pragma: no cover - defensive logging
logger.warning("Unable to load OpenAI model registry: %s", exc)
cls._registry = None
cls.MODEL_CAPABILITIES = {}
return
cls._registry = registry
cls.MODEL_CAPABILITIES = dict(registry.model_map)
@classmethod
def reload_registry(cls) -> None:
"""Force registry reload (primarily for tests)."""
cls._ensure_registry(force_reload=True)
def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
self._ensure_registry()
return super().get_all_model_capabilities()
def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
if self._registry is None:
return None
return dict(self._registry.model_map)
# ------------------------------------------------------------------
# Capability surface
@@ -234,6 +80,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
) -> Optional[ModelCapabilities]:
"""Look up OpenAI capabilities from built-ins or the custom registry."""
self._ensure_registry()
builtin = super()._lookup_capabilities(canonical_name, requested_name)
if builtin is not None:
return builtin
@@ -319,3 +166,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
# Include GPT-5-Codex for coding workflows
preferred = find_first(["gpt-5", "gpt-5-codex", "gpt-5-pro", "gpt-5-mini", "o4-mini", "o3-mini"])
return preferred if preferred else allowed_models[0]
# Load registry data at import time so dependent providers (Azure) can reuse it
OpenAIModelProvider._ensure_registry()

providers/openai_registry.py Normal file (+19 lines)

@@ -0,0 +1,19 @@
"""Registry loader for OpenAI model capabilities."""
from __future__ import annotations
from .model_registry_base import CapabilityModelRegistry
from .shared import ProviderType
class OpenAIModelRegistry(CapabilityModelRegistry):
"""Capability registry backed by `conf/openai_models.json`."""
def __init__(self, config_path: str | None = None) -> None:
super().__init__(
env_var_name="OPENAI_MODELS_CONFIG_PATH",
default_filename="openai_models.json",
provider=ProviderType.OPENAI,
friendly_prefix="OpenAI ({model})",
config_path=config_path,
)


@@ -7,7 +7,8 @@ if TYPE_CHECKING:
from tools.models import ToolModelCategory
from .openai_compatible import OpenAICompatibleProvider
from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
from .shared import ModelCapabilities, ProviderType
from .xai_registry import XAIModelRegistry
logger = logging.getLogger(__name__)
@@ -21,72 +22,53 @@ class XAIModelProvider(OpenAICompatibleProvider):
FRIENDLY_NAME = "X.AI"
# Model configurations using ModelCapabilities objects
MODEL_CAPABILITIES = {
"grok-4": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-4",
friendly_name="X.AI (Grok 4)",
intelligence_score=16,
context_window=256_000, # 256K tokens
max_output_tokens=256_000, # 256K tokens max output
supports_extended_thinking=True, # Grok-4 supports reasoning mode
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True, # Function calling supported
supports_json_mode=True, # Structured outputs supported
supports_images=True, # Multimodal capabilities
max_image_size_mb=20.0, # Standard image size limit
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
aliases=["grok", "grok4", "grok-4"],
),
"grok-3": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3",
friendly_name="X.AI (Grok 3)",
intelligence_score=13,
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
aliases=["grok3"],
),
"grok-3-fast": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)",
intelligence_score=12,
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
aliases=["grok3fast", "grokfast", "grok3-fast"],
),
}
MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
_registry: Optional[XAIModelRegistry] = None
def __init__(self, api_key: str, **kwargs):
"""Initialize X.AI provider with API key."""
# Set X.AI base URL
kwargs.setdefault("base_url", "https://api.x.ai/v1")
self._ensure_registry()
super().__init__(api_key, **kwargs)
self._invalidate_capability_cache()
# ------------------------------------------------------------------
# Registry access
# ------------------------------------------------------------------
@classmethod
def _ensure_registry(cls, *, force_reload: bool = False) -> None:
"""Load capability registry into MODEL_CAPABILITIES."""
if cls._registry is not None and not force_reload:
return
try:
registry = XAIModelRegistry()
except Exception as exc: # pragma: no cover - defensive logging
logger.warning("Unable to load X.AI model registry: %s", exc)
cls._registry = None
cls.MODEL_CAPABILITIES = {}
return
cls._registry = registry
cls.MODEL_CAPABILITIES = dict(registry.model_map)
@classmethod
def reload_registry(cls) -> None:
"""Force registry reload (primarily for tests)."""
cls._ensure_registry(force_reload=True)
def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
self._ensure_registry()
return super().get_all_model_capabilities()
def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
if self._registry is None:
return None
return dict(self._registry.model_map)
def get_provider_type(self) -> ProviderType:
"""Get the provider type."""
@@ -135,3 +117,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
return "grok-3-fast"
# Fall back to any available model
return allowed_models[0]
# Load registry data at import time
XAIModelProvider._ensure_registry()

providers/xai_registry.py Normal file (+19 lines)

@@ -0,0 +1,19 @@
"""Registry loader for X.AI (GROK) model capabilities."""
from __future__ import annotations
from .model_registry_base import CapabilityModelRegistry
from .shared import ProviderType
class XAIModelRegistry(CapabilityModelRegistry):
"""Capability registry backed by `conf/xai_models.json`."""
def __init__(self, config_path: str | None = None) -> None:
super().__init__(
env_var_name="XAI_MODELS_CONFIG_PATH",
default_filename="xai_models.json",
provider=ProviderType.XAI,
friendly_prefix="X.AI ({model})",
config_path=config_path,
)