feat: all native providers now read model metadata from catalog files, as the OpenRouter / Custom configs already do. This allows greater control over each model's advertised capabilities.
conf/azure_models.json
@@ -18,6 +18,7 @@
       "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
       "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
+      "use_openai_response_api": "Set to true when the deployment must call Azure's /responses endpoint (O-series reasoning models). Leave false/omit for standard chat completions.",
       "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
       "description": "Human-readable description of the model",
       "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
     }
conf/gemini_models.json (new file, 111 lines)
@@ -0,0 +1,111 @@
{
  "_README": {
    "description": "Model metadata for Google's Gemini API access.",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
    "usage": "Models listed here are exposed directly through the Gemini provider. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier (e.g., 'gemini-2.5-pro', 'gemini-2.0-flash')",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
      "supports_extended_thinking": "Whether the model supports extended reasoning/thinking tokens",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "models": [
    {
      "model_name": "gemini-2.5-pro",
      "friendly_name": "Gemini (Pro 2.5)",
      "aliases": ["pro", "gemini pro", "gemini-pro"],
      "intelligence_score": 18,
      "description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "max_thinking_tokens": 32768,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 32.0
    },
    {
      "model_name": "gemini-2.0-flash",
      "friendly_name": "Gemini (Flash 2.0)",
      "aliases": ["flash-2.0", "flash2"],
      "intelligence_score": 9,
      "description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "max_thinking_tokens": 24576,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0
    },
    {
      "model_name": "gemini-2.0-flash-lite",
      "friendly_name": "Gemini (Flash Lite 2.0)",
      "aliases": ["flashlite", "flash-lite"],
      "intelligence_score": 7,
      "description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": false,
      "supports_temperature": true
    },
    {
      "model_name": "gemini-2.5-flash",
      "friendly_name": "Gemini (Flash 2.5)",
      "aliases": ["flash", "flash2.5"],
      "intelligence_score": 10,
      "description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
      "context_window": 1048576,
      "max_output_tokens": 65536,
      "max_thinking_tokens": 24576,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0
    }
  ]
}
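A minimal sketch of loading a catalog in this format and resolving an alias case-insensitively, per the `_README` usage note. The helper names here are illustrative; the real loader lives in `providers/model_registry_base.py`.

```python
import json
from pathlib import Path

def load_catalog(path: str) -> dict:
    """Index manifest entries by canonical model name."""
    data = json.loads(Path(path).read_text())
    return {entry["model_name"]: entry for entry in data["models"]}

def resolve(catalog: dict, name: str) -> dict | None:
    """Match a user-supplied name case-insensitively against names and aliases."""
    wanted = name.strip().lower()
    for canonical, entry in catalog.items():
        if wanted == canonical.lower() or wanted in (a.lower() for a in entry.get("aliases", [])):
            return entry
    return None

catalog = load_catalog("conf/gemini_models.json")
print(resolve(catalog, "PRO")["model_name"])  # gemini-2.5-pro
```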
conf/openai_models.json (new file, 235 lines)
@@ -0,0 +1,235 @@
{
  "_README": {
    "description": "Model metadata for native OpenAI API access.",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
    "usage": "Models listed here are exposed directly through the OpenAI provider. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier (e.g., 'gpt-5', 'o3-pro')",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
      "supports_extended_thinking": "Whether the model supports extended reasoning/thinking tokens",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "models": [
    {
      "model_name": "gpt-5",
      "friendly_name": "OpenAI (GPT-5)",
      "aliases": ["gpt5", "gpt-5"],
      "intelligence_score": 16,
      "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": false,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "gpt-5-pro",
      "friendly_name": "OpenAI (GPT-5 Pro)",
      "aliases": ["gpt5pro", "gpt5-pro"],
      "intelligence_score": 18,
      "description": "GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
      "context_window": 400000,
      "max_output_tokens": 272000,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": false,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0,
      "use_openai_response_api": true,
      "default_reasoning_effort": "high",
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "gpt-5-mini",
      "friendly_name": "OpenAI (GPT-5-mini)",
      "aliases": ["gpt5-mini", "gpt5mini", "mini"],
      "intelligence_score": 15,
      "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": false,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "gpt-5-nano",
      "friendly_name": "OpenAI (GPT-5 nano)",
      "aliases": ["gpt5nano", "gpt5-nano", "nano"],
      "intelligence_score": 13,
      "description": "GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "o3",
      "friendly_name": "OpenAI (O3)",
      "intelligence_score": 14,
      "description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
      "context_window": 200000,
      "max_output_tokens": 65536,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": false,
      "max_image_size_mb": 20.0,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "o3-mini",
      "friendly_name": "OpenAI (O3-mini)",
      "aliases": ["o3mini"],
      "intelligence_score": 12,
      "description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
      "context_window": 200000,
      "max_output_tokens": 65536,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": false,
      "max_image_size_mb": 20.0,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "o3-pro",
      "friendly_name": "OpenAI (O3-Pro)",
      "aliases": ["o3pro"],
      "intelligence_score": 15,
      "description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
      "context_window": 200000,
      "max_output_tokens": 65536,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": false,
      "max_image_size_mb": 20.0,
      "use_openai_response_api": true,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "o4-mini",
      "friendly_name": "OpenAI (O4-mini)",
      "aliases": ["o4mini"],
      "intelligence_score": 11,
      "description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
      "context_window": 200000,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": false,
      "max_image_size_mb": 20.0,
      "temperature_constraint": "fixed"
    },
    {
      "model_name": "gpt-4.1",
      "friendly_name": "OpenAI (GPT 4.1)",
      "aliases": ["gpt4.1"],
      "intelligence_score": 13,
      "description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
      "context_window": 1000000,
      "max_output_tokens": 32768,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0
    },
    {
      "model_name": "gpt-5-codex",
      "friendly_name": "OpenAI (GPT-5 Codex)",
      "aliases": ["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"],
      "intelligence_score": 17,
      "description": "GPT-5 Codex (400K context) - Specialized for coding, refactoring, and software architecture",
      "context_window": 400000,
      "max_output_tokens": 128000,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0,
      "use_openai_response_api": true
    }
  ]
}
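To make the capability flags concrete, here is a hedged sketch of how an entry's `supports_temperature` and `use_openai_response_api` fields could gate request construction. The endpoint names match the `_README` notes; the function itself is illustrative, not the provider's actual code.

```python
def build_request(entry: dict, prompt: str, temperature: float = 0.7) -> tuple[str, dict]:
    """Pick an endpoint and payload from a catalog entry (illustrative only)."""
    payload: dict = {"model": entry["model_name"]}
    # O3/O4-style entries set supports_temperature to false, so drop the knob.
    if entry.get("supports_temperature", True):
        payload["temperature"] = temperature
    # Entries flagged use_openai_response_api route to /responses instead of
    # /chat/completions.
    if entry.get("use_openai_response_api", False):
        return "/v1/responses", {**payload, "input": prompt}
    return "/v1/chat/completions", {**payload, "messages": [{"role": "user", "content": prompt}]}
```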
conf/xai_models.json (new file, 87 lines)
@@ -0,0 +1,87 @@
{
  "_README": {
    "description": "Model metadata for X.AI (GROK) API access.",
    "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
    "usage": "Models listed here are exposed directly through the X.AI provider. Aliases are case-insensitive.",
    "field_notes": "Matches providers/shared/model_capabilities.py.",
    "field_descriptions": {
      "model_name": "The model identifier (e.g., 'grok-4', 'grok-3-fast')",
      "aliases": "Array of short names users can type instead of the full model name",
      "context_window": "Total number of tokens the model can process (input + output combined)",
      "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
      "max_thinking_tokens": "Maximum reasoning/thinking tokens the model will allocate when extended thinking is requested",
      "supports_extended_thinking": "Whether the model supports extended reasoning/thinking tokens",
      "supports_json_mode": "Whether the model can guarantee valid JSON output",
      "supports_function_calling": "Whether the model supports function/tool calling",
      "supports_images": "Whether the model can process images/visual input",
      "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
      "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
      "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
      "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
      "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
      "description": "Human-readable description of the model",
      "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
    }
  },
  "models": [
    {
      "model_name": "grok-4",
      "friendly_name": "X.AI (Grok 4)",
      "aliases": ["grok", "grok4", "grok-4"],
      "intelligence_score": 16,
      "description": "GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
      "context_window": 256000,
      "max_output_tokens": 256000,
      "supports_extended_thinking": true,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": true,
      "supports_images": true,
      "supports_temperature": true,
      "max_image_size_mb": 20.0
    },
    {
      "model_name": "grok-3",
      "friendly_name": "X.AI (Grok 3)",
      "aliases": ["grok3"],
      "intelligence_score": 13,
      "description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
      "context_window": 131072,
      "max_output_tokens": 131072,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": false,
      "supports_images": false,
      "supports_temperature": true
    },
    {
      "model_name": "grok-3-fast",
      "friendly_name": "X.AI (Grok 3 Fast)",
      "aliases": ["grok3fast", "grokfast", "grok3-fast"],
      "intelligence_score": 12,
      "description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
      "context_window": 131072,
      "max_output_tokens": 131072,
      "supports_extended_thinking": false,
      "supports_system_prompts": true,
      "supports_streaming": true,
      "supports_function_calling": true,
      "supports_json_mode": false,
      "supports_images": false,
      "supports_temperature": true
    }
  ]
}
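As a quick sanity check, a sketch of validating an `XAI_ALLOWED_MODELS` restriction against this manifest's names and aliases, mirroring the described restriction behaviour (illustrative helper, not shipped code):

```python
import json
import os

catalog = json.load(open("conf/xai_models.json"))["models"]
known = {m["model_name"].lower() for m in catalog}
known |= {a.lower() for m in catalog for a in m.get("aliases", [])}

allowed = os.environ.get("XAI_ALLOWED_MODELS", "grok,grok-3-fast")
unknown = [n for n in (s.strip().lower() for s in allowed.split(",")) if n not in known]
print(unknown)  # [] when every allowed name is known to the manifest
```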
@@ -67,16 +67,26 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model
 DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
 ```
 
-**Available Models:**
-- **`auto`**: Claude automatically selects the optimal model
-- **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis
-- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses
-- **`o3`**: Strong logical reasoning (200K context)
-- **`o3-mini`**: Balanced speed/quality (200K context)
-- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
-- **`grok-3`**: GROK-3 advanced reasoning (131K context)
-- **`grok-4`**: GROK-4 flagship model (256K context)
-- **Custom models**: via OpenRouter or local APIs
+- **Available Models:** The canonical capability data for native providers lives in JSON manifests under `conf/`:
+  - `conf/openai_models.json` – OpenAI catalogue (can be overridden with `OPENAI_MODELS_CONFIG_PATH`)
+  - `conf/gemini_models.json` – Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`)
+  - `conf/xai_models.json` – X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`)
+  - `conf/openrouter_models.json` – OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`)
+  - `conf/custom_models.json` – Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`)
+
+Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags. Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, or expose additional aliases without touching Python code.
+
+The shipped defaults cover:
+
+| Provider | Canonical Models | Notable Aliases |
+|----------|-----------------|-----------------|
+| OpenAI | `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` |
+| Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` |
+| X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` |
+| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
+| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
+
+> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support) without editing Python.
 
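A sketch of the override lookup the tip describes, assuming the documented behaviour of the `*_MODELS_CONFIG_PATH` variables (the real resolution lives in the registry loaders and may differ in detail):

```python
import json
import os
from pathlib import Path

def manifest_path(env_var: str, default_name: str) -> Path:
    """Honour the override variable when set, else fall back to the shipped file."""
    override = os.environ.get(env_var)
    return Path(override) if override else Path("conf") / default_name

path = manifest_path("GEMINI_MODELS_CONFIG_PATH", "gemini_models.json")
models = json.loads(path.read_text())["models"]
print([m["model_name"] for m in models])
```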
### Thinking Mode Configuration
@@ -114,28 +124,11 @@ XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4
 OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral
 ```
 
-**Supported Model Names:**
-
-**OpenAI Models:**
-- `o3` (200K context, high reasoning)
-- `o3-mini` (200K context, balanced)
-- `o4-mini` (200K context, latest balanced)
-- `mini` (shorthand for o4-mini)
-
-**Gemini Models:**
-- `gemini-2.5-flash` (1M context, fast)
-- `gemini-2.5-pro` (1M context, powerful)
-- `flash` (shorthand for Flash model)
-- `pro` (shorthand for Pro model)
-
-**X.AI GROK Models:**
-- `grok-4` (256K context, flagship Grok model with reasoning, vision, and structured outputs)
-- `grok-3` (131K context, advanced reasoning)
-- `grok-3-fast` (131K context, higher performance)
-- `grok` (shorthand for grok-4)
-- `grok4` (shorthand for grok-4)
-- `grok3` (shorthand for grok-3)
-- `grokfast` (shorthand for grok-3-fast)
+**Supported Model Names:** The names/aliases listed in the JSON manifests above are the authoritative source. Keep in mind:
+
+- Aliases are case-insensitive and defined per entry (for example, `mini` maps to `gpt-5-mini` by default, while `flash` maps to `gemini-2.5-flash`).
+- When you override the manifest files you can add or remove aliases as needed; restriction policies (`*_ALLOWED_MODELS`) automatically pick up those changes.
+- Models omitted from a manifest fall back to generic capability detection (where supported) and may have limited feature metadata.
 
 **Example Configurations:**
 ```env
@@ -154,12 +147,14 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast
 
 ### Advanced Configuration
 
-**Custom Model Configuration:**
+**Custom Model Configuration & Manifest Overrides:**
 ```env
-# Override default location of custom_models.json
-CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
-# Override default location of openrouter_models.json
-OPENROUTER_MODELS_CONFIG_PATH=/path/to/your/openrouter_models.json
+# Override default location of built-in catalogues
+OPENAI_MODELS_CONFIG_PATH=/path/to/openai_models.json
+GEMINI_MODELS_CONFIG_PATH=/path/to/gemini_models.json
+XAI_MODELS_CONFIG_PATH=/path/to/xai_models.json
+OPENROUTER_MODELS_CONFIG_PATH=/path/to/openrouter_models.json
+CUSTOM_MODELS_CONFIG_PATH=/path/to/custom_models.json
 ```
 
 **Conversation Settings:**
 
@@ -35,27 +35,33 @@ This guide covers setting up multiple AI model providers including OpenRouter, c
 
 ## Model Aliases
 
-Zen ships two registries:
+Zen ships multiple registries:
 
-- `conf/openrouter_models.json` – metadata for models routed through OpenRouter. Override with `OPENROUTER_MODELS_CONFIG_PATH` if you maintain a custom copy.
-- `conf/custom_models.json` – metadata for local or self-hosted OpenAI-compatible endpoints used by the Custom provider. Override with `CUSTOM_MODELS_CONFIG_PATH` if needed.
+- `conf/openai_models.json` – native OpenAI catalogue (override with `OPENAI_MODELS_CONFIG_PATH`)
+- `conf/gemini_models.json` – native Google Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`)
+- `conf/xai_models.json` – native X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`)
+- `conf/openrouter_models.json` – OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`)
+- `conf/custom_models.json` – local/self-hosted OpenAI-compatible catalogue (`CUSTOM_MODELS_CONFIG_PATH`)
 
 Copy whichever file you need into your project (or point the corresponding `*_MODELS_CONFIG_PATH` env var at your own copy) and edit it to advertise the models you want.
 
 ### OpenRouter Models (Cloud)
 
-| Alias | Maps to OpenRouter Model |
-|-------|-------------------------|
-| `opus` | `anthropic/claude-opus-4` |
-| `sonnet`, `claude` | `anthropic/claude-sonnet-4` |
-| `haiku` | `anthropic/claude-3.5-haiku` |
-| `gpt4o`, `4o` | `openai/gpt-4o` |
-| `gpt4o-mini`, `4o-mini` | `openai/gpt-4o-mini` |
-| `pro`, `gemini` | `google/gemini-2.5-pro` |
-| `flash` | `google/gemini-2.5-flash` |
-| `mistral` | `mistral/mistral-large` |
-| `deepseek`, `coder` | `deepseek/deepseek-coder` |
-| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` |
+The curated defaults in `conf/openrouter_models.json` include popular entries such as:
+
+| Alias | Canonical Model | Highlights |
+|-------|-----------------|------------|
+| `opus`, `claude-opus` | `anthropic/claude-opus-4.1` | Flagship Claude reasoning model with vision |
+| `sonnet`, `sonnet4.5` | `anthropic/claude-sonnet-4.5` | Balanced Claude with high context window |
+| `haiku` | `anthropic/claude-3.5-haiku` | Fast Claude option with vision |
+| `pro`, `gemini` | `google/gemini-2.5-pro` | Frontier Gemini with extended thinking |
+| `flash` | `google/gemini-2.5-flash` | Ultra-fast Gemini with vision |
+| `mistral` | `mistralai/mistral-large-2411` | Frontier Mistral (text only) |
+| `llama3` | `meta-llama/llama-3-70b` | Large open-weight text model |
+| `deepseek-r1` | `deepseek/deepseek-r1-0528` | DeepSeek reasoning model |
+| `perplexity` | `perplexity/llama-3-sonar-large-32k-online` | Search-augmented model |
+
+Consult the JSON file for the full list, aliases, and capability flags. Add new entries as OpenRouter releases additional models.
 
 ### Custom/Local Models
 
@@ -65,6 +71,14 @@ Copy whichever file you need into your project (or point the corresponding `*_MO
 
 View the baseline OpenRouter catalogue in [`conf/openrouter_models.json`](conf/openrouter_models.json) and populate [`conf/custom_models.json`](conf/custom_models.json) with your local models.
 
+Native catalogues (`conf/openai_models.json`, `conf/gemini_models.json`, `conf/xai_models.json`) follow the same schema. Updating those files lets you:
+
+- Expose new aliases (e.g., map `enterprise-pro` to `gpt-5-pro`)
+- Advertise support for JSON mode or vision if the upstream provider adds it
+- Adjust token limits when providers increase context windows
+
+Because providers load the manifests on import, you can tweak capabilities without touching Python. Restart the server after editing the JSON files so changes are picked up.
+
 To control ordering in auto mode or the `listmodels` summary, adjust the
 [`intelligence_score`](model_ranking.md) for each entry (or rely on the automatic
 heuristic described there).
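For example, ranking catalog entries by `intelligence_score`, the primary signal described for auto-mode ordering (see model_ranking.md for the automatic heuristic), can be sketched as:

```python
import json

entries = json.load(open("conf/openai_models.json"))["models"]
ranked = sorted(entries, key=lambda e: e.get("intelligence_score", 0), reverse=True)
print([e["model_name"] for e in ranked[:3]])  # e.g. ['gpt-5-pro', 'gpt-5-codex', 'gpt-5']
```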
providers/gemini.py
@@ -14,7 +14,8 @@ from utils.env import get_env
 from utils.image_utils import validate_image
 
 from .base import ModelProvider
-from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint
+from .gemini_registry import GeminiModelRegistry
+from .shared import ModelCapabilities, ModelResponse, ProviderType
 
 logger = logging.getLogger(__name__)
 
@@ -27,88 +28,8 @@ class GeminiModelProvider(ModelProvider):
     request to the Gemini APIs.
     """
 
-    # Model configurations using ModelCapabilities objects
-    MODEL_CAPABILITIES = {
-        "gemini-2.5-pro": ModelCapabilities(
-            provider=ProviderType.GOOGLE,
-            model_name="gemini-2.5-pro",
-            friendly_name="Gemini (Pro 2.5)",
-            intelligence_score=18,
-            context_window=1_048_576,  # 1M tokens
-            max_output_tokens=65_536,
-            supports_extended_thinking=True,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # Vision capability
-            max_image_size_mb=32.0,  # Higher limit for Pro model
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            max_thinking_tokens=32768,  # Max thinking tokens for Pro model
-            description="Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
-            aliases=["pro", "gemini pro", "gemini-pro"],
-        ),
-        "gemini-2.0-flash": ModelCapabilities(
-            provider=ProviderType.GOOGLE,
-            model_name="gemini-2.0-flash",
-            friendly_name="Gemini (Flash 2.0)",
-            intelligence_score=9,
-            context_window=1_048_576,  # 1M tokens
-            max_output_tokens=65_536,
-            supports_extended_thinking=True,  # Experimental thinking mode
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # Vision capability
-            max_image_size_mb=20.0,  # Conservative 20MB limit for reliability
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            max_thinking_tokens=24576,  # Same as 2.5 flash for consistency
-            description="Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
-            aliases=["flash-2.0", "flash2"],
-        ),
-        "gemini-2.0-flash-lite": ModelCapabilities(
-            provider=ProviderType.GOOGLE,
-            model_name="gemini-2.0-flash-lite",
-            friendly_name="Gemin (Flash Lite 2.0)",
-            intelligence_score=7,
-            context_window=1_048_576,  # 1M tokens
-            max_output_tokens=65_536,
-            supports_extended_thinking=False,  # Not supported per user request
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=False,  # Does not support images
-            max_image_size_mb=0.0,  # No image support
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
-            aliases=["flashlite", "flash-lite"],
-        ),
-        "gemini-2.5-flash": ModelCapabilities(
-            provider=ProviderType.GOOGLE,
-            model_name="gemini-2.5-flash",
-            friendly_name="Gemini (Flash 2.5)",
-            intelligence_score=10,
-            context_window=1_048_576,  # 1M tokens
-            max_output_tokens=65_536,
-            supports_extended_thinking=True,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # Vision capability
-            max_image_size_mb=20.0,  # Conservative 20MB limit for reliability
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            max_thinking_tokens=24576,  # Flash 2.5 thinking budget limit
-            description="Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
-            aliases=["flash", "flash2.5"],
-        ),
-    }
+    MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
+    _registry: Optional[GeminiModelRegistry] = None
 
     # Thinking mode configurations - percentages of model's max_thinking_tokens
     # These percentages work across all models that support thinking
@@ -130,11 +51,50 @@ class GeminiModelProvider(ModelProvider):
 
     def __init__(self, api_key: str, **kwargs):
         """Initialize Gemini provider with API key and optional base URL."""
+        self._ensure_registry()
         super().__init__(api_key, **kwargs)
         self._client = None
         self._token_counters = {}  # Cache for token counting
         self._base_url = kwargs.get("base_url", None)  # Optional custom endpoint
         self._timeout_override = self._resolve_http_timeout()
+        self._invalidate_capability_cache()
 
+    # ------------------------------------------------------------------
+    # Registry access
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def _ensure_registry(cls, *, force_reload: bool = False) -> None:
+        """Load capability registry into MODEL_CAPABILITIES."""
+
+        if cls._registry is not None and not force_reload:
+            return
+
+        try:
+            registry = GeminiModelRegistry()
+        except Exception as exc:  # pragma: no cover - defensive logging
+            logger.warning("Unable to load Gemini model registry: %s", exc)
+            cls._registry = None
+            cls.MODEL_CAPABILITIES = {}
+            return
+
+        cls._registry = registry
+        cls.MODEL_CAPABILITIES = dict(registry.model_map)
+
+    @classmethod
+    def reload_registry(cls) -> None:
+        """Force registry reload (primarily for tests)."""
+
+        cls._ensure_registry(force_reload=True)
+
+    def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
+        self._ensure_registry()
+        return super().get_all_model_capabilities()
+
+    def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
+        if self._registry is None:
+            return None
+        return dict(self._registry.model_map)
 
     # ------------------------------------------------------------------
     # Capability surface
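A hedged test sketch of the new reload hook: point `GEMINI_MODELS_CONFIG_PATH` at a custom manifest and force a reload. `reload_registry()` and the env var come from this diff; the pytest scaffolding and the minimal manifest are illustrative, and the real schema may require more fields than shown.

```python
def test_custom_manifest(tmp_path, monkeypatch):
    manifest = tmp_path / "gemini_models.json"
    # Minimal manifest for illustration; real entries carry more capability fields.
    manifest.write_text('{"models": [{"model_name": "my-gemini", "context_window": 1048576}]}')
    monkeypatch.setenv("GEMINI_MODELS_CONFIG_PATH", str(manifest))

    from providers.gemini import GeminiModelProvider

    GeminiModelProvider.reload_registry()
    assert "my-gemini" in GeminiModelProvider.MODEL_CAPABILITIES
```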
@@ -225,6 +185,7 @@ class GeminiModelProvider(ModelProvider):
         # Validate parameters and fetch capabilities
         self.validate_parameters(model_name, temperature)
         capabilities = self.get_capabilities(model_name)
+        capability_map = self.get_all_model_capabilities()
 
         resolved_model_name = self._resolve_model_name(model_name)
 
@@ -269,7 +230,7 @@ class GeminiModelProvider(ModelProvider):
         # Add thinking configuration for models that support it
         if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
             # Get model's max thinking tokens and calculate actual budget
-            model_config = self.MODEL_CAPABILITIES.get(resolved_model_name)
+            model_config = capability_map.get(resolved_model_name)
             if model_config and model_config.max_thinking_tokens > 0:
                 max_thinking_tokens = model_config.max_thinking_tokens
                 actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
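A worked example of the budget computation above. The `max_thinking_tokens` value comes from `conf/gemini_models.json`; the `THINKING_BUDGETS` percentages are assumed here for illustration (the real mapping is defined on the provider class).

```python
max_thinking_tokens = 32768  # gemini-2.5-pro, per conf/gemini_models.json
THINKING_BUDGETS = {"low": 0.08, "medium": 0.33, "high": 0.67}  # assumed values

actual_thinking_budget = int(max_thinking_tokens * THINKING_BUDGETS["medium"])
print(actual_thinking_budget)  # 10813 tokens reserved for reasoning
```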
@@ -542,6 +503,8 @@ class GeminiModelProvider(ModelProvider):
         if not allowed_models:
             return None
 
+        capability_map = self.get_all_model_capabilities()
+
         # Helper to find best model from candidates
         def find_best(candidates: list[str]) -> Optional[str]:
             """Return best model from candidates (sorted for consistency)."""
@@ -553,16 +516,14 @@ class GeminiModelProvider(ModelProvider):
         pro_thinking = [
             m
             for m in allowed_models
-            if "pro" in m and m in self.MODEL_CAPABILITIES and self.MODEL_CAPABILITIES[m].supports_extended_thinking
+            if "pro" in m and m in capability_map and capability_map[m].supports_extended_thinking
         ]
         if pro_thinking:
             return find_best(pro_thinking)
 
         # Then any model that supports thinking
         any_thinking = [
-            m
-            for m in allowed_models
-            if m in self.MODEL_CAPABILITIES and self.MODEL_CAPABILITIES[m].supports_extended_thinking
+            m for m in allowed_models if m in capability_map and capability_map[m].supports_extended_thinking
         ]
         if any_thinking:
             return find_best(any_thinking)
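The selection filter above, restated standalone: prefer "pro" models that support extended thinking, then fall back to any thinking-capable model. Here `capability_map` stands in for `get_all_model_capabilities()`, with illustrative boolean values.

```python
capability_map = {
    "gemini-2.5-pro": True,        # supports_extended_thinking
    "gemini-2.5-flash": True,
    "gemini-2.0-flash-lite": False,
}
allowed_models = ["gemini-2.0-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"]

pro_thinking = [m for m in allowed_models if "pro" in m and capability_map.get(m, False)]
any_thinking = [m for m in allowed_models if capability_map.get(m, False)]
print(pro_thinking or any_thinking)  # ['gemini-2.5-pro']
```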
@@ -590,3 +551,7 @@ class GeminiModelProvider(ModelProvider):
 
         # Ultimate fallback to best available model
         return find_best(allowed_models)
+
+
+# Load registry data at import time for registry consumers
+GeminiModelProvider._ensure_registry()
providers/gemini_registry.py (new file, 19 lines)
@@ -0,0 +1,19 @@
"""Registry loader for Gemini model capabilities."""

from __future__ import annotations

from .model_registry_base import CapabilityModelRegistry
from .shared import ProviderType


class GeminiModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by `conf/gemini_models.json`."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="GEMINI_MODELS_CONFIG_PATH",
            default_filename="gemini_models.json",
            provider=ProviderType.GOOGLE,
            friendly_prefix="Gemini ({model})",
            config_path=config_path,
        )
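Illustrative usage of this loader: the default constructor honours `GEMINI_MODELS_CONFIG_PATH`, while `config_path` pins an explicit file (handy in tests).

```python
registry = GeminiModelRegistry()                                # env override honoured
pinned = GeminiModelRegistry(config_path="conf/gemini_models.json")  # explicit path
print(sorted(pinned.model_map))  # canonical model names from the manifest
```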
@@ -85,6 +85,11 @@ class CustomModelRegistryBase:
     def get_entry(self, model_name: str) -> dict | None:
         return self._extras.get(model_name)
 
+    def get_model_config(self, model_name: str) -> ModelCapabilities | None:
+        """Backwards-compatible accessor for registries expecting this helper."""
+
+        return self.model_map.get(model_name) or self.resolve(model_name)
+
     def iter_entries(self) -> Iterable[tuple[str, ModelCapabilities, dict]]:
         for model_name, capability in self.model_map.items():
             yield model_name, capability, self._extras.get(model_name, {})
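Illustrative contract of the new accessor: direct `model_map` hits win, and `resolve()` is the fallback, so alias lookups still return a capability object (assuming `resolve()` maps aliases, as its use here suggests).

```python
registry = GeminiModelRegistry()  # any CapabilityModelRegistry subclass works
direct = registry.get_model_config("gemini-2.5-pro")  # from model_map
via_alias = registry.get_model_config("pro")          # via resolve(), assumed alias-aware
assert direct is not None and via_alias is not None
```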
providers/openai_provider.py
@@ -7,7 +7,8 @@ if TYPE_CHECKING:
 from tools.models import ToolModelCategory
 
 from .openai_compatible import OpenAICompatibleProvider
-from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
+from .openai_registry import OpenAIModelRegistry
+from .shared import ModelCapabilities, ProviderType
 
 logger = logging.getLogger(__name__)
 
@@ -20,208 +21,53 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
     OpenAI-compatible gateways) while still respecting restriction policies.
     """
 
-    # Model configurations using ModelCapabilities objects
-    MODEL_CAPABILITIES = {
-        "gpt-5": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="gpt-5",
-            friendly_name="OpenAI (GPT-5)",
-            intelligence_score=16,
-            context_window=400_000,  # 400K tokens
-            max_output_tokens=128_000,  # 128K max output tokens
-            supports_extended_thinking=True,  # Supports reasoning tokens
-            supports_system_prompts=True,
-            supports_streaming=False,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # GPT-5 supports vision
-            max_image_size_mb=20.0,  # 20MB per OpenAI docs
-            supports_temperature=True,  # Regular models accept temperature parameter
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
-            aliases=["gpt5", "gpt-5"],
-        ),
-        "gpt-5-pro": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="gpt-5-pro",
-            friendly_name="OpenAI (GPT-5 Pro)",
-            intelligence_score=18,
-            use_openai_response_api=True,
-            context_window=400_000,
-            max_output_tokens=272_000,
-            supports_extended_thinking=True,
-            supports_system_prompts=True,
-            supports_streaming=False,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            default_reasoning_effort="high",
-            description="GPT-5 Pro (400K context, 272K output) - Advanced model with reasoning support",
-            aliases=["gpt5pro", "gpt5-pro"],
-        ),
-        "gpt-5-mini": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="gpt-5-mini",
-            friendly_name="OpenAI (GPT-5-mini)",
-            intelligence_score=15,
-            context_window=400_000,  # 400K tokens
-            max_output_tokens=128_000,  # 128K max output tokens
-            supports_extended_thinking=True,  # Supports reasoning tokens
-            supports_system_prompts=True,
-            supports_streaming=False,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # GPT-5-mini supports vision
-            max_image_size_mb=20.0,  # 20MB per OpenAI docs
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
-            aliases=["gpt5-mini", "gpt5mini", "mini"],
-        ),
-        "gpt-5-nano": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="gpt-5-nano",
-            friendly_name="OpenAI (GPT-5 nano)",
-            intelligence_score=13,
-            context_window=400_000,
-            max_output_tokens=128_000,
-            supports_extended_thinking=True,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="GPT-5 nano (400K context) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
-            aliases=["gpt5nano", "gpt5-nano", "nano"],
-        ),
-        "o3": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="o3",
-            friendly_name="OpenAI (O3)",
-            intelligence_score=14,
-            context_window=200_000,  # 200K tokens
-            max_output_tokens=65536,  # 64K max output tokens
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # O3 models support vision
-            max_image_size_mb=20.0,  # 20MB per OpenAI docs
-            supports_temperature=False,  # O3 models don't accept temperature parameter
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
-            aliases=[],
-        ),
-        "o3-mini": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="o3-mini",
-            friendly_name="OpenAI (O3-mini)",
-            intelligence_score=12,
-            context_window=200_000,
-            max_output_tokens=65536,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=False,
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
-            aliases=["o3mini"],
-        ),
-        "o3-pro": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="o3-pro",
-            friendly_name="OpenAI (O3-Pro)",
-            intelligence_score=15,
-            context_window=200_000,
-            max_output_tokens=65536,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=False,
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
-            aliases=["o3pro"],
-            use_openai_response_api=True,
-        ),
-        "o4-mini": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="o4-mini",
-            friendly_name="OpenAI (O4-mini)",
-            intelligence_score=11,
-            context_window=200_000,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=False,
-            temperature_constraint=TemperatureConstraint.create("fixed"),
-            description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
-            aliases=["o4mini"],
-        ),
-        "gpt-4.1": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="gpt-4.1",
-            friendly_name="OpenAI (GPT 4.1)",
-            intelligence_score=13,
-            context_window=1_000_000,
-            max_output_tokens=32_768,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
-            aliases=["gpt4.1"],
-        ),
-        "gpt-5-codex": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="gpt-5-codex",
-            friendly_name="OpenAI (GPT-5 Codex)",
-            intelligence_score=17,
-            context_window=400_000,
-            max_output_tokens=128_000,
-            supports_extended_thinking=True,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,
-            max_image_size_mb=20.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GPT-5 Codex (400K context) - Specialized for coding, refactoring, and software architecture.",
-            aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"],
-            use_openai_response_api=True,
-        ),
-    }
+    MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
+    _registry: Optional[OpenAIModelRegistry] = None
 
     def __init__(self, api_key: str, **kwargs):
         """Initialize OpenAI provider with API key."""
+        self._ensure_registry()
         # Set default OpenAI base URL, allow override for regions/custom endpoints
         kwargs.setdefault("base_url", "https://api.openai.com/v1")
         super().__init__(api_key, **kwargs)
+        self._invalidate_capability_cache()
 
+    # ------------------------------------------------------------------
+    # Registry access
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def _ensure_registry(cls, *, force_reload: bool = False) -> None:
+        """Load capability registry into MODEL_CAPABILITIES."""
+
+        if cls._registry is not None and not force_reload:
+            return
+
+        try:
+            registry = OpenAIModelRegistry()
+        except Exception as exc:  # pragma: no cover - defensive logging
+            logger.warning("Unable to load OpenAI model registry: %s", exc)
+            cls._registry = None
+            cls.MODEL_CAPABILITIES = {}
+            return
+
+        cls._registry = registry
+        cls.MODEL_CAPABILITIES = dict(registry.model_map)
+
+    @classmethod
+    def reload_registry(cls) -> None:
+        """Force registry reload (primarily for tests)."""
+
+        cls._ensure_registry(force_reload=True)
+
+    def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
+        self._ensure_registry()
+        return super().get_all_model_capabilities()
+
+    def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
+        if self._registry is None:
+            return None
+        return dict(self._registry.model_map)
 
     # ------------------------------------------------------------------
     # Capability surface
@@ -234,6 +80,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
     ) -> Optional[ModelCapabilities]:
         """Look up OpenAI capabilities from built-ins or the custom registry."""
 
+        self._ensure_registry()
         builtin = super()._lookup_capabilities(canonical_name, requested_name)
         if builtin is not None:
             return builtin
@@ -319,3 +166,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
         # Include GPT-5-Codex for coding workflows
         preferred = find_first(["gpt-5", "gpt-5-codex", "gpt-5-pro", "gpt-5-mini", "o4-mini", "o3-mini"])
         return preferred if preferred else allowed_models[0]
+
+
+# Load registry data at import time so dependent providers (Azure) can reuse it
+OpenAIModelProvider._ensure_registry()
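Because `_ensure_registry()` runs at import time, downstream consumers such as the Azure provider can read the capability map without instantiating anything. A sketch (the module path below is assumed; adjust to wherever `OpenAIModelProvider` actually lives):

```python
from providers.openai_provider import OpenAIModelProvider  # module path assumed

caps = OpenAIModelProvider.MODEL_CAPABILITIES.get("gpt-5")
if caps is not None:
    print(caps.context_window)  # 400000 per conf/openai_models.json
```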
providers/openai_registry.py (new file, 19 lines)
@@ -0,0 +1,19 @@
"""Registry loader for OpenAI model capabilities."""

from __future__ import annotations

from .model_registry_base import CapabilityModelRegistry
from .shared import ProviderType


class OpenAIModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by `conf/openai_models.json`."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="OPENAI_MODELS_CONFIG_PATH",
            default_filename="openai_models.json",
            provider=ProviderType.OPENAI,
            friendly_prefix="OpenAI ({model})",
            config_path=config_path,
        )
providers/xai.py (108 lines changed)
@@ -7,7 +7,8 @@ if TYPE_CHECKING:
 from tools.models import ToolModelCategory
 
 from .openai_compatible import OpenAICompatibleProvider
-from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
+from .shared import ModelCapabilities, ProviderType
+from .xai_registry import XAIModelRegistry
 
 logger = logging.getLogger(__name__)
 
@@ -21,72 +22,53 @@ class XAIModelProvider(OpenAICompatibleProvider):
 
     FRIENDLY_NAME = "X.AI"
 
-    # Model configurations using ModelCapabilities objects
-    MODEL_CAPABILITIES = {
-        "grok-4": ModelCapabilities(
-            provider=ProviderType.XAI,
-            model_name="grok-4",
-            friendly_name="X.AI (Grok 4)",
-            intelligence_score=16,
-            context_window=256_000,  # 256K tokens
-            max_output_tokens=256_000,  # 256K tokens max output
-            supports_extended_thinking=True,  # Grok-4 supports reasoning mode
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,  # Function calling supported
-            supports_json_mode=True,  # Structured outputs supported
-            supports_images=True,  # Multimodal capabilities
-            max_image_size_mb=20.0,  # Standard image size limit
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GROK-4 (256K context) - Frontier multimodal reasoning model with advanced capabilities",
-            aliases=["grok", "grok4", "grok-4"],
-        ),
-        "grok-3": ModelCapabilities(
-            provider=ProviderType.XAI,
-            model_name="grok-3",
-            friendly_name="X.AI (Grok 3)",
-            intelligence_score=13,
-            context_window=131_072,  # 131K tokens
-            max_output_tokens=131072,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=False,  # Assuming GROK doesn't have JSON mode yet
-            supports_images=False,  # Assuming GROK is text-only for now
-            max_image_size_mb=0.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
-            aliases=["grok3"],
-        ),
-        "grok-3-fast": ModelCapabilities(
-            provider=ProviderType.XAI,
-            model_name="grok-3-fast",
-            friendly_name="X.AI (Grok 3 Fast)",
-            intelligence_score=12,
-            context_window=131_072,  # 131K tokens
-            max_output_tokens=131072,
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=False,  # Assuming GROK doesn't have JSON mode yet
-            supports_images=False,  # Assuming GROK is text-only for now
-            max_image_size_mb=0.0,
-            supports_temperature=True,
-            temperature_constraint=TemperatureConstraint.create("range"),
-            description="GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
-            aliases=["grok3fast", "grokfast", "grok3-fast"],
-        ),
-    }
+    MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
+    _registry: Optional[XAIModelRegistry] = None
 
     def __init__(self, api_key: str, **kwargs):
         """Initialize X.AI provider with API key."""
         # Set X.AI base URL
         kwargs.setdefault("base_url", "https://api.x.ai/v1")
+        self._ensure_registry()
         super().__init__(api_key, **kwargs)
+        self._invalidate_capability_cache()
 
+    # ------------------------------------------------------------------
+    # Registry access
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def _ensure_registry(cls, *, force_reload: bool = False) -> None:
+        """Load capability registry into MODEL_CAPABILITIES."""
+
+        if cls._registry is not None and not force_reload:
+            return
+
+        try:
+            registry = XAIModelRegistry()
+        except Exception as exc:  # pragma: no cover - defensive logging
+            logger.warning("Unable to load X.AI model registry: %s", exc)
+            cls._registry = None
+            cls.MODEL_CAPABILITIES = {}
+            return
+
+        cls._registry = registry
+        cls.MODEL_CAPABILITIES = dict(registry.model_map)
+
+    @classmethod
+    def reload_registry(cls) -> None:
+        """Force registry reload (primarily for tests)."""
+
+        cls._ensure_registry(force_reload=True)
+
+    def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
+        self._ensure_registry()
+        return super().get_all_model_capabilities()
+
+    def get_model_registry(self) -> Optional[dict[str, ModelCapabilities]]:
+        if self._registry is None:
+            return None
+        return dict(self._registry.model_map)
 
     def get_provider_type(self) -> ProviderType:
         """Get the provider type."""
@@ -135,3 +117,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
             return "grok-3-fast"
         # Fall back to any available model
         return allowed_models[0]
+
+
+# Load registry data at import time
+XAIModelProvider._ensure_registry()
providers/xai_registry.py (new file, 19 lines)
@@ -0,0 +1,19 @@
"""Registry loader for X.AI (GROK) model capabilities."""

from __future__ import annotations

from .model_registry_base import CapabilityModelRegistry
from .shared import ProviderType


class XAIModelRegistry(CapabilityModelRegistry):
    """Capability registry backed by `conf/xai_models.json`."""

    def __init__(self, config_path: str | None = None) -> None:
        super().__init__(
            env_var_name="XAI_MODELS_CONFIG_PATH",
            default_filename="xai_models.json",
            provider=ProviderType.XAI,
            friendly_prefix="X.AI ({model})",
            config_path=config_path,
        )