diff --git a/.env.example b/.env.example
index 7d6b301..51cfa1a 100644
--- a/.env.example
+++ b/.env.example
@@ -17,6 +17,15 @@ GEMINI_API_KEY=your_gemini_api_key_here
# Get your OpenAI API key from: https://platform.openai.com/api-keys
OPENAI_API_KEY=your_openai_api_key_here
+# Azure OpenAI mirrors OpenAI models through Azure-hosted deployments.
+# Set the endpoint from the Azure Portal. Models are defined in conf/azure_models.json
+# (or the file referenced by AZURE_MODELS_CONFIG_PATH).
+AZURE_OPENAI_API_KEY=your_azure_openai_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+# AZURE_OPENAI_API_VERSION=2024-02-15-preview
+# AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini
+# AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json
+
# Get your X.AI API key from: https://console.x.ai/
XAI_API_KEY=your_xai_api_key_here
diff --git a/README.md b/README.md
index 8f6b131..e9e4f27 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
[zen_web.webm](https://github.com/user-attachments/assets/851e3911-7f06-47c0-a4ab-a2601236697c)
-🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team
+🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Azure / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team
@@ -85,6 +85,7 @@ For best results, use Claude Code with:
- **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API
- **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models
- **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series
+- **[Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)** - Enterprise deployments of GPT-4o, GPT-4.1, and the GPT-5 family
- **[X.AI](https://console.x.ai/)** - Grok models
- **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access
- **[Ollama](https://ollama.ai/)** - Local models (free)
@@ -132,6 +133,10 @@ cd zen-mcp-server
👉 **[Complete Setup Guide](docs/getting-started.md)** with detailed installation, configuration for Gemini / Codex, and troubleshooting
👉 **[Cursor & VS Code Setup](docs/getting-started.md#ide-clients)** for IDE integration instructions
+## Provider Configuration
+
+Zen activates any provider that has credentials in your `.env`. See `.env.example` for deeper customization.
+
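+A minimal `.env` that enables, say, Gemini plus Azure OpenAI looks like this (the keys and endpoint are placeholders; see `.env.example` for every supported variable):
+
+```env
+GEMINI_API_KEY=your_gemini_api_key_here
+AZURE_OPENAI_API_KEY=your_azure_openai_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+```
+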
## Core Tools
> **Note:** Each tool comes with its own multi-step workflow, parameters, and descriptions that consume valuable context window space even when not in use. To optimize performance, some tools are disabled by default. See [Tool Configuration](#tool-configuration) below to enable them.
@@ -247,7 +252,7 @@ DISABLED_TOOLS=
- **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets
**Model Support**
-- **Multiple providers** - Gemini, OpenAI, X.AI, OpenRouter, DIAL, Ollama
+- **Multiple providers** - Gemini, OpenAI, Azure, X.AI, OpenRouter, DIAL, Ollama
- **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama
- **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost
- **Vision support** - Analyze images, diagrams, screenshots
@@ -288,6 +293,7 @@ DISABLED_TOOLS=
- [Tools Reference](docs/tools/) - All tools with examples
- [Advanced Usage](docs/advanced-usage.md) - Power user features
- [Configuration](docs/configuration.md) - Environment variables, restrictions
+- [Adding Providers](docs/adding_providers.md) - Provider-specific setup (OpenAI, Azure, custom gateways)
- [Model Ranking Guide](docs/model_ranking.md) - How intelligence scores drive auto-mode suggestions
**🔧 Setup & Support**
@@ -303,10 +309,12 @@ Apache 2.0 License - see [LICENSE](LICENSE) file for details.
Built with the power of **Multi-Model AI** collaboration 🤝
- **A**ctual **I**ntelligence by real Humans
-- [MCP (Model Context Protocol)](https://modelcontextprotocol.com) by Anthropic
-- [Claude Code](https://claude.ai/code) - Your AI coding orchestrator
-- [Gemini 2.5 Pro & Flash](https://ai.google.dev/) - Extended thinking & fast analysis
-- [OpenAI O3 & GPT-5](https://openai.com/) - Strong reasoning & latest capabilities
+- [MCP (Model Context Protocol)](https://modelcontextprotocol.com)
+- [Codex CLI](https://developers.openai.com/codex/cli)
+- [Claude Code](https://claude.ai/code)
+- [Gemini](https://ai.google.dev/)
+- [OpenAI](https://openai.com/)
+- [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)
### Star History
diff --git a/conf/azure_models.json b/conf/azure_models.json
new file mode 100644
index 0000000..e1e3a3f
--- /dev/null
+++ b/conf/azure_models.json
@@ -0,0 +1,45 @@
+{
+ "_README": {
+ "description": "Model metadata for Azure OpenAI / Azure AI Foundry-backed provider. The `models` definition can be copied from openrouter_models.json / custom_models.json",
+ "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/azure_models.md",
+ "usage": "Models listed here are exposed through Azure AI Foundry. Aliases are case-insensitive.",
+ "field_notes": "Matches providers/shared/model_capabilities.py.",
+ "field_descriptions": {
+ "model_name": "The model identifier e.g., 'gpt-4'",
+ "deployment": "Azure model deployment name",
+ "aliases": "Array of short names users can type instead of the full model name",
+ "context_window": "Total number of tokens the model can process (input + output combined)",
+ "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
+ "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
+ "supports_json_mode": "Whether the model can guarantee valid JSON output",
+ "supports_function_calling": "Whether the model supports function/tool calling",
+ "supports_images": "Whether the model can process images/visual input",
+ "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
+ "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+ "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
+ "description": "Human-readable description of the model",
+ "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
+ }
+ },
+ "_example_models": [
+ {
+ "model_name": "gpt-4",
+ "deployment": "gpt-4",
+ "aliases": [
+ "gpt4"
+ ],
+ "context_window": 128000,
+ "max_output_tokens": 16384,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "description": "GPT-4 (128K context, 16K output)",
+ "intelligence_score": 10
+ }
+ ],
+ "models": []
+}
diff --git a/conf/custom_models.json b/conf/custom_models.json
index 144bf45..1934df3 100644
--- a/conf/custom_models.json
+++ b/conf/custom_models.json
@@ -1,383 +1,26 @@
{
"_README": {
- "description": "Unified model configuration for multiple AI providers and endpoints, including OpenRouter",
- "providers_supported": [
- "OpenRouter - Access to GPT-4, Claude, Mistral, etc. via unified API",
- "Custom API endpoints - Local models (Ollama, vLLM, LM Studio, etc.)",
- "Self-hosted APIs - Any OpenAI-compatible endpoint"
- ],
+ "description": "Model metadata for local/self-hosted OpenAI-compatible endpoints (Custom provider).",
"documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
- "usage": "Models can be accessed via aliases (e.g., 'opus', 'local-llama') or full names (e.g., 'anthropic/claude-opus-4', 'llama3.2')",
- "instructions": [
- "Add new models by copying an existing entry and modifying it",
- "Aliases are case-insensitive and should be unique across all models",
- "context_window is the model's total context window size in tokens (input + output)",
- "Set supports_* flags based on the model's actual capabilities",
- "Set is_custom=true for models that should ONLY work with custom endpoints (Ollama, vLLM, etc.)",
- "Models not listed here will use generic defaults (32K context window, basic features)",
- "For OpenRouter models: Use official OpenRouter model names (e.g., 'anthropic/claude-opus-4')",
- "For local/custom models: Use model names as they appear in your API (e.g., 'llama3.2', 'gpt-3.5-turbo')"
- ],
+ "usage": "Each entry will be advertised by the Custom provider. Aliases are case-insensitive.",
+ "field_notes": "Matches providers/shared/model_capabilities.py.",
"field_descriptions": {
- "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')",
+ "model_name": "The model identifier e.g., 'llama3.2'",
"aliases": "Array of short names users can type instead of the full model name",
"context_window": "Total number of tokens the model can process (input + output combined)",
"max_output_tokens": "Maximum number of tokens the model can generate in a single response",
- "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
+ "supports_extended_thinking": "Whether the model supports extended reasoning tokens",
"supports_json_mode": "Whether the model can guarantee valid JSON output",
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
- "is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
- },
- "example_custom_model": {
- "model_name": "my-local-model",
- "aliases": [
- "shortname",
- "nickname",
- "abbrev"
- ],
- "context_window": 128000,
- "max_output_tokens": 32768,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 10.0,
- "supports_temperature": true,
- "temperature_constraint": "range",
- "is_custom": true,
- "description": "Example custom/local model for Ollama, vLLM, etc.",
- "intelligence_score": 12
}
},
"models": [
- {
- "model_name": "anthropic/claude-sonnet-4.5",
- "aliases": [
- "sonnet",
- "sonnet4.5"
- ],
- "context_window": 200000,
- "max_output_tokens": 64000,
- "supports_extended_thinking": false,
- "supports_json_mode": false,
- "supports_function_calling": false,
- "supports_images": true,
- "max_image_size_mb": 5.0,
- "description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency",
- "intelligence_score": 12
- },
- {
- "model_name": "anthropic/claude-opus-4.1",
- "aliases": [
- "opus",
- "claude-opus"
- ],
- "context_window": 200000,
- "max_output_tokens": 64000,
- "supports_extended_thinking": false,
- "supports_json_mode": false,
- "supports_function_calling": false,
- "supports_images": true,
- "max_image_size_mb": 5.0,
- "description": "Claude Opus 4.1 - Our most capable and intelligent model yet",
- "intelligence_score": 14
- },
- {
- "model_name": "anthropic/claude-sonnet-4.1",
- "aliases": [
- "sonnet4.1"
- ],
- "context_window": 200000,
- "max_output_tokens": 64000,
- "supports_extended_thinking": false,
- "supports_json_mode": false,
- "supports_function_calling": false,
- "supports_images": true,
- "max_image_size_mb": 5.0,
- "description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency",
- "intelligence_score": 10
- },
- {
- "model_name": "anthropic/claude-3.5-haiku",
- "aliases": [
- "haiku"
- ],
- "context_window": 200000,
- "max_output_tokens": 64000,
- "supports_extended_thinking": false,
- "supports_json_mode": false,
- "supports_function_calling": false,
- "supports_images": true,
- "max_image_size_mb": 5.0,
- "description": "Claude 3 Haiku - Fast and efficient with vision",
- "intelligence_score": 8
- },
- {
- "model_name": "google/gemini-2.5-pro",
- "aliases": [
- "pro",
- "gemini-pro",
- "gemini",
- "pro-openrouter"
- ],
- "context_window": 1048576,
- "max_output_tokens": 65536,
- "supports_extended_thinking": true,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
- "intelligence_score": 18
- },
- {
- "model_name": "google/gemini-2.5-flash",
- "aliases": [
- "flash",
- "gemini-flash"
- ],
- "context_window": 1048576,
- "max_output_tokens": 65536,
- "supports_extended_thinking": true,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 15.0,
- "description": "Google's Gemini 2.5 Flash via OpenRouter with vision",
- "intelligence_score": 10
- },
- {
- "model_name": "mistralai/mistral-large-2411",
- "aliases": [
- "mistral-large",
- "mistral"
- ],
- "context_window": 128000,
- "max_output_tokens": 32000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "description": "Mistral's largest model (text-only)",
- "intelligence_score": 11
- },
- {
- "model_name": "meta-llama/llama-3-70b",
- "aliases": [
- "llama",
- "llama3",
- "llama3-70b",
- "llama-70b",
- "llama3-openrouter"
- ],
- "context_window": 8192,
- "max_output_tokens": 8192,
- "supports_extended_thinking": false,
- "supports_json_mode": false,
- "supports_function_calling": false,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "description": "Meta's Llama 3 70B model (text-only)",
- "intelligence_score": 9
- },
- {
- "model_name": "deepseek/deepseek-r1-0528",
- "aliases": [
- "deepseek-r1",
- "deepseek",
- "r1",
- "deepseek-thinking"
- ],
- "context_window": 65536,
- "max_output_tokens": 32768,
- "supports_extended_thinking": true,
- "supports_json_mode": true,
- "supports_function_calling": false,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)",
- "intelligence_score": 15
- },
- {
- "model_name": "perplexity/llama-3-sonar-large-32k-online",
- "aliases": [
- "perplexity",
- "sonar",
- "perplexity-online"
- ],
- "context_window": 32768,
- "max_output_tokens": 32768,
- "supports_extended_thinking": false,
- "supports_json_mode": false,
- "supports_function_calling": false,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "description": "Perplexity's online model with web search (text-only)",
- "intelligence_score": 9
- },
- {
- "model_name": "openai/o3",
- "aliases": [
- "o3"
- ],
- "context_window": 200000,
- "max_output_tokens": 100000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "supports_temperature": false,
- "temperature_constraint": "fixed",
- "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision",
- "intelligence_score": 14
- },
- {
- "model_name": "openai/o3-mini",
- "aliases": [
- "o3-mini",
- "o3mini"
- ],
- "context_window": 200000,
- "max_output_tokens": 100000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "supports_temperature": false,
- "temperature_constraint": "fixed",
- "description": "OpenAI's o3-mini model - balanced performance and speed with vision",
- "intelligence_score": 12
- },
- {
- "model_name": "openai/o3-mini-high",
- "aliases": [
- "o3-mini-high",
- "o3mini-high"
- ],
- "context_window": 200000,
- "max_output_tokens": 100000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "supports_temperature": false,
- "temperature_constraint": "fixed",
- "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision",
- "intelligence_score": 13
- },
- {
- "model_name": "openai/o3-pro",
- "aliases": [
- "o3pro"
- ],
- "context_window": 200000,
- "max_output_tokens": 100000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "supports_temperature": false,
- "temperature_constraint": "fixed",
- "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision",
- "intelligence_score": 15
- },
- {
- "model_name": "openai/o4-mini",
- "aliases": [
- "o4-mini",
- "o4mini"
- ],
- "context_window": 200000,
- "max_output_tokens": 100000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "supports_temperature": false,
- "temperature_constraint": "fixed",
- "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision",
- "intelligence_score": 11
- },
- {
- "model_name": "openai/gpt-5",
- "aliases": [
- "gpt5"
- ],
- "context_window": 400000,
- "max_output_tokens": 128000,
- "supports_extended_thinking": true,
- "supports_json_mode": true,
- "supports_function_calling": true,
- "supports_images": true,
- "max_image_size_mb": 20.0,
- "supports_temperature": true,
- "temperature_constraint": "range",
- "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
- "intelligence_score": 16
- },
- {
- "model_name": "openai/gpt-5-codex",
- "aliases": [
- "codex",
- "gpt5codex"
- ],
- "context_window": 400000,
- "max_output_tokens": 128000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": false,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "is_custom": false,
- "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows",
- "intelligence_score": 17
- },
- {
- "model_name": "openai/gpt-5-mini",
- "aliases": [
- "gpt5mini"
- ],
- "context_window": 400000,
- "max_output_tokens": 128000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": false,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "supports_temperature": true,
- "temperature_constraint": "fixed",
- "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
- "intelligence_score": 10
- },
- {
- "model_name": "openai/gpt-5-nano",
- "aliases": [
- "gpt5nano"
- ],
- "context_window": 400000,
- "max_output_tokens": 128000,
- "supports_extended_thinking": false,
- "supports_json_mode": true,
- "supports_function_calling": false,
- "supports_images": false,
- "max_image_size_mb": 0.0,
- "supports_temperature": true,
- "temperature_constraint": "fixed",
- "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
- "intelligence_score": 8
- },
{
"model_name": "llama3.2",
"aliases": [
@@ -391,7 +34,6 @@
"supports_function_calling": false,
"supports_images": false,
"max_image_size_mb": 0.0,
- "is_custom": true,
"description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
"intelligence_score": 6
}
diff --git a/conf/openrouter_models.json b/conf/openrouter_models.json
new file mode 100644
index 0000000..b3f35fc
--- /dev/null
+++ b/conf/openrouter_models.json
@@ -0,0 +1,346 @@
+{
+ "_README": {
+ "description": "Model metadata for OpenRouter-backed providers.",
+ "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md",
+ "usage": "Models listed here are exposed through OpenRouter. Aliases are case-insensitive.",
+ "field_notes": "Matches providers/shared/model_capabilities.py.",
+ "field_descriptions": {
+ "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')",
+ "aliases": "Array of short names users can type instead of the full model name",
+ "context_window": "Total number of tokens the model can process (input + output combined)",
+ "max_output_tokens": "Maximum number of tokens the model can generate in a single response",
+ "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)",
+ "supports_json_mode": "Whether the model can guarantee valid JSON output",
+ "supports_function_calling": "Whether the model supports function/tool calling",
+ "supports_images": "Whether the model can process images/visual input",
+ "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
+ "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+ "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
+ "description": "Human-readable description of the model",
+ "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
+ }
+ },
+ "models": [
+ {
+ "model_name": "anthropic/claude-sonnet-4.5",
+ "aliases": [
+ "sonnet",
+ "sonnet4.5"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 64000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": false,
+ "supports_function_calling": false,
+ "supports_images": true,
+ "max_image_size_mb": 5.0,
+ "description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency",
+ "intelligence_score": 12
+ },
+ {
+ "model_name": "anthropic/claude-opus-4.1",
+ "aliases": [
+ "opus",
+ "claude-opus"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 64000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": false,
+ "supports_function_calling": false,
+ "supports_images": true,
+ "max_image_size_mb": 5.0,
+ "description": "Claude Opus 4.1 - Our most capable and intelligent model yet",
+ "intelligence_score": 14
+ },
+ {
+ "model_name": "anthropic/claude-sonnet-4.1",
+ "aliases": [
+ "sonnet4.1"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 64000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": false,
+ "supports_function_calling": false,
+ "supports_images": true,
+ "max_image_size_mb": 5.0,
+ "description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency",
+ "intelligence_score": 10
+ },
+ {
+ "model_name": "anthropic/claude-3.5-haiku",
+ "aliases": [
+ "haiku"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 64000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": false,
+ "supports_function_calling": false,
+ "supports_images": true,
+ "max_image_size_mb": 5.0,
+ "description": "Claude 3 Haiku - Fast and efficient with vision",
+ "intelligence_score": 8
+ },
+ {
+ "model_name": "google/gemini-2.5-pro",
+ "aliases": [
+ "pro",
+ "gemini-pro",
+ "gemini",
+ "pro-openrouter"
+ ],
+ "context_window": 1048576,
+ "max_output_tokens": 65536,
+ "supports_extended_thinking": true,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
+ "intelligence_score": 18
+ },
+ {
+ "model_name": "google/gemini-2.5-flash",
+ "aliases": [
+ "flash",
+ "gemini-flash"
+ ],
+ "context_window": 1048576,
+ "max_output_tokens": 65536,
+ "supports_extended_thinking": true,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 15.0,
+ "description": "Google's Gemini 2.5 Flash via OpenRouter with vision",
+ "intelligence_score": 10
+ },
+ {
+ "model_name": "mistralai/mistral-large-2411",
+ "aliases": [
+ "mistral-large",
+ "mistral"
+ ],
+ "context_window": 128000,
+ "max_output_tokens": 32000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "description": "Mistral's largest model (text-only)",
+ "intelligence_score": 11
+ },
+ {
+ "model_name": "meta-llama/llama-3-70b",
+ "aliases": [
+ "llama",
+ "llama3",
+ "llama3-70b",
+ "llama-70b",
+ "llama3-openrouter"
+ ],
+ "context_window": 8192,
+ "max_output_tokens": 8192,
+ "supports_extended_thinking": false,
+ "supports_json_mode": false,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "description": "Meta's Llama 3 70B model (text-only)",
+ "intelligence_score": 9
+ },
+ {
+ "model_name": "deepseek/deepseek-r1-0528",
+ "aliases": [
+ "deepseek-r1",
+ "deepseek",
+ "r1",
+ "deepseek-thinking"
+ ],
+ "context_window": 65536,
+ "max_output_tokens": 32768,
+ "supports_extended_thinking": true,
+ "supports_json_mode": true,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)",
+ "intelligence_score": 15
+ },
+ {
+ "model_name": "perplexity/llama-3-sonar-large-32k-online",
+ "aliases": [
+ "perplexity",
+ "sonar",
+ "perplexity-online"
+ ],
+ "context_window": 32768,
+ "max_output_tokens": 32768,
+ "supports_extended_thinking": false,
+ "supports_json_mode": false,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "description": "Perplexity's online model with web search (text-only)",
+ "intelligence_score": 9
+ },
+ {
+ "model_name": "openai/o3",
+ "aliases": [
+ "o3"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 100000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision",
+ "intelligence_score": 14
+ },
+ {
+ "model_name": "openai/o3-mini",
+ "aliases": [
+ "o3-mini",
+ "o3mini"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 100000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "description": "OpenAI's o3-mini model - balanced performance and speed with vision",
+ "intelligence_score": 12
+ },
+ {
+ "model_name": "openai/o3-mini-high",
+ "aliases": [
+ "o3-mini-high",
+ "o3mini-high"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 100000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision",
+ "intelligence_score": 13
+ },
+ {
+ "model_name": "openai/o3-pro",
+ "aliases": [
+ "o3pro"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 100000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision",
+ "intelligence_score": 15
+ },
+ {
+ "model_name": "openai/o4-mini",
+ "aliases": [
+ "o4-mini",
+ "o4mini"
+ ],
+ "context_window": 200000,
+ "max_output_tokens": 100000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision",
+ "intelligence_score": 11
+ },
+ {
+ "model_name": "openai/gpt-5",
+ "aliases": [
+ "gpt5"
+ ],
+ "context_window": 400000,
+ "max_output_tokens": 128000,
+ "supports_extended_thinking": true,
+ "supports_json_mode": true,
+ "supports_function_calling": true,
+ "supports_images": true,
+ "max_image_size_mb": 20.0,
+ "supports_temperature": true,
+ "temperature_constraint": "range",
+ "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
+ "intelligence_score": 16
+ },
+ {
+ "model_name": "openai/gpt-5-codex",
+ "aliases": [
+ "codex",
+ "gpt5codex"
+ ],
+ "context_window": 400000,
+ "max_output_tokens": 128000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows",
+ "intelligence_score": 17
+ },
+ {
+ "model_name": "openai/gpt-5-mini",
+ "aliases": [
+ "gpt5mini"
+ ],
+ "context_window": 400000,
+ "max_output_tokens": 128000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "supports_temperature": true,
+ "temperature_constraint": "fixed",
+ "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
+ "intelligence_score": 10
+ },
+ {
+ "model_name": "openai/gpt-5-nano",
+ "aliases": [
+ "gpt5nano"
+ ],
+ "context_window": 400000,
+ "max_output_tokens": 128000,
+ "supports_extended_thinking": false,
+ "supports_json_mode": true,
+ "supports_function_calling": false,
+ "supports_images": false,
+ "max_image_size_mb": 0.0,
+ "supports_temperature": true,
+ "temperature_constraint": "fixed",
+ "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
+ "intelligence_score": 8
+ }
+ ]
+}
diff --git a/docs/adding_providers.md b/docs/adding_providers.md
index 8b0ce58..dae9785 100644
--- a/docs/adding_providers.md
+++ b/docs/adding_providers.md
@@ -9,6 +9,7 @@ Each provider:
- Defines supported models using `ModelCapabilities` objects
- Implements the minimal abstract hooks (`get_provider_type()` and `generate_content()`)
- Gets wired into `configure_providers()` so environment variables control activation
+- Can leverage helper subclasses (e.g., `AzureOpenAIProvider`) when only client wiring differs
### Intelligence score cheatsheet
@@ -31,6 +32,13 @@ features ([details here](model_ranking.md)).
⚠️ **Important**: If you implement a custom `generate_content()`, call `_resolve_model_name()` before invoking the SDK so aliases (e.g. `"gpt"` → `"gpt-4"`) resolve correctly. The shared implementations already do this for you.
+**Option C: Azure OpenAI (`AzureOpenAIProvider`)**
+- For Azure-hosted deployments of OpenAI models
+- Reuses the OpenAI-compatible pipeline but swaps in the `AzureOpenAI` client and a deployment mapping (canonical model → deployment ID)
+- Define deployments in [`conf/azure_models.json`](../conf/azure_models.json) (or the file referenced by `AZURE_MODELS_CONFIG_PATH`)
+- Entries follow the [`ModelCapabilities`](../providers/shared/model_capabilities.py) schema and must include a `deployment` identifier
+
+See [Azure OpenAI Configuration](azure_openai.md) for a step-by-step walkthrough.
+
## Step-by-Step Guide
### 1. Add Provider Type
@@ -227,6 +235,19 @@ DISABLED_TOOLS=debug,tracer
EXAMPLE_ALLOWED_MODELS=example-model-large,example-model-small
```
+For Azure OpenAI deployments:
+
+```bash
+AZURE_OPENAI_API_KEY=your_azure_openai_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+# Models are defined in conf/azure_models.json (or AZURE_MODELS_CONFIG_PATH)
+# AZURE_OPENAI_API_VERSION=2024-02-15-preview
+# AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini
+# AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json
+```
+
+Azure models themselves are defined in [`conf/azure_models.json`](../conf/azure_models.json) (the bundled file ships with an empty `models` array, so it is safe to copy as a starting point). Each entry mirrors the `ModelCapabilities` schema and must include a `deployment` field. Set `AZURE_MODELS_CONFIG_PATH` if you maintain a custom copy outside the repository.
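+
+For example, a minimal manifest with a single deployment (the deployment name and capability values below are illustrative placeholders):
+
+```json
+{
+  "models": [
+    {
+      "model_name": "gpt-4o",
+      "deployment": "prod-gpt4o",
+      "aliases": ["gpt4o"],
+      "context_window": 128000,
+      "max_output_tokens": 16384,
+      "supports_json_mode": true,
+      "description": "Azure GPT-4o deployment"
+    }
+  ]
+}
+```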
+
**Note**: The `description` field in `ModelCapabilities` helps Claude choose the best model in auto mode.
### 5. Test Your Provider
diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md
index 4d1cd08..000c0f3 100644
--- a/docs/advanced-usage.md
+++ b/docs/advanced-usage.md
@@ -91,8 +91,8 @@ OPENAI_ALLOWED_MODELS=o3,o4-mini
**Important Notes:**
- Restrictions apply to all usage including auto mode
-- `OPENROUTER_ALLOWED_MODELS` only affects OpenRouter models accessed via custom provider (where `is_custom: false` in custom_models.json)
-- Custom local models (`is_custom: true`) are not affected by any restrictions
+- `OPENROUTER_ALLOWED_MODELS` only affects models defined in `conf/openrouter_models.json`
+- Custom local models (from `conf/custom_models.json`) are not affected by OpenRouter restrictions
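+
+For example, to expose only a couple of OpenRouter models (the aliases shown are illustrative):
+
+```env
+OPENROUTER_ALLOWED_MODELS=opus,flash
+```
+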
## Thinking Modes
diff --git a/docs/azure_openai.md b/docs/azure_openai.md
new file mode 100644
index 0000000..d4f6d2f
--- /dev/null
+++ b/docs/azure_openai.md
@@ -0,0 +1,62 @@
+# Azure OpenAI Configuration
+
+Azure OpenAI support lets Zen MCP use GPT-4o, GPT-4.1, GPT-5, and o-series deployments hosted on your Azure resource. This guide covers the configuration the server expects: two required environment variables plus a JSON manifest listing every deployment you want to expose.
+
+## 1. Required Environment Variables
+
+Set these entries in your `.env` (or MCP `env` block).
+
+```bash
+AZURE_OPENAI_API_KEY=your_azure_openai_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+# AZURE_OPENAI_API_VERSION=2024-02-15-preview
+```
+
+Without the key and endpoint, the provider is skipped entirely. Leave the key blank only if the endpoint truly allows anonymous access (rare for Azure).
+
+## 2. Define Deployments in `conf/azure_models.json`
+
+Azure models live in `conf/azure_models.json` (or the file pointed to by `AZURE_MODELS_CONFIG_PATH`). Each entry follows the same schema as [`ModelCapabilities`](../providers/shared/model_capabilities.py) with one additional required key: `deployment`. This field must exactly match the deployment name shown in the Azure Portal (for example `prod-gpt4o`). The provider routes requests by that value, so omitting it or using the wrong name will cause the server to skip the model.
+
+```json
+{
+ "models": [
+ {
+ "model_name": "gpt-4o",
+ "deployment": "prod-gpt4o",
+ "friendly_name": "Azure GPT-4o EU",
+ "intelligence_score": 18,
+ "context_window": 600000,
+ "max_output_tokens": 128000,
+ "supports_temperature": false,
+ "temperature_constraint": "fixed",
+ "aliases": ["gpt4o-eu"]
+ }
+ ]
+}
+```
+
+Tips:
+
+- Copy `conf/azure_models.json` into your repo and commit it, or point `AZURE_MODELS_CONFIG_PATH` at a custom path.
+- Add one object per deployment. Aliases are optional but help when you want short names like `gpt4o-eu`.
+- All capability fields are optional except `model_name`, `deployment`, and `friendly_name`. Anything you omit falls back to conservative defaults.
+
+## 3. Optional Restrictions
+
+Use `AZURE_OPENAI_ALLOWED_MODELS` to limit which Azure models Claude can access:
+
+```bash
+AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini
+```
+
+Aliases are matched case-insensitively.
+
+## 4. Quick Checklist
+
+- [ ] `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` are set
+- [ ] `conf/azure_models.json` (or the file referenced by `AZURE_MODELS_CONFIG_PATH`) lists every deployment with the desired metadata
+- [ ] Optional: `AZURE_OPENAI_ALLOWED_MODELS` to restrict usage
+- [ ] Restart `./run-server.sh` and run `listmodels` to confirm the Azure entries appear with the expected metadata
+
+See also: [`docs/adding_providers.md`](adding_providers.md) for the full provider architecture and [README (Provider Configuration)](../README.md#provider-configuration) for quick-start environment snippets.
diff --git a/docs/configuration.md b/docs/configuration.md
index 12e9d65..9b48fab 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -158,6 +158,8 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast
```env
# Override default location of custom_models.json
CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
+# Override default location of openrouter_models.json
+OPENROUTER_MODELS_CONFIG_PATH=/path/to/your/openrouter_models.json
```
**Conversation Settings:**
@@ -244,4 +246,4 @@ LOG_LEVEL=INFO
- **[Advanced Usage Guide](advanced-usage.md)** - Advanced model usage patterns, thinking modes, and power user workflows
- **[Context Revival Guide](context-revival.md)** - Conversation persistence and context revival across sessions
-- **[AI-to-AI Collaboration Guide](ai-collaboration.md)** - Multi-model coordination and conversation threading
\ No newline at end of file
+- **[AI-to-AI Collaboration Guide](ai-collaboration.md)** - Multi-model coordination and conversation threading
diff --git a/docs/custom_models.md b/docs/custom_models.md
index b2e7365..2db1694 100644
--- a/docs/custom_models.md
+++ b/docs/custom_models.md
@@ -35,7 +35,12 @@ This guide covers setting up multiple AI model providers including OpenRouter, c
## Model Aliases
-The server uses `conf/custom_models.json` to map convenient aliases to both OpenRouter and custom model names. This unified registry supports both cloud models (via OpenRouter) and local models (via custom endpoints).
+Zen ships two registries:
+
+- `conf/openrouter_models.json` – metadata for models routed through OpenRouter. Override with `OPENROUTER_MODELS_CONFIG_PATH` if you maintain a custom copy.
+- `conf/custom_models.json` – metadata for local or self-hosted OpenAI-compatible endpoints used by the Custom provider. Override with `CUSTOM_MODELS_CONFIG_PATH` if needed.
+
+Copy whichever file you need into your project (or point the corresponding `*_MODELS_CONFIG_PATH` env var at your own copy) and edit it to advertise the models you want.
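+
+If you keep customized copies outside the repository, point the server at them explicitly (paths below are placeholders):
+
+```env
+OPENROUTER_MODELS_CONFIG_PATH=/path/to/your/openrouter_models.json
+CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
+```
+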
### OpenRouter Models (Cloud)
@@ -58,7 +63,7 @@ The server uses `conf/custom_models.json` to map convenient aliases to both Open
|-------|-------------------|------|
| `local-llama`, `local` | `llama3.2` | Requires `CUSTOM_API_URL` configured |
-View the full list in [`conf/custom_models.json`](conf/custom_models.json).
+View the baseline OpenRouter catalogue in [`conf/openrouter_models.json`](conf/openrouter_models.json) and populate [`conf/custom_models.json`](conf/custom_models.json) with your local models.
To control ordering in auto mode or the `listmodels` summary, adjust the
[`intelligence_score`](model_ranking.md) for each entry (or rely on the automatic
@@ -152,7 +157,7 @@ CUSTOM_MODEL_NAME=your-loaded-model
## Using Models
-**Using model aliases (from conf/custom_models.json):**
+**Using model aliases (from the registry files):**
```
# OpenRouter models:
"Use opus for deep analysis" # → anthropic/claude-opus-4
@@ -185,20 +190,20 @@ CUSTOM_MODEL_NAME=your-loaded-model
The system automatically routes models to the appropriate provider:
-1. **Models with `is_custom: true`** → Always routed to Custom API (requires `CUSTOM_API_URL`)
-2. **Models with `is_custom: false` or omitted** → Routed to OpenRouter (requires `OPENROUTER_API_KEY`)
+1. Entries in `conf/custom_models.json` → Always routed through the Custom API (requires `CUSTOM_API_URL`)
+2. Entries in `conf/openrouter_models.json` → Routed through OpenRouter (requires `OPENROUTER_API_KEY`)
3. **Unknown models** → Fallback logic based on model name patterns
**Provider Priority Order:**
1. Native APIs (Google, OpenAI) - if API keys are available
-2. Custom endpoints - for models marked with `is_custom: true`
+2. Custom endpoints - for models declared in `conf/custom_models.json`
3. OpenRouter - catch-all for cloud models
This ensures clean separation between local and cloud models while maintaining flexibility for unknown models.
## Model Configuration
-The server uses `conf/custom_models.json` to define model aliases and capabilities. You can:
+These JSON files define model aliases and capabilities. You can:
1. **Use the default configuration** - Includes popular models with convenient aliases
2. **Customize the configuration** - Add your own models and aliases
@@ -206,7 +211,7 @@ The server uses `conf/custom_models.json` to define model aliases and capabiliti
### Adding Custom Models
-Edit `conf/custom_models.json` to add new models. The configuration supports both OpenRouter (cloud) and custom endpoint (local) models.
+Edit `conf/openrouter_models.json` to tweak OpenRouter behaviour or `conf/custom_models.json` to add local models. Each entry maps directly onto [`ModelCapabilities`](../providers/shared/model_capabilities.py).
#### Adding an OpenRouter Model
@@ -232,7 +237,6 @@ Edit `conf/custom_models.json` to add new models. The configuration supports bot
"supports_extended_thinking": false,
"supports_json_mode": false,
"supports_function_calling": false,
- "is_custom": true,
"description": "My custom Ollama/vLLM model"
}
```
@@ -244,10 +248,9 @@ Edit `conf/custom_models.json` to add new models. The configuration supports bot
- `supports_extended_thinking`: Whether the model has extended reasoning capabilities
- `supports_json_mode`: Whether the model can guarantee valid JSON output
- `supports_function_calling`: Whether the model supports function/tool calling
-- `is_custom`: **Set to `true` for models that should ONLY work with custom endpoints** (Ollama, vLLM, etc.)
- `description`: Human-readable description of the model
-**Important:** Always set `is_custom: true` for local models. This ensures they're only used when `CUSTOM_API_URL` is configured and prevents conflicts with OpenRouter.
+**Important:** Keep OpenRouter and Custom models in their respective files so that requests are routed correctly.
## Available Models
diff --git a/docs/index.md b/docs/index.md
index 3b8ca07..2681503 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -4,6 +4,7 @@
|----------|-------------|
| [Getting Started](getting-started.md) | Installation paths, prerequisite setup, and first-run guidance. |
| [Adding Providers](adding_providers.md) | How to register new AI providers and advertise capabilities. |
+| [Azure OpenAI](azure_openai.md) | Configure Azure deployments, capability overrides, and env mappings. |
| [Model Ranking](model_ranking.md) | How intelligence scores translate into auto-mode ordering. |
| [Custom Models](custom_models.md) | Configure OpenRouter/custom models and aliases. |
| [Adding Tools](adding_tools.md) | Create new tools using the shared base classes. |
diff --git a/docs/model_ranking.md b/docs/model_ranking.md
index 406d0be..64f32af 100644
--- a/docs/model_ranking.md
+++ b/docs/model_ranking.md
@@ -25,7 +25,7 @@ feature_bonus = (
+ (1 if supports_json_mode else 0)
+ (1 if supports_images else 0)
)
-penalty = 1 if is_custom else 0
+penalty = 1 if provider == CUSTOM else 0
effective_rank = clamp(base + ctx_bonus + output_bonus + feature_bonus - penalty, 0, 100)
```
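+
+For illustration, with hypothetical intermediate values for a model served by the Custom provider (base 10, ctx_bonus 2, output_bonus 1, feature_bonus 2), the penalty applies and the result is clamped into the 0-100 range:
+
+```python
+# hypothetical numbers, not taken from a real model entry
+base, ctx_bonus, output_bonus, feature_bonus = 10, 2, 1, 2
+penalty = 1  # provider == CUSTOM
+effective_rank = min(max(base + ctx_bonus + output_bonus + feature_bonus - penalty, 0), 100)
+assert effective_rank == 14
+```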
diff --git a/providers/__init__.py b/providers/__init__.py
index 311fafa..9421edc 100644
--- a/providers/__init__.py
+++ b/providers/__init__.py
@@ -1,5 +1,6 @@
"""Model provider abstractions for supporting multiple AI providers."""
+from .azure_openai import AzureOpenAIProvider
from .base import ModelProvider
from .gemini import GeminiModelProvider
from .openai_compatible import OpenAICompatibleProvider
@@ -13,6 +14,7 @@ __all__ = [
"ModelResponse",
"ModelCapabilities",
"ModelProviderRegistry",
+ "AzureOpenAIProvider",
"GeminiModelProvider",
"OpenAIModelProvider",
"OpenAICompatibleProvider",
diff --git a/providers/azure_openai.py b/providers/azure_openai.py
new file mode 100644
index 0000000..0371b6f
--- /dev/null
+++ b/providers/azure_openai.py
@@ -0,0 +1,342 @@
+"""Azure OpenAI provider built on the OpenAI-compatible implementation."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import asdict, replace
+
+try: # pragma: no cover - optional dependency
+ from openai import AzureOpenAI
+except ImportError: # pragma: no cover
+ AzureOpenAI = None # type: ignore[assignment]
+
+from utils.env import get_env, suppress_env_vars
+
+from .azure_registry import AzureModelRegistry
+from .openai_compatible import OpenAICompatibleProvider
+from .openai_provider import OpenAIModelProvider
+from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint
+
+logger = logging.getLogger(__name__)
+
+
+class AzureOpenAIProvider(OpenAICompatibleProvider):
+ """Thin Azure wrapper that reuses the OpenAI-compatible request pipeline."""
+
+ FRIENDLY_NAME = "Azure OpenAI"
+ DEFAULT_API_VERSION = "2024-02-15-preview"
+
+ # The OpenAI-compatible base expects subclasses to expose capabilities via
+ # ``get_all_model_capabilities``. Azure deployments are user-defined, so we
+ # build the catalogue dynamically from environment configuration instead of
+ # relying on a static ``MODEL_CAPABILITIES`` map.
+ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {}
+
+ def __init__(
+ self,
+ api_key: str,
+ *,
+ azure_endpoint: str | None = None,
+ api_version: str | None = None,
+ deployments: dict[str, object] | None = None,
+ **kwargs,
+ ) -> None:
+ # Let the OpenAI-compatible base handle shared configuration such as
+ # timeouts, restriction-aware allowlists, and logging. ``base_url`` maps
+ # directly onto Azure's endpoint URL.
+ super().__init__(api_key, base_url=azure_endpoint, **kwargs)
+
+ if not azure_endpoint:
+ azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT")
+ if not azure_endpoint:
+ raise ValueError("Azure OpenAI endpoint is required via parameter or AZURE_OPENAI_ENDPOINT")
+
+ self.azure_endpoint = azure_endpoint.rstrip("/")
+ self.api_version = api_version or get_env("AZURE_OPENAI_API_VERSION", self.DEFAULT_API_VERSION)
+
+ registry_specs = self._load_registry_entries()
+ override_specs = self._normalise_deployments(deployments or {}) if deployments else {}
+
+ self._model_specs = self._merge_specs(registry_specs, override_specs)
+ if not self._model_specs:
+ raise ValueError(
+ "Azure OpenAI provider requires at least one configured deployment. "
+ "Populate conf/azure_models.json or set AZURE_MODELS_CONFIG_PATH."
+ )
+
+ self._capabilities = self._build_capabilities_map()
+ self._deployment_map = {name: spec["deployment"] for name, spec in self._model_specs.items()}
+ self._deployment_alias_lookup = {
+ deployment.lower(): canonical for canonical, deployment in self._deployment_map.items()
+ }
+ self._canonical_lookup = {name.lower(): name for name in self._model_specs.keys()}
+ self._invalidate_capability_cache()
+
+ # ------------------------------------------------------------------
+ # Capability helpers
+ # ------------------------------------------------------------------
+ def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]:
+ return dict(self._capabilities)
+
+ def get_provider_type(self) -> ProviderType:
+ return ProviderType.AZURE
+
+ def get_capabilities(self, model_name: str) -> ModelCapabilities: # type: ignore[override]
+ lowered = model_name.lower()
+ if lowered in self._deployment_alias_lookup:
+ canonical = self._deployment_alias_lookup[lowered]
+ return super().get_capabilities(canonical)
+ canonical = self._canonical_lookup.get(lowered)
+ if canonical:
+ return super().get_capabilities(canonical)
+ return super().get_capabilities(model_name)
+
+ def validate_model_name(self, model_name: str) -> bool: # type: ignore[override]
+ lowered = model_name.lower()
+ if lowered in self._deployment_alias_lookup or lowered in self._canonical_lookup:
+ return True
+ return super().validate_model_name(model_name)
+
+ def _build_capabilities_map(self) -> dict[str, ModelCapabilities]:
+ capabilities: dict[str, ModelCapabilities] = {}
+
+ for canonical_name, spec in self._model_specs.items():
+ template_capability: ModelCapabilities | None = spec.get("capability")
+ overrides = spec.get("overrides", {})
+
+ if template_capability:
+ cloned = replace(template_capability)
+ else:
+ template = OpenAIModelProvider.MODEL_CAPABILITIES.get(canonical_name)
+
+ if template:
+ friendly = template.friendly_name.replace("OpenAI", "Azure OpenAI", 1)
+ cloned = replace(
+ template,
+ provider=ProviderType.AZURE,
+ friendly_name=friendly,
+ aliases=list(template.aliases),
+ )
+ else:
+ deployment_name = spec.get("deployment", "")
+ cloned = ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name=canonical_name,
+ friendly_name=f"Azure OpenAI ({canonical_name})",
+ description=f"Azure deployment '{deployment_name}' for {canonical_name}",
+ aliases=[],
+ )
+
+ if overrides:
+ overrides = dict(overrides)
+ temp_override = overrides.get("temperature_constraint")
+ if isinstance(temp_override, str):
+ overrides["temperature_constraint"] = TemperatureConstraint.create(temp_override)
+
+ aliases_override = overrides.get("aliases")
+ if isinstance(aliases_override, str):
+ overrides["aliases"] = [alias.strip() for alias in aliases_override.split(",") if alias.strip()]
+ provider_override = overrides.get("provider")
+ if provider_override:
+ overrides.pop("provider", None)
+
+ try:
+ cloned = replace(cloned, **overrides)
+ except TypeError:
+ base_data = asdict(cloned)
+ base_data.update(overrides)
+ base_data["provider"] = ProviderType.AZURE
+ temp_value = base_data.get("temperature_constraint")
+ if isinstance(temp_value, str):
+ base_data["temperature_constraint"] = TemperatureConstraint.create(temp_value)
+ cloned = ModelCapabilities(**base_data)
+
+ if cloned.provider != ProviderType.AZURE:
+ cloned.provider = ProviderType.AZURE
+
+ capabilities[canonical_name] = cloned
+
+ return capabilities
+
+ def _load_registry_entries(self) -> dict[str, dict]:
+ try:
+ registry = AzureModelRegistry()
+ except Exception as exc: # pragma: no cover - registry failure should not crash provider
+ logger.warning("Unable to load Azure model registry: %s", exc)
+ return {}
+
+ entries: dict[str, dict] = {}
+ for model_name, capability, extra in registry.iter_entries():
+ deployment = extra.get("deployment")
+ if not deployment:
+ logger.warning("Azure model '%s' missing deployment in registry", model_name)
+ continue
+ entries[model_name] = {"deployment": deployment, "capability": capability}
+
+ return entries
+
+ @staticmethod
+ def _merge_specs(
+ registry_specs: dict[str, dict],
+ override_specs: dict[str, dict],
+ ) -> dict[str, dict]:
+ specs: dict[str, dict] = {}
+
+ for canonical, entry in registry_specs.items():
+ specs[canonical] = {
+ "deployment": entry.get("deployment"),
+ "capability": entry.get("capability"),
+ "overrides": {},
+ }
+
+ for canonical, entry in override_specs.items():
+ spec = specs.get(canonical, {"deployment": None, "capability": None, "overrides": {}})
+ deployment = entry.get("deployment")
+ if deployment:
+ spec["deployment"] = deployment
+ overrides = {k: v for k, v in entry.items() if k not in {"deployment"}}
+ overrides.pop("capability", None)
+ if overrides:
+ spec["overrides"].update(overrides)
+ specs[canonical] = spec
+
+ return {k: v for k, v in specs.items() if v.get("deployment")}
+
+ @staticmethod
+ def _normalise_deployments(mapping: dict[str, object]) -> dict[str, dict]:
+ normalised: dict[str, dict] = {}
+ for canonical, spec in mapping.items():
+ canonical_name = (canonical or "").strip()
+ if not canonical_name:
+ continue
+
+ deployment_name: str | None = None
+ overrides: dict[str, object] = {}
+
+ if isinstance(spec, str):
+ deployment_name = spec.strip()
+ elif isinstance(spec, dict):
+ deployment_name = spec.get("deployment") or spec.get("deployment_name")
+ overrides = {k: v for k, v in spec.items() if k not in {"deployment", "deployment_name"}}
+
+ if not deployment_name:
+ continue
+
+ normalised[canonical_name] = {"deployment": deployment_name.strip(), **overrides}
+
+ return normalised
+
+ # ------------------------------------------------------------------
+ # Azure-specific configuration
+ # ------------------------------------------------------------------
+ @property
+ def client(self): # type: ignore[override]
+ """Instantiate the Azure OpenAI client on first use."""
+
+ if self._client is None:
+ if AzureOpenAI is None:
+ raise ImportError(
+ "Azure OpenAI support requires the 'openai' package. Install it with `pip install openai`."
+ )
+
+ import httpx
+
+ proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"]
+
+ with suppress_env_vars(*proxy_env_vars):
+ try:
+ timeout_config = self.timeout_config
+
+ http_client = httpx.Client(timeout=timeout_config, follow_redirects=True)
+
+ client_kwargs = {
+ "api_key": self.api_key,
+ "azure_endpoint": self.azure_endpoint,
+ "api_version": self.api_version,
+ "http_client": http_client,
+ }
+
+ if self.DEFAULT_HEADERS:
+ client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy()
+
+ logger.debug(
+ "Initializing Azure OpenAI client endpoint=%s api_version=%s timeouts=%s",
+ self.azure_endpoint,
+ self.api_version,
+ timeout_config,
+ )
+
+ self._client = AzureOpenAI(**client_kwargs)
+
+ except Exception as exc:
+ logger.error("Failed to create Azure OpenAI client: %s", exc)
+ raise
+
+ return self._client
+
+ # ------------------------------------------------------------------
+ # Request delegation
+ # ------------------------------------------------------------------
+ def generate_content(
+ self,
+ prompt: str,
+ model_name: str,
+ system_prompt: str | None = None,
+ temperature: float = 0.3,
+ max_output_tokens: int | None = None,
+ images: list[str] | None = None,
+ **kwargs,
+ ) -> ModelResponse:
+ canonical_name, deployment_name = self._resolve_canonical_and_deployment(model_name)
+
+ # Delegate to the shared OpenAI-compatible implementation using the
+ # deployment name – Azure requires the deployment identifier in the
+ # ``model`` field. The returned ``ModelResponse`` is normalised so
+ # downstream consumers continue to see the canonical model name.
+ raw_response = super().generate_content(
+ prompt=prompt,
+ model_name=deployment_name,
+ system_prompt=system_prompt,
+ temperature=temperature,
+ max_output_tokens=max_output_tokens,
+ images=images,
+ **kwargs,
+ )
+
+ capabilities = self._capabilities.get(canonical_name)
+ friendly_name = capabilities.friendly_name if capabilities else self.FRIENDLY_NAME
+
+ return ModelResponse(
+ content=raw_response.content,
+ usage=raw_response.usage,
+ model_name=canonical_name,
+ friendly_name=friendly_name,
+ provider=ProviderType.AZURE,
+ metadata={**raw_response.metadata, "deployment": deployment_name},
+ )
+
+ def _resolve_canonical_and_deployment(self, model_name: str) -> tuple[str, str]:
+ resolved_canonical = self._resolve_model_name(model_name)
+
+ if resolved_canonical not in self._deployment_map:
+ # The base resolver may hand back the deployment alias. Try to map it
+ # back to a canonical entry.
+ for canonical, deployment in self._deployment_map.items():
+ if deployment.lower() == resolved_canonical.lower():
+ return canonical, deployment
+ raise ValueError(f"Model '{model_name}' is not configured for Azure OpenAI")
+
+ return resolved_canonical, self._deployment_map[resolved_canonical]
+
+ def _parse_allowed_models(self) -> set[str] | None: # type: ignore[override]
+ # Support both AZURE_ALLOWED_MODELS (inherited behaviour) and the
+ # clearer AZURE_OPENAI_ALLOWED_MODELS alias.
+ explicit = get_env("AZURE_OPENAI_ALLOWED_MODELS")
+ if explicit:
+ models = {m.strip().lower() for m in explicit.split(",") if m.strip()}
+ if models:
+ logger.info("Configured allowed models for Azure OpenAI: %s", sorted(models))
+ self._allowed_alias_cache = {}
+ return models
+
+ return super()._parse_allowed_models()
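+
+
+# A minimal usage sketch (the endpoint and deployment names below are assumptions;
+# substitute your own Azure resource values):
+#
+#   provider = AzureOpenAIProvider(
+#       api_key="...",
+#       azure_endpoint="https://your-resource.openai.azure.com/",
+#       deployments={"gpt-4o": "prod-gpt4o"},
+#   )
+#   response = provider.generate_content("hello", "gpt-4o")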
diff --git a/providers/azure_registry.py b/providers/azure_registry.py
new file mode 100644
index 0000000..302ebf2
--- /dev/null
+++ b/providers/azure_registry.py
@@ -0,0 +1,45 @@
+"""Registry loader for Azure OpenAI model configurations."""
+
+from __future__ import annotations
+
+import logging
+
+from .model_registry_base import CAPABILITY_FIELD_NAMES, CustomModelRegistryBase
+from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
+
+logger = logging.getLogger(__name__)
+
+
+class AzureModelRegistry(CustomModelRegistryBase):
+ """Load Azure-specific model metadata from configuration files."""
+
+ def __init__(self, config_path: str | None = None) -> None:
+ super().__init__(
+ env_var_name="AZURE_MODELS_CONFIG_PATH",
+ default_filename="azure_models.json",
+ config_path=config_path,
+ )
+ self.reload()
+
+ def _extra_keys(self) -> set[str]:
+ return {"deployment", "deployment_name"}
+
+ def _provider_default(self) -> ProviderType:
+ return ProviderType.AZURE
+
+ def _default_friendly_name(self, model_name: str) -> str:
+ return f"Azure OpenAI ({model_name})"
+
+ def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
+ deployment = entry.pop("deployment", None) or entry.pop("deployment_name", None)
+ if not deployment:
+ raise ValueError(f"Azure model '{entry.get('model_name')}' is missing required 'deployment' field")
+
+ temp_hint = entry.get("temperature_constraint")
+ if isinstance(temp_hint, str):
+ entry["temperature_constraint"] = TemperatureConstraint.create(temp_hint)
+
+ filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
+ filtered.setdefault("provider", ProviderType.AZURE)
+ capability = ModelCapabilities(**filtered)
+ return capability, {"deployment": deployment}
diff --git a/providers/custom.py b/providers/custom.py
index dcd9462..ebb4826 100644
--- a/providers/custom.py
+++ b/providers/custom.py
@@ -1,10 +1,10 @@
"""Custom API provider implementation."""
import logging
-from typing import Optional
from utils.env import get_env
+from .custom_registry import CustomEndpointModelRegistry
from .openai_compatible import OpenAICompatibleProvider
from .openrouter_registry import OpenRouterModelRegistry
from .shared import ModelCapabilities, ProviderType
@@ -31,8 +31,8 @@ class CustomProvider(OpenAICompatibleProvider):
FRIENDLY_NAME = "Custom API"
- # Model registry for managing configurations and aliases (shared with OpenRouter)
- _registry: Optional[OpenRouterModelRegistry] = None
+ # Model registry for managing configurations and aliases
+ _registry: CustomEndpointModelRegistry | None = None
def __init__(self, api_key: str = "", base_url: str = "", **kwargs):
"""Initialize Custom provider for local/self-hosted models.
@@ -78,9 +78,9 @@ class CustomProvider(OpenAICompatibleProvider):
super().__init__(api_key, base_url=base_url, **kwargs)
- # Initialize model registry (shared with OpenRouter for consistent aliases)
+ # Initialize model registry
if CustomProvider._registry is None:
- CustomProvider._registry = OpenRouterModelRegistry()
+ CustomProvider._registry = CustomEndpointModelRegistry()
# Log loaded models and aliases only on first load
models = self._registry.list_models()
aliases = self._registry.list_aliases()
@@ -92,8 +92,8 @@ class CustomProvider(OpenAICompatibleProvider):
def _lookup_capabilities(
self,
canonical_name: str,
- requested_name: Optional[str] = None,
- ) -> Optional[ModelCapabilities]:
+ requested_name: str | None = None,
+ ) -> ModelCapabilities | None:
"""Return capabilities for models explicitly marked as custom."""
builtin = super()._lookup_capabilities(canonical_name, requested_name)
@@ -101,12 +101,12 @@ class CustomProvider(OpenAICompatibleProvider):
return builtin
registry_entry = self._registry.resolve(canonical_name)
- if registry_entry and getattr(registry_entry, "is_custom", False):
+ if registry_entry:
registry_entry.provider = ProviderType.CUSTOM
return registry_entry
logging.debug(
- "Custom provider cannot resolve model '%s'; ensure it is declared with 'is_custom': true in custom_models.json",
+ "Custom provider cannot resolve model '%s'; ensure it is declared in custom_models.json",
canonical_name,
)
return None
@@ -151,6 +151,15 @@ class CustomProvider(OpenAICompatibleProvider):
return base_model
logging.debug(f"Model '{model_name}' not found in registry, using as-is")
+ # Attempt to resolve via OpenRouter registry so aliases still map cleanly
+ openrouter_registry = OpenRouterModelRegistry()
+ openrouter_config = openrouter_registry.resolve(model_name)
+ if openrouter_config:
+ resolved = openrouter_config.model_name
+ self._alias_cache[cache_key] = resolved
+ self._alias_cache.setdefault(resolved.lower(), resolved)
+ return resolved
+
self._alias_cache[cache_key] = model_name
return model_name
@@ -160,9 +169,9 @@ class CustomProvider(OpenAICompatibleProvider):
if not self._registry:
return {}
- capabilities: dict[str, ModelCapabilities] = {}
- for model_name in self._registry.list_models():
- config = self._registry.resolve(model_name)
- if config and getattr(config, "is_custom", False):
- capabilities[model_name] = config
+ capabilities = {}
+ for model in self._registry.list_models():
+ config = self._registry.resolve(model)
+ if config:
+ capabilities[model] = config
return capabilities
diff --git a/providers/custom_registry.py b/providers/custom_registry.py
new file mode 100644
index 0000000..990a3b0
--- /dev/null
+++ b/providers/custom_registry.py
@@ -0,0 +1,26 @@
+"""Registry for models exposed via custom (local) OpenAI-compatible endpoints."""
+
+from __future__ import annotations
+
+from .model_registry_base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry
+from .shared import ModelCapabilities, ProviderType
+
+
+class CustomEndpointModelRegistry(CapabilityModelRegistry):
+ def __init__(self, config_path: str | None = None) -> None:
+ super().__init__(
+ env_var_name="CUSTOM_MODELS_CONFIG_PATH",
+ default_filename="custom_models.json",
+ provider=ProviderType.CUSTOM,
+ friendly_prefix="Custom ({model})",
+ config_path=config_path,
+ )
+ self.reload()
+
+ def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
+ entry["provider"] = ProviderType.CUSTOM
+ entry.setdefault("friendly_name", f"Custom ({entry['model_name']})")
+ filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
+ filtered.setdefault("provider", ProviderType.CUSTOM)
+ capability = ModelCapabilities(**filtered)
+ return capability, {}
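+
+
+# A minimal sketch of a conf/custom_models.json entry (illustrative values;
+# point model_name at whatever your local OpenAI-compatible endpoint serves):
+#
+# {
+#   "models": [
+#     {
+#       "model_name": "local-llama",
+#       "aliases": ["llama", "local"],
+#       "context_window": 8192,
+#       "max_output_tokens": 2048,
+#       "supports_images": false
+#     }
+#   ]
+# }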
diff --git a/providers/model_registry_base.py b/providers/model_registry_base.py
new file mode 100644
index 0000000..20f4dd9
--- /dev/null
+++ b/providers/model_registry_base.py
@@ -0,0 +1,241 @@
+"""Shared infrastructure for JSON-backed model registries."""
+
+from __future__ import annotations
+
+import importlib.resources
+import json
+import logging
+from collections.abc import Iterable
+from dataclasses import fields
+from pathlib import Path
+
+from utils.env import get_env
+from utils.file_utils import read_json_file
+
+from .shared import ModelCapabilities, ProviderType, TemperatureConstraint
+
+logger = logging.getLogger(__name__)
+
+
+CAPABILITY_FIELD_NAMES = {field.name for field in fields(ModelCapabilities)}
+
+
+class CustomModelRegistryBase:
+ """Load and expose capability metadata from a JSON manifest."""
+
+ def __init__(
+ self,
+ *,
+ env_var_name: str,
+ default_filename: str,
+ config_path: str | None = None,
+ ) -> None:
+ self._env_var_name = env_var_name
+ self._default_filename = default_filename
+ self._use_resources = False
+ self._resource_package = "conf"
+ self._default_path = Path(__file__).parent.parent / "conf" / default_filename
+
+ if config_path:
+ self.config_path = Path(config_path)
+ else:
+ env_path = get_env(env_var_name)
+ if env_path:
+ self.config_path = Path(env_path)
+ else:
+ try:
+ resource = importlib.resources.files(self._resource_package).joinpath(default_filename)
+ if hasattr(resource, "read_text"):
+ self._use_resources = True
+ self.config_path = None
+ else:
+ raise AttributeError("resource accessor not available")
+ except Exception:
+ self.config_path = Path(__file__).parent.parent / "conf" / default_filename
+
+ self.alias_map: dict[str, str] = {}
+ self.model_map: dict[str, ModelCapabilities] = {}
+ self._extras: dict[str, dict] = {}
+
+ def reload(self) -> None:
+ data = self._load_config_data()
+ configs = [config for config in self._parse_models(data) if config is not None]
+ self._build_maps(configs)
+
+ def list_models(self) -> list[str]:
+ return list(self.model_map.keys())
+
+ def list_aliases(self) -> list[str]:
+ return list(self.alias_map.keys())
+
+ def resolve(self, name_or_alias: str) -> ModelCapabilities | None:
+ key = name_or_alias.lower()
+ canonical = self.alias_map.get(key)
+ if canonical:
+ return self.model_map.get(canonical)
+
+ for model_name in self.model_map:
+ if model_name.lower() == key:
+ return self.model_map[model_name]
+ return None
+
+ def get_capabilities(self, name_or_alias: str) -> ModelCapabilities | None:
+ return self.resolve(name_or_alias)
+
+ def get_entry(self, model_name: str) -> dict | None:
+ return self._extras.get(model_name)
+
+ def iter_entries(self) -> Iterable[tuple[str, ModelCapabilities, dict]]:
+ for model_name, capability in self.model_map.items():
+ yield model_name, capability, self._extras.get(model_name, {})
+
+ # ------------------------------------------------------------------
+ # Internal helpers
+ # ------------------------------------------------------------------
+ def _load_config_data(self) -> dict:
+ if self._use_resources:
+ try:
+ resource = importlib.resources.files(self._resource_package).joinpath(self._default_filename)
+ if hasattr(resource, "read_text"):
+ config_text = resource.read_text(encoding="utf-8")
+ else: # pragma: no cover - legacy Python fallback
+ with resource.open("r", encoding="utf-8") as handle:
+ config_text = handle.read()
+ data = json.loads(config_text)
+ except FileNotFoundError:
+ logger.debug("Packaged %s not found", self._default_filename)
+ return {"models": []}
+ except Exception as exc:
+ logger.warning("Failed to read packaged %s: %s", self._default_filename, exc)
+ return {"models": []}
+ return data or {"models": []}
+
+ if not self.config_path:
+ raise FileNotFoundError("Registry configuration path is not set")
+
+ if not self.config_path.exists():
+ logger.debug("Model registry config not found at %s", self.config_path)
+ if self.config_path == self._default_path:
+ fallback = Path.cwd() / "conf" / self._default_filename
+ if fallback != self.config_path and fallback.exists():
+ logger.debug("Falling back to %s", fallback)
+ self.config_path = fallback
+ else:
+ return {"models": []}
+ else:
+ return {"models": []}
+
+ data = read_json_file(str(self.config_path))
+ return data or {"models": []}
+
+ @property
+ def use_resources(self) -> bool:
+ return self._use_resources
+
+ def _parse_models(self, data: dict) -> Iterable[ModelCapabilities | None]:
+ for raw in data.get("models", []):
+ if not isinstance(raw, dict):
+ continue
+ yield self._convert_entry(raw)
+
+ def _convert_entry(self, raw: dict) -> ModelCapabilities | None:
+ entry = dict(raw)
+ model_name = entry.get("model_name")
+ if not model_name:
+ return None
+
+ aliases = entry.get("aliases")
+ if isinstance(aliases, str):
+ entry["aliases"] = [alias.strip() for alias in aliases.split(",") if alias.strip()]
+
+ entry.setdefault("friendly_name", self._default_friendly_name(model_name))
+
+ temperature_hint = entry.get("temperature_constraint")
+ if isinstance(temperature_hint, str):
+ entry["temperature_constraint"] = TemperatureConstraint.create(temperature_hint)
+ elif temperature_hint is None:
+ entry["temperature_constraint"] = TemperatureConstraint.create("range")
+
+ if "max_tokens" in entry:
+ raise ValueError(
+ "`max_tokens` is no longer supported. Use `max_output_tokens` in your model configuration."
+ )
+
+ unknown_keys = set(entry.keys()) - CAPABILITY_FIELD_NAMES - self._extra_keys()
+ if unknown_keys:
+ raise ValueError("Unsupported fields in model configuration: " + ", ".join(sorted(unknown_keys)))
+
+        # Provider defaults are applied inside _finalise_entry so per-entry
+        # overrides (for example custom models declared in the OpenRouter
+        # manifest) are preserved instead of being clobbered here.
+        capability, extras = self._finalise_entry(entry)
+ self._extras[capability.model_name] = extras or {}
+ return capability
+
+ def _default_friendly_name(self, model_name: str) -> str:
+ return model_name
+
+ def _extra_keys(self) -> set[str]:
+ return set()
+
+ def _provider_default(self) -> ProviderType:
+ return ProviderType.OPENROUTER
+
+ def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
+        filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
+        filtered.setdefault("provider", self._provider_default())
+        return ModelCapabilities(**filtered), {}
+
+ def _build_maps(self, configs: Iterable[ModelCapabilities]) -> None:
+ alias_map: dict[str, str] = {}
+ model_map: dict[str, ModelCapabilities] = {}
+
+ for config in configs:
+ if not config:
+ continue
+ model_map[config.model_name] = config
+
+ model_name_lower = config.model_name.lower()
+ if model_name_lower not in alias_map:
+ alias_map[model_name_lower] = config.model_name
+
+ for alias in config.aliases:
+ alias_lower = alias.lower()
+ if alias_lower in alias_map and alias_map[alias_lower] != config.model_name:
+ raise ValueError(
+ f"Duplicate alias '{alias}' found for models '{alias_map[alias_lower]}' and '{config.model_name}'"
+ )
+ alias_map[alias_lower] = config.model_name
+
+ self.alias_map = alias_map
+ self.model_map = model_map
+
+
+class CapabilityModelRegistry(CustomModelRegistryBase):
+ """Registry that returns `ModelCapabilities` objects with alias support."""
+
+ def __init__(
+ self,
+ *,
+ env_var_name: str,
+ default_filename: str,
+ provider: ProviderType,
+ friendly_prefix: str,
+ config_path: str | None = None,
+ ) -> None:
+ self._provider = provider
+ self._friendly_prefix = friendly_prefix
+ super().__init__(
+ env_var_name=env_var_name,
+ default_filename=default_filename,
+ config_path=config_path,
+ )
+ self.reload()
+
+ def _provider_default(self) -> ProviderType:
+ return self._provider
+
+ def _default_friendly_name(self, model_name: str) -> str:
+ return self._friendly_prefix.format(model=model_name)
+
+ def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
+ filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
+ filtered.setdefault("provider", self._provider_default())
+ capability = ModelCapabilities(**filtered)
+ return capability, {}
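+
+
+# A minimal sketch of how a new provider registry can build on this base class
+# (the env var, filename, and prefix below are hypothetical examples; see
+# CustomEndpointModelRegistry and OpenRouterModelRegistry for real subclasses):
+#
+#   class ExampleModelRegistry(CapabilityModelRegistry):
+#       def __init__(self, config_path: str | None = None) -> None:
+#           super().__init__(
+#               env_var_name="EXAMPLE_MODELS_CONFIG_PATH",
+#               default_filename="example_models.json",
+#               provider=ProviderType.OPENROUTER,
+#               friendly_prefix="Example ({model})",
+#               config_path=config_path,
+#           )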
diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py
index 94ae5e6..168549f 100644
--- a/providers/openai_compatible.py
+++ b/providers/openai_compatible.py
@@ -8,7 +8,7 @@ from urllib.parse import urlparse
from openai import OpenAI
-from utils.env import get_env
+from utils.env import get_env, suppress_env_vars
from utils.image_utils import validate_image
from .base import ModelProvider
@@ -257,80 +257,74 @@ class OpenAICompatibleProvider(ModelProvider):
def client(self):
"""Lazy initialization of OpenAI client with security checks and timeout configuration."""
if self._client is None:
- import os
-
import httpx
- # Temporarily disable proxy environment variables to prevent httpx from detecting them
- original_env = {}
proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"]
- for var in proxy_env_vars:
- if var in os.environ:
- original_env[var] = os.environ[var]
- del os.environ[var]
-
- try:
- # Create a custom httpx client that explicitly avoids proxy parameters
- timeout_config = (
- self.timeout_config
- if hasattr(self, "timeout_config") and self.timeout_config
- else httpx.Timeout(30.0)
- )
-
- # Create httpx client with minimal config to avoid proxy conflicts
- # Note: proxies parameter was removed in httpx 0.28.0
- # Check for test transport injection
- if hasattr(self, "_test_transport"):
- # Use custom transport for testing (HTTP recording/replay)
- http_client = httpx.Client(
- transport=self._test_transport,
- timeout=timeout_config,
- follow_redirects=True,
- )
- else:
- # Normal production client
- http_client = httpx.Client(
- timeout=timeout_config,
- follow_redirects=True,
- )
-
- # Keep client initialization minimal to avoid proxy parameter conflicts
- client_kwargs = {
- "api_key": self.api_key,
- "http_client": http_client,
- }
-
- if self.base_url:
- client_kwargs["base_url"] = self.base_url
-
- if self.organization:
- client_kwargs["organization"] = self.organization
-
- # Add default headers if any
- if self.DEFAULT_HEADERS:
- client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy()
-
- logging.debug(f"OpenAI client initialized with custom httpx client and timeout: {timeout_config}")
-
- # Create OpenAI client with custom httpx client
- self._client = OpenAI(**client_kwargs)
-
- except Exception as e:
- # If all else fails, try absolute minimal client without custom httpx
- logging.warning(f"Failed to create client with custom httpx, falling back to minimal config: {e}")
+ with suppress_env_vars(*proxy_env_vars):
try:
- minimal_kwargs = {"api_key": self.api_key}
+ # Create a custom httpx client that explicitly avoids proxy parameters
+ timeout_config = (
+ self.timeout_config
+ if hasattr(self, "timeout_config") and self.timeout_config
+ else httpx.Timeout(30.0)
+ )
+
+ # Create httpx client with minimal config to avoid proxy conflicts
+ # Note: proxies parameter was removed in httpx 0.28.0
+ # Check for test transport injection
+ if hasattr(self, "_test_transport"):
+ # Use custom transport for testing (HTTP recording/replay)
+ http_client = httpx.Client(
+ transport=self._test_transport,
+ timeout=timeout_config,
+ follow_redirects=True,
+ )
+ else:
+ # Normal production client
+ http_client = httpx.Client(
+ timeout=timeout_config,
+ follow_redirects=True,
+ )
+
+ # Keep client initialization minimal to avoid proxy parameter conflicts
+ client_kwargs = {
+ "api_key": self.api_key,
+ "http_client": http_client,
+ }
+
if self.base_url:
- minimal_kwargs["base_url"] = self.base_url
- self._client = OpenAI(**minimal_kwargs)
- except Exception as fallback_error:
- logging.error(f"Even minimal OpenAI client creation failed: {fallback_error}")
- raise
- finally:
- # Restore original proxy environment variables
- for var, value in original_env.items():
- os.environ[var] = value
+ client_kwargs["base_url"] = self.base_url
+
+ if self.organization:
+ client_kwargs["organization"] = self.organization
+
+ # Add default headers if any
+ if self.DEFAULT_HEADERS:
+ client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy()
+
+ logging.debug(
+ "OpenAI client initialized with custom httpx client and timeout: %s",
+ timeout_config,
+ )
+
+ # Create OpenAI client with custom httpx client
+ self._client = OpenAI(**client_kwargs)
+
+ except Exception as e:
+ # If all else fails, try absolute minimal client without custom httpx
+ logging.warning(
+ "Failed to create client with custom httpx, falling back to minimal config: %s",
+ e,
+ )
+ try:
+ minimal_kwargs = {"api_key": self.api_key}
+ if self.base_url:
+ minimal_kwargs["base_url"] = self.base_url
+ self._client = OpenAI(**minimal_kwargs)
+ except Exception as fallback_error:
+ logging.error("Even minimal OpenAI client creation failed: %s", fallback_error)
+ raise
return self._client
diff --git a/providers/openai_provider.py b/providers/openai_provider.py
index 63d0bfc..5b9e53e 100644
--- a/providers/openai_provider.py
+++ b/providers/openai_provider.py
@@ -103,16 +103,16 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o3-mini",
friendly_name="OpenAI (O3-mini)",
intelligence_score=12,
- context_window=200_000, # 200K tokens
- max_output_tokens=65536, # 64K max output tokens
+ context_window=200_000,
+ max_output_tokens=65536,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
- supports_images=True, # O3 models support vision
- max_image_size_mb=20.0, # 20MB per OpenAI docs
- supports_temperature=False, # O3 models don't accept temperature parameter
+ supports_images=True,
+ max_image_size_mb=20.0,
+ supports_temperature=False,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
aliases=["o3mini"],
@@ -122,16 +122,16 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o3-pro",
friendly_name="OpenAI (O3-Pro)",
intelligence_score=15,
- context_window=200_000, # 200K tokens
- max_output_tokens=65536, # 64K max output tokens
+ context_window=200_000,
+ max_output_tokens=65536,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
- supports_images=True, # O3 models support vision
- max_image_size_mb=20.0, # 20MB per OpenAI docs
- supports_temperature=False, # O3 models don't accept temperature parameter
+ supports_images=True,
+ max_image_size_mb=20.0,
+ supports_temperature=False,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
aliases=["o3pro"],
@@ -141,16 +141,15 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="o4-mini",
friendly_name="OpenAI (O4-mini)",
intelligence_score=11,
- context_window=200_000, # 200K tokens
- max_output_tokens=65536, # 64K max output tokens
+ context_window=200_000,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
- supports_images=True, # O4 models support vision
- max_image_size_mb=20.0, # 20MB per OpenAI docs
- supports_temperature=False, # O4 models don't accept temperature parameter
+ supports_images=True,
+ max_image_size_mb=20.0,
+ supports_temperature=False,
temperature_constraint=TemperatureConstraint.create("fixed"),
description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
aliases=["o4mini"],
@@ -160,16 +159,16 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
model_name="gpt-4.1",
friendly_name="OpenAI (GPT 4.1)",
intelligence_score=13,
- context_window=1_000_000, # 1M tokens
+ context_window=1_000_000,
max_output_tokens=32_768,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
- supports_images=True, # GPT-4.1 supports vision
- max_image_size_mb=20.0, # 20MB per OpenAI docs
- supports_temperature=True, # Regular models accept temperature parameter
+ supports_images=True,
+ max_image_size_mb=20.0,
+ supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
aliases=["gpt4.1"],
@@ -178,19 +177,19 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5-codex",
friendly_name="OpenAI (GPT-5 Codex)",
- intelligence_score=17, # Higher than GPT-5 for coding tasks
- context_window=400_000, # 400K tokens (same as GPT-5)
- max_output_tokens=128_000, # 128K output tokens
- supports_extended_thinking=True, # Responses API supports reasoning tokens
+ intelligence_score=17,
+ context_window=400_000,
+ max_output_tokens=128_000,
+ supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
- supports_function_calling=True, # Enhanced for agentic software engineering
+ supports_function_calling=True,
supports_json_mode=True,
- supports_images=True, # Screenshots, wireframes, diagrams
- max_image_size_mb=20.0, # 20MB per OpenAI docs
+ supports_images=True,
+ max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=TemperatureConstraint.create("range"),
- description="GPT-5 Codex (400K context) - Uses Responses API for 40-80% cost savings. Specialized for coding, refactoring, and software architecture. 3% better performance on SWE-bench.",
+ description="GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.",
aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"],
),
}
@@ -282,7 +281,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
if category == ToolModelCategory.EXTENDED_REASONING:
# Prefer models with extended thinking support
- # GPT-5-Codex first for coding tasks (uses Responses API with 40-80% cost savings)
+ # GPT-5-Codex first for coding tasks
preferred = find_first(["gpt-5-codex", "o3", "o3-pro", "gpt-5"])
return preferred if preferred else allowed_models[0]
diff --git a/providers/openrouter.py b/providers/openrouter.py
index 186a0f9..6d7cb49 100644
--- a/providers/openrouter.py
+++ b/providers/openrouter.py
@@ -1,7 +1,6 @@
"""OpenRouter provider implementation."""
import logging
-from typing import Optional
from utils.env import get_env
@@ -42,7 +41,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
}
# Model registry for managing configurations and aliases
- _registry: Optional[OpenRouterModelRegistry] = None
+ _registry: OpenRouterModelRegistry | None = None
def __init__(self, api_key: str, **kwargs):
"""Initialize OpenRouter provider.
@@ -70,8 +69,8 @@ class OpenRouterProvider(OpenAICompatibleProvider):
def _lookup_capabilities(
self,
canonical_name: str,
- requested_name: Optional[str] = None,
- ) -> Optional[ModelCapabilities]:
+ requested_name: str | None = None,
+ ) -> ModelCapabilities | None:
"""Fetch OpenRouter capabilities from the registry or build a generic fallback."""
capabilities = self._registry.get_capabilities(canonical_name)
@@ -143,7 +142,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
# Custom models belong to CustomProvider; skip them here so the two
# providers don't race over the same registrations (important for tests
# that stub the registry with minimal objects lacking attrs).
- if hasattr(config, "is_custom") and config.is_custom is True:
+ if config.provider == ProviderType.CUSTOM:
continue
if restriction_service:
@@ -211,7 +210,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
continue
# See note in list_models: respect the CustomProvider boundary.
- if hasattr(config, "is_custom") and config.is_custom is True:
+ if config.provider == ProviderType.CUSTOM:
continue
capabilities[model_name] = config
diff --git a/providers/openrouter_registry.py b/providers/openrouter_registry.py
index d2cc219..25f8dbf 100644
--- a/providers/openrouter_registry.py
+++ b/providers/openrouter_registry.py
@@ -1,293 +1,38 @@
"""OpenRouter model registry for managing model configurations and aliases."""
-import importlib.resources
-import logging
-from pathlib import Path
-from typing import Optional
+from __future__ import annotations
-from utils.env import get_env
-
-# Import handled via importlib.resources.files() calls directly
-from utils.file_utils import read_json_file
-
-from .shared import (
- ModelCapabilities,
- ProviderType,
- TemperatureConstraint,
-)
+from .model_registry_base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry
+from .shared import ModelCapabilities, ProviderType
-class OpenRouterModelRegistry:
- """In-memory view of OpenRouter and custom model metadata.
+class OpenRouterModelRegistry(CapabilityModelRegistry):
+ """Capability registry backed by `conf/openrouter_models.json`."""
- Role
- Parse the packaged ``conf/custom_models.json`` (or user-specified
- overrides), construct alias and capability maps, and serve those
- structures to providers that rely on OpenRouter semantics (both the
- OpenRouter provider itself and the Custom provider).
+ def __init__(self, config_path: str | None = None) -> None:
+ super().__init__(
+ env_var_name="OPENROUTER_MODELS_CONFIG_PATH",
+ default_filename="openrouter_models.json",
+ provider=ProviderType.OPENROUTER,
+ friendly_prefix="OpenRouter ({model})",
+ config_path=config_path,
+ )
- Key duties
- * Load :class:`ModelCapabilities` definitions from configuration files
- * Maintain a case-insensitive alias → canonical name map for fast
- resolution
- * Provide helpers to list models, list aliases, and resolve an arbitrary
- name to its capability object without repeatedly touching the file
- system.
- """
-
- def __init__(self, config_path: Optional[str] = None):
- """Initialize the registry.
-
- Args:
- config_path: Path to config file. If None, uses default locations.
- """
- self.alias_map: dict[str, str] = {} # alias -> model_name
- self.model_map: dict[str, ModelCapabilities] = {} # model_name -> config
-
- # Determine config path and loading strategy
- self.use_resources = False
- if config_path:
- # Direct config_path parameter
- self.config_path = Path(config_path)
+ def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]:
+ provider_override = entry.get("provider")
+ if isinstance(provider_override, str):
+ entry_provider = ProviderType(provider_override.lower())
+ elif isinstance(provider_override, ProviderType):
+ entry_provider = provider_override
else:
- # Check environment variable first
- env_path = get_env("CUSTOM_MODELS_CONFIG_PATH")
- if env_path:
- # Environment variable path
- self.config_path = Path(env_path)
- else:
- # Try importlib.resources for robust packaging support
- self.config_path = None
- self.use_resources = False
+ entry_provider = ProviderType.OPENROUTER
- try:
- resource_traversable = importlib.resources.files("conf").joinpath("custom_models.json")
- if hasattr(resource_traversable, "read_text"):
- self.use_resources = True
- else:
- raise AttributeError("read_text not available")
- except Exception:
- pass
+ if entry_provider == ProviderType.CUSTOM:
+ entry.setdefault("friendly_name", f"Custom ({entry['model_name']})")
+ else:
+ entry.setdefault("friendly_name", f"OpenRouter ({entry['model_name']})")
- if not self.use_resources:
- # Fallback to file system paths
- potential_paths = [
- Path(__file__).parent.parent / "conf" / "custom_models.json",
- Path.cwd() / "conf" / "custom_models.json",
- ]
-
- for path in potential_paths:
- if path.exists():
- self.config_path = path
- break
-
- if self.config_path is None:
- self.config_path = potential_paths[0]
-
- # Load configuration
- self.reload()
-
- def reload(self) -> None:
- """Reload configuration from disk."""
- try:
- configs = self._read_config()
- self._build_maps(configs)
- caller_info = ""
- try:
- import inspect
-
- caller_frame = inspect.currentframe().f_back
- if caller_frame:
- caller_name = caller_frame.f_code.co_name
- caller_file = (
- caller_frame.f_code.co_filename.split("/")[-1] if caller_frame.f_code.co_filename else "unknown"
- )
- # Look for tool context
- while caller_frame:
- frame_locals = caller_frame.f_locals
- if "self" in frame_locals and hasattr(frame_locals["self"], "get_name"):
- tool_name = frame_locals["self"].get_name()
- caller_info = f" (called from {tool_name} tool)"
- break
- caller_frame = caller_frame.f_back
- if not caller_info:
- caller_info = f" (called from {caller_name} in {caller_file})"
- except Exception:
- # If frame inspection fails, just continue without caller info
- pass
-
- logging.debug(
- f"Loaded {len(self.model_map)} OpenRouter models with {len(self.alias_map)} aliases{caller_info}"
- )
- except ValueError as e:
- # Re-raise ValueError only for duplicate aliases (critical config errors)
- logging.error(f"Failed to load OpenRouter model configuration: {e}")
- # Initialize with empty maps on failure
- self.alias_map = {}
- self.model_map = {}
- if "Duplicate alias" in str(e):
- raise
- except Exception as e:
- logging.error(f"Failed to load OpenRouter model configuration: {e}")
- # Initialize with empty maps on failure
- self.alias_map = {}
- self.model_map = {}
-
- def _read_config(self) -> list[ModelCapabilities]:
- """Read configuration from file or package resources.
-
- Returns:
- List of model configurations
- """
- try:
- if self.use_resources:
- # Use importlib.resources for packaged environments
- try:
- resource_path = importlib.resources.files("conf").joinpath("custom_models.json")
- if hasattr(resource_path, "read_text"):
- # Python 3.9+
- config_text = resource_path.read_text(encoding="utf-8")
- else:
- # Python 3.8 fallback
- with resource_path.open("r", encoding="utf-8") as f:
- config_text = f.read()
-
- import json
-
- data = json.loads(config_text)
- logging.debug("Loaded OpenRouter config from package resources")
- except Exception as e:
- logging.warning(f"Failed to load config from resources: {e}")
- return []
- else:
- # Use file path loading
- if not self.config_path.exists():
- logging.warning(f"OpenRouter model config not found at {self.config_path}")
- return []
-
- # Use centralized JSON reading utility
- data = read_json_file(str(self.config_path))
- logging.debug(f"Loaded OpenRouter config from file: {self.config_path}")
-
- if data is None:
- location = "resources" if self.use_resources else str(self.config_path)
- raise ValueError(f"Could not read or parse JSON from {location}")
-
- # Parse models
- configs = []
- for model_data in data.get("models", []):
- # Create ModelCapabilities directly from JSON data
- # Handle temperature_constraint conversion
- temp_constraint_str = model_data.get("temperature_constraint")
- temp_constraint = TemperatureConstraint.create(temp_constraint_str or "range")
-
- # Set provider-specific defaults based on is_custom flag
- is_custom = model_data.get("is_custom", False)
- if is_custom:
- model_data.setdefault("provider", ProviderType.CUSTOM)
- model_data.setdefault("friendly_name", f"Custom ({model_data.get('model_name', 'Unknown')})")
- else:
- model_data.setdefault("provider", ProviderType.OPENROUTER)
- model_data.setdefault("friendly_name", f"OpenRouter ({model_data.get('model_name', 'Unknown')})")
- model_data["temperature_constraint"] = temp_constraint
-
- # Remove the string version of temperature_constraint before creating ModelCapabilities
- if "temperature_constraint" in model_data and isinstance(model_data["temperature_constraint"], str):
- del model_data["temperature_constraint"]
- model_data["temperature_constraint"] = temp_constraint
-
- config = ModelCapabilities(**model_data)
- configs.append(config)
-
- return configs
- except ValueError:
- # Re-raise ValueError for specific config errors
- raise
- except Exception as e:
- location = "resources" if self.use_resources else str(self.config_path)
- raise ValueError(f"Error reading config from {location}: {e}")
-
- def _build_maps(self, configs: list[ModelCapabilities]) -> None:
- """Build alias and model maps from configurations.
-
- Args:
- configs: List of model configurations
- """
- alias_map = {}
- model_map = {}
-
- for config in configs:
- # Add to model map
- model_map[config.model_name] = config
-
- # Add the model_name itself as an alias for case-insensitive lookup
- # But only if it's not already in the aliases list
- model_name_lower = config.model_name.lower()
- aliases_lower = [alias.lower() for alias in config.aliases]
-
- if model_name_lower not in aliases_lower:
- if model_name_lower in alias_map:
- existing_model = alias_map[model_name_lower]
- if existing_model != config.model_name:
- raise ValueError(
- f"Duplicate model name '{config.model_name}' (case-insensitive) found for models "
- f"'{existing_model}' and '{config.model_name}'"
- )
- else:
- alias_map[model_name_lower] = config.model_name
-
- # Add aliases
- for alias in config.aliases:
- alias_lower = alias.lower()
- if alias_lower in alias_map:
- existing_model = alias_map[alias_lower]
- raise ValueError(
- f"Duplicate alias '{alias}' found for models '{existing_model}' and '{config.model_name}'"
- )
- alias_map[alias_lower] = config.model_name
-
- # Atomic update
- self.alias_map = alias_map
- self.model_map = model_map
-
- def resolve(self, name_or_alias: str) -> Optional[ModelCapabilities]:
- """Resolve a model name or alias to configuration.
-
- Args:
- name_or_alias: Model name or alias to resolve
-
- Returns:
- Model configuration if found, None otherwise
- """
- # Try alias lookup (case-insensitive) - this now includes model names too
- alias_lower = name_or_alias.lower()
- if alias_lower in self.alias_map:
- model_name = self.alias_map[alias_lower]
- return self.model_map.get(model_name)
-
- return None
-
- def get_capabilities(self, name_or_alias: str) -> Optional[ModelCapabilities]:
- """Get model capabilities for a name or alias.
-
- Args:
- name_or_alias: Model name or alias
-
- Returns:
- ModelCapabilities if found, None otherwise
- """
- # Registry now returns ModelCapabilities directly
- return self.resolve(name_or_alias)
-
- def get_model_config(self, name_or_alias: str) -> Optional[ModelCapabilities]:
- """Backward-compatible wrapper used by providers and older tests."""
-
- return self.resolve(name_or_alias)
-
- def list_models(self) -> list[str]:
- """List all available model names."""
- return list(self.model_map.keys())
-
- def list_aliases(self) -> list[str]:
- """List all available aliases."""
- return list(self.alias_map.keys())
+ filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}
+ filtered.setdefault("provider", entry_provider)
+ capability = ModelCapabilities(**filtered)
+ return capability, {}
diff --git a/providers/registry.py b/providers/registry.py
index 708de5c..cd28c42 100644
--- a/providers/registry.py
+++ b/providers/registry.py
@@ -38,6 +38,7 @@ class ModelProviderRegistry:
PROVIDER_PRIORITY_ORDER = [
ProviderType.GOOGLE, # Direct Gemini access
ProviderType.OPENAI, # Direct OpenAI access
+ ProviderType.AZURE, # Azure-hosted OpenAI deployments
ProviderType.XAI, # Direct X.AI GROK access
ProviderType.DIAL, # DIAL unified API access
ProviderType.CUSTOM, # Local/self-hosted models
@@ -123,6 +124,21 @@ class ModelProviderRegistry:
provider_kwargs["base_url"] = gemini_base_url
logging.info(f"Initialized Gemini provider with custom endpoint: {gemini_base_url}")
provider = provider_class(**provider_kwargs)
+ elif provider_type == ProviderType.AZURE:
+ if not api_key:
+ return None
+
+ azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT")
+ if not azure_endpoint:
+ logging.warning("AZURE_OPENAI_ENDPOINT missing – skipping Azure OpenAI provider")
+ return None
+
+ azure_version = get_env("AZURE_OPENAI_API_VERSION")
+ provider = provider_class(
+ api_key=api_key,
+ azure_endpoint=azure_endpoint,
+ api_version=azure_version,
+ )
else:
if not api_key:
return None
@@ -318,6 +334,7 @@ class ModelProviderRegistry:
key_mapping = {
ProviderType.GOOGLE: "GEMINI_API_KEY",
ProviderType.OPENAI: "OPENAI_API_KEY",
+ ProviderType.AZURE: "AZURE_OPENAI_API_KEY",
ProviderType.XAI: "XAI_API_KEY",
ProviderType.OPENROUTER: "OPENROUTER_API_KEY",
ProviderType.CUSTOM: "CUSTOM_API_KEY", # Can be empty for providers that don't need auth
diff --git a/providers/shared/model_capabilities.py b/providers/shared/model_capabilities.py
index e06d3db..cf7e208 100644
--- a/providers/shared/model_capabilities.py
+++ b/providers/shared/model_capabilities.py
@@ -53,7 +53,6 @@ class ModelCapabilities:
# Additional attributes
max_image_size_mb: float = 0.0
- is_custom: bool = False
temperature_constraint: TemperatureConstraint = field(
default_factory=lambda: RangeTemperatureConstraint(0.0, 2.0, 0.3)
)
@@ -102,9 +101,6 @@ class ModelCapabilities:
if self.supports_images:
score += 1
- if self.is_custom:
- score -= 1
-
return max(0, min(100, score))
@staticmethod
diff --git a/providers/shared/provider_type.py b/providers/shared/provider_type.py
index 44153f0..a1b3137 100644
--- a/providers/shared/provider_type.py
+++ b/providers/shared/provider_type.py
@@ -10,6 +10,7 @@ class ProviderType(Enum):
GOOGLE = "google"
OPENAI = "openai"
+ AZURE = "azure"
XAI = "xai"
OPENROUTER = "openrouter"
CUSTOM = "custom"
diff --git a/pyproject.toml b/pyproject.toml
index 9d6c113..f153b79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ py-modules = ["server", "config"]
"*" = ["conf/*.json"]
[tool.setuptools.data-files]
-"conf" = ["conf/custom_models.json"]
+"conf" = ["conf/custom_models.json", "conf/openrouter_models.json", "conf/azure_models.json"]
[project.scripts]
zen-mcp-server = "server:run"
diff --git a/server.py b/server.py
index b60f445..eb32ac2 100644
--- a/server.py
+++ b/server.py
@@ -377,6 +377,7 @@ def configure_providers():
value = get_env(key)
logger.debug(f" {key}: {'[PRESENT]' if value else '[MISSING]'}")
from providers import ModelProviderRegistry
+ from providers.azure_openai import AzureOpenAIProvider
from providers.custom import CustomProvider
from providers.dial import DIALModelProvider
from providers.gemini import GeminiModelProvider
@@ -411,6 +412,27 @@ def configure_providers():
else:
logger.debug("OpenAI API key is placeholder value")
+ # Check for Azure OpenAI configuration
+ azure_key = get_env("AZURE_OPENAI_API_KEY")
+ azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT")
+ azure_models_available = False
+ if azure_key and azure_key != "your_azure_openai_key_here" and azure_endpoint:
+ try:
+ from providers.azure_registry import AzureModelRegistry
+
+ azure_registry = AzureModelRegistry()
+ if azure_registry.list_models():
+ valid_providers.append("Azure OpenAI")
+ has_native_apis = True
+ azure_models_available = True
+ logger.info("Azure OpenAI configuration detected")
+ else:
+ logger.warning(
+ "Azure OpenAI models configuration is empty. Populate conf/azure_models.json or set AZURE_MODELS_CONFIG_PATH."
+ )
+ except Exception as exc:
+ logger.warning(f"Failed to load Azure OpenAI models: {exc}")
+
# Check for X.AI API key
xai_key = get_env("XAI_API_KEY")
if xai_key and xai_key != "your_xai_api_key_here":
@@ -468,6 +490,10 @@ def configure_providers():
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
registered_providers.append(ProviderType.OPENAI.value)
logger.debug(f"Registered provider: {ProviderType.OPENAI.value}")
+ if azure_models_available:
+ ModelProviderRegistry.register_provider(ProviderType.AZURE, AzureOpenAIProvider)
+ registered_providers.append(ProviderType.AZURE.value)
+ logger.debug(f"Registered provider: {ProviderType.AZURE.value}")
if xai_key and xai_key != "your_xai_api_key_here":
ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider)
registered_providers.append(ProviderType.XAI.value)
diff --git a/tests/test_auto_mode_model_listing.py b/tests/test_auto_mode_model_listing.py
index 8df0542..dec487f 100644
--- a/tests/test_auto_mode_model_listing.py
+++ b/tests/test_auto_mode_model_listing.py
@@ -64,6 +64,14 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
monkeypatch.setenv("OPENAI_API_KEY", "test-openai")
monkeypatch.setenv("OPENROUTER_API_KEY", "test-openrouter")
monkeypatch.delenv("XAI_API_KEY", raising=False)
+ # Ensure Azure provider stays disabled regardless of developer workstation env
+ for azure_var in (
+ "AZURE_OPENAI_API_KEY",
+ "AZURE_OPENAI_ENDPOINT",
+ "AZURE_OPENAI_ALLOWED_MODELS",
+ "AZURE_MODELS_CONFIG_PATH",
+ ):
+ monkeypatch.delenv(azure_var, raising=False)
monkeypatch.setenv("ZEN_MCP_FORCE_ENV_OVERRIDE", "false")
env_config.reload_env({"ZEN_MCP_FORCE_ENV_OVERRIDE": "false"})
try:
@@ -103,6 +111,13 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry):
for var in ("XAI_API_KEY", "CUSTOM_API_URL", "CUSTOM_API_KEY", "DIAL_API_KEY"):
monkeypatch.delenv(var, raising=False)
+ for azure_var in (
+ "AZURE_OPENAI_API_KEY",
+ "AZURE_OPENAI_ENDPOINT",
+ "AZURE_OPENAI_ALLOWED_MODELS",
+ "AZURE_MODELS_CONFIG_PATH",
+ ):
+ monkeypatch.delenv(azure_var, raising=False)
ModelProviderRegistry.reset_for_testing()
model_restrictions._restriction_service = None
@@ -136,6 +151,13 @@ def test_error_listing_without_restrictions_shows_full_catalog(monkeypatch, rese
monkeypatch.setenv("OPENROUTER_API_KEY", "test-openrouter")
monkeypatch.setenv("XAI_API_KEY", "test-xai")
monkeypatch.setenv("ZEN_MCP_FORCE_ENV_OVERRIDE", "false")
+ for azure_var in (
+ "AZURE_OPENAI_API_KEY",
+ "AZURE_OPENAI_ENDPOINT",
+ "AZURE_OPENAI_ALLOWED_MODELS",
+ "AZURE_MODELS_CONFIG_PATH",
+ ):
+ monkeypatch.delenv(azure_var, raising=False)
env_config.reload_env({"ZEN_MCP_FORCE_ENV_OVERRIDE": "false"})
try:
import dotenv
diff --git a/tests/test_azure_openai_provider.py b/tests/test_azure_openai_provider.py
new file mode 100644
index 0000000..1d154e0
--- /dev/null
+++ b/tests/test_azure_openai_provider.py
@@ -0,0 +1,145 @@
+import sys
+import types
+
+import pytest
+
+if "openai" not in sys.modules: # pragma: no cover - test shim for optional dependency
+ stub = types.ModuleType("openai")
+ stub.AzureOpenAI = object # Replaced with a mock inside tests
+ sys.modules["openai"] = stub
+
+from providers.azure_openai import AzureOpenAIProvider
+from providers.shared import ModelCapabilities, ProviderType
+
+
+class _DummyResponse:
+ def __init__(self):
+ self.choices = [
+ types.SimpleNamespace(
+ message=types.SimpleNamespace(content="hello"),
+ finish_reason="stop",
+ )
+ ]
+ self.model = "prod-gpt4o"
+ self.id = "resp-123"
+ self.created = 0
+ self.usage = types.SimpleNamespace(
+ prompt_tokens=5,
+ completion_tokens=3,
+ total_tokens=8,
+ )
+
+
+@pytest.fixture
+def dummy_azure_client(monkeypatch):
+ captured = {}
+
+ class _DummyAzureClient:
+ def __init__(self, **kwargs):
+ captured["client_kwargs"] = kwargs
+ self.chat = types.SimpleNamespace(completions=types.SimpleNamespace(create=self._create_completion))
+ self.responses = types.SimpleNamespace(create=self._create_response)
+
+ def _create_completion(self, **kwargs):
+ captured["request_kwargs"] = kwargs
+ return _DummyResponse()
+
+ def _create_response(self, **kwargs):
+ captured["responses_kwargs"] = kwargs
+ return _DummyResponse()
+
+ monkeypatch.delenv("AZURE_OPENAI_ALLOWED_MODELS", raising=False)
+ monkeypatch.setattr("providers.azure_openai.AzureOpenAI", _DummyAzureClient)
+ return captured
+
+
+def test_generate_content_uses_deployment_mapping(dummy_azure_client):
+ provider = AzureOpenAIProvider(
+ api_key="key",
+ azure_endpoint="https://example.openai.azure.com/",
+ deployments={"gpt-4o": "prod-gpt4o"},
+ )
+
+ result = provider.generate_content("hello", "gpt-4o")
+
+ assert dummy_azure_client["request_kwargs"]["model"] == "prod-gpt4o"
+ assert result.model_name == "gpt-4o"
+ assert result.provider == ProviderType.AZURE
+ assert provider.validate_model_name("prod-gpt4o")
+
+
+def test_generate_content_accepts_deployment_alias(dummy_azure_client):
+ provider = AzureOpenAIProvider(
+ api_key="key",
+ azure_endpoint="https://example.openai.azure.com/",
+ deployments={"gpt-4o-mini": "mini-deployment"},
+ )
+
+ # Calling with the deployment alias should still resolve properly.
+ result = provider.generate_content("hi", "mini-deployment")
+
+ assert dummy_azure_client["request_kwargs"]["model"] == "mini-deployment"
+ assert result.model_name == "gpt-4o-mini"
+
+
+def test_client_initialization_uses_endpoint_and_version(dummy_azure_client):
+ provider = AzureOpenAIProvider(
+ api_key="key",
+ azure_endpoint="https://example.openai.azure.com/",
+ api_version="2024-03-15-preview",
+ deployments={"gpt-4o": "prod"},
+ )
+
+ _ = provider.client
+
+ assert dummy_azure_client["client_kwargs"]["azure_endpoint"] == "https://example.openai.azure.com"
+ assert dummy_azure_client["client_kwargs"]["api_version"] == "2024-03-15-preview"
+
+
+def test_deployment_overrides_capabilities(dummy_azure_client):
+ provider = AzureOpenAIProvider(
+ api_key="key",
+ azure_endpoint="https://example.openai.azure.com/",
+ deployments={
+ "gpt-4o": {
+ "deployment": "prod-gpt4o",
+ "friendly_name": "Azure GPT-4o EU",
+ "intelligence_score": 19,
+ "supports_temperature": False,
+ "temperature_constraint": "fixed",
+ }
+ },
+ )
+
+ caps = provider.get_capabilities("gpt-4o")
+ assert caps.friendly_name == "Azure GPT-4o EU"
+ assert caps.intelligence_score == 19
+ assert not caps.supports_temperature
+
+
+def test_registry_configuration_merges_capabilities(dummy_azure_client, monkeypatch):
+ def fake_registry_entries(self):
+ capability = ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="gpt-4o",
+ friendly_name="Azure GPT-4o Registry",
+ context_window=500_000,
+ max_output_tokens=128_000,
+ )
+ return {"gpt-4o": {"deployment": "registry-deployment", "capability": capability}}
+
+ monkeypatch.setattr(AzureOpenAIProvider, "_load_registry_entries", fake_registry_entries)
+
+ provider = AzureOpenAIProvider(
+ api_key="key",
+ azure_endpoint="https://example.openai.azure.com/",
+ )
+
+ # Capability should come from registry
+ caps = provider.get_capabilities("gpt-4o")
+ assert caps.friendly_name == "Azure GPT-4o Registry"
+ assert caps.context_window == 500_000
+
+ # API call should use deployment defined in registry
+ provider.generate_content("hello", "gpt-4o")
+ assert dummy_azure_client["request_kwargs"]["model"] == "registry-deployment"
diff --git a/tests/test_custom_openai_temperature_fix.py b/tests/test_custom_openai_temperature_fix.py
index b13441f..8f933d9 100644
--- a/tests/test_custom_openai_temperature_fix.py
+++ b/tests/test_custom_openai_temperature_fix.py
@@ -34,8 +34,7 @@ class TestCustomOpenAITemperatureParameterFix:
config_models = [
{
"model_name": "gpt-5-2025-08-07",
- "provider": "ProviderType.OPENAI",
- "is_custom": True,
+ "provider": "openai",
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": True,
diff --git a/tests/test_custom_provider.py b/tests/test_custom_provider.py
index 2733e2c..6683259 100644
--- a/tests/test_custom_provider.py
+++ b/tests/test_custom_provider.py
@@ -62,9 +62,9 @@ class TestCustomProvider:
with pytest.raises(ValueError):
provider.get_capabilities("o3")
- # Test with a custom model (is_custom=true)
+ # Test with a custom model from the local registry
capabilities = provider.get_capabilities("local-llama")
- assert capabilities.provider == ProviderType.CUSTOM # local-llama has is_custom=true
+ assert capabilities.provider == ProviderType.CUSTOM
assert capabilities.context_window > 0
finally:
diff --git a/tests/test_model_enumeration.py b/tests/test_model_enumeration.py
index 0b95154..790387a 100644
--- a/tests/test_model_enumeration.py
+++ b/tests/test_model_enumeration.py
@@ -181,7 +181,7 @@ class TestModelEnumeration:
# Configure environment with OpenRouter access only
self._setup_environment({"OPENROUTER_API_KEY": "test-openrouter-key"})
- # Create a temporary custom model config with a free variant
+ # Create a temporary OpenRouter model config with a free variant
custom_config = {
"models": [
{
@@ -199,9 +199,9 @@ class TestModelEnumeration:
]
}
- config_path = tmp_path / "custom_models.json"
+ config_path = tmp_path / "openrouter_models.json"
config_path.write_text(json.dumps(custom_config), encoding="utf-8")
- monkeypatch.setenv("CUSTOM_MODELS_CONFIG_PATH", str(config_path))
+ monkeypatch.setenv("OPENROUTER_MODELS_CONFIG_PATH", str(config_path))
# Reset cached registries so the temporary config is loaded
from tools.shared.base_tool import BaseTool
diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py
index 6096764..a04c389 100644
--- a/tests/test_model_restrictions.py
+++ b/tests/test_model_restrictions.py
@@ -366,8 +366,8 @@ class TestCustomProviderOpenRouterRestrictions:
assert not provider.validate_model_name("sonnet")
assert not provider.validate_model_name("haiku")
- # Should still validate custom models (is_custom=true) regardless of restrictions
- assert provider.validate_model_name("local-llama") # This has is_custom=true
+ # Should still validate custom models defined in conf/custom_models.json
+ assert provider.validate_model_name("local-llama")
@patch.dict(os.environ, {"OPENROUTER_ALLOWED_MODELS": "opus", "OPENROUTER_API_KEY": "test-key"})
def test_custom_provider_openrouter_capabilities_restrictions(self):
@@ -389,7 +389,7 @@ class TestCustomProviderOpenRouterRestrictions:
with pytest.raises(ValueError):
provider.get_capabilities("haiku")
- # Should still work for custom models (is_custom=true)
+ # Should still work for custom models
capabilities = provider.get_capabilities("local-llama")
assert capabilities.provider == ProviderType.CUSTOM
diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py
index f38d3e8..4c57f81 100644
--- a/tests/test_openrouter_provider.py
+++ b/tests/test_openrouter_provider.py
@@ -172,7 +172,7 @@ class TestOpenRouterAutoMode:
def mock_resolve(model_name):
if model_name in model_names:
mock_config = Mock()
- mock_config.is_custom = False
+ mock_config.provider = ProviderType.OPENROUTER
mock_config.aliases = [] # Empty list of aliases
mock_config.get_effective_capability_rank = Mock(return_value=50) # Add ranking method
return mock_config
diff --git a/tests/test_openrouter_registry.py b/tests/test_openrouter_registry.py
index 866cc3f..bb41ce8 100644
--- a/tests/test_openrouter_registry.py
+++ b/tests/test_openrouter_registry.py
@@ -3,6 +3,7 @@
import json
import os
import tempfile
+from unittest.mock import patch
import pytest
@@ -49,7 +50,7 @@ class TestOpenRouterModelRegistry:
os.unlink(temp_path)
def test_environment_variable_override(self):
- """Test OPENROUTER_MODELS_PATH environment variable."""
+ """Test OPENROUTER_MODELS_CONFIG_PATH environment variable."""
# Create custom config
config_data = {
"models": [
@@ -63,8 +64,8 @@ class TestOpenRouterModelRegistry:
try:
# Set environment variable
- original_env = os.environ.get("CUSTOM_MODELS_CONFIG_PATH")
- os.environ["CUSTOM_MODELS_CONFIG_PATH"] = temp_path
+ original_env = os.environ.get("OPENROUTER_MODELS_CONFIG_PATH")
+ os.environ["OPENROUTER_MODELS_CONFIG_PATH"] = temp_path
# Create registry without explicit path
registry = OpenRouterModelRegistry()
@@ -76,9 +77,9 @@ class TestOpenRouterModelRegistry:
finally:
# Restore environment
if original_env is not None:
- os.environ["CUSTOM_MODELS_CONFIG_PATH"] = original_env
+ os.environ["OPENROUTER_MODELS_CONFIG_PATH"] = original_env
else:
- del os.environ["CUSTOM_MODELS_CONFIG_PATH"]
+ del os.environ["OPENROUTER_MODELS_CONFIG_PATH"]
os.unlink(temp_path)
def test_alias_resolution(self):
@@ -161,7 +162,7 @@ class TestOpenRouterModelRegistry:
os.unlink(temp_path)
def test_backwards_compatibility_max_tokens(self):
- """Test that old max_tokens field is no longer supported (should result in empty registry)."""
+ """Test that legacy max_tokens field maps to max_output_tokens."""
config_data = {
"models": [
{
@@ -178,19 +179,17 @@ class TestOpenRouterModelRegistry:
temp_path = f.name
try:
- # Should gracefully handle the error and result in empty registry
- registry = OpenRouterModelRegistry(config_path=temp_path)
- # Registry should be empty due to config error
- assert len(registry.list_models()) == 0
- assert len(registry.list_aliases()) == 0
- assert registry.resolve("old") is None
+ with patch.dict("os.environ", {}, clear=True):
+ with pytest.raises(ValueError, match="max_output_tokens"):
+ OpenRouterModelRegistry(config_path=temp_path)
finally:
os.unlink(temp_path)
def test_missing_config_file(self):
"""Test behavior with missing config file."""
# Use a non-existent path
- registry = OpenRouterModelRegistry(config_path="/non/existent/path.json")
+ with patch.dict("os.environ", {}, clear=True):
+ registry = OpenRouterModelRegistry(config_path="/non/existent/path.json")
# Should initialize with empty maps
assert len(registry.list_models()) == 0
diff --git a/tests/test_uvx_resource_packaging.py b/tests/test_uvx_resource_packaging.py
index 86df066..bbc0571 100644
--- a/tests/test_uvx_resource_packaging.py
+++ b/tests/test_uvx_resource_packaging.py
@@ -1,5 +1,7 @@
"""Tests for uvx path resolution functionality."""
+import json
+import tempfile
from pathlib import Path
from unittest.mock import patch
@@ -18,8 +20,8 @@ class TestUvxPathResolution:
def test_config_path_resolution(self):
"""Test that the config path resolution finds the config file in multiple locations."""
# Check that the config file exists in the development location
- config_file = Path(__file__).parent.parent / "conf" / "custom_models.json"
- assert config_file.exists(), "Config file should exist in conf/custom_models.json"
+ config_file = Path(__file__).parent.parent / "conf" / "openrouter_models.json"
+ assert config_file.exists(), "Config file should exist in conf/openrouter_models.json"
# Test that a registry can find and use the config
registry = OpenRouterModelRegistry()
@@ -34,7 +36,7 @@ class TestUvxPathResolution:
def test_explicit_config_path_override(self):
"""Test that explicit config path works correctly."""
- config_path = Path(__file__).parent.parent / "conf" / "custom_models.json"
+ config_path = Path(__file__).parent.parent / "conf" / "openrouter_models.json"
registry = OpenRouterModelRegistry(config_path=str(config_path))
@@ -44,41 +46,62 @@ class TestUvxPathResolution:
def test_environment_variable_override(self):
"""Test that CUSTOM_MODELS_CONFIG_PATH environment variable works."""
- config_path = Path(__file__).parent.parent / "conf" / "custom_models.json"
+ config_path = Path(__file__).parent.parent / "conf" / "openrouter_models.json"
- with patch.dict("os.environ", {"CUSTOM_MODELS_CONFIG_PATH": str(config_path)}):
+ with patch.dict("os.environ", {"OPENROUTER_MODELS_CONFIG_PATH": str(config_path)}):
registry = OpenRouterModelRegistry()
# Should use environment path
assert registry.config_path == config_path
assert len(registry.list_models()) > 0
- @patch("providers.openrouter_registry.importlib.resources.files")
- @patch("pathlib.Path.exists")
- def test_multiple_path_fallback(self, mock_exists, mock_files):
- """Test that multiple path resolution works for different deployment scenarios."""
- # Make resources loading fail to trigger file system fallback
+ @patch("providers.model_registry_base.importlib.resources.files")
+ def test_multiple_path_fallback(self, mock_files):
+ """Test that file-system fallback works when resource loading fails."""
mock_files.side_effect = Exception("Resource loading failed")
- # Simulate dev path failing, and working directory path succeeding
- # The third `True` is for the check within `reload()`
- mock_exists.side_effect = [False, True, True]
+ with tempfile.TemporaryDirectory() as tmpdir:
+ temp_dir = Path(tmpdir)
+ conf_dir = temp_dir / "conf"
+ conf_dir.mkdir(parents=True, exist_ok=True)
+ config_path = conf_dir / "openrouter_models.json"
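+            # Write a minimal single-model config for the fallback search to discover in the temp working directory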
+ config_path.write_text(
+ json.dumps(
+ {
+ "models": [
+ {
+ "model_name": "test/model",
+ "aliases": ["testalias"],
+ "context_window": 1024,
+ "max_output_tokens": 512,
+ }
+ ]
+ },
+ indent=2,
+ )
+ )
- registry = OpenRouterModelRegistry()
+ original_exists = Path.exists
- # Should have fallen back to file system mode
- assert not registry.use_resources, "Should fall back to file system when resources fail"
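+            # Report every other conf/openrouter_models.json as missing so only the temp-dir copy is discovered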
+ def fake_exists(path_self):
+ if str(path_self).endswith("conf/openrouter_models.json") and path_self != config_path:
+ return False
+ if path_self == config_path:
+ return True
+ return original_exists(path_self)
- # Assert that the registry fell back to the second potential path
- assert registry.config_path == Path.cwd() / "conf" / "custom_models.json"
+ with patch("pathlib.Path.cwd", return_value=temp_dir), patch("pathlib.Path.exists", fake_exists):
+ registry = OpenRouterModelRegistry()
- # Should load models successfully
- assert len(registry.list_models()) > 0
+ assert not registry.use_resources
+ assert registry.config_path == config_path
+ assert "test/model" in registry.list_models()
def test_missing_config_handling(self):
"""Test behavior when config file is missing."""
# Use a non-existent path
- registry = OpenRouterModelRegistry(config_path="/nonexistent/path/config.json")
+ with patch.dict("os.environ", {}, clear=True):
+ registry = OpenRouterModelRegistry(config_path="/nonexistent/path/config.json")
# Should gracefully handle missing config
assert len(registry.list_models()) == 0
diff --git a/tests/test_xai_provider.py b/tests/test_xai_provider.py
index 392be5b..b9cf06c 100644
--- a/tests/test_xai_provider.py
+++ b/tests/test_xai_provider.py
@@ -166,8 +166,10 @@ class TestXAIProvider:
"""Test model restrictions functionality."""
# Clear cached restriction service
import utils.model_restrictions
+ from providers.registry import ModelProviderRegistry
utils.model_restrictions._restriction_service = None
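+        # Reset the provider registry as well so cached provider state does not leak into this test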
+ ModelProviderRegistry.reset_for_testing()
provider = XAIModelProvider("test-key")
@@ -187,8 +189,10 @@ class TestXAIProvider:
"""Test multiple models in restrictions."""
# Clear cached restriction service
import utils.model_restrictions
+ from providers.registry import ModelProviderRegistry
utils.model_restrictions._restriction_service = None
+ ModelProviderRegistry.reset_for_testing()
provider = XAIModelProvider("test-key")
diff --git a/tools/listmodels.py b/tools/listmodels.py
index 9cbc990..ebdcc8d 100644
--- a/tools/listmodels.py
+++ b/tools/listmodels.py
@@ -11,6 +11,8 @@ from typing import Any, Optional
from mcp.types import TextContent
+from providers.custom_registry import CustomEndpointModelRegistry
+from providers.openrouter_registry import OpenRouterModelRegistry
from tools.models import ToolModelCategory, ToolOutput
from tools.shared.base_models import ToolRequest
from tools.shared.base_tool import BaseTool
@@ -80,7 +82,6 @@ class ListModelsTool(BaseTool):
Returns:
Formatted list of models by provider
"""
- from providers.openrouter_registry import OpenRouterModelRegistry
from providers.registry import ModelProviderRegistry
from providers.shared import ProviderType
from utils.model_restrictions import get_restriction_service
@@ -99,6 +100,7 @@ class ListModelsTool(BaseTool):
provider_info = {
ProviderType.GOOGLE: {"name": "Google Gemini", "env_key": "GEMINI_API_KEY"},
ProviderType.OPENAI: {"name": "OpenAI", "env_key": "OPENAI_API_KEY"},
+ ProviderType.AZURE: {"name": "Azure OpenAI", "env_key": "AZURE_OPENAI_API_KEY"},
ProviderType.XAI: {"name": "X.AI (Grok)", "env_key": "XAI_API_KEY"},
ProviderType.DIAL: {"name": "AI DIAL", "env_key": "DIAL_API_KEY"},
}
@@ -317,12 +319,12 @@ class ListModelsTool(BaseTool):
output_lines.append("**Description**: Local models via Ollama, vLLM, LM Studio, etc.")
try:
- registry = OpenRouterModelRegistry()
+ registry = CustomEndpointModelRegistry()
custom_models = []
for alias in registry.list_aliases():
config = registry.resolve(alias)
- if config and config.is_custom:
+ if config:
custom_models.append((alias, config))
if custom_models:
diff --git a/tools/shared/base_tool.py b/tools/shared/base_tool.py
index ac72d7d..d041e7b 100644
--- a/tools/shared/base_tool.py
+++ b/tools/shared/base_tool.py
@@ -82,6 +82,7 @@ class BaseTool(ABC):
# Class-level cache for OpenRouter registry to avoid multiple loads
_openrouter_registry_cache = None
+ _custom_registry_cache = None
@classmethod
def _get_openrouter_registry(cls):
@@ -94,6 +95,16 @@ class BaseTool(ABC):
logger.debug("Created cached OpenRouter registry instance")
return BaseTool._openrouter_registry_cache
+ @classmethod
+ def _get_custom_registry(cls):
+ """Get cached custom-endpoint registry instance."""
+ if BaseTool._custom_registry_cache is None:
+ from providers.custom_registry import CustomEndpointModelRegistry
+
+ BaseTool._custom_registry_cache = CustomEndpointModelRegistry()
+            logger.debug("Created cached custom-endpoint registry instance")
+ return BaseTool._custom_registry_cache
+
def __init__(self):
# Cache tool metadata at initialization to avoid repeated calls
self.name = self.get_name()
@@ -266,14 +277,10 @@ class BaseTool(ABC):
custom_url = get_env("CUSTOM_API_URL")
if custom_url:
try:
- registry = self._get_openrouter_registry()
- # Find all custom models (is_custom=true)
+ registry = self._get_custom_registry()
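+                    # The custom-endpoint registry only lists models served via CUSTOM_API_URL, so every alias belongs here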
for alias in registry.list_aliases():
- config = registry.resolve(alias)
- # Check if this is a custom model that requires custom endpoints
- if config and config.is_custom:
- if alias not in all_models:
- all_models.append(alias)
+ if alias not in all_models:
+ all_models.append(alias)
except Exception as e:
import logging
@@ -1282,12 +1289,7 @@ When recommending searches, be specific about what information you need and why
try:
registry = self._get_openrouter_registry()
- # Include every known alias so MCP enum matches registry capabilities
for alias in registry.list_aliases():
- config = registry.resolve(alias)
- if config and config.is_custom:
- # Custom-only models require CUSTOM_API_URL; defer to custom block
- continue
if alias not in all_models:
all_models.append(alias)
except Exception as exc: # pragma: no cover - logged for observability
@@ -1299,10 +1301,9 @@ When recommending searches, be specific about what information you need and why
custom_url = get_env("CUSTOM_API_URL")
if custom_url:
try:
- registry = self._get_openrouter_registry()
+ registry = self._get_custom_registry()
for alias in registry.list_aliases():
- config = registry.resolve(alias)
- if config and config.is_custom and alias not in all_models:
+ if alias not in all_models:
all_models.append(alias)
except Exception as exc: # pragma: no cover - logged for observability
import logging
diff --git a/utils/env.py b/utils/env.py
index 4cff03c..17aad87 100644
--- a/utils/env.py
+++ b/utils/env.py
@@ -4,6 +4,7 @@ from __future__ import annotations
import os
from collections.abc import Mapping
+from contextlib import contextmanager
from pathlib import Path
try:
@@ -86,3 +87,25 @@ def get_all_env() -> dict[str, str | None]:
"""Expose the loaded .env mapping for diagnostics/logging."""
return dict(_DOTENV_VALUES)
+
+
+@contextmanager
+def suppress_env_vars(*names: str):
+    """Temporarily remove environment variables, restoring them when the context exits.
+
+ Args:
+ names: Environment variable names to remove. Empty or falsy names are ignored.
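+
+    Example:
+        with suppress_env_vars("GEMINI_API_KEY", "OPENAI_API_KEY"):
+            ...  # code in this block sees neither variable; originals are restored on exit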
+ """
+
+ removed: dict[str, str] = {}
+ try:
+ for name in names:
+ if not name:
+ continue
+ if name in os.environ:
+ removed[name] = os.environ[name]
+ del os.environ[name]
+ yield
+ finally:
+ for name, value in removed.items():
+ os.environ[name] = value