diff --git a/.env.example b/.env.example index 7d6b301..51cfa1a 100644 --- a/.env.example +++ b/.env.example @@ -17,6 +17,15 @@ GEMINI_API_KEY=your_gemini_api_key_here # Get your OpenAI API key from: https://platform.openai.com/api-keys OPENAI_API_KEY=your_openai_api_key_here +# Azure OpenAI mirrors OpenAI models through Azure-hosted deployments +# Set the endpoint from Azure Portal. Models are defined in conf/azure_models.json +# (or the file referenced by AZURE_MODELS_CONFIG_PATH). +AZURE_OPENAI_API_KEY=your_azure_openai_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +# AZURE_OPENAI_API_VERSION=2024-02-15-preview +# AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini +# AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json + # Get your X.AI API key from: https://console.x.ai/ XAI_API_KEY=your_xai_api_key_here diff --git a/README.md b/README.md index 8f6b131..e9e4f27 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [zen_web.webm](https://github.com/user-attachments/assets/851e3911-7f06-47c0-a4ab-a2601236697c)
- 🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team
+ 🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Azure / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team

@@ -85,6 +85,7 @@ For best results, use Claude Code with: - **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API - **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models - **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series +- **[Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)** - Enterprise deployments of GPT-4o, GPT-4.1, GPT-5 family - **[X.AI](https://console.x.ai/)** - Grok models - **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access - **[Ollama](https://ollama.ai/)** - Local models (free) @@ -132,6 +133,10 @@ cd zen-mcp-server 👉 **[Complete Setup Guide](docs/getting-started.md)** with detailed installation, configuration for Gemini / Codex, and troubleshooting 👉 **[Cursor & VS Code Setup](docs/getting-started.md#ide-clients)** for IDE integration instructions +## Provider Configuration + +Zen activates any provider that has credentials in your `.env`. See `.env.example` for deeper customization. + ## Core Tools > **Note:** Each tool comes with its own multi-step workflow, parameters, and descriptions that consume valuable context window space even when not in use. To optimize performance, some tools are disabled by default. See [Tool Configuration](#tool-configuration) below to enable them. @@ -247,7 +252,7 @@ DISABLED_TOOLS= - **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets **Model Support** -- **Multiple providers** - Gemini, OpenAI, X.AI, OpenRouter, DIAL, Ollama +- **Multiple providers** - Gemini, OpenAI, Azure, X.AI, OpenRouter, DIAL, Ollama - **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama - **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost - **Vision support** - Analyze images, diagrams, screenshots @@ -288,6 +293,7 @@ DISABLED_TOOLS= - [Tools Reference](docs/tools/) - All tools with examples - [Advanced Usage](docs/advanced-usage.md) - Power user features - [Configuration](docs/configuration.md) - Environment variables, restrictions +- [Adding Providers](docs/adding_providers.md) - Provider-specific setup (OpenAI, Azure, custom gateways) - [Model Ranking Guide](docs/model_ranking.md) - How intelligence scores drive auto-mode suggestions **🔧 Setup & Support** @@ -303,10 +309,12 @@ Apache 2.0 License - see [LICENSE](LICENSE) file for details. Built with the power of **Multi-Model AI** collaboration 🤝 - **A**ctual **I**ntelligence by real Humans -- [MCP (Model Context Protocol)](https://modelcontextprotocol.com) by Anthropic -- [Claude Code](https://claude.ai/code) - Your AI coding orchestrator -- [Gemini 2.5 Pro & Flash](https://ai.google.dev/) - Extended thinking & fast analysis -- [OpenAI O3 & GPT-5](https://openai.com/) - Strong reasoning & latest capabilities +- [MCP (Model Context Protocol)](https://modelcontextprotocol.com) +- [Codex CLI](https://developers.openai.com/codex/cli) +- [Claude Code](https://claude.ai/code) +- [Gemini](https://ai.google.dev/) +- [OpenAI](https://openai.com/) +- [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/) ### Star History diff --git a/conf/azure_models.json b/conf/azure_models.json new file mode 100644 index 0000000..e1e3a3f --- /dev/null +++ b/conf/azure_models.json @@ -0,0 +1,45 @@ +{ + "_README": { + "description": "Model metadata for Azure OpenAI / Azure AI Foundry-backed provider. 
The `models` definition can be copied from openrouter_models.json / custom_models.json", + "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/azure_models.md", + "usage": "Models listed here are exposed through Azure AI Foundry. Aliases are case-insensitive.", + "field_notes": "Matches providers/shared/model_capabilities.py.", + "field_descriptions": { + "model_name": "The model identifier e.g., 'gpt-4'", + "deployment": "Azure model deployment name", + "aliases": "Array of short names users can type instead of the full model name", + "context_window": "Total number of tokens the model can process (input + output combined)", + "max_output_tokens": "Maximum number of tokens the model can generate in a single response", + "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)", + "supports_json_mode": "Whether the model can guarantee valid JSON output", + "supports_function_calling": "Whether the model supports function/tool calling", + "supports_images": "Whether the model can process images/visual input", + "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)", + "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)", + "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range", + "description": "Human-readable description of the model", + "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" + } + }, + "_example_models": [ + { + "model_name": "gpt-4", + "deployment": "gpt-4", + "aliases": [ + "gpt4" + ], + "context_window": 128000, + "max_output_tokens": 16384, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "supports_temperature": false, + "temperature_constraint": "fixed", + "description": "GPT-4 (128K context, 16K output)", + "intelligence_score": 10 + } + ], + "models": [] +} diff --git a/conf/custom_models.json b/conf/custom_models.json index 144bf45..1934df3 100644 --- a/conf/custom_models.json +++ b/conf/custom_models.json @@ -1,383 +1,26 @@ { "_README": { - "description": "Unified model configuration for multiple AI providers and endpoints, including OpenRouter", - "providers_supported": [ - "OpenRouter - Access to GPT-4, Claude, Mistral, etc. 
via unified API", - "Custom API endpoints - Local models (Ollama, vLLM, LM Studio, etc.)", - "Self-hosted APIs - Any OpenAI-compatible endpoint" - ], + "description": "Model metadata for local/self-hosted OpenAI-compatible endpoints (Custom provider).", "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md", - "usage": "Models can be accessed via aliases (e.g., 'opus', 'local-llama') or full names (e.g., 'anthropic/claude-opus-4', 'llama3.2')", - "instructions": [ - "Add new models by copying an existing entry and modifying it", - "Aliases are case-insensitive and should be unique across all models", - "context_window is the model's total context window size in tokens (input + output)", - "Set supports_* flags based on the model's actual capabilities", - "Set is_custom=true for models that should ONLY work with custom endpoints (Ollama, vLLM, etc.)", - "Models not listed here will use generic defaults (32K context window, basic features)", - "For OpenRouter models: Use official OpenRouter model names (e.g., 'anthropic/claude-opus-4')", - "For local/custom models: Use model names as they appear in your API (e.g., 'llama3.2', 'gpt-3.5-turbo')" - ], + "usage": "Each entry will be advertised by the Custom provider. Aliases are case-insensitive.", + "field_notes": "Matches providers/shared/model_capabilities.py.", "field_descriptions": { - "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')", + "model_name": "The model identifier e.g., 'llama3.2'", "aliases": "Array of short names users can type instead of the full model name", "context_window": "Total number of tokens the model can process (input + output combined)", "max_output_tokens": "Maximum number of tokens the model can generate in a single response", - "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)", + "supports_extended_thinking": "Whether the model supports extended reasoning tokens", "supports_json_mode": "Whether the model can guarantee valid JSON output", "supports_function_calling": "Whether the model supports function/tool calling", "supports_images": "Whether the model can process images/visual input", "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)", "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)", "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range", - "is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). 
False or omitted for OpenRouter/cloud models.", "description": "Human-readable description of the model", "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" - }, - "example_custom_model": { - "model_name": "my-local-model", - "aliases": [ - "shortname", - "nickname", - "abbrev" - ], - "context_window": 128000, - "max_output_tokens": 32768, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 10.0, - "supports_temperature": true, - "temperature_constraint": "range", - "is_custom": true, - "description": "Example custom/local model for Ollama, vLLM, etc.", - "intelligence_score": 12 } }, "models": [ - { - "model_name": "anthropic/claude-sonnet-4.5", - "aliases": [ - "sonnet", - "sonnet4.5" - ], - "context_window": 200000, - "max_output_tokens": 64000, - "supports_extended_thinking": false, - "supports_json_mode": false, - "supports_function_calling": false, - "supports_images": true, - "max_image_size_mb": 5.0, - "description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency", - "intelligence_score": 12 - }, - { - "model_name": "anthropic/claude-opus-4.1", - "aliases": [ - "opus", - "claude-opus" - ], - "context_window": 200000, - "max_output_tokens": 64000, - "supports_extended_thinking": false, - "supports_json_mode": false, - "supports_function_calling": false, - "supports_images": true, - "max_image_size_mb": 5.0, - "description": "Claude Opus 4.1 - Our most capable and intelligent model yet", - "intelligence_score": 14 - }, - { - "model_name": "anthropic/claude-sonnet-4.1", - "aliases": [ - "sonnet4.1" - ], - "context_window": 200000, - "max_output_tokens": 64000, - "supports_extended_thinking": false, - "supports_json_mode": false, - "supports_function_calling": false, - "supports_images": true, - "max_image_size_mb": 5.0, - "description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency", - "intelligence_score": 10 - }, - { - "model_name": "anthropic/claude-3.5-haiku", - "aliases": [ - "haiku" - ], - "context_window": 200000, - "max_output_tokens": 64000, - "supports_extended_thinking": false, - "supports_json_mode": false, - "supports_function_calling": false, - "supports_images": true, - "max_image_size_mb": 5.0, - "description": "Claude 3 Haiku - Fast and efficient with vision", - "intelligence_score": 8 - }, - { - "model_name": "google/gemini-2.5-pro", - "aliases": [ - "pro", - "gemini-pro", - "gemini", - "pro-openrouter" - ], - "context_window": 1048576, - "max_output_tokens": 65536, - "supports_extended_thinking": true, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "description": "Google's Gemini 2.5 Pro via OpenRouter with vision", - "intelligence_score": 18 - }, - { - "model_name": "google/gemini-2.5-flash", - "aliases": [ - "flash", - "gemini-flash" - ], - "context_window": 1048576, - "max_output_tokens": 65536, - "supports_extended_thinking": true, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 15.0, - "description": "Google's Gemini 2.5 Flash via OpenRouter with vision", - "intelligence_score": 10 - }, - { - "model_name": "mistralai/mistral-large-2411", - "aliases": [ - "mistral-large", - "mistral" - ], - "context_window": 128000, - "max_output_tokens": 32000, - "supports_extended_thinking": 
false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": false, - "max_image_size_mb": 0.0, - "description": "Mistral's largest model (text-only)", - "intelligence_score": 11 - }, - { - "model_name": "meta-llama/llama-3-70b", - "aliases": [ - "llama", - "llama3", - "llama3-70b", - "llama-70b", - "llama3-openrouter" - ], - "context_window": 8192, - "max_output_tokens": 8192, - "supports_extended_thinking": false, - "supports_json_mode": false, - "supports_function_calling": false, - "supports_images": false, - "max_image_size_mb": 0.0, - "description": "Meta's Llama 3 70B model (text-only)", - "intelligence_score": 9 - }, - { - "model_name": "deepseek/deepseek-r1-0528", - "aliases": [ - "deepseek-r1", - "deepseek", - "r1", - "deepseek-thinking" - ], - "context_window": 65536, - "max_output_tokens": 32768, - "supports_extended_thinking": true, - "supports_json_mode": true, - "supports_function_calling": false, - "supports_images": false, - "max_image_size_mb": 0.0, - "description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)", - "intelligence_score": 15 - }, - { - "model_name": "perplexity/llama-3-sonar-large-32k-online", - "aliases": [ - "perplexity", - "sonar", - "perplexity-online" - ], - "context_window": 32768, - "max_output_tokens": 32768, - "supports_extended_thinking": false, - "supports_json_mode": false, - "supports_function_calling": false, - "supports_images": false, - "max_image_size_mb": 0.0, - "description": "Perplexity's online model with web search (text-only)", - "intelligence_score": 9 - }, - { - "model_name": "openai/o3", - "aliases": [ - "o3" - ], - "context_window": 200000, - "max_output_tokens": 100000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": false, - "temperature_constraint": "fixed", - "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision", - "intelligence_score": 14 - }, - { - "model_name": "openai/o3-mini", - "aliases": [ - "o3-mini", - "o3mini" - ], - "context_window": 200000, - "max_output_tokens": 100000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": false, - "temperature_constraint": "fixed", - "description": "OpenAI's o3-mini model - balanced performance and speed with vision", - "intelligence_score": 12 - }, - { - "model_name": "openai/o3-mini-high", - "aliases": [ - "o3-mini-high", - "o3mini-high" - ], - "context_window": 200000, - "max_output_tokens": 100000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": false, - "temperature_constraint": "fixed", - "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision", - "intelligence_score": 13 - }, - { - "model_name": "openai/o3-pro", - "aliases": [ - "o3pro" - ], - "context_window": 200000, - "max_output_tokens": 100000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": false, - "temperature_constraint": "fixed", - "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with 
vision", - "intelligence_score": 15 - }, - { - "model_name": "openai/o4-mini", - "aliases": [ - "o4-mini", - "o4mini" - ], - "context_window": 200000, - "max_output_tokens": 100000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": false, - "temperature_constraint": "fixed", - "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision", - "intelligence_score": 11 - }, - { - "model_name": "openai/gpt-5", - "aliases": [ - "gpt5" - ], - "context_window": 400000, - "max_output_tokens": 128000, - "supports_extended_thinking": true, - "supports_json_mode": true, - "supports_function_calling": true, - "supports_images": true, - "max_image_size_mb": 20.0, - "supports_temperature": true, - "temperature_constraint": "range", - "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support", - "intelligence_score": 16 - }, - { - "model_name": "openai/gpt-5-codex", - "aliases": [ - "codex", - "gpt5codex" - ], - "context_window": 400000, - "max_output_tokens": 128000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": false, - "supports_images": false, - "max_image_size_mb": 0.0, - "is_custom": false, - "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows", - "intelligence_score": 17 - }, - { - "model_name": "openai/gpt-5-mini", - "aliases": [ - "gpt5mini" - ], - "context_window": 400000, - "max_output_tokens": 128000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": false, - "supports_images": false, - "max_image_size_mb": 0.0, - "supports_temperature": true, - "temperature_constraint": "fixed", - "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support", - "intelligence_score": 10 - }, - { - "model_name": "openai/gpt-5-nano", - "aliases": [ - "gpt5nano" - ], - "context_window": 400000, - "max_output_tokens": 128000, - "supports_extended_thinking": false, - "supports_json_mode": true, - "supports_function_calling": false, - "supports_images": false, - "max_image_size_mb": 0.0, - "supports_temperature": true, - "temperature_constraint": "fixed", - "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks", - "intelligence_score": 8 - }, { "model_name": "llama3.2", "aliases": [ @@ -391,7 +34,6 @@ "supports_function_calling": false, "supports_images": false, "max_image_size_mb": 0.0, - "is_custom": true, "description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)", "intelligence_score": 6 } diff --git a/conf/openrouter_models.json b/conf/openrouter_models.json new file mode 100644 index 0000000..b3f35fc --- /dev/null +++ b/conf/openrouter_models.json @@ -0,0 +1,346 @@ +{ + "_README": { + "description": "Model metadata for OpenRouter-backed providers.", + "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md", + "usage": "Models listed here are exposed through OpenRouter. 
Aliases are case-insensitive.", + "field_notes": "Matches providers/shared/model_capabilities.py.", + "field_descriptions": { + "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')", + "aliases": "Array of short names users can type instead of the full model name", + "context_window": "Total number of tokens the model can process (input + output combined)", + "max_output_tokens": "Maximum number of tokens the model can generate in a single response", + "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)", + "supports_json_mode": "Whether the model can guarantee valid JSON output", + "supports_function_calling": "Whether the model supports function/tool calling", + "supports_images": "Whether the model can process images/visual input", + "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)", + "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)", + "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range", + "description": "Human-readable description of the model", + "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" + } + }, + "models": [ + { + "model_name": "anthropic/claude-sonnet-4.5", + "aliases": [ + "sonnet", + "sonnet4.5" + ], + "context_window": 200000, + "max_output_tokens": 64000, + "supports_extended_thinking": false, + "supports_json_mode": false, + "supports_function_calling": false, + "supports_images": true, + "max_image_size_mb": 5.0, + "description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency", + "intelligence_score": 12 + }, + { + "model_name": "anthropic/claude-opus-4.1", + "aliases": [ + "opus", + "claude-opus" + ], + "context_window": 200000, + "max_output_tokens": 64000, + "supports_extended_thinking": false, + "supports_json_mode": false, + "supports_function_calling": false, + "supports_images": true, + "max_image_size_mb": 5.0, + "description": "Claude Opus 4.1 - Our most capable and intelligent model yet", + "intelligence_score": 14 + }, + { + "model_name": "anthropic/claude-sonnet-4.1", + "aliases": [ + "sonnet4.1" + ], + "context_window": 200000, + "max_output_tokens": 64000, + "supports_extended_thinking": false, + "supports_json_mode": false, + "supports_function_calling": false, + "supports_images": true, + "max_image_size_mb": 5.0, + "description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency", + "intelligence_score": 10 + }, + { + "model_name": "anthropic/claude-3.5-haiku", + "aliases": [ + "haiku" + ], + "context_window": 200000, + "max_output_tokens": 64000, + "supports_extended_thinking": false, + "supports_json_mode": false, + "supports_function_calling": false, + "supports_images": true, + "max_image_size_mb": 5.0, + "description": "Claude 3 Haiku - Fast and efficient with vision", + "intelligence_score": 8 + }, + { + "model_name": "google/gemini-2.5-pro", + "aliases": [ + "pro", + "gemini-pro", + "gemini", + "pro-openrouter" + ], + "context_window": 1048576, + "max_output_tokens": 65536, + "supports_extended_thinking": true, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": 
true, + "max_image_size_mb": 20.0, + "description": "Google's Gemini 2.5 Pro via OpenRouter with vision", + "intelligence_score": 18 + }, + { + "model_name": "google/gemini-2.5-flash", + "aliases": [ + "flash", + "gemini-flash" + ], + "context_window": 1048576, + "max_output_tokens": 65536, + "supports_extended_thinking": true, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 15.0, + "description": "Google's Gemini 2.5 Flash via OpenRouter with vision", + "intelligence_score": 10 + }, + { + "model_name": "mistralai/mistral-large-2411", + "aliases": [ + "mistral-large", + "mistral" + ], + "context_window": 128000, + "max_output_tokens": 32000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": false, + "max_image_size_mb": 0.0, + "description": "Mistral's largest model (text-only)", + "intelligence_score": 11 + }, + { + "model_name": "meta-llama/llama-3-70b", + "aliases": [ + "llama", + "llama3", + "llama3-70b", + "llama-70b", + "llama3-openrouter" + ], + "context_window": 8192, + "max_output_tokens": 8192, + "supports_extended_thinking": false, + "supports_json_mode": false, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "description": "Meta's Llama 3 70B model (text-only)", + "intelligence_score": 9 + }, + { + "model_name": "deepseek/deepseek-r1-0528", + "aliases": [ + "deepseek-r1", + "deepseek", + "r1", + "deepseek-thinking" + ], + "context_window": 65536, + "max_output_tokens": 32768, + "supports_extended_thinking": true, + "supports_json_mode": true, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)", + "intelligence_score": 15 + }, + { + "model_name": "perplexity/llama-3-sonar-large-32k-online", + "aliases": [ + "perplexity", + "sonar", + "perplexity-online" + ], + "context_window": 32768, + "max_output_tokens": 32768, + "supports_extended_thinking": false, + "supports_json_mode": false, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "description": "Perplexity's online model with web search (text-only)", + "intelligence_score": 9 + }, + { + "model_name": "openai/o3", + "aliases": [ + "o3" + ], + "context_window": 200000, + "max_output_tokens": 100000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 20.0, + "supports_temperature": false, + "temperature_constraint": "fixed", + "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision", + "intelligence_score": 14 + }, + { + "model_name": "openai/o3-mini", + "aliases": [ + "o3-mini", + "o3mini" + ], + "context_window": 200000, + "max_output_tokens": 100000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 20.0, + "supports_temperature": false, + "temperature_constraint": "fixed", + "description": "OpenAI's o3-mini model - balanced performance and speed with vision", + "intelligence_score": 12 + }, + { + "model_name": "openai/o3-mini-high", + "aliases": [ + "o3-mini-high", + "o3mini-high" + ], + "context_window": 200000, + "max_output_tokens": 100000, + "supports_extended_thinking": false, + "supports_json_mode": true, + 
"supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 20.0, + "supports_temperature": false, + "temperature_constraint": "fixed", + "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision", + "intelligence_score": 13 + }, + { + "model_name": "openai/o3-pro", + "aliases": [ + "o3pro" + ], + "context_window": 200000, + "max_output_tokens": 100000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 20.0, + "supports_temperature": false, + "temperature_constraint": "fixed", + "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision", + "intelligence_score": 15 + }, + { + "model_name": "openai/o4-mini", + "aliases": [ + "o4-mini", + "o4mini" + ], + "context_window": 200000, + "max_output_tokens": 100000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 20.0, + "supports_temperature": false, + "temperature_constraint": "fixed", + "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision", + "intelligence_score": 11 + }, + { + "model_name": "openai/gpt-5", + "aliases": [ + "gpt5" + ], + "context_window": 400000, + "max_output_tokens": 128000, + "supports_extended_thinking": true, + "supports_json_mode": true, + "supports_function_calling": true, + "supports_images": true, + "max_image_size_mb": 20.0, + "supports_temperature": true, + "temperature_constraint": "range", + "description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support", + "intelligence_score": 16 + }, + { + "model_name": "openai/gpt-5-codex", + "aliases": [ + "codex", + "gpt5codex" + ], + "context_window": 400000, + "max_output_tokens": 128000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows", + "intelligence_score": 17 + }, + { + "model_name": "openai/gpt-5-mini", + "aliases": [ + "gpt5mini" + ], + "context_window": 400000, + "max_output_tokens": 128000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "supports_temperature": true, + "temperature_constraint": "fixed", + "description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support", + "intelligence_score": 10 + }, + { + "model_name": "openai/gpt-5-nano", + "aliases": [ + "gpt5nano" + ], + "context_window": 400000, + "max_output_tokens": 128000, + "supports_extended_thinking": false, + "supports_json_mode": true, + "supports_function_calling": false, + "supports_images": false, + "max_image_size_mb": 0.0, + "supports_temperature": true, + "temperature_constraint": "fixed", + "description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks", + "intelligence_score": 8 + } + ] +} diff --git a/docs/adding_providers.md b/docs/adding_providers.md index 8b0ce58..dae9785 100644 --- a/docs/adding_providers.md +++ b/docs/adding_providers.md @@ -9,6 +9,7 @@ Each provider: - Defines supported models using `ModelCapabilities` objects - Implements the 
minimal abstract hooks (`get_provider_type()` and `generate_content()`) - Gets wired into `configure_providers()` so environment variables control activation +- Can leverage helper subclasses (e.g., `AzureOpenAIProvider`) when only client wiring differs ### Intelligence score cheatsheet @@ -31,6 +32,13 @@ features ([details here](model_ranking.md)). ⚠️ **Important**: If you implement a custom `generate_content()`, call `_resolve_model_name()` before invoking the SDK so aliases (e.g. `"gpt"` → `"gpt-4"`) resolve correctly. The shared implementations already do this for you. +**Option C: Azure OpenAI (`AzureOpenAIProvider`)** +- For Azure-hosted deployments of OpenAI models +- Reuses the OpenAI-compatible pipeline but swaps in the `AzureOpenAI` client and a deployment mapping (canonical model → deployment ID) +- Define deployments in [`conf/azure_models.json`](../conf/azure_models.json) (or the file referenced by `AZURE_MODELS_CONFIG_PATH`). +- Entries follow the [`ModelCapabilities`](../providers/shared/model_capabilities.py) schema and must include a `deployment` identifier. + See [Azure OpenAI Configuration](azure_openai.md) for a step-by-step walkthrough. + ## Step-by-Step Guide ### 1. Add Provider Type @@ -227,6 +235,19 @@ DISABLED_TOOLS=debug,tracer EXAMPLE_ALLOWED_MODELS=example-model-large,example-model-small ``` +For Azure OpenAI deployments: + +```bash +AZURE_OPENAI_API_KEY=your_azure_openai_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +# Models are defined in conf/azure_models.json (or AZURE_MODELS_CONFIG_PATH) +# AZURE_OPENAI_API_VERSION=2024-02-15-preview +# AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini +# AZURE_MODELS_CONFIG_PATH=/absolute/path/to/custom_azure_models.json +``` + +You can also define Azure models in [`conf/azure_models.json`](../conf/azure_models.json) (the bundled file is empty so you can copy it safely). Each entry mirrors the `ModelCapabilities` schema and must include a `deployment` field. Set `AZURE_MODELS_CONFIG_PATH` if you maintain a custom copy outside the repository. + **Note**: The `description` field in `ModelCapabilities` helps Claude choose the best model in auto mode. ### 5. Test Your Provider diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md index 4d1cd08..000c0f3 100644 --- a/docs/advanced-usage.md +++ b/docs/advanced-usage.md @@ -91,8 +91,8 @@ OPENAI_ALLOWED_MODELS=o3,o4-mini **Important Notes:** - Restrictions apply to all usage including auto mode -- `OPENROUTER_ALLOWED_MODELS` only affects OpenRouter models accessed via custom provider (where `is_custom: false` in custom_models.json) -- Custom local models (`is_custom: true`) are not affected by any restrictions +- `OPENROUTER_ALLOWED_MODELS` only affects models defined in `conf/openrouter_models.json` +- Custom local models (from `conf/custom_models.json`) are not affected by OpenRouter restrictions ## Thinking Modes diff --git a/docs/azure_openai.md b/docs/azure_openai.md new file mode 100644 index 0000000..d4f6d2f --- /dev/null +++ b/docs/azure_openai.md @@ -0,0 +1,62 @@ +# Azure OpenAI Configuration + +Azure OpenAI support lets Zen MCP talk to GPT-4o, GPT-4.1, GPT-5, and o-series deployments that you expose through your Azure resource. This guide describes the configuration expected by the server: a couple of required environment variables plus a JSON manifest that lists every deployment you want to expose. + +## 1. Required Environment Variables + +Set these entries in your `.env` (or MCP `env` block). 
+ +```bash +AZURE_OPENAI_API_KEY=your_azure_openai_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +# AZURE_OPENAI_API_VERSION=2024-02-15-preview +``` + +Without the key and endpoint the provider is skipped entirely. Leave the key blank only if the endpoint truly allows anonymous access (rare for Azure). + +## 2. Define Deployments in `conf/azure_models.json` + +Azure models live in `conf/azure_models.json` (or the file pointed to by `AZURE_MODELS_CONFIG_PATH`). Each entry follows the same schema as [`ModelCapabilities`](../providers/shared/model_capabilities.py) with one additional required key: `deployment`. This field must exactly match the deployment name shown in the Azure Portal (for example `prod-gpt4o`). The provider routes requests by that value, so omitting it or using the wrong name will cause the server to skip the model. + +```json +{ + "models": [ + { + "model_name": "gpt-4o", + "deployment": "prod-gpt4o", + "friendly_name": "Azure GPT-4o EU", + "intelligence_score": 18, + "context_window": 600000, + "max_output_tokens": 128000, + "supports_temperature": false, + "temperature_constraint": "fixed", + "aliases": ["gpt4o-eu"] + } + ] +} +``` + +Tips: + +- Copy `conf/azure_models.json` into your repo and commit it, or point `AZURE_MODELS_CONFIG_PATH` at a custom path. +- Add one object per deployment. Aliases are optional but help when you want short names like `gpt4o-eu`. +- All capability fields are optional except `model_name`, `deployment`, and `friendly_name`. Anything you omit falls back to conservative defaults. + +## 3. Optional Restrictions + +Use `AZURE_OPENAI_ALLOWED_MODELS` to limit which Azure models Claude can access: + +```bash +AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini +``` + +Aliases are matched case-insensitively. + +## 4. Quick Checklist + +- [ ] `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` are set +- [ ] `conf/azure_models.json` (or the file referenced by `AZURE_MODELS_CONFIG_PATH`) lists every deployment with the desired metadata +- [ ] Optional: `AZURE_OPENAI_ALLOWED_MODELS` to restrict usage +- [ ] Restart `./run-server.sh` and run `listmodels` to confirm the Azure entries appear with the expected metadata + +See also: [`docs/adding_providers.md`](adding_providers.md) for the full provider architecture and [README (Provider Configuration)](../README.md#provider-configuration) for quick-start environment snippets. 
diff --git a/docs/configuration.md b/docs/configuration.md index 12e9d65..9b48fab 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -158,6 +158,8 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast ```env # Override default location of custom_models.json CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json +# Override default location of openrouter_models.json +OPENROUTER_MODELS_CONFIG_PATH=/path/to/your/openrouter_models.json ``` **Conversation Settings:** @@ -244,4 +246,4 @@ LOG_LEVEL=INFO - **[Advanced Usage Guide](advanced-usage.md)** - Advanced model usage patterns, thinking modes, and power user workflows - **[Context Revival Guide](context-revival.md)** - Conversation persistence and context revival across sessions -- **[AI-to-AI Collaboration Guide](ai-collaboration.md)** - Multi-model coordination and conversation threading \ No newline at end of file +- **[AI-to-AI Collaboration Guide](ai-collaboration.md)** - Multi-model coordination and conversation threading diff --git a/docs/custom_models.md b/docs/custom_models.md index b2e7365..2db1694 100644 --- a/docs/custom_models.md +++ b/docs/custom_models.md @@ -35,7 +35,12 @@ This guide covers setting up multiple AI model providers including OpenRouter, c ## Model Aliases -The server uses `conf/custom_models.json` to map convenient aliases to both OpenRouter and custom model names. This unified registry supports both cloud models (via OpenRouter) and local models (via custom endpoints). +Zen ships two registries: + +- `conf/openrouter_models.json` – metadata for models routed through OpenRouter. Override with `OPENROUTER_MODELS_CONFIG_PATH` if you maintain a custom copy. +- `conf/custom_models.json` – metadata for local or self-hosted OpenAI-compatible endpoints used by the Custom provider. Override with `CUSTOM_MODELS_CONFIG_PATH` if needed. + +Copy whichever file you need into your project (or point the corresponding `*_MODELS_CONFIG_PATH` env var at your own copy) and edit it to advertise the models you want. ### OpenRouter Models (Cloud) @@ -58,7 +63,7 @@ The server uses `conf/custom_models.json` to map convenient aliases to both Open |-------|-------------------|------| | `local-llama`, `local` | `llama3.2` | Requires `CUSTOM_API_URL` configured | -View the full list in [`conf/custom_models.json`](conf/custom_models.json). +View the baseline OpenRouter catalogue in [`conf/openrouter_models.json`](conf/openrouter_models.json) and populate [`conf/custom_models.json`](conf/custom_models.json) with your local models. To control ordering in auto mode or the `listmodels` summary, adjust the [`intelligence_score`](model_ranking.md) for each entry (or rely on the automatic @@ -152,7 +157,7 @@ CUSTOM_MODEL_NAME=your-loaded-model ## Using Models -**Using model aliases (from conf/custom_models.json):** +**Using model aliases (from the registry files):** ``` # OpenRouter models: "Use opus for deep analysis" # → anthropic/claude-opus-4 @@ -185,20 +190,20 @@ CUSTOM_MODEL_NAME=your-loaded-model The system automatically routes models to the appropriate provider: -1. **Models with `is_custom: true`** → Always routed to Custom API (requires `CUSTOM_API_URL`) -2. **Models with `is_custom: false` or omitted** → Routed to OpenRouter (requires `OPENROUTER_API_KEY`) +1. Entries in `conf/custom_models.json` → Always routed through the Custom API (requires `CUSTOM_API_URL`) +2. Entries in `conf/openrouter_models.json` → Routed through OpenRouter (requires `OPENROUTER_API_KEY`) 3. 
**Unknown models** → Fallback logic based on model name patterns **Provider Priority Order:** 1. Native APIs (Google, OpenAI) - if API keys are available -2. Custom endpoints - for models marked with `is_custom: true` +2. Custom endpoints - for models declared in `conf/custom_models.json` 3. OpenRouter - catch-all for cloud models This ensures clean separation between local and cloud models while maintaining flexibility for unknown models. ## Model Configuration -The server uses `conf/custom_models.json` to define model aliases and capabilities. You can: +These JSON files define model aliases and capabilities. You can: 1. **Use the default configuration** - Includes popular models with convenient aliases 2. **Customize the configuration** - Add your own models and aliases @@ -206,7 +211,7 @@ The server uses `conf/custom_models.json` to define model aliases and capabiliti ### Adding Custom Models -Edit `conf/custom_models.json` to add new models. The configuration supports both OpenRouter (cloud) and custom endpoint (local) models. +Edit `conf/openrouter_models.json` to tweak OpenRouter behaviour or `conf/custom_models.json` to add local models. Each entry maps directly onto [`ModelCapabilities`](../providers/shared/model_capabilities.py). #### Adding an OpenRouter Model @@ -232,7 +237,6 @@ Edit `conf/custom_models.json` to add new models. The configuration supports bot "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, - "is_custom": true, "description": "My custom Ollama/vLLM model" } ``` @@ -244,10 +248,9 @@ Edit `conf/custom_models.json` to add new models. The configuration supports bot - `supports_extended_thinking`: Whether the model has extended reasoning capabilities - `supports_json_mode`: Whether the model can guarantee valid JSON output - `supports_function_calling`: Whether the model supports function/tool calling -- `is_custom`: **Set to `true` for models that should ONLY work with custom endpoints** (Ollama, vLLM, etc.) - `description`: Human-readable description of the model -**Important:** Always set `is_custom: true` for local models. This ensures they're only used when `CUSTOM_API_URL` is configured and prevents conflicts with OpenRouter. +**Important:** Keep OpenRouter and Custom models in their respective files so that requests are routed correctly. ## Available Models diff --git a/docs/index.md b/docs/index.md index 3b8ca07..2681503 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,6 +4,7 @@ |----------|-------------| | [Getting Started](getting-started.md) | Installation paths, prerequisite setup, and first-run guidance. | | [Adding Providers](adding_providers.md) | How to register new AI providers and advertise capabilities. | +| [Azure OpenAI](azure_openai.md) | Configure Azure deployments, capability overrides, and env mappings. | | [Model Ranking](model_ranking.md) | How intelligence scores translate into auto-mode ordering. | | [Custom Models](custom_models.md) | Configure OpenRouter/custom models and aliases. | | [Adding Tools](adding_tools.md) | Create new tools using the shared base classes. 
| diff --git a/docs/model_ranking.md b/docs/model_ranking.md index 406d0be..64f32af 100644 --- a/docs/model_ranking.md +++ b/docs/model_ranking.md @@ -25,7 +25,7 @@ feature_bonus = ( + (1 if supports_json_mode else 0) + (1 if supports_images else 0) ) -penalty = 1 if is_custom else 0 +penalty = 1 if provider == CUSTOM else 0 effective_rank = clamp(base + ctx_bonus + output_bonus + feature_bonus - penalty, 0, 100) ``` diff --git a/providers/__init__.py b/providers/__init__.py index 311fafa..9421edc 100644 --- a/providers/__init__.py +++ b/providers/__init__.py @@ -1,5 +1,6 @@ """Model provider abstractions for supporting multiple AI providers.""" +from .azure_openai import AzureOpenAIProvider from .base import ModelProvider from .gemini import GeminiModelProvider from .openai_compatible import OpenAICompatibleProvider @@ -13,6 +14,7 @@ __all__ = [ "ModelResponse", "ModelCapabilities", "ModelProviderRegistry", + "AzureOpenAIProvider", "GeminiModelProvider", "OpenAIModelProvider", "OpenAICompatibleProvider", diff --git a/providers/azure_openai.py b/providers/azure_openai.py new file mode 100644 index 0000000..0371b6f --- /dev/null +++ b/providers/azure_openai.py @@ -0,0 +1,342 @@ +"""Azure OpenAI provider built on the OpenAI-compatible implementation.""" + +from __future__ import annotations + +import logging +from dataclasses import asdict, replace + +try: # pragma: no cover - optional dependency + from openai import AzureOpenAI +except ImportError: # pragma: no cover + AzureOpenAI = None # type: ignore[assignment] + +from utils.env import get_env, suppress_env_vars + +from .azure_registry import AzureModelRegistry +from .openai_compatible import OpenAICompatibleProvider +from .openai_provider import OpenAIModelProvider +from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint + +logger = logging.getLogger(__name__) + + +class AzureOpenAIProvider(OpenAICompatibleProvider): + """Thin Azure wrapper that reuses the OpenAI-compatible request pipeline.""" + + FRIENDLY_NAME = "Azure OpenAI" + DEFAULT_API_VERSION = "2024-02-15-preview" + + # The OpenAI-compatible base expects subclasses to expose capabilities via + # ``get_all_model_capabilities``. Azure deployments are user-defined, so we + # build the catalogue dynamically from environment configuration instead of + # relying on a static ``MODEL_CAPABILITIES`` map. + MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {} + + def __init__( + self, + api_key: str, + *, + azure_endpoint: str | None = None, + api_version: str | None = None, + deployments: dict[str, object] | None = None, + **kwargs, + ) -> None: + # Let the OpenAI-compatible base handle shared configuration such as + # timeouts, restriction-aware allowlists, and logging. ``base_url`` maps + # directly onto Azure's endpoint URL. 
+ super().__init__(api_key, base_url=azure_endpoint, **kwargs) + + if not azure_endpoint: + azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT") + if not azure_endpoint: + raise ValueError("Azure OpenAI endpoint is required via parameter or AZURE_OPENAI_ENDPOINT") + + self.azure_endpoint = azure_endpoint.rstrip("/") + self.api_version = api_version or get_env("AZURE_OPENAI_API_VERSION", self.DEFAULT_API_VERSION) + + registry_specs = self._load_registry_entries() + override_specs = self._normalise_deployments(deployments or {}) if deployments else {} + + self._model_specs = self._merge_specs(registry_specs, override_specs) + if not self._model_specs: + raise ValueError( + "Azure OpenAI provider requires at least one configured deployment. " + "Populate conf/azure_models.json or set AZURE_MODELS_CONFIG_PATH." + ) + + self._capabilities = self._build_capabilities_map() + self._deployment_map = {name: spec["deployment"] for name, spec in self._model_specs.items()} + self._deployment_alias_lookup = { + deployment.lower(): canonical for canonical, deployment in self._deployment_map.items() + } + self._canonical_lookup = {name.lower(): name for name in self._model_specs.keys()} + self._invalidate_capability_cache() + + # ------------------------------------------------------------------ + # Capability helpers + # ------------------------------------------------------------------ + def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]: + return dict(self._capabilities) + + def get_provider_type(self) -> ProviderType: + return ProviderType.AZURE + + def get_capabilities(self, model_name: str) -> ModelCapabilities: # type: ignore[override] + lowered = model_name.lower() + if lowered in self._deployment_alias_lookup: + canonical = self._deployment_alias_lookup[lowered] + return super().get_capabilities(canonical) + canonical = self._canonical_lookup.get(lowered) + if canonical: + return super().get_capabilities(canonical) + return super().get_capabilities(model_name) + + def validate_model_name(self, model_name: str) -> bool: # type: ignore[override] + lowered = model_name.lower() + if lowered in self._deployment_alias_lookup or lowered in self._canonical_lookup: + return True + return super().validate_model_name(model_name) + + def _build_capabilities_map(self) -> dict[str, ModelCapabilities]: + capabilities: dict[str, ModelCapabilities] = {} + + for canonical_name, spec in self._model_specs.items(): + template_capability: ModelCapabilities | None = spec.get("capability") + overrides = spec.get("overrides", {}) + + if template_capability: + cloned = replace(template_capability) + else: + template = OpenAIModelProvider.MODEL_CAPABILITIES.get(canonical_name) + + if template: + friendly = template.friendly_name.replace("OpenAI", "Azure OpenAI", 1) + cloned = replace( + template, + provider=ProviderType.AZURE, + friendly_name=friendly, + aliases=list(template.aliases), + ) + else: + deployment_name = spec.get("deployment", "") + cloned = ModelCapabilities( + provider=ProviderType.AZURE, + model_name=canonical_name, + friendly_name=f"Azure OpenAI ({canonical_name})", + description=f"Azure deployment '{deployment_name}' for {canonical_name}", + aliases=[], + ) + + if overrides: + overrides = dict(overrides) + temp_override = overrides.get("temperature_constraint") + if isinstance(temp_override, str): + overrides["temperature_constraint"] = TemperatureConstraint.create(temp_override) + + aliases_override = overrides.get("aliases") + if isinstance(aliases_override, str): + overrides["aliases"] = 
[alias.strip() for alias in aliases_override.split(",") if alias.strip()] + provider_override = overrides.get("provider") + if provider_override: + overrides.pop("provider", None) + + try: + cloned = replace(cloned, **overrides) + except TypeError: + base_data = asdict(cloned) + base_data.update(overrides) + base_data["provider"] = ProviderType.AZURE + temp_value = base_data.get("temperature_constraint") + if isinstance(temp_value, str): + base_data["temperature_constraint"] = TemperatureConstraint.create(temp_value) + cloned = ModelCapabilities(**base_data) + + if cloned.provider != ProviderType.AZURE: + cloned.provider = ProviderType.AZURE + + capabilities[canonical_name] = cloned + + return capabilities + + def _load_registry_entries(self) -> dict[str, dict]: + try: + registry = AzureModelRegistry() + except Exception as exc: # pragma: no cover - registry failure should not crash provider + logger.warning("Unable to load Azure model registry: %s", exc) + return {} + + entries: dict[str, dict] = {} + for model_name, capability, extra in registry.iter_entries(): + deployment = extra.get("deployment") + if not deployment: + logger.warning("Azure model '%s' missing deployment in registry", model_name) + continue + entries[model_name] = {"deployment": deployment, "capability": capability} + + return entries + + @staticmethod + def _merge_specs( + registry_specs: dict[str, dict], + override_specs: dict[str, dict], + ) -> dict[str, dict]: + specs: dict[str, dict] = {} + + for canonical, entry in registry_specs.items(): + specs[canonical] = { + "deployment": entry.get("deployment"), + "capability": entry.get("capability"), + "overrides": {}, + } + + for canonical, entry in override_specs.items(): + spec = specs.get(canonical, {"deployment": None, "capability": None, "overrides": {}}) + deployment = entry.get("deployment") + if deployment: + spec["deployment"] = deployment + overrides = {k: v for k, v in entry.items() if k not in {"deployment"}} + overrides.pop("capability", None) + if overrides: + spec["overrides"].update(overrides) + specs[canonical] = spec + + return {k: v for k, v in specs.items() if v.get("deployment")} + + @staticmethod + def _normalise_deployments(mapping: dict[str, object]) -> dict[str, dict]: + normalised: dict[str, dict] = {} + for canonical, spec in mapping.items(): + canonical_name = (canonical or "").strip() + if not canonical_name: + continue + + deployment_name: str | None = None + overrides: dict[str, object] = {} + + if isinstance(spec, str): + deployment_name = spec.strip() + elif isinstance(spec, dict): + deployment_name = spec.get("deployment") or spec.get("deployment_name") + overrides = {k: v for k, v in spec.items() if k not in {"deployment", "deployment_name"}} + + if not deployment_name: + continue + + normalised[canonical_name] = {"deployment": deployment_name.strip(), **overrides} + + return normalised + + # ------------------------------------------------------------------ + # Azure-specific configuration + # ------------------------------------------------------------------ + @property + def client(self): # type: ignore[override] + """Instantiate the Azure OpenAI client on first use.""" + + if self._client is None: + if AzureOpenAI is None: + raise ImportError( + "Azure OpenAI support requires the 'openai' package. Install it with `pip install openai`." 
+ ) + + import httpx + + proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"] + + with suppress_env_vars(*proxy_env_vars): + try: + timeout_config = self.timeout_config + + http_client = httpx.Client(timeout=timeout_config, follow_redirects=True) + + client_kwargs = { + "api_key": self.api_key, + "azure_endpoint": self.azure_endpoint, + "api_version": self.api_version, + "http_client": http_client, + } + + if self.DEFAULT_HEADERS: + client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() + + logger.debug( + "Initializing Azure OpenAI client endpoint=%s api_version=%s timeouts=%s", + self.azure_endpoint, + self.api_version, + timeout_config, + ) + + self._client = AzureOpenAI(**client_kwargs) + + except Exception as exc: + logger.error("Failed to create Azure OpenAI client: %s", exc) + raise + + return self._client + + # ------------------------------------------------------------------ + # Request delegation + # ------------------------------------------------------------------ + def generate_content( + self, + prompt: str, + model_name: str, + system_prompt: str | None = None, + temperature: float = 0.3, + max_output_tokens: int | None = None, + images: list[str] | None = None, + **kwargs, + ) -> ModelResponse: + canonical_name, deployment_name = self._resolve_canonical_and_deployment(model_name) + + # Delegate to the shared OpenAI-compatible implementation using the + # deployment name – Azure requires the deployment identifier in the + # ``model`` field. The returned ``ModelResponse`` is normalised so + # downstream consumers continue to see the canonical model name. + raw_response = super().generate_content( + prompt=prompt, + model_name=deployment_name, + system_prompt=system_prompt, + temperature=temperature, + max_output_tokens=max_output_tokens, + images=images, + **kwargs, + ) + + capabilities = self._capabilities.get(canonical_name) + friendly_name = capabilities.friendly_name if capabilities else self.FRIENDLY_NAME + + return ModelResponse( + content=raw_response.content, + usage=raw_response.usage, + model_name=canonical_name, + friendly_name=friendly_name, + provider=ProviderType.AZURE, + metadata={**raw_response.metadata, "deployment": deployment_name}, + ) + + def _resolve_canonical_and_deployment(self, model_name: str) -> tuple[str, str]: + resolved_canonical = self._resolve_model_name(model_name) + + if resolved_canonical not in self._deployment_map: + # The base resolver may hand back the deployment alias. Try to map it + # back to a canonical entry. + for canonical, deployment in self._deployment_map.items(): + if deployment.lower() == resolved_canonical.lower(): + return canonical, deployment + raise ValueError(f"Model '{model_name}' is not configured for Azure OpenAI") + + return resolved_canonical, self._deployment_map[resolved_canonical] + + def _parse_allowed_models(self) -> set[str] | None: # type: ignore[override] + # Support both AZURE_ALLOWED_MODELS (inherited behaviour) and the + # clearer AZURE_OPENAI_ALLOWED_MODELS alias. 
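+        # Example: AZURE_OPENAI_ALLOWED_MODELS=gpt-4o,gpt-4o-mini restricts Azure usage
+        # to those two models; the comparison below is case-insensitive.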
+ explicit = get_env("AZURE_OPENAI_ALLOWED_MODELS") + if explicit: + models = {m.strip().lower() for m in explicit.split(",") if m.strip()} + if models: + logger.info("Configured allowed models for Azure OpenAI: %s", sorted(models)) + self._allowed_alias_cache = {} + return models + + return super()._parse_allowed_models() diff --git a/providers/azure_registry.py b/providers/azure_registry.py new file mode 100644 index 0000000..302ebf2 --- /dev/null +++ b/providers/azure_registry.py @@ -0,0 +1,45 @@ +"""Registry loader for Azure OpenAI model configurations.""" + +from __future__ import annotations + +import logging + +from .model_registry_base import CAPABILITY_FIELD_NAMES, CustomModelRegistryBase +from .shared import ModelCapabilities, ProviderType, TemperatureConstraint + +logger = logging.getLogger(__name__) + + +class AzureModelRegistry(CustomModelRegistryBase): + """Load Azure-specific model metadata from configuration files.""" + + def __init__(self, config_path: str | None = None) -> None: + super().__init__( + env_var_name="AZURE_MODELS_CONFIG_PATH", + default_filename="azure_models.json", + config_path=config_path, + ) + self.reload() + + def _extra_keys(self) -> set[str]: + return {"deployment", "deployment_name"} + + def _provider_default(self) -> ProviderType: + return ProviderType.AZURE + + def _default_friendly_name(self, model_name: str) -> str: + return f"Azure OpenAI ({model_name})" + + def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]: + deployment = entry.pop("deployment", None) or entry.pop("deployment_name", None) + if not deployment: + raise ValueError(f"Azure model '{entry.get('model_name')}' is missing required 'deployment' field") + + temp_hint = entry.get("temperature_constraint") + if isinstance(temp_hint, str): + entry["temperature_constraint"] = TemperatureConstraint.create(temp_hint) + + filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES} + filtered.setdefault("provider", ProviderType.AZURE) + capability = ModelCapabilities(**filtered) + return capability, {"deployment": deployment} diff --git a/providers/custom.py b/providers/custom.py index dcd9462..ebb4826 100644 --- a/providers/custom.py +++ b/providers/custom.py @@ -1,10 +1,10 @@ """Custom API provider implementation.""" import logging -from typing import Optional from utils.env import get_env +from .custom_registry import CustomEndpointModelRegistry from .openai_compatible import OpenAICompatibleProvider from .openrouter_registry import OpenRouterModelRegistry from .shared import ModelCapabilities, ProviderType @@ -31,8 +31,8 @@ class CustomProvider(OpenAICompatibleProvider): FRIENDLY_NAME = "Custom API" - # Model registry for managing configurations and aliases (shared with OpenRouter) - _registry: Optional[OpenRouterModelRegistry] = None + # Model registry for managing configurations and aliases + _registry: CustomEndpointModelRegistry | None = None def __init__(self, api_key: str = "", base_url: str = "", **kwargs): """Initialize Custom provider for local/self-hosted models. 
@@ -78,9 +78,9 @@ class CustomProvider(OpenAICompatibleProvider): super().__init__(api_key, base_url=base_url, **kwargs) - # Initialize model registry (shared with OpenRouter for consistent aliases) + # Initialize model registry if CustomProvider._registry is None: - CustomProvider._registry = OpenRouterModelRegistry() + CustomProvider._registry = CustomEndpointModelRegistry() # Log loaded models and aliases only on first load models = self._registry.list_models() aliases = self._registry.list_aliases() @@ -92,8 +92,8 @@ class CustomProvider(OpenAICompatibleProvider): def _lookup_capabilities( self, canonical_name: str, - requested_name: Optional[str] = None, - ) -> Optional[ModelCapabilities]: + requested_name: str | None = None, + ) -> ModelCapabilities | None: """Return capabilities for models explicitly marked as custom.""" builtin = super()._lookup_capabilities(canonical_name, requested_name) @@ -101,12 +101,12 @@ class CustomProvider(OpenAICompatibleProvider): return builtin registry_entry = self._registry.resolve(canonical_name) - if registry_entry and getattr(registry_entry, "is_custom", False): + if registry_entry: registry_entry.provider = ProviderType.CUSTOM return registry_entry logging.debug( - "Custom provider cannot resolve model '%s'; ensure it is declared with 'is_custom': true in custom_models.json", + "Custom provider cannot resolve model '%s'; ensure it is declared in custom_models.json", canonical_name, ) return None @@ -151,6 +151,15 @@ class CustomProvider(OpenAICompatibleProvider): return base_model logging.debug(f"Model '{model_name}' not found in registry, using as-is") + # Attempt to resolve via OpenRouter registry so aliases still map cleanly + openrouter_registry = OpenRouterModelRegistry() + openrouter_config = openrouter_registry.resolve(model_name) + if openrouter_config: + resolved = openrouter_config.model_name + self._alias_cache[cache_key] = resolved + self._alias_cache.setdefault(resolved.lower(), resolved) + return resolved + self._alias_cache[cache_key] = model_name return model_name @@ -160,9 +169,9 @@ class CustomProvider(OpenAICompatibleProvider): if not self._registry: return {} - capabilities: dict[str, ModelCapabilities] = {} - for model_name in self._registry.list_models(): - config = self._registry.resolve(model_name) - if config and getattr(config, "is_custom", False): - capabilities[model_name] = config + capabilities = {} + for model in self._registry.list_models(): + config = self._registry.resolve(model) + if config: + capabilities[model] = config return capabilities diff --git a/providers/custom_registry.py b/providers/custom_registry.py new file mode 100644 index 0000000..990a3b0 --- /dev/null +++ b/providers/custom_registry.py @@ -0,0 +1,26 @@ +"""Registry for models exposed via custom (local) OpenAI-compatible endpoints.""" + +from __future__ import annotations + +from .model_registry_base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry +from .shared import ModelCapabilities, ProviderType + + +class CustomEndpointModelRegistry(CapabilityModelRegistry): + def __init__(self, config_path: str | None = None) -> None: + super().__init__( + env_var_name="CUSTOM_MODELS_CONFIG_PATH", + default_filename="custom_models.json", + provider=ProviderType.CUSTOM, + friendly_prefix="Custom ({model})", + config_path=config_path, + ) + self.reload() + + def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]: + entry["provider"] = ProviderType.CUSTOM + entry.setdefault("friendly_name", f"Custom ({entry['model_name']})") + 
filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES} + filtered.setdefault("provider", ProviderType.CUSTOM) + capability = ModelCapabilities(**filtered) + return capability, {} diff --git a/providers/model_registry_base.py b/providers/model_registry_base.py new file mode 100644 index 0000000..20f4dd9 --- /dev/null +++ b/providers/model_registry_base.py @@ -0,0 +1,241 @@ +"""Shared infrastructure for JSON-backed model registries.""" + +from __future__ import annotations + +import importlib.resources +import json +import logging +from collections.abc import Iterable +from dataclasses import fields +from pathlib import Path + +from utils.env import get_env +from utils.file_utils import read_json_file + +from .shared import ModelCapabilities, ProviderType, TemperatureConstraint + +logger = logging.getLogger(__name__) + + +CAPABILITY_FIELD_NAMES = {field.name for field in fields(ModelCapabilities)} + + +class CustomModelRegistryBase: + """Load and expose capability metadata from a JSON manifest.""" + + def __init__( + self, + *, + env_var_name: str, + default_filename: str, + config_path: str | None = None, + ) -> None: + self._env_var_name = env_var_name + self._default_filename = default_filename + self._use_resources = False + self._resource_package = "conf" + self._default_path = Path(__file__).parent.parent / "conf" / default_filename + + if config_path: + self.config_path = Path(config_path) + else: + env_path = get_env(env_var_name) + if env_path: + self.config_path = Path(env_path) + else: + try: + resource = importlib.resources.files(self._resource_package).joinpath(default_filename) + if hasattr(resource, "read_text"): + self._use_resources = True + self.config_path = None + else: + raise AttributeError("resource accessor not available") + except Exception: + self.config_path = Path(__file__).parent.parent / "conf" / default_filename + + self.alias_map: dict[str, str] = {} + self.model_map: dict[str, ModelCapabilities] = {} + self._extras: dict[str, dict] = {} + + def reload(self) -> None: + data = self._load_config_data() + configs = [config for config in self._parse_models(data) if config is not None] + self._build_maps(configs) + + def list_models(self) -> list[str]: + return list(self.model_map.keys()) + + def list_aliases(self) -> list[str]: + return list(self.alias_map.keys()) + + def resolve(self, name_or_alias: str) -> ModelCapabilities | None: + key = name_or_alias.lower() + canonical = self.alias_map.get(key) + if canonical: + return self.model_map.get(canonical) + + for model_name in self.model_map: + if model_name.lower() == key: + return self.model_map[model_name] + return None + + def get_capabilities(self, name_or_alias: str) -> ModelCapabilities | None: + return self.resolve(name_or_alias) + + def get_entry(self, model_name: str) -> dict | None: + return self._extras.get(model_name) + + def iter_entries(self) -> Iterable[tuple[str, ModelCapabilities, dict]]: + for model_name, capability in self.model_map.items(): + yield model_name, capability, self._extras.get(model_name, {}) + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + def _load_config_data(self) -> dict: + if self._use_resources: + try: + resource = importlib.resources.files(self._resource_package).joinpath(self._default_filename) + if hasattr(resource, "read_text"): + config_text = resource.read_text(encoding="utf-8") + else: # pragma: no cover - legacy Python fallback + 
with resource.open("r", encoding="utf-8") as handle: + config_text = handle.read() + data = json.loads(config_text) + except FileNotFoundError: + logger.debug("Packaged %s not found", self._default_filename) + return {"models": []} + except Exception as exc: + logger.warning("Failed to read packaged %s: %s", self._default_filename, exc) + return {"models": []} + return data or {"models": []} + + if not self.config_path: + raise FileNotFoundError("Registry configuration path is not set") + + if not self.config_path.exists(): + logger.debug("Model registry config not found at %s", self.config_path) + if self.config_path == self._default_path: + fallback = Path.cwd() / "conf" / self._default_filename + if fallback != self.config_path and fallback.exists(): + logger.debug("Falling back to %s", fallback) + self.config_path = fallback + else: + return {"models": []} + else: + return {"models": []} + + data = read_json_file(str(self.config_path)) + return data or {"models": []} + + @property + def use_resources(self) -> bool: + return self._use_resources + + def _parse_models(self, data: dict) -> Iterable[ModelCapabilities | None]: + for raw in data.get("models", []): + if not isinstance(raw, dict): + continue + yield self._convert_entry(raw) + + def _convert_entry(self, raw: dict) -> ModelCapabilities | None: + entry = dict(raw) + model_name = entry.get("model_name") + if not model_name: + return None + + aliases = entry.get("aliases") + if isinstance(aliases, str): + entry["aliases"] = [alias.strip() for alias in aliases.split(",") if alias.strip()] + + entry.setdefault("friendly_name", self._default_friendly_name(model_name)) + + temperature_hint = entry.get("temperature_constraint") + if isinstance(temperature_hint, str): + entry["temperature_constraint"] = TemperatureConstraint.create(temperature_hint) + elif temperature_hint is None: + entry["temperature_constraint"] = TemperatureConstraint.create("range") + + if "max_tokens" in entry: + raise ValueError( + "`max_tokens` is no longer supported. Use `max_output_tokens` in your model configuration." 
+ ) + + unknown_keys = set(entry.keys()) - CAPABILITY_FIELD_NAMES - self._extra_keys() + if unknown_keys: + raise ValueError("Unsupported fields in model configuration: " + ", ".join(sorted(unknown_keys))) + + capability, extras = self._finalise_entry(entry) + capability.provider = self._provider_default() + self._extras[capability.model_name] = extras or {} + return capability + + def _default_friendly_name(self, model_name: str) -> str: + return model_name + + def _extra_keys(self) -> set[str]: + return set() + + def _provider_default(self) -> ProviderType: + return ProviderType.OPENROUTER + + def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]: + return ModelCapabilities(**{k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES}), {} + + def _build_maps(self, configs: Iterable[ModelCapabilities]) -> None: + alias_map: dict[str, str] = {} + model_map: dict[str, ModelCapabilities] = {} + + for config in configs: + if not config: + continue + model_map[config.model_name] = config + + model_name_lower = config.model_name.lower() + if model_name_lower not in alias_map: + alias_map[model_name_lower] = config.model_name + + for alias in config.aliases: + alias_lower = alias.lower() + if alias_lower in alias_map and alias_map[alias_lower] != config.model_name: + raise ValueError( + f"Duplicate alias '{alias}' found for models '{alias_map[alias_lower]}' and '{config.model_name}'" + ) + alias_map[alias_lower] = config.model_name + + self.alias_map = alias_map + self.model_map = model_map + + +class CapabilityModelRegistry(CustomModelRegistryBase): + """Registry that returns `ModelCapabilities` objects with alias support.""" + + def __init__( + self, + *, + env_var_name: str, + default_filename: str, + provider: ProviderType, + friendly_prefix: str, + config_path: str | None = None, + ) -> None: + self._provider = provider + self._friendly_prefix = friendly_prefix + super().__init__( + env_var_name=env_var_name, + default_filename=default_filename, + config_path=config_path, + ) + self.reload() + + def _provider_default(self) -> ProviderType: + return self._provider + + def _default_friendly_name(self, model_name: str) -> str: + return self._friendly_prefix.format(model=model_name) + + def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]: + filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES} + filtered.setdefault("provider", self._provider_default()) + capability = ModelCapabilities(**filtered) + return capability, {} diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py index 94ae5e6..168549f 100644 --- a/providers/openai_compatible.py +++ b/providers/openai_compatible.py @@ -8,7 +8,7 @@ from urllib.parse import urlparse from openai import OpenAI -from utils.env import get_env +from utils.env import get_env, suppress_env_vars from utils.image_utils import validate_image from .base import ModelProvider @@ -257,80 +257,74 @@ class OpenAICompatibleProvider(ModelProvider): def client(self): """Lazy initialization of OpenAI client with security checks and timeout configuration.""" if self._client is None: - import os - import httpx - # Temporarily disable proxy environment variables to prevent httpx from detecting them - original_env = {} proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"] - for var in proxy_env_vars: - if var in os.environ: - original_env[var] = os.environ[var] - del os.environ[var] - - try: - # Create a custom httpx client that explicitly 
avoids proxy parameters - timeout_config = ( - self.timeout_config - if hasattr(self, "timeout_config") and self.timeout_config - else httpx.Timeout(30.0) - ) - - # Create httpx client with minimal config to avoid proxy conflicts - # Note: proxies parameter was removed in httpx 0.28.0 - # Check for test transport injection - if hasattr(self, "_test_transport"): - # Use custom transport for testing (HTTP recording/replay) - http_client = httpx.Client( - transport=self._test_transport, - timeout=timeout_config, - follow_redirects=True, - ) - else: - # Normal production client - http_client = httpx.Client( - timeout=timeout_config, - follow_redirects=True, - ) - - # Keep client initialization minimal to avoid proxy parameter conflicts - client_kwargs = { - "api_key": self.api_key, - "http_client": http_client, - } - - if self.base_url: - client_kwargs["base_url"] = self.base_url - - if self.organization: - client_kwargs["organization"] = self.organization - - # Add default headers if any - if self.DEFAULT_HEADERS: - client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() - - logging.debug(f"OpenAI client initialized with custom httpx client and timeout: {timeout_config}") - - # Create OpenAI client with custom httpx client - self._client = OpenAI(**client_kwargs) - - except Exception as e: - # If all else fails, try absolute minimal client without custom httpx - logging.warning(f"Failed to create client with custom httpx, falling back to minimal config: {e}") + with suppress_env_vars(*proxy_env_vars): try: - minimal_kwargs = {"api_key": self.api_key} + # Create a custom httpx client that explicitly avoids proxy parameters + timeout_config = ( + self.timeout_config + if hasattr(self, "timeout_config") and self.timeout_config + else httpx.Timeout(30.0) + ) + + # Create httpx client with minimal config to avoid proxy conflicts + # Note: proxies parameter was removed in httpx 0.28.0 + # Check for test transport injection + if hasattr(self, "_test_transport"): + # Use custom transport for testing (HTTP recording/replay) + http_client = httpx.Client( + transport=self._test_transport, + timeout=timeout_config, + follow_redirects=True, + ) + else: + # Normal production client + http_client = httpx.Client( + timeout=timeout_config, + follow_redirects=True, + ) + + # Keep client initialization minimal to avoid proxy parameter conflicts + client_kwargs = { + "api_key": self.api_key, + "http_client": http_client, + } + if self.base_url: - minimal_kwargs["base_url"] = self.base_url - self._client = OpenAI(**minimal_kwargs) - except Exception as fallback_error: - logging.error(f"Even minimal OpenAI client creation failed: {fallback_error}") - raise - finally: - # Restore original proxy environment variables - for var, value in original_env.items(): - os.environ[var] = value + client_kwargs["base_url"] = self.base_url + + if self.organization: + client_kwargs["organization"] = self.organization + + # Add default headers if any + if self.DEFAULT_HEADERS: + client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() + + logging.debug( + "OpenAI client initialized with custom httpx client and timeout: %s", + timeout_config, + ) + + # Create OpenAI client with custom httpx client + self._client = OpenAI(**client_kwargs) + + except Exception as e: + # If all else fails, try absolute minimal client without custom httpx + logging.warning( + "Failed to create client with custom httpx, falling back to minimal config: %s", + e, + ) + try: + minimal_kwargs = {"api_key": self.api_key} + if self.base_url: + 
minimal_kwargs["base_url"] = self.base_url + self._client = OpenAI(**minimal_kwargs) + except Exception as fallback_error: + logging.error("Even minimal OpenAI client creation failed: %s", fallback_error) + raise return self._client diff --git a/providers/openai_provider.py b/providers/openai_provider.py index 63d0bfc..5b9e53e 100644 --- a/providers/openai_provider.py +++ b/providers/openai_provider.py @@ -103,16 +103,16 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o3-mini", friendly_name="OpenAI (O3-mini)", intelligence_score=12, - context_window=200_000, # 200K tokens - max_output_tokens=65536, # 64K max output tokens + context_window=200_000, + max_output_tokens=65536, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, supports_function_calling=True, supports_json_mode=True, - supports_images=True, # O3 models support vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=False, # O3 models don't accept temperature parameter + supports_images=True, + max_image_size_mb=20.0, + supports_temperature=False, temperature_constraint=TemperatureConstraint.create("fixed"), description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity", aliases=["o3mini"], @@ -122,16 +122,16 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o3-pro", friendly_name="OpenAI (O3-Pro)", intelligence_score=15, - context_window=200_000, # 200K tokens - max_output_tokens=65536, # 64K max output tokens + context_window=200_000, + max_output_tokens=65536, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, supports_function_calling=True, supports_json_mode=True, - supports_images=True, # O3 models support vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=False, # O3 models don't accept temperature parameter + supports_images=True, + max_image_size_mb=20.0, + supports_temperature=False, temperature_constraint=TemperatureConstraint.create("fixed"), description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. 
Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.", aliases=["o3pro"], @@ -141,16 +141,15 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="o4-mini", friendly_name="OpenAI (O4-mini)", intelligence_score=11, - context_window=200_000, # 200K tokens - max_output_tokens=65536, # 64K max output tokens + context_window=200_000, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, supports_function_calling=True, supports_json_mode=True, - supports_images=True, # O4 models support vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=False, # O4 models don't accept temperature parameter + supports_images=True, + max_image_size_mb=20.0, + supports_temperature=False, temperature_constraint=TemperatureConstraint.create("fixed"), description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning", aliases=["o4mini"], @@ -160,16 +159,16 @@ class OpenAIModelProvider(OpenAICompatibleProvider): model_name="gpt-4.1", friendly_name="OpenAI (GPT 4.1)", intelligence_score=13, - context_window=1_000_000, # 1M tokens + context_window=1_000_000, max_output_tokens=32_768, supports_extended_thinking=False, supports_system_prompts=True, supports_streaming=True, supports_function_calling=True, supports_json_mode=True, - supports_images=True, # GPT-4.1 supports vision - max_image_size_mb=20.0, # 20MB per OpenAI docs - supports_temperature=True, # Regular models accept temperature parameter + supports_images=True, + max_image_size_mb=20.0, + supports_temperature=True, temperature_constraint=TemperatureConstraint.create("range"), description="GPT-4.1 (1M context) - Advanced reasoning model with large context window", aliases=["gpt4.1"], @@ -178,19 +177,19 @@ class OpenAIModelProvider(OpenAICompatibleProvider): provider=ProviderType.OPENAI, model_name="gpt-5-codex", friendly_name="OpenAI (GPT-5 Codex)", - intelligence_score=17, # Higher than GPT-5 for coding tasks - context_window=400_000, # 400K tokens (same as GPT-5) - max_output_tokens=128_000, # 128K output tokens - supports_extended_thinking=True, # Responses API supports reasoning tokens + intelligence_score=17, + context_window=400_000, + max_output_tokens=128_000, + supports_extended_thinking=True, supports_system_prompts=True, supports_streaming=True, - supports_function_calling=True, # Enhanced for agentic software engineering + supports_function_calling=True, supports_json_mode=True, - supports_images=True, # Screenshots, wireframes, diagrams - max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_images=True, + max_image_size_mb=20.0, supports_temperature=True, temperature_constraint=TemperatureConstraint.create("range"), - description="GPT-5 Codex (400K context) - Uses Responses API for 40-80% cost savings. Specialized for coding, refactoring, and software architecture. 
3% better performance on SWE-bench.", + description="GPT-5 Codex (400K context) Specialized for coding, refactoring, and software architecture.", aliases=["gpt5-codex", "codex", "gpt-5-code", "gpt5-code"], ), } @@ -282,7 +281,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider): if category == ToolModelCategory.EXTENDED_REASONING: # Prefer models with extended thinking support - # GPT-5-Codex first for coding tasks (uses Responses API with 40-80% cost savings) + # GPT-5-Codex first for coding tasks preferred = find_first(["gpt-5-codex", "o3", "o3-pro", "gpt-5"]) return preferred if preferred else allowed_models[0] diff --git a/providers/openrouter.py b/providers/openrouter.py index 186a0f9..6d7cb49 100644 --- a/providers/openrouter.py +++ b/providers/openrouter.py @@ -1,7 +1,6 @@ """OpenRouter provider implementation.""" import logging -from typing import Optional from utils.env import get_env @@ -42,7 +41,7 @@ class OpenRouterProvider(OpenAICompatibleProvider): } # Model registry for managing configurations and aliases - _registry: Optional[OpenRouterModelRegistry] = None + _registry: OpenRouterModelRegistry | None = None def __init__(self, api_key: str, **kwargs): """Initialize OpenRouter provider. @@ -70,8 +69,8 @@ class OpenRouterProvider(OpenAICompatibleProvider): def _lookup_capabilities( self, canonical_name: str, - requested_name: Optional[str] = None, - ) -> Optional[ModelCapabilities]: + requested_name: str | None = None, + ) -> ModelCapabilities | None: """Fetch OpenRouter capabilities from the registry or build a generic fallback.""" capabilities = self._registry.get_capabilities(canonical_name) @@ -143,7 +142,7 @@ class OpenRouterProvider(OpenAICompatibleProvider): # Custom models belong to CustomProvider; skip them here so the two # providers don't race over the same registrations (important for tests # that stub the registry with minimal objects lacking attrs). - if hasattr(config, "is_custom") and config.is_custom is True: + if config.provider == ProviderType.CUSTOM: continue if restriction_service: @@ -211,7 +210,7 @@ class OpenRouterProvider(OpenAICompatibleProvider): continue # See note in list_models: respect the CustomProvider boundary. - if hasattr(config, "is_custom") and config.is_custom is True: + if config.provider == ProviderType.CUSTOM: continue capabilities[model_name] = config diff --git a/providers/openrouter_registry.py b/providers/openrouter_registry.py index d2cc219..25f8dbf 100644 --- a/providers/openrouter_registry.py +++ b/providers/openrouter_registry.py @@ -1,293 +1,38 @@ """OpenRouter model registry for managing model configurations and aliases.""" -import importlib.resources -import logging -from pathlib import Path -from typing import Optional +from __future__ import annotations -from utils.env import get_env - -# Import handled via importlib.resources.files() calls directly -from utils.file_utils import read_json_file - -from .shared import ( - ModelCapabilities, - ProviderType, - TemperatureConstraint, -) +from .model_registry_base import CAPABILITY_FIELD_NAMES, CapabilityModelRegistry +from .shared import ModelCapabilities, ProviderType -class OpenRouterModelRegistry: - """In-memory view of OpenRouter and custom model metadata. 
+class OpenRouterModelRegistry(CapabilityModelRegistry): + """Capability registry backed by `conf/openrouter_models.json`.""" - Role - Parse the packaged ``conf/custom_models.json`` (or user-specified - overrides), construct alias and capability maps, and serve those - structures to providers that rely on OpenRouter semantics (both the - OpenRouter provider itself and the Custom provider). + def __init__(self, config_path: str | None = None) -> None: + super().__init__( + env_var_name="OPENROUTER_MODELS_CONFIG_PATH", + default_filename="openrouter_models.json", + provider=ProviderType.OPENROUTER, + friendly_prefix="OpenRouter ({model})", + config_path=config_path, + ) - Key duties - * Load :class:`ModelCapabilities` definitions from configuration files - * Maintain a case-insensitive alias → canonical name map for fast - resolution - * Provide helpers to list models, list aliases, and resolve an arbitrary - name to its capability object without repeatedly touching the file - system. - """ - - def __init__(self, config_path: Optional[str] = None): - """Initialize the registry. - - Args: - config_path: Path to config file. If None, uses default locations. - """ - self.alias_map: dict[str, str] = {} # alias -> model_name - self.model_map: dict[str, ModelCapabilities] = {} # model_name -> config - - # Determine config path and loading strategy - self.use_resources = False - if config_path: - # Direct config_path parameter - self.config_path = Path(config_path) + def _finalise_entry(self, entry: dict) -> tuple[ModelCapabilities, dict]: + provider_override = entry.get("provider") + if isinstance(provider_override, str): + entry_provider = ProviderType(provider_override.lower()) + elif isinstance(provider_override, ProviderType): + entry_provider = provider_override else: - # Check environment variable first - env_path = get_env("CUSTOM_MODELS_CONFIG_PATH") - if env_path: - # Environment variable path - self.config_path = Path(env_path) - else: - # Try importlib.resources for robust packaging support - self.config_path = None - self.use_resources = False + entry_provider = ProviderType.OPENROUTER - try: - resource_traversable = importlib.resources.files("conf").joinpath("custom_models.json") - if hasattr(resource_traversable, "read_text"): - self.use_resources = True - else: - raise AttributeError("read_text not available") - except Exception: - pass + if entry_provider == ProviderType.CUSTOM: + entry.setdefault("friendly_name", f"Custom ({entry['model_name']})") + else: + entry.setdefault("friendly_name", f"OpenRouter ({entry['model_name']})") - if not self.use_resources: - # Fallback to file system paths - potential_paths = [ - Path(__file__).parent.parent / "conf" / "custom_models.json", - Path.cwd() / "conf" / "custom_models.json", - ] - - for path in potential_paths: - if path.exists(): - self.config_path = path - break - - if self.config_path is None: - self.config_path = potential_paths[0] - - # Load configuration - self.reload() - - def reload(self) -> None: - """Reload configuration from disk.""" - try: - configs = self._read_config() - self._build_maps(configs) - caller_info = "" - try: - import inspect - - caller_frame = inspect.currentframe().f_back - if caller_frame: - caller_name = caller_frame.f_code.co_name - caller_file = ( - caller_frame.f_code.co_filename.split("/")[-1] if caller_frame.f_code.co_filename else "unknown" - ) - # Look for tool context - while caller_frame: - frame_locals = caller_frame.f_locals - if "self" in frame_locals and hasattr(frame_locals["self"], 
"get_name"): - tool_name = frame_locals["self"].get_name() - caller_info = f" (called from {tool_name} tool)" - break - caller_frame = caller_frame.f_back - if not caller_info: - caller_info = f" (called from {caller_name} in {caller_file})" - except Exception: - # If frame inspection fails, just continue without caller info - pass - - logging.debug( - f"Loaded {len(self.model_map)} OpenRouter models with {len(self.alias_map)} aliases{caller_info}" - ) - except ValueError as e: - # Re-raise ValueError only for duplicate aliases (critical config errors) - logging.error(f"Failed to load OpenRouter model configuration: {e}") - # Initialize with empty maps on failure - self.alias_map = {} - self.model_map = {} - if "Duplicate alias" in str(e): - raise - except Exception as e: - logging.error(f"Failed to load OpenRouter model configuration: {e}") - # Initialize with empty maps on failure - self.alias_map = {} - self.model_map = {} - - def _read_config(self) -> list[ModelCapabilities]: - """Read configuration from file or package resources. - - Returns: - List of model configurations - """ - try: - if self.use_resources: - # Use importlib.resources for packaged environments - try: - resource_path = importlib.resources.files("conf").joinpath("custom_models.json") - if hasattr(resource_path, "read_text"): - # Python 3.9+ - config_text = resource_path.read_text(encoding="utf-8") - else: - # Python 3.8 fallback - with resource_path.open("r", encoding="utf-8") as f: - config_text = f.read() - - import json - - data = json.loads(config_text) - logging.debug("Loaded OpenRouter config from package resources") - except Exception as e: - logging.warning(f"Failed to load config from resources: {e}") - return [] - else: - # Use file path loading - if not self.config_path.exists(): - logging.warning(f"OpenRouter model config not found at {self.config_path}") - return [] - - # Use centralized JSON reading utility - data = read_json_file(str(self.config_path)) - logging.debug(f"Loaded OpenRouter config from file: {self.config_path}") - - if data is None: - location = "resources" if self.use_resources else str(self.config_path) - raise ValueError(f"Could not read or parse JSON from {location}") - - # Parse models - configs = [] - for model_data in data.get("models", []): - # Create ModelCapabilities directly from JSON data - # Handle temperature_constraint conversion - temp_constraint_str = model_data.get("temperature_constraint") - temp_constraint = TemperatureConstraint.create(temp_constraint_str or "range") - - # Set provider-specific defaults based on is_custom flag - is_custom = model_data.get("is_custom", False) - if is_custom: - model_data.setdefault("provider", ProviderType.CUSTOM) - model_data.setdefault("friendly_name", f"Custom ({model_data.get('model_name', 'Unknown')})") - else: - model_data.setdefault("provider", ProviderType.OPENROUTER) - model_data.setdefault("friendly_name", f"OpenRouter ({model_data.get('model_name', 'Unknown')})") - model_data["temperature_constraint"] = temp_constraint - - # Remove the string version of temperature_constraint before creating ModelCapabilities - if "temperature_constraint" in model_data and isinstance(model_data["temperature_constraint"], str): - del model_data["temperature_constraint"] - model_data["temperature_constraint"] = temp_constraint - - config = ModelCapabilities(**model_data) - configs.append(config) - - return configs - except ValueError: - # Re-raise ValueError for specific config errors - raise - except Exception as e: - location = "resources" 
if self.use_resources else str(self.config_path) - raise ValueError(f"Error reading config from {location}: {e}") - - def _build_maps(self, configs: list[ModelCapabilities]) -> None: - """Build alias and model maps from configurations. - - Args: - configs: List of model configurations - """ - alias_map = {} - model_map = {} - - for config in configs: - # Add to model map - model_map[config.model_name] = config - - # Add the model_name itself as an alias for case-insensitive lookup - # But only if it's not already in the aliases list - model_name_lower = config.model_name.lower() - aliases_lower = [alias.lower() for alias in config.aliases] - - if model_name_lower not in aliases_lower: - if model_name_lower in alias_map: - existing_model = alias_map[model_name_lower] - if existing_model != config.model_name: - raise ValueError( - f"Duplicate model name '{config.model_name}' (case-insensitive) found for models " - f"'{existing_model}' and '{config.model_name}'" - ) - else: - alias_map[model_name_lower] = config.model_name - - # Add aliases - for alias in config.aliases: - alias_lower = alias.lower() - if alias_lower in alias_map: - existing_model = alias_map[alias_lower] - raise ValueError( - f"Duplicate alias '{alias}' found for models '{existing_model}' and '{config.model_name}'" - ) - alias_map[alias_lower] = config.model_name - - # Atomic update - self.alias_map = alias_map - self.model_map = model_map - - def resolve(self, name_or_alias: str) -> Optional[ModelCapabilities]: - """Resolve a model name or alias to configuration. - - Args: - name_or_alias: Model name or alias to resolve - - Returns: - Model configuration if found, None otherwise - """ - # Try alias lookup (case-insensitive) - this now includes model names too - alias_lower = name_or_alias.lower() - if alias_lower in self.alias_map: - model_name = self.alias_map[alias_lower] - return self.model_map.get(model_name) - - return None - - def get_capabilities(self, name_or_alias: str) -> Optional[ModelCapabilities]: - """Get model capabilities for a name or alias. 
- - Args: - name_or_alias: Model name or alias - - Returns: - ModelCapabilities if found, None otherwise - """ - # Registry now returns ModelCapabilities directly - return self.resolve(name_or_alias) - - def get_model_config(self, name_or_alias: str) -> Optional[ModelCapabilities]: - """Backward-compatible wrapper used by providers and older tests.""" - - return self.resolve(name_or_alias) - - def list_models(self) -> list[str]: - """List all available model names.""" - return list(self.model_map.keys()) - - def list_aliases(self) -> list[str]: - """List all available aliases.""" - return list(self.alias_map.keys()) + filtered = {k: v for k, v in entry.items() if k in CAPABILITY_FIELD_NAMES} + filtered.setdefault("provider", entry_provider) + capability = ModelCapabilities(**filtered) + return capability, {} diff --git a/providers/registry.py b/providers/registry.py index 708de5c..cd28c42 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -38,6 +38,7 @@ class ModelProviderRegistry: PROVIDER_PRIORITY_ORDER = [ ProviderType.GOOGLE, # Direct Gemini access ProviderType.OPENAI, # Direct OpenAI access + ProviderType.AZURE, # Azure-hosted OpenAI deployments ProviderType.XAI, # Direct X.AI GROK access ProviderType.DIAL, # DIAL unified API access ProviderType.CUSTOM, # Local/self-hosted models @@ -123,6 +124,21 @@ class ModelProviderRegistry: provider_kwargs["base_url"] = gemini_base_url logging.info(f"Initialized Gemini provider with custom endpoint: {gemini_base_url}") provider = provider_class(**provider_kwargs) + elif provider_type == ProviderType.AZURE: + if not api_key: + return None + + azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT") + if not azure_endpoint: + logging.warning("AZURE_OPENAI_ENDPOINT missing – skipping Azure OpenAI provider") + return None + + azure_version = get_env("AZURE_OPENAI_API_VERSION") + provider = provider_class( + api_key=api_key, + azure_endpoint=azure_endpoint, + api_version=azure_version, + ) else: if not api_key: return None @@ -318,6 +334,7 @@ class ModelProviderRegistry: key_mapping = { ProviderType.GOOGLE: "GEMINI_API_KEY", ProviderType.OPENAI: "OPENAI_API_KEY", + ProviderType.AZURE: "AZURE_OPENAI_API_KEY", ProviderType.XAI: "XAI_API_KEY", ProviderType.OPENROUTER: "OPENROUTER_API_KEY", ProviderType.CUSTOM: "CUSTOM_API_KEY", # Can be empty for providers that don't need auth diff --git a/providers/shared/model_capabilities.py b/providers/shared/model_capabilities.py index e06d3db..cf7e208 100644 --- a/providers/shared/model_capabilities.py +++ b/providers/shared/model_capabilities.py @@ -53,7 +53,6 @@ class ModelCapabilities: # Additional attributes max_image_size_mb: float = 0.0 - is_custom: bool = False temperature_constraint: TemperatureConstraint = field( default_factory=lambda: RangeTemperatureConstraint(0.0, 2.0, 0.3) ) @@ -102,9 +101,6 @@ class ModelCapabilities: if self.supports_images: score += 1 - if self.is_custom: - score -= 1 - return max(0, min(100, score)) @staticmethod diff --git a/providers/shared/provider_type.py b/providers/shared/provider_type.py index 44153f0..a1b3137 100644 --- a/providers/shared/provider_type.py +++ b/providers/shared/provider_type.py @@ -10,6 +10,7 @@ class ProviderType(Enum): GOOGLE = "google" OPENAI = "openai" + AZURE = "azure" XAI = "xai" OPENROUTER = "openrouter" CUSTOM = "custom" diff --git a/pyproject.toml b/pyproject.toml index 9d6c113..f153b79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ py-modules = ["server", "config"] "*" = ["conf/*.json"] [tool.setuptools.data-files] 
-"conf" = ["conf/custom_models.json"] +"conf" = ["conf/custom_models.json", "conf/openrouter_models.json", "conf/azure_models.json"] [project.scripts] zen-mcp-server = "server:run" diff --git a/server.py b/server.py index b60f445..eb32ac2 100644 --- a/server.py +++ b/server.py @@ -377,6 +377,7 @@ def configure_providers(): value = get_env(key) logger.debug(f" {key}: {'[PRESENT]' if value else '[MISSING]'}") from providers import ModelProviderRegistry + from providers.azure_openai import AzureOpenAIProvider from providers.custom import CustomProvider from providers.dial import DIALModelProvider from providers.gemini import GeminiModelProvider @@ -411,6 +412,27 @@ def configure_providers(): else: logger.debug("OpenAI API key is placeholder value") + # Check for Azure OpenAI configuration + azure_key = get_env("AZURE_OPENAI_API_KEY") + azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT") + azure_models_available = False + if azure_key and azure_key != "your_azure_openai_key_here" and azure_endpoint: + try: + from providers.azure_registry import AzureModelRegistry + + azure_registry = AzureModelRegistry() + if azure_registry.list_models(): + valid_providers.append("Azure OpenAI") + has_native_apis = True + azure_models_available = True + logger.info("Azure OpenAI configuration detected") + else: + logger.warning( + "Azure OpenAI models configuration is empty. Populate conf/azure_models.json or set AZURE_MODELS_CONFIG_PATH." + ) + except Exception as exc: + logger.warning(f"Failed to load Azure OpenAI models: {exc}") + # Check for X.AI API key xai_key = get_env("XAI_API_KEY") if xai_key and xai_key != "your_xai_api_key_here": @@ -468,6 +490,10 @@ def configure_providers(): ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) registered_providers.append(ProviderType.OPENAI.value) logger.debug(f"Registered provider: {ProviderType.OPENAI.value}") + if azure_models_available: + ModelProviderRegistry.register_provider(ProviderType.AZURE, AzureOpenAIProvider) + registered_providers.append(ProviderType.AZURE.value) + logger.debug(f"Registered provider: {ProviderType.AZURE.value}") if xai_key and xai_key != "your_xai_api_key_here": ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider) registered_providers.append(ProviderType.XAI.value) diff --git a/tests/test_auto_mode_model_listing.py b/tests/test_auto_mode_model_listing.py index 8df0542..dec487f 100644 --- a/tests/test_auto_mode_model_listing.py +++ b/tests/test_auto_mode_model_listing.py @@ -64,6 +64,14 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry): monkeypatch.setenv("OPENAI_API_KEY", "test-openai") monkeypatch.setenv("OPENROUTER_API_KEY", "test-openrouter") monkeypatch.delenv("XAI_API_KEY", raising=False) + # Ensure Azure provider stays disabled regardless of developer workstation env + for azure_var in ( + "AZURE_OPENAI_API_KEY", + "AZURE_OPENAI_ENDPOINT", + "AZURE_OPENAI_ALLOWED_MODELS", + "AZURE_MODELS_CONFIG_PATH", + ): + monkeypatch.delenv(azure_var, raising=False) monkeypatch.setenv("ZEN_MCP_FORCE_ENV_OVERRIDE", "false") env_config.reload_env({"ZEN_MCP_FORCE_ENV_OVERRIDE": "false"}) try: @@ -103,6 +111,13 @@ def test_error_listing_respects_env_restrictions(monkeypatch, reset_registry): for var in ("XAI_API_KEY", "CUSTOM_API_URL", "CUSTOM_API_KEY", "DIAL_API_KEY"): monkeypatch.delenv(var, raising=False) + for azure_var in ( + "AZURE_OPENAI_API_KEY", + "AZURE_OPENAI_ENDPOINT", + "AZURE_OPENAI_ALLOWED_MODELS", + "AZURE_MODELS_CONFIG_PATH", + ): + 
monkeypatch.delenv(azure_var, raising=False) ModelProviderRegistry.reset_for_testing() model_restrictions._restriction_service = None @@ -136,6 +151,13 @@ def test_error_listing_without_restrictions_shows_full_catalog(monkeypatch, rese monkeypatch.setenv("OPENROUTER_API_KEY", "test-openrouter") monkeypatch.setenv("XAI_API_KEY", "test-xai") monkeypatch.setenv("ZEN_MCP_FORCE_ENV_OVERRIDE", "false") + for azure_var in ( + "AZURE_OPENAI_API_KEY", + "AZURE_OPENAI_ENDPOINT", + "AZURE_OPENAI_ALLOWED_MODELS", + "AZURE_MODELS_CONFIG_PATH", + ): + monkeypatch.delenv(azure_var, raising=False) env_config.reload_env({"ZEN_MCP_FORCE_ENV_OVERRIDE": "false"}) try: import dotenv diff --git a/tests/test_azure_openai_provider.py b/tests/test_azure_openai_provider.py new file mode 100644 index 0000000..1d154e0 --- /dev/null +++ b/tests/test_azure_openai_provider.py @@ -0,0 +1,145 @@ +import sys +import types + +import pytest + +if "openai" not in sys.modules: # pragma: no cover - test shim for optional dependency + stub = types.ModuleType("openai") + stub.AzureOpenAI = object # Replaced with a mock inside tests + sys.modules["openai"] = stub + +from providers.azure_openai import AzureOpenAIProvider +from providers.shared import ModelCapabilities, ProviderType + + +class _DummyResponse: + def __init__(self): + self.choices = [ + types.SimpleNamespace( + message=types.SimpleNamespace(content="hello"), + finish_reason="stop", + ) + ] + self.model = "prod-gpt4o" + self.id = "resp-123" + self.created = 0 + self.usage = types.SimpleNamespace( + prompt_tokens=5, + completion_tokens=3, + total_tokens=8, + ) + + +@pytest.fixture +def dummy_azure_client(monkeypatch): + captured = {} + + class _DummyAzureClient: + def __init__(self, **kwargs): + captured["client_kwargs"] = kwargs + self.chat = types.SimpleNamespace(completions=types.SimpleNamespace(create=self._create_completion)) + self.responses = types.SimpleNamespace(create=self._create_response) + + def _create_completion(self, **kwargs): + captured["request_kwargs"] = kwargs + return _DummyResponse() + + def _create_response(self, **kwargs): + captured["responses_kwargs"] = kwargs + return _DummyResponse() + + monkeypatch.delenv("AZURE_OPENAI_ALLOWED_MODELS", raising=False) + monkeypatch.setattr("providers.azure_openai.AzureOpenAI", _DummyAzureClient) + return captured + + +def test_generate_content_uses_deployment_mapping(dummy_azure_client): + provider = AzureOpenAIProvider( + api_key="key", + azure_endpoint="https://example.openai.azure.com/", + deployments={"gpt-4o": "prod-gpt4o"}, + ) + + result = provider.generate_content("hello", "gpt-4o") + + assert dummy_azure_client["request_kwargs"]["model"] == "prod-gpt4o" + assert result.model_name == "gpt-4o" + assert result.provider == ProviderType.AZURE + assert provider.validate_model_name("prod-gpt4o") + + +def test_generate_content_accepts_deployment_alias(dummy_azure_client): + provider = AzureOpenAIProvider( + api_key="key", + azure_endpoint="https://example.openai.azure.com/", + deployments={"gpt-4o-mini": "mini-deployment"}, + ) + + # Calling with the deployment alias should still resolve properly. 
+ result = provider.generate_content("hi", "mini-deployment") + + assert dummy_azure_client["request_kwargs"]["model"] == "mini-deployment" + assert result.model_name == "gpt-4o-mini" + + +def test_client_initialization_uses_endpoint_and_version(dummy_azure_client): + provider = AzureOpenAIProvider( + api_key="key", + azure_endpoint="https://example.openai.azure.com/", + api_version="2024-03-15-preview", + deployments={"gpt-4o": "prod"}, + ) + + _ = provider.client + + assert dummy_azure_client["client_kwargs"]["azure_endpoint"] == "https://example.openai.azure.com" + assert dummy_azure_client["client_kwargs"]["api_version"] == "2024-03-15-preview" + + +def test_deployment_overrides_capabilities(dummy_azure_client): + provider = AzureOpenAIProvider( + api_key="key", + azure_endpoint="https://example.openai.azure.com/", + deployments={ + "gpt-4o": { + "deployment": "prod-gpt4o", + "friendly_name": "Azure GPT-4o EU", + "intelligence_score": 19, + "supports_temperature": False, + "temperature_constraint": "fixed", + } + }, + ) + + caps = provider.get_capabilities("gpt-4o") + assert caps.friendly_name == "Azure GPT-4o EU" + assert caps.intelligence_score == 19 + assert not caps.supports_temperature + + +def test_registry_configuration_merges_capabilities(dummy_azure_client, monkeypatch): + def fake_registry_entries(self): + capability = ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-4o", + friendly_name="Azure GPT-4o Registry", + context_window=500_000, + max_output_tokens=128_000, + ) + return {"gpt-4o": {"deployment": "registry-deployment", "capability": capability}} + + monkeypatch.setattr(AzureOpenAIProvider, "_load_registry_entries", fake_registry_entries) + + provider = AzureOpenAIProvider( + api_key="key", + azure_endpoint="https://example.openai.azure.com/", + ) + + # Capability should come from registry + caps = provider.get_capabilities("gpt-4o") + assert caps.friendly_name == "Azure GPT-4o Registry" + assert caps.context_window == 500_000 + + # API call should use deployment defined in registry + provider.generate_content("hello", "gpt-4o") + assert dummy_azure_client["request_kwargs"]["model"] == "registry-deployment" diff --git a/tests/test_custom_openai_temperature_fix.py b/tests/test_custom_openai_temperature_fix.py index b13441f..8f933d9 100644 --- a/tests/test_custom_openai_temperature_fix.py +++ b/tests/test_custom_openai_temperature_fix.py @@ -34,8 +34,7 @@ class TestCustomOpenAITemperatureParameterFix: config_models = [ { "model_name": "gpt-5-2025-08-07", - "provider": "ProviderType.OPENAI", - "is_custom": True, + "provider": "openai", "context_window": 400000, "max_output_tokens": 128000, "supports_extended_thinking": True, diff --git a/tests/test_custom_provider.py b/tests/test_custom_provider.py index 2733e2c..6683259 100644 --- a/tests/test_custom_provider.py +++ b/tests/test_custom_provider.py @@ -62,9 +62,9 @@ class TestCustomProvider: with pytest.raises(ValueError): provider.get_capabilities("o3") - # Test with a custom model (is_custom=true) + # Test with a custom model from the local registry capabilities = provider.get_capabilities("local-llama") - assert capabilities.provider == ProviderType.CUSTOM # local-llama has is_custom=true + assert capabilities.provider == ProviderType.CUSTOM assert capabilities.context_window > 0 finally: diff --git a/tests/test_model_enumeration.py b/tests/test_model_enumeration.py index 0b95154..790387a 100644 --- a/tests/test_model_enumeration.py +++ b/tests/test_model_enumeration.py @@ -181,7 +181,7 @@ class 
TestModelEnumeration: # Configure environment with OpenRouter access only self._setup_environment({"OPENROUTER_API_KEY": "test-openrouter-key"}) - # Create a temporary custom model config with a free variant + # Create a temporary OpenRouter model config with a free variant custom_config = { "models": [ { @@ -199,9 +199,9 @@ class TestModelEnumeration: ] } - config_path = tmp_path / "custom_models.json" + config_path = tmp_path / "openrouter_models.json" config_path.write_text(json.dumps(custom_config), encoding="utf-8") - monkeypatch.setenv("CUSTOM_MODELS_CONFIG_PATH", str(config_path)) + monkeypatch.setenv("OPENROUTER_MODELS_CONFIG_PATH", str(config_path)) # Reset cached registries so the temporary config is loaded from tools.shared.base_tool import BaseTool diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py index 6096764..a04c389 100644 --- a/tests/test_model_restrictions.py +++ b/tests/test_model_restrictions.py @@ -366,8 +366,8 @@ class TestCustomProviderOpenRouterRestrictions: assert not provider.validate_model_name("sonnet") assert not provider.validate_model_name("haiku") - # Should still validate custom models (is_custom=true) regardless of restrictions - assert provider.validate_model_name("local-llama") # This has is_custom=true + # Should still validate custom models defined in conf/custom_models.json + assert provider.validate_model_name("local-llama") @patch.dict(os.environ, {"OPENROUTER_ALLOWED_MODELS": "opus", "OPENROUTER_API_KEY": "test-key"}) def test_custom_provider_openrouter_capabilities_restrictions(self): @@ -389,7 +389,7 @@ class TestCustomProviderOpenRouterRestrictions: with pytest.raises(ValueError): provider.get_capabilities("haiku") - # Should still work for custom models (is_custom=true) + # Should still work for custom models capabilities = provider.get_capabilities("local-llama") assert capabilities.provider == ProviderType.CUSTOM diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py index f38d3e8..4c57f81 100644 --- a/tests/test_openrouter_provider.py +++ b/tests/test_openrouter_provider.py @@ -172,7 +172,7 @@ class TestOpenRouterAutoMode: def mock_resolve(model_name): if model_name in model_names: mock_config = Mock() - mock_config.is_custom = False + mock_config.provider = ProviderType.OPENROUTER mock_config.aliases = [] # Empty list of aliases mock_config.get_effective_capability_rank = Mock(return_value=50) # Add ranking method return mock_config diff --git a/tests/test_openrouter_registry.py b/tests/test_openrouter_registry.py index 866cc3f..bb41ce8 100644 --- a/tests/test_openrouter_registry.py +++ b/tests/test_openrouter_registry.py @@ -3,6 +3,7 @@ import json import os import tempfile +from unittest.mock import patch import pytest @@ -49,7 +50,7 @@ class TestOpenRouterModelRegistry: os.unlink(temp_path) def test_environment_variable_override(self): - """Test OPENROUTER_MODELS_PATH environment variable.""" + """Test OPENROUTER_MODELS_CONFIG_PATH environment variable.""" # Create custom config config_data = { "models": [ @@ -63,8 +64,8 @@ class TestOpenRouterModelRegistry: try: # Set environment variable - original_env = os.environ.get("CUSTOM_MODELS_CONFIG_PATH") - os.environ["CUSTOM_MODELS_CONFIG_PATH"] = temp_path + original_env = os.environ.get("OPENROUTER_MODELS_CONFIG_PATH") + os.environ["OPENROUTER_MODELS_CONFIG_PATH"] = temp_path # Create registry without explicit path registry = OpenRouterModelRegistry() @@ -76,9 +77,9 @@ class TestOpenRouterModelRegistry: finally: # Restore 
environment if original_env is not None: - os.environ["CUSTOM_MODELS_CONFIG_PATH"] = original_env + os.environ["OPENROUTER_MODELS_CONFIG_PATH"] = original_env else: - del os.environ["CUSTOM_MODELS_CONFIG_PATH"] + del os.environ["OPENROUTER_MODELS_CONFIG_PATH"] os.unlink(temp_path) def test_alias_resolution(self): @@ -161,7 +162,7 @@ class TestOpenRouterModelRegistry: os.unlink(temp_path) def test_backwards_compatibility_max_tokens(self): - """Test that old max_tokens field is no longer supported (should result in empty registry).""" + """Test that legacy max_tokens field maps to max_output_tokens.""" config_data = { "models": [ { @@ -178,19 +179,17 @@ class TestOpenRouterModelRegistry: temp_path = f.name try: - # Should gracefully handle the error and result in empty registry - registry = OpenRouterModelRegistry(config_path=temp_path) - # Registry should be empty due to config error - assert len(registry.list_models()) == 0 - assert len(registry.list_aliases()) == 0 - assert registry.resolve("old") is None + with patch.dict("os.environ", {}, clear=True): + with pytest.raises(ValueError, match="max_output_tokens"): + OpenRouterModelRegistry(config_path=temp_path) finally: os.unlink(temp_path) def test_missing_config_file(self): """Test behavior with missing config file.""" # Use a non-existent path - registry = OpenRouterModelRegistry(config_path="/non/existent/path.json") + with patch.dict("os.environ", {}, clear=True): + registry = OpenRouterModelRegistry(config_path="/non/existent/path.json") # Should initialize with empty maps assert len(registry.list_models()) == 0 diff --git a/tests/test_uvx_resource_packaging.py b/tests/test_uvx_resource_packaging.py index 86df066..bbc0571 100644 --- a/tests/test_uvx_resource_packaging.py +++ b/tests/test_uvx_resource_packaging.py @@ -1,5 +1,7 @@ """Tests for uvx path resolution functionality.""" +import json +import tempfile from pathlib import Path from unittest.mock import patch @@ -18,8 +20,8 @@ class TestUvxPathResolution: def test_config_path_resolution(self): """Test that the config path resolution finds the config file in multiple locations.""" # Check that the config file exists in the development location - config_file = Path(__file__).parent.parent / "conf" / "custom_models.json" - assert config_file.exists(), "Config file should exist in conf/custom_models.json" + config_file = Path(__file__).parent.parent / "conf" / "openrouter_models.json" + assert config_file.exists(), "Config file should exist in conf/openrouter_models.json" # Test that a registry can find and use the config registry = OpenRouterModelRegistry() @@ -34,7 +36,7 @@ class TestUvxPathResolution: def test_explicit_config_path_override(self): """Test that explicit config path works correctly.""" - config_path = Path(__file__).parent.parent / "conf" / "custom_models.json" + config_path = Path(__file__).parent.parent / "conf" / "openrouter_models.json" registry = OpenRouterModelRegistry(config_path=str(config_path)) @@ -44,41 +46,62 @@ class TestUvxPathResolution: def test_environment_variable_override(self): """Test that CUSTOM_MODELS_CONFIG_PATH environment variable works.""" - config_path = Path(__file__).parent.parent / "conf" / "custom_models.json" + config_path = Path(__file__).parent.parent / "conf" / "openrouter_models.json" - with patch.dict("os.environ", {"CUSTOM_MODELS_CONFIG_PATH": str(config_path)}): + with patch.dict("os.environ", {"OPENROUTER_MODELS_CONFIG_PATH": str(config_path)}): registry = OpenRouterModelRegistry() # Should use environment path assert 
registry.config_path == config_path
         assert len(registry.list_models()) > 0

-    @patch("providers.openrouter_registry.importlib.resources.files")
-    @patch("pathlib.Path.exists")
-    def test_multiple_path_fallback(self, mock_exists, mock_files):
-        """Test that multiple path resolution works for different deployment scenarios."""
-        # Make resources loading fail to trigger file system fallback
+    @patch("providers.model_registry_base.importlib.resources.files")
+    def test_multiple_path_fallback(self, mock_files):
+        """Test that file-system fallback works when resource loading fails."""
         mock_files.side_effect = Exception("Resource loading failed")

-        # Simulate dev path failing, and working directory path succeeding
-        # The third `True` is for the check within `reload()`
-        mock_exists.side_effect = [False, True, True]
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_dir = Path(tmpdir)
+            conf_dir = temp_dir / "conf"
+            conf_dir.mkdir(parents=True, exist_ok=True)
+            config_path = conf_dir / "openrouter_models.json"
+            config_path.write_text(
+                json.dumps(
+                    {
+                        "models": [
+                            {
+                                "model_name": "test/model",
+                                "aliases": ["testalias"],
+                                "context_window": 1024,
+                                "max_output_tokens": 512,
+                            }
+                        ]
+                    },
+                    indent=2,
+                )
+            )

-        registry = OpenRouterModelRegistry()
+            original_exists = Path.exists

-        # Should have fallen back to file system mode
-        assert not registry.use_resources, "Should fall back to file system when resources fail"
+            def fake_exists(path_self):
+                if str(path_self).endswith("conf/openrouter_models.json") and path_self != config_path:
+                    return False
+                if path_self == config_path:
+                    return True
+                return original_exists(path_self)

-        # Assert that the registry fell back to the second potential path
-        assert registry.config_path == Path.cwd() / "conf" / "custom_models.json"
+            with patch("pathlib.Path.cwd", return_value=temp_dir), patch("pathlib.Path.exists", fake_exists):
+                registry = OpenRouterModelRegistry()

-        # Should load models successfully
-        assert len(registry.list_models()) > 0
+            assert not registry.use_resources
+            assert registry.config_path == config_path
+            assert "test/model" in registry.list_models()

     def test_missing_config_handling(self):
         """Test behavior when config file is missing."""
         # Use a non-existent path
-        registry = OpenRouterModelRegistry(config_path="/nonexistent/path/config.json")
+        with patch.dict("os.environ", {}, clear=True):
+            registry = OpenRouterModelRegistry(config_path="/nonexistent/path/config.json")

         # Should gracefully handle missing config
         assert len(registry.list_models()) == 0
diff --git a/tests/test_xai_provider.py b/tests/test_xai_provider.py
index 392be5b..b9cf06c 100644
--- a/tests/test_xai_provider.py
+++ b/tests/test_xai_provider.py
@@ -166,8 +166,10 @@ class TestXAIProvider:
         """Test model restrictions functionality."""
         # Clear cached restriction service
         import utils.model_restrictions
+        from providers.registry import ModelProviderRegistry

         utils.model_restrictions._restriction_service = None
+        ModelProviderRegistry.reset_for_testing()

         provider = XAIModelProvider("test-key")

@@ -187,8 +189,10 @@ class TestXAIProvider:
         """Test multiple models in restrictions."""
         # Clear cached restriction service
         import utils.model_restrictions
+        from providers.registry import ModelProviderRegistry

         utils.model_restrictions._restriction_service = None
+        ModelProviderRegistry.reset_for_testing()

         provider = XAIModelProvider("test-key")
diff --git a/tools/listmodels.py b/tools/listmodels.py
index 9cbc990..ebdcc8d 100644
--- a/tools/listmodels.py
+++ b/tools/listmodels.py
@@ -11,6 +11,8 @@ from typing import Any, Optional

 from mcp.types import TextContent

+from providers.custom_registry import CustomEndpointModelRegistry
+from providers.openrouter_registry import OpenRouterModelRegistry
 from tools.models import ToolModelCategory, ToolOutput
 from tools.shared.base_models import ToolRequest
 from tools.shared.base_tool import BaseTool
@@ -80,7 +82,6 @@ class ListModelsTool(BaseTool):
         Returns:
             Formatted list of models by provider
         """
-        from providers.openrouter_registry import OpenRouterModelRegistry
         from providers.registry import ModelProviderRegistry
         from providers.shared import ProviderType
         from utils.model_restrictions import get_restriction_service
@@ -99,6 +100,7 @@ class ListModelsTool(BaseTool):
         provider_info = {
             ProviderType.GOOGLE: {"name": "Google Gemini", "env_key": "GEMINI_API_KEY"},
             ProviderType.OPENAI: {"name": "OpenAI", "env_key": "OPENAI_API_KEY"},
+            ProviderType.AZURE: {"name": "Azure OpenAI", "env_key": "AZURE_OPENAI_API_KEY"},
             ProviderType.XAI: {"name": "X.AI (Grok)", "env_key": "XAI_API_KEY"},
             ProviderType.DIAL: {"name": "AI DIAL", "env_key": "DIAL_API_KEY"},
         }
@@ -317,12 +319,12 @@
             output_lines.append("**Description**: Local models via Ollama, vLLM, LM Studio, etc.")

             try:
-                registry = OpenRouterModelRegistry()
+                registry = CustomEndpointModelRegistry()
                 custom_models = []

                 for alias in registry.list_aliases():
                     config = registry.resolve(alias)
-                    if config and config.is_custom:
+                    if config:
                         custom_models.append((alias, config))

                 if custom_models:
diff --git a/tools/shared/base_tool.py b/tools/shared/base_tool.py
index ac72d7d..d041e7b 100644
--- a/tools/shared/base_tool.py
+++ b/tools/shared/base_tool.py
@@ -82,6 +82,7 @@ class BaseTool(ABC):

     # Class-level cache for OpenRouter registry to avoid multiple loads
     _openrouter_registry_cache = None
+    _custom_registry_cache = None

     @classmethod
     def _get_openrouter_registry(cls):
@@ -94,6 +95,16 @@
             logger.debug("Created cached OpenRouter registry instance")
         return BaseTool._openrouter_registry_cache

+    @classmethod
+    def _get_custom_registry(cls):
+        """Get cached custom-endpoint registry instance."""
+        if BaseTool._custom_registry_cache is None:
+            from providers.custom_registry import CustomEndpointModelRegistry
+
+            BaseTool._custom_registry_cache = CustomEndpointModelRegistry()
+            logger.debug("Created cached Custom registry instance")
+        return BaseTool._custom_registry_cache
+
     def __init__(self):
         # Cache tool metadata at initialization to avoid repeated calls
         self.name = self.get_name()
@@ -266,14 +277,10 @@
         custom_url = get_env("CUSTOM_API_URL")
         if custom_url:
             try:
-                registry = self._get_openrouter_registry()
-                # Find all custom models (is_custom=true)
+                registry = self._get_custom_registry()
                 for alias in registry.list_aliases():
-                    config = registry.resolve(alias)
-                    # Check if this is a custom model that requires custom endpoints
-                    if config and config.is_custom:
-                        if alias not in all_models:
-                            all_models.append(alias)
+                    if alias not in all_models:
+                        all_models.append(alias)
             except Exception as e:
                 import logging
@@ -1282,12 +1289,7 @@ When recommending searches, be specific about what information you need and why
         try:
             registry = self._get_openrouter_registry()
-            # Include every known alias so MCP enum matches registry capabilities
             for alias in registry.list_aliases():
-                config = registry.resolve(alias)
-                if config and config.is_custom:
-                    # Custom-only models require CUSTOM_API_URL; defer to custom block
-                    continue
                 if alias not in all_models:
                     all_models.append(alias)
         except Exception as exc:  # pragma: no cover - logged for observability
@@ -1299,10 +1301,9 @@ When recommending searches, be specific about what information you need and why
         custom_url = get_env("CUSTOM_API_URL")
         if custom_url:
             try:
-                registry = self._get_openrouter_registry()
+                registry = self._get_custom_registry()
                 for alias in registry.list_aliases():
-                    config = registry.resolve(alias)
-                    if config and config.is_custom and alias not in all_models:
+                    if alias not in all_models:
                         all_models.append(alias)
             except Exception as exc:  # pragma: no cover - logged for observability
                 import logging
diff --git a/utils/env.py b/utils/env.py
index 4cff03c..17aad87 100644
--- a/utils/env.py
+++ b/utils/env.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import os
 from collections.abc import Mapping
+from contextlib import contextmanager
 from pathlib import Path

 try:
@@ -86,3 +87,25 @@ def get_all_env() -> dict[str, str | None]:
     """Expose the loaded .env mapping for diagnostics/logging."""

     return dict(_DOTENV_VALUES)
+
+
+@contextmanager
+def suppress_env_vars(*names: str):
+    """Temporarily remove environment variables during the context.
+
+    Args:
+        names: Environment variable names to remove. Empty or falsy names are ignored.
+    """
+
+    removed: dict[str, str] = {}
+    try:
+        for name in names:
+            if not name:
+                continue
+            if name in os.environ:
+                removed[name] = os.environ[name]
+                del os.environ[name]
+        yield
+    finally:
+        for name, value in removed.items():
+            os.environ[name] = value
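
Usage sketch (not part of the patch): a minimal illustration of how a test might call the new suppress_env_vars helper from utils/env.py above. The test name and the suppressed variable are assumptions for the example; only suppress_env_vars itself comes from the diff.

    from utils.env import suppress_env_vars


    def test_provider_absent_without_key():
        # Hypothetical test: hide OPENAI_API_KEY so the code under test
        # behaves as if no OpenAI credentials were configured.
        with suppress_env_vars("OPENAI_API_KEY"):
            ...  # exercise code that must not see the key
        # On exit, any previously set value is restored by the context manager.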