From 3960835793d3fe4d5f19e05f827d6f8d713f59b1 Mon Sep 17 00:00:00 2001 From: Brad Fair Date: Sun, 22 Jun 2025 12:09:58 -0500 Subject: [PATCH 1/9] Fix duplicate version tool in MCP tool list (#121) * fix: remove duplicate version tool registration The version tool was appearing twice in the MCP tool list due to: - VersionTool class properly registered in TOOLS dictionary (line 181) - Hardcoded Tool() registration in handle_list_tools() (lines 451-462) This duplicate was leftover from the architectural migration: - June 8, 2025: Original hardcoded "get_version" tool added - June 14, 2025: Renamed from "get_version" to "version" - June 21, 2025: VersionTool class added during workflow architecture migration - The old hardcoded registration was never removed The hardcoded registration has been removed since VersionTool provides identical functionality through the proper architecture. Fixes: BeehiveInnovations/zen-mcp-server#120 * fix: complete removal of legacy version tool code Following up on the duplicate version tool fix, this commit removes all remaining dead code identified by Gemini Code Assist: - Removed dead elif block for version tool (lines 639-643) This block was unreachable since version is handled by TOOLS registry - Removed orphaned handle_version() function (lines 942-1030) No longer called after elif block removal - Fixed imports: removed unused __author__ and __updated__ imports These were remnants from the June 2025 migration from function-based to class-based tools. The VersionTool class now handles all version functionality through the standard tool architecture. All 546 tests pass - no functional changes. Related to: BeehiveInnovations/zen-mcp-server#120 --- server.py | 114 ------------------------------------------------------ 1 file changed, 114 deletions(-) diff --git a/server.py b/server.py index 2e3bdb2..1b0f969 100644 --- a/server.py +++ b/server.py @@ -23,7 +23,6 @@ import logging import os import sys import time -from datetime import datetime from logging.handlers import RotatingFileHandler from pathlib import Path from typing import Any, Optional @@ -52,8 +51,6 @@ from mcp.types import ( # noqa: E402 from config import ( # noqa: E402 DEFAULT_MODEL, - __author__, - __updated__, __version__, ) from tools import ( # noqa: E402 @@ -446,21 +443,6 @@ async def handle_list_tools() -> list[Tool]: ) ) - # Add utility tools that provide server metadata and configuration info - # These tools don't require AI processing but are useful for clients - tools.extend( - [ - Tool( - name="version", - description=( - "VERSION & CONFIGURATION - Get server version, configuration details, " - "and list of available tools. Useful for debugging and understanding capabilities." 
- ), - inputSchema={"type": "object", "properties": {}}, - ), - ] - ) - # Log cache efficiency info if os.getenv("OPENROUTER_API_KEY") and os.getenv("OPENROUTER_API_KEY") != "your_openrouter_api_key_here": logger.debug("OpenRouter registry cache used efficiently across all tool schemas") @@ -650,13 +632,6 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon pass return result - # Route to utility tools that provide server information - elif name == "version": - logger.info(f"Executing utility tool '{name}'") - result = await handle_version() - logger.info(f"Utility tool '{name}' execution completed") - return result - # Handle unknown tool requests gracefully else: return [TextContent(type="text", text=f"Unknown tool: {name}")] @@ -960,95 +935,6 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any return enhanced_arguments -async def handle_version() -> list[TextContent]: - """ - Get comprehensive version and configuration information about the server. - - Provides details about the server version, configuration settings, - available tools, and runtime environment. Useful for debugging and - understanding the server's capabilities. - - Returns: - Formatted text with version and configuration details - """ - # Import thinking mode here to avoid circular imports - from config import DEFAULT_THINKING_MODE_THINKDEEP - - # Gather comprehensive server information - version_info = { - "version": __version__, - "updated": __updated__, - "author": __author__, - "default_model": DEFAULT_MODEL, - "default_thinking_mode_thinkdeep": DEFAULT_THINKING_MODE_THINKDEEP, - "max_context_tokens": "Dynamic (model-specific)", - "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", - "server_started": datetime.now().isoformat(), - "available_tools": list(TOOLS.keys()) + ["version"], - } - - # Check configured providers and available models - from providers import ModelProviderRegistry - from providers.base import ProviderType - - configured_providers = [] - available_models = ModelProviderRegistry.get_available_models(respect_restrictions=True) - - # Group models by provider - models_by_provider = {} - for model_name, provider_type in available_models.items(): - if provider_type not in models_by_provider: - models_by_provider[provider_type] = [] - models_by_provider[provider_type].append(model_name) - - # Format provider information with actual available models - if ProviderType.GOOGLE in models_by_provider: - gemini_models = ", ".join(sorted(models_by_provider[ProviderType.GOOGLE])) - configured_providers.append(f"Gemini ({gemini_models})") - if ProviderType.OPENAI in models_by_provider: - openai_models = ", ".join(sorted(models_by_provider[ProviderType.OPENAI])) - configured_providers.append(f"OpenAI ({openai_models})") - if ProviderType.XAI in models_by_provider: - xai_models = ", ".join(sorted(models_by_provider[ProviderType.XAI])) - configured_providers.append(f"X.AI ({xai_models})") - if ProviderType.CUSTOM in models_by_provider: - custom_models = ", ".join(sorted(models_by_provider[ProviderType.CUSTOM])) - custom_url = os.getenv("CUSTOM_API_URL", "") - configured_providers.append(f"Custom API ({custom_url}) - Models: {custom_models}") - if ProviderType.OPENROUTER in models_by_provider: - # For OpenRouter, show a summary since there could be many models - openrouter_count = len(models_by_provider[ProviderType.OPENROUTER]) - configured_providers.append(f"OpenRouter ({openrouter_count} models via 
conf/custom_models.json)") - - # Format the information in a human-readable way - text = f"""Zen MCP Server v{__version__} -Updated: {__updated__} -Author: {__author__} - -Configuration: -- Default Model: {DEFAULT_MODEL} -- Default Thinking Mode (ThinkDeep): {DEFAULT_THINKING_MODE_THINKDEEP} -- Max Context: Dynamic (model-specific) -- Python: {version_info["python_version"]} -- Started: {version_info["server_started"]} - -Configured Providers: -{chr(10).join(f" - {provider}" for provider in configured_providers)} - -Available Tools: -{chr(10).join(f" - {tool}" for tool in version_info["available_tools"])} - -All Available Models: -{chr(10).join(f" - {model}" for model in sorted(available_models.keys()))} - -For updates, visit: https://github.com/BeehiveInnovations/zen-mcp-server""" - - # Create standardized tool output - tool_output = ToolOutput(status="success", content=text, content_type="text", metadata={"tool_name": "version"}) - - return [TextContent(type="text", text=tool_output.model_dump_json())] - - @server.list_prompts() async def handle_list_prompts() -> list[Prompt]: """ From 669160755aacebc41c19434b4b73f9286ec2fb6f Mon Sep 17 00:00:00 2001 From: Fahad Date: Sun, 22 Jun 2025 10:23:09 -0700 Subject: [PATCH 2/9] Detect pyenv when available --- .gitattributes | 27 ++++++++ run-server.sh | 174 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 191 insertions(+), 10 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c8f9e2f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,27 @@ +# Ensure shell scripts always have LF line endings on checkout +*.sh text eol=lf +*.bash text eol=lf + +# Python files +*.py text eol=lf + +# Shell script without extension +run-server text eol=lf +code_quality_checks text eol=lf +run_integration_tests text eol=lf + +# General text files +*.md text +*.txt text +*.yml text +*.yaml text +*.json text +*.xml text + +# Binary files +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.pdf binary \ No newline at end of file diff --git a/run-server.sh b/run-server.sh index 4b1a890..d2d0ebe 100755 --- a/run-server.sh +++ b/run-server.sh @@ -8,6 +8,16 @@ set -euo pipefail # Handles environment setup, dependency installation, and configuration. 
# ============================================================================ +# Initialize pyenv if available (do this early) +if [[ -d "$HOME/.pyenv" ]]; then + export PYENV_ROOT="$HOME/.pyenv" + export PATH="$PYENV_ROOT/bin:$PATH" + if command -v pyenv &> /dev/null; then + eval "$(pyenv init --path)" 2>/dev/null || true + eval "$(pyenv init -)" 2>/dev/null || true + fi +fi + # ---------------------------------------------------------------------------- # Constants and Configuration # ---------------------------------------------------------------------------- @@ -183,6 +193,12 @@ cleanup_docker() { # Find suitable Python command find_python() { + # Pyenv should already be initialized at script start, but check if .python-version exists + if [[ -f ".python-version" ]] && command -v pyenv &> /dev/null; then + # Ensure pyenv respects the local .python-version + pyenv local &>/dev/null || true + fi + # Prefer Python 3.12 for best compatibility local python_cmds=("python3.12" "python3.13" "python3.11" "python3.10" "python3" "python" "py") @@ -195,24 +211,157 @@ find_python() { # Check minimum version (3.10) for better library compatibility if [[ $major_version -ge 10 ]]; then - echo "$cmd" - print_success "Found Python: $version" - - # Recommend Python 3.12 - if [[ $major_version -ne 12 ]]; then - print_info "Note: Python 3.12 is recommended for best compatibility." + # Verify the command actually exists (important for pyenv) + if command -v "$cmd" &> /dev/null; then + echo "$cmd" + print_success "Found Python: $version" + + # Recommend Python 3.12 + if [[ $major_version -ne 12 ]]; then + print_info "Note: Python 3.12 is recommended for best compatibility." + fi + + return 0 fi - - return 0 fi fi fi done - print_error "Python 3.10+ not found. Please install Python 3.10 or newer (3.12 recommended)." + # No suitable Python found - check if we can use pyenv + local os_type=$(detect_os) + + # Check for pyenv on Unix-like systems (macOS/Linux) + if [[ "$os_type" == "macos" || "$os_type" == "linux" || "$os_type" == "wsl" ]]; then + if command -v pyenv &> /dev/null; then + # pyenv exists, check if Python 3.12 is installed + if ! pyenv versions 2>/dev/null | grep -E "3\.(1[2-9]|[2-9][0-9])" >/dev/null; then + echo "" + echo "Python 3.10+ is required. Pyenv can install Python 3.12 locally for this project." + read -p "Install Python 3.12 using pyenv? (Y/n): " -n 1 -r + echo "" + if [[ ! $REPLY =~ ^[Nn]$ ]]; then + if install_python_with_pyenv; then + # Try finding Python again + if python_cmd=$(find_python); then + echo "$python_cmd" + return 0 + fi + fi + fi + else + # Python 3.12+ is installed in pyenv but may not be active + # Check if .python-version exists + if [[ ! -f ".python-version" ]] || ! grep -qE "3\.(1[2-9]|[2-9][0-9])" .python-version 2>/dev/null; then + echo "" + print_info "Python 3.12 is installed via pyenv but not set for this project." + read -p "Set Python 3.12.0 for this project? (Y/n): " -n 1 -r + echo "" + if [[ ! 
$REPLY =~ ^[Nn]$ ]]; then + # Find the first suitable Python version + local py_version=$(pyenv versions --bare | grep -E "^3\.(1[2-9]|[2-9][0-9])" | head -1) + if [[ -n "$py_version" ]]; then + pyenv local "$py_version" + print_success "Set Python $py_version for this project" + # Re-initialize pyenv to pick up the change + eval "$(pyenv init --path)" 2>/dev/null || true + eval "$(pyenv init -)" 2>/dev/null || true + # Try finding Python again + if python_cmd=$(find_python); then + echo "$python_cmd" + return 0 + fi + fi + fi + fi + fi + else + # No pyenv installed - show instructions + echo "" >&2 + print_error "Python 3.10+ not found. The 'mcp' package requires Python 3.10+." + echo "" >&2 + + if [[ "$os_type" == "macos" ]]; then + echo "To install Python locally for this project:" >&2 + echo "" >&2 + echo "1. Install pyenv (manages Python versions per project):" >&2 + echo " brew install pyenv" >&2 + echo "" >&2 + echo "2. Add to ~/.zshrc:" >&2 + echo ' export PYENV_ROOT="$HOME/.pyenv"' >&2 + echo ' export PATH="$PYENV_ROOT/bin:$PATH"' >&2 + echo ' eval "$(pyenv init -)"' >&2 + echo "" >&2 + echo "3. Restart terminal, then run:" >&2 + echo " pyenv install 3.12.0" >&2 + echo " cd $(pwd)" >&2 + echo " pyenv local 3.12.0" >&2 + echo " ./run-server.sh" >&2 + else + # Linux/WSL + echo "To install Python locally for this project:" >&2 + echo "" >&2 + echo "1. Install pyenv:" >&2 + echo " curl https://pyenv.run | bash" >&2 + echo "" >&2 + echo "2. Add to ~/.bashrc:" >&2 + echo ' export PYENV_ROOT="$HOME/.pyenv"' >&2 + echo ' export PATH="$PYENV_ROOT/bin:$PATH"' >&2 + echo ' eval "$(pyenv init -)"' >&2 + echo "" >&2 + echo "3. Restart terminal, then run:" >&2 + echo " pyenv install 3.12.0" >&2 + echo " cd $(pwd)" >&2 + echo " pyenv local 3.12.0" >&2 + echo " ./run-server.sh" >&2 + fi + fi + else + # Other systems (shouldn't happen with bash script) + print_error "Python 3.10+ not found. Please install Python 3.10 or newer." + fi + return 1 } +# Install Python with pyenv (when pyenv is already installed) +install_python_with_pyenv() { + # Ensure pyenv is initialized + export PYENV_ROOT="${PYENV_ROOT:-$HOME/.pyenv}" + export PATH="$PYENV_ROOT/bin:$PATH" + eval "$(pyenv init -)" 2>/dev/null || true + + print_info "Installing Python 3.12 (this may take a few minutes)..." 
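+    # -s skips the build if 3.12.0 is already installed, so re-runs are cheap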
+ if pyenv install -s 3.12.0; then + print_success "Python 3.12 installed" + + # Set local Python version for this project + pyenv local 3.12.0 + print_success "Python 3.12 set for this project" + + # Show shell configuration instructions + echo "" + print_info "To make pyenv work in new terminals, add to your shell config:" + local shell_config="~/.zshrc" + if [[ "$SHELL" == *"bash"* ]]; then + shell_config="~/.bashrc" + fi + echo ' export PYENV_ROOT="$HOME/.pyenv"' + echo ' command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' + echo ' eval "$(pyenv init -)"' + echo "" + + # Re-initialize pyenv to use the newly installed Python + eval "$(pyenv init --path)" 2>/dev/null || true + eval "$(pyenv init -)" 2>/dev/null || true + + return 0 + else + print_error "Failed to install Python 3.12" + return 1 + fi +} + # Detect Linux distribution detect_linux_distro() { if [[ -f /etc/os-release ]]; then @@ -814,6 +963,8 @@ validate_api_keys() { echo " XAI_API_KEY=your-actual-key" >&2 echo " OPENROUTER_API_KEY=your-actual-key" >&2 echo "" >&2 + print_info "After adding your API keys, run ./run-server.sh again" >&2 + echo "" >&2 return 1 fi @@ -1220,7 +1371,10 @@ main() { # Step 2: Find Python local python_cmd - python_cmd=$(find_python) || exit 1 + if ! python_cmd=$(find_python); then + # find_python already printed error messages, just exit + exit 1 + fi # Step 3: Setup environment file setup_env_file || exit 1 From dc6083694d186ebb75a18fe40007d8a6ee424bb9 Mon Sep 17 00:00:00 2001 From: Fahad Date: Sun, 22 Jun 2025 22:56:47 +0400 Subject: [PATCH 3/9] Updated readme with recommendations --- docs/tools/debug.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/tools/debug.md b/docs/tools/debug.md index 8ba7389..a6955ea 100644 --- a/docs/tools/debug.md +++ b/docs/tools/debug.md @@ -2,19 +2,21 @@ **Step-by-step investigation followed by expert debugging assistance** -The `debug` tool guides Claude through a systematic investigation process where Claude performs methodical code examination, evidence collection, and hypothesis formation across multiple steps. Once the investigation is complete, the tool provides expert analysis from the selected AI model based on all gathered findings. - -## Thinking Mode - -**Default is `medium` (8,192 tokens).** Use `high` for tricky bugs (investment in finding root cause) or `low` for simple errors (save tokens). +The `debug` workflow guides Claude through a systematic investigation process where Claude performs methodical code +examination, evidence collection, and hypothesis formation across multiple steps. Once the investigation is complete, +the tool provides expert analysis from the selected AI model (optionally) based on all gathered findings. ## Example Prompts -**Basic Usage:** ``` Get gemini to debug why my API returns 400 errors randomly with the full stack trace: [paste traceback] ``` +You can also ask it to debug on its own, no external model required (**recommended in most cases**). 
+``` +Use debug tool to find out why the app is crashing, here are some app logs [paste app logs] and a crash trace: [paste crash trace] +``` + ## How It Works The debug tool implements a **systematic investigation methodology** where Claude is guided through structured debugging steps: From 92a16b57cf2cef8566b9c2b7e015374ab7b4e106 Mon Sep 17 00:00:00 2001 From: Fahad Date: Sun, 22 Jun 2025 22:59:42 +0400 Subject: [PATCH 4/9] Updated readme with recommendations --- docs/tools/debug.md | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/docs/tools/debug.md b/docs/tools/debug.md index a6955ea..7efc454 100644 --- a/docs/tools/debug.md +++ b/docs/tools/debug.md @@ -80,39 +80,34 @@ This structured approach ensures Claude performs methodical groundwork before ex ## Usage Examples -**Basic Error Debugging:** +**Error Debugging:** ``` -"Debug this TypeError: 'NoneType' object has no attribute 'split' in my parser.py" +Debug this TypeError: 'NoneType' object has no attribute 'split' in my parser.py ``` **With Stack Trace:** ``` -"Use gemini to debug why my API returns 500 errors with this stack trace: [paste full traceback]" +Use gemini to debug why my API returns 500 errors with this stack trace: [paste full traceback] ``` **With File Context:** ``` -"Debug the authentication failure in auth.py and user_model.py with o3" +Debug without using external model, the authentication failure in auth.py and user_model.py ``` **Performance Debugging:** ``` -"Use pro to debug why my application is consuming excessive memory during bulk operations" -``` - -**With Visual Context:** -``` -"Debug this crash using the error screenshot and the related crash_report.log" +Debug without using external model to find out why the app is consuming excessive memory during bulk edit operations ``` **Runtime Environment Issues:** ``` -"Debug deployment issues with server startup failures, here's the runtime info: [environment details]" +Debug deployment issues with server startup failures, here's the runtime info: [environment details] ``` ## Investigation Methodology -The debug tool enforces a structured investigation process: +The debug tool enforces a thorough, structured investigation process: **Step-by-Step Investigation (Claude-Led):** 1. **Initial Problem Description:** Claude describes the issue and begins thinking about possible causes, side-effects, and contributing factors @@ -122,7 +117,7 @@ The debug tool enforces a structured investigation process: 5. **Iterative Refinement:** Claude can backtrack and revise previous steps as understanding evolves 6. **Investigation Completion:** Claude signals when sufficient evidence has been gathered -**Expert Analysis Phase (AI Model):** +**Expert Analysis Phase (Another AI Model When Used):** Once investigation is complete, the selected AI model performs: - **Root Cause Analysis:** Deep analysis of all investigation findings and evidence - **Solution Recommendations:** Specific fixes with implementation guidance From 874e730a4166e215502706c4fc24e9b402c280ea Mon Sep 17 00:00:00 2001 From: Fahad Date: Sun, 22 Jun 2025 23:04:55 +0400 Subject: [PATCH 5/9] Updated readme with recommendations --- README.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e187452..e4cd957 100644 --- a/README.md +++ b/README.md @@ -390,9 +390,19 @@ Nice! 
This is just one instance - take a look at [another example here](docs/too Systematic investigation-guided debugging that walks Claude through step-by-step root cause analysis. This workflow tool enforces a structured investigation process where Claude performs methodical code examination, evidence collection, and hypothesis formation across multiple steps before receiving expert analysis from the selected AI model. When Claude's confidence reaches **100% certainty** during the investigative workflow, expert analysis via another model is skipped to save on tokens and cost, and Claude proceeds directly to fixing the issue. ``` -See logs under /Users/me/project/diagnostics.log and related code under the sync folder. Logs show that sync -works but sometimes it gets stuck and there are no errors displayed to the user. Using zen's debug tool with gemini pro, find out -why this is happening and what the root cause is and its fix +See logs under /Users/me/project/diagnostics.log and related code under the sync folder. +Logs show that sync works but sometimes it gets stuck and there are no errors displayed to +the user. Using zen's debug tool with gemini pro, find out why this is happening and what the root +cause is and its fix +``` + +You can also add `do not use another model` to make Claude perform the entire workflow on its own. This is recommended +for most debugging workflows, as Claude is able to confidently find the bug by the time the workflow ends. + +When in doubt, you can always follow up with a new prompt and ask Claude to share its findings with another model: + +```text +Use continuation with thinkdeep, share details with o4-mini-high to find out what the best fix is for this ``` **[📖 Read More](docs/tools/debug.md)** - Step-by-step investigation methodology with workflow enforcement From 8262d47c1e81600c981dd4f7814a6099632b243e Mon Sep 17 00:00:00 2001 From: Fahad Date: Sun, 22 Jun 2025 23:05:44 +0400 Subject: [PATCH 6/9] Updated readme with recommendations --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e4cd957..c540380 100644 --- a/README.md +++ b/README.md @@ -397,7 +397,7 @@ cause is and its fix ``` You can also add `do not use another model` to make Claude perform the entire workflow on its own. This is recommended -for most debugging workflows, as Claude is able to confidently find the bug by the time the workflow ends. +for most debugging workflows, as Claude is usually able to confidently find the root cause by the time the workflow ends. 
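For example, an illustrative prompt (adapt the details to your own project):

```
Use zen's debug tool to find out why sync intermittently gets stuck; do not use another model
```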
When in doubt, you can always follow up with a new prompt and ask Claude to share its findings with another model: From 4ae0344b146201933155f3f8a206d1f941a37e4b Mon Sep 17 00:00:00 2001 From: omryn-vera Date: Mon, 23 Jun 2025 11:57:13 +0200 Subject: [PATCH 7/9] feat: Update Claude model references from v3 to v4 (fixes issue #118) (#119) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Update Claude model references from v3 to v4 - Update model configurations from claude-3-opus to claude-4-opus - Update model configurations from claude-3-sonnet to claude-4-sonnet - Maintain backward compatibility through existing aliases (opus, sonnet, claude) - Update provider registry preferred models list - Update all test cases and assertions to reflect new model names - Update documentation and examples consistently across all files - Add Claude 4 model support while preserving existing functionality Files modified: 15 (config, docs, providers, tests, tools) Pattern: Systematic claude-3-* → claude-4-* model reference migration 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude * PR feedback: changed anthropic/claude-4-opus -> anthropic/claude-opus-4 and anthropic/claude-4-haiku -> anthropic/claude-3.5-haiku * changed anthropic/claude-4-sonnet -> anthropic/claude-sonnet-4 * PR feedback removed specific model from test mock * PR feedback removed base.py --------- Co-authored-by: Omry Nachman Co-authored-by: Claude --- conf/custom_models.json | 20 ++++++------ docs/adding_providers.md | 2 +- docs/custom_models.md | 16 +++++----- providers/registry.py | 4 +-- simulator_tests/test_openrouter_models.py | 2 +- tests/test_auto_mode_comprehensive.py | 2 +- tests/test_listmodels_restrictions.py | 14 ++++----- tests/test_model_restrictions.py | 6 ++-- tests/test_openrouter_provider.py | 38 +++++++++++------------ tests/test_openrouter_registry.py | 12 +++---- tests/test_per_tool_model_defaults.py | 4 +-- tests/test_provider_routing_bugs.py | 4 +-- tools/shared/base_tool.py | 2 +- 13 files changed, 63 insertions(+), 63 deletions(-) diff --git a/conf/custom_models.json b/conf/custom_models.json index 2b9f7c7..2a3bcf3 100644 --- a/conf/custom_models.json +++ b/conf/custom_models.json @@ -7,7 +7,7 @@ "Self-hosted APIs - Any OpenAI-compatible endpoint" ], "documentation": "https://github.com/BeehiveInnovations/zen-mcp-server/blob/main/docs/custom_models.md", - "usage": "Models can be accessed via aliases (e.g., 'opus', 'local-llama') or full names (e.g., 'anthropic/claude-3-opus', 'llama3.2')", + "usage": "Models can be accessed via aliases (e.g., 'opus', 'local-llama') or full names (e.g., 'anthropic/claude-opus-4', 'llama3.2')", "instructions": [ "Add new models by copying an existing entry and modifying it", "Aliases are case-insensitive and should be unique across all models", @@ -15,11 +15,11 @@ "Set supports_* flags based on the model's actual capabilities", "Set is_custom=true for models that should ONLY work with custom endpoints (Ollama, vLLM, etc.)", "Models not listed here will use generic defaults (32K context window, basic features)", - "For OpenRouter models: Use official OpenRouter model names (e.g., 'anthropic/claude-3-opus')", + "For OpenRouter models: Use official OpenRouter model names (e.g., 'anthropic/claude-opus-4')", "For local/custom models: Use model names as they appear in your API (e.g., 'llama3.2', 'gpt-3.5-turbo')" ], "field_descriptions": { - "model_name": "The model identifier - OpenRouter format (e.g., 
'anthropic/claude-3-opus') or custom model name (e.g., 'llama3.2')", + "model_name": "The model identifier - OpenRouter format (e.g., 'anthropic/claude-opus-4') or custom model name (e.g., 'llama3.2')", "aliases": "Array of short names users can type instead of the full model name", "context_window": "Total number of tokens the model can process (input + output combined)", "supports_extended_thinking": "Whether the model supports extended reasoning tokens (currently none do via OpenRouter or custom APIs)", @@ -49,29 +49,29 @@ }, "models": [ { - "model_name": "anthropic/claude-3-opus", - "aliases": ["opus", "claude-opus", "claude3-opus", "claude-3-opus"], + "model_name": "anthropic/claude-opus-4", + "aliases": ["opus", "claude-opus", "claude4-opus", "claude-4-opus"], "context_window": 200000, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, "supports_images": true, "max_image_size_mb": 5.0, - "description": "Claude 3 Opus - Most capable Claude model with vision" + "description": "Claude 4 Opus - Most capable Claude model with vision" }, { - "model_name": "anthropic/claude-3-sonnet", - "aliases": ["sonnet", "claude-sonnet", "claude3-sonnet", "claude-3-sonnet", "claude"], + "model_name": "anthropic/claude-sonnet-4", + "aliases": ["sonnet", "claude-sonnet", "claude4-sonnet", "claude-4-sonnet", "claude"], "context_window": 200000, "supports_extended_thinking": false, "supports_json_mode": false, "supports_function_calling": false, "supports_images": true, "max_image_size_mb": 5.0, - "description": "Claude 3 Sonnet - Balanced performance with vision" + "description": "Claude 4 Sonnet - Balanced performance with vision" }, { - "model_name": "anthropic/claude-3-haiku", + "model_name": "anthropic/claude-3.5-haiku", "aliases": ["haiku", "claude-haiku", "claude3-haiku", "claude-3-haiku"], "context_window": 200000, "supports_extended_thinking": false, diff --git a/docs/adding_providers.md b/docs/adding_providers.md index f3f3a4e..c93574f 100644 --- a/docs/adding_providers.md +++ b/docs/adding_providers.md @@ -690,7 +690,7 @@ When a user requests a model (e.g., "pro", "o3", "example-large-v1"), the system 2. OpenAI skips (Gemini already handled it) 3. OpenRouter never sees it -### Example: Model "claude-3-opus" +### Example: Model "claude-4-opus" 1. **Gemini provider** checks: NO, not my model → skip 2. 
**OpenAI provider** checks: NO, not my model → skip diff --git a/docs/custom_models.md b/docs/custom_models.md index 8094675..45f6967 100644 --- a/docs/custom_models.md +++ b/docs/custom_models.md @@ -41,9 +41,9 @@ The server uses `conf/custom_models.json` to map convenient aliases to both Open | Alias | Maps to OpenRouter Model | |-------|-------------------------| -| `opus` | `anthropic/claude-3-opus` | -| `sonnet`, `claude` | `anthropic/claude-3-sonnet` | -| `haiku` | `anthropic/claude-3-haiku` | +| `opus` | `anthropic/claude-opus-4` | +| `sonnet`, `claude` | `anthropic/claude-sonnet-4` | +| `haiku` | `anthropic/claude-3.5-haiku` | | `gpt4o`, `4o` | `openai/gpt-4o` | | `gpt4o-mini`, `4o-mini` | `openai/gpt-4o-mini` | | `pro`, `gemini` | `google/gemini-2.5-pro` | @@ -151,8 +151,8 @@ CUSTOM_MODEL_NAME=your-loaded-model **Using model aliases (from conf/custom_models.json):** ``` # OpenRouter models: -"Use opus for deep analysis" # → anthropic/claude-3-opus -"Use sonnet to review this code" # → anthropic/claude-3-sonnet +"Use opus for deep analysis" # → anthropic/claude-opus-4 +"Use sonnet to review this code" # → anthropic/claude-sonnet-4 "Use pro via zen to analyze this" # → google/gemini-2.5-pro "Use gpt4o via zen to analyze this" # → openai/gpt-4o "Use mistral via zen to optimize" # → mistral/mistral-large @@ -165,7 +165,7 @@ CUSTOM_MODEL_NAME=your-loaded-model **Using full model names:** ``` # OpenRouter models: -"Use anthropic/claude-3-opus via zen for deep analysis" +"Use anthropic/claude-opus-4 via zen for deep analysis" "Use openai/gpt-4o via zen to debug this" "Use deepseek/deepseek-coder via zen to generate code" @@ -249,7 +249,7 @@ Edit `conf/custom_models.json` to add new models. The configuration supports bot Popular models available through OpenRouter: - **GPT-4** - OpenAI's most capable model -- **Claude 3** - Anthropic's models (Opus, Sonnet, Haiku) +- **Claude 4** - Anthropic's models (Opus, Sonnet, Haiku) - **Mistral** - Including Mistral Large - **Llama 3** - Meta's open models - Many more at [openrouter.ai/models](https://openrouter.ai/models) @@ -258,4 +258,4 @@ Popular models available through OpenRouter: - **"Model not found"**: Check exact model name at openrouter.ai/models - **"Insufficient credits"**: Add credits to your OpenRouter account -- **"Model not available"**: Check your OpenRouter dashboard for model access permissions \ No newline at end of file +- **"Model not available"**: Check your OpenRouter dashboard for model access permissions diff --git a/providers/registry.py b/providers/registry.py index 8fa0478..a5efcf0 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -402,8 +402,8 @@ class ModelProviderRegistry: if openrouter_provider: # Prefer models known for deep reasoning preferred_models = [ - "anthropic/claude-3.5-sonnet", - "anthropic/claude-3-opus-20240229", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", "google/gemini-2.5-pro", "google/gemini-pro-1.5", "meta-llama/llama-3.1-70b-instruct", diff --git a/simulator_tests/test_openrouter_models.py b/simulator_tests/test_openrouter_models.py index 5a52efd..bd69806 100644 --- a/simulator_tests/test_openrouter_models.py +++ b/simulator_tests/test_openrouter_models.py @@ -117,7 +117,7 @@ class OpenRouterModelsTest(BaseSimulatorTest): self.logger.info(" ✅ Direct OpenRouter model call completed") # Test 5: OpenRouter alias from config - self.logger.info(" 5: Testing OpenRouter alias from config ('opus' -> anthropic/claude-3-opus)") + self.logger.info(" 5: Testing OpenRouter alias 
from config ('opus' -> anthropic/claude-opus-4)") response5, _ = self.call_mcp_tool( "chat", diff --git a/tests/test_auto_mode_comprehensive.py b/tests/test_auto_mode_comprehensive.py index d7e00ae..8539fdf 100644 --- a/tests/test_auto_mode_comprehensive.py +++ b/tests/test_auto_mode_comprehensive.py @@ -527,7 +527,7 @@ class TestAutoModeComprehensive: "google/gemini-2.5-pro", "openai/o3", "openai/o4-mini", - "anthropic/claude-3-opus", + "anthropic/claude-opus-4", ] with patch.object(OpenRouterProvider, "_registry", mock_registry): diff --git a/tests/test_listmodels_restrictions.py b/tests/test_listmodels_restrictions.py index 8d26902..5d9f06d 100644 --- a/tests/test_listmodels_restrictions.py +++ b/tests/test_listmodels_restrictions.py @@ -53,8 +53,8 @@ class TestListModelsRestrictions(unittest.TestCase): # Set up mock to return only allowed models when restrictions are respected # Include both aliased models and full model names without aliases self.mock_openrouter.list_models.return_value = [ - "anthropic/claude-3-opus-20240229", # Has alias "opus" - "anthropic/claude-3-sonnet-20240229", # Has alias "sonnet" + "anthropic/claude-opus-4", # Has alias "opus" + "anthropic/claude-sonnet-4", # Has alias "sonnet" "deepseek/deepseek-r1-0528:free", # No alias, full name "qwen/qwen3-235b-a22b-04-28:free", # No alias, full name ] @@ -67,12 +67,12 @@ class TestListModelsRestrictions(unittest.TestCase): def resolve_side_effect(model_name): if "opus" in model_name.lower(): config = MagicMock() - config.model_name = "anthropic/claude-3-opus-20240229" + config.model_name = "anthropic/claude-opus-4-20240229" config.context_window = 200000 return config elif "sonnet" in model_name.lower(): config = MagicMock() - config.model_name = "anthropic/claude-3-sonnet-20240229" + config.model_name = "anthropic/claude-sonnet-4-20240229" config.context_window = 200000 return config return None # No config for models without aliases @@ -93,8 +93,8 @@ class TestListModelsRestrictions(unittest.TestCase): mock_get_models.return_value = { "gemini-2.5-flash": ProviderType.GOOGLE, "gemini-2.5-pro": ProviderType.GOOGLE, - "anthropic/claude-3-opus-20240229": ProviderType.OPENROUTER, - "anthropic/claude-3-sonnet-20240229": ProviderType.OPENROUTER, + "anthropic/claude-opus-4-20240229": ProviderType.OPENROUTER, + "anthropic/claude-sonnet-4-20240229": ProviderType.OPENROUTER, "deepseek/deepseek-r1-0528:free": ProviderType.OPENROUTER, "qwen/qwen3-235b-a22b-04-28:free": ProviderType.OPENROUTER, } @@ -172,7 +172,7 @@ class TestListModelsRestrictions(unittest.TestCase): utils.model_restrictions._restriction_service = None # Set up mock to return many models when no restrictions - all_models = [f"provider{i//10}/model-{i}" for i in range(50)] # Simulate 50 models from different providers + all_models = [f"provider{i // 10}/model-{i}" for i in range(50)] # Simulate 50 models from different providers self.mock_openrouter.list_models.return_value = all_models # Mock registry instance diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py index b4b0e66..bd34a81 100644 --- a/tests/test_model_restrictions.py +++ b/tests/test_model_restrictions.py @@ -24,7 +24,7 @@ class TestModelRestrictionService: assert service.is_allowed(ProviderType.OPENAI, "o3-mini") assert service.is_allowed(ProviderType.GOOGLE, "gemini-2.5-pro") assert service.is_allowed(ProviderType.GOOGLE, "gemini-2.5-flash") - assert service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-3-opus") + assert 
service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-opus-4") assert service.is_allowed(ProviderType.OPENROUTER, "openai/o3") # Should have no restrictions @@ -44,7 +44,7 @@ class TestModelRestrictionService: # Google and OpenRouter should have no restrictions assert service.is_allowed(ProviderType.GOOGLE, "gemini-2.5-pro") - assert service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-3-opus") + assert service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-opus-4") def test_load_multiple_models_restriction(self): """Test loading multiple allowed models.""" @@ -159,7 +159,7 @@ class TestModelRestrictionService: # Should only allow specified OpenRouter models assert service.is_allowed(ProviderType.OPENROUTER, "opus") assert service.is_allowed(ProviderType.OPENROUTER, "sonnet") - assert service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-3-opus", "opus") # With original name + assert service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-opus-4", "opus") # With original name assert not service.is_allowed(ProviderType.OPENROUTER, "haiku") assert not service.is_allowed(ProviderType.OPENROUTER, "anthropic/claude-3-haiku") assert not service.is_allowed(ProviderType.OPENROUTER, "mistral-large") diff --git a/tests/test_openrouter_provider.py b/tests/test_openrouter_provider.py index 0dd2b78..da10678 100644 --- a/tests/test_openrouter_provider.py +++ b/tests/test_openrouter_provider.py @@ -44,7 +44,7 @@ class TestOpenRouterProvider: # Should accept any model - OpenRouter handles validation assert provider.validate_model_name("gpt-4") is True - assert provider.validate_model_name("claude-3-opus") is True + assert provider.validate_model_name("claude-4-opus") is True assert provider.validate_model_name("any-model-name") is True assert provider.validate_model_name("GPT-4") is True assert provider.validate_model_name("unknown-model") is True @@ -71,26 +71,26 @@ class TestOpenRouterProvider: provider = OpenRouterProvider(api_key="test-key") # Test alias resolution - assert provider._resolve_model_name("opus") == "anthropic/claude-3-opus" - assert provider._resolve_model_name("sonnet") == "anthropic/claude-3-sonnet" + assert provider._resolve_model_name("opus") == "anthropic/claude-opus-4" + assert provider._resolve_model_name("sonnet") == "anthropic/claude-sonnet-4" assert provider._resolve_model_name("o3") == "openai/o3" assert provider._resolve_model_name("o3-mini") == "openai/o3-mini" assert provider._resolve_model_name("o3mini") == "openai/o3-mini" assert provider._resolve_model_name("o4-mini") == "openai/o4-mini" assert provider._resolve_model_name("o4-mini-high") == "openai/o4-mini-high" - assert provider._resolve_model_name("claude") == "anthropic/claude-3-sonnet" + assert provider._resolve_model_name("claude") == "anthropic/claude-sonnet-4" assert provider._resolve_model_name("mistral") == "mistralai/mistral-large-2411" assert provider._resolve_model_name("deepseek") == "deepseek/deepseek-r1-0528" assert provider._resolve_model_name("r1") == "deepseek/deepseek-r1-0528" # Test case-insensitive - assert provider._resolve_model_name("OPUS") == "anthropic/claude-3-opus" + assert provider._resolve_model_name("OPUS") == "anthropic/claude-opus-4" assert provider._resolve_model_name("O3") == "openai/o3" assert provider._resolve_model_name("Mistral") == "mistralai/mistral-large-2411" - assert provider._resolve_model_name("CLAUDE") == "anthropic/claude-3-sonnet" + assert provider._resolve_model_name("CLAUDE") == "anthropic/claude-sonnet-4" # Test direct model names 
(should pass through unchanged) - assert provider._resolve_model_name("anthropic/claude-3-opus") == "anthropic/claude-3-opus" + assert provider._resolve_model_name("anthropic/claude-opus-4") == "anthropic/claude-opus-4" assert provider._resolve_model_name("openai/o3") == "openai/o3" # Test unknown models pass through @@ -155,8 +155,8 @@ class TestOpenRouterAutoMode: "google/gemini-2.5-pro", "openai/o3", "openai/o3-mini", - "anthropic/claude-3-opus", - "anthropic/claude-3-sonnet", + "anthropic/claude-opus-4", + "anthropic/claude-sonnet-4", ] ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) @@ -181,7 +181,7 @@ class TestOpenRouterAutoMode: os.environ.pop("OPENAI_API_KEY", None) os.environ["OPENROUTER_API_KEY"] = "test-openrouter-key" os.environ.pop("OPENROUTER_ALLOWED_MODELS", None) - os.environ["OPENROUTER_ALLOWED_MODELS"] = "anthropic/claude-3-opus,google/gemini-2.5-flash" + os.environ["OPENROUTER_ALLOWED_MODELS"] = "anthropic/claude-opus-4,google/gemini-2.5-flash" os.environ["DEFAULT_MODEL"] = "auto" # Force reload to pick up new environment variable @@ -193,8 +193,8 @@ class TestOpenRouterAutoMode: mock_models = [ "google/gemini-2.5-flash", "google/gemini-2.5-pro", - "anthropic/claude-3-opus", - "anthropic/claude-3-sonnet", + "anthropic/claude-opus-4", + "anthropic/claude-sonnet-4", ] mock_registry.list_models.return_value = mock_models @@ -212,7 +212,7 @@ class TestOpenRouterAutoMode: assert len(available_models) > 0, "Should have some allowed models" - expected_allowed = {"google/gemini-2.5-flash", "anthropic/claude-3-opus"} + expected_allowed = {"google/gemini-2.5-flash", "anthropic/claude-opus-4"} assert ( set(available_models.keys()) == expected_allowed @@ -263,7 +263,7 @@ class TestOpenRouterRegistry: # Should have loaded models models = registry.list_models() assert len(models) > 0 - assert "anthropic/claude-3-opus" in models + assert "anthropic/claude-opus-4" in models assert "openai/o3" in models # Should have loaded aliases @@ -282,13 +282,13 @@ class TestOpenRouterRegistry: # Test known model caps = registry.get_capabilities("opus") assert caps is not None - assert caps.model_name == "anthropic/claude-3-opus" + assert caps.model_name == "anthropic/claude-opus-4" assert caps.context_window == 200000 # Claude's context window # Test using full model name - caps = registry.get_capabilities("anthropic/claude-3-opus") + caps = registry.get_capabilities("anthropic/claude-opus-4") assert caps is not None - assert caps.model_name == "anthropic/claude-3-opus" + assert caps.model_name == "anthropic/claude-opus-4" # Test unknown model caps = registry.get_capabilities("non-existent-model") @@ -301,11 +301,11 @@ class TestOpenRouterRegistry: registry = OpenRouterModelRegistry() # All these should resolve to Claude Sonnet - sonnet_aliases = ["sonnet", "claude", "claude-sonnet", "claude3-sonnet"] + sonnet_aliases = ["sonnet", "claude", "claude-sonnet", "claude4-sonnet"] for alias in sonnet_aliases: config = registry.resolve(alias) assert config is not None - assert config.model_name == "anthropic/claude-3-sonnet" + assert config.model_name == "anthropic/claude-sonnet-4" class TestOpenRouterFunctionality: diff --git a/tests/test_openrouter_registry.py b/tests/test_openrouter_registry.py index 1a5a2f1..4b8bbbf 100644 --- a/tests/test_openrouter_registry.py +++ b/tests/test_openrouter_registry.py @@ -74,9 +74,9 @@ class TestOpenRouterModelRegistry: # Test various aliases test_cases = [ - ("opus", "anthropic/claude-3-opus"), - ("OPUS", 
"anthropic/claude-3-opus"), # Case insensitive - ("claude", "anthropic/claude-3-sonnet"), + ("opus", "anthropic/claude-opus-4"), + ("OPUS", "anthropic/claude-opus-4"), # Case insensitive + ("claude", "anthropic/claude-sonnet-4"), ("o3", "openai/o3"), ("deepseek", "deepseek/deepseek-r1-0528"), ("mistral", "mistralai/mistral-large-2411"), @@ -92,9 +92,9 @@ class TestOpenRouterModelRegistry: registry = OpenRouterModelRegistry() # Should be able to look up by full model name - config = registry.resolve("anthropic/claude-3-opus") + config = registry.resolve("anthropic/claude-opus-4") assert config is not None - assert config.model_name == "anthropic/claude-3-opus" + assert config.model_name == "anthropic/claude-opus-4" config = registry.resolve("openai/o3") assert config is not None @@ -118,7 +118,7 @@ class TestOpenRouterModelRegistry: caps = config.to_capabilities() assert caps.provider == ProviderType.OPENROUTER - assert caps.model_name == "anthropic/claude-3-opus" + assert caps.model_name == "anthropic/claude-opus-4" assert caps.friendly_name == "OpenRouter" assert caps.context_window == 200000 assert not caps.supports_extended_thinking diff --git a/tests/test_per_tool_model_defaults.py b/tests/test_per_tool_model_defaults.py index 92c904c..f2b9b5e 100644 --- a/tests/test_per_tool_model_defaults.py +++ b/tests/test_per_tool_model_defaults.py @@ -288,11 +288,11 @@ class TestProviderHelperMethods: with patch.object(ModelProviderRegistry, "get_provider") as mock_get_provider: # Mock openrouter provider mock_openrouter = MagicMock() - mock_openrouter.validate_model_name.side_effect = lambda m: m == "anthropic/claude-3.5-sonnet" + mock_openrouter.validate_model_name.side_effect = lambda m: m == "anthropic/claude-sonnet-4" mock_get_provider.side_effect = lambda ptype: mock_openrouter if ptype == ProviderType.OPENROUTER else None model = ModelProviderRegistry._find_extended_thinking_model() - assert model == "anthropic/claude-3.5-sonnet" + assert model == "anthropic/claude-sonnet-4" def test_find_extended_thinking_model_none_found(self): """Test when no thinking model is found.""" diff --git a/tests/test_provider_routing_bugs.py b/tests/test_provider_routing_bugs.py index 2ceda5a..9ed125b 100644 --- a/tests/test_provider_routing_bugs.py +++ b/tests/test_provider_routing_bugs.py @@ -318,7 +318,7 @@ class TestOpenRouterAliasRestrictions: os.environ.pop("OPENAI_API_KEY", None) os.environ.pop("XAI_API_KEY", None) os.environ["OPENROUTER_API_KEY"] = "test-key" - os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,anthropic/claude-3-opus,flash" + os.environ["OPENROUTER_ALLOWED_MODELS"] = "o3-mini,anthropic/claude-opus-4,flash" # Register OpenRouter provider from providers.openrouter import OpenRouterProvider @@ -330,7 +330,7 @@ class TestOpenRouterAliasRestrictions: expected_models = { "openai/o3-mini", # from alias - "anthropic/claude-3-opus", # full name + "anthropic/claude-opus-4", # full name "google/gemini-2.5-flash", # from alias } diff --git a/tools/shared/base_tool.py b/tools/shared/base_tool.py index 7bff37f..a98baf8 100644 --- a/tools/shared/base_tool.py +++ b/tools/shared/base_tool.py @@ -448,7 +448,7 @@ class BaseTool(ABC): except Exception: description += ( " OpenRouter: Any model available on openrouter.ai " - "(e.g., 'gpt-4', 'claude-3-opus', 'mistral-large')." + "(e.g., 'gpt-4', 'claude-4-opus', 'mistral-large')." ) description += f" Defaults to '{DEFAULT_MODEL}' if not specified." 
From 0623ce3546fd744877b4fa09cf8943b4bb5c85dc Mon Sep 17 00:00:00 2001 From: Illya Havsiyevych <44289086+illya-havsiyevych@users.noreply.github.com> Date: Mon, 23 Jun 2025 13:07:10 +0300 Subject: [PATCH 8/9] feat: DIAL provider implementation (#112) ## Description This PR implements a new [DIAL](https://dialx.ai/dial_api) (Data & AI Layer) provider for the Zen MCP Server, enabling unified access to multiple AI models through the DIAL API platform. DIAL provides enterprise-grade AI model access with deployment-specific routing similar to Azure OpenAI. ## Changes Made - [x] Added support of atexit: - Ensures automatic cleanup of provider resources (HTTP clients, connection pools) on server shutdown - Fixed bug using ModelProviderRegistry.get_available_providers() instead of accessing private _providers - Works with SIGTERM/Ctrl+C for graceful shutdown in both development and containerized environments - [x] Added new DIAL provider (`providers/dial.py`) inheriting from `OpenAICompatibleProvider` - [x] Updated server.py to register DIAL provider during initialization - [x] Updated provider registry to include DIAL provider type - [x] Implemented deployment-specific routing for DIAL's Azure OpenAI-style endpoints - [x] Implemented performance optimizations: - Connection pooling with httpx for better performance - Thread-safe client caching with double-check locking pattern - Proper resource cleanup with `close()` method - [x] Added comprehensive unit tests with 16 test cases (`tests/test_dial_provider.py`) - [x] Added DIAL configuration to `.env.example` with documentation - [x] Added support for configurable API version via `DIAL_API_VERSION` environment variable - [x] Added DIAL model restrictions support via `DIAL_ALLOWED_MODELS` environment variable ### Supported DIAL Models: - OpenAI models: o3, o4-mini (and their dated versions) - Google models: gemini-2.5-pro, gemini-2.5-flash (including search variant) - Anthropic models: Claude 4 Opus/Sonnet (with and without thinking mode) ### Environment Variables: - `DIAL_API_KEY`: Required API key for DIAL authentication - `DIAL_API_HOST`: Optional base URL (defaults to https://core.dialx.ai) - `DIAL_API_VERSION`: Optional API version header (defaults to 2025-01-01-preview) - `DIAL_ALLOWED_MODELS`: Optional comma-separated list of allowed models ### Breaking Changes: - None ### Dependencies: - No new dependencies added (uses existing OpenAI SDK with custom routing) --- .env.example | 45 +++- README.md | 14 +- providers/base.py | 10 + providers/dial.py | 525 ++++++++++++++++++++++++++++++++++++ providers/registry.py | 2 + run-server.sh | 3 + server.py | 34 ++- tests/test_dial_provider.py | 273 +++++++++++++++++++ tools/listmodels.py | 1 + utils/model_restrictions.py | 2 + 10 files changed, 900 insertions(+), 9 deletions(-) create mode 100644 providers/dial.py create mode 100644 tests/test_dial_provider.py diff --git a/.env.example b/.env.example index a7e6376..1d88d4c 100644 --- a/.env.example +++ b/.env.example @@ -3,8 +3,11 @@ # API Keys - At least one is required # -# IMPORTANT: Use EITHER OpenRouter OR native APIs (Gemini/OpenAI), not both! -# Having both creates ambiguity about which provider serves each model. +# IMPORTANT: Choose ONE approach: +# - Native APIs (Gemini/OpenAI/XAI) for direct access +# - DIAL for unified enterprise access +# - OpenRouter for unified cloud access +# Having multiple unified providers creates ambiguity about which serves each model. 
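# When model names overlap across providers, native APIs take priority.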
# # Option 1: Use native APIs (recommended for direct access) # Get your Gemini API key from: https://makersuite.google.com/app/apikey @@ -16,6 +19,12 @@ OPENAI_API_KEY=your_openai_api_key_here # Get your X.AI API key from: https://console.x.ai/ XAI_API_KEY=your_xai_api_key_here +# Get your DIAL API key and configure host URL +# DIAL provides unified access to multiple AI models through a single API +DIAL_API_KEY=your_dial_api_key_here +# DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended) +# DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests + # Option 2: Use OpenRouter for access to multiple models through one API # Get your OpenRouter API key from: https://openrouter.ai/ # If using OpenRouter, comment out the native API keys above @@ -27,7 +36,8 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here # CUSTOM_MODEL_NAME=llama3.2 # Default model name # Optional: Default model to use -# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high' etc +# Options: 'auto' (Claude picks best model), 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', 'o4-mini-high', +# 'grok', 'opus-4', 'sonnet-4', or any DIAL model if DIAL is configured # When set to 'auto', Claude will select the best model for each task # Defaults to 'auto' if not specified DEFAULT_MODEL=auto @@ -70,6 +80,26 @@ DEFAULT_THINKING_MODE_THINKDEEP=high # - grok3 (shorthand for grok-3) # - grokfast (shorthand for grok-3-fast) # +# Supported DIAL models (when available in your DIAL deployment): +# - o3-2025-04-16 (200K context, latest O3 release) +# - o4-mini-2025-04-16 (200K context, latest O4 mini) +# - o3 (shorthand for o3-2025-04-16) +# - o4-mini (shorthand for o4-mini-2025-04-16) +# - anthropic.claude-sonnet-4-20250514-v1:0 (200K context, Claude 4 Sonnet) +# - anthropic.claude-sonnet-4-20250514-v1:0-with-thinking (200K context, Claude 4 Sonnet with thinking mode) +# - anthropic.claude-opus-4-20250514-v1:0 (200K context, Claude 4 Opus) +# - anthropic.claude-opus-4-20250514-v1:0-with-thinking (200K context, Claude 4 Opus with thinking mode) +# - sonnet-4 (shorthand for Claude 4 Sonnet) +# - sonnet-4-thinking (shorthand for Claude 4 Sonnet with thinking) +# - opus-4 (shorthand for Claude 4 Opus) +# - opus-4-thinking (shorthand for Claude 4 Opus with thinking) +# - gemini-2.5-pro-preview-03-25-google-search (1M context, with Google Search) +# - gemini-2.5-pro-preview-05-06 (1M context, latest preview) +# - gemini-2.5-flash-preview-05-20 (1M context, latest flash preview) +# - gemini-2.5-pro (shorthand for gemini-2.5-pro-preview-05-06) +# - gemini-2.5-pro-search (shorthand for gemini-2.5-pro-preview-03-25-google-search) +# - gemini-2.5-flash (shorthand for gemini-2.5-flash-preview-05-20) +# # Examples: # OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini # Only allow mini models (cost control) # GOOGLE_ALLOWED_MODELS=flash # Only allow Flash (fast responses) @@ -77,21 +107,26 @@ DEFAULT_THINKING_MODE_THINKDEEP=high # OPENAI_ALLOWED_MODELS=o4-mini # Single model standardization # GOOGLE_ALLOWED_MODELS=flash,pro # Allow both Gemini models # XAI_ALLOWED_MODELS=grok,grok-3-fast # Allow both GROK variants +# DIAL_ALLOWED_MODELS=o3,o4-mini # Only allow O3/O4 models via DIAL +# DIAL_ALLOWED_MODELS=opus-4,sonnet-4 # Only Claude 4 models (without thinking) +# DIAL_ALLOWED_MODELS=opus-4-thinking,sonnet-4-thinking # Only Claude 4 with thinking mode +# DIAL_ALLOWED_MODELS=gemini-2.5-pro,gemini-2.5-flash # Only Gemini 2.5 models via DIAL # # 
Note: These restrictions apply even in 'auto' mode - Claude will only pick from allowed models # OPENAI_ALLOWED_MODELS= # GOOGLE_ALLOWED_MODELS= # XAI_ALLOWED_MODELS= +# DIAL_ALLOWED_MODELS= # Optional: Custom model configuration file path # Override the default location of custom_models.json # CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json -# Note: Redis is no longer used - conversations are stored in memory +# Note: Conversations are stored in memory during the session # Optional: Conversation timeout (hours) # How long AI-to-AI conversation threads persist before expiring -# Longer timeouts use more Redis memory but allow resuming conversations later +# Longer timeouts use more memory but allow resuming conversations later # Defaults to 3 hours if not specified CONVERSATION_TIMEOUT_HOURS=3 diff --git a/README.md b/README.md index c540380..40552da 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [zen_web.webm](https://github.com/user-attachments/assets/851e3911-7f06-47c0-a4ab-a2601236697c)
-  🤖 Claude + [Gemini / OpenAI / Grok / OpenRouter / Ollama / Any Model] = Your Ultimate AI Development Team
+  🤖 Claude + [Gemini / OpenAI / Grok / OpenRouter / DIAL / Ollama / Any Model] = Your Ultimate AI Development Team

@@ -145,6 +145,7 @@ The final implementation resulted in a 26% improvement in JSON parsing performan - **Gemini**: Visit [Google AI Studio](https://makersuite.google.com/app/apikey) and generate an API key. For best results with Gemini 2.5 Pro, use a paid API key as the free tier has limited access to the latest models. - **OpenAI**: Visit [OpenAI Platform](https://platform.openai.com/api-keys) to get an API key for O3 model access. - **X.AI**: Visit [X.AI Console](https://console.x.ai/) to get an API key for GROK model access. +- **DIAL**: Visit [DIAL Platform](https://dialx.ai/) to get an API key for accessing multiple models through their unified API. DIAL is an open-source AI orchestration platform that provides vendor-agnostic access to models from major providers, open-source community, and self-hosted deployments. [API Documentation](https://dialx.ai/dial_api) **Option C: Custom API Endpoints (Local models like Ollama, vLLM)** [Please see the setup guide](docs/custom_models.md#option-2-custom-api-setup-ollama-vllm-etc). With a custom API you can use: @@ -154,7 +155,7 @@ The final implementation resulted in a 26% improvement in JSON parsing performan - **Text Generation WebUI**: Popular local interface for running models - **Any OpenAI-compatible API**: Custom endpoints for your own infrastructure -> **Note:** Using all three options may create ambiguity about which provider / model to use if there is an overlap. +> **Note:** Using multiple provider options may create ambiguity about which provider / model to use if there is an overlap. > If all APIs are configured, native APIs will take priority when there is a clash in model name, such as for `gemini` and `o3`. > Configure your model aliases and give them unique names in [`conf/custom_models.json`](conf/custom_models.json) @@ -192,6 +193,12 @@ nano .env # GEMINI_API_KEY=your-gemini-api-key-here # For Gemini models # OPENAI_API_KEY=your-openai-api-key-here # For O3 model # OPENROUTER_API_KEY=your-openrouter-key # For OpenRouter (see docs/custom_models.md) +# DIAL_API_KEY=your-dial-api-key-here # For DIAL platform + +# For DIAL (optional configuration): +# DIAL_API_HOST=https://core.dialx.ai # Default DIAL host (optional) +# DIAL_API_VERSION=2024-12-01-preview # API version (optional) +# DIAL_ALLOWED_MODELS=o3,gemini-2.5-pro # Restrict to specific models (optional) # For local models (Ollama, vLLM, etc.): # CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example @@ -537,10 +544,11 @@ Configure the Zen MCP Server through environment variables in your `.env` file. 
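# Set at least one API key below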
DEFAULT_MODEL=auto GEMINI_API_KEY=your-gemini-key OPENAI_API_KEY=your-openai-key +DIAL_API_KEY=your-dial-key # Optional: Access to multiple models via DIAL ``` **Key Configuration Options:** -- **API Keys**: Native APIs (Gemini, OpenAI, X.AI), OpenRouter, or Custom endpoints (Ollama, vLLM) +- **API Keys**: Native APIs (Gemini, OpenAI, X.AI), OpenRouter, DIAL, or Custom endpoints (Ollama, vLLM) - **Model Selection**: Auto mode or specific model defaults - **Usage Restrictions**: Control which models can be used for cost control - **Conversation Settings**: Timeout, turn limits, memory configuration diff --git a/providers/base.py b/providers/base.py index e0b3882..c8b1ec7 100644 --- a/providers/base.py +++ b/providers/base.py @@ -17,6 +17,7 @@ class ProviderType(Enum): XAI = "xai" OPENROUTER = "openrouter" CUSTOM = "custom" + DIAL = "dial" class TemperatureConstraint(ABC): @@ -326,3 +327,12 @@ class ModelProvider(ABC): Resolved model name """ return model_name + + def close(self): + """Clean up any resources held by the provider. + + Default implementation does nothing. + Subclasses should override if they hold resources that need cleanup. + """ + # Base implementation: no resources to clean up + return diff --git a/providers/dial.py b/providers/dial.py new file mode 100644 index 0000000..617858c --- /dev/null +++ b/providers/dial.py @@ -0,0 +1,525 @@ +"""DIAL (Data & AI Layer) model provider implementation.""" + +import logging +import os +import threading +import time +from typing import Optional + +from .base import ( + ModelCapabilities, + ModelResponse, + ProviderType, + RangeTemperatureConstraint, +) +from .openai_compatible import OpenAICompatibleProvider + +logger = logging.getLogger(__name__) + + +class DIALModelProvider(OpenAICompatibleProvider): + """DIAL provider using OpenAI-compatible API. + + DIAL provides access to various AI models through a unified API interface. + Supports GPT, Claude, Gemini, and other models via DIAL deployments. 
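+
+    Example (illustrative sketch only; available deployment names depend on
+    your DIAL instance):
+
+        provider = DIALModelProvider(api_key=os.getenv("DIAL_API_KEY"))
+        response = provider.generate_content(prompt="Hello", model_name="o3")
+        print(response.content)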
+ """ + + FRIENDLY_NAME = "DIAL" + + # Retry configuration for API calls + MAX_RETRIES = 4 + RETRY_DELAYS = [1, 3, 5, 8] # seconds + + # Supported DIAL models (these can be customized based on your DIAL deployment) + SUPPORTED_MODELS = { + "o3-2025-04-16": { + "context_window": 200_000, + "supports_extended_thinking": False, + "supports_vision": True, + }, + "o4-mini-2025-04-16": { + "context_window": 200_000, + "supports_extended_thinking": False, + "supports_vision": True, + }, + "anthropic.claude-sonnet-4-20250514-v1:0": { + "context_window": 200_000, + "supports_extended_thinking": False, + "supports_vision": True, + }, + "anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": { + "context_window": 200_000, + "supports_extended_thinking": True, # Thinking mode variant + "supports_vision": True, + }, + "anthropic.claude-opus-4-20250514-v1:0": { + "context_window": 200_000, + "supports_extended_thinking": False, + "supports_vision": True, + }, + "anthropic.claude-opus-4-20250514-v1:0-with-thinking": { + "context_window": 200_000, + "supports_extended_thinking": True, # Thinking mode variant + "supports_vision": True, + }, + "gemini-2.5-pro-preview-03-25-google-search": { + "context_window": 1_000_000, + "supports_extended_thinking": False, # DIAL doesn't expose thinking mode + "supports_vision": True, + }, + "gemini-2.5-pro-preview-05-06": { + "context_window": 1_000_000, + "supports_extended_thinking": False, + "supports_vision": True, + }, + "gemini-2.5-flash-preview-05-20": { + "context_window": 1_000_000, + "supports_extended_thinking": False, + "supports_vision": True, + }, + # Shorthands + "o3": "o3-2025-04-16", + "o4-mini": "o4-mini-2025-04-16", + "sonnet-4": "anthropic.claude-sonnet-4-20250514-v1:0", + "sonnet-4-thinking": "anthropic.claude-sonnet-4-20250514-v1:0-with-thinking", + "opus-4": "anthropic.claude-opus-4-20250514-v1:0", + "opus-4-thinking": "anthropic.claude-opus-4-20250514-v1:0-with-thinking", + "gemini-2.5-pro": "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro-search": "gemini-2.5-pro-preview-03-25-google-search", + "gemini-2.5-flash": "gemini-2.5-flash-preview-05-20", + } + + def __init__(self, api_key: str, **kwargs): + """Initialize DIAL provider with API key and host. 
+
+        Args:
+            api_key: DIAL API key for authentication
+            **kwargs: Additional configuration options
+        """
+        # Get DIAL API host from environment or kwargs
+        dial_host = kwargs.get("base_url") or os.getenv("DIAL_API_HOST") or "https://core.dialx.ai"
+
+        # DIAL uses /openai endpoint for OpenAI-compatible API
+        if not dial_host.endswith("/openai"):
+            dial_host = f"{dial_host.rstrip('/')}/openai"
+
+        kwargs["base_url"] = dial_host
+
+        # Get API version from environment or use default
+        self.api_version = os.getenv("DIAL_API_VERSION", "2024-12-01-preview")
+
+        # Add DIAL-specific headers
+        # DIAL uses Api-Key header instead of Authorization: Bearer
+        # Reference: https://dialx.ai/dial_api#section/Authorization
+        self.DEFAULT_HEADERS = {
+            "Api-Key": api_key,
+        }
+
+        # Store the actual API key for use in Api-Key header
+        self._dial_api_key = api_key
+
+        # Pass a placeholder API key to OpenAI client - we'll override the auth header in httpx
+        # The actual authentication happens via the Api-Key header in the httpx client
+        super().__init__("placeholder-not-used", **kwargs)
+
+        # Cache for deployment-specific clients to avoid recreating them on each request
+        self._deployment_clients = {}
+        # Lock to ensure thread-safe client creation
+        self._client_lock = threading.Lock()
+
+        # Create a SINGLE shared httpx client for the provider instance
+        import httpx
+
+        # Create custom event hooks to remove Authorization header
+        def remove_auth_header(request):
+            """Remove Authorization header that OpenAI client adds."""
+            # Collect matching names first so the headers are not mutated while
+            # iterating; httpx matches header names case-insensitively.
+            headers_to_remove = []
+            for header_name in request.headers:
+                if header_name.lower() == "authorization":
+                    headers_to_remove.append(header_name)
+
+            for header_name in headers_to_remove:
+                del request.headers[header_name]
+
+        self._http_client = httpx.Client(
+            timeout=self.timeout_config,
+            verify=True,
+            follow_redirects=True,
+            headers=self.DEFAULT_HEADERS.copy(),  # Include DIAL headers including Api-Key
+            limits=httpx.Limits(
+                max_keepalive_connections=5,
+                max_connections=10,
+                keepalive_expiry=30.0,
+            ),
+            event_hooks={"request": [remove_auth_header]},
+        )
+
+        logger.info(f"Initialized DIAL provider with host: {dial_host} and api-version: {self.api_version}")
+
+    def get_capabilities(self, model_name: str) -> ModelCapabilities:
+        """Get capabilities for a specific model.
+ + Args: + model_name: Name of the model (can be shorthand) + + Returns: + ModelCapabilities object + + Raises: + ValueError: If model is not supported or not allowed + """ + resolved_name = self._resolve_model_name(model_name) + + if resolved_name not in self.SUPPORTED_MODELS: + raise ValueError(f"Unsupported DIAL model: {model_name}") + + # Check restrictions + from utils.model_restrictions import get_restriction_service + + restriction_service = get_restriction_service() + if not restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model_name): + raise ValueError(f"Model '{model_name}' is not allowed by restriction policy.") + + config = self.SUPPORTED_MODELS[resolved_name] + + return ModelCapabilities( + provider=ProviderType.DIAL, + model_name=resolved_name, + friendly_name=self.FRIENDLY_NAME, + context_window=config["context_window"], + supports_extended_thinking=config["supports_extended_thinking"], + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_images=config.get("supports_vision", False), + temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7), + ) + + def get_provider_type(self) -> ProviderType: + """Get the provider type.""" + return ProviderType.DIAL + + def validate_model_name(self, model_name: str) -> bool: + """Validate if the model name is supported. + + Args: + model_name: Model name to validate + + Returns: + True if model is supported and allowed, False otherwise + """ + resolved_name = self._resolve_model_name(model_name) + + if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict): + return False + + # Check against base class allowed_models if configured + if self.allowed_models is not None: + # Check both original and resolved names (case-insensitive) + if model_name.lower() not in self.allowed_models and resolved_name.lower() not in self.allowed_models: + logger.debug(f"DIAL model '{model_name}' -> '{resolved_name}' not in allowed_models list") + return False + + # Also check restrictions via ModelRestrictionService + from utils.model_restrictions import get_restriction_service + + restriction_service = get_restriction_service() + if not restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model_name): + logger.debug(f"DIAL model '{model_name}' -> '{resolved_name}' blocked by restrictions") + return False + + return True + + def _resolve_model_name(self, model_name: str) -> str: + """Resolve model shorthand to full name. + + Args: + model_name: Model name or shorthand + + Returns: + Full model name + """ + shorthand_value = self.SUPPORTED_MODELS.get(model_name) + if isinstance(shorthand_value, str): + return shorthand_value + return model_name + + def _get_deployment_client(self, deployment: str): + """Get or create a cached client for a specific deployment. + + This avoids recreating OpenAI clients on every request, improving performance. + Reuses the shared HTTP client for connection pooling. 
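+
+        Uses double-checked locking: the cache is read without the lock on the
+        fast path, then re-checked inside the lock before a new client is created.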
+
+        Args:
+            deployment: The deployment/model name
+
+        Returns:
+            OpenAI client configured for the specific deployment
+        """
+        # Check if client already exists without locking for performance
+        if deployment in self._deployment_clients:
+            return self._deployment_clients[deployment]
+
+        # Use lock to ensure thread-safe client creation
+        with self._client_lock:
+            # Double-check pattern: check again inside the lock
+            if deployment not in self._deployment_clients:
+                from openai import OpenAI
+
+                # Build deployment-specific URL
+                base_url = str(self.client.base_url)
+                if base_url.endswith("/"):
+                    base_url = base_url[:-1]
+
+                # Remove /openai suffix if present to reconstruct properly
+                if base_url.endswith("/openai"):
+                    base_url = base_url[:-7]
+
+                deployment_url = f"{base_url}/openai/deployments/{deployment}"
+
+                # Create and cache the client, REUSING the shared http_client
+                # Use placeholder API key - Authorization header will be removed by http_client event hook
+                self._deployment_clients[deployment] = OpenAI(
+                    api_key="placeholder-not-used",
+                    base_url=deployment_url,
+                    http_client=self._http_client,  # Pass the shared client with Api-Key header
+                    default_query={"api-version": self.api_version},  # Add api-version as query param
+                )
+
+            return self._deployment_clients[deployment]
+
+    def generate_content(
+        self,
+        prompt: str,
+        model_name: str,
+        system_prompt: Optional[str] = None,
+        temperature: float = 0.7,
+        max_output_tokens: Optional[int] = None,
+        images: Optional[list[str]] = None,
+        **kwargs,
+    ) -> ModelResponse:
+        """Generate content using DIAL's deployment-specific endpoint.
+
+        DIAL uses Azure OpenAI-style deployment endpoints:
+        /openai/deployments/{deployment}/chat/completions
+
+        Args:
+            prompt: User prompt
+            model_name: Model name or alias
+            system_prompt: Optional system prompt
+            temperature: Sampling temperature
+            max_output_tokens: Maximum tokens to generate
+            images: Optional image paths or data URLs to attach (vision models only)
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            ModelResponse with generated content and metadata
+        """
+        # Validate model name against allow-list
+        if not self.validate_model_name(model_name):
+            raise ValueError(f"Model '{model_name}' not in allowed models list. Allowed models: {self.allowed_models}")
+
+        # Validate parameters
+        self.validate_parameters(model_name, temperature)
+
+        # Prepare messages
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        # Build user message content
+        user_message_content = []
+        if prompt:
+            user_message_content.append({"type": "text", "text": prompt})
+
+        if images and self._supports_vision(model_name):
+            for img_path in images:
+                processed_image = self._process_image(img_path)
+                if processed_image:
+                    user_message_content.append(processed_image)
+        elif images:
+            logger.warning(f"Model {model_name} does not support images, ignoring {len(images)} image(s)")
+
+        # Add user message. If only text, content will be a string, otherwise a list.
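+        # (A plain string maximizes compatibility with OpenAI-compatible
+        # backends that expect string content for text-only messages; the
+        # content-part list form is only needed when image parts are attached.)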
+        if len(user_message_content) == 1 and user_message_content[0]["type"] == "text":
+            messages.append({"role": "user", "content": prompt})
+        else:
+            messages.append({"role": "user", "content": user_message_content})
+
+        # Resolve model name
+        resolved_model = self._resolve_model_name(model_name)
+
+        # Build completion parameters
+        completion_params = {
+            "model": resolved_model,
+            "messages": messages,
+        }
+
+        # Check model capabilities
+        try:
+            capabilities = self.get_capabilities(model_name)
+            supports_temperature = getattr(capabilities, "supports_temperature", True)
+        except Exception as e:
+            logger.debug(f"Failed to check temperature support for {model_name}: {e}")
+            supports_temperature = True
+
+        # Add temperature parameter if supported
+        if supports_temperature:
+            completion_params["temperature"] = temperature
+
+        # Add max tokens only for models that accept standard sampling parameters.
+        # supports_temperature doubles as a proxy here: reasoning models that reject
+        # temperature (such as o3) also reject max_tokens in favor of max_completion_tokens.
+        if max_output_tokens and supports_temperature:
+            completion_params["max_tokens"] = max_output_tokens
+
+        # Add additional parameters
+        for key, value in kwargs.items():
+            if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
+                if not supports_temperature and key in ["top_p", "frequency_penalty", "presence_penalty"]:
+                    continue
+                completion_params[key] = value
+
+        # DIAL-specific: Get cached client for deployment endpoint
+        deployment_client = self._get_deployment_client(resolved_model)
+
+        # Retry logic with progressive delays
+        last_exception = None
+
+        for attempt in range(self.MAX_RETRIES):
+            try:
+                # Generate completion using deployment-specific client
+                response = deployment_client.chat.completions.create(**completion_params)
+
+                # Extract content and usage
+                content = response.choices[0].message.content
+                usage = self._extract_usage(response)
+
+                return ModelResponse(
+                    content=content,
+                    usage=usage,
+                    model_name=model_name,
+                    friendly_name=self.FRIENDLY_NAME,
+                    provider=self.get_provider_type(),
+                    metadata={
+                        "finish_reason": response.choices[0].finish_reason,
+                        "model": response.model,
+                        "id": response.id,
+                        "created": response.created,
+                    },
+                )
+
+            except Exception as e:
+                last_exception = e
+
+                # Check if this is a retryable error
+                is_retryable = self._is_error_retryable(e)
+
+                if not is_retryable:
+                    # Non-retryable error, raise immediately
+                    raise ValueError(f"DIAL API error for model {model_name}: {str(e)}")
+
+                # If this isn't the last attempt and error is retryable, wait and retry
+                if attempt < self.MAX_RETRIES - 1:
+                    delay = self.RETRY_DELAYS[attempt]
+                    logger.info(
+                        f"DIAL API error (attempt {attempt + 1}/{self.MAX_RETRIES}), "
+                        f"retrying in {delay}s: {str(e)}"
+                    )
+                    time.sleep(delay)
+                    continue
+
+        # All retries exhausted
+        raise ValueError(
+            f"DIAL API error for model {model_name} after {self.MAX_RETRIES} attempts: {str(last_exception)}"
+        )
+
+    def _supports_vision(self, model_name: str) -> bool:
+        """Check if the model supports vision (image processing).
+
+        Args:
+            model_name: Model name to check
+
+        Returns:
+            True if model supports vision, False otherwise
+        """
+        resolved_name = self._resolve_model_name(model_name)
+
+        if resolved_name in self.SUPPORTED_MODELS and isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
+            return self.SUPPORTED_MODELS[resolved_name].get("supports_vision", False)
+
+        # Fall back to parent implementation for unknown models
+        return super()._supports_vision(model_name)
+
+    def list_models(self, respect_restrictions: bool = True) -> list[str]:
+        """Return a list of model names supported by this provider.
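+
+        The list includes both canonical deployment names and their aliases;
+        restriction filtering resolves each alias to its target before checking.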
+ + Args: + respect_restrictions: Whether to apply provider-specific restriction logic. + + Returns: + List of model names available from this provider + """ + # Get all model keys (both full names and aliases) + all_models = list(self.SUPPORTED_MODELS.keys()) + + if not respect_restrictions: + return all_models + + # Apply restrictions if configured + from utils.model_restrictions import get_restriction_service + + restriction_service = get_restriction_service() + + # Filter based on restrictions + allowed_models = [] + for model in all_models: + resolved_name = self._resolve_model_name(model) + if restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model): + allowed_models.append(model) + + return allowed_models + + def list_all_known_models(self) -> list[str]: + """Return all model names known by this provider, including alias targets. + + This is used for validation purposes to ensure restriction policies + can validate against both aliases and their target model names. + + Returns: + List of all model names and alias targets known by this provider + """ + # Collect all unique model names (both aliases and targets) + all_models = set() + + for key, value in self.SUPPORTED_MODELS.items(): + # Add the key (could be alias or full name) + all_models.add(key) + + # If it's an alias (string value), add the target too + if isinstance(value, str): + all_models.add(value) + + return sorted(all_models) + + def close(self): + """Clean up HTTP clients when provider is closed.""" + logger.info("Closing DIAL provider HTTP clients...") + + # Clear the deployment clients cache + # Note: We don't need to close individual OpenAI clients since they + # use the shared httpx.Client which we close separately + self._deployment_clients.clear() + + # Close the shared HTTP client + if hasattr(self, "_http_client"): + try: + self._http_client.close() + logger.debug("Closed shared HTTP client") + except Exception as e: + logger.warning(f"Error closing shared HTTP client: {e}") + + # Also close the client created by the superclass (OpenAICompatibleProvider) + # as it holds its own httpx.Client instance that is not used by DIAL's generate_content + if hasattr(self, "client") and self.client and hasattr(self.client, "close"): + try: + self.client.close() + logger.debug("Closed superclass's OpenAI client") + except Exception as e: + logger.warning(f"Error closing superclass's OpenAI client: {e}") diff --git a/providers/registry.py b/providers/registry.py index a5efcf0..baa9222 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -118,6 +118,7 @@ class ModelProviderRegistry: ProviderType.GOOGLE, # Direct Gemini access ProviderType.OPENAI, # Direct OpenAI access ProviderType.XAI, # Direct X.AI GROK access + ProviderType.DIAL, # DIAL unified API access ProviderType.CUSTOM, # Local/self-hosted models ProviderType.OPENROUTER, # Catch-all for cloud models ] @@ -237,6 +238,7 @@ class ModelProviderRegistry: ProviderType.XAI: "XAI_API_KEY", ProviderType.OPENROUTER: "OPENROUTER_API_KEY", ProviderType.CUSTOM: "CUSTOM_API_KEY", # Can be empty for providers that don't need auth + ProviderType.DIAL: "DIAL_API_KEY", } env_var = key_mapping.get(provider_type) diff --git a/run-server.sh b/run-server.sh index d2d0ebe..243f0e0 100755 --- a/run-server.sh +++ b/run-server.sh @@ -883,6 +883,7 @@ setup_env_file() { "GEMINI_API_KEY:your_gemini_api_key_here" "OPENAI_API_KEY:your_openai_api_key_here" "XAI_API_KEY:your_xai_api_key_here" + "DIAL_API_KEY:your_dial_api_key_here" 
"OPENROUTER_API_KEY:your_openrouter_api_key_here" ) @@ -934,6 +935,7 @@ validate_api_keys() { "GEMINI_API_KEY:your_gemini_api_key_here" "OPENAI_API_KEY:your_openai_api_key_here" "XAI_API_KEY:your_xai_api_key_here" + "DIAL_API_KEY:your_dial_api_key_here" "OPENROUTER_API_KEY:your_openrouter_api_key_here" ) @@ -961,6 +963,7 @@ validate_api_keys() { echo " GEMINI_API_KEY=your-actual-key" >&2 echo " OPENAI_API_KEY=your-actual-key" >&2 echo " XAI_API_KEY=your-actual-key" >&2 + echo " DIAL_API_KEY=your-actual-key" >&2 echo " OPENROUTER_API_KEY=your-actual-key" >&2 echo "" >&2 print_info "After adding your API keys, run ./run-server.sh again" >&2 diff --git a/server.py b/server.py index 1b0f969..19904fb 100644 --- a/server.py +++ b/server.py @@ -19,6 +19,7 @@ as defined by the MCP protocol. """ import asyncio +import atexit import logging import os import sys @@ -271,6 +272,7 @@ def configure_providers(): from providers import ModelProviderRegistry from providers.base import ProviderType from providers.custom import CustomProvider + from providers.dial import DIALModelProvider from providers.gemini import GeminiModelProvider from providers.openai_provider import OpenAIModelProvider from providers.openrouter import OpenRouterProvider @@ -303,6 +305,13 @@ def configure_providers(): has_native_apis = True logger.info("X.AI API key found - GROK models available") + # Check for DIAL API key + dial_key = os.getenv("DIAL_API_KEY") + if dial_key and dial_key != "your_dial_api_key_here": + valid_providers.append("DIAL") + has_native_apis = True + logger.info("DIAL API key found - DIAL models available") + # Check for OpenRouter API key openrouter_key = os.getenv("OPENROUTER_API_KEY") if openrouter_key and openrouter_key != "your_openrouter_api_key_here": @@ -336,6 +345,8 @@ def configure_providers(): ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) if xai_key and xai_key != "your_xai_api_key_here": ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider) + if dial_key and dial_key != "your_dial_api_key_here": + ModelProviderRegistry.register_provider(ProviderType.DIAL, DIALModelProvider) # 2. 
Custom provider second (for local/private models)
    if has_custom:
@@ -358,6 +369,7 @@ configure_providers():
             "- GEMINI_API_KEY for Gemini models\n"
             "- OPENAI_API_KEY for OpenAI o3 model\n"
             "- XAI_API_KEY for X.AI GROK models\n"
+            "- DIAL_API_KEY for DIAL models\n"
             "- OPENROUTER_API_KEY for OpenRouter (multiple models)\n"
             "- CUSTOM_API_URL for local models (Ollama, vLLM, etc.)"
         )
@@ -376,6 +388,25 @@ configure_providers():
     if len(priority_info) > 1:
         logger.info(f"Provider priority: {' → '.join(priority_info)}")

+    # Register cleanup function for providers
+    def cleanup_providers():
+        """Clean up all registered providers on shutdown."""
+        try:
+            registry = ModelProviderRegistry()
+            if hasattr(registry, "_initialized_providers"):
+                # Iterate over the provider instances (values), not the
+                # (type, instance) items, so the close() check can succeed
+                for provider in list(registry._initialized_providers.values()):
+                    try:
+                        if provider and hasattr(provider, "close"):
+                            provider.close()
+                    except Exception:
+                        # Logger might be closed during shutdown
+                        pass
+        except Exception:
+            # Silently ignore any errors during cleanup
+            pass
+
+    atexit.register(cleanup_providers)
+
     # Check and log model restrictions
     restriction_service = get_restriction_service()
     restrictions = restriction_service.get_restriction_summary()
@@ -390,7 +421,8 @@ configure_providers():
     # Validate restrictions against known models
     provider_instances = {}
-    for provider_type in [ProviderType.GOOGLE, ProviderType.OPENAI]:
+    provider_types_to_validate = [ProviderType.GOOGLE, ProviderType.OPENAI, ProviderType.XAI, ProviderType.DIAL]
+    for provider_type in provider_types_to_validate:
         provider = ModelProviderRegistry.get_provider(provider_type)
         if provider:
             provider_instances[provider_type] = provider
diff --git a/tests/test_dial_provider.py b/tests/test_dial_provider.py
new file mode 100644
index 0000000..4a22cb6
--- /dev/null
+++ b/tests/test_dial_provider.py
@@ -0,0 +1,273 @@
+"""Tests for DIAL provider implementation."""
+
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from providers.base import ProviderType
+from providers.dial import DIALModelProvider
+
+
+class TestDIALProvider:
+    """Test DIAL provider functionality."""
+
+    @patch.dict(os.environ, {"DIAL_API_KEY": "test-key", "DIAL_API_HOST": "https://test.dialx.ai"})
+    def test_initialization_with_host(self):
+        """Test provider initialization with custom host."""
+        provider = DIALModelProvider("test-key")
+        assert provider._dial_api_key == "test-key"  # Check internal API key storage
+        assert provider.api_key == "placeholder-not-used"  # OpenAI client uses placeholder, auth header removed by hook
+        assert provider.base_url == "https://test.dialx.ai/openai"
+        assert provider.get_provider_type() == ProviderType.DIAL
+
+    @patch.dict(os.environ, {"DIAL_API_KEY": "test-key", "DIAL_API_HOST": ""}, clear=True)
+    def test_initialization_default_host(self):
+        """Test provider initialization with default host."""
+        provider = DIALModelProvider("test-key")
+        assert provider._dial_api_key == "test-key"  # Check internal API key storage
+        assert provider.api_key == "placeholder-not-used"  # OpenAI client uses placeholder, auth header removed by hook
+        assert provider.base_url == "https://core.dialx.ai/openai"
+
+    def test_initialization_host_normalization(self):
+        """Test that host URL is normalized to include /openai suffix."""
+        # Test with host missing /openai
+        provider = DIALModelProvider("test-key", base_url="https://custom.dialx.ai")
+        assert provider.base_url == "https://custom.dialx.ai/openai"
+
+        # Test with host already having /openai
+        provider =
DIALModelProvider("test-key", base_url="https://custom.dialx.ai/openai") + assert provider.base_url == "https://custom.dialx.ai/openai" + + @patch.dict(os.environ, {"DIAL_ALLOWED_MODELS": ""}, clear=False) + @patch("utils.model_restrictions._restriction_service", None) + def test_model_validation(self): + """Test model name validation.""" + provider = DIALModelProvider("test-key") + + # Test valid models + assert provider.validate_model_name("o3-2025-04-16") is True + assert provider.validate_model_name("o3") is True # Shorthand + assert provider.validate_model_name("anthropic.claude-opus-4-20250514-v1:0") is True + assert provider.validate_model_name("opus-4") is True # Shorthand + assert provider.validate_model_name("gemini-2.5-pro-preview-05-06") is True + assert provider.validate_model_name("gemini-2.5-pro") is True # Shorthand + + # Test invalid model + assert provider.validate_model_name("invalid-model") is False + + def test_resolve_model_name(self): + """Test model name resolution for shorthands.""" + provider = DIALModelProvider("test-key") + + # Test shorthand resolution + assert provider._resolve_model_name("o3") == "o3-2025-04-16" + assert provider._resolve_model_name("o4-mini") == "o4-mini-2025-04-16" + assert provider._resolve_model_name("opus-4") == "anthropic.claude-opus-4-20250514-v1:0" + assert provider._resolve_model_name("sonnet-4") == "anthropic.claude-sonnet-4-20250514-v1:0" + assert provider._resolve_model_name("gemini-2.5-pro") == "gemini-2.5-pro-preview-05-06" + assert provider._resolve_model_name("gemini-2.5-flash") == "gemini-2.5-flash-preview-05-20" + + # Test full name passthrough + assert provider._resolve_model_name("o3-2025-04-16") == "o3-2025-04-16" + assert ( + provider._resolve_model_name("anthropic.claude-opus-4-20250514-v1:0") + == "anthropic.claude-opus-4-20250514-v1:0" + ) + + @patch.dict(os.environ, {"DIAL_ALLOWED_MODELS": ""}, clear=False) + @patch("utils.model_restrictions._restriction_service", None) + def test_get_capabilities(self): + """Test getting model capabilities.""" + provider = DIALModelProvider("test-key") + + # Test O3 capabilities + capabilities = provider.get_capabilities("o3") + assert capabilities.model_name == "o3-2025-04-16" + assert capabilities.friendly_name == "DIAL" + assert capabilities.context_window == 200_000 + assert capabilities.provider == ProviderType.DIAL + assert capabilities.supports_images is True + assert capabilities.supports_extended_thinking is False + + # Test Claude 4 capabilities + capabilities = provider.get_capabilities("opus-4") + assert capabilities.model_name == "anthropic.claude-opus-4-20250514-v1:0" + assert capabilities.context_window == 200_000 + assert capabilities.supports_images is True + assert capabilities.supports_extended_thinking is False + + # Test Claude 4 with thinking mode + capabilities = provider.get_capabilities("opus-4-thinking") + assert capabilities.model_name == "anthropic.claude-opus-4-20250514-v1:0-with-thinking" + assert capabilities.context_window == 200_000 + assert capabilities.supports_images is True + assert capabilities.supports_extended_thinking is True + + # Test Gemini capabilities + capabilities = provider.get_capabilities("gemini-2.5-pro") + assert capabilities.model_name == "gemini-2.5-pro-preview-05-06" + assert capabilities.context_window == 1_000_000 + assert capabilities.supports_images is True + + # Test temperature constraint + assert capabilities.temperature_constraint.min_temp == 0.0 + assert capabilities.temperature_constraint.max_temp == 2.0 + assert 
capabilities.temperature_constraint.default_temp == 0.7 + + @patch.dict(os.environ, {"DIAL_ALLOWED_MODELS": ""}, clear=False) + @patch("utils.model_restrictions._restriction_service", None) + def test_get_capabilities_invalid_model(self): + """Test that get_capabilities raises for invalid models.""" + provider = DIALModelProvider("test-key") + + with pytest.raises(ValueError, match="Unsupported DIAL model"): + provider.get_capabilities("invalid-model") + + @patch("utils.model_restrictions.get_restriction_service") + def test_get_capabilities_restricted_model(self, mock_get_restriction): + """Test that get_capabilities respects model restrictions.""" + provider = DIALModelProvider("test-key") + + # Mock restriction service to block the model + mock_service = MagicMock() + mock_service.is_allowed.return_value = False + mock_get_restriction.return_value = mock_service + + with pytest.raises(ValueError, match="not allowed by restriction policy"): + provider.get_capabilities("o3") + + @patch.dict(os.environ, {"DIAL_ALLOWED_MODELS": ""}, clear=False) + @patch("utils.model_restrictions._restriction_service", None) + def test_supports_vision(self): + """Test vision support detection.""" + provider = DIALModelProvider("test-key") + + # Test models with vision support + assert provider._supports_vision("o3-2025-04-16") is True + assert provider._supports_vision("o3") is True # Via resolution + assert provider._supports_vision("anthropic.claude-opus-4-20250514-v1:0") is True + assert provider._supports_vision("gemini-2.5-pro-preview-05-06") is True + + # Test unknown model (falls back to parent implementation) + assert provider._supports_vision("unknown-model") is False + + @patch("openai.OpenAI") # Mock the OpenAI class directly from openai module + def test_generate_content_with_alias(self, mock_openai_class): + """Test that generate_content properly resolves aliases and uses deployment routing.""" + # Create mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [MagicMock(message=MagicMock(content="Test response"))] + mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) + mock_response.model = "gpt-4" + mock_response.id = "test-id" + mock_response.created = 1234567890 + mock_response.choices[0].finish_reason = "stop" + + mock_client.chat.completions.create.return_value = mock_response + mock_openai_class.return_value = mock_client + + provider = DIALModelProvider("test-key") + + # Generate content with shorthand + response = provider.generate_content(prompt="Test prompt", model_name="o3", temperature=0.7) # Shorthand + + # Verify OpenAI was instantiated with deployment-specific URL + mock_openai_class.assert_called_once() + call_args = mock_openai_class.call_args + assert "/deployments/o3-2025-04-16" in call_args[1]["base_url"] + + # Verify the resolved model name was passed to the API + mock_client.chat.completions.create.assert_called_once() + create_call_args = mock_client.chat.completions.create.call_args + assert create_call_args[1]["model"] == "o3-2025-04-16" # Resolved name + + # Verify response + assert response.content == "Test response" + assert response.model_name == "o3" # Original name preserved + assert response.metadata["model"] == "gpt-4" # API returned model name from mock + + def test_provider_type(self): + """Test provider type identification.""" + provider = DIALModelProvider("test-key") + assert provider.get_provider_type() == ProviderType.DIAL + + def test_friendly_name(self): + """Test provider 
friendly name.""" + provider = DIALModelProvider("test-key") + assert provider.FRIENDLY_NAME == "DIAL" + + @patch.dict(os.environ, {"DIAL_API_VERSION": "2024-12-01"}) + def test_configurable_api_version(self): + """Test that API version can be configured via environment variable.""" + provider = DIALModelProvider("test-key") + # Check that the custom API version is stored + assert provider.api_version == "2024-12-01" + + def test_default_api_version(self): + """Test that default API version is used when not configured.""" + # Clear any existing DIAL_API_VERSION from environment + with patch.dict(os.environ, {}, clear=True): + # Keep other env vars but ensure DIAL_API_VERSION is not set + if "DIAL_API_VERSION" in os.environ: + del os.environ["DIAL_API_VERSION"] + + provider = DIALModelProvider("test-key") + # Check that the default API version is used + assert provider.api_version == "2024-12-01-preview" + # Check that Api-Key header is set + assert provider.DEFAULT_HEADERS["Api-Key"] == "test-key" + + @patch.dict(os.environ, {"DIAL_ALLOWED_MODELS": "o3-2025-04-16,anthropic.claude-opus-4-20250514-v1:0"}) + @patch("utils.model_restrictions._restriction_service", None) + def test_allowed_models_restriction(self): + """Test model allow-list functionality.""" + provider = DIALModelProvider("test-key") + + # These should be allowed + assert provider.validate_model_name("o3-2025-04-16") is True + assert provider.validate_model_name("o3") is True # Alias for o3-2025-04-16 + assert provider.validate_model_name("anthropic.claude-opus-4-20250514-v1:0") is True + assert provider.validate_model_name("opus-4") is True # Resolves to anthropic.claude-opus-4-20250514-v1:0 + + # These should be blocked + assert provider.validate_model_name("gemini-2.5-pro-preview-05-06") is False + assert provider.validate_model_name("o4-mini-2025-04-16") is False + assert provider.validate_model_name("sonnet-4") is False # sonnet-4 is not in allowed list + + @patch("httpx.Client") + @patch("openai.OpenAI") + def test_close_method(self, mock_openai_class, mock_httpx_client_class): + """Test that the close method properly closes HTTP clients.""" + # Mock the httpx.Client instance that DIALModelProvider will create + mock_shared_http_client = MagicMock() + mock_httpx_client_class.return_value = mock_shared_http_client + + # Mock the OpenAI client instances + mock_openai_client_1 = MagicMock() + mock_openai_client_2 = MagicMock() + # Configure side_effect to return different mocks for subsequent calls + mock_openai_class.side_effect = [mock_openai_client_1, mock_openai_client_2] + + provider = DIALModelProvider("test-key") + + # Mock the superclass's _client attribute directly + mock_superclass_client = MagicMock() + provider._client = mock_superclass_client + + # Simulate getting clients for two different deployments to populate _deployment_clients + provider._get_deployment_client("model_a") + provider._get_deployment_client("model_b") + + # Now call close + provider.close() + + # Assert that the shared httpx client's close method was called + mock_shared_http_client.close.assert_called_once() + + # Assert that the superclass client's close method was called + mock_superclass_client.close.assert_called_once() + + # Assert that the deployment clients cache is cleared + assert not provider._deployment_clients diff --git a/tools/listmodels.py b/tools/listmodels.py index 6a623b9..265fbcc 100644 --- a/tools/listmodels.py +++ b/tools/listmodels.py @@ -84,6 +84,7 @@ class ListModelsTool(BaseTool): ProviderType.GOOGLE: {"name": 
"Google Gemini", "env_key": "GEMINI_API_KEY"}, ProviderType.OPENAI: {"name": "OpenAI", "env_key": "OPENAI_API_KEY"}, ProviderType.XAI: {"name": "X.AI (Grok)", "env_key": "XAI_API_KEY"}, + ProviderType.DIAL: {"name": "AI DIAL", "env_key": "DIAL_API_KEY"}, } # Check each native provider type diff --git a/utils/model_restrictions.py b/utils/model_restrictions.py index 0b7ff25..834c0a2 100644 --- a/utils/model_restrictions.py +++ b/utils/model_restrictions.py @@ -11,6 +11,7 @@ Environment Variables: - GOOGLE_ALLOWED_MODELS: Comma-separated list of allowed Gemini models - XAI_ALLOWED_MODELS: Comma-separated list of allowed X.AI GROK models - OPENROUTER_ALLOWED_MODELS: Comma-separated list of allowed OpenRouter models +- DIAL_ALLOWED_MODELS: Comma-separated list of allowed DIAL models Example: OPENAI_ALLOWED_MODELS=o3-mini,o4-mini @@ -44,6 +45,7 @@ class ModelRestrictionService: ProviderType.GOOGLE: "GOOGLE_ALLOWED_MODELS", ProviderType.XAI: "XAI_ALLOWED_MODELS", ProviderType.OPENROUTER: "OPENROUTER_ALLOWED_MODELS", + ProviderType.DIAL: "DIAL_ALLOWED_MODELS", } def __init__(self): From b4852c825f6a56d077a93fef37efdf49705bd952 Mon Sep 17 00:00:00 2001 From: Fahad Date: Mon, 23 Jun 2025 14:26:39 +0400 Subject: [PATCH 9/9] Support for DIAL Claude-3 models updated to Claude 4 --- config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index bd330eb..c824e29 100644 --- a/config.py +++ b/config.py @@ -14,9 +14,9 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "5.6.0" +__version__ = "5.6.1" # Last update date in ISO format -__updated__ = "2025-06-22" +__updated__ = "2025-06-23" # Primary maintainer __author__ = "Fahad Gilani"