Merge branch 'feat-local_support_with_UTF-8_encoding-update' of https://github.com/GiGiDKR/zen-mcp-server into feat-local_support_with_UTF-8_encoding-update

This commit is contained in:
OhMyApps
2025-06-23 22:24:47 +02:00
57 changed files with 1589 additions and 863 deletions

View File

@@ -132,6 +132,7 @@ class ModelCapabilities:
model_name: str
friendly_name: str # Human-friendly name like "Gemini" or "OpenAI"
context_window: int # Total context window size in tokens
max_output_tokens: int # Maximum output tokens per request
supports_extended_thinking: bool = False
supports_system_prompts: bool = True
supports_streaming: bool = True
@@ -140,6 +141,19 @@ class ModelCapabilities:
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls
# Additional fields for comprehensive model information
description: str = "" # Human-readable description of the model
aliases: list[str] = field(default_factory=list) # Alternative names/shortcuts for the model
# JSON mode support (for providers that support structured output)
supports_json_mode: bool = False
# Thinking mode support (for models with thinking capabilities)
max_thinking_tokens: int = 0 # Maximum thinking tokens for extended reasoning models
# Custom model flag (for models that only work with custom endpoints)
is_custom: bool = False # Whether this model requires custom API endpoints
# Temperature constraint object - preferred way to define temperature limits
temperature_constraint: TemperatureConstraint = field(
default_factory=lambda: RangeTemperatureConstraint(0.0, 2.0, 0.7)
@@ -251,7 +265,7 @@ class ModelProvider(ABC):
capabilities = self.get_capabilities(model_name)
# Check if model supports temperature at all
if hasattr(capabilities, "supports_temperature") and not capabilities.supports_temperature:
if not capabilities.supports_temperature:
return None
# Get temperature range
@@ -290,19 +304,109 @@ class ModelProvider(ABC):
"""Check if the model supports extended thinking mode."""
pass
@abstractmethod
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations for this provider.
This is a hook method that subclasses can override to provide
their model configurations from different sources.
Returns:
Dictionary mapping model names to their ModelCapabilities objects
"""
# Return SUPPORTED_MODELS if it exists (must contain ModelCapabilities objects)
if hasattr(self, "SUPPORTED_MODELS"):
return {k: v for k, v in self.SUPPORTED_MODELS.items() if isinstance(v, ModelCapabilities)}
return {}
def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases for this provider.
This is a hook method that subclasses can override to provide
aliases from different sources.
Returns:
Dictionary mapping model names to their list of aliases
"""
# Default implementation extracts from ModelCapabilities objects
aliases = {}
for model_name, capabilities in self.get_model_configurations().items():
if capabilities.aliases:
aliases[model_name] = capabilities.aliases
return aliases
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.
This implementation uses the hook methods to support different
model configuration sources.
Args:
model_name: Model name that may be an alias
Returns:
Resolved model name
"""
# Get model configurations from the hook method
model_configs = self.get_model_configurations()
# First check if it's already a base model name (case-sensitive exact match)
if model_name in model_configs:
return model_name
# Check case-insensitively for both base models and aliases
model_name_lower = model_name.lower()
# Check base model names case-insensitively
for base_model in model_configs:
if base_model.lower() == model_name_lower:
return base_model
# Check aliases from the hook method
all_aliases = self.get_all_model_aliases()
for base_model, aliases in all_aliases.items():
if any(alias.lower() == model_name_lower for alias in aliases):
return base_model
# If not found, return as-is
return model_name
def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
This implementation uses the get_model_configurations() hook
to support different model configuration sources.
Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
Returns:
List of model names available from this provider
"""
pass
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service() if respect_restrictions else None
models = []
# Get model configurations from the hook method
model_configs = self.get_model_configurations()
for model_name in model_configs:
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
# Add the base model
models.append(model_name)
# Get aliases from the hook method
all_aliases = self.get_all_model_aliases()
for model_name, aliases in all_aliases.items():
# Only add aliases for models that passed restriction check
if model_name in models:
models.extend(aliases)
return models
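A minimal sketch of how a concrete provider is expected to plug into these hooks (illustrative only: ToyProvider, its model data, and the import path are invented here, and the remaining abstract methods are omitted):

from providers.base import ModelCapabilities, ModelProvider, ProviderType, create_temperature_constraint

class ToyProvider(ModelProvider):
    SUPPORTED_MODELS = {
        "toy-large-1.0": ModelCapabilities(
            provider=ProviderType.CUSTOM,
            model_name="toy-large-1.0",
            friendly_name="Toy (Large)",
            context_window=32_768,
            max_output_tokens=8_192,
            temperature_constraint=create_temperature_constraint("range"),
            aliases=["toy", "toy-large"],
        ),
    }

# With no further overrides, the base-class hooks behave like this:
#   get_model_configurations() -> {"toy-large-1.0": <ModelCapabilities>}
#   get_all_model_aliases()    -> {"toy-large-1.0": ["toy", "toy-large"]}
#   _resolve_model_name("TOY") -> "toy-large-1.0" (case-insensitive alias match)
#   list_models()              -> ["toy-large-1.0", "toy", "toy-large"] (no restrictions configured)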
@abstractmethod
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
@@ -312,21 +416,22 @@ class ModelProvider(ABC):
Returns:
List of all model names and alias targets known by this provider
"""
pass
all_models = set()
# Get model configurations from the hook method
model_configs = self.get_model_configurations()
# Add all base model names
for model_name in model_configs:
all_models.add(model_name.lower())
# Get aliases from the hook method and add them
all_aliases = self.get_all_model_aliases()
for _model_name, aliases in all_aliases.items():
for alias in aliases:
all_models.add(alias.lower())
return list(all_models)
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.
Base implementation returns the model name unchanged.
Subclasses should override to provide alias resolution.
Args:
model_name: Model name that may be an alias
Returns:
Resolved model name
"""
return model_name
def close(self):
"""Clean up any resources held by the provider.

View File

@@ -158,6 +158,7 @@ class CustomProvider(OpenAICompatibleProvider):
model_name=resolved_name,
friendly_name=f"{self.FRIENDLY_NAME} ({resolved_name})",
context_window=32_768, # Conservative default
max_output_tokens=32_768, # Conservative default max output
supports_extended_thinking=False, # Most custom models don't support this
supports_system_prompts=True,
supports_streaming=True,
@@ -187,7 +188,7 @@ class CustomProvider(OpenAICompatibleProvider):
Returns:
True if model is intended for custom/local endpoint
"""
logging.debug(f"Custom provider validating model: '{model_name}'")
# logging.debug(f"Custom provider validating model: '{model_name}'")
# Try to resolve through registry first
config = self._registry.resolve(model_name)
@@ -195,12 +196,12 @@ class CustomProvider(OpenAICompatibleProvider):
model_id = config.model_name
# Use explicit is_custom flag for clean validation
if config.is_custom:
logging.debug(f"Model '{model_name}' -> '{model_id}' validated via registry (custom model)")
logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' validated via registry")
return True
else:
# This is a cloud/OpenRouter model - CustomProvider should NOT handle these
# Let OpenRouter provider handle them instead
logging.debug(f"Model '{model_name}' -> '{model_id}' rejected (cloud model, defer to OpenRouter)")
# logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' not custom (defer to OpenRouter)")
return False
# Handle version tags for unknown models (e.g., "my-model:latest")
@@ -268,65 +269,50 @@ class CustomProvider(OpenAICompatibleProvider):
def supports_thinking_mode(self, model_name: str) -> bool:
"""Check if the model supports extended thinking mode.
Most custom/local models don't support extended thinking.
Args:
model_name: Model to check
Returns:
False (custom models generally don't support thinking mode)
True if model supports thinking mode, False otherwise
"""
# Check if model is in registry
config = self._registry.resolve(model_name) if self._registry else None
if config and config.is_custom:
# Trust the config from custom_models.json
return config.supports_extended_thinking
# Default to False for unknown models
return False
def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations from the registry.
Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
For CustomProvider, we convert registry configurations to ModelCapabilities objects.
Returns:
List of model names available from this provider
Dictionary mapping model names to their ModelCapabilities objects
"""
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service() if respect_restrictions else None
models = []
configs = {}
if self._registry:
# Get all models from the registry
all_models = self._registry.list_models()
aliases = self._registry.list_aliases()
# Add models that are validated by the custom provider
for model_name in all_models + aliases:
# Use the provider's validation logic to determine if this model
# is appropriate for the custom endpoint
# Get all models from registry
for model_name in self._registry.list_models():
# Only include custom models that this provider validates
if self.validate_model_name(model_name):
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
config = self._registry.resolve(model_name)
if config and config.is_custom:
# Use ModelCapabilities directly from registry
configs[model_name] = config
models.append(model_name)
return configs
return models
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases from the registry.
Returns:
List of all model names and alias targets known by this provider
Dictionary mapping model names to their list of aliases
"""
all_models = set()
if self._registry:
# Get all models and aliases from the registry
all_models.update(model.lower() for model in self._registry.list_models())
all_models.update(alias.lower() for alias in self._registry.list_aliases())
# For each alias, also add its target
for alias in self._registry.list_aliases():
config = self._registry.resolve(alias)
if config:
all_models.add(config.model_name.lower())
return list(all_models)
# Since aliases are now included in the configurations,
# we can use the base class implementation
return super().get_all_model_aliases()

View File

@@ -10,7 +10,7 @@ from .base import (
ModelCapabilities,
ModelResponse,
ProviderType,
RangeTemperatureConstraint,
create_temperature_constraint,
)
from .openai_compatible import OpenAICompatibleProvider
@@ -30,63 +30,170 @@ class DIALModelProvider(OpenAICompatibleProvider):
MAX_RETRIES = 4
RETRY_DELAYS = [1, 3, 5, 8] # seconds
# Supported DIAL models (these can be customized based on your DIAL deployment)
# Model configurations using ModelCapabilities objects
SUPPORTED_MODELS = {
"o3-2025-04-16": {
"context_window": 200_000,
"supports_extended_thinking": False,
"supports_vision": True,
},
"o4-mini-2025-04-16": {
"context_window": 200_000,
"supports_extended_thinking": False,
"supports_vision": True,
},
"anthropic.claude-sonnet-4-20250514-v1:0": {
"context_window": 200_000,
"supports_extended_thinking": False,
"supports_vision": True,
},
"anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": {
"context_window": 200_000,
"supports_extended_thinking": True, # Thinking mode variant
"supports_vision": True,
},
"anthropic.claude-opus-4-20250514-v1:0": {
"context_window": 200_000,
"supports_extended_thinking": False,
"supports_vision": True,
},
"anthropic.claude-opus-4-20250514-v1:0-with-thinking": {
"context_window": 200_000,
"supports_extended_thinking": True, # Thinking mode variant
"supports_vision": True,
},
"gemini-2.5-pro-preview-03-25-google-search": {
"context_window": 1_000_000,
"supports_extended_thinking": False, # DIAL doesn't expose thinking mode
"supports_vision": True,
},
"gemini-2.5-pro-preview-05-06": {
"context_window": 1_000_000,
"supports_extended_thinking": False,
"supports_vision": True,
},
"gemini-2.5-flash-preview-05-20": {
"context_window": 1_000_000,
"supports_extended_thinking": False,
"supports_vision": True,
},
# Shorthands
"o3": "o3-2025-04-16",
"o4-mini": "o4-mini-2025-04-16",
"sonnet-4": "anthropic.claude-sonnet-4-20250514-v1:0",
"sonnet-4-thinking": "anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
"opus-4": "anthropic.claude-opus-4-20250514-v1:0",
"opus-4-thinking": "anthropic.claude-opus-4-20250514-v1:0-with-thinking",
"gemini-2.5-pro": "gemini-2.5-pro-preview-05-06",
"gemini-2.5-pro-search": "gemini-2.5-pro-preview-03-25-google-search",
"gemini-2.5-flash": "gemini-2.5-flash-preview-05-20",
"o3-2025-04-16": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="o3-2025-04-16",
friendly_name="DIAL (O3)",
context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=False, # O3 models don't accept temperature
temperature_constraint=create_temperature_constraint("fixed"),
description="OpenAI O3 via DIAL - Strong reasoning model",
aliases=["o3"],
),
"o4-mini-2025-04-16": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="o4-mini-2025-04-16",
friendly_name="DIAL (O4-mini)",
context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=False, # O4 models don't accept temperature
temperature_constraint=create_temperature_constraint("fixed"),
description="OpenAI O4-mini via DIAL - Fast reasoning model",
aliases=["o4-mini"],
),
"anthropic.claude-sonnet-4-20250514-v1:0": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="anthropic.claude-sonnet-4-20250514-v1:0",
friendly_name="DIAL (Sonnet 4)",
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=False, # Claude doesn't have JSON mode
supports_images=True,
max_image_size_mb=5.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Claude Sonnet 4 via DIAL - Balanced performance",
aliases=["sonnet-4"],
),
"anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
friendly_name="DIAL (Sonnet 4 Thinking)",
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=False, # Claude doesn't have JSON mode
supports_images=True,
max_image_size_mb=5.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Claude Sonnet 4 with thinking mode via DIAL",
aliases=["sonnet-4-thinking"],
),
"anthropic.claude-opus-4-20250514-v1:0": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="anthropic.claude-opus-4-20250514-v1:0",
friendly_name="DIAL (Opus 4)",
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=False, # Claude doesn't have JSON mode
supports_images=True,
max_image_size_mb=5.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Claude Opus 4 via DIAL - Most capable Claude model",
aliases=["opus-4"],
),
"anthropic.claude-opus-4-20250514-v1:0-with-thinking": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking",
friendly_name="DIAL (Opus 4 Thinking)",
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=False, # Claude doesn't have JSON mode
supports_images=True,
max_image_size_mb=5.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Claude Opus 4 with thinking mode via DIAL",
aliases=["opus-4-thinking"],
),
"gemini-2.5-pro-preview-03-25-google-search": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="gemini-2.5-pro-preview-03-25-google-search",
friendly_name="DIAL (Gemini 2.5 Pro Search)",
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False, # DIAL doesn't expose thinking mode
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Gemini 2.5 Pro with Google Search via DIAL",
aliases=["gemini-2.5-pro-search"],
),
"gemini-2.5-pro-preview-05-06": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="gemini-2.5-pro-preview-05-06",
friendly_name="DIAL (Gemini 2.5 Pro)",
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Gemini 2.5 Pro via DIAL - Deep reasoning",
aliases=["gemini-2.5-pro"],
),
"gemini-2.5-flash-preview-05-20": ModelCapabilities(
provider=ProviderType.DIAL,
model_name="gemini-2.5-flash-preview-05-20",
friendly_name="DIAL (Gemini Flash 2.5)",
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # DIAL may not expose function calling
supports_json_mode=True,
supports_images=True,
max_image_size_mb=20.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Gemini 2.5 Flash via DIAL - Ultra-fast",
aliases=["gemini-2.5-flash"],
),
}
def __init__(self, api_key: str, **kwargs):
@@ -181,20 +288,8 @@ class DIALModelProvider(OpenAICompatibleProvider):
if not restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model_name):
raise ValueError(f"Model '{model_name}' is not allowed by restriction policy.")
config = self.SUPPORTED_MODELS[resolved_name]
return ModelCapabilities(
provider=ProviderType.DIAL,
model_name=resolved_name,
friendly_name=self.FRIENDLY_NAME,
context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_images=config.get("supports_vision", False),
temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
)
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
return self.SUPPORTED_MODELS[resolved_name]
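A quick sanity check of the new return path (hypothetical usage; assumes a configured DIALModelProvider and no restriction policy):

provider = DIALModelProvider(api_key="...")
caps = provider.get_capabilities("o3")  # alias resolves to "o3-2025-04-16"
assert caps is provider.SUPPORTED_MODELS["o3-2025-04-16"]  # same object, no per-call reconstruction
assert caps.max_output_tokens == 100_000 and caps.supports_temperature is False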
def get_provider_type(self) -> ProviderType:
"""Get the provider type."""
@@ -211,7 +306,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
"""
resolved_name = self._resolve_model_name(model_name)
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
if resolved_name not in self.SUPPORTED_MODELS:
return False
# Check against base class allowed_models if configured
@@ -231,20 +326,6 @@ class DIALModelProvider(OpenAICompatibleProvider):
return True
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.
Args:
model_name: Model name or shorthand
Returns:
Full model name
"""
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
if isinstance(shorthand_value, str):
return shorthand_value
return model_name
def _get_deployment_client(self, deployment: str):
"""Get or create a cached client for a specific deployment.
@@ -357,7 +438,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
# Check model capabilities
try:
capabilities = self.get_capabilities(model_name)
supports_temperature = getattr(capabilities, "supports_temperature", True)
supports_temperature = capabilities.supports_temperature
except Exception as e:
logger.debug(f"Failed to check temperature support for {model_name}: {e}")
supports_temperature = True
@@ -441,63 +522,12 @@ class DIALModelProvider(OpenAICompatibleProvider):
"""
resolved_name = self._resolve_model_name(model_name)
if resolved_name in self.SUPPORTED_MODELS and isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
return self.SUPPORTED_MODELS[resolved_name].get("supports_vision", False)
if resolved_name in self.SUPPORTED_MODELS:
return self.SUPPORTED_MODELS[resolved_name].supports_images
# Fall back to parent implementation for unknown models
return super()._supports_vision(model_name)
def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
Returns:
List of model names available from this provider
"""
# Get all model keys (both full names and aliases)
all_models = list(self.SUPPORTED_MODELS.keys())
if not respect_restrictions:
return all_models
# Apply restrictions if configured
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service()
# Filter based on restrictions
allowed_models = []
for model in all_models:
resolved_name = self._resolve_model_name(model)
if restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model):
allowed_models.append(model)
return allowed_models
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
This is used for validation purposes to ensure restriction policies
can validate against both aliases and their target model names.
Returns:
List of all model names and alias targets known by this provider
"""
# Collect all unique model names (both aliases and targets)
all_models = set()
for key, value in self.SUPPORTED_MODELS.items():
# Add the key (could be alias or full name)
all_models.add(key)
# If it's an alias (string value), add the target too
if isinstance(value, str):
all_models.add(value)
return sorted(all_models)
def close(self):
"""Clean up HTTP clients when provider is closed."""
logger.info("Closing DIAL provider HTTP clients...")

View File

@@ -9,7 +9,7 @@ from typing import Optional
from google import genai
from google.genai import types
from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, RangeTemperatureConstraint
from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, create_temperature_constraint
logger = logging.getLogger(__name__)
@@ -17,47 +17,83 @@ logger = logging.getLogger(__name__)
class GeminiModelProvider(ModelProvider):
"""Google Gemini model provider implementation."""
# Model configurations
# Model configurations using ModelCapabilities objects
SUPPORTED_MODELS = {
"gemini-2.0-flash": {
"context_window": 1_048_576, # 1M tokens
"supports_extended_thinking": True, # Experimental thinking mode
"max_thinking_tokens": 24576, # Same as 2.5 flash for consistency
"supports_images": True, # Vision capability
"max_image_size_mb": 20.0, # Conservative 20MB limit for reliability
"description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
},
"gemini-2.0-flash-lite": {
"context_window": 1_048_576, # 1M tokens
"supports_extended_thinking": False, # Not supported per user request
"max_thinking_tokens": 0, # No thinking support
"supports_images": False, # Does not support images
"max_image_size_mb": 0.0, # No image support
"description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
},
"gemini-2.5-flash": {
"context_window": 1_048_576, # 1M tokens
"supports_extended_thinking": True,
"max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit
"supports_images": True, # Vision capability
"max_image_size_mb": 20.0, # Conservative 20MB limit for reliability
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
},
"gemini-2.5-pro": {
"context_window": 1_048_576, # 1M tokens
"supports_extended_thinking": True,
"max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit
"supports_images": True, # Vision capability
"max_image_size_mb": 32.0, # Higher limit for Pro model
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
},
# Shorthands
"flash": "gemini-2.5-flash",
"flash-2.0": "gemini-2.0-flash",
"flash2": "gemini-2.0-flash",
"flashlite": "gemini-2.0-flash-lite",
"flash-lite": "gemini-2.0-flash-lite",
"pro": "gemini-2.5-pro",
"gemini-2.0-flash": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash",
friendly_name="Gemini (Flash 2.0)",
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, # Experimental thinking mode
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # Vision capability
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
max_thinking_tokens=24576, # Same as 2.5 flash for consistency
description="Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
aliases=["flash-2.0", "flash2"],
),
"gemini-2.0-flash-lite": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash-lite",
friendly_name="Gemin (Flash Lite 2.0)",
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=False, # Not supported per user request
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=False, # Does not support images
max_image_size_mb=0.0, # No image support
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
aliases=["flashlite", "flash-lite"],
),
"gemini-2.5-flash": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-flash",
friendly_name="Gemini (Flash 2.5)",
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # Vision capability
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
max_thinking_tokens=24576, # Flash 2.5 thinking budget limit
description="Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
aliases=["flash", "flash2.5"],
),
"gemini-2.5-pro": ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-pro",
friendly_name="Gemini (Pro 2.5)",
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # Vision capability
max_image_size_mb=32.0, # Higher limit for Pro model
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
max_thinking_tokens=32768, # Max thinking tokens for Pro model
description="Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
aliases=["pro", "gemini pro", "gemini-pro"],
),
}
# Thinking mode configurations - percentages of model's max_thinking_tokens
@@ -70,6 +106,14 @@ class GeminiModelProvider(ModelProvider):
"max": 1.0, # 100% of max - full thinking budget
}
# Model-specific thinking token limits
MAX_THINKING_TOKENS = {
"gemini-2.0-flash": 24576, # Same as 2.5 flash for consistency
"gemini-2.0-flash-lite": 0, # No thinking support
"gemini-2.5-flash": 24576, # Flash 2.5 thinking budget limit
"gemini-2.5-pro": 32768, # Pro 2.5 thinking budget limit
}
def __init__(self, api_key: str, **kwargs):
"""Initialize Gemini provider with API key."""
super().__init__(api_key, **kwargs)
@@ -100,25 +144,8 @@ class GeminiModelProvider(ModelProvider):
if not restriction_service.is_allowed(ProviderType.GOOGLE, resolved_name, model_name):
raise ValueError(f"Gemini model '{resolved_name}' is not allowed by restriction policy.")
config = self.SUPPORTED_MODELS[resolved_name]
# Gemini models support 0.0-2.0 temperature range
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
return ModelCapabilities(
provider=ProviderType.GOOGLE,
model_name=resolved_name,
friendly_name="Gemini",
context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_images=config.get("supports_images", False),
max_image_size_mb=config.get("max_image_size_mb", 0.0),
supports_temperature=True, # Gemini models accept temperature parameter
temperature_constraint=temp_constraint,
)
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
return self.SUPPORTED_MODELS[resolved_name]
def generate_content(
self,
@@ -179,8 +206,8 @@ class GeminiModelProvider(ModelProvider):
if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
# Get model's max thinking tokens and calculate actual budget
model_config = self.SUPPORTED_MODELS.get(resolved_name)
if model_config and "max_thinking_tokens" in model_config:
max_thinking_tokens = model_config["max_thinking_tokens"]
if model_config and model_config.max_thinking_tokens > 0:
max_thinking_tokens = model_config.max_thinking_tokens
actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
@@ -258,7 +285,7 @@ class GeminiModelProvider(ModelProvider):
resolved_name = self._resolve_model_name(model_name)
# First check if model is supported
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
if resolved_name not in self.SUPPORTED_MODELS:
return False
# Then check if model is allowed by restrictions
@@ -281,78 +308,20 @@ class GeminiModelProvider(ModelProvider):
def get_thinking_budget(self, model_name: str, thinking_mode: str) -> int:
"""Get actual thinking token budget for a model and thinking mode."""
resolved_name = self._resolve_model_name(model_name)
model_config = self.SUPPORTED_MODELS.get(resolved_name, {})
model_config = self.SUPPORTED_MODELS.get(resolved_name)
if not model_config.get("supports_extended_thinking", False):
if not model_config or not model_config.supports_extended_thinking:
return 0
if thinking_mode not in self.THINKING_BUDGETS:
return 0
max_thinking_tokens = model_config.get("max_thinking_tokens", 0)
max_thinking_tokens = model_config.max_thinking_tokens
if max_thinking_tokens == 0:
return 0
return int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
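A hedged worked example (the 0.67 fraction for "high" is an assumption; only the "max": 1.0 entry of THINKING_BUDGETS is visible in this diff):

provider = GeminiModelProvider(api_key="...")
budget = provider.get_thinking_budget("pro", "high")  # "pro" resolves to gemini-2.5-pro
# int(32768 * 0.67) == 21954 thinking tokens, scaled from the model's max_thinking_tokens
assert provider.get_thinking_budget("flash-lite", "high") == 0  # gemini-2.0-flash-lite has no thinking support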
def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
Returns:
List of model names available from this provider
"""
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service() if respect_restrictions else None
models = []
for model_name, config in self.SUPPORTED_MODELS.items():
# Handle both base models (dict configs) and aliases (string values)
if isinstance(config, str):
# This is an alias - check if the target model would be allowed
target_model = config
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
continue
# Allow the alias
models.append(model_name)
else:
# This is a base model with config dict
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
models.append(model_name)
return models
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
Returns:
List of all model names and alias targets known by this provider
"""
all_models = set()
for model_name, config in self.SUPPORTED_MODELS.items():
# Add the model name itself
all_models.add(model_name.lower())
# If it's an alias (string value), add the target model too
if isinstance(config, str):
all_models.add(config.lower())
return list(all_models)
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name."""
# Check if it's a shorthand
shorthand_value = self.SUPPORTED_MODELS.get(model_name.lower())
if isinstance(shorthand_value, str):
return shorthand_value
return model_name
def _extract_usage(self, response) -> dict[str, int]:
"""Extract token usage from Gemini response."""
usage = {}

View File

@@ -686,7 +686,6 @@ class OpenAICompatibleProvider(ModelProvider):
"o3-mini",
"o3-pro",
"o4-mini",
"o4-mini-high",
# Note: Claude models would be handled by a separate provider
}
supports = model_name.lower() in vision_models

View File

@@ -17,71 +17,98 @@ logger = logging.getLogger(__name__)
class OpenAIModelProvider(OpenAICompatibleProvider):
"""Official OpenAI API provider (api.openai.com)."""
# Model configurations
# Model configurations using ModelCapabilities objects
SUPPORTED_MODELS = {
"o3": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O3 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O3 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
"description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
},
"o3-mini": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O3 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O3 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
"description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
},
"o3-pro-2025-06-10": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O3 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O3 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
"description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
},
# Aliases
"o3-pro": "o3-pro-2025-06-10",
"o4-mini": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O4 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O4 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
"description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
},
"o4-mini-high": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O4 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O4 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
"description": "Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks",
},
"gpt-4.1-2025-04-14": {
"context_window": 1_000_000, # 1M tokens
"supports_extended_thinking": False,
"supports_images": True, # GPT-4.1 supports vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": True, # Regular models accept temperature parameter
"temperature_constraint": "range", # 0.0-2.0 range
"description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
},
# Shorthands
"mini": "o4-mini", # Default 'mini' to latest mini model
"o3mini": "o3-mini",
"o4mini": "o4-mini",
"o4minihigh": "o4-mini-high",
"o4minihi": "o4-mini-high",
"gpt4.1": "gpt-4.1-2025-04-14",
"o3": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o3",
friendly_name="OpenAI (O3)",
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # O3 models support vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=False, # O3 models don't accept temperature parameter
temperature_constraint=create_temperature_constraint("fixed"),
description="Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
aliases=[],
),
"o3-mini": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o3-mini",
friendly_name="OpenAI (O3-mini)",
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # O3 models support vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=False, # O3 models don't accept temperature parameter
temperature_constraint=create_temperature_constraint("fixed"),
description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
aliases=["o3mini", "o3-mini"],
),
"o3-pro-2025-06-10": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o3-pro-2025-06-10",
friendly_name="OpenAI (O3-Pro)",
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # O3 models support vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=False, # O3 models don't accept temperature parameter
temperature_constraint=create_temperature_constraint("fixed"),
description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
aliases=["o3-pro"],
),
"o4-mini": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="o4-mini",
friendly_name="OpenAI (O4-mini)",
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # O4 models support vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=False, # O4 models don't accept temperature parameter
temperature_constraint=create_temperature_constraint("fixed"),
description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
aliases=["mini", "o4mini", "o4-mini"],
),
"gpt-4.1-2025-04-14": ModelCapabilities(
provider=ProviderType.OPENAI,
model_name="gpt-4.1-2025-04-14",
friendly_name="OpenAI (GPT 4.1)",
context_window=1_000_000, # 1M tokens
max_output_tokens=32_768,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=True,
supports_images=True, # GPT-4.1 supports vision
max_image_size_mb=20.0, # 20MB per OpenAI docs
supports_temperature=True, # Regular models accept temperature parameter
temperature_constraint=create_temperature_constraint("range"),
description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
aliases=["gpt4.1"],
),
}
def __init__(self, api_key: str, **kwargs):
@@ -95,7 +122,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
# Resolve shorthand
resolved_name = self._resolve_model_name(model_name)
if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
if resolved_name not in self.SUPPORTED_MODELS:
raise ValueError(f"Unsupported OpenAI model: {model_name}")
# Check if model is allowed by restrictions
@@ -105,27 +132,8 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
if not restriction_service.is_allowed(ProviderType.OPENAI, resolved_name, model_name):
raise ValueError(f"OpenAI model '{model_name}' is not allowed by restriction policy.")
config = self.SUPPORTED_MODELS[resolved_name]
# Get temperature constraints and support from configuration
supports_temperature = config.get("supports_temperature", True) # Default to True for backward compatibility
temp_constraint_type = config.get("temperature_constraint", "range") # Default to range
temp_constraint = create_temperature_constraint(temp_constraint_type)
return ModelCapabilities(
provider=ProviderType.OPENAI,
model_name=model_name,
friendly_name="OpenAI",
context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_images=config.get("supports_images", False),
max_image_size_mb=config.get("max_image_size_mb", 0.0),
supports_temperature=supports_temperature,
temperature_constraint=temp_constraint,
)
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
return self.SUPPORTED_MODELS[resolved_name]
def get_provider_type(self) -> ProviderType:
"""Get the provider type."""
@@ -136,7 +144,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
resolved_name = self._resolve_model_name(model_name)
# First check if model is supported
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
if resolved_name not in self.SUPPORTED_MODELS:
return False
# Then check if model is allowed by restrictions
@@ -177,61 +185,3 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
# Currently no OpenAI models support extended thinking
# This may change with future O3 models
return False
def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
Returns:
List of model names available from this provider
"""
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service() if respect_restrictions else None
models = []
for model_name, config in self.SUPPORTED_MODELS.items():
# Handle both base models (dict configs) and aliases (string values)
if isinstance(config, str):
# This is an alias - check if the target model would be allowed
target_model = config
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
continue
# Allow the alias
models.append(model_name)
else:
# This is a base model with config dict
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
models.append(model_name)
return models
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
Returns:
List of all model names and alias targets known by this provider
"""
all_models = set()
for model_name, config in self.SUPPORTED_MODELS.items():
# Add the model name itself
all_models.add(model_name.lower())
# If it's an alias (string value), add the target model too
if isinstance(config, str):
all_models.add(config.lower())
return list(all_models)
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name."""
# Check if it's a shorthand
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
if isinstance(shorthand_value, str):
return shorthand_value
return model_name

View File

@@ -50,14 +50,6 @@ class OpenRouterProvider(OpenAICompatibleProvider):
aliases = self._registry.list_aliases()
logging.info(f"OpenRouter loaded {len(models)} models with {len(aliases)} aliases")
def _parse_allowed_models(self) -> None:
"""Override to disable environment-based allow-list.
OpenRouter model access is controlled via the OpenRouter dashboard,
not through environment variables.
"""
return None
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model aliases to OpenRouter model names.
@@ -109,6 +101,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
model_name=resolved_name,
friendly_name=self.FRIENDLY_NAME,
context_window=32_768, # Conservative default context window
max_output_tokens=32_768,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
@@ -130,16 +123,34 @@ class OpenRouterProvider(OpenAICompatibleProvider):
As the catch-all provider, OpenRouter accepts any model name that wasn't
handled by higher-priority providers. OpenRouter will validate based on
the API key's permissions.
the API key's permissions and local restrictions.
Args:
model_name: Model name to validate
Returns:
Always True - OpenRouter is the catch-all provider
True if model is allowed, False if restricted
"""
# Accept any model name - OpenRouter is the fallback provider
# Higher priority providers (native APIs, custom endpoints) get first chance
# Check model restrictions if configured
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service()
if restriction_service:
# Check if model name itself is allowed
if restriction_service.is_allowed(self.get_provider_type(), model_name):
return True
# Also check aliases - model_name might be an alias
model_config = self._registry.resolve(model_name)
if model_config and model_config.aliases:
for alias in model_config.aliases:
if restriction_service.is_allowed(self.get_provider_type(), alias):
return True
# If restrictions are configured and model/alias not in allowed list, reject
return False
# No restrictions configured - accept any model name as the fallback provider
return True
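Sketch of the resulting behavior (the environment variable name and the model/alias values below are assumptions; see utils/model_restrictions.py and the registry config for the real ones):

# With something like OPENROUTER_ALLOWED_MODELS="some-alias" configured:
#   validate_model_name("vendor/some-model")  -> True if the full name or one of its
#                                                registry aliases ("some-alias") is allowed
#   validate_model_name("vendor/other-model") -> False (restrictions configured, name not allowed)
# With no restrictions configured, every name is accepted, exactly as before.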
def generate_content(
@@ -260,3 +271,35 @@ class OpenRouterProvider(OpenAICompatibleProvider):
all_models.add(config.model_name.lower())
return list(all_models)
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations from the registry.
For OpenRouter, we convert registry configurations to ModelCapabilities objects.
Returns:
Dictionary mapping model names to their ModelCapabilities objects
"""
configs = {}
if self._registry:
# Get all models from registry
for model_name in self._registry.list_models():
# Only include models that this provider validates
if self.validate_model_name(model_name):
config = self._registry.resolve(model_name)
if config and not config.is_custom: # Only OpenRouter models, not custom ones
# Use ModelCapabilities directly from registry
configs[model_name] = config
return configs
def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases from the registry.
Returns:
Dictionary mapping model names to their list of aliases
"""
# Since aliases are now included in the configurations,
# we can use the base class implementation
return super().get_all_model_aliases()

View File

@@ -2,7 +2,6 @@
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
@@ -11,58 +10,10 @@ from utils.file_utils import read_json_file
from .base import (
ModelCapabilities,
ProviderType,
TemperatureConstraint,
create_temperature_constraint,
)
@dataclass
class OpenRouterModelConfig:
"""Configuration for an OpenRouter model."""
model_name: str
aliases: list[str] = field(default_factory=list)
context_window: int = 32768 # Total context window size in tokens
supports_extended_thinking: bool = False
supports_system_prompts: bool = True
supports_streaming: bool = True
supports_function_calling: bool = False
supports_json_mode: bool = False
supports_images: bool = False # Whether model can process images
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls
temperature_constraint: Optional[str] = (
None # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
)
is_custom: bool = False # True for models that should only be used with custom endpoints
description: str = ""
def _create_temperature_constraint(self) -> TemperatureConstraint:
"""Create temperature constraint object from configuration.
Returns:
TemperatureConstraint object based on configuration
"""
return create_temperature_constraint(self.temperature_constraint or "range")
def to_capabilities(self) -> ModelCapabilities:
"""Convert to ModelCapabilities object."""
return ModelCapabilities(
provider=ProviderType.OPENROUTER,
model_name=self.model_name,
friendly_name="OpenRouter",
context_window=self.context_window,
supports_extended_thinking=self.supports_extended_thinking,
supports_system_prompts=self.supports_system_prompts,
supports_streaming=self.supports_streaming,
supports_function_calling=self.supports_function_calling,
supports_images=self.supports_images,
max_image_size_mb=self.max_image_size_mb,
supports_temperature=self.supports_temperature,
temperature_constraint=self._create_temperature_constraint(),
)
class OpenRouterModelRegistry:
"""Registry for managing OpenRouter model configurations and aliases."""
@@ -73,7 +24,7 @@ class OpenRouterModelRegistry:
config_path: Path to config file. If None, uses default locations.
"""
self.alias_map: dict[str, str] = {} # alias -> model_name
self.model_map: dict[str, OpenRouterModelConfig] = {} # model_name -> config
self.model_map: dict[str, ModelCapabilities] = {} # model_name -> config
# Determine config path
if config_path:
@@ -139,7 +90,7 @@ class OpenRouterModelRegistry:
self.alias_map = {}
self.model_map = {}
def _read_config(self) -> list[OpenRouterModelConfig]:
def _read_config(self) -> list[ModelCapabilities]:
"""Read configuration from file.
Returns:
@@ -158,7 +109,27 @@ class OpenRouterModelRegistry:
# Parse models
configs = []
for model_data in data.get("models", []):
config = OpenRouterModelConfig(**model_data)
# Create ModelCapabilities directly from JSON data
# Handle temperature_constraint conversion
temp_constraint_str = model_data.get("temperature_constraint")
temp_constraint = create_temperature_constraint(temp_constraint_str or "range")
# Set provider-specific defaults based on is_custom flag
is_custom = model_data.get("is_custom", False)
if is_custom:
model_data.setdefault("provider", ProviderType.CUSTOM)
model_data.setdefault("friendly_name", f"Custom ({model_data.get('model_name', 'Unknown')})")
else:
model_data.setdefault("provider", ProviderType.OPENROUTER)
model_data.setdefault("friendly_name", f"OpenRouter ({model_data.get('model_name', 'Unknown')})")
model_data["temperature_constraint"] = temp_constraint
# Remove the string version of temperature_constraint before creating ModelCapabilities
if "temperature_constraint" in model_data and isinstance(model_data["temperature_constraint"], str):
del model_data["temperature_constraint"]
model_data["temperature_constraint"] = temp_constraint
config = ModelCapabilities(**model_data)
configs.append(config)
return configs
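An illustrative sketch of what _read_config() now produces for one entry (field values are invented; real entries live in the registry's JSON config file):

model_data = {
    "model_name": "llama3.2",
    "aliases": ["local-llama", "llama"],
    "context_window": 131_072,
    "max_output_tokens": 32_768,
    "is_custom": True,
    "temperature_constraint": "range",
    "description": "Local Llama 3.2 behind a custom endpoint",
}
# After the defaulting above, this becomes roughly:
# ModelCapabilities(provider=ProviderType.CUSTOM, friendly_name="Custom (llama3.2)",
#                   model_name="llama3.2", aliases=["local-llama", "llama"],
#                   context_window=131_072, max_output_tokens=32_768,
#                   temperature_constraint=<range 0.0-2.0 constraint>, ...)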
@@ -168,7 +139,7 @@ class OpenRouterModelRegistry:
except Exception as e:
raise ValueError(f"Error reading config from {self.config_path}: {e}")
def _build_maps(self, configs: list[OpenRouterModelConfig]) -> None:
def _build_maps(self, configs: list[ModelCapabilities]) -> None:
"""Build alias and model maps from configurations.
Args:
@@ -211,7 +182,7 @@ class OpenRouterModelRegistry:
self.alias_map = alias_map
self.model_map = model_map
def resolve(self, name_or_alias: str) -> Optional[OpenRouterModelConfig]:
def resolve(self, name_or_alias: str) -> Optional[ModelCapabilities]:
"""Resolve a model name or alias to configuration.
Args:
@@ -237,10 +208,8 @@ class OpenRouterModelRegistry:
Returns:
ModelCapabilities if found, None otherwise
"""
config = self.resolve(name_or_alias)
if config:
return config.to_capabilities()
return None
# Registry now returns ModelCapabilities directly
return self.resolve(name_or_alias)
def list_models(self) -> list[str]:
"""List all available model names."""

View File

@@ -24,8 +24,6 @@ class ModelProviderRegistry:
cls._instance._providers = {}
cls._instance._initialized_providers = {}
logging.debug(f"REGISTRY: Created instance {cls._instance}")
else:
logging.debug(f"REGISTRY: Returning existing instance {cls._instance}")
return cls._instance
@classmethod
@@ -129,7 +127,6 @@ class ModelProviderRegistry:
logging.debug(f"Available providers in registry: {list(instance._providers.keys())}")
for provider_type in PROVIDER_PRIORITY_ORDER:
logging.debug(f"Checking provider_type: {provider_type}")
if provider_type in instance._providers:
logging.debug(f"Found {provider_type} in registry")
# Get or create provider instance

View File

@@ -7,7 +7,7 @@ from .base import (
ModelCapabilities,
ModelResponse,
ProviderType,
RangeTemperatureConstraint,
create_temperature_constraint,
)
from .openai_compatible import OpenAICompatibleProvider
@@ -19,23 +19,44 @@ class XAIModelProvider(OpenAICompatibleProvider):
FRIENDLY_NAME = "X.AI"
# Model configurations
# Model configurations using ModelCapabilities objects
SUPPORTED_MODELS = {
"grok-3": {
"context_window": 131_072, # 131K tokens
"supports_extended_thinking": False,
"description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
},
"grok-3-fast": {
"context_window": 131_072, # 131K tokens
"supports_extended_thinking": False,
"description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
},
# Shorthands for convenience
"grok": "grok-3", # Default to grok-3
"grok3": "grok-3",
"grok3fast": "grok-3-fast",
"grokfast": "grok-3-fast",
"grok-3": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3",
friendly_name="X.AI (Grok 3)",
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
aliases=["grok", "grok3"],
),
"grok-3-fast": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)",
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
aliases=["grok3fast", "grokfast", "grok3-fast"],
),
}
def __init__(self, api_key: str, **kwargs):
@@ -49,7 +70,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
# Resolve shorthand
resolved_name = self._resolve_model_name(model_name)
if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
if resolved_name not in self.SUPPORTED_MODELS:
raise ValueError(f"Unsupported X.AI model: {model_name}")
# Check if model is allowed by restrictions
@@ -59,23 +80,8 @@ class XAIModelProvider(OpenAICompatibleProvider):
if not restriction_service.is_allowed(ProviderType.XAI, resolved_name, model_name):
raise ValueError(f"X.AI model '{model_name}' is not allowed by restriction policy.")
config = self.SUPPORTED_MODELS[resolved_name]
# Define temperature constraints for GROK models
# GROK supports the standard OpenAI temperature range
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
return ModelCapabilities(
provider=ProviderType.XAI,
model_name=resolved_name,
friendly_name=self.FRIENDLY_NAME,
context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
temperature_constraint=temp_constraint,
)
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
return self.SUPPORTED_MODELS[resolved_name]
def get_provider_type(self) -> ProviderType:
"""Get the provider type."""
@@ -86,7 +92,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
resolved_name = self._resolve_model_name(model_name)
# First check if model is supported
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
if resolved_name not in self.SUPPORTED_MODELS:
return False
# Then check if model is allowed by restrictions
@@ -127,61 +133,3 @@ class XAIModelProvider(OpenAICompatibleProvider):
# Currently GROK models do not support extended thinking
# This may change with future GROK model releases
return False
def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
Returns:
List of model names available from this provider
"""
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service() if respect_restrictions else None
models = []
for model_name, config in self.SUPPORTED_MODELS.items():
# Handle both base models (dict configs) and aliases (string values)
if isinstance(config, str):
# This is an alias - check if the target model would be allowed
target_model = config
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
continue
# Allow the alias
models.append(model_name)
else:
# This is a base model with config dict
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
models.append(model_name)
return models
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
Returns:
List of all model names and alias targets known by this provider
"""
all_models = set()
for model_name, config in self.SUPPORTED_MODELS.items():
# Add the model name itself
all_models.add(model_name.lower())
# If it's an alias (string value), add the target model too
if isinstance(config, str):
all_models.add(config.lower())
return list(all_models)
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name."""
# Check if it's a shorthand
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
if isinstance(shorthand_value, str):
return shorthand_value
return model_name