Merge branch 'feat-local_support_with_UTF-8_encoding-update' of https://github.com/GiGiDKR/zen-mcp-server into feat-local_support_with_UTF-8_encoding-update
@@ -132,6 +132,7 @@ class ModelCapabilities:
model_name: str
friendly_name: str # Human-friendly name like "Gemini" or "OpenAI"
context_window: int # Total context window size in tokens
max_output_tokens: int # Maximum output tokens per request
supports_extended_thinking: bool = False
supports_system_prompts: bool = True
supports_streaming: bool = True
@@ -140,6 +141,19 @@ class ModelCapabilities:
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls

# Additional fields for comprehensive model information
description: str = "" # Human-readable description of the model
aliases: list[str] = field(default_factory=list) # Alternative names/shortcuts for the model

# JSON mode support (for providers that support structured output)
supports_json_mode: bool = False

# Thinking mode support (for models with thinking capabilities)
max_thinking_tokens: int = 0 # Maximum thinking tokens for extended reasoning models

# Custom model flag (for models that only work with custom endpoints)
is_custom: bool = False # Whether this model requires custom API endpoints

# Temperature constraint object - preferred way to define temperature limits
temperature_constraint: TemperatureConstraint = field(
default_factory=lambda: RangeTemperatureConstraint(0.0, 2.0, 0.7)
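To make the expanded dataclass concrete, a provider entry exercising the new fields might look like the sketch below; the model name, token counts, alias strings and the ProviderType.CUSTOM member are illustrative assumptions, not values taken from this commit.

# Hedged sketch - a hypothetical capability entry using the new fields
example_capability = ModelCapabilities(
    provider=ProviderType.CUSTOM,          # assumed enum member for local endpoints
    model_name="example-local-model",      # hypothetical model id
    friendly_name="Example (Local)",
    context_window=32_768,
    max_output_tokens=8_192,
    description="Illustrative local model entry",
    aliases=["example", "local-example"],  # shortcuts picked up by alias resolution
    supports_json_mode=True,               # structured output supported
    max_thinking_tokens=0,                 # no extended reasoning budget
    is_custom=True,                        # only reachable via a custom endpoint
    temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
)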
@@ -251,7 +265,7 @@ class ModelProvider(ABC):
capabilities = self.get_capabilities(model_name)

# Check if model supports temperature at all
if hasattr(capabilities, "supports_temperature") and not capabilities.supports_temperature:
if not capabilities.supports_temperature:
return None

# Get temperature range
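The simplified check relies on supports_temperature now being a guaranteed field rather than an optional attribute. A minimal sketch of the surrounding parameter preparation, assuming the constraint object exposes a get_corrected_value() helper (an assumption, not confirmed by this diff), could look like:

# Hedged sketch - the helper name is hypothetical, not part of the diff
def _effective_temperature(self, model_name: str, requested: float):
    capabilities = self.get_capabilities(model_name)
    if not capabilities.supports_temperature:
        return None  # e.g. O3/O4-style models: omit the parameter entirely
    # Assumed constraint API: clamp the requested value into the allowed range
    return capabilities.temperature_constraint.get_corrected_value(requested)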
@@ -290,19 +304,109 @@ class ModelProvider(ABC):
"""Check if the model supports extended thinking mode."""
pass

@abstractmethod
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations for this provider.

This is a hook method that subclasses can override to provide
their model configurations from different sources.

Returns:
Dictionary mapping model names to their ModelCapabilities objects
"""
# Return SUPPORTED_MODELS if it exists (must contain ModelCapabilities objects)
if hasattr(self, "SUPPORTED_MODELS"):
return {k: v for k, v in self.SUPPORTED_MODELS.items() if isinstance(v, ModelCapabilities)}
return {}
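Because the default hook simply filters SUPPORTED_MODELS for ModelCapabilities values, a concrete provider mostly just declares that mapping. The class below is a hypothetical minimal subclass for illustration, not code from this commit; its other abstract methods are elided.

# Hedged sketch of a provider relying on the configuration hook
class ExampleProvider(ModelProvider):
    SUPPORTED_MODELS = {
        "example-model-1": ModelCapabilities(
            provider=ProviderType.CUSTOM,   # assumed enum member
            model_name="example-model-1",
            friendly_name="Example",
            context_window=128_000,
            max_output_tokens=16_384,
            aliases=["ex1"],
        ),
    }
    # get_model_configurations() (or a thin override calling super()) now
    # surfaces this mapping; remaining abstract methods are omitted here.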

def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases for this provider.

This is a hook method that subclasses can override to provide
aliases from different sources.

Returns:
Dictionary mapping model names to their list of aliases
"""
# Default implementation extracts from ModelCapabilities objects
aliases = {}
for model_name, capabilities in self.get_model_configurations().items():
if capabilities.aliases:
aliases[model_name] = capabilities.aliases
return aliases

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.

This implementation uses the hook methods to support different
model configuration sources.

Args:
model_name: Model name that may be an alias

Returns:
Resolved model name
"""
# Get model configurations from the hook method
model_configs = self.get_model_configurations()

# First check if it's already a base model name (case-sensitive exact match)
if model_name in model_configs:
return model_name

# Check case-insensitively for both base models and aliases
model_name_lower = model_name.lower()

# Check base model names case-insensitively
for base_model in model_configs:
if base_model.lower() == model_name_lower:
return base_model

# Check aliases from the hook method
all_aliases = self.get_all_model_aliases()
for base_model, aliases in all_aliases.items():
if any(alias.lower() == model_name_lower for alias in aliases):
return base_model

# If not found, return as-is
return model_name
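As a usage sketch (provider construction details elided), resolution is case-insensitive over both base names and aliases, falling back to the input string:

# Hedged usage sketch; assumes a provider whose configurations include
# "gemini-2.5-flash" with alias "flash", as in the Gemini provider below
provider._resolve_model_name("gemini-2.5-flash")  # exact match -> returned as-is
provider._resolve_model_name("GEMINI-2.5-FLASH")  # case-insensitive base-name match
provider._resolve_model_name("flash")             # alias match -> "gemini-2.5-flash"
provider._resolve_model_name("unknown-model")     # not found -> returned unchanged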

def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.

This implementation uses the get_model_configurations() hook
to support different model configuration sources.

Args:
respect_restrictions: Whether to apply provider-specific restriction logic.

Returns:
List of model names available from this provider
"""
pass
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service() if respect_restrictions else None
models = []

# Get model configurations from the hook method
model_configs = self.get_model_configurations()

for model_name in model_configs:
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue

# Add the base model
models.append(model_name)

# Get aliases from the hook method
all_aliases = self.get_all_model_aliases()
for model_name, aliases in all_aliases.items():
# Only add aliases for models that passed restriction check
if model_name in models:
models.extend(aliases)

return models
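So a provider with one allowed base model and one alias surfaces both names; a sketch of the expected shape, assuming the restriction service behaves as above:

# Hedged sketch of the expected output shape
models = provider.list_models(respect_restrictions=True)
# e.g. ["gemini-2.5-flash", "flash"] - base names first, then the aliases of
# models that survived the restriction check; restricted models and their
# aliases are omitted entirely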

@abstractmethod
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.

@@ -312,21 +416,22 @@ class ModelProvider(ABC):
Returns:
List of all model names and alias targets known by this provider
"""
pass
all_models = set()

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.
# Get model configurations from the hook method
model_configs = self.get_model_configurations()

Base implementation returns the model name unchanged.
Subclasses should override to provide alias resolution.
# Add all base model names
for model_name in model_configs:
all_models.add(model_name.lower())

Args:
model_name: Model name that may be an alias
# Get aliases from the hook method and add them
all_aliases = self.get_all_model_aliases()
for _model_name, aliases in all_aliases.items():
for alias in aliases:
all_models.add(alias.lower())

Returns:
Resolved model name
"""
return model_name
return list(all_models)

def close(self):
"""Clean up any resources held by the provider.
@@ -158,6 +158,7 @@ class CustomProvider(OpenAICompatibleProvider):
model_name=resolved_name,
friendly_name=f"{self.FRIENDLY_NAME} ({resolved_name})",
context_window=32_768, # Conservative default
max_output_tokens=32_768, # Conservative default max output
supports_extended_thinking=False, # Most custom models don't support this
supports_system_prompts=True,
supports_streaming=True,
@@ -187,7 +188,7 @@ class CustomProvider(OpenAICompatibleProvider):
Returns:
True if model is intended for custom/local endpoint
"""
logging.debug(f"Custom provider validating model: '{model_name}'")
# logging.debug(f"Custom provider validating model: '{model_name}'")

# Try to resolve through registry first
config = self._registry.resolve(model_name)
@@ -195,12 +196,12 @@ class CustomProvider(OpenAICompatibleProvider):
model_id = config.model_name
# Use explicit is_custom flag for clean validation
if config.is_custom:
logging.debug(f"Model '{model_name}' -> '{model_id}' validated via registry (custom model)")
logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' validated via registry")
return True
else:
# This is a cloud/OpenRouter model - CustomProvider should NOT handle these
# Let OpenRouter provider handle them instead
logging.debug(f"Model '{model_name}' -> '{model_id}' rejected (cloud model, defer to OpenRouter)")
# logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' not custom (defer to OpenRouter)")
return False

# Handle version tags for unknown models (e.g., "my-model:latest")
@@ -268,65 +269,50 @@ class CustomProvider(OpenAICompatibleProvider):
def supports_thinking_mode(self, model_name: str) -> bool:
"""Check if the model supports extended thinking mode.

Most custom/local models don't support extended thinking.

Args:
model_name: Model to check

Returns:
False (custom models generally don't support thinking mode)
True if model supports thinking mode, False otherwise
"""
# Check if model is in registry
config = self._registry.resolve(model_name) if self._registry else None
if config and config.is_custom:
# Trust the config from custom_models.json
return config.supports_extended_thinking

# Default to False for unknown models
return False

def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations from the registry.

Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
For CustomProvider, we convert registry configurations to ModelCapabilities objects.

Returns:
List of model names available from this provider
Dictionary mapping model names to their ModelCapabilities objects
"""
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service() if respect_restrictions else None
models = []
configs = {}

if self._registry:
# Get all models from the registry
all_models = self._registry.list_models()
aliases = self._registry.list_aliases()

# Add models that are validated by the custom provider
for model_name in all_models + aliases:
# Use the provider's validation logic to determine if this model
# is appropriate for the custom endpoint
# Get all models from registry
for model_name in self._registry.list_models():
# Only include custom models that this provider validates
if self.validate_model_name(model_name):
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
config = self._registry.resolve(model_name)
if config and config.is_custom:
# Use ModelCapabilities directly from registry
configs[model_name] = config

models.append(model_name)
return configs
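In other words, the custom provider now hands back registry entries directly when they are flagged is_custom; a rough usage sketch with hypothetical registry contents:

# Hedged sketch - assumes a registry entry flagged is_custom=True
configs = custom_provider.get_model_configurations()
# e.g. {"my-local-llama": ModelCapabilities(..., is_custom=True, aliases=["llama-local"])}
for name, caps in configs.items():
    print(name, caps.context_window, caps.aliases)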

return models

def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases from the registry.

Returns:
List of all model names and alias targets known by this provider
Dictionary mapping model names to their list of aliases
"""
all_models = set()

if self._registry:
# Get all models and aliases from the registry
all_models.update(model.lower() for model in self._registry.list_models())
all_models.update(alias.lower() for alias in self._registry.list_aliases())

# For each alias, also add its target
for alias in self._registry.list_aliases():
config = self._registry.resolve(alias)
if config:
all_models.add(config.model_name.lower())

return list(all_models)
# Since aliases are now included in the configurations,
# we can use the base class implementation
return super().get_all_model_aliases()

@@ -10,7 +10,7 @@ from .base import (
|
||||
ModelCapabilities,
|
||||
ModelResponse,
|
||||
ProviderType,
|
||||
RangeTemperatureConstraint,
|
||||
create_temperature_constraint,
|
||||
)
|
||||
from .openai_compatible import OpenAICompatibleProvider
|
||||
|
||||
@@ -30,63 +30,170 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
MAX_RETRIES = 4
|
||||
RETRY_DELAYS = [1, 3, 5, 8] # seconds
|
||||
|
||||
# Supported DIAL models (these can be customized based on your DIAL deployment)
|
||||
# Model configurations using ModelCapabilities objects
|
||||
SUPPORTED_MODELS = {
|
||||
"o3-2025-04-16": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"o4-mini-2025-04-16": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": True, # Thinking mode variant
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-opus-4-20250514-v1:0": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-opus-4-20250514-v1:0-with-thinking": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": True, # Thinking mode variant
|
||||
"supports_vision": True,
|
||||
},
|
||||
"gemini-2.5-pro-preview-03-25-google-search": {
|
||||
"context_window": 1_000_000,
|
||||
"supports_extended_thinking": False, # DIAL doesn't expose thinking mode
|
||||
"supports_vision": True,
|
||||
},
|
||||
"gemini-2.5-pro-preview-05-06": {
|
||||
"context_window": 1_000_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"gemini-2.5-flash-preview-05-20": {
|
||||
"context_window": 1_000_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
# Shorthands
|
||||
"o3": "o3-2025-04-16",
|
||||
"o4-mini": "o4-mini-2025-04-16",
|
||||
"sonnet-4": "anthropic.claude-sonnet-4-20250514-v1:0",
|
||||
"sonnet-4-thinking": "anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
|
||||
"opus-4": "anthropic.claude-opus-4-20250514-v1:0",
|
||||
"opus-4-thinking": "anthropic.claude-opus-4-20250514-v1:0-with-thinking",
|
||||
"gemini-2.5-pro": "gemini-2.5-pro-preview-05-06",
|
||||
"gemini-2.5-pro-search": "gemini-2.5-pro-preview-03-25-google-search",
|
||||
"gemini-2.5-flash": "gemini-2.5-flash-preview-05-20",
|
||||
"o3-2025-04-16": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="o3-2025-04-16",
|
||||
friendly_name="DIAL (O3)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=100_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=False, # O3 models don't accept temperature
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="OpenAI O3 via DIAL - Strong reasoning model",
|
||||
aliases=["o3"],
|
||||
),
|
||||
"o4-mini-2025-04-16": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="o4-mini-2025-04-16",
|
||||
friendly_name="DIAL (O4-mini)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=100_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=False, # O4 models don't accept temperature
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="OpenAI O4-mini via DIAL - Fast reasoning model",
|
||||
aliases=["o4-mini"],
|
||||
),
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-sonnet-4-20250514-v1:0",
|
||||
friendly_name="DIAL (Sonnet 4)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Sonnet 4 via DIAL - Balanced performance",
|
||||
aliases=["sonnet-4"],
|
||||
),
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
|
||||
friendly_name="DIAL (Sonnet 4 Thinking)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=True, # Thinking mode variant
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Sonnet 4 with thinking mode via DIAL",
|
||||
aliases=["sonnet-4-thinking"],
|
||||
),
|
||||
"anthropic.claude-opus-4-20250514-v1:0": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-opus-4-20250514-v1:0",
|
||||
friendly_name="DIAL (Opus 4)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Opus 4 via DIAL - Most capable Claude model",
|
||||
aliases=["opus-4"],
|
||||
),
|
||||
"anthropic.claude-opus-4-20250514-v1:0-with-thinking": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking",
|
||||
friendly_name="DIAL (Opus 4 Thinking)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=True, # Thinking mode variant
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Opus 4 with thinking mode via DIAL",
|
||||
aliases=["opus-4-thinking"],
|
||||
),
|
||||
"gemini-2.5-pro-preview-03-25-google-search": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-pro-preview-03-25-google-search",
|
||||
friendly_name="DIAL (Gemini 2.5 Pro Search)",
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False, # DIAL doesn't expose thinking mode
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.5 Pro with Google Search via DIAL",
|
||||
aliases=["gemini-2.5-pro-search"],
|
||||
),
|
||||
"gemini-2.5-pro-preview-05-06": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-pro-preview-05-06",
|
||||
friendly_name="DIAL (Gemini 2.5 Pro)",
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.5 Pro via DIAL - Deep reasoning",
|
||||
aliases=["gemini-2.5-pro"],
|
||||
),
|
||||
"gemini-2.5-flash-preview-05-20": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-flash-preview-05-20",
|
||||
friendly_name="DIAL (Gemini Flash 2.5)",
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.5 Flash via DIAL - Ultra-fast",
|
||||
aliases=["gemini-2.5-flash"],
|
||||
),
|
||||
}
|
||||
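With the DIAL table expressed as ModelCapabilities objects, a capability lookup reduces to alias resolution plus a dictionary access; a hedged sketch of the flow (restriction checks and error handling elided):

# Hedged sketch - mirrors the simplified get_capabilities() further down
resolved = dial_provider._resolve_model_name("opus-4")
# -> "anthropic.claude-opus-4-20250514-v1:0"
caps = dial_provider.SUPPORTED_MODELS[resolved]
assert caps.supports_extended_thinking is False
assert caps.supports_temperature is True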
|
||||
def __init__(self, api_key: str, **kwargs):
|
||||
@@ -181,20 +288,8 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
if not restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model_name):
|
||||
raise ValueError(f"Model '{model_name}' is not allowed by restriction policy.")
|
||||
|
||||
config = self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
return ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name=resolved_name,
|
||||
friendly_name=self.FRIENDLY_NAME,
|
||||
context_window=config["context_window"],
|
||||
supports_extended_thinking=config["supports_extended_thinking"],
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_images=config.get("supports_vision", False),
|
||||
temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
|
||||
)
|
||||
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
|
||||
return self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
def get_provider_type(self) -> ProviderType:
|
||||
"""Get the provider type."""
|
||||
@@ -211,7 +306,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
"""
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
return False
|
||||
|
||||
# Check against base class allowed_models if configured
|
||||
@@ -231,20 +326,6 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
|
||||
return True
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model shorthand to full name.
|
||||
|
||||
Args:
|
||||
model_name: Model name or shorthand
|
||||
|
||||
Returns:
|
||||
Full model name
|
||||
"""
|
||||
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
|
||||
if isinstance(shorthand_value, str):
|
||||
return shorthand_value
|
||||
return model_name
|
||||
|
||||
def _get_deployment_client(self, deployment: str):
|
||||
"""Get or create a cached client for a specific deployment.
|
||||
|
||||
@@ -357,7 +438,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
# Check model capabilities
|
||||
try:
|
||||
capabilities = self.get_capabilities(model_name)
|
||||
supports_temperature = getattr(capabilities, "supports_temperature", True)
|
||||
supports_temperature = capabilities.supports_temperature
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to check temperature support for {model_name}: {e}")
|
||||
supports_temperature = True
|
||||
@@ -441,63 +522,12 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
"""
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
if resolved_name in self.SUPPORTED_MODELS and isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
return self.SUPPORTED_MODELS[resolved_name].get("supports_vision", False)
|
||||
if resolved_name in self.SUPPORTED_MODELS:
|
||||
return self.SUPPORTED_MODELS[resolved_name].supports_images
|
||||
|
||||
# Fall back to parent implementation for unknown models
|
||||
return super()._supports_vision(model_name)
|
||||
|
||||
def list_models(self, respect_restrictions: bool = True) -> list[str]:
|
||||
"""Return a list of model names supported by this provider.
|
||||
|
||||
Args:
|
||||
respect_restrictions: Whether to apply provider-specific restriction logic.
|
||||
|
||||
Returns:
|
||||
List of model names available from this provider
|
||||
"""
|
||||
# Get all model keys (both full names and aliases)
|
||||
all_models = list(self.SUPPORTED_MODELS.keys())
|
||||
|
||||
if not respect_restrictions:
|
||||
return all_models
|
||||
|
||||
# Apply restrictions if configured
|
||||
from utils.model_restrictions import get_restriction_service
|
||||
|
||||
restriction_service = get_restriction_service()
|
||||
|
||||
# Filter based on restrictions
|
||||
allowed_models = []
|
||||
for model in all_models:
|
||||
resolved_name = self._resolve_model_name(model)
|
||||
if restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model):
|
||||
allowed_models.append(model)
|
||||
|
||||
return allowed_models
|
||||
|
||||
def list_all_known_models(self) -> list[str]:
|
||||
"""Return all model names known by this provider, including alias targets.
|
||||
|
||||
This is used for validation purposes to ensure restriction policies
|
||||
can validate against both aliases and their target model names.
|
||||
|
||||
Returns:
|
||||
List of all model names and alias targets known by this provider
|
||||
"""
|
||||
# Collect all unique model names (both aliases and targets)
|
||||
all_models = set()
|
||||
|
||||
for key, value in self.SUPPORTED_MODELS.items():
|
||||
# Add the key (could be alias or full name)
|
||||
all_models.add(key)
|
||||
|
||||
# If it's an alias (string value), add the target too
|
||||
if isinstance(value, str):
|
||||
all_models.add(value)
|
||||
|
||||
return sorted(all_models)
|
||||
|
||||
def close(self):
|
||||
"""Clean up HTTP clients when provider is closed."""
|
||||
logger.info("Closing DIAL provider HTTP clients...")
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Optional
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, RangeTemperatureConstraint
|
||||
from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, create_temperature_constraint
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -17,47 +17,83 @@ logger = logging.getLogger(__name__)
|
||||
class GeminiModelProvider(ModelProvider):
|
||||
"""Google Gemini model provider implementation."""
|
||||
|
||||
# Model configurations
|
||||
# Model configurations using ModelCapabilities objects
|
||||
SUPPORTED_MODELS = {
|
||||
"gemini-2.0-flash": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": True, # Experimental thinking mode
|
||||
"max_thinking_tokens": 24576, # Same as 2.5 flash for consistency
|
||||
"supports_images": True, # Vision capability
|
||||
"max_image_size_mb": 20.0, # Conservative 20MB limit for reliability
|
||||
"description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
|
||||
},
|
||||
"gemini-2.0-flash-lite": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": False, # Not supported per user request
|
||||
"max_thinking_tokens": 0, # No thinking support
|
||||
"supports_images": False, # Does not support images
|
||||
"max_image_size_mb": 0.0, # No image support
|
||||
"description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
|
||||
},
|
||||
"gemini-2.5-flash": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": True,
|
||||
"max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit
|
||||
"supports_images": True, # Vision capability
|
||||
"max_image_size_mb": 20.0, # Conservative 20MB limit for reliability
|
||||
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
|
||||
},
|
||||
"gemini-2.5-pro": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": True,
|
||||
"max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit
|
||||
"supports_images": True, # Vision capability
|
||||
"max_image_size_mb": 32.0, # Higher limit for Pro model
|
||||
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
|
||||
},
|
||||
# Shorthands
|
||||
"flash": "gemini-2.5-flash",
|
||||
"flash-2.0": "gemini-2.0-flash",
|
||||
"flash2": "gemini-2.0-flash",
|
||||
"flashlite": "gemini-2.0-flash-lite",
|
||||
"flash-lite": "gemini-2.0-flash-lite",
|
||||
"pro": "gemini-2.5-pro",
|
||||
"gemini-2.0-flash": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.0-flash",
|
||||
friendly_name="Gemini (Flash 2.0)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True, # Experimental thinking mode
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # Vision capability
|
||||
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
max_thinking_tokens=24576, # Same as 2.5 flash for consistency
|
||||
description="Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
|
||||
aliases=["flash-2.0", "flash2"],
|
||||
),
|
||||
"gemini-2.0-flash-lite": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.0-flash-lite",
|
||||
friendly_name="Gemin (Flash Lite 2.0)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False, # Not supported per user request
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=False, # Does not support images
|
||||
max_image_size_mb=0.0, # No image support
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
|
||||
aliases=["flashlite", "flash-lite"],
|
||||
),
|
||||
"gemini-2.5-flash": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.5-flash",
|
||||
friendly_name="Gemini (Flash 2.5)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # Vision capability
|
||||
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
max_thinking_tokens=24576, # Flash 2.5 thinking budget limit
|
||||
description="Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
|
||||
aliases=["flash", "flash2.5"],
|
||||
),
|
||||
"gemini-2.5-pro": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.5-pro",
|
||||
friendly_name="Gemini (Pro 2.5)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # Vision capability
|
||||
max_image_size_mb=32.0, # Higher limit for Pro model
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
max_thinking_tokens=32768, # Max thinking tokens for Pro model
|
||||
description="Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
|
||||
aliases=["pro", "gemini pro", "gemini-pro"],
|
||||
),
|
||||
}
|
||||
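The separate shorthand table is gone; aliases now live on the entries themselves, so resolution behaves like the sketch below (names taken from the table above, resolution assumed to go through the base-class hook described earlier):

# Hedged sketch of alias resolution against the table above
gemini_provider._resolve_model_name("flash")       # -> "gemini-2.5-flash"
gemini_provider._resolve_model_name("flash-lite")  # -> "gemini-2.0-flash-lite"
gemini_provider._resolve_model_name("pro")         # -> "gemini-2.5-pro"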
|
||||
# Thinking mode configurations - percentages of model's max_thinking_tokens
|
||||
@@ -70,6 +106,14 @@ class GeminiModelProvider(ModelProvider):
|
||||
"max": 1.0, # 100% of max - full thinking budget
|
||||
}
|
||||
|
||||
# Model-specific thinking token limits
|
||||
MAX_THINKING_TOKENS = {
|
||||
"gemini-2.0-flash": 24576, # Same as 2.5 flash for consistency
|
||||
"gemini-2.0-flash-lite": 0, # No thinking support
|
||||
"gemini-2.5-flash": 24576, # Flash 2.5 thinking budget limit
|
||||
"gemini-2.5-pro": 32768, # Pro 2.5 thinking budget limit
|
||||
}
|
||||
|
||||
def __init__(self, api_key: str, **kwargs):
|
||||
"""Initialize Gemini provider with API key."""
|
||||
super().__init__(api_key, **kwargs)
|
||||
@@ -100,25 +144,8 @@ class GeminiModelProvider(ModelProvider):
|
||||
if not restriction_service.is_allowed(ProviderType.GOOGLE, resolved_name, model_name):
|
||||
raise ValueError(f"Gemini model '{resolved_name}' is not allowed by restriction policy.")
|
||||
|
||||
config = self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
# Gemini models support 0.0-2.0 temperature range
|
||||
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
|
||||
|
||||
return ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name=resolved_name,
|
||||
friendly_name="Gemini",
|
||||
context_window=config["context_window"],
|
||||
supports_extended_thinking=config["supports_extended_thinking"],
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_images=config.get("supports_images", False),
|
||||
max_image_size_mb=config.get("max_image_size_mb", 0.0),
|
||||
supports_temperature=True, # Gemini models accept temperature parameter
|
||||
temperature_constraint=temp_constraint,
|
||||
)
|
||||
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
|
||||
return self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
def generate_content(
|
||||
self,
|
||||
@@ -179,8 +206,8 @@ class GeminiModelProvider(ModelProvider):
|
||||
if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
|
||||
# Get model's max thinking tokens and calculate actual budget
|
||||
model_config = self.SUPPORTED_MODELS.get(resolved_name)
|
||||
if model_config and "max_thinking_tokens" in model_config:
|
||||
max_thinking_tokens = model_config["max_thinking_tokens"]
|
||||
if model_config and model_config.max_thinking_tokens > 0:
|
||||
max_thinking_tokens = model_config.max_thinking_tokens
|
||||
actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
|
||||
generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
|
||||
|
||||
@@ -258,7 +285,7 @@ class GeminiModelProvider(ModelProvider):
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
# First check if model is supported
|
||||
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
return False
|
||||
|
||||
# Then check if model is allowed by restrictions
|
||||
@@ -281,78 +308,20 @@ class GeminiModelProvider(ModelProvider):
def get_thinking_budget(self, model_name: str, thinking_mode: str) -> int:
"""Get actual thinking token budget for a model and thinking mode."""
resolved_name = self._resolve_model_name(model_name)
model_config = self.SUPPORTED_MODELS.get(resolved_name, {})
model_config = self.SUPPORTED_MODELS.get(resolved_name)

if not model_config.get("supports_extended_thinking", False):
if not model_config or not model_config.supports_extended_thinking:
return 0

if thinking_mode not in self.THINKING_BUDGETS:
return 0

max_thinking_tokens = model_config.get("max_thinking_tokens", 0)
max_thinking_tokens = model_config.max_thinking_tokens
if max_thinking_tokens == 0:
return 0

return int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])

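A quick worked example with the limits above (only the "max" = 1.0 entry of THINKING_BUDGETS is visible in this diff; other percentages are whatever that table defines):

# Hedged worked example for the budget calculation above
caps = gemini_provider.SUPPORTED_MODELS["gemini-2.5-pro"]
caps.max_thinking_tokens                                  # 32768
gemini_provider.get_thinking_budget("pro", "max")         # int(32768 * 1.0) == 32768
gemini_provider.get_thinking_budget("flash-lite", "max")  # 0 - no thinking support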
def list_models(self, respect_restrictions: bool = True) -> list[str]:
|
||||
"""Return a list of model names supported by this provider.
|
||||
|
||||
Args:
|
||||
respect_restrictions: Whether to apply provider-specific restriction logic.
|
||||
|
||||
Returns:
|
||||
List of model names available from this provider
|
||||
"""
|
||||
from utils.model_restrictions import get_restriction_service
|
||||
|
||||
restriction_service = get_restriction_service() if respect_restrictions else None
|
||||
models = []
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Handle both base models (dict configs) and aliases (string values)
|
||||
if isinstance(config, str):
|
||||
# This is an alias - check if the target model would be allowed
|
||||
target_model = config
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
|
||||
continue
|
||||
# Allow the alias
|
||||
models.append(model_name)
|
||||
else:
|
||||
# This is a base model with config dict
|
||||
# Check restrictions if enabled
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
|
||||
continue
|
||||
models.append(model_name)
|
||||
|
||||
return models
|
||||
|
||||
def list_all_known_models(self) -> list[str]:
|
||||
"""Return all model names known by this provider, including alias targets.
|
||||
|
||||
Returns:
|
||||
List of all model names and alias targets known by this provider
|
||||
"""
|
||||
all_models = set()
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Add the model name itself
|
||||
all_models.add(model_name.lower())
|
||||
|
||||
# If it's an alias (string value), add the target model too
|
||||
if isinstance(config, str):
|
||||
all_models.add(config.lower())
|
||||
|
||||
return list(all_models)
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model shorthand to full name."""
|
||||
# Check if it's a shorthand
|
||||
shorthand_value = self.SUPPORTED_MODELS.get(model_name.lower())
|
||||
if isinstance(shorthand_value, str):
|
||||
return shorthand_value
|
||||
return model_name
|
||||
|
||||
def _extract_usage(self, response) -> dict[str, int]:
|
||||
"""Extract token usage from Gemini response."""
|
||||
usage = {}
|
||||
|
||||
@@ -686,7 +686,6 @@ class OpenAICompatibleProvider(ModelProvider):
|
||||
"o3-mini",
|
||||
"o3-pro",
|
||||
"o4-mini",
|
||||
"o4-mini-high",
|
||||
# Note: Claude models would be handled by a separate provider
|
||||
}
|
||||
supports = model_name.lower() in vision_models
|
||||
|
||||
@@ -17,71 +17,98 @@ logger = logging.getLogger(__name__)
|
||||
class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
"""Official OpenAI API provider (api.openai.com)."""
|
||||
|
||||
# Model configurations
|
||||
# Model configurations using ModelCapabilities objects
|
||||
SUPPORTED_MODELS = {
|
||||
"o3": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O3 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O3 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
|
||||
},
|
||||
"o3-mini": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O3 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O3 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
|
||||
},
|
||||
"o3-pro-2025-06-10": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O3 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O3 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
|
||||
},
|
||||
# Aliases
|
||||
"o3-pro": "o3-pro-2025-06-10",
|
||||
"o4-mini": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O4 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O4 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
|
||||
},
|
||||
"o4-mini-high": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O4 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O4 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks",
|
||||
},
|
||||
"gpt-4.1-2025-04-14": {
|
||||
"context_window": 1_000_000, # 1M tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # GPT-4.1 supports vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": True, # Regular models accept temperature parameter
|
||||
"temperature_constraint": "range", # 0.0-2.0 range
|
||||
"description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
|
||||
},
|
||||
# Shorthands
|
||||
"mini": "o4-mini", # Default 'mini' to latest mini model
|
||||
"o3mini": "o3-mini",
|
||||
"o4mini": "o4-mini",
|
||||
"o4minihigh": "o4-mini-high",
|
||||
"o4minihi": "o4-mini-high",
|
||||
"gpt4.1": "gpt-4.1-2025-04-14",
|
||||
"o3": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3",
|
||||
friendly_name="OpenAI (O3)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O3 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O3 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
|
||||
aliases=[],
|
||||
),
|
||||
"o3-mini": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3-mini",
|
||||
friendly_name="OpenAI (O3-mini)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O3 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O3 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
|
||||
aliases=["o3mini", "o3-mini"],
|
||||
),
|
||||
"o3-pro-2025-06-10": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3-pro-2025-06-10",
|
||||
friendly_name="OpenAI (O3-Pro)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O3 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O3 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
|
||||
aliases=["o3-pro"],
|
||||
),
|
||||
"o4-mini": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o4-mini",
|
||||
friendly_name="OpenAI (O4-mini)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O4 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O4 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
|
||||
aliases=["mini", "o4mini", "o4-mini"],
|
||||
),
|
||||
"gpt-4.1-2025-04-14": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="gpt-4.1-2025-04-14",
|
||||
friendly_name="OpenAI (GPT 4.1)",
|
||||
context_window=1_000_000, # 1M tokens
|
||||
max_output_tokens=32_768,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # GPT-4.1 supports vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=True, # Regular models accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
|
||||
aliases=["gpt4.1"],
|
||||
),
|
||||
}
|
||||
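With the OpenAI table in the same shape, the "mini" shorthand resolves to o4-mini and its capabilities come straight from this mapping; a hedged sketch against the values above:

# Hedged sketch against the table above
caps = openai_provider.get_capabilities("mini")  # resolves to "o4-mini"
caps.supports_temperature   # False - fixed temperature constraint
caps.context_window         # 200_000
caps.supports_json_mode     # True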
|
||||
def __init__(self, api_key: str, **kwargs):
|
||||
@@ -95,7 +122,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
# Resolve shorthand
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
raise ValueError(f"Unsupported OpenAI model: {model_name}")
|
||||
|
||||
# Check if model is allowed by restrictions
|
||||
@@ -105,27 +132,8 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
if not restriction_service.is_allowed(ProviderType.OPENAI, resolved_name, model_name):
|
||||
raise ValueError(f"OpenAI model '{model_name}' is not allowed by restriction policy.")
|
||||
|
||||
config = self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
# Get temperature constraints and support from configuration
|
||||
supports_temperature = config.get("supports_temperature", True) # Default to True for backward compatibility
|
||||
temp_constraint_type = config.get("temperature_constraint", "range") # Default to range
|
||||
temp_constraint = create_temperature_constraint(temp_constraint_type)
|
||||
|
||||
return ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name=model_name,
|
||||
friendly_name="OpenAI",
|
||||
context_window=config["context_window"],
|
||||
supports_extended_thinking=config["supports_extended_thinking"],
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_images=config.get("supports_images", False),
|
||||
max_image_size_mb=config.get("max_image_size_mb", 0.0),
|
||||
supports_temperature=supports_temperature,
|
||||
temperature_constraint=temp_constraint,
|
||||
)
|
||||
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
|
||||
return self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
def get_provider_type(self) -> ProviderType:
|
||||
"""Get the provider type."""
|
||||
@@ -136,7 +144,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
# First check if model is supported
|
||||
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
return False
|
||||
|
||||
# Then check if model is allowed by restrictions
|
||||
@@ -177,61 +185,3 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
# Currently no OpenAI models support extended thinking
|
||||
# This may change with future O3 models
|
||||
return False
|
||||
|
||||
def list_models(self, respect_restrictions: bool = True) -> list[str]:
|
||||
"""Return a list of model names supported by this provider.
|
||||
|
||||
Args:
|
||||
respect_restrictions: Whether to apply provider-specific restriction logic.
|
||||
|
||||
Returns:
|
||||
List of model names available from this provider
|
||||
"""
|
||||
from utils.model_restrictions import get_restriction_service
|
||||
|
||||
restriction_service = get_restriction_service() if respect_restrictions else None
|
||||
models = []
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Handle both base models (dict configs) and aliases (string values)
|
||||
if isinstance(config, str):
|
||||
# This is an alias - check if the target model would be allowed
|
||||
target_model = config
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
|
||||
continue
|
||||
# Allow the alias
|
||||
models.append(model_name)
|
||||
else:
|
||||
# This is a base model with config dict
|
||||
# Check restrictions if enabled
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
|
||||
continue
|
||||
models.append(model_name)
|
||||
|
||||
return models
|
||||
|
||||
def list_all_known_models(self) -> list[str]:
|
||||
"""Return all model names known by this provider, including alias targets.
|
||||
|
||||
Returns:
|
||||
List of all model names and alias targets known by this provider
|
||||
"""
|
||||
all_models = set()
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Add the model name itself
|
||||
all_models.add(model_name.lower())
|
||||
|
||||
# If it's an alias (string value), add the target model too
|
||||
if isinstance(config, str):
|
||||
all_models.add(config.lower())
|
||||
|
||||
return list(all_models)
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model shorthand to full name."""
|
||||
# Check if it's a shorthand
|
||||
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
|
||||
if isinstance(shorthand_value, str):
|
||||
return shorthand_value
|
||||
return model_name
|
||||
|
||||
@@ -50,14 +50,6 @@ class OpenRouterProvider(OpenAICompatibleProvider):
|
||||
aliases = self._registry.list_aliases()
|
||||
logging.info(f"OpenRouter loaded {len(models)} models with {len(aliases)} aliases")
|
||||
|
||||
def _parse_allowed_models(self) -> None:
|
||||
"""Override to disable environment-based allow-list.
|
||||
|
||||
OpenRouter model access is controlled via the OpenRouter dashboard,
|
||||
not through environment variables.
|
||||
"""
|
||||
return None
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model aliases to OpenRouter model names.
|
||||
|
||||
@@ -109,6 +101,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
|
||||
model_name=resolved_name,
|
||||
friendly_name=self.FRIENDLY_NAME,
|
||||
context_window=32_768, # Conservative default context window
|
||||
max_output_tokens=32_768,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
@@ -130,16 +123,34 @@ class OpenRouterProvider(OpenAICompatibleProvider):

As the catch-all provider, OpenRouter accepts any model name that wasn't
handled by higher-priority providers. OpenRouter will validate based on
the API key's permissions.
the API key's permissions and local restrictions.

Args:
model_name: Model name to validate

Returns:
Always True - OpenRouter is the catch-all provider
True if model is allowed, False if restricted
"""
# Accept any model name - OpenRouter is the fallback provider
# Higher priority providers (native APIs, custom endpoints) get first chance
# Check model restrictions if configured
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service()
if restriction_service:
# Check if model name itself is allowed
if restriction_service.is_allowed(self.get_provider_type(), model_name):
return True

# Also check aliases - model_name might be an alias
model_config = self._registry.resolve(model_name)
if model_config and model_config.aliases:
for alias in model_config.aliases:
if restriction_service.is_allowed(self.get_provider_type(), alias):
return True

# If restrictions are configured and model/alias not in allowed list, reject
return False

# No restrictions configured - accept any model name as the fallback provider
return True
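The practical effect: without a configured allow-list OpenRouter still accepts anything, but an allow-list now filters both model names and aliases. The sketch below assumes the restriction service is driven by an OPENROUTER_ALLOWED_MODELS-style environment variable (naming assumed, not shown in this diff):

# Hedged sketch of the new behaviour
# No restrictions configured:
openrouter.validate_model_name("anything/model")    # True (catch-all)
# With an allow-list restricting OpenRouter to e.g. "opus":
openrouter.validate_model_name("opus")              # True  (allowed alias)
openrouter.validate_model_name("some/other-model")  # False (rejected)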
|
||||
def generate_content(
@@ -260,3 +271,35 @@ class OpenRouterProvider(OpenAICompatibleProvider):
all_models.add(config.model_name.lower())

return list(all_models)

def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations from the registry.

For OpenRouter, we convert registry configurations to ModelCapabilities objects.

Returns:
Dictionary mapping model names to their ModelCapabilities objects
"""
configs = {}

if self._registry:
# Get all models from registry
for model_name in self._registry.list_models():
# Only include models that this provider validates
if self.validate_model_name(model_name):
config = self._registry.resolve(model_name)
if config and not config.is_custom: # Only OpenRouter models, not custom ones
# Use ModelCapabilities directly from registry
configs[model_name] = config

return configs

def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases from the registry.

Returns:
Dictionary mapping model names to their list of aliases
"""
# Since aliases are now included in the configurations,
# we can use the base class implementation
return super().get_all_model_aliases()

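Together, these two hooks let callers enumerate everything the provider exposes without touching the registry directly. A small usage sketch (the helper name and the `provider` argument are placeholders, not part of this commit):

def print_catalog(provider) -> None:
    # Sketch: dump model names, context sizes and shortcuts via the hook methods above.
    configs = provider.get_model_configurations()   # model_name -> ModelCapabilities
    aliases = provider.get_all_model_aliases()      # model_name -> [alias, ...]
    for model_name, capabilities in sorted(configs.items()):
        shortcuts = ", ".join(aliases.get(model_name, [])) or "-"
        print(f"{model_name:40} ctx={capabilities.context_window:>9} aliases: {shortcuts}")
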
@@ -2,7 +2,6 @@

import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

@@ -11,58 +10,10 @@ from utils.file_utils import read_json_file
from .base import (
ModelCapabilities,
ProviderType,
TemperatureConstraint,
create_temperature_constraint,
)


@dataclass
class OpenRouterModelConfig:
"""Configuration for an OpenRouter model."""

model_name: str
aliases: list[str] = field(default_factory=list)
context_window: int = 32768 # Total context window size in tokens
supports_extended_thinking: bool = False
supports_system_prompts: bool = True
supports_streaming: bool = True
supports_function_calling: bool = False
supports_json_mode: bool = False
supports_images: bool = False # Whether model can process images
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls
temperature_constraint: Optional[str] = (
None # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
)
is_custom: bool = False # True for models that should only be used with custom endpoints
description: str = ""

def _create_temperature_constraint(self) -> TemperatureConstraint:
"""Create temperature constraint object from configuration.

Returns:
TemperatureConstraint object based on configuration
"""
return create_temperature_constraint(self.temperature_constraint or "range")

def to_capabilities(self) -> ModelCapabilities:
"""Convert to ModelCapabilities object."""
return ModelCapabilities(
provider=ProviderType.OPENROUTER,
model_name=self.model_name,
friendly_name="OpenRouter",
context_window=self.context_window,
supports_extended_thinking=self.supports_extended_thinking,
supports_system_prompts=self.supports_system_prompts,
supports_streaming=self.supports_streaming,
supports_function_calling=self.supports_function_calling,
supports_images=self.supports_images,
max_image_size_mb=self.max_image_size_mb,
supports_temperature=self.supports_temperature,
temperature_constraint=self._create_temperature_constraint(),
)


class OpenRouterModelRegistry:
"""Registry for managing OpenRouter model configurations and aliases."""

@@ -73,7 +24,7 @@ class OpenRouterModelRegistry:
config_path: Path to config file. If None, uses default locations.
"""
self.alias_map: dict[str, str] = {} # alias -> model_name
self.model_map: dict[str, OpenRouterModelConfig] = {} # model_name -> config
self.model_map: dict[str, ModelCapabilities] = {} # model_name -> config

# Determine config path
if config_path:
@@ -139,7 +90,7 @@ class OpenRouterModelRegistry:
self.alias_map = {}
self.model_map = {}

def _read_config(self) -> list[OpenRouterModelConfig]:
def _read_config(self) -> list[ModelCapabilities]:
"""Read configuration from file.

Returns:
@@ -158,7 +109,27 @@ class OpenRouterModelRegistry:
# Parse models
configs = []
for model_data in data.get("models", []):
config = OpenRouterModelConfig(**model_data)
# Create ModelCapabilities directly from JSON data
# Handle temperature_constraint conversion
temp_constraint_str = model_data.get("temperature_constraint")
temp_constraint = create_temperature_constraint(temp_constraint_str or "range")

# Set provider-specific defaults based on is_custom flag
is_custom = model_data.get("is_custom", False)
if is_custom:
model_data.setdefault("provider", ProviderType.CUSTOM)
model_data.setdefault("friendly_name", f"Custom ({model_data.get('model_name', 'Unknown')})")
else:
model_data.setdefault("provider", ProviderType.OPENROUTER)
model_data.setdefault("friendly_name", f"OpenRouter ({model_data.get('model_name', 'Unknown')})")
model_data["temperature_constraint"] = temp_constraint

# Remove the string version of temperature_constraint before creating ModelCapabilities
if "temperature_constraint" in model_data and isinstance(model_data["temperature_constraint"], str):
del model_data["temperature_constraint"]
model_data["temperature_constraint"] = temp_constraint

config = ModelCapabilities(**model_data)
configs.append(config)

return configs

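With OpenRouterModelConfig gone, each entry under the config file's "models" key is defaulted and then passed straight into ModelCapabilities(**model_data). A hedged sketch of one such entry as the parser would see it (the model name, numbers and flags are illustrative, not copied from the shipped configuration):

model_data = {
    "model_name": "vendor/example-model",    # hypothetical model identifier
    "aliases": ["example"],
    "context_window": 65_536,
    "max_output_tokens": 16_384,
    "supports_json_mode": True,
    "temperature_constraint": "range",       # string form, converted by the code above
}
# The loop above turns the string constraint into an object, defaults
# "provider"/"friendly_name" from is_custom, then calls ModelCapabilities(**model_data).
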
@@ -168,7 +139,7 @@ class OpenRouterModelRegistry:
except Exception as e:
raise ValueError(f"Error reading config from {self.config_path}: {e}")

def _build_maps(self, configs: list[OpenRouterModelConfig]) -> None:
def _build_maps(self, configs: list[ModelCapabilities]) -> None:
"""Build alias and model maps from configurations.

Args:
@@ -211,7 +182,7 @@ class OpenRouterModelRegistry:
self.alias_map = alias_map
self.model_map = model_map

def resolve(self, name_or_alias: str) -> Optional[OpenRouterModelConfig]:
def resolve(self, name_or_alias: str) -> Optional[ModelCapabilities]:
"""Resolve a model name or alias to configuration.

Args:
@@ -237,10 +208,8 @@ class OpenRouterModelRegistry:
Returns:
ModelCapabilities if found, None otherwise
"""
config = self.resolve(name_or_alias)
if config:
return config.to_capabilities()
return None
# Registry now returns ModelCapabilities directly
return self.resolve(name_or_alias)

def list_models(self) -> list[str]:
"""List all available model names."""

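After this change resolve() is the whole lookup path: get_capabilities() simply forwards to it, and callers get back the ModelCapabilities stored in model_map, or None. A short usage sketch (the registry is built from the default config locations; the alias being looked up is a placeholder):

registry = OpenRouterModelRegistry()        # falls back to the default config locations
caps = registry.resolve("example-alias")    # accepts either an alias or a full model name
if caps is not None:
    print(caps.model_name, caps.context_window)
else:
    print("unknown model or alias")
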
@@ -24,8 +24,6 @@ class ModelProviderRegistry:
cls._instance._providers = {}
cls._instance._initialized_providers = {}
logging.debug(f"REGISTRY: Created instance {cls._instance}")
else:
logging.debug(f"REGISTRY: Returning existing instance {cls._instance}")
return cls._instance

@classmethod
@@ -129,7 +127,6 @@ class ModelProviderRegistry:
logging.debug(f"Available providers in registry: {list(instance._providers.keys())}")

for provider_type in PROVIDER_PRIORITY_ORDER:
logging.debug(f"Checking provider_type: {provider_type}")
if provider_type in instance._providers:
logging.debug(f"Found {provider_type} in registry")
# Get or create provider instance

providers/xai.py

@@ -7,7 +7,7 @@ from .base import (
ModelCapabilities,
ModelResponse,
ProviderType,
RangeTemperatureConstraint,
create_temperature_constraint,
)
from .openai_compatible import OpenAICompatibleProvider

@@ -19,23 +19,44 @@ class XAIModelProvider(OpenAICompatibleProvider):

FRIENDLY_NAME = "X.AI"

# Model configurations
# Model configurations using ModelCapabilities objects
SUPPORTED_MODELS = {
"grok-3": {
"context_window": 131_072, # 131K tokens
"supports_extended_thinking": False,
"description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
},
"grok-3-fast": {
"context_window": 131_072, # 131K tokens
"supports_extended_thinking": False,
"description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
},
# Shorthands for convenience
"grok": "grok-3", # Default to grok-3
"grok3": "grok-3",
"grok3fast": "grok-3-fast",
"grokfast": "grok-3-fast",
"grok-3": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3",
friendly_name="X.AI (Grok 3)",
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
aliases=["grok", "grok3"],
),
"grok-3-fast": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)",
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
aliases=["grok3fast", "grokfast", "grok3-fast"],
),
}

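Because the shorthands now live on each entry's aliases list instead of being extra string keys, an alias table can be derived from SUPPORTED_MODELS itself. A self-contained sketch of that derivation, using a stand-in dataclass so it runs without the provider package:

from dataclasses import dataclass, field

@dataclass
class _Caps:  # stand-in for ModelCapabilities, only the fields this sketch needs
    model_name: str
    aliases: list[str] = field(default_factory=list)

SUPPORTED = {
    "grok-3": _Caps("grok-3", aliases=["grok", "grok3"]),
    "grok-3-fast": _Caps("grok-3-fast", aliases=["grok3fast", "grokfast", "grok3-fast"]),
}

# alias (lowercased) -> canonical model name
alias_map = {alias.lower(): name for name, caps in SUPPORTED.items() for alias in caps.aliases}

assert alias_map["grok"] == "grok-3"
assert alias_map["grokfast"] == "grok-3-fast"
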
def __init__(self, api_key: str, **kwargs):
@@ -49,7 +70,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
# Resolve shorthand
resolved_name = self._resolve_model_name(model_name)

if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
if resolved_name not in self.SUPPORTED_MODELS:
raise ValueError(f"Unsupported X.AI model: {model_name}")

# Check if model is allowed by restrictions
@@ -59,23 +80,8 @@ class XAIModelProvider(OpenAICompatibleProvider):
if not restriction_service.is_allowed(ProviderType.XAI, resolved_name, model_name):
raise ValueError(f"X.AI model '{model_name}' is not allowed by restriction policy.")

config = self.SUPPORTED_MODELS[resolved_name]

# Define temperature constraints for GROK models
# GROK supports the standard OpenAI temperature range
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)

return ModelCapabilities(
provider=ProviderType.XAI,
model_name=resolved_name,
friendly_name=self.FRIENDLY_NAME,
context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
temperature_constraint=temp_constraint,
)
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
return self.SUPPORTED_MODELS[resolved_name]

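Since get_capabilities() now returns the shared objects stored in SUPPORTED_MODELS, the limits can also be read straight off the class-level table. A small check against the values defined above (the import path is an assumption about the package layout):

from providers.xai import XAIModelProvider  # import path assumed

caps = XAIModelProvider.SUPPORTED_MODELS["grok-3"]
assert caps.friendly_name == "X.AI (Grok 3)"
assert caps.context_window == 131_072
assert "grok" in caps.aliases
# On an instance, provider.get_capabilities("grok-3") returns this same object.
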
def get_provider_type(self) -> ProviderType:
"""Get the provider type."""
@@ -86,7 +92,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
resolved_name = self._resolve_model_name(model_name)

# First check if model is supported
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
if resolved_name not in self.SUPPORTED_MODELS:
return False

# Then check if model is allowed by restrictions
@@ -127,61 +133,3 @@ class XAIModelProvider(OpenAICompatibleProvider):
# Currently GROK models do not support extended thinking
# This may change with future GROK model releases
return False

def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.

Args:
respect_restrictions: Whether to apply provider-specific restriction logic.

Returns:
List of model names available from this provider
"""
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service() if respect_restrictions else None
models = []

for model_name, config in self.SUPPORTED_MODELS.items():
# Handle both base models (dict configs) and aliases (string values)
if isinstance(config, str):
# This is an alias - check if the target model would be allowed
target_model = config
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
continue
# Allow the alias
models.append(model_name)
else:
# This is a base model with config dict
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
models.append(model_name)

return models

def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.

Returns:
List of all model names and alias targets known by this provider
"""
all_models = set()

for model_name, config in self.SUPPORTED_MODELS.items():
# Add the model name itself
all_models.add(model_name.lower())

# If it's an alias (string value), add the target model too
if isinstance(config, str):
all_models.add(config.lower())

return list(all_models)

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name."""
# Check if it's a shorthand
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
if isinstance(shorthand_value, str):
return shorthand_value
return model_name