Merge branch 'feat-local_support_with_UTF-8_encoding-update' of https://github.com/GiGiDKR/zen-mcp-server into feat-local_support_with_UTF-8_encoding-update
@@ -132,6 +132,7 @@ class ModelCapabilities:
model_name: str
friendly_name: str # Human-friendly name like "Gemini" or "OpenAI"
context_window: int # Total context window size in tokens
max_output_tokens: int # Maximum output tokens per request
supports_extended_thinking: bool = False
supports_system_prompts: bool = True
supports_streaming: bool = True
@@ -140,6 +141,19 @@ class ModelCapabilities:
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls

# Additional fields for comprehensive model information
description: str = "" # Human-readable description of the model
aliases: list[str] = field(default_factory=list) # Alternative names/shortcuts for the model

# JSON mode support (for providers that support structured output)
supports_json_mode: bool = False

# Thinking mode support (for models with thinking capabilities)
max_thinking_tokens: int = 0 # Maximum thinking tokens for extended reasoning models

# Custom model flag (for models that only work with custom endpoints)
is_custom: bool = False # Whether this model requires custom API endpoints

# Temperature constraint object - preferred way to define temperature limits
temperature_constraint: TemperatureConstraint = field(
default_factory=lambda: RangeTemperatureConstraint(0.0, 2.0, 0.7)
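To make the expanded dataclass concrete, a provider entry exercising the new fields might look like the sketch below; the model name, token counts, alias strings and the ProviderType.CUSTOM member are illustrative assumptions, not values taken from this commit.

# Hedged sketch - a hypothetical capability entry using the new fields
example_capability = ModelCapabilities(
    provider=ProviderType.CUSTOM,          # assumed enum member for local endpoints
    model_name="example-local-model",      # hypothetical model id
    friendly_name="Example (Local)",
    context_window=32_768,
    max_output_tokens=8_192,
    description="Illustrative local model entry",
    aliases=["example", "local-example"],  # shortcuts picked up by alias resolution
    supports_json_mode=True,               # structured output supported
    max_thinking_tokens=0,                 # no extended reasoning budget
    is_custom=True,                        # only reachable via a custom endpoint
    temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
)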
@@ -251,7 +265,7 @@ class ModelProvider(ABC):
capabilities = self.get_capabilities(model_name)

# Check if model supports temperature at all
if hasattr(capabilities, "supports_temperature") and not capabilities.supports_temperature:
if not capabilities.supports_temperature:
return None

# Get temperature range
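The simplified check relies on supports_temperature now being a guaranteed field rather than an optional attribute. A minimal sketch of the surrounding parameter preparation, assuming the constraint object exposes a get_corrected_value() helper (an assumption, not confirmed by this diff), could look like:

# Hedged sketch - the helper name is hypothetical, not part of the diff
def _effective_temperature(self, model_name: str, requested: float):
    capabilities = self.get_capabilities(model_name)
    if not capabilities.supports_temperature:
        return None  # e.g. O3/O4-style models: omit the parameter entirely
    # Assumed constraint API: clamp the requested value into the allowed range
    return capabilities.temperature_constraint.get_corrected_value(requested)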
@@ -290,19 +304,109 @@ class ModelProvider(ABC):
"""Check if the model supports extended thinking mode."""
pass

@abstractmethod
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations for this provider.

This is a hook method that subclasses can override to provide
their model configurations from different sources.

Returns:
Dictionary mapping model names to their ModelCapabilities objects
"""
# Return SUPPORTED_MODELS if it exists (must contain ModelCapabilities objects)
if hasattr(self, "SUPPORTED_MODELS"):
return {k: v for k, v in self.SUPPORTED_MODELS.items() if isinstance(v, ModelCapabilities)}
return {}
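Because the default hook simply filters SUPPORTED_MODELS for ModelCapabilities values, a concrete provider mostly just declares that mapping. The class below is a hypothetical minimal subclass for illustration, not code from this commit; its other abstract methods are elided.

# Hedged sketch of a provider relying on the configuration hook
class ExampleProvider(ModelProvider):
    SUPPORTED_MODELS = {
        "example-model-1": ModelCapabilities(
            provider=ProviderType.CUSTOM,   # assumed enum member
            model_name="example-model-1",
            friendly_name="Example",
            context_window=128_000,
            max_output_tokens=16_384,
            aliases=["ex1"],
        ),
    }
    # get_model_configurations() (or a thin override calling super()) now
    # surfaces this mapping; remaining abstract methods are omitted here.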

def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases for this provider.

This is a hook method that subclasses can override to provide
aliases from different sources.

Returns:
Dictionary mapping model names to their list of aliases
"""
# Default implementation extracts from ModelCapabilities objects
aliases = {}
for model_name, capabilities in self.get_model_configurations().items():
if capabilities.aliases:
aliases[model_name] = capabilities.aliases
return aliases

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.

This implementation uses the hook methods to support different
model configuration sources.

Args:
model_name: Model name that may be an alias

Returns:
Resolved model name
"""
# Get model configurations from the hook method
model_configs = self.get_model_configurations()

# First check if it's already a base model name (case-sensitive exact match)
if model_name in model_configs:
return model_name

# Check case-insensitively for both base models and aliases
model_name_lower = model_name.lower()

# Check base model names case-insensitively
for base_model in model_configs:
if base_model.lower() == model_name_lower:
return base_model

# Check aliases from the hook method
all_aliases = self.get_all_model_aliases()
for base_model, aliases in all_aliases.items():
if any(alias.lower() == model_name_lower for alias in aliases):
return base_model

# If not found, return as-is
return model_name
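As a usage sketch (provider construction details elided), resolution is case-insensitive over both base names and aliases, falling back to the input string:

# Hedged usage sketch; assumes a provider whose configurations include
# "gemini-2.5-flash" with alias "flash", as in the Gemini provider below
provider._resolve_model_name("gemini-2.5-flash")  # exact match -> returned as-is
provider._resolve_model_name("GEMINI-2.5-FLASH")  # case-insensitive base-name match
provider._resolve_model_name("flash")             # alias match -> "gemini-2.5-flash"
provider._resolve_model_name("unknown-model")     # not found -> returned unchanged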

def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.

This implementation uses the get_model_configurations() hook
to support different model configuration sources.

Args:
respect_restrictions: Whether to apply provider-specific restriction logic.

Returns:
List of model names available from this provider
"""
pass
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service() if respect_restrictions else None
models = []

# Get model configurations from the hook method
model_configs = self.get_model_configurations()

for model_name in model_configs:
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue

# Add the base model
models.append(model_name)

# Get aliases from the hook method
all_aliases = self.get_all_model_aliases()
for model_name, aliases in all_aliases.items():
# Only add aliases for models that passed restriction check
if model_name in models:
models.extend(aliases)

return models
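So a provider with one allowed base model and one alias surfaces both names; a sketch of the expected shape, assuming the restriction service behaves as above:

# Hedged sketch of the expected output shape
models = provider.list_models(respect_restrictions=True)
# e.g. ["gemini-2.5-flash", "flash"] - base names first, then the aliases of
# models that survived the restriction check; restricted models and their
# aliases are omitted entirely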

@abstractmethod
def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.

@@ -312,21 +416,22 @@ class ModelProvider(ABC):
Returns:
List of all model names and alias targets known by this provider
"""
pass
all_models = set()

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.
# Get model configurations from the hook method
model_configs = self.get_model_configurations()

Base implementation returns the model name unchanged.
Subclasses should override to provide alias resolution.
# Add all base model names
for model_name in model_configs:
all_models.add(model_name.lower())

Args:
model_name: Model name that may be an alias
# Get aliases from the hook method and add them
all_aliases = self.get_all_model_aliases()
for _model_name, aliases in all_aliases.items():
for alias in aliases:
all_models.add(alias.lower())

Returns:
Resolved model name
"""
return model_name
return list(all_models)

def close(self):
"""Clean up any resources held by the provider.
@@ -158,6 +158,7 @@ class CustomProvider(OpenAICompatibleProvider):
model_name=resolved_name,
friendly_name=f"{self.FRIENDLY_NAME} ({resolved_name})",
context_window=32_768, # Conservative default
max_output_tokens=32_768, # Conservative default max output
supports_extended_thinking=False, # Most custom models don't support this
supports_system_prompts=True,
supports_streaming=True,
@@ -187,7 +188,7 @@ class CustomProvider(OpenAICompatibleProvider):
Returns:
True if model is intended for custom/local endpoint
"""
logging.debug(f"Custom provider validating model: '{model_name}'")
# logging.debug(f"Custom provider validating model: '{model_name}'")

# Try to resolve through registry first
config = self._registry.resolve(model_name)
@@ -195,12 +196,12 @@ class CustomProvider(OpenAICompatibleProvider):
model_id = config.model_name
# Use explicit is_custom flag for clean validation
if config.is_custom:
logging.debug(f"Model '{model_name}' -> '{model_id}' validated via registry (custom model)")
logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' validated via registry")
return True
else:
# This is a cloud/OpenRouter model - CustomProvider should NOT handle these
# Let OpenRouter provider handle them instead
logging.debug(f"Model '{model_name}' -> '{model_id}' rejected (cloud model, defer to OpenRouter)")
# logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' not custom (defer to OpenRouter)")
return False

# Handle version tags for unknown models (e.g., "my-model:latest")
@@ -268,65 +269,50 @@ class CustomProvider(OpenAICompatibleProvider):
def supports_thinking_mode(self, model_name: str) -> bool:
"""Check if the model supports extended thinking mode.

Most custom/local models don't support extended thinking.

Args:
model_name: Model to check

Returns:
False (custom models generally don't support thinking mode)
True if model supports thinking mode, False otherwise
"""
# Check if model is in registry
config = self._registry.resolve(model_name) if self._registry else None
if config and config.is_custom:
# Trust the config from custom_models.json
return config.supports_extended_thinking

# Default to False for unknown models
return False

def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.
def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations from the registry.

Args:
respect_restrictions: Whether to apply provider-specific restriction logic.
For CustomProvider, we convert registry configurations to ModelCapabilities objects.

Returns:
List of model names available from this provider
Dictionary mapping model names to their ModelCapabilities objects
"""
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service() if respect_restrictions else None
models = []
configs = {}

if self._registry:
# Get all models from the registry
all_models = self._registry.list_models()
aliases = self._registry.list_aliases()

# Add models that are validated by the custom provider
for model_name in all_models + aliases:
# Use the provider's validation logic to determine if this model
# is appropriate for the custom endpoint
# Get all models from registry
for model_name in self._registry.list_models():
# Only include custom models that this provider validates
if self.validate_model_name(model_name):
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
config = self._registry.resolve(model_name)
if config and config.is_custom:
# Use ModelCapabilities directly from registry
configs[model_name] = config

models.append(model_name)
return configs
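In other words, the custom provider now hands back registry entries directly when they are flagged is_custom; a rough usage sketch with hypothetical registry contents:

# Hedged sketch - assumes a registry entry flagged is_custom=True
configs = custom_provider.get_model_configurations()
# e.g. {"my-local-llama": ModelCapabilities(..., is_custom=True, aliases=["llama-local"])}
for name, caps in configs.items():
    print(name, caps.context_window, caps.aliases)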

return models

def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.
def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases from the registry.

Returns:
List of all model names and alias targets known by this provider
Dictionary mapping model names to their list of aliases
"""
all_models = set()

if self._registry:
# Get all models and aliases from the registry
all_models.update(model.lower() for model in self._registry.list_models())
all_models.update(alias.lower() for alias in self._registry.list_aliases())

# For each alias, also add its target
for alias in self._registry.list_aliases():
config = self._registry.resolve(alias)
if config:
all_models.add(config.model_name.lower())

return list(all_models)
# Since aliases are now included in the configurations,
# we can use the base class implementation
return super().get_all_model_aliases()

@@ -10,7 +10,7 @@ from .base import (
|
||||
ModelCapabilities,
|
||||
ModelResponse,
|
||||
ProviderType,
|
||||
RangeTemperatureConstraint,
|
||||
create_temperature_constraint,
|
||||
)
|
||||
from .openai_compatible import OpenAICompatibleProvider
|
||||
|
||||
@@ -30,63 +30,170 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
MAX_RETRIES = 4
|
||||
RETRY_DELAYS = [1, 3, 5, 8] # seconds
|
||||
|
||||
# Supported DIAL models (these can be customized based on your DIAL deployment)
|
||||
# Model configurations using ModelCapabilities objects
|
||||
SUPPORTED_MODELS = {
|
||||
"o3-2025-04-16": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"o4-mini-2025-04-16": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": True, # Thinking mode variant
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-opus-4-20250514-v1:0": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"anthropic.claude-opus-4-20250514-v1:0-with-thinking": {
|
||||
"context_window": 200_000,
|
||||
"supports_extended_thinking": True, # Thinking mode variant
|
||||
"supports_vision": True,
|
||||
},
|
||||
"gemini-2.5-pro-preview-03-25-google-search": {
|
||||
"context_window": 1_000_000,
|
||||
"supports_extended_thinking": False, # DIAL doesn't expose thinking mode
|
||||
"supports_vision": True,
|
||||
},
|
||||
"gemini-2.5-pro-preview-05-06": {
|
||||
"context_window": 1_000_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
"gemini-2.5-flash-preview-05-20": {
|
||||
"context_window": 1_000_000,
|
||||
"supports_extended_thinking": False,
|
||||
"supports_vision": True,
|
||||
},
|
||||
# Shorthands
|
||||
"o3": "o3-2025-04-16",
|
||||
"o4-mini": "o4-mini-2025-04-16",
|
||||
"sonnet-4": "anthropic.claude-sonnet-4-20250514-v1:0",
|
||||
"sonnet-4-thinking": "anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
|
||||
"opus-4": "anthropic.claude-opus-4-20250514-v1:0",
|
||||
"opus-4-thinking": "anthropic.claude-opus-4-20250514-v1:0-with-thinking",
|
||||
"gemini-2.5-pro": "gemini-2.5-pro-preview-05-06",
|
||||
"gemini-2.5-pro-search": "gemini-2.5-pro-preview-03-25-google-search",
|
||||
"gemini-2.5-flash": "gemini-2.5-flash-preview-05-20",
|
||||
"o3-2025-04-16": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="o3-2025-04-16",
|
||||
friendly_name="DIAL (O3)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=100_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=False, # O3 models don't accept temperature
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="OpenAI O3 via DIAL - Strong reasoning model",
|
||||
aliases=["o3"],
|
||||
),
|
||||
"o4-mini-2025-04-16": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="o4-mini-2025-04-16",
|
||||
friendly_name="DIAL (O4-mini)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=100_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=False, # O4 models don't accept temperature
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="OpenAI O4-mini via DIAL - Fast reasoning model",
|
||||
aliases=["o4-mini"],
|
||||
),
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-sonnet-4-20250514-v1:0",
|
||||
friendly_name="DIAL (Sonnet 4)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Sonnet 4 via DIAL - Balanced performance",
|
||||
aliases=["sonnet-4"],
|
||||
),
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0-with-thinking": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
|
||||
friendly_name="DIAL (Sonnet 4 Thinking)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=True, # Thinking mode variant
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Sonnet 4 with thinking mode via DIAL",
|
||||
aliases=["sonnet-4-thinking"],
|
||||
),
|
||||
"anthropic.claude-opus-4-20250514-v1:0": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-opus-4-20250514-v1:0",
|
||||
friendly_name="DIAL (Opus 4)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Opus 4 via DIAL - Most capable Claude model",
|
||||
aliases=["opus-4"],
|
||||
),
|
||||
"anthropic.claude-opus-4-20250514-v1:0-with-thinking": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking",
|
||||
friendly_name="DIAL (Opus 4 Thinking)",
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=True, # Thinking mode variant
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # Claude doesn't have function calling
|
||||
supports_json_mode=False, # Claude doesn't have JSON mode
|
||||
supports_images=True,
|
||||
max_image_size_mb=5.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Claude Opus 4 with thinking mode via DIAL",
|
||||
aliases=["opus-4-thinking"],
|
||||
),
|
||||
"gemini-2.5-pro-preview-03-25-google-search": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-pro-preview-03-25-google-search",
|
||||
friendly_name="DIAL (Gemini 2.5 Pro Search)",
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False, # DIAL doesn't expose thinking mode
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.5 Pro with Google Search via DIAL",
|
||||
aliases=["gemini-2.5-pro-search"],
|
||||
),
|
||||
"gemini-2.5-pro-preview-05-06": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-pro-preview-05-06",
|
||||
friendly_name="DIAL (Gemini 2.5 Pro)",
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.5 Pro via DIAL - Deep reasoning",
|
||||
aliases=["gemini-2.5-pro"],
|
||||
),
|
||||
"gemini-2.5-flash-preview-05-20": ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-flash-preview-05-20",
|
||||
friendly_name="DIAL (Gemini Flash 2.5)",
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=False, # DIAL may not expose function calling
|
||||
supports_json_mode=True,
|
||||
supports_images=True,
|
||||
max_image_size_mb=20.0,
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.5 Flash via DIAL - Ultra-fast",
|
||||
aliases=["gemini-2.5-flash"],
|
||||
),
|
||||
}
|
||||
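With the DIAL table expressed as ModelCapabilities objects, a capability lookup reduces to alias resolution plus a dictionary access; a hedged sketch of the flow (restriction checks and error handling elided):

# Hedged sketch - mirrors the simplified get_capabilities() further down
resolved = dial_provider._resolve_model_name("opus-4")
# -> "anthropic.claude-opus-4-20250514-v1:0"
caps = dial_provider.SUPPORTED_MODELS[resolved]
assert caps.supports_extended_thinking is False
assert caps.supports_temperature is True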
|
||||
def __init__(self, api_key: str, **kwargs):
|
||||
@@ -181,20 +288,8 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
if not restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model_name):
|
||||
raise ValueError(f"Model '{model_name}' is not allowed by restriction policy.")
|
||||
|
||||
config = self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
return ModelCapabilities(
|
||||
provider=ProviderType.DIAL,
|
||||
model_name=resolved_name,
|
||||
friendly_name=self.FRIENDLY_NAME,
|
||||
context_window=config["context_window"],
|
||||
supports_extended_thinking=config["supports_extended_thinking"],
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_images=config.get("supports_vision", False),
|
||||
temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
|
||||
)
|
||||
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
|
||||
return self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
def get_provider_type(self) -> ProviderType:
|
||||
"""Get the provider type."""
|
||||
@@ -211,7 +306,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
"""
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
return False
|
||||
|
||||
# Check against base class allowed_models if configured
|
||||
@@ -231,20 +326,6 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
|
||||
return True
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model shorthand to full name.
|
||||
|
||||
Args:
|
||||
model_name: Model name or shorthand
|
||||
|
||||
Returns:
|
||||
Full model name
|
||||
"""
|
||||
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
|
||||
if isinstance(shorthand_value, str):
|
||||
return shorthand_value
|
||||
return model_name
|
||||
|
||||
def _get_deployment_client(self, deployment: str):
|
||||
"""Get or create a cached client for a specific deployment.
|
||||
|
||||
@@ -357,7 +438,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
# Check model capabilities
|
||||
try:
|
||||
capabilities = self.get_capabilities(model_name)
|
||||
supports_temperature = getattr(capabilities, "supports_temperature", True)
|
||||
supports_temperature = capabilities.supports_temperature
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to check temperature support for {model_name}: {e}")
|
||||
supports_temperature = True
|
||||
@@ -441,63 +522,12 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
"""
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
if resolved_name in self.SUPPORTED_MODELS and isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
return self.SUPPORTED_MODELS[resolved_name].get("supports_vision", False)
|
||||
if resolved_name in self.SUPPORTED_MODELS:
|
||||
return self.SUPPORTED_MODELS[resolved_name].supports_images
|
||||
|
||||
# Fall back to parent implementation for unknown models
|
||||
return super()._supports_vision(model_name)
|
||||
|
||||
def list_models(self, respect_restrictions: bool = True) -> list[str]:
|
||||
"""Return a list of model names supported by this provider.
|
||||
|
||||
Args:
|
||||
respect_restrictions: Whether to apply provider-specific restriction logic.
|
||||
|
||||
Returns:
|
||||
List of model names available from this provider
|
||||
"""
|
||||
# Get all model keys (both full names and aliases)
|
||||
all_models = list(self.SUPPORTED_MODELS.keys())
|
||||
|
||||
if not respect_restrictions:
|
||||
return all_models
|
||||
|
||||
# Apply restrictions if configured
|
||||
from utils.model_restrictions import get_restriction_service
|
||||
|
||||
restriction_service = get_restriction_service()
|
||||
|
||||
# Filter based on restrictions
|
||||
allowed_models = []
|
||||
for model in all_models:
|
||||
resolved_name = self._resolve_model_name(model)
|
||||
if restriction_service.is_allowed(ProviderType.DIAL, resolved_name, model):
|
||||
allowed_models.append(model)
|
||||
|
||||
return allowed_models
|
||||
|
||||
def list_all_known_models(self) -> list[str]:
|
||||
"""Return all model names known by this provider, including alias targets.
|
||||
|
||||
This is used for validation purposes to ensure restriction policies
|
||||
can validate against both aliases and their target model names.
|
||||
|
||||
Returns:
|
||||
List of all model names and alias targets known by this provider
|
||||
"""
|
||||
# Collect all unique model names (both aliases and targets)
|
||||
all_models = set()
|
||||
|
||||
for key, value in self.SUPPORTED_MODELS.items():
|
||||
# Add the key (could be alias or full name)
|
||||
all_models.add(key)
|
||||
|
||||
# If it's an alias (string value), add the target too
|
||||
if isinstance(value, str):
|
||||
all_models.add(value)
|
||||
|
||||
return sorted(all_models)
|
||||
|
||||
def close(self):
|
||||
"""Clean up HTTP clients when provider is closed."""
|
||||
logger.info("Closing DIAL provider HTTP clients...")
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Optional
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, RangeTemperatureConstraint
|
||||
from .base import ModelCapabilities, ModelProvider, ModelResponse, ProviderType, create_temperature_constraint
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -17,47 +17,83 @@ logger = logging.getLogger(__name__)
|
||||
class GeminiModelProvider(ModelProvider):
|
||||
"""Google Gemini model provider implementation."""
|
||||
|
||||
# Model configurations
|
||||
# Model configurations using ModelCapabilities objects
|
||||
SUPPORTED_MODELS = {
|
||||
"gemini-2.0-flash": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": True, # Experimental thinking mode
|
||||
"max_thinking_tokens": 24576, # Same as 2.5 flash for consistency
|
||||
"supports_images": True, # Vision capability
|
||||
"max_image_size_mb": 20.0, # Conservative 20MB limit for reliability
|
||||
"description": "Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
|
||||
},
|
||||
"gemini-2.0-flash-lite": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": False, # Not supported per user request
|
||||
"max_thinking_tokens": 0, # No thinking support
|
||||
"supports_images": False, # Does not support images
|
||||
"max_image_size_mb": 0.0, # No image support
|
||||
"description": "Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
|
||||
},
|
||||
"gemini-2.5-flash": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": True,
|
||||
"max_thinking_tokens": 24576, # Flash 2.5 thinking budget limit
|
||||
"supports_images": True, # Vision capability
|
||||
"max_image_size_mb": 20.0, # Conservative 20MB limit for reliability
|
||||
"description": "Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
|
||||
},
|
||||
"gemini-2.5-pro": {
|
||||
"context_window": 1_048_576, # 1M tokens
|
||||
"supports_extended_thinking": True,
|
||||
"max_thinking_tokens": 32768, # Pro 2.5 thinking budget limit
|
||||
"supports_images": True, # Vision capability
|
||||
"max_image_size_mb": 32.0, # Higher limit for Pro model
|
||||
"description": "Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
|
||||
},
|
||||
# Shorthands
|
||||
"flash": "gemini-2.5-flash",
|
||||
"flash-2.0": "gemini-2.0-flash",
|
||||
"flash2": "gemini-2.0-flash",
|
||||
"flashlite": "gemini-2.0-flash-lite",
|
||||
"flash-lite": "gemini-2.0-flash-lite",
|
||||
"pro": "gemini-2.5-pro",
|
||||
"gemini-2.0-flash": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.0-flash",
|
||||
friendly_name="Gemini (Flash 2.0)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True, # Experimental thinking mode
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # Vision capability
|
||||
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
max_thinking_tokens=24576, # Same as 2.5 flash for consistency
|
||||
description="Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input",
|
||||
aliases=["flash-2.0", "flash2"],
|
||||
),
|
||||
"gemini-2.0-flash-lite": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.0-flash-lite",
|
||||
friendly_name="Gemin (Flash Lite 2.0)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False, # Not supported per user request
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=False, # Does not support images
|
||||
max_image_size_mb=0.0, # No image support
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="Gemini 2.0 Flash Lite (1M context) - Lightweight fast model, text-only",
|
||||
aliases=["flashlite", "flash-lite"],
|
||||
),
|
||||
"gemini-2.5-flash": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.5-flash",
|
||||
friendly_name="Gemini (Flash 2.5)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # Vision capability
|
||||
max_image_size_mb=20.0, # Conservative 20MB limit for reliability
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
max_thinking_tokens=24576, # Flash 2.5 thinking budget limit
|
||||
description="Ultra-fast (1M context) - Quick analysis, simple queries, rapid iterations",
|
||||
aliases=["flash", "flash2.5"],
|
||||
),
|
||||
"gemini-2.5-pro": ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.5-pro",
|
||||
friendly_name="Gemini (Pro 2.5)",
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # Vision capability
|
||||
max_image_size_mb=32.0, # Higher limit for Pro model
|
||||
supports_temperature=True,
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
max_thinking_tokens=32768, # Max thinking tokens for Pro model
|
||||
description="Deep reasoning + thinking mode (1M context) - Complex problems, architecture, deep analysis",
|
||||
aliases=["pro", "gemini pro", "gemini-pro"],
|
||||
),
|
||||
}
|
||||
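The separate shorthand table is gone; aliases now live on the entries themselves, so resolution behaves like the sketch below (names taken from the table above, resolution assumed to go through the base-class hook described earlier):

# Hedged sketch of alias resolution against the table above
gemini_provider._resolve_model_name("flash")       # -> "gemini-2.5-flash"
gemini_provider._resolve_model_name("flash-lite")  # -> "gemini-2.0-flash-lite"
gemini_provider._resolve_model_name("pro")         # -> "gemini-2.5-pro"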
|
||||
# Thinking mode configurations - percentages of model's max_thinking_tokens
|
||||
@@ -70,6 +106,14 @@ class GeminiModelProvider(ModelProvider):
|
||||
"max": 1.0, # 100% of max - full thinking budget
|
||||
}
|
||||
|
||||
# Model-specific thinking token limits
|
||||
MAX_THINKING_TOKENS = {
|
||||
"gemini-2.0-flash": 24576, # Same as 2.5 flash for consistency
|
||||
"gemini-2.0-flash-lite": 0, # No thinking support
|
||||
"gemini-2.5-flash": 24576, # Flash 2.5 thinking budget limit
|
||||
"gemini-2.5-pro": 32768, # Pro 2.5 thinking budget limit
|
||||
}
|
||||
|
||||
def __init__(self, api_key: str, **kwargs):
|
||||
"""Initialize Gemini provider with API key."""
|
||||
super().__init__(api_key, **kwargs)
|
||||
@@ -100,25 +144,8 @@ class GeminiModelProvider(ModelProvider):
|
||||
if not restriction_service.is_allowed(ProviderType.GOOGLE, resolved_name, model_name):
|
||||
raise ValueError(f"Gemini model '{resolved_name}' is not allowed by restriction policy.")
|
||||
|
||||
config = self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
# Gemini models support 0.0-2.0 temperature range
|
||||
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
|
||||
|
||||
return ModelCapabilities(
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name=resolved_name,
|
||||
friendly_name="Gemini",
|
||||
context_window=config["context_window"],
|
||||
supports_extended_thinking=config["supports_extended_thinking"],
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_images=config.get("supports_images", False),
|
||||
max_image_size_mb=config.get("max_image_size_mb", 0.0),
|
||||
supports_temperature=True, # Gemini models accept temperature parameter
|
||||
temperature_constraint=temp_constraint,
|
||||
)
|
||||
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
|
||||
return self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
def generate_content(
|
||||
self,
|
||||
@@ -179,8 +206,8 @@ class GeminiModelProvider(ModelProvider):
|
||||
if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
|
||||
# Get model's max thinking tokens and calculate actual budget
|
||||
model_config = self.SUPPORTED_MODELS.get(resolved_name)
|
||||
if model_config and "max_thinking_tokens" in model_config:
|
||||
max_thinking_tokens = model_config["max_thinking_tokens"]
|
||||
if model_config and model_config.max_thinking_tokens > 0:
|
||||
max_thinking_tokens = model_config.max_thinking_tokens
|
||||
actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
|
||||
generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
|
||||
|
||||
@@ -258,7 +285,7 @@ class GeminiModelProvider(ModelProvider):
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
# First check if model is supported
|
||||
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
return False
|
||||
|
||||
# Then check if model is allowed by restrictions
|
||||
@@ -281,78 +308,20 @@ class GeminiModelProvider(ModelProvider):
def get_thinking_budget(self, model_name: str, thinking_mode: str) -> int:
"""Get actual thinking token budget for a model and thinking mode."""
resolved_name = self._resolve_model_name(model_name)
model_config = self.SUPPORTED_MODELS.get(resolved_name, {})
model_config = self.SUPPORTED_MODELS.get(resolved_name)

if not model_config.get("supports_extended_thinking", False):
if not model_config or not model_config.supports_extended_thinking:
return 0

if thinking_mode not in self.THINKING_BUDGETS:
return 0

max_thinking_tokens = model_config.get("max_thinking_tokens", 0)
max_thinking_tokens = model_config.max_thinking_tokens
if max_thinking_tokens == 0:
return 0

return int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])

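A quick worked example with the limits above (only the "max" = 1.0 entry of THINKING_BUDGETS is visible in this diff; other percentages are whatever that table defines):

# Hedged worked example for the budget calculation above
caps = gemini_provider.SUPPORTED_MODELS["gemini-2.5-pro"]
caps.max_thinking_tokens                                  # 32768
gemini_provider.get_thinking_budget("pro", "max")         # int(32768 * 1.0) == 32768
gemini_provider.get_thinking_budget("flash-lite", "max")  # 0 - no thinking support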
def list_models(self, respect_restrictions: bool = True) -> list[str]:
|
||||
"""Return a list of model names supported by this provider.
|
||||
|
||||
Args:
|
||||
respect_restrictions: Whether to apply provider-specific restriction logic.
|
||||
|
||||
Returns:
|
||||
List of model names available from this provider
|
||||
"""
|
||||
from utils.model_restrictions import get_restriction_service
|
||||
|
||||
restriction_service = get_restriction_service() if respect_restrictions else None
|
||||
models = []
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Handle both base models (dict configs) and aliases (string values)
|
||||
if isinstance(config, str):
|
||||
# This is an alias - check if the target model would be allowed
|
||||
target_model = config
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
|
||||
continue
|
||||
# Allow the alias
|
||||
models.append(model_name)
|
||||
else:
|
||||
# This is a base model with config dict
|
||||
# Check restrictions if enabled
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
|
||||
continue
|
||||
models.append(model_name)
|
||||
|
||||
return models
|
||||
|
||||
def list_all_known_models(self) -> list[str]:
|
||||
"""Return all model names known by this provider, including alias targets.
|
||||
|
||||
Returns:
|
||||
List of all model names and alias targets known by this provider
|
||||
"""
|
||||
all_models = set()
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Add the model name itself
|
||||
all_models.add(model_name.lower())
|
||||
|
||||
# If it's an alias (string value), add the target model too
|
||||
if isinstance(config, str):
|
||||
all_models.add(config.lower())
|
||||
|
||||
return list(all_models)
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model shorthand to full name."""
|
||||
# Check if it's a shorthand
|
||||
shorthand_value = self.SUPPORTED_MODELS.get(model_name.lower())
|
||||
if isinstance(shorthand_value, str):
|
||||
return shorthand_value
|
||||
return model_name
|
||||
|
||||
def _extract_usage(self, response) -> dict[str, int]:
|
||||
"""Extract token usage from Gemini response."""
|
||||
usage = {}
|
||||
|
||||
@@ -686,7 +686,6 @@ class OpenAICompatibleProvider(ModelProvider):
|
||||
"o3-mini",
|
||||
"o3-pro",
|
||||
"o4-mini",
|
||||
"o4-mini-high",
|
||||
# Note: Claude models would be handled by a separate provider
|
||||
}
|
||||
supports = model_name.lower() in vision_models
|
||||
|
||||
@@ -17,71 +17,98 @@ logger = logging.getLogger(__name__)
|
||||
class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
"""Official OpenAI API provider (api.openai.com)."""
|
||||
|
||||
# Model configurations
|
||||
# Model configurations using ModelCapabilities objects
|
||||
SUPPORTED_MODELS = {
|
||||
"o3": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O3 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O3 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
|
||||
},
|
||||
"o3-mini": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O3 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O3 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
|
||||
},
|
||||
"o3-pro-2025-06-10": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O3 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O3 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
|
||||
},
|
||||
# Aliases
|
||||
"o3-pro": "o3-pro-2025-06-10",
|
||||
"o4-mini": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O4 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O4 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
|
||||
},
|
||||
"o4-mini-high": {
|
||||
"context_window": 200_000, # 200K tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # O4 models support vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": False, # O4 models don't accept temperature parameter
|
||||
"temperature_constraint": "fixed", # Fixed at 1.0
|
||||
"description": "Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks",
|
||||
},
|
||||
"gpt-4.1-2025-04-14": {
|
||||
"context_window": 1_000_000, # 1M tokens
|
||||
"supports_extended_thinking": False,
|
||||
"supports_images": True, # GPT-4.1 supports vision
|
||||
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
|
||||
"supports_temperature": True, # Regular models accept temperature parameter
|
||||
"temperature_constraint": "range", # 0.0-2.0 range
|
||||
"description": "GPT-4.1 (1M context) - Advanced reasoning model with large context window",
|
||||
},
|
||||
# Shorthands
|
||||
"mini": "o4-mini", # Default 'mini' to latest mini model
|
||||
"o3mini": "o3-mini",
|
||||
"o4mini": "o4-mini",
|
||||
"o4minihigh": "o4-mini-high",
|
||||
"o4minihi": "o4-mini-high",
|
||||
"gpt4.1": "gpt-4.1-2025-04-14",
|
||||
"o3": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3",
|
||||
friendly_name="OpenAI (O3)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O3 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O3 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Strong reasoning (200K context) - Logical problems, code generation, systematic analysis",
|
||||
aliases=[],
|
||||
),
|
||||
"o3-mini": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3-mini",
|
||||
friendly_name="OpenAI (O3-mini)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O3 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O3 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Fast O3 variant (200K context) - Balanced performance/speed, moderate complexity",
|
||||
aliases=["o3mini", "o3-mini"],
|
||||
),
|
||||
"o3-pro-2025-06-10": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3-pro-2025-06-10",
|
||||
friendly_name="OpenAI (O3-Pro)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O3 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O3 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Professional-grade reasoning (200K context) - EXTREMELY EXPENSIVE: Only for the most complex problems requiring universe-scale complexity analysis OR when the user explicitly asks for this model. Use sparingly for critical architectural decisions or exceptionally complex debugging that other models cannot handle.",
|
||||
aliases=["o3-pro"],
|
||||
),
|
||||
"o4-mini": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o4-mini",
|
||||
friendly_name="OpenAI (O4-mini)",
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # O4 models support vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=False, # O4 models don't accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("fixed"),
|
||||
description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
|
||||
aliases=["mini", "o4mini", "o4-mini"],
|
||||
),
|
||||
"gpt-4.1-2025-04-14": ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="gpt-4.1-2025-04-14",
|
||||
friendly_name="OpenAI (GPT 4.1)",
|
||||
context_window=1_000_000, # 1M tokens
|
||||
max_output_tokens=32_768,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_json_mode=True,
|
||||
supports_images=True, # GPT-4.1 supports vision
|
||||
max_image_size_mb=20.0, # 20MB per OpenAI docs
|
||||
supports_temperature=True, # Regular models accept temperature parameter
|
||||
temperature_constraint=create_temperature_constraint("range"),
|
||||
description="GPT-4.1 (1M context) - Advanced reasoning model with large context window",
|
||||
aliases=["gpt4.1"],
|
||||
),
|
||||
}
|
||||
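With the OpenAI table in the same shape, the "mini" shorthand resolves to o4-mini and its capabilities come straight from this mapping; a hedged sketch against the values above:

# Hedged sketch against the table above
caps = openai_provider.get_capabilities("mini")  # resolves to "o4-mini"
caps.supports_temperature   # False - fixed temperature constraint
caps.context_window         # 200_000
caps.supports_json_mode     # True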
|
||||
def __init__(self, api_key: str, **kwargs):
|
||||
@@ -95,7 +122,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
# Resolve shorthand
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
raise ValueError(f"Unsupported OpenAI model: {model_name}")
|
||||
|
||||
# Check if model is allowed by restrictions
|
||||
@@ -105,27 +132,8 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
if not restriction_service.is_allowed(ProviderType.OPENAI, resolved_name, model_name):
|
||||
raise ValueError(f"OpenAI model '{model_name}' is not allowed by restriction policy.")
|
||||
|
||||
config = self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
# Get temperature constraints and support from configuration
|
||||
supports_temperature = config.get("supports_temperature", True) # Default to True for backward compatibility
|
||||
temp_constraint_type = config.get("temperature_constraint", "range") # Default to range
|
||||
temp_constraint = create_temperature_constraint(temp_constraint_type)
|
||||
|
||||
return ModelCapabilities(
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name=model_name,
|
||||
friendly_name="OpenAI",
|
||||
context_window=config["context_window"],
|
||||
supports_extended_thinking=config["supports_extended_thinking"],
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
supports_function_calling=True,
|
||||
supports_images=config.get("supports_images", False),
|
||||
max_image_size_mb=config.get("max_image_size_mb", 0.0),
|
||||
supports_temperature=supports_temperature,
|
||||
temperature_constraint=temp_constraint,
|
||||
)
|
||||
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
|
||||
return self.SUPPORTED_MODELS[resolved_name]
|
||||
|
||||
def get_provider_type(self) -> ProviderType:
|
||||
"""Get the provider type."""
|
||||
@@ -136,7 +144,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
resolved_name = self._resolve_model_name(model_name)
|
||||
|
||||
# First check if model is supported
|
||||
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
|
||||
if resolved_name not in self.SUPPORTED_MODELS:
|
||||
return False
|
||||
|
||||
# Then check if model is allowed by restrictions
|
||||
@@ -177,61 +185,3 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
# Currently no OpenAI models support extended thinking
|
||||
# This may change with future O3 models
|
||||
return False
|
||||
|
||||
def list_models(self, respect_restrictions: bool = True) -> list[str]:
|
||||
"""Return a list of model names supported by this provider.
|
||||
|
||||
Args:
|
||||
respect_restrictions: Whether to apply provider-specific restriction logic.
|
||||
|
||||
Returns:
|
||||
List of model names available from this provider
|
||||
"""
|
||||
from utils.model_restrictions import get_restriction_service
|
||||
|
||||
restriction_service = get_restriction_service() if respect_restrictions else None
|
||||
models = []
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Handle both base models (dict configs) and aliases (string values)
|
||||
if isinstance(config, str):
|
||||
# This is an alias - check if the target model would be allowed
|
||||
target_model = config
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
|
||||
continue
|
||||
# Allow the alias
|
||||
models.append(model_name)
|
||||
else:
|
||||
# This is a base model with config dict
|
||||
# Check restrictions if enabled
|
||||
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
|
||||
continue
|
||||
models.append(model_name)
|
||||
|
||||
return models
|
||||
|
||||
def list_all_known_models(self) -> list[str]:
|
||||
"""Return all model names known by this provider, including alias targets.
|
||||
|
||||
Returns:
|
||||
List of all model names and alias targets known by this provider
|
||||
"""
|
||||
all_models = set()
|
||||
|
||||
for model_name, config in self.SUPPORTED_MODELS.items():
|
||||
# Add the model name itself
|
||||
all_models.add(model_name.lower())
|
||||
|
||||
# If it's an alias (string value), add the target model too
|
||||
if isinstance(config, str):
|
||||
all_models.add(config.lower())
|
||||
|
||||
return list(all_models)
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model shorthand to full name."""
|
||||
# Check if it's a shorthand
|
||||
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
|
||||
if isinstance(shorthand_value, str):
|
||||
return shorthand_value
|
||||
return model_name
|
||||
|
||||
@@ -50,14 +50,6 @@ class OpenRouterProvider(OpenAICompatibleProvider):
|
||||
aliases = self._registry.list_aliases()
|
||||
logging.info(f"OpenRouter loaded {len(models)} models with {len(aliases)} aliases")
|
||||
|
||||
def _parse_allowed_models(self) -> None:
|
||||
"""Override to disable environment-based allow-list.
|
||||
|
||||
OpenRouter model access is controlled via the OpenRouter dashboard,
|
||||
not through environment variables.
|
||||
"""
|
||||
return None
|
||||
|
||||
def _resolve_model_name(self, model_name: str) -> str:
|
||||
"""Resolve model aliases to OpenRouter model names.
|
||||
|
||||
@@ -109,6 +101,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
|
||||
model_name=resolved_name,
|
||||
friendly_name=self.FRIENDLY_NAME,
|
||||
context_window=32_768, # Conservative default context window
|
||||
max_output_tokens=32_768,
|
||||
supports_extended_thinking=False,
|
||||
supports_system_prompts=True,
|
||||
supports_streaming=True,
|
||||
@@ -130,16 +123,34 @@ class OpenRouterProvider(OpenAICompatibleProvider):

As the catch-all provider, OpenRouter accepts any model name that wasn't
handled by higher-priority providers. OpenRouter will validate based on
the API key's permissions.
the API key's permissions and local restrictions.

Args:
model_name: Model name to validate

Returns:
Always True - OpenRouter is the catch-all provider
True if model is allowed, False if restricted
"""
# Accept any model name - OpenRouter is the fallback provider
# Higher priority providers (native APIs, custom endpoints) get first chance
# Check model restrictions if configured
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service()
if restriction_service:
# Check if model name itself is allowed
if restriction_service.is_allowed(self.get_provider_type(), model_name):
return True

# Also check aliases - model_name might be an alias
model_config = self._registry.resolve(model_name)
if model_config and model_config.aliases:
for alias in model_config.aliases:
if restriction_service.is_allowed(self.get_provider_type(), alias):
return True

# If restrictions are configured and model/alias not in allowed list, reject
return False

# No restrictions configured - accept any model name as the fallback provider
return True
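The practical effect: without a configured allow-list OpenRouter still accepts anything, but an allow-list now filters both model names and aliases. The sketch below assumes the restriction service is driven by an OPENROUTER_ALLOWED_MODELS-style environment variable (naming assumed, not shown in this diff):

# Hedged sketch of the new behaviour
# No restrictions configured:
openrouter.validate_model_name("anything/model")    # True (catch-all)
# With an allow-list restricting OpenRouter to e.g. "opus":
openrouter.validate_model_name("opus")              # True  (allowed alias)
openrouter.validate_model_name("some/other-model")  # False (rejected)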
|
||||
def generate_content(
@@ -260,3 +271,35 @@ class OpenRouterProvider(OpenAICompatibleProvider):
all_models.add(config.model_name.lower())

return list(all_models)

def get_model_configurations(self) -> dict[str, ModelCapabilities]:
"""Get model configurations from the registry.

For OpenRouter, we convert registry configurations to ModelCapabilities objects.

Returns:
Dictionary mapping model names to their ModelCapabilities objects
"""
configs = {}

if self._registry:
# Get all models from registry
for model_name in self._registry.list_models():
# Only include models that this provider validates
if self.validate_model_name(model_name):
config = self._registry.resolve(model_name)
if config and not config.is_custom: # Only OpenRouter models, not custom ones
# Use ModelCapabilities directly from registry
configs[model_name] = config

return configs

def get_all_model_aliases(self) -> dict[str, list[str]]:
"""Get all model aliases from the registry.

Returns:
Dictionary mapping model names to their list of aliases
"""
# Since aliases are now included in the configurations,
# we can use the base class implementation
return super().get_all_model_aliases()

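Together, these two hooks let callers enumerate everything the provider exposes without touching the registry directly. A small usage sketch (the helper name and the `provider` argument are placeholders, not part of this commit):

def print_catalog(provider) -> None:
    # Sketch: dump model names, context sizes and shortcuts via the hook methods above.
    configs = provider.get_model_configurations()   # model_name -> ModelCapabilities
    aliases = provider.get_all_model_aliases()      # model_name -> [alias, ...]
    for model_name, capabilities in sorted(configs.items()):
        shortcuts = ", ".join(aliases.get(model_name, [])) or "-"
        print(f"{model_name:40} ctx={capabilities.context_window:>9} aliases: {shortcuts}")
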
@@ -2,7 +2,6 @@

import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

@@ -11,58 +10,10 @@ from utils.file_utils import read_json_file
from .base import (
ModelCapabilities,
ProviderType,
TemperatureConstraint,
create_temperature_constraint,
)


@dataclass
class OpenRouterModelConfig:
"""Configuration for an OpenRouter model."""

model_name: str
aliases: list[str] = field(default_factory=list)
context_window: int = 32768 # Total context window size in tokens
supports_extended_thinking: bool = False
supports_system_prompts: bool = True
supports_streaming: bool = True
supports_function_calling: bool = False
supports_json_mode: bool = False
supports_images: bool = False # Whether model can process images
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls
temperature_constraint: Optional[str] = (
None # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
)
is_custom: bool = False # True for models that should only be used with custom endpoints
description: str = ""

def _create_temperature_constraint(self) -> TemperatureConstraint:
"""Create temperature constraint object from configuration.

Returns:
TemperatureConstraint object based on configuration
"""
return create_temperature_constraint(self.temperature_constraint or "range")

def to_capabilities(self) -> ModelCapabilities:
"""Convert to ModelCapabilities object."""
return ModelCapabilities(
provider=ProviderType.OPENROUTER,
model_name=self.model_name,
friendly_name="OpenRouter",
context_window=self.context_window,
supports_extended_thinking=self.supports_extended_thinking,
supports_system_prompts=self.supports_system_prompts,
supports_streaming=self.supports_streaming,
supports_function_calling=self.supports_function_calling,
supports_images=self.supports_images,
max_image_size_mb=self.max_image_size_mb,
supports_temperature=self.supports_temperature,
temperature_constraint=self._create_temperature_constraint(),
)


class OpenRouterModelRegistry:
"""Registry for managing OpenRouter model configurations and aliases."""

@@ -73,7 +24,7 @@ class OpenRouterModelRegistry:
config_path: Path to config file. If None, uses default locations.
"""
self.alias_map: dict[str, str] = {} # alias -> model_name
self.model_map: dict[str, OpenRouterModelConfig] = {} # model_name -> config
self.model_map: dict[str, ModelCapabilities] = {} # model_name -> config

# Determine config path
if config_path:
@@ -139,7 +90,7 @@ class OpenRouterModelRegistry:
self.alias_map = {}
self.model_map = {}

def _read_config(self) -> list[OpenRouterModelConfig]:
def _read_config(self) -> list[ModelCapabilities]:
"""Read configuration from file.

Returns:
@@ -158,7 +109,27 @@ class OpenRouterModelRegistry:
# Parse models
configs = []
for model_data in data.get("models", []):
config = OpenRouterModelConfig(**model_data)
# Create ModelCapabilities directly from JSON data
# Handle temperature_constraint conversion
temp_constraint_str = model_data.get("temperature_constraint")
temp_constraint = create_temperature_constraint(temp_constraint_str or "range")

# Set provider-specific defaults based on is_custom flag
is_custom = model_data.get("is_custom", False)
if is_custom:
model_data.setdefault("provider", ProviderType.CUSTOM)
model_data.setdefault("friendly_name", f"Custom ({model_data.get('model_name', 'Unknown')})")
else:
model_data.setdefault("provider", ProviderType.OPENROUTER)
model_data.setdefault("friendly_name", f"OpenRouter ({model_data.get('model_name', 'Unknown')})")
model_data["temperature_constraint"] = temp_constraint

# Remove the string version of temperature_constraint before creating ModelCapabilities
if "temperature_constraint" in model_data and isinstance(model_data["temperature_constraint"], str):
del model_data["temperature_constraint"]
model_data["temperature_constraint"] = temp_constraint

config = ModelCapabilities(**model_data)
configs.append(config)

return configs

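With OpenRouterModelConfig gone, each entry under the config file's "models" key is defaulted and then passed straight into ModelCapabilities(**model_data). A hedged sketch of one such entry as the parser would see it (the model name, numbers and flags are illustrative, not copied from the shipped configuration):

model_data = {
    "model_name": "vendor/example-model",    # hypothetical model identifier
    "aliases": ["example"],
    "context_window": 65_536,
    "max_output_tokens": 16_384,
    "supports_json_mode": True,
    "temperature_constraint": "range",       # string form, converted by the code above
}
# The loop above turns the string constraint into an object, defaults
# "provider"/"friendly_name" from is_custom, then calls ModelCapabilities(**model_data).
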
@@ -168,7 +139,7 @@ class OpenRouterModelRegistry:
except Exception as e:
raise ValueError(f"Error reading config from {self.config_path}: {e}")

def _build_maps(self, configs: list[OpenRouterModelConfig]) -> None:
def _build_maps(self, configs: list[ModelCapabilities]) -> None:
"""Build alias and model maps from configurations.

Args:
@@ -211,7 +182,7 @@ class OpenRouterModelRegistry:
self.alias_map = alias_map
self.model_map = model_map

def resolve(self, name_or_alias: str) -> Optional[OpenRouterModelConfig]:
def resolve(self, name_or_alias: str) -> Optional[ModelCapabilities]:
"""Resolve a model name or alias to configuration.

Args:
@@ -237,10 +208,8 @@ class OpenRouterModelRegistry:
Returns:
ModelCapabilities if found, None otherwise
"""
config = self.resolve(name_or_alias)
if config:
return config.to_capabilities()
return None
# Registry now returns ModelCapabilities directly
return self.resolve(name_or_alias)

def list_models(self) -> list[str]:
"""List all available model names."""

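After this change resolve() is the whole lookup path: get_capabilities() simply forwards to it, and callers get back the ModelCapabilities stored in model_map, or None. A short usage sketch (the registry is built from the default config locations; the alias being looked up is a placeholder):

registry = OpenRouterModelRegistry()        # falls back to the default config locations
caps = registry.resolve("example-alias")    # accepts either an alias or a full model name
if caps is not None:
    print(caps.model_name, caps.context_window)
else:
    print("unknown model or alias")
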
@@ -24,8 +24,6 @@ class ModelProviderRegistry:
cls._instance._providers = {}
cls._instance._initialized_providers = {}
logging.debug(f"REGISTRY: Created instance {cls._instance}")
else:
logging.debug(f"REGISTRY: Returning existing instance {cls._instance}")
return cls._instance

@classmethod
@@ -129,7 +127,6 @@ class ModelProviderRegistry:
logging.debug(f"Available providers in registry: {list(instance._providers.keys())}")

for provider_type in PROVIDER_PRIORITY_ORDER:
logging.debug(f"Checking provider_type: {provider_type}")
if provider_type in instance._providers:
logging.debug(f"Found {provider_type} in registry")
# Get or create provider instance

providers/xai.py

@@ -7,7 +7,7 @@ from .base import (
ModelCapabilities,
ModelResponse,
ProviderType,
RangeTemperatureConstraint,
create_temperature_constraint,
)
from .openai_compatible import OpenAICompatibleProvider

@@ -19,23 +19,44 @@ class XAIModelProvider(OpenAICompatibleProvider):

FRIENDLY_NAME = "X.AI"

# Model configurations
# Model configurations using ModelCapabilities objects
SUPPORTED_MODELS = {
"grok-3": {
"context_window": 131_072, # 131K tokens
"supports_extended_thinking": False,
"description": "GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
},
"grok-3-fast": {
"context_window": 131_072, # 131K tokens
"supports_extended_thinking": False,
"description": "GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
},
# Shorthands for convenience
"grok": "grok-3", # Default to grok-3
"grok3": "grok-3",
"grok3fast": "grok-3-fast",
"grokfast": "grok-3-fast",
"grok-3": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3",
friendly_name="X.AI (Grok 3)",
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="GROK-3 (131K context) - Advanced reasoning model from X.AI, excellent for complex analysis",
aliases=["grok", "grok3"],
),
"grok-3-fast": ModelCapabilities(
provider=ProviderType.XAI,
model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)",
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
supports_json_mode=False, # Assuming GROK doesn't have JSON mode yet
supports_images=False, # Assuming GROK is text-only for now
max_image_size_mb=0.0,
supports_temperature=True,
temperature_constraint=create_temperature_constraint("range"),
description="GROK-3 Fast (131K context) - Higher performance variant, faster processing but more expensive",
aliases=["grok3fast", "grokfast", "grok3-fast"],
),
}

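Because the shorthands now live on each entry's aliases list instead of being extra string keys, an alias table can be derived from SUPPORTED_MODELS itself. A self-contained sketch of that derivation, using a stand-in dataclass so it runs without the provider package:

from dataclasses import dataclass, field

@dataclass
class _Caps:  # stand-in for ModelCapabilities, only the fields this sketch needs
    model_name: str
    aliases: list[str] = field(default_factory=list)

SUPPORTED = {
    "grok-3": _Caps("grok-3", aliases=["grok", "grok3"]),
    "grok-3-fast": _Caps("grok-3-fast", aliases=["grok3fast", "grokfast", "grok3-fast"]),
}

# alias (lowercased) -> canonical model name
alias_map = {alias.lower(): name for name, caps in SUPPORTED.items() for alias in caps.aliases}

assert alias_map["grok"] == "grok-3"
assert alias_map["grokfast"] == "grok-3-fast"
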
def __init__(self, api_key: str, **kwargs):
@@ -49,7 +70,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
# Resolve shorthand
resolved_name = self._resolve_model_name(model_name)

if resolved_name not in self.SUPPORTED_MODELS or isinstance(self.SUPPORTED_MODELS[resolved_name], str):
if resolved_name not in self.SUPPORTED_MODELS:
raise ValueError(f"Unsupported X.AI model: {model_name}")

# Check if model is allowed by restrictions
@@ -59,23 +80,8 @@ class XAIModelProvider(OpenAICompatibleProvider):
if not restriction_service.is_allowed(ProviderType.XAI, resolved_name, model_name):
raise ValueError(f"X.AI model '{model_name}' is not allowed by restriction policy.")

config = self.SUPPORTED_MODELS[resolved_name]

# Define temperature constraints for GROK models
# GROK supports the standard OpenAI temperature range
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)

return ModelCapabilities(
provider=ProviderType.XAI,
model_name=resolved_name,
friendly_name=self.FRIENDLY_NAME,
context_window=config["context_window"],
supports_extended_thinking=config["supports_extended_thinking"],
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=True,
temperature_constraint=temp_constraint,
)
# Return the ModelCapabilities object directly from SUPPORTED_MODELS
return self.SUPPORTED_MODELS[resolved_name]

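Since get_capabilities() now returns the shared objects stored in SUPPORTED_MODELS, the limits can also be read straight off the class-level table. A small check against the values defined above (the import path is an assumption about the package layout):

from providers.xai import XAIModelProvider  # import path assumed

caps = XAIModelProvider.SUPPORTED_MODELS["grok-3"]
assert caps.friendly_name == "X.AI (Grok 3)"
assert caps.context_window == 131_072
assert "grok" in caps.aliases
# On an instance, provider.get_capabilities("grok-3") returns this same object.
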
def get_provider_type(self) -> ProviderType:
"""Get the provider type."""
@@ -86,7 +92,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
resolved_name = self._resolve_model_name(model_name)

# First check if model is supported
if resolved_name not in self.SUPPORTED_MODELS or not isinstance(self.SUPPORTED_MODELS[resolved_name], dict):
if resolved_name not in self.SUPPORTED_MODELS:
return False

# Then check if model is allowed by restrictions
@@ -127,61 +133,3 @@ class XAIModelProvider(OpenAICompatibleProvider):
# Currently GROK models do not support extended thinking
# This may change with future GROK model releases
return False

def list_models(self, respect_restrictions: bool = True) -> list[str]:
"""Return a list of model names supported by this provider.

Args:
respect_restrictions: Whether to apply provider-specific restriction logic.

Returns:
List of model names available from this provider
"""
from utils.model_restrictions import get_restriction_service

restriction_service = get_restriction_service() if respect_restrictions else None
models = []

for model_name, config in self.SUPPORTED_MODELS.items():
# Handle both base models (dict configs) and aliases (string values)
if isinstance(config, str):
# This is an alias - check if the target model would be allowed
target_model = config
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), target_model):
continue
# Allow the alias
models.append(model_name)
else:
# This is a base model with config dict
# Check restrictions if enabled
if restriction_service and not restriction_service.is_allowed(self.get_provider_type(), model_name):
continue
models.append(model_name)

return models

def list_all_known_models(self) -> list[str]:
"""Return all model names known by this provider, including alias targets.

Returns:
List of all model names and alias targets known by this provider
"""
all_models = set()

for model_name, config in self.SUPPORTED_MODELS.items():
# Add the model name itself
all_models.add(model_name.lower())

# If it's an alias (string value), add the target model too
if isinstance(config, str):
all_models.add(config.lower())

return list(all_models)

def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name."""
# Check if it's a shorthand
shorthand_value = self.SUPPORTED_MODELS.get(model_name)
if isinstance(shorthand_value, str):
return shorthand_value
return model_name