feat: added `intelligence_score` to the model capabilities schema; an optional 1-20 rating that influences the sort order of models presented to the CLI in auto selection mode

fix: re-introduced the model definition into the schema, but selectively: only a concise ranked summary is generated per tool. Required so the CLI calls and uses the correct model
fix: removed the `model` param from tools that don't need it
fix: enforced adherence to `*_ALLOWED_MODELS` by advertising only the allowed models to the CLI
fix: removed duplicates across providers when passing canonical names back to the CLI; the first enabled provider wins
Fahad
2025-10-02 21:43:44 +04:00
parent e78fe35a1b
commit 6cab9e56fc
22 changed files with 525 additions and 110 deletions

View File

@@ -283,10 +283,12 @@ DISABLED_TOOLS=
## Quick Links
**📖 Documentation**
- [Docs Overview](docs/index.md) - High-level map of major guides
- [Getting Started](docs/getting-started.md) - Complete setup guide
- [Tools Reference](docs/tools/) - All tools with examples
- [Advanced Usage](docs/advanced-usage.md) - Power user features
- [Configuration](docs/configuration.md) - Environment variables, restrictions
- [Model Ranking Guide](docs/model_ranking.md) - How intelligence scores drive auto-mode suggestions
**🔧 Setup & Support**
- [WSL Setup](docs/wsl-setup.md) - Windows users

View File

@@ -31,7 +31,8 @@
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
"description": "Human-readable description of the model"
"description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
},
"example_custom_model": {
"model_name": "my-local-model",
@@ -46,7 +47,8 @@
"supports_temperature": true,
"temperature_constraint": "range",
"is_custom": true,
"description": "Example custom/local model for Ollama, vLLM, etc."
"description": "Example custom/local model for Ollama, vLLM, etc.",
"intelligence_score": 12
}
},
"models": [
@@ -63,7 +65,8 @@
"supports_function_calling": false,
"supports_images": true,
"max_image_size_mb": 5.0,
"description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency"
"description": "Claude Sonnet 4.5 - High-performance model with exceptional reasoning and efficiency",
"intelligence_score": 12
},
{
"model_name": "anthropic/claude-opus-4.1",
@@ -75,7 +78,8 @@
"supports_function_calling": false,
"supports_images": true,
"max_image_size_mb": 5.0,
"description": "Claude Opus 4.1 - Our most capable and intelligent model yet"
"description": "Claude Opus 4.1 - Our most capable and intelligent model yet",
"intelligence_score": 14
},
{
"model_name": "anthropic/claude-sonnet-4.1",
@@ -87,7 +91,8 @@
"supports_function_calling": false,
"supports_images": true,
"max_image_size_mb": 5.0,
"description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency"
"description": "Claude Sonnet 4.1 - Last generation high-performance model with exceptional reasoning and efficiency",
"intelligence_score": 10
},
{
"model_name": "anthropic/claude-3.5-haiku",
@@ -99,31 +104,34 @@
"supports_function_calling": false,
"supports_images": true,
"max_image_size_mb": 5.0,
"description": "Claude 3 Haiku - Fast and efficient with vision"
"description": "Claude 3 Haiku - Fast and efficient with vision",
"intelligence_score": 8
},
{
"model_name": "google/gemini-2.5-pro",
"aliases": ["pro","gemini-pro", "gemini", "pro-openrouter"],
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_extended_thinking": true,
"supports_json_mode": true,
"supports_function_calling": false,
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"description": "Google's Gemini 2.5 Pro via OpenRouter with vision"
"description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
"intelligence_score": 18
},
{
"model_name": "google/gemini-2.5-flash",
"aliases": ["flash","gemini-flash"],
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": false,
"supports_extended_thinking": true,
"supports_json_mode": true,
"supports_function_calling": false,
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 15.0,
"description": "Google's Gemini 2.5 Flash via OpenRouter with vision"
"description": "Google's Gemini 2.5 Flash via OpenRouter with vision",
"intelligence_score": 10
},
{
"model_name": "mistralai/mistral-large-2411",
@@ -135,7 +143,8 @@
"supports_function_calling": true,
"supports_images": false,
"max_image_size_mb": 0.0,
"description": "Mistral's largest model (text-only)"
"description": "Mistral's largest model (text-only)",
"intelligence_score": 11
},
{
"model_name": "meta-llama/llama-3-70b",
@@ -147,7 +156,8 @@
"supports_function_calling": false,
"supports_images": false,
"max_image_size_mb": 0.0,
"description": "Meta's Llama 3 70B model (text-only)"
"description": "Meta's Llama 3 70B model (text-only)",
"intelligence_score": 9
},
{
"model_name": "deepseek/deepseek-r1-0528",
@@ -159,7 +169,8 @@
"supports_function_calling": false,
"supports_images": false,
"max_image_size_mb": 0.0,
"description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)"
"description": "DeepSeek R1 with thinking mode - advanced reasoning capabilities (text-only)",
"intelligence_score": 15
},
{
"model_name": "perplexity/llama-3-sonar-large-32k-online",
@@ -171,7 +182,8 @@
"supports_function_calling": false,
"supports_images": false,
"max_image_size_mb": 0.0,
"description": "Perplexity's online model with web search (text-only)"
"description": "Perplexity's online model with web search (text-only)",
"intelligence_score": 9
},
{
"model_name": "openai/o3",
@@ -185,7 +197,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3 model - well-rounded and powerful across domains with vision"
"description": "OpenAI's o3 model - well-rounded and powerful across domains with vision",
"intelligence_score": 14
},
{
"model_name": "openai/o3-mini",
@@ -199,7 +212,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3-mini model - balanced performance and speed with vision"
"description": "OpenAI's o3-mini model - balanced performance and speed with vision",
"intelligence_score": 12
},
{
"model_name": "openai/o3-mini-high",
@@ -213,7 +227,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision"
"description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision",
"intelligence_score": 13
},
{
"model_name": "openai/o3-pro",
@@ -227,7 +242,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision"
"description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision",
"intelligence_score": 15
},
{
"model_name": "openai/o4-mini",
@@ -241,7 +257,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision"
"description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision",
"intelligence_score": 11
},
{
"model_name": "openai/gpt-5",
@@ -255,7 +272,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": true,
"temperature_constraint": "range",
"description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support"
"description": "GPT-5 (400K context, 128K output) - Advanced model with reasoning support",
"intelligence_score": 16
},
{
"model_name": "openai/gpt-5-mini",
@@ -269,7 +287,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": true,
"temperature_constraint": "fixed",
"description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support"
"description": "GPT-5-mini (400K context, 128K output) - Efficient variant with reasoning support",
"intelligence_score": 15
},
{
"model_name": "openai/gpt-5-nano",
@@ -283,7 +302,8 @@
"max_image_size_mb": 20.0,
"supports_temperature": true,
"temperature_constraint": "fixed",
"description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks"
"description": "GPT-5 nano (400K context, 128K output) - Fastest, cheapest version of GPT-5 for summarization and classification tasks",
"intelligence_score": 13
},
{
"model_name": "llama3.2",
@@ -296,7 +316,8 @@
"supports_images": false,
"max_image_size_mb": 0.0,
"is_custom": true,
"description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)"
"description": "Local Llama 3.2 model via custom endpoint (Ollama/vLLM) - 128K context window (text-only)",
"intelligence_score": 6
}
]
}

View File

@@ -10,6 +10,13 @@ Each provider:
- Implements the minimal abstract hooks (`get_provider_type()` and `generate_content()`)
- Gets registered automatically via environment variables
### Intelligence score cheatsheet
Set `intelligence_score` (1-20) when you want deterministic ordering in auto
mode or the `listmodels` output. The runtime rank starts from this human score
and adds smaller bonuses for context window, extended thinking, and other
features ([details here](model_ranking.md)).
## Choose Your Implementation Path
**Option A: Full Provider (`ModelProvider`)**
@@ -68,6 +75,7 @@ class ExampleModelProvider(ModelProvider):
provider=ProviderType.EXAMPLE,
model_name="example-large",
friendly_name="Example Large",
intelligence_score=18,
context_window=100_000,
max_output_tokens=50_000,
supports_extended_thinking=False,
@@ -79,6 +87,7 @@ class ExampleModelProvider(ModelProvider):
provider=ProviderType.EXAMPLE,
model_name="example-small",
friendly_name="Example Small",
intelligence_score=14,
context_window=32_000,
max_output_tokens=16_000,
temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),

View File

@@ -60,6 +60,10 @@ The server uses `conf/custom_models.json` to map convenient aliases to both Open
View the full list in [`conf/custom_models.json`](conf/custom_models.json).
To control ordering in auto mode or the `listmodels` summary, adjust the
[`intelligence_score`](model_ranking.md) for each entry (or rely on the automatic
heuristic described there).
**Note:** While you can use any OpenRouter model by its full name, models not in the config file will use generic capabilities (32K context window, no extended thinking, etc.) which may not match the model's actual capabilities. For best results, add new models to the config file with their proper specifications.
## Quick Start

15
docs/index.md Normal file
View File

@@ -0,0 +1,15 @@
# Zen MCP Server Documentation
| Document | Description |
|----------|-------------|
| [Getting Started](getting-started.md) | Installation paths, prerequisite setup, and first-run guidance. |
| [Adding Providers](adding_providers.md) | How to register new AI providers and advertise capabilities. |
| [Model Ranking](model_ranking.md) | How intelligence scores translate into auto-mode ordering. |
| [Custom Models](custom_models.md) | Configure OpenRouter/custom models and aliases. |
| [Adding Tools](adding_tools.md) | Create new tools using the shared base classes. |
| [Advanced Usage](advanced-usage.md) | Auto-mode tricks, workflow tools, and collaboration tips. |
| [Configuration](configuration.md) | .env options, restriction policies, logging levels. |
| [Testing](testing.md) | Test strategy, command cheats, and coverage notes. |
| [Troubleshooting](troubleshooting.md) | Common issues and resolutions. |
Additional docs live in this directory; start with the table above to orient yourself.

69
docs/model_ranking.md Normal file
View File

@@ -0,0 +1,69 @@
# Model Capability Ranking
Auto mode needs a short, trustworthy list of models to suggest. The server
computes a capability rank for every model at runtime using a simple recipe:
1. Start with the human-supplied `intelligence_score` (1-20). This is the
anchor: multiply it by five to map onto the 0-100 scale the server uses.
2. Add a few light bonuses for hard capabilities:
- **Context window:** up to +5 (log-scale bonus when the model exceeds ~1K tokens).
- **Output budget:** +2 for ≥65K tokens, +1 for ≥32K.
- **Extended thinking:** +3 when the provider supports it.
- **Function calling / JSON / images:** +1 each when available.
- **Custom endpoints:** -1 to nudge cloud-hosted defaults ahead unless tuned.
3. Clamp the final score to 0-100 so downstream callers can rely on the range.
In code this looks like:
```python
base = clamp(intelligence_score, 1, 20) * 5
ctx_bonus = min(5, max(0, log10(context_window) - 3))
output_bonus = 2 if max_output_tokens >= 65_000 else 1 if max_output_tokens >= 32_000 else 0
feature_bonus = (
(3 if supports_extended_thinking else 0)
+ (1 if supports_function_calling else 0)
+ (1 if supports_json_mode else 0)
+ (1 if supports_images else 0)
)
penalty = 1 if is_custom else 0
effective_rank = clamp(base + ctx_bonus + output_bonus + feature_bonus - penalty, 0, 100)
```
The bonuses are intentionally small—the human intelligence score does most
of the work so you can enforce organisational preferences easily.
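As a quick sanity check, here is a standalone sketch of the same recipe (the helper name is illustrative; the values come from the Gemini 2.5 Pro entry above):

```python
import math

def effective_rank(intelligence: int, context_window: int, max_output_tokens: int,
                   thinking: bool, functions: bool, json_mode: bool, images: bool,
                   is_custom: bool = False) -> int:
    """Standalone mirror of the recipe above; illustrative, not the server's implementation."""
    base = max(1, min(20, intelligence)) * 5
    ctx_bonus = int(min(5, max(0.0, math.log10(context_window) - 3))) if context_window > 0 else 0
    output_bonus = 2 if max_output_tokens >= 65_000 else 1 if max_output_tokens >= 32_000 else 0
    feature_bonus = (3 if thinking else 0) + (1 if functions else 0) + (1 if json_mode else 0) + (1 if images else 0)
    penalty = 1 if is_custom else 0
    return max(0, min(100, base + ctx_bonus + output_bonus + feature_bonus - penalty))

# 18*5 = 90, +3 context, +2 output, +3 thinking, +1 functions, +1 JSON, +1 images = 101, clamped to 100
print(effective_rank(18, 1_048_576, 65_536, thinking=True, functions=True, json_mode=True, images=True))
```

Lowering the intelligence score or setting `is_custom=True` pulls the result down quickly, which is why the human rating dominates the ordering.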
## Picking an intelligence score
A straightforward rubric that mirrors typical provider tiers:
| Intelligence | Guidance |
|--------------|----------|
| 18-19 | Frontier reasoning models (Gemini 2.5 Pro, GPT-5) |
| 15-17 | Strong general models with large context (O3 Pro, DeepSeek R1) |
| 12-14 | Balanced assistants (Claude Opus/Sonnet, Mistral Large) |
| 9-11 | Fast distillations (Gemini Flash, GPT-5 Mini, Mistral medium) |
| 6-8 | Local or efficiency-focused models (Llama 3 70B, Claude Haiku) |
| ≤5 | Experimental/lightweight models |
Record the reasoning for your scores so future updates stay consistent.
## How the rank is used
The ranked list is cached per provider and consumed by:
- Tool schemas (`model` parameter descriptions) when auto mode is active.
- The `listmodels` tool's “top models” sections.
- Fallback messaging when a requested model is unavailable.
Because the rank is computed after restriction filters, only allowed models
appear in these summaries.
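For example, a minimal sketch of reading the ranked roster from a registered provider (this assumes the corresponding API key is configured so the provider is actually registered):

```python
from providers.registry import ModelProviderRegistry
from providers.shared import ProviderType

# Highest effective rank first, ties broken alphabetically by model name.
provider = ModelProviderRegistry.get_provider(ProviderType.GOOGLE)
if provider:
    for name, capabilities in provider.get_capabilities_by_rank():
        print(name, capabilities.get_effective_capability_rank())
```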
## Customising further
If you need a different weighting you can:
- Override `intelligence_score` in your provider or custom model config.
- Subclass the provider and override `get_effective_capability_rank()`.
- Post-process the rank via `get_capabilities_by_rank()` before surfacing it.
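As a sketch of the last option, the ranked list can be re-weighted after retrieval, for example to demote custom-endpoint models a little further (the extra penalty of 5 is an arbitrary illustration):

```python
from providers.registry import ModelProviderRegistry
from providers.shared import ProviderType

provider = ModelProviderRegistry.get_provider(ProviderType.OPENROUTER)
if provider:
    # Subtract an extra 5 points from custom/local endpoints before surfacing the roster.
    adjusted = sorted(
        provider.get_capabilities_by_rank(),
        key=lambda item: (
            -(item[1].get_effective_capability_rank() - (5 if item[1].is_custom else 0)),
            item[0],
        ),
    )
    for name, _capabilities in adjusted:
        print(name)
```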
Most teams find that adjusting `intelligence_score` alone is enough to keep
auto mode honest without revisiting code.

View File

@@ -42,6 +42,7 @@ class ModelProvider(ABC):
"""Initialize the provider with API key and optional configuration."""
self.api_key = api_key
self.config = kwargs
self._sorted_capabilities_cache: Optional[list[tuple[str, ModelCapabilities]]] = None
# ------------------------------------------------------------------
# Provider identity & capability surface
@@ -77,6 +78,27 @@ class ModelProvider(ABC):
return {k: v for k, v in model_map.items() if isinstance(v, ModelCapabilities)}
return {}
def get_capabilities_by_rank(self) -> list[tuple[str, ModelCapabilities]]:
"""Return model capabilities sorted by effective capability rank."""
if self._sorted_capabilities_cache is not None:
return list(self._sorted_capabilities_cache)
model_configs = self.get_all_model_capabilities()
if not model_configs:
self._sorted_capabilities_cache = []
return []
items = list(model_configs.items())
items.sort(key=lambda item: (-item[1].get_effective_capability_rank(), item[0]))
self._sorted_capabilities_cache = items
return list(items)
def _invalidate_capability_cache(self) -> None:
"""Clear cached sorted capability data (call after dynamic updates)."""
self._sorted_capabilities_cache = None
def list_models(
self,
*,

View File

@@ -33,6 +33,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="o3-2025-04-16",
friendly_name="DIAL (O3)",
intelligence_score=14,
context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False,
@@ -51,6 +52,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="o4-mini-2025-04-16",
friendly_name="DIAL (O4-mini)",
intelligence_score=11,
context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False,
@@ -69,6 +71,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-sonnet-4.1-20250805-v1:0",
friendly_name="DIAL (Sonnet 4.1)",
intelligence_score=10,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False,
@@ -87,6 +90,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-sonnet-4.1-20250805-v1:0-with-thinking",
friendly_name="DIAL (Sonnet 4.1 Thinking)",
intelligence_score=11,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant
@@ -105,6 +109,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-opus-4.1-20250805-v1:0",
friendly_name="DIAL (Opus 4.1)",
intelligence_score=14,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False,
@@ -123,6 +128,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-opus-4.1-20250805-v1:0-with-thinking",
friendly_name="DIAL (Opus 4.1 Thinking)",
intelligence_score=15,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant
@@ -141,6 +147,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="gemini-2.5-pro-preview-03-25-google-search",
friendly_name="DIAL (Gemini 2.5 Pro Search)",
intelligence_score=17,
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False, # DIAL doesn't expose thinking mode
@@ -159,6 +166,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="gemini-2.5-pro-preview-05-06",
friendly_name="DIAL (Gemini 2.5 Pro)",
intelligence_score=18,
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False,
@@ -177,6 +185,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="gemini-2.5-flash-preview-05-20",
friendly_name="DIAL (Gemini Flash 2.5)",
intelligence_score=10,
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False,

View File

@@ -33,6 +33,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-pro",
friendly_name="Gemini (Pro 2.5)",
intelligence_score=18,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,
@@ -52,6 +53,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash",
friendly_name="Gemini (Flash 2.0)",
intelligence_score=9,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, # Experimental thinking mode
@@ -71,6 +73,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash-lite",
friendly_name="Gemin (Flash Lite 2.0)",
intelligence_score=7,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=False, # Not supported per user request
@@ -89,6 +92,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-flash",
friendly_name="Gemini (Flash 2.5)",
intelligence_score=10,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,

View File

@@ -26,6 +26,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5",
friendly_name="OpenAI (GPT-5)",
intelligence_score=16,
context_window=400_000, # 400K tokens
max_output_tokens=128_000, # 128K max output tokens
supports_extended_thinking=True, # Supports reasoning tokens
@@ -44,6 +45,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5-mini",
friendly_name="OpenAI (GPT-5-mini)",
intelligence_score=15,
context_window=400_000, # 400K tokens
max_output_tokens=128_000, # 128K max output tokens
supports_extended_thinking=True, # Supports reasoning tokens
@@ -62,6 +64,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5-nano",
friendly_name="OpenAI (GPT-5 nano)",
intelligence_score=13,
context_window=400_000,
max_output_tokens=128_000,
supports_extended_thinking=True,
@@ -80,6 +83,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o3",
friendly_name="OpenAI (O3)",
intelligence_score=14,
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
@@ -98,6 +102,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o3-mini",
friendly_name="OpenAI (O3-mini)",
intelligence_score=12,
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
@@ -116,6 +121,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o3-pro",
friendly_name="OpenAI (O3-Pro)",
intelligence_score=15,
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
@@ -134,6 +140,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o4-mini",
friendly_name="OpenAI (O4-mini)",
intelligence_score=11,
context_window=200_000, # 200K tokens
max_output_tokens=65536, # 64K max output tokens
supports_extended_thinking=False,
@@ -152,6 +159,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-4.1",
friendly_name="OpenAI (GPT 4.1)",
intelligence_score=13,
context_window=1_000_000, # 1M tokens
max_output_tokens=32_768,
supports_extended_thinking=False,

View File

@@ -85,6 +85,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENROUTER,
model_name=canonical_name,
friendly_name=self.FRIENDLY_NAME,
intelligence_score=9,
context_window=32_768,
max_output_tokens=32_768,
supports_extended_thinking=False,

View File

@@ -1,5 +1,6 @@
"""Dataclass describing the feature set of a model exposed by a provider."""
import math
from dataclasses import dataclass, field
from typing import Optional
@@ -32,6 +33,7 @@ class ModelCapabilities:
provider: ProviderType
model_name: str
friendly_name: str
intelligence_score: int = 10 # Human-curated 1-20 score reflecting general capability
description: str = ""
aliases: list[str] = field(default_factory=list)
@@ -69,6 +71,42 @@ class ModelCapabilities:
return self.temperature_constraint.get_corrected_value(requested_temperature)
def get_effective_capability_rank(self) -> int:
"""Calculate the runtime capability rank from intelligence + capabilities."""
# Human signal drives the baseline (1-20 → 5-100 after scaling)
base_intelligence = self.intelligence_score if self.intelligence_score else 10
base_intelligence = max(1, min(20, base_intelligence))
score = base_intelligence * 5
# Context window bonus with gentle diminishing returns
ctx_bonus = 0
ctx = max(self.context_window, 0)
if ctx > 0:
ctx_bonus = int(min(5, max(0.0, math.log10(ctx) - 3)))
score += ctx_bonus
# Output token capacity adds a small bonus
if self.max_output_tokens >= 65_000:
score += 2
elif self.max_output_tokens >= 32_000:
score += 1
# Feature-level boosts
if self.supports_extended_thinking:
score += 3
if self.supports_function_calling:
score += 1
if self.supports_json_mode:
score += 1
if self.supports_images:
score += 1
if self.is_custom:
score -= 1
return max(0, min(100, score))
@staticmethod
def collect_aliases(model_configs: dict[str, "ModelCapabilities"]) -> dict[str, list[str]]:
"""Build a mapping of model name to aliases from capability configs."""
@@ -112,7 +150,13 @@ class ModelCapabilities:
formatted_names.append(formatted)
for base_model, capabilities in model_configs.items():
# Sort models by capability rank (descending) then by name for deterministic ordering
sorted_items = sorted(
model_configs.items(),
key=lambda item: (-item[1].get_effective_capability_rank(), item[0]),
)
for base_model, capabilities in sorted_items:
append_name(base_model)
if include_aliases and capabilities.aliases:

View File

@@ -27,6 +27,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.XAI,
model_name="grok-4",
friendly_name="X.AI (Grok 4)",
intelligence_score=16,
context_window=256_000, # 256K tokens
max_output_tokens=256_000, # 256K tokens max output
supports_extended_thinking=True, # Grok-4 supports reasoning mode
@@ -45,6 +46,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.XAI,
model_name="grok-3",
friendly_name="X.AI (Grok 3)",
intelligence_score=13,
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,
@@ -63,6 +65,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.XAI,
model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)",
intelligence_score=12,
context_window=131_072, # 131K tokens
max_output_tokens=131072,
supports_extended_thinking=False,

View File

@@ -122,8 +122,8 @@ class TestConsensusTool:
# relevant_files should be present as it's used by consensus
assert "relevant_files" in schema["properties"]
# model field should be present for Gemini compatibility (consensus uses 'models' as well)
assert "model" in schema["properties"]
# model field should NOT be present as consensus uses 'models' field instead
assert "model" not in schema["properties"]
# Verify workflow fields that should NOT be present
assert "files_checked" not in schema["properties"]

View File

@@ -26,6 +26,7 @@ from utils.conversation_memory import (
get_conversation_image_list,
get_thread,
)
from utils.model_context import ModelContext
@pytest.mark.no_mock_provider
@@ -180,17 +181,18 @@ class TestImageSupportIntegration:
try:
# Test with an invalid model name that doesn't exist in any provider
result = tool._validate_image_limits(small_images, "non-existent-model-12345")
# Use model_context parameter name (not positional)
result = tool._validate_image_limits(small_images, model_context=ModelContext("non-existent-model-12345"))
# Should return error because model not available or doesn't support images
assert result is not None
assert result["status"] == "error"
assert "is not available" in result["content"] or "does not support image processing" in result["content"]
# Test that empty/None images always pass regardless of model
result = tool._validate_image_limits([], "any-model")
result = tool._validate_image_limits([], model_context=ModelContext("gemini-2.5-pro"))
assert result is None
result = tool._validate_image_limits(None, "any-model")
result = tool._validate_image_limits(None, model_context=ModelContext("gemini-2.5-pro"))
assert result is None
finally:
@@ -215,7 +217,7 @@ class TestImageSupportIntegration:
small_image_path = temp_file.name
# Test with the default model from test environment (gemini-2.5-flash)
result = tool._validate_image_limits([small_image_path], "gemini-2.5-flash")
result = tool._validate_image_limits([small_image_path], ModelContext("gemini-2.5-flash"))
assert result is None # Should pass for Gemini models
# Create 150MB image (over typical limits)
@@ -223,7 +225,7 @@ class TestImageSupportIntegration:
temp_file.write(b"\x00" * (150 * 1024 * 1024)) # 150MB
large_image_path = temp_file.name
result = tool._validate_image_limits([large_image_path], "gemini-2.5-flash")
result = tool._validate_image_limits([large_image_path], ModelContext("gemini-2.5-flash"))
# Large images should fail validation
assert result is not None
assert result["status"] == "error"
@@ -429,14 +431,14 @@ class TestImageSupportIntegration:
images = [data_url]
# Test with a dummy model that doesn't exist in any provider
result = tool._validate_image_limits(images, "test-dummy-model-name")
result = tool._validate_image_limits(images, ModelContext("test-dummy-model-name"))
# Should return error because model not available or doesn't support images
assert result is not None
assert result["status"] == "error"
assert "is not available" in result["content"] or "does not support image processing" in result["content"]
# Test with another non-existent model to check error handling
result = tool._validate_image_limits(images, "another-dummy-model")
result = tool._validate_image_limits(images, ModelContext("another-dummy-model"))
# Should return error because model not available
assert result is not None
assert result["status"] == "error"
@@ -446,11 +448,11 @@ class TestImageSupportIntegration:
tool = ChatTool()
# Empty list should not fail validation (no need for provider setup)
result = tool._validate_image_limits([], "test_model")
result = tool._validate_image_limits([], ModelContext("gemini-2.5-pro"))
assert result is None
# None should not fail validation (no need for provider setup)
result = tool._validate_image_limits(None, "test_model")
result = tool._validate_image_limits(None, ModelContext("gemini-2.5-pro"))
assert result is None
@patch("utils.conversation_memory.get_storage")

View File

@@ -70,11 +70,25 @@ class TestListModelsRestrictions(unittest.TestCase):
config = MagicMock()
config.model_name = "anthropic/claude-opus-4-20240229"
config.context_window = 200000
config.get_effective_capability_rank.return_value = 90 # High rank for Opus
return config
elif "sonnet" in model_name.lower():
config = MagicMock()
config.model_name = "anthropic/claude-sonnet-4-20240229"
config.context_window = 200000
config.get_effective_capability_rank.return_value = 80 # Lower rank for Sonnet
return config
elif "deepseek" in model_name.lower():
config = MagicMock()
config.model_name = "deepseek/deepseek-r1-0528:free"
config.context_window = 100000
config.get_effective_capability_rank.return_value = 70
return config
elif "qwen" in model_name.lower():
config = MagicMock()
config.model_name = "qwen/qwen3-235b-a22b-04-28:free"
config.context_window = 100000
config.get_effective_capability_rank.return_value = 60
return config
return None # No config for models without aliases
@@ -90,6 +104,9 @@ class TestListModelsRestrictions(unittest.TestCase):
mock_get_provider.side_effect = get_provider_side_effect
# Ensure registry is cleared before test
ModelProviderRegistry._registry = {}
# Mock available models
mock_get_models.return_value = {
"gemini-2.5-flash": ProviderType.GOOGLE,
@@ -131,6 +148,9 @@ class TestListModelsRestrictions(unittest.TestCase):
# Parse the output
lines = result.split("\n")
# Debug: print the actual result for troubleshooting
# print(f"DEBUG: Full result:\n{result}")
# Check that OpenRouter section exists
openrouter_section_found = False
openrouter_models = []
@@ -141,15 +161,18 @@ class TestListModelsRestrictions(unittest.TestCase):
openrouter_section_found = True
elif "Available Models" in line and openrouter_section_found:
in_openrouter_section = True
elif in_openrouter_section and line.strip().startswith("- "):
# Extract model name from various line formats:
# - `model-name` → `full-name` (context)
# - `model-name`
line_content = line.strip()[2:] # Remove "- "
if "`" in line_content:
# Extract content between first pair of backticks
model_name = line_content.split("`")[1]
openrouter_models.append(model_name)
elif in_openrouter_section:
# Check for lines with model names in backticks
# Format: - `model-name` (score X)
if line.strip().startswith("- ") and "`" in line:
# Extract model name between backticks
parts = line.split("`")
if len(parts) >= 2:
model_name = parts[1]
openrouter_models.append(model_name)
# Stop parsing when we hit the next section
elif "##" in line and in_openrouter_section:
break
self.assertTrue(openrouter_section_found, "OpenRouter section not found")
self.assertEqual(

View File

@@ -174,6 +174,7 @@ class TestOpenRouterAutoMode:
mock_config = Mock()
mock_config.is_custom = False
mock_config.aliases = [] # Empty list of aliases
mock_config.get_effective_capability_rank = Mock(return_value=50) # Add ranking method
return mock_config
return None
@@ -220,6 +221,7 @@ class TestOpenRouterAutoMode:
# Mock the resolve method to return model configs with aliases
mock_model_config = Mock()
mock_model_config.aliases = [] # Empty aliases for simplicity
mock_model_config.get_effective_capability_rank = Mock(return_value=50) # Add ranking method
mock_registry.resolve.return_value = mock_model_config
ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)

View File

@@ -48,8 +48,9 @@ CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"relevant_files": "Optional supporting files that help the consensus analysis. Must be absolute full, non-abbreviated paths.",
"models": (
"List of models to consult. Each entry may include model, stance (for/against/neutral), and stance_prompt. "
"Each (model, stance) pair must be unique, e.g. [{'model':'o3','stance':'for'}, {'model':'o3','stance':'against'}]."
"User-specified list of models to consult (provide at least two entries). "
"Each entry may include model, stance (for/against/neutral), and stance_prompt. "
"Each (model, stance) pair must be unique, e.g. [{'model':'gpt5','stance':'for'}, {'model':'pro','stance':'against'}]."
),
"current_model_index": "0-based index of the next model to consult (managed internally).",
"model_responses": "Internal log of responses gathered so far.",
@@ -233,7 +234,11 @@ of the evidence, even when it strongly points in one direction.""",
},
"required": ["model"],
},
"description": CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS["models"],
"description": (
"User-specified roster of models to consult (provide at least two entries). "
+ CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS["models"]
),
"minItems": 2,
},
"current_model_index": {
"type": "integer",
@@ -268,17 +273,19 @@ of the evidence, even when it strongly points in one direction.""",
"thinking_mode", # Not used in consensus workflow
]
# Build schema with proper field exclusion
# Include model field for compatibility but don't require it
schema = WorkflowSchemaBuilder.build_schema(
requires_model = self.requires_model()
model_field_schema = self.get_model_field_schema() if requires_model else None
auto_mode = self.is_effective_auto_mode() if requires_model else False
return WorkflowSchemaBuilder.build_schema(
tool_specific_fields=consensus_field_overrides,
model_field_schema=self.get_model_field_schema(),
auto_mode=False, # Consensus doesn't require model at MCP boundary
model_field_schema=model_field_schema,
auto_mode=auto_mode,
tool_name=self.get_name(),
excluded_workflow_fields=excluded_workflow_fields,
excluded_common_fields=excluded_common_fields,
require_model=requires_model,
)
return schema
def get_required_actions(
self, step_number: int, confidence: str, findings: str, total_steps: int, request=None

View File

@@ -40,8 +40,9 @@ class ListModelsTool(BaseTool):
"""Return the JSON schema for the tool's input"""
return {
"type": "object",
"properties": {"model": {"type": "string", "description": "Model to use (ignored by listmodels tool)"}},
"properties": {},
"required": [],
"additionalProperties": False,
}
def get_annotations(self) -> Optional[dict[str, Any]]:
@@ -106,7 +107,7 @@ class ListModelsTool(BaseTool):
output_lines.append("\n**Models**:")
aliases = []
for model_name, capabilities in provider.get_all_model_capabilities().items():
for model_name, capabilities in provider.get_capabilities_by_rank():
description = capabilities.description or "No description available"
context_window = capabilities.context_window
@@ -153,33 +154,44 @@ class ListModelsTool(BaseTool):
available_models = provider.list_models(respect_restrictions=True)
registry = OpenRouterModelRegistry()
# Group by provider for better organization
providers_models = {}
for model_name in available_models: # Show ALL available models
# Try to resolve to get config details
# Group by provider and retain ranking information for consistent ordering
providers_models: dict[str, list[tuple[int, str, Optional[Any]]]] = {}
def _format_context(tokens: int) -> str:
if not tokens:
return "?"
if tokens >= 1_000_000:
return f"{tokens // 1_000_000}M"
if tokens >= 1_000:
return f"{tokens // 1_000}K"
return str(tokens)
for model_name in available_models:
config = registry.resolve(model_name)
if config:
# Extract provider from model_name
provider_name = config.model_name.split("/")[0] if "/" in config.model_name else "other"
if provider_name not in providers_models:
providers_models[provider_name] = []
providers_models[provider_name].append((model_name, config))
else:
# Model without config - add with basic info
provider_name = model_name.split("/")[0] if "/" in model_name else "other"
if provider_name not in providers_models:
providers_models[provider_name] = []
providers_models[provider_name].append((model_name, None))
provider_name = "other"
if config and "/" in config.model_name:
provider_name = config.model_name.split("/")[0]
elif "/" in model_name:
provider_name = model_name.split("/")[0]
providers_models.setdefault(provider_name, [])
rank = config.get_effective_capability_rank() if config else 0
providers_models[provider_name].append((rank, model_name, config))
output_lines.append("\n**Available Models**:")
for provider_name, models in sorted(providers_models.items()):
output_lines.append(f"\n*{provider_name.title()}:*")
for alias, config in models: # Show ALL models from each provider
for rank, alias, config in sorted(models, key=lambda item: (-item[0], item[1])):
if config:
context_str = f"{config.context_window // 1000}K" if config.context_window else "?"
output_lines.append(f"- `{alias}` → `{config.model_name}` ({context_str} context)")
context_str = _format_context(config.context_window)
suffix_parts = [f"{context_str} context"]
if getattr(config, "supports_extended_thinking", False):
suffix_parts.append("thinking")
suffix = ", ".join(suffix_parts)
output_lines.append(f"- `{alias}` → `{config.model_name}` (score {rank}, {suffix})")
else:
output_lines.append(f"- `{alias}`")
output_lines.append(f"- `{alias}` (score {rank})")
total_models = len(available_models)
# Show all models - no truncation message needed

View File

@@ -291,13 +291,161 @@ class BaseTool(ABC):
def _format_available_models_list(self) -> str:
"""Return a human-friendly list of available models or guidance when none found."""
available_models = self._get_available_models()
if not available_models:
summaries, total, has_restrictions = self._get_ranked_model_summaries()
if not summaries:
return (
"No models detected. Configure provider credentials or set DEFAULT_MODEL to a valid option. "
"If the user requested a specific model, respond with this notice instead of substituting another model."
)
return ", ".join(available_models)
display = "; ".join(summaries)
remainder = total - len(summaries)
if remainder > 0:
display = f"{display}; +{remainder} more (use the `listmodels` tool for the full roster)"
return display
@staticmethod
def _format_context_window(tokens: int) -> Optional[str]:
"""Convert a raw context window into a short display string."""
if not tokens or tokens <= 0:
return None
if tokens >= 1_000_000:
if tokens % 1_000_000 == 0:
return f"{tokens // 1_000_000}M ctx"
return f"{tokens / 1_000_000:.1f}M ctx"
if tokens >= 1_000:
if tokens % 1_000 == 0:
return f"{tokens // 1_000}K ctx"
return f"{tokens / 1_000:.1f}K ctx"
return f"{tokens} ctx"
def _collect_ranked_capabilities(self) -> list[tuple[int, str, Any]]:
"""Gather available model capabilities sorted by capability rank."""
from providers.registry import ModelProviderRegistry
ranked: list[tuple[int, str, Any]] = []
available = ModelProviderRegistry.get_available_models(respect_restrictions=True)
for model_name, provider_type in available.items():
provider = ModelProviderRegistry.get_provider(provider_type)
if not provider:
continue
try:
capabilities = provider.get_capabilities(model_name)
except ValueError:
continue
rank = capabilities.get_effective_capability_rank()
ranked.append((rank, model_name, capabilities))
ranked.sort(key=lambda item: (-item[0], item[1]))
return ranked
@staticmethod
def _normalize_model_identifier(name: str) -> str:
"""Normalize model names for deduplication across providers."""
normalized = name.lower()
if ":" in normalized:
normalized = normalized.split(":", 1)[0]
if "/" in normalized:
normalized = normalized.split("/", 1)[-1]
return normalized
def _get_ranked_model_summaries(self, limit: int = 5) -> tuple[list[str], int, bool]:
"""Return formatted, ranked model summaries and restriction status."""
ranked = self._collect_ranked_capabilities()
# Build allowlist map (provider -> lowercase names) when restrictions are active
allowed_map: dict[Any, set[str]] = {}
try:
from utils.model_restrictions import get_restriction_service
restriction_service = get_restriction_service()
if restriction_service:
from providers.shared import ProviderType
for provider_type in ProviderType:
allowed = restriction_service.get_allowed_models(provider_type)
if allowed:
allowed_map[provider_type] = {name.lower() for name in allowed if name}
except Exception:
allowed_map = {}
filtered: list[tuple[int, str, Any]] = []
seen_normalized: set[str] = set()
for rank, model_name, capabilities in ranked:
canonical_name = getattr(capabilities, "model_name", model_name)
canonical_lower = canonical_name.lower()
alias_lower = model_name.lower()
provider_type = getattr(capabilities, "provider", None)
if allowed_map:
if provider_type not in allowed_map:
continue
allowed_set = allowed_map[provider_type]
if canonical_lower not in allowed_set and alias_lower not in allowed_set:
continue
normalized = self._normalize_model_identifier(canonical_name)
if normalized in seen_normalized:
continue
seen_normalized.add(normalized)
filtered.append((rank, canonical_name, capabilities))
summaries: list[str] = []
for rank, canonical_name, capabilities in filtered[:limit]:
details: list[str] = []
context_str = self._format_context_window(getattr(capabilities, "context_window", 0))
if context_str:
details.append(context_str)
if getattr(capabilities, "supports_extended_thinking", False):
details.append("thinking")
base = f"{canonical_name} (score {rank}"
if details:
base = f"{base}, {', '.join(details)}"
summaries.append(f"{base})")
return summaries, len(filtered), bool(allowed_map)
def _get_restriction_note(self) -> Optional[str]:
"""Return a string describing active per-provider allowlists, if any."""
env_labels = {
"OPENAI_ALLOWED_MODELS": "OpenAI",
"GOOGLE_ALLOWED_MODELS": "Google",
"XAI_ALLOWED_MODELS": "X.AI",
"OPENROUTER_ALLOWED_MODELS": "OpenRouter",
"DIAL_ALLOWED_MODELS": "DIAL",
}
notes: list[str] = []
for env_var, label in env_labels.items():
raw = os.getenv(env_var)
if not raw:
continue
models = sorted({token.strip() for token in raw.split(",") if token.strip()})
if not models:
continue
notes.append(f"{label}: {', '.join(models)}")
if not notes:
return None
return "Policy allows only → " + "; ".join(notes)
def _build_model_unavailable_message(self, model_name: str) -> str:
"""Compose a consistent error message for unavailable model scenarios."""
@@ -344,8 +492,23 @@ class BaseTool(ABC):
if self.is_effective_auto_mode():
description = (
"Currently in auto model selection mode. CRITICAL: When the user names a model, you MUST use that exact name unless the server rejects it. "
"If no model is provided, you may call the `listmodels` tool to review options and select an appropriate match."
"If no model is provided, you may use the `listmodels` tool to review options and select an appropriate match."
)
summaries, total, restricted = self._get_ranked_model_summaries()
remainder = max(0, total - len(summaries))
if summaries:
top_line = "; ".join(summaries)
if remainder > 0:
label = "Allowed models" if restricted else "Top models"
top_line = f"{label}: {top_line}; +{remainder} more via `listmodels`."
else:
label = "Allowed models" if restricted else "Top models"
top_line = f"{label}: {top_line}."
description = f"{description} {top_line}"
restriction_note = self._get_restriction_note()
if restriction_note and (remainder > 0 or not summaries):
description = f"{description} {restriction_note}."
return {
"type": "string",
"description": description,
@@ -353,8 +516,23 @@ class BaseTool(ABC):
description = (
f"The default model is '{DEFAULT_MODEL}'. Override only when the user explicitly requests a different model, and use that exact name. "
"If the requested model fails validation, surface the server error instead of substituting another model. When unsure, call the `listmodels` tool for details."
"If the requested model fails validation, surface the server error instead of substituting another model. When unsure, use the `listmodels` tool for details."
)
summaries, total, restricted = self._get_ranked_model_summaries()
remainder = max(0, total - len(summaries))
if summaries:
top_line = "; ".join(summaries)
if remainder > 0:
label = "Allowed models" if restricted else "Preferred alternatives"
top_line = f"{label}: {top_line}; +{remainder} more via `listmodels`."
else:
label = "Allowed models" if restricted else "Preferred alternatives"
top_line = f"{label}: {top_line}."
description = f"{description} {top_line}"
restriction_note = self._get_restriction_note()
if restriction_note and (remainder > 0 or not summaries):
description = f"{description} {restriction_note}."
return {
"type": "string",
@@ -1242,31 +1420,6 @@ When recommending searches, be specific about what information you need and why
import base64
from pathlib import Path
# Handle legacy calls (positional model_name string)
if isinstance(model_context, str):
# Legacy call: _validate_image_limits(images, "model-name")
logger.warning(
"Legacy _validate_image_limits call with model_name string. Use model_context object instead."
)
try:
from utils.model_context import ModelContext
model_context = ModelContext(model_context)
except Exception as e:
logger.warning(f"Failed to create model context from legacy model_name: {e}")
# Generic error response for any unavailable model
return {
"status": "error",
"content": self._build_model_unavailable_message(str(model_context)),
"content_type": "text",
"metadata": {
"error_type": "validation_error",
"model_name": model_context,
"supports_images": None, # Unknown since model doesn't exist
"image_count": len(images) if images else 0,
},
}
if not model_context:
# Get from tool's stored context as fallback
model_context = getattr(self, "_model_context", None)

View File

@@ -146,8 +146,9 @@ class VersionTool(BaseTool):
"""Return the JSON schema for the tool's input"""
return {
"type": "object",
"properties": {"model": {"type": "string", "description": "Model to use (ignored by version tool)"}},
"properties": {},
"required": [],
"additionalProperties": False,
}
def get_annotations(self) -> Optional[dict[str, Any]]:

View File

@@ -139,12 +139,16 @@ class WorkflowTool(BaseTool, BaseWorkflowMixin):
Returns:
Complete JSON schema for the workflow tool
"""
requires_model = self.requires_model()
model_field_schema = self.get_model_field_schema() if requires_model else None
auto_mode = self.is_effective_auto_mode() if requires_model else False
return WorkflowSchemaBuilder.build_schema(
tool_specific_fields=self.get_tool_fields(),
required_fields=self.get_required_fields(),
model_field_schema=self.get_model_field_schema(),
auto_mode=self.is_effective_auto_mode(),
model_field_schema=model_field_schema,
auto_mode=auto_mode,
tool_name=self.get_name(),
require_model=requires_model,
)
def get_workflow_request_model(self):