feat: add `intelligence_score` to the model capabilities schema — a 1–20 number that can be specified to influence the sort order of models presented to the CLI in auto-selection mode
fix: re-introduce the model definitions into the schema, but generate only a summary per tool; required so the CLI calls and uses the correct model
fix: remove the `model` parameter from tools that did not need it
fix: enforce `*_ALLOWED_MODELS` by advertising only the allowed models to the CLI
fix: de-duplicate canonical model names across providers when passing them back to the CLI; the first enabled provider wins
This commit is contained in:
@@ -42,6 +42,7 @@ class ModelProvider(ABC):
|
||||
"""Initialize the provider with API key and optional configuration."""
|
||||
self.api_key = api_key
|
||||
self.config = kwargs
|
||||
self._sorted_capabilities_cache: Optional[list[tuple[str, ModelCapabilities]]] = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Provider identity & capability surface
|
||||
@@ -77,6 +78,27 @@ class ModelProvider(ABC):
|
||||
return {k: v for k, v in model_map.items() if isinstance(v, ModelCapabilities)}
|
||||
return {}
|
||||
|
||||
def get_capabilities_by_rank(self) -> "list[tuple[str, ModelCapabilities]]":
    """Return (model_name, capabilities) pairs ordered by capability rank.

    Ordering is descending by effective capability rank, with the model
    name as a deterministic tie-breaker. The computed ordering is cached
    on the instance; callers always receive a fresh list copy so the
    cache itself cannot be mutated from outside.
    """
    cached = self._sorted_capabilities_cache
    if cached is not None:
        return list(cached)

    capabilities = self.get_all_model_capabilities()
    if not capabilities:
        # Cache the empty result too, so repeated calls stay cheap.
        self._sorted_capabilities_cache = []
        return []

    ranked = sorted(
        capabilities.items(),
        key=lambda entry: (-entry[1].get_effective_capability_rank(), entry[0]),
    )
    self._sorted_capabilities_cache = ranked
    return list(ranked)
|
||||
|
||||
def _invalidate_capability_cache(self) -> None:
|
||||
"""Clear cached sorted capability data (call after dynamic updates)."""
|
||||
|
||||
self._sorted_capabilities_cache = None
|
||||
|
||||
def list_models(
|
||||
self,
|
||||
*,
|
||||
|
||||
@@ -33,6 +33,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="o3-2025-04-16",
|
||||
friendly_name="DIAL (O3)",
|
||||
intelligence_score=14,
|
||||
context_window=200_000,
|
||||
max_output_tokens=100_000,
|
||||
supports_extended_thinking=False,
|
||||
@@ -51,6 +52,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="o4-mini-2025-04-16",
|
||||
friendly_name="DIAL (O4-mini)",
|
||||
intelligence_score=11,
|
||||
context_window=200_000,
|
||||
max_output_tokens=100_000,
|
||||
supports_extended_thinking=False,
|
||||
@@ -69,6 +71,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-sonnet-4.1-20250805-v1:0",
|
||||
friendly_name="DIAL (Sonnet 4.1)",
|
||||
intelligence_score=10,
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=False,
|
||||
@@ -87,6 +90,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-sonnet-4.1-20250805-v1:0-with-thinking",
|
||||
friendly_name="DIAL (Sonnet 4.1 Thinking)",
|
||||
intelligence_score=11,
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=True, # Thinking mode variant
|
||||
@@ -105,6 +109,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-opus-4.1-20250805-v1:0",
|
||||
friendly_name="DIAL (Opus 4.1)",
|
||||
intelligence_score=14,
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=False,
|
||||
@@ -123,6 +128,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="anthropic.claude-opus-4.1-20250805-v1:0-with-thinking",
|
||||
friendly_name="DIAL (Opus 4.1 Thinking)",
|
||||
intelligence_score=15,
|
||||
context_window=200_000,
|
||||
max_output_tokens=64_000,
|
||||
supports_extended_thinking=True, # Thinking mode variant
|
||||
@@ -141,6 +147,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-pro-preview-03-25-google-search",
|
||||
friendly_name="DIAL (Gemini 2.5 Pro Search)",
|
||||
intelligence_score=17,
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False, # DIAL doesn't expose thinking mode
|
||||
@@ -159,6 +166,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-pro-preview-05-06",
|
||||
friendly_name="DIAL (Gemini 2.5 Pro)",
|
||||
intelligence_score=18,
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False,
|
||||
@@ -177,6 +185,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.DIAL,
|
||||
model_name="gemini-2.5-flash-preview-05-20",
|
||||
friendly_name="DIAL (Gemini Flash 2.5)",
|
||||
intelligence_score=10,
|
||||
context_window=1_000_000,
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False,
|
||||
|
||||
@@ -33,6 +33,7 @@ class GeminiModelProvider(ModelProvider):
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.5-pro",
|
||||
friendly_name="Gemini (Pro 2.5)",
|
||||
intelligence_score=18,
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True,
|
||||
@@ -52,6 +53,7 @@ class GeminiModelProvider(ModelProvider):
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.0-flash",
|
||||
friendly_name="Gemini (Flash 2.0)",
|
||||
intelligence_score=9,
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True, # Experimental thinking mode
|
||||
@@ -71,6 +73,7 @@ class GeminiModelProvider(ModelProvider):
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.0-flash-lite",
|
||||
friendly_name="Gemini (Flash Lite 2.0)",
|
||||
intelligence_score=7,
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=False, # Not supported per user request
|
||||
@@ -89,6 +92,7 @@ class GeminiModelProvider(ModelProvider):
|
||||
provider=ProviderType.GOOGLE,
|
||||
model_name="gemini-2.5-flash",
|
||||
friendly_name="Gemini (Flash 2.5)",
|
||||
intelligence_score=10,
|
||||
context_window=1_048_576, # 1M tokens
|
||||
max_output_tokens=65_536,
|
||||
supports_extended_thinking=True,
|
||||
|
||||
@@ -26,6 +26,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="gpt-5",
|
||||
friendly_name="OpenAI (GPT-5)",
|
||||
intelligence_score=16,
|
||||
context_window=400_000, # 400K tokens
|
||||
max_output_tokens=128_000, # 128K max output tokens
|
||||
supports_extended_thinking=True, # Supports reasoning tokens
|
||||
@@ -44,6 +45,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="gpt-5-mini",
|
||||
friendly_name="OpenAI (GPT-5-mini)",
|
||||
intelligence_score=15,
|
||||
context_window=400_000, # 400K tokens
|
||||
max_output_tokens=128_000, # 128K max output tokens
|
||||
supports_extended_thinking=True, # Supports reasoning tokens
|
||||
@@ -62,6 +64,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="gpt-5-nano",
|
||||
friendly_name="OpenAI (GPT-5 nano)",
|
||||
intelligence_score=13,
|
||||
context_window=400_000,
|
||||
max_output_tokens=128_000,
|
||||
supports_extended_thinking=True,
|
||||
@@ -80,6 +83,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3",
|
||||
friendly_name="OpenAI (O3)",
|
||||
intelligence_score=14,
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
@@ -98,6 +102,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3-mini",
|
||||
friendly_name="OpenAI (O3-mini)",
|
||||
intelligence_score=12,
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
@@ -116,6 +121,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o3-pro",
|
||||
friendly_name="OpenAI (O3-Pro)",
|
||||
intelligence_score=15,
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
@@ -134,6 +140,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="o4-mini",
|
||||
friendly_name="OpenAI (O4-mini)",
|
||||
intelligence_score=11,
|
||||
context_window=200_000, # 200K tokens
|
||||
max_output_tokens=65536, # 64K max output tokens
|
||||
supports_extended_thinking=False,
|
||||
@@ -152,6 +159,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENAI,
|
||||
model_name="gpt-4.1",
|
||||
friendly_name="OpenAI (GPT 4.1)",
|
||||
intelligence_score=13,
|
||||
context_window=1_000_000, # 1M tokens
|
||||
max_output_tokens=32_768,
|
||||
supports_extended_thinking=False,
|
||||
|
||||
@@ -85,6 +85,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.OPENROUTER,
|
||||
model_name=canonical_name,
|
||||
friendly_name=self.FRIENDLY_NAME,
|
||||
intelligence_score=9,
|
||||
context_window=32_768,
|
||||
max_output_tokens=32_768,
|
||||
supports_extended_thinking=False,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Dataclass describing the feature set of a model exposed by a provider."""
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
@@ -32,6 +33,7 @@ class ModelCapabilities:
|
||||
provider: ProviderType
|
||||
model_name: str
|
||||
friendly_name: str
|
||||
intelligence_score: int = 10 # Human-curated 1–20 score reflecting general capability
|
||||
description: str = ""
|
||||
aliases: list[str] = field(default_factory=list)
|
||||
|
||||
@@ -69,6 +71,42 @@ class ModelCapabilities:
|
||||
|
||||
return self.temperature_constraint.get_corrected_value(requested_temperature)
|
||||
|
||||
def get_effective_capability_rank(self) -> int:
    """Compute the runtime capability rank (0-100) for this model.

    The human-curated ``intelligence_score`` (1-20) drives the baseline
    (scaled x5 to 5-100); context window, output capacity and feature
    flags add small bonuses, and custom models take a one-point penalty.
    The final value is clamped to the 0-100 range.
    """
    # Baseline from the curated score; 0/unset falls back to the default 10.
    intelligence = self.intelligence_score or 10
    score = max(1, min(20, intelligence)) * 5

    # Context-window bonus, log-scaled so huge windows don't dominate.
    window = max(self.context_window, 0)
    if window > 0:
        score += int(min(5, max(0.0, math.log10(window) - 3)))

    # Small bonus for generous output-token limits.
    if self.max_output_tokens >= 65_000:
        score += 2
    elif self.max_output_tokens >= 32_000:
        score += 1

    # Feature-level boosts (extended thinking weighs the most).
    score += 3 if self.supports_extended_thinking else 0
    score += 1 if self.supports_function_calling else 0
    score += 1 if self.supports_json_mode else 0
    score += 1 if self.supports_images else 0

    # Custom/self-hosted models rank slightly below curated ones.
    if self.is_custom:
        score -= 1

    return max(0, min(100, score))
|
||||
|
||||
@staticmethod
|
||||
def collect_aliases(model_configs: dict[str, "ModelCapabilities"]) -> dict[str, list[str]]:
|
||||
"""Build a mapping of model name to aliases from capability configs."""
|
||||
@@ -112,7 +150,13 @@ class ModelCapabilities:
|
||||
|
||||
formatted_names.append(formatted)
|
||||
|
||||
for base_model, capabilities in model_configs.items():
|
||||
# Sort models by capability rank (descending) then by name for deterministic ordering
|
||||
sorted_items = sorted(
|
||||
model_configs.items(),
|
||||
key=lambda item: (-item[1].get_effective_capability_rank(), item[0]),
|
||||
)
|
||||
|
||||
for base_model, capabilities in sorted_items:
|
||||
append_name(base_model)
|
||||
|
||||
if include_aliases and capabilities.aliases:
|
||||
|
||||
@@ -27,6 +27,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.XAI,
|
||||
model_name="grok-4",
|
||||
friendly_name="X.AI (Grok 4)",
|
||||
intelligence_score=16,
|
||||
context_window=256_000, # 256K tokens
|
||||
max_output_tokens=256_000, # 256K tokens max output
|
||||
supports_extended_thinking=True, # Grok-4 supports reasoning mode
|
||||
@@ -45,6 +46,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.XAI,
|
||||
model_name="grok-3",
|
||||
friendly_name="X.AI (Grok 3)",
|
||||
intelligence_score=13,
|
||||
context_window=131_072, # 131K tokens
|
||||
max_output_tokens=131072,
|
||||
supports_extended_thinking=False,
|
||||
@@ -63,6 +65,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
|
||||
provider=ProviderType.XAI,
|
||||
model_name="grok-3-fast",
|
||||
friendly_name="X.AI (Grok 3 Fast)",
|
||||
intelligence_score=12,
|
||||
context_window=131_072, # 131K tokens
|
||||
max_output_tokens=131072,
|
||||
supports_extended_thinking=False,
|
||||
|
||||
Reference in New Issue
Block a user