feat: added intelligence_score to the model capabilities schema; a 1-20 score that can be set to influence the sort order of models presented to the CLI in auto-selection mode

fix: re-introduced the model definition into the schema, but intelligently: only a summary is generated per tool. Required to ensure the CLI calls and uses the correct model
fix: removed `model` param from some tools where this wasn't needed
fix: enforced adherence to `*_ALLOWED_MODELS` by advertising only the allowed models to the CLI
fix: removed duplicates across providers when passing canonical names back to the CLI; the first enabled provider wins
Fahad
2025-10-02 21:43:44 +04:00
parent e78fe35a1b
commit 6cab9e56fc
22 changed files with 525 additions and 110 deletions
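Taken together, the last two fixes mean the model list advertised to the CLI is both filtered and de-duplicated. A minimal sketch of how they might compose is below; advertise_models, get_provider_type, and the exact env-var parsing are illustrative assumptions rather than code from this commit:

import os

def advertise_models(providers) -> list[str]:
    """Hypothetical helper: walk providers in priority order, honor each
    provider's *_ALLOWED_MODELS allow-list, and skip canonical names that an
    earlier (higher-priority) provider already claimed."""
    seen: set[str] = set()
    advertised: list[str] = []
    for provider in providers:
        env_key = f"{provider.get_provider_type().value.upper()}_ALLOWED_MODELS"
        raw = os.getenv(env_key, "")
        allowed = {m.strip().lower() for m in raw.split(",") if m.strip()}
        for name, _caps in provider.get_capabilities_by_rank():
            if allowed and name.lower() not in allowed:
                continue  # not in the allow-list: never advertised to the CLI
            if name in seen:
                continue  # duplicate canonical name: first enabled provider wins
            seen.add(name)
            advertised.append(name)
    return advertised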

View File

@@ -42,6 +42,7 @@ class ModelProvider(ABC):
"""Initialize the provider with API key and optional configuration."""
self.api_key = api_key
self.config = kwargs
self._sorted_capabilities_cache: Optional[list[tuple[str, ModelCapabilities]]] = None
# ------------------------------------------------------------------
# Provider identity & capability surface
@@ -77,6 +78,27 @@ class ModelProvider(ABC):
            return {k: v for k, v in model_map.items() if isinstance(v, ModelCapabilities)}
        return {}

    def get_capabilities_by_rank(self) -> list[tuple[str, ModelCapabilities]]:
        """Return model capabilities sorted by effective capability rank."""
        if self._sorted_capabilities_cache is not None:
            return list(self._sorted_capabilities_cache)

        model_configs = self.get_all_model_capabilities()
        if not model_configs:
            self._sorted_capabilities_cache = []
            return []

        items = list(model_configs.items())
        items.sort(key=lambda item: (-item[1].get_effective_capability_rank(), item[0]))
        self._sorted_capabilities_cache = items
        return list(items)

    def _invalidate_capability_cache(self) -> None:
        """Clear cached sorted capability data (call after dynamic updates)."""
        self._sorted_capabilities_cache = None

    def list_models(
        self,
        *,
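Two details of get_capabilities_by_rank are easy to miss: the rank is negated in the sort key so stronger models come first while the model name breaks ties deterministically, and the method returns a copy via list(...) so callers cannot mutate the cached ordering. A minimal sketch of the same key on plain data (the names and ranks are made up for illustration):

# Made-up (name, rank) pairs standing in for (model_name, effective rank).
ranks = {"gpt-5": 94, "o3": 88, "gpt-4.1": 88, "o4-mini": 79}

# Same key as get_capabilities_by_rank: rank descending, name ascending on ties.
ordered = sorted(ranks.items(), key=lambda item: (-item[1], item[0]))
print([name for name, _ in ordered])  # ['gpt-5', 'gpt-4.1', 'o3', 'o4-mini']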

View File

@@ -33,6 +33,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="o3-2025-04-16",
friendly_name="DIAL (O3)",
intelligence_score=14,
context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False,
@@ -51,6 +52,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="o4-mini-2025-04-16",
friendly_name="DIAL (O4-mini)",
intelligence_score=11,
context_window=200_000,
max_output_tokens=100_000,
supports_extended_thinking=False,
@@ -69,6 +71,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-sonnet-4.1-20250805-v1:0",
friendly_name="DIAL (Sonnet 4.1)",
intelligence_score=10,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False,
@@ -87,6 +90,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-sonnet-4.1-20250805-v1:0-with-thinking",
friendly_name="DIAL (Sonnet 4.1 Thinking)",
intelligence_score=11,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant
@@ -105,6 +109,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-opus-4.1-20250805-v1:0",
friendly_name="DIAL (Opus 4.1)",
intelligence_score=14,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=False,
@@ -123,6 +128,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="anthropic.claude-opus-4.1-20250805-v1:0-with-thinking",
friendly_name="DIAL (Opus 4.1 Thinking)",
intelligence_score=15,
context_window=200_000,
max_output_tokens=64_000,
supports_extended_thinking=True, # Thinking mode variant
@@ -141,6 +147,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="gemini-2.5-pro-preview-03-25-google-search",
friendly_name="DIAL (Gemini 2.5 Pro Search)",
intelligence_score=17,
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False, # DIAL doesn't expose thinking mode
@@ -159,6 +166,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="gemini-2.5-pro-preview-05-06",
friendly_name="DIAL (Gemini 2.5 Pro)",
intelligence_score=18,
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False,
@@ -177,6 +185,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
provider=ProviderType.DIAL,
model_name="gemini-2.5-flash-preview-05-20",
friendly_name="DIAL (Gemini Flash 2.5)",
intelligence_score=10,
context_window=1_000_000,
max_output_tokens=65_536,
supports_extended_thinking=False,

View File

@@ -33,6 +33,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-pro",
friendly_name="Gemini (Pro 2.5)",
intelligence_score=18,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,
@@ -52,6 +53,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash",
friendly_name="Gemini (Flash 2.0)",
intelligence_score=9,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True, # Experimental thinking mode
@@ -71,6 +73,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.0-flash-lite",
friendly_name="Gemin (Flash Lite 2.0)",
intelligence_score=7,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=False, # Not supported per user request
@@ -89,6 +92,7 @@ class GeminiModelProvider(ModelProvider):
provider=ProviderType.GOOGLE,
model_name="gemini-2.5-flash",
friendly_name="Gemini (Flash 2.5)",
intelligence_score=10,
context_window=1_048_576, # 1M tokens
max_output_tokens=65_536,
supports_extended_thinking=True,

View File

@@ -26,6 +26,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5",
friendly_name="OpenAI (GPT-5)",
intelligence_score=16,
context_window=400_000, # 400K tokens
max_output_tokens=128_000, # 128K max output tokens
supports_extended_thinking=True, # Supports reasoning tokens
@@ -44,6 +45,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5-mini",
friendly_name="OpenAI (GPT-5-mini)",
intelligence_score=15,
context_window=400_000, # 400K tokens
max_output_tokens=128_000, # 128K max output tokens
supports_extended_thinking=True, # Supports reasoning tokens
@@ -62,6 +64,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-5-nano",
friendly_name="OpenAI (GPT-5 nano)",
intelligence_score=13,
context_window=400_000,
max_output_tokens=128_000,
supports_extended_thinking=True,
@@ -80,6 +83,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o3",
friendly_name="OpenAI (O3)",
intelligence_score=14,
context_window=200_000, # 200K tokens
max_output_tokens=65_536, # 64K max output tokens
supports_extended_thinking=False,
@@ -98,6 +102,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o3-mini",
friendly_name="OpenAI (O3-mini)",
intelligence_score=12,
context_window=200_000, # 200K tokens
max_output_tokens=65_536, # 64K max output tokens
supports_extended_thinking=False,
@@ -116,6 +121,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o3-pro",
friendly_name="OpenAI (O3-Pro)",
intelligence_score=15,
context_window=200_000, # 200K tokens
max_output_tokens=65_536, # 64K max output tokens
supports_extended_thinking=False,
@@ -134,6 +140,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="o4-mini",
friendly_name="OpenAI (O4-mini)",
intelligence_score=11,
context_window=200_000, # 200K tokens
max_output_tokens=65_536, # 64K max output tokens
supports_extended_thinking=False,
@@ -152,6 +159,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENAI,
model_name="gpt-4.1",
friendly_name="OpenAI (GPT 4.1)",
intelligence_score=13,
context_window=1_000_000, # 1M tokens
max_output_tokens=32_768,
supports_extended_thinking=False,

View File

@@ -85,6 +85,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
provider=ProviderType.OPENROUTER,
model_name=canonical_name,
friendly_name=self.FRIENDLY_NAME,
intelligence_score=9,
context_window=32_768,
max_output_tokens=32_768,
supports_extended_thinking=False,
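This generic entry is the fallback for any OpenRouter model without a curated definition, and its conservative defaults place such models near the bottom of the auto-selection order. Using the ranking formula introduced in ModelCapabilities below, and ignoring the feature flags truncated from this hunk, the fallback rank works out to roughly:

import math

score = 9 * 5                                           # default intelligence 9 -> 45
score += int(min(5, max(0.0, math.log10(32_768) - 3)))  # context bonus: int(1.52) -> +1
score += 1                                              # 32_768 output tokens >= 32_000 -> +1
print(score)                                            # 47, well below curated models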

View File

@@ -1,5 +1,6 @@
"""Dataclass describing the feature set of a model exposed by a provider."""
import math
from dataclasses import dataclass, field
from typing import Optional
@@ -32,6 +33,7 @@ class ModelCapabilities:
    provider: ProviderType
    model_name: str
    friendly_name: str
    intelligence_score: int = 10 # Human-curated 1-20 score reflecting general capability
    description: str = ""
    aliases: list[str] = field(default_factory=list)
@@ -69,6 +71,42 @@ class ModelCapabilities:
        return self.temperature_constraint.get_corrected_value(requested_temperature)

    def get_effective_capability_rank(self) -> int:
        """Calculate the runtime capability rank from intelligence + capabilities."""
        # Human signal drives the baseline (1-20 → 5-100 after scaling)
        base_intelligence = self.intelligence_score if self.intelligence_score else 10
        base_intelligence = max(1, min(20, base_intelligence))
        score = base_intelligence * 5

        # Context window bonus with gentle diminishing returns
        ctx_bonus = 0
        ctx = max(self.context_window, 0)
        if ctx > 0:
            ctx_bonus = int(min(5, max(0.0, math.log10(ctx) - 3)))
        score += ctx_bonus

        # Output token capacity adds a small bonus
        if self.max_output_tokens >= 65_000:
            score += 2
        elif self.max_output_tokens >= 32_000:
            score += 1

        # Feature-level boosts
        if self.supports_extended_thinking:
            score += 3
        if self.supports_function_calling:
            score += 1
        if self.supports_json_mode:
            score += 1
        if self.supports_images:
            score += 1
        if self.is_custom:
            score -= 1

        return max(0, min(100, score))
    @staticmethod
    def collect_aliases(model_configs: dict[str, "ModelCapabilities"]) -> dict[str, list[str]]:
        """Build a mapping of model name to aliases from capability configs."""
@@ -112,7 +150,13 @@ class ModelCapabilities:
            formatted_names.append(formatted)

        # Sort models by capability rank (descending) then by name for deterministic ordering
        sorted_items = sorted(
            model_configs.items(),
            key=lambda item: (-item[1].get_effective_capability_rank(), item[0]),
        )
        for base_model, capabilities in sorted_items:
            append_name(base_model)
            if include_aliases and capabilities.aliases:
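To make the scaling concrete, here is the formula applied by hand to a profile resembling this commit's Gemini 2.5 Pro entry (intelligence 18, a 1,048,576-token context window, 65,536 output tokens, extended thinking); the function-calling, JSON, and image flags are assumed true for illustration:

import math

score = 18 * 5                                             # intelligence 18 -> 90
score += int(min(5, max(0.0, math.log10(1_048_576) - 3)))  # context bonus: int(3.02) -> +3
score += 2                                                 # 65_536 output tokens >= 65_000 -> +2
score += 3 + 1 + 1 + 1                                     # thinking, functions, JSON, images
print(max(0, min(100, score)))                             # 101 clamps to the 100 ceiling

Note that the context bonus truncates: even a 1M-token window earns only +3, and the +5 cap would require a 100M-token context.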

View File

@@ -27,6 +27,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.XAI,
model_name="grok-4",
friendly_name="X.AI (Grok 4)",
intelligence_score=16,
context_window=256_000, # 256K tokens
max_output_tokens=256_000, # 256K tokens max output
supports_extended_thinking=True, # Grok-4 supports reasoning mode
@@ -45,6 +46,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.XAI,
model_name="grok-3",
friendly_name="X.AI (Grok 3)",
intelligence_score=13,
context_window=131_072, # 131K tokens
max_output_tokens=131_072,
supports_extended_thinking=False,
@@ -63,6 +65,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
provider=ProviderType.XAI,
model_name="grok-3-fast",
friendly_name="X.AI (Grok 3 Fast)",
intelligence_score=12,
context_window=131_072, # 131K tokens
max_output_tokens=131_072,
supports_extended_thinking=False,