Quick test mode for simulation tests

- Fixed o4-mini name; OpenAI removed o4-mini-high
- Add max_output_tokens property to ModelCapabilities
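In brief: this commit adds a required max_output_tokens field to ModelCapabilities and fills it in across the provider capability tables shown below, and drops the o4-mini-high entry since OpenAI removed that model. Reconstructed from the first hunk below, the post-commit dataclass might look like this (the decorator, defaults, and anything outside the hunk are assumptions, not the repository's verbatim definition):

```python
from dataclasses import dataclass

@dataclass
class ModelCapabilities:
    model_name: str
    friendly_name: str      # Human-friendly name like "Gemini" or "OpenAI"
    context_window: int     # Total context window size in tokens
    max_output_tokens: int  # New in this commit: max output tokens per request
    supports_extended_thinking: bool = False
    supports_system_prompts: bool = True
    supports_streaming: bool = True
```
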
@@ -132,6 +132,7 @@ class ModelCapabilities:
     model_name: str
     friendly_name: str  # Human-friendly name like "Gemini" or "OpenAI"
     context_window: int  # Total context window size in tokens
+    max_output_tokens: int  # Maximum output tokens per request
     supports_extended_thinking: bool = False
     supports_system_prompts: bool = True
     supports_streaming: bool = True

@@ -158,6 +158,7 @@ class CustomProvider(OpenAICompatibleProvider):
             model_name=resolved_name,
             friendly_name=f"{self.FRIENDLY_NAME} ({resolved_name})",
             context_window=32_768,  # Conservative default
+            max_output_tokens=32_768,  # Conservative default max output
             supports_extended_thinking=False,  # Most custom models don't support this
             supports_system_prompts=True,
             supports_streaming=True,

@@ -187,7 +188,7 @@ class CustomProvider(OpenAICompatibleProvider):
         Returns:
             True if model is intended for custom/local endpoint
         """
-        logging.debug(f"Custom provider validating model: '{model_name}'")
+        # logging.debug(f"Custom provider validating model: '{model_name}'")

         # Try to resolve through registry first
         config = self._registry.resolve(model_name)

@@ -195,12 +196,12 @@ class CustomProvider(OpenAICompatibleProvider):
             model_id = config.model_name
             # Use explicit is_custom flag for clean validation
             if config.is_custom:
-                logging.debug(f"Model '{model_name}' -> '{model_id}' validated via registry (custom model)")
+                logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' validated via registry")
                 return True
             else:
                 # This is a cloud/OpenRouter model - CustomProvider should NOT handle these
                 # Let OpenRouter provider handle them instead
-                logging.debug(f"Model '{model_name}' -> '{model_id}' rejected (cloud model, defer to OpenRouter)")
+                # logging.debug(f"... [Custom] Model '{model_name}' -> '{model_id}' not custom (defer to OpenRouter)")
                 return False

         # Handle version tags for unknown models (e.g., "my-model:latest")

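The two validate_model_name hunks above implement a routing rule: the custom provider claims only registry entries flagged is_custom, and everything else falls through to OpenRouter. A minimal self-contained sketch of that rule (the registry contents and config type here are stand-ins, not the repository's classes):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class _Config:
    """Stand-in for the registry's resolved entry."""
    model_name: str
    is_custom: bool

# Toy registry contents, invented for illustration.
_REGISTRY = {
    "local-llama": _Config("local-llama:latest", is_custom=True),
    "gpt-4.1": _Config("openai/gpt-4.1", is_custom=False),
}

def validate_model_name(model_name: str) -> bool:
    """True only for models the custom/local endpoint should handle."""
    config: Optional[_Config] = _REGISTRY.get(model_name)
    if config:
        # Cloud models are rejected so the OpenRouter provider picks them up.
        return config.is_custom
    return False

assert validate_model_name("local-llama") is True
assert validate_model_name("gpt-4.1") is False
```
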
@@ -37,6 +37,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="o3-2025-04-16",
             friendly_name="DIAL (O3)",
             context_window=200_000,
+            max_output_tokens=100_000,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -54,6 +55,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="o4-mini-2025-04-16",
             friendly_name="DIAL (O4-mini)",
             context_window=200_000,
+            max_output_tokens=100_000,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -71,6 +73,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="anthropic.claude-sonnet-4-20250514-v1:0",
             friendly_name="DIAL (Sonnet 4)",
             context_window=200_000,
+            max_output_tokens=64_000,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -88,6 +91,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="anthropic.claude-sonnet-4-20250514-v1:0-with-thinking",
             friendly_name="DIAL (Sonnet 4 Thinking)",
             context_window=200_000,
+            max_output_tokens=64_000,
             supports_extended_thinking=True,  # Thinking mode variant
             supports_system_prompts=True,
             supports_streaming=True,

@@ -105,6 +109,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="anthropic.claude-opus-4-20250514-v1:0",
             friendly_name="DIAL (Opus 4)",
             context_window=200_000,
+            max_output_tokens=64_000,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -122,6 +127,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="anthropic.claude-opus-4-20250514-v1:0-with-thinking",
             friendly_name="DIAL (Opus 4 Thinking)",
             context_window=200_000,
+            max_output_tokens=64_000,
             supports_extended_thinking=True,  # Thinking mode variant
             supports_system_prompts=True,
             supports_streaming=True,

@@ -139,6 +145,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="gemini-2.5-pro-preview-03-25-google-search",
             friendly_name="DIAL (Gemini 2.5 Pro Search)",
             context_window=1_000_000,
+            max_output_tokens=65_536,
             supports_extended_thinking=False,  # DIAL doesn't expose thinking mode
             supports_system_prompts=True,
             supports_streaming=True,

@@ -156,6 +163,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="gemini-2.5-pro-preview-05-06",
             friendly_name="DIAL (Gemini 2.5 Pro)",
             context_window=1_000_000,
+            max_output_tokens=65_536,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -173,6 +181,7 @@ class DIALModelProvider(OpenAICompatibleProvider):
             model_name="gemini-2.5-flash-preview-05-20",
             friendly_name="DIAL (Gemini Flash 2.5)",
             context_window=1_000_000,
+            max_output_tokens=65_536,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -24,6 +24,7 @@ class GeminiModelProvider(ModelProvider):
             model_name="gemini-2.0-flash",
             friendly_name="Gemini (Flash 2.0)",
             context_window=1_048_576,  # 1M tokens
+            max_output_tokens=65_536,
             supports_extended_thinking=True,  # Experimental thinking mode
             supports_system_prompts=True,
             supports_streaming=True,

@@ -42,6 +43,7 @@ class GeminiModelProvider(ModelProvider):
             model_name="gemini-2.0-flash-lite",
             friendly_name="Gemini (Flash Lite 2.0)",
             context_window=1_048_576,  # 1M tokens
+            max_output_tokens=65_536,
             supports_extended_thinking=False,  # Not supported per user request
             supports_system_prompts=True,
             supports_streaming=True,

@@ -59,6 +61,7 @@ class GeminiModelProvider(ModelProvider):
             model_name="gemini-2.5-flash",
             friendly_name="Gemini (Flash 2.5)",
             context_window=1_048_576,  # 1M tokens
+            max_output_tokens=65_536,
             supports_extended_thinking=True,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -77,6 +80,7 @@ class GeminiModelProvider(ModelProvider):
             model_name="gemini-2.5-pro",
             friendly_name="Gemini (Pro 2.5)",
             context_window=1_048_576,  # 1M tokens
+            max_output_tokens=65_536,
             supports_extended_thinking=True,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -687,7 +687,6 @@ class OpenAICompatibleProvider(ModelProvider):
             "o3-mini",
             "o3-pro",
             "o4-mini",
-            "o4-mini-high",
             # Note: Claude models would be handled by a separate provider
         }
         supports = model_name.lower() in vision_models

@@ -24,6 +24,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             model_name="o3",
             friendly_name="OpenAI (O3)",
             context_window=200_000,  # 200K tokens
+            max_output_tokens=65536,  # 64K max output tokens
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -41,6 +42,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             model_name="o3-mini",
             friendly_name="OpenAI (O3-mini)",
             context_window=200_000,  # 200K tokens
+            max_output_tokens=65536,  # 64K max output tokens
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -58,6 +60,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             model_name="o3-pro-2025-06-10",
             friendly_name="OpenAI (O3-Pro)",
             context_window=200_000,  # 200K tokens
+            max_output_tokens=65536,  # 64K max output tokens
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -75,6 +78,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             model_name="o4-mini",
             friendly_name="OpenAI (O4-mini)",
             context_window=200_000,  # 200K tokens
+            max_output_tokens=65536,  # 64K max output tokens
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -85,30 +89,14 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             supports_temperature=False,  # O4 models don't accept temperature parameter
             temperature_constraint=create_temperature_constraint("fixed"),
             description="Latest reasoning model (200K context) - Optimized for shorter contexts, rapid reasoning",
-            aliases=["mini", "o4mini"],
-        ),
-        "o4-mini-high": ModelCapabilities(
-            provider=ProviderType.OPENAI,
-            model_name="o4-mini-high",
-            friendly_name="OpenAI (O4-mini-high)",
-            context_window=200_000,  # 200K tokens
-            supports_extended_thinking=False,
-            supports_system_prompts=True,
-            supports_streaming=True,
-            supports_function_calling=True,
-            supports_json_mode=True,
-            supports_images=True,  # O4 models support vision
-            max_image_size_mb=20.0,  # 20MB per OpenAI docs
-            supports_temperature=False,  # O4 models don't accept temperature parameter
-            temperature_constraint=create_temperature_constraint("fixed"),
-            description="Enhanced O4 mini (200K context) - Higher reasoning effort for complex tasks",
-            aliases=["o4minihigh", "o4minihi", "mini-high"],
+            aliases=["mini", "o4mini", "o4-mini"],
         ),
         "gpt-4.1-2025-04-14": ModelCapabilities(
             provider=ProviderType.OPENAI,
             model_name="gpt-4.1-2025-04-14",
             friendly_name="OpenAI (GPT 4.1)",
             context_window=1_000_000,  # 1M tokens
+            max_output_tokens=32_768,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

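With the o4-mini-high entry removed, its aliases ("o4minihigh", "o4minihi", "mini-high") no longer resolve, while "mini" and "o4mini" keep pointing at o4-mini. A toy resolver illustrating the post-commit behavior (simplified; this is not how the repository actually resolves aliases):

```python
# Alias table reflecting the post-commit o4-mini entry above (toy version).
_ALIASES = {
    "mini": "o4-mini",
    "o4mini": "o4-mini",
    "o4-mini": "o4-mini",
}

def resolve_alias(name: str) -> str:
    """Return the canonical model name, or the input unchanged if unknown."""
    return _ALIASES.get(name.lower(), name)

assert resolve_alias("mini") == "o4-mini"
assert resolve_alias("mini-high") == "mini-high"  # o4-mini-high aliases are gone
```
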
@@ -101,6 +101,7 @@ class OpenRouterProvider(OpenAICompatibleProvider):
             model_name=resolved_name,
             friendly_name=self.FRIENDLY_NAME,
             context_window=32_768,  # Conservative default context window
+            max_output_tokens=32_768,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -24,8 +24,6 @@ class ModelProviderRegistry:
             cls._instance._providers = {}
             cls._instance._initialized_providers = {}
-            logging.debug(f"REGISTRY: Created instance {cls._instance}")
         else:
-            logging.debug(f"REGISTRY: Returning existing instance {cls._instance}")
         return cls._instance

     @classmethod

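For context, the method these lines sit in is a classic __new__-based singleton; after this hunk it reads roughly as follows (a sketch reconstructed from the context lines, not the file verbatim):

```python
class ModelProviderRegistry:
    _instance = None

    def __new__(cls):
        # First call creates the shared instance; later calls return it.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._providers = {}
            cls._instance._initialized_providers = {}
        return cls._instance
```
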
@@ -26,6 +26,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
             model_name="grok-3",
             friendly_name="X.AI (Grok 3)",
             context_window=131_072,  # 131K tokens
+            max_output_tokens=131072,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

@@ -43,6 +44,7 @@ class XAIModelProvider(OpenAICompatibleProvider):
             model_name="grok-3-fast",
             friendly_name="X.AI (Grok 3 Fast)",
             context_window=131_072,  # 131K tokens
+            max_output_tokens=131072,
             supports_extended_thinking=False,
             supports_system_prompts=True,
             supports_streaming=True,

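A plausible consumer of the new field is a pre-flight clamp on the completion budget. The helper below is not part of the commit; it only illustrates how max_output_tokens might be used:

```python
def clamp_output_tokens(requested: int, max_output_tokens: int) -> int:
    """Clamp a requested completion budget to the model's advertised limit."""
    return max(1, min(requested, max_output_tokens))

# Example against the o3 entry above (max_output_tokens=65536):
assert clamp_output_tokens(100_000, 65536) == 65536
assert clamp_output_tokens(4_096, 65536) == 4_096
```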