Use the new Gemini 2.5 Flash

Updated thinking token budgets to be specified as a ratio of each model's maximum allowed
Updated tests
Updated README
Fahad
2025-06-12 20:46:54 +04:00
parent b34c63d710
commit 3aedb16101
27 changed files with 135 additions and 98 deletions

View File

@@ -13,26 +13,29 @@ class GeminiModelProvider(ModelProvider):
     # Model configurations
     SUPPORTED_MODELS = {
-        "gemini-2.0-flash": {
+        "gemini-2.5-flash-preview-05-20": {
             "max_tokens": 1_048_576,  # 1M tokens
-            "supports_extended_thinking": False,
+            "supports_extended_thinking": True,
+            "max_thinking_tokens": 24576,  # Flash 2.5 thinking budget limit
         },
         "gemini-2.5-pro-preview-06-05": {
             "max_tokens": 1_048_576,  # 1M tokens
             "supports_extended_thinking": True,
+            "max_thinking_tokens": 32768,  # Pro 2.5 thinking budget limit
         },
         # Shorthands
-        "flash": "gemini-2.0-flash",
+        "flash": "gemini-2.5-flash-preview-05-20",
         "pro": "gemini-2.5-pro-preview-06-05",
     }
 
-    # Thinking mode configurations for models that support it
+    # Thinking mode configurations - percentages of model's max_thinking_tokens
+    # These percentages work across all models that support thinking
     THINKING_BUDGETS = {
-        "minimal": 128,  # Minimum for 2.5 Pro - fast responses
-        "low": 2048,  # Light reasoning tasks
-        "medium": 8192,  # Balanced reasoning (default)
-        "high": 16384,  # Complex analysis
-        "max": 32768,  # Maximum reasoning depth
+        "minimal": 0.005,  # 0.5% of max - minimal thinking for fast responses
+        "low": 0.08,  # 8% of max - light reasoning tasks
+        "medium": 0.33,  # 33% of max - balanced reasoning (default)
+        "high": 0.67,  # 67% of max - complex analysis
+        "max": 1.0,  # 100% of max - full thinking budget
     }
 
     def __init__(self, api_key: str, **kwargs):
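
The practical effect of switching from absolute budgets to ratios is easiest to see by computing the resulting token counts. A minimal standalone sketch, using only the constants from this diff; the int() truncation matches the provider code in the next hunk:

# Recompute the per-mode budgets implied by the new ratios.
# Limits and ratios are copied from the diff above; int() truncates,
# matching the provider's own calculation.
MAX_THINKING_TOKENS = {"flash": 24576, "pro": 32768}
THINKING_BUDGETS = {"minimal": 0.005, "low": 0.08, "medium": 0.33, "high": 0.67, "max": 1.0}

for model, limit in MAX_THINKING_TOKENS.items():
    budgets = {mode: int(limit * ratio) for mode, ratio in THINKING_BUDGETS.items()}
    print(model, budgets)

# flash {'minimal': 122, 'low': 1966, 'medium': 8110, 'high': 16465, 'max': 24576}
# pro   {'minimal': 163, 'low': 2621, 'medium': 10813, 'high': 21954, 'max': 32768}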
@@ -107,9 +110,12 @@ class GeminiModelProvider(ModelProvider):
         # Add thinking configuration for models that support it
         capabilities = self.get_capabilities(resolved_name)
         if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
-            generation_config.thinking_config = types.ThinkingConfig(
-                thinking_budget=self.THINKING_BUDGETS[thinking_mode]
-            )
+            # Get model's max thinking tokens and calculate actual budget
+            model_config = self.SUPPORTED_MODELS.get(resolved_name)
+            if model_config and "max_thinking_tokens" in model_config:
+                max_thinking_tokens = model_config["max_thinking_tokens"]
+                actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
+                generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
 
         try:
             # Generate content
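
For reference, a minimal sketch of how a computed budget reaches the Gemini API through the google-genai SDK. The model name and budget math match this diff; the API key and prompt are placeholders:

from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder key

# "high" mode on Flash 2.5: 67% of the 24576-token limit -> 16465 tokens
budget = int(24576 * 0.67)

response = client.models.generate_content(
    model="gemini-2.5-flash-preview-05-20",
    contents="Explain the trade-off between thinking budget and latency.",
    config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=budget),
    ),
)
print(response.text)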
@@ -164,6 +170,23 @@ class GeminiModelProvider(ModelProvider):
         capabilities = self.get_capabilities(model_name)
         return capabilities.supports_extended_thinking
 
+    def get_thinking_budget(self, model_name: str, thinking_mode: str) -> int:
+        """Get actual thinking token budget for a model and thinking mode."""
+        resolved_name = self._resolve_model_name(model_name)
+        model_config = self.SUPPORTED_MODELS.get(resolved_name, {})
+
+        if not model_config.get("supports_extended_thinking", False):
+            return 0
+
+        if thinking_mode not in self.THINKING_BUDGETS:
+            return 0
+
+        max_thinking_tokens = model_config.get("max_thinking_tokens", 0)
+        if max_thinking_tokens == 0:
+            return 0
+
+        return int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
+
     def _resolve_model_name(self, model_name: str) -> str:
         """Resolve model shorthand to full name."""
         # Check if it's a shorthand
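
A hypothetical usage sketch for the new helper, assuming a valid API key; it relies on the shorthand resolution shown above and on the constants from the first hunk:

# Hypothetical usage; the API key is a placeholder.
provider = GeminiModelProvider(api_key="YOUR_API_KEY")

provider.get_thinking_budget("flash", "medium")   # int(24576 * 0.33) == 8110
provider.get_thinking_budget("pro", "max")        # int(32768 * 1.0) == 32768
provider.get_thinking_budget("pro", "minimal")    # int(32768 * 0.005) == 163
provider.get_thinking_budget("flash", "bogus")    # 0 - unknown mode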

View File

@@ -67,7 +67,7 @@ class ModelProviderRegistry:
"""Get provider instance for a specific model name.
Args:
model_name: Name of the model (e.g., "gemini-2.0-flash", "o3-mini")
model_name: Name of the model (e.g., "gemini-2.5-flash-preview-05-20", "o3-mini")
Returns:
ModelProvider instance that supports this model
@@ -137,7 +137,7 @@ class ModelProviderRegistry:
         2. Gemini 2.0 Flash (fast and efficient) if Gemini API key available
         3. OpenAI o3 (high performance) if OpenAI API key available
         4. Gemini 2.5 Pro (deep reasoning) if Gemini API key available
-        5. Fallback to gemini-2.0-flash (most common case)
+        5. Fallback to gemini-2.5-flash-preview-05-20 (most common case)
 
         Returns:
             Model name string for fallback use
@@ -150,11 +150,11 @@ class ModelProviderRegistry:
         if openai_available:
             return "o3-mini"  # Balanced performance/cost
         elif gemini_available:
-            return "gemini-2.0-flash"  # Fast and efficient
+            return "gemini-2.5-flash-preview-05-20"  # Fast and efficient
         else:
             # No API keys available - return a reasonable default
             # This maintains backward compatibility for tests
-            return "gemini-2.0-flash"
+            return "gemini-2.5-flash-preview-05-20"
 
     @classmethod
     def get_available_providers_with_keys(cls) -> list[ProviderType]:
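
The fallback order above condenses to a few lines. A sketch, assuming availability is keyed off the conventional OPENAI_API_KEY and GEMINI_API_KEY environment variables (the actual lookup mechanism is not shown in this diff):

import os

def pick_fallback_model() -> str:
    # Assumed environment variable names; the registry's real checks may differ.
    openai_available = bool(os.environ.get("OPENAI_API_KEY"))
    gemini_available = bool(os.environ.get("GEMINI_API_KEY"))
    if openai_available:
        return "o3-mini"  # balanced performance/cost
    if gemini_available:
        return "gemini-2.5-flash-preview-05-20"  # fast and efficient
    # No keys: keep a sensible default for backward compatibility in tests
    return "gemini-2.5-flash-preview-05-20"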