Use the new Gemini 2.5 Flash
Updated to support thinking tokens as a ratio of the model's maximum allowed budget. Updated tests. Updated README.
@@ -13,26 +13,29 @@ class GeminiModelProvider(ModelProvider):
 
     # Model configurations
     SUPPORTED_MODELS = {
-        "gemini-2.0-flash": {
+        "gemini-2.5-flash-preview-05-20": {
             "max_tokens": 1_048_576,  # 1M tokens
-            "supports_extended_thinking": False,
+            "supports_extended_thinking": True,
+            "max_thinking_tokens": 24576,  # Flash 2.5 thinking budget limit
         },
         "gemini-2.5-pro-preview-06-05": {
             "max_tokens": 1_048_576,  # 1M tokens
             "supports_extended_thinking": True,
+            "max_thinking_tokens": 32768,  # Pro 2.5 thinking budget limit
         },
         # Shorthands
-        "flash": "gemini-2.0-flash",
+        "flash": "gemini-2.5-flash-preview-05-20",
         "pro": "gemini-2.5-pro-preview-06-05",
     }
 
-    # Thinking mode configurations for models that support it
+    # Thinking mode configurations - percentages of model's max_thinking_tokens
+    # These percentages work across all models that support thinking
     THINKING_BUDGETS = {
-        "minimal": 128,  # Minimum for 2.5 Pro - fast responses
-        "low": 2048,  # Light reasoning tasks
-        "medium": 8192,  # Balanced reasoning (default)
-        "high": 16384,  # Complex analysis
-        "max": 32768,  # Maximum reasoning depth
+        "minimal": 0.005,  # 0.5% of max - minimal thinking for fast responses
+        "low": 0.08,  # 8% of max - light reasoning tasks
+        "medium": 0.33,  # 33% of max - balanced reasoning (default)
+        "high": 0.67,  # 67% of max - complex analysis
+        "max": 1.0,  # 100% of max - full thinking budget
    }
 
     def __init__(self, api_key: str, **kwargs):
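Switching from absolute token counts to ratios means each mode now scales with the model's own ceiling; under the old flat table, "max" (32768) exceeded Flash's 24576-token limit, which is presumably what motivated the change. A standalone sanity check of the resulting budgets, using only values from the hunk above:

```python
# Ratios and per-model ceilings copied from the diff above
THINKING_BUDGETS = {"minimal": 0.005, "low": 0.08, "medium": 0.33, "high": 0.67, "max": 1.0}
MAX_THINKING_TOKENS = {
    "gemini-2.5-flash-preview-05-20": 24576,
    "gemini-2.5-pro-preview-06-05": 32768,
}

for model, ceiling in MAX_THINKING_TOKENS.items():
    # int() truncates, matching the int(...) cast the provider applies
    budgets = {mode: int(ceiling * ratio) for mode, ratio in THINKING_BUDGETS.items()}
    print(model, budgets)
# flash: minimal=122, low=1966, medium=8110, high=16465, max=24576
# pro:   minimal=163, low=2621, medium=10813, high=21954, max=32768
```

Note that "medium" on Flash yields 8110 tokens, close to the old flat 8192, while "max" now stays within each model's limit.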
@@ -107,9 +110,12 @@ class GeminiModelProvider(ModelProvider):
         # Add thinking configuration for models that support it
         capabilities = self.get_capabilities(resolved_name)
         if capabilities.supports_extended_thinking and thinking_mode in self.THINKING_BUDGETS:
-            generation_config.thinking_config = types.ThinkingConfig(
-                thinking_budget=self.THINKING_BUDGETS[thinking_mode]
-            )
+            # Get model's max thinking tokens and calculate actual budget
+            model_config = self.SUPPORTED_MODELS.get(resolved_name)
+            if model_config and "max_thinking_tokens" in model_config:
+                max_thinking_tokens = model_config["max_thinking_tokens"]
+                actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
+                generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)
 
         try:
             # Generate content
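For reference, a minimal standalone sketch of what the replacement block does, factored into a helper. The helper name `apply_thinking_budget` is hypothetical; `types.ThinkingConfig` comes from the diff (assumed to be the `google.genai` SDK), and the silent skip for models without a ceiling mirrors the `if model_config and ...` guard above:

```python
from google.genai import types  # assumed import, matching types.ThinkingConfig in the diff

# Ratios from THINKING_BUDGETS above
THINKING_BUDGETS = {"minimal": 0.005, "low": 0.08, "medium": 0.33, "high": 0.67, "max": 1.0}

def apply_thinking_budget(generation_config, model_config: dict | None, thinking_mode: str) -> None:
    """Hypothetical helper mirroring the hunk: scale the model's ceiling by the mode's ratio."""
    if not model_config or "max_thinking_tokens" not in model_config:
        return  # models without a thinking ceiling keep the default config
    if thinking_mode not in THINKING_BUDGETS:
        return  # unknown modes leave the config untouched
    budget = int(model_config["max_thinking_tokens"] * THINKING_BUDGETS[thinking_mode])
    generation_config.thinking_config = types.ThinkingConfig(thinking_budget=budget)
```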
@@ -164,6 +170,23 @@ class GeminiModelProvider(ModelProvider):
         capabilities = self.get_capabilities(model_name)
         return capabilities.supports_extended_thinking
 
+    def get_thinking_budget(self, model_name: str, thinking_mode: str) -> int:
+        """Get actual thinking token budget for a model and thinking mode."""
+        resolved_name = self._resolve_model_name(model_name)
+        model_config = self.SUPPORTED_MODELS.get(resolved_name, {})
+
+        if not model_config.get("supports_extended_thinking", False):
+            return 0
+
+        if thinking_mode not in self.THINKING_BUDGETS:
+            return 0
+
+        max_thinking_tokens = model_config.get("max_thinking_tokens", 0)
+        if max_thinking_tokens == 0:
+            return 0
+
+        return int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
+
     def _resolve_model_name(self, model_name: str) -> str:
         """Resolve model shorthand to full name."""
         # Check if it's a shorthand
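A usage sketch for the new `get_thinking_budget` helper. The constructor call is illustrative; the expected values follow from the model table and ratios above, and shorthands resolve first, so "flash" maps to the 24576-token ceiling:

```python
provider = GeminiModelProvider(api_key="...")  # illustrative placeholder key

assert provider.get_thinking_budget("flash", "medium") == int(24576 * 0.33)  # 8110
assert provider.get_thinking_budget("pro", "max") == 32768  # full Pro ceiling
assert provider.get_thinking_budget("flash", "no-such-mode") == 0  # unknown modes return 0
```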
@@ -67,7 +67,7 @@ class ModelProviderRegistry:
         """Get provider instance for a specific model name.
 
         Args:
-            model_name: Name of the model (e.g., "gemini-2.0-flash", "o3-mini")
+            model_name: Name of the model (e.g., "gemini-2.5-flash-preview-05-20", "o3-mini")
 
         Returns:
             ModelProvider instance that supports this model
@@ -137,7 +137,7 @@ class ModelProviderRegistry:
         2. Gemini 2.0 Flash (fast and efficient) if Gemini API key available
         3. OpenAI o3 (high performance) if OpenAI API key available
         4. Gemini 2.5 Pro (deep reasoning) if Gemini API key available
-        5. Fallback to gemini-2.0-flash (most common case)
+        5. Fallback to gemini-2.5-flash-preview-05-20 (most common case)
 
         Returns:
             Model name string for fallback use
@@ -150,11 +150,11 @@ class ModelProviderRegistry:
         if openai_available:
             return "o3-mini"  # Balanced performance/cost
         elif gemini_available:
-            return "gemini-2.0-flash"  # Fast and efficient
+            return "gemini-2.5-flash-preview-05-20"  # Fast and efficient
         else:
             # No API keys available - return a reasonable default
             # This maintains backward compatibility for tests
-            return "gemini-2.0-flash"
+            return "gemini-2.5-flash-preview-05-20"
 
     @classmethod
     def get_available_providers_with_keys(cls) -> list[ProviderType]:
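The net effect on the registry's fallback chain is small but worth noting: the Gemini-available branch and the no-keys default now return the same preview model string, so the distinction only matters for the o3-mini path. A condensed sketch of the selection logic after this change (the standalone function is mine; the real logic lives in the classmethod shown above):

```python
def pick_fallback_model(openai_available: bool, gemini_available: bool) -> str:
    """Condensed version of the branch above, for illustration only."""
    if openai_available:
        return "o3-mini"  # balanced performance/cost
    if gemini_available:
        return "gemini-2.5-flash-preview-05-20"  # fast and efficient
    # No API keys available: same default, kept for test backward compatibility
    return "gemini-2.5-flash-preview-05-20"

assert pick_fallback_model(True, True) == "o3-mini"
assert pick_fallback_model(False, True) == "gemini-2.5-flash-preview-05-20"
assert pick_fallback_model(False, False) == "gemini-2.5-flash-preview-05-20"
```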