diff --git a/conf/gemini_models.json b/conf/gemini_models.json index 23dfb6c..ee1a6ec 100644 --- a/conf/gemini_models.json +++ b/conf/gemini_models.json @@ -26,11 +26,11 @@ }, "models": [ { - "model_name": "gemini-2.5-pro", - "friendly_name": "Gemini (Pro 2.5)", + "model_name": "gemini-3-pro-preview", + "friendly_name": "Gemini Pro 3.0 Preview", "aliases": [ "pro", - "gemini pro", + "gemini3", "gemini-pro" ], "intelligence_score": 18, @@ -48,6 +48,27 @@ "allow_code_generation": true, "max_image_size_mb": 32.0 }, + { + "model_name": "gemini-2.5-pro", + "friendly_name": "Gemini Pro 2.5", + "aliases": [ + "gemini-pro-2.5" + ], + "intelligence_score": 18, + "description": "Older Model. 1M context - Complex problems, architecture, deep analysis", + "context_window": 1048576, + "max_output_tokens": 65536, + "max_thinking_tokens": 32768, + "supports_extended_thinking": true, + "supports_system_prompts": true, + "supports_streaming": true, + "supports_function_calling": true, + "supports_json_mode": true, + "supports_images": true, + "supports_temperature": true, + "allow_code_generation": true, + "max_image_size_mb": 32.0 + }, { "model_name": "gemini-2.0-flash", "friendly_name": "Gemini (Flash 2.0)", diff --git a/config.py b/config.py index be6911c..53f3bb9 100644 --- a/config.py +++ b/config.py @@ -43,21 +43,25 @@ IS_AUTO_MODE = DEFAULT_MODEL.lower() == "auto" # Temperature defaults for different tool types +# NOTE: Gemini 3.0 Pro documentation suggests temperature should be set at 1.0 +# in most cases. Lowering it can affect the model's 'reasoning' abilities. +# Newer models / inference stacks are able to handle their randomness better.
+ # Temperature controls the randomness/creativity of model responses # Lower values (0.0-0.3) produce more deterministic, focused responses # Higher values (0.7-1.0) produce more creative, varied responses # TEMPERATURE_ANALYTICAL: Used for tasks requiring precision and consistency # Ideal for code review, debugging, and error analysis where accuracy is critical -TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging +TEMPERATURE_ANALYTICAL = 1.0 # For code review, debugging # TEMPERATURE_BALANCED: Middle ground for general conversations # Provides a good balance between consistency and helpful variety -TEMPERATURE_BALANCED = 0.5 # For general chat +TEMPERATURE_BALANCED = 1.0 # For general chat # TEMPERATURE_CREATIVE: Higher temperature for exploratory tasks # Used when brainstorming, exploring alternatives, or architectural discussions -TEMPERATURE_CREATIVE = 0.7 # For architecture, deep thinking +TEMPERATURE_CREATIVE = 1.0 # For architecture, deep thinking # Thinking Mode Defaults # DEFAULT_THINKING_MODE_THINKDEEP: Default thinking depth for extended reasoning tool diff --git a/providers/gemini.py b/providers/gemini.py index 27fdac4..90831ab 100644 --- a/providers/gemini.py +++ b/providers/gemini.py @@ -42,14 +42,6 @@ class GeminiModelProvider(RegistryBackedProviderMixin, ModelProvider): "max": 1.0, # 100% of max - full thinking budget } - # Model-specific thinking token limits - MAX_THINKING_TOKENS = { - "gemini-2.0-flash": 24576, # Same as 2.5 flash for consistency - "gemini-2.0-flash-lite": 0, # No thinking support - "gemini-2.5-flash": 24576, # Flash 2.5 thinking budget limit - "gemini-2.5-pro": 32768, # Pro 2.5 thinking budget limit - } - def __init__(self, api_key: str, **kwargs): """Initialize Gemini provider with API key and optional base URL.""" self._ensure_registry() @@ -124,7 +116,7 @@ class GeminiModelProvider(RegistryBackedProviderMixin, ModelProvider): prompt: str, model_name: str, system_prompt: Optional[str] = None, - temperature: float = 
0.3, + temperature: float = 1.0, max_output_tokens: Optional[int] = None, thinking_mode: str = "medium", images: Optional[list[str]] = None, diff --git a/tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json b/tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json index 774e82b..e6ddcaa 100644 --- a/tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json +++ b/tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json @@ -23,7 +23,7 @@ } ], "generationConfig": { - "temperature": 0.5, + "temperature": 1.0, "candidateCount": 1, "thinkingConfig": { "thinkingBudget": 10813 diff --git a/tests/test_auto_mode_comprehensive.py b/tests/test_auto_mode_comprehensive.py index 67677cc..69365e7 100644 --- a/tests/test_auto_mode_comprehensive.py +++ b/tests/test_auto_mode_comprehensive.py @@ -80,7 +80,7 @@ class TestAutoModeComprehensive: "OPENROUTER_API_KEY": None, }, { - "EXTENDED_REASONING": "gemini-2.5-pro", # Pro for deep thinking + "EXTENDED_REASONING": "gemini-3-pro-preview", # Gemini 3 Pro Preview for deep thinking "FAST_RESPONSE": "gemini-2.5-flash", # Flash for speed "BALANCED": "gemini-2.5-flash", # Flash as balanced }, @@ -122,7 +122,7 @@ class TestAutoModeComprehensive: "OPENROUTER_API_KEY": None, }, { - "EXTENDED_REASONING": "gemini-2.5-pro", # Gemini comes first in priority + "EXTENDED_REASONING": "gemini-3-pro-preview", # Gemini 3 Pro Preview comes first in priority "FAST_RESPONSE": "gemini-2.5-flash", # Prefer flash for speed "BALANCED": "gemini-2.5-flash", # Prefer flash for balanced }, @@ -136,7 +136,7 @@ class TestAutoModeComprehensive: "OPENROUTER_API_KEY": None, }, { - "EXTENDED_REASONING": "gemini-2.5-pro", # Gemini comes first in priority + "EXTENDED_REASONING": "gemini-3-pro-preview", # Gemini 3 Pro Preview comes first in priority "FAST_RESPONSE": "gemini-2.5-flash", # Prefer flash for speed "BALANCED": "gemini-2.5-flash", # Prefer flash for balanced }, diff --git 
a/tests/test_auto_mode_provider_selection.py b/tests/test_auto_mode_provider_selection.py index 3a24c69..c60d446 100644 --- a/tests/test_auto_mode_provider_selection.py +++ b/tests/test_auto_mode_provider_selection.py @@ -59,7 +59,7 @@ class TestAutoModeProviderSelection: balanced = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.BALANCED) # Should select appropriate Gemini models - assert extended_reasoning in ["gemini-2.5-pro", "pro"] + assert extended_reasoning in ["gemini-3-pro-preview", "gemini-2.5-pro", "pro"] assert fast_response in ["gemini-2.5-flash", "flash"] assert balanced in ["gemini-2.5-flash", "flash"] @@ -139,7 +139,7 @@ class TestAutoModeProviderSelection: fast_response = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.FAST_RESPONSE) # Should prefer Gemini now (based on new provider priority: Gemini before OpenAI) - assert extended_reasoning == "gemini-2.5-pro" # Gemini has higher priority now + assert extended_reasoning == "gemini-3-pro-preview" # Gemini 3 Pro Preview has higher priority now # Should prefer Gemini for fast response assert fast_response == "gemini-2.5-flash" # Gemini has higher priority now @@ -317,7 +317,7 @@ class TestAutoModeProviderSelection: # Test that providers resolve aliases correctly test_cases = [ ("flash", ProviderType.GOOGLE, "gemini-2.5-flash"), - ("pro", ProviderType.GOOGLE, "gemini-2.5-pro"), + ("pro", ProviderType.GOOGLE, "gemini-3-pro-preview"), # "pro" now resolves to gemini-3-pro-preview ("mini", ProviderType.OPENAI, "gpt-5-mini"), # "mini" now resolves to gpt-5-mini ("o3mini", ProviderType.OPENAI, "o3-mini"), ("grok", ProviderType.XAI, "grok-4"), diff --git a/tests/test_challenge.py b/tests/test_challenge.py index e9d30a5..01c1517 100644 --- a/tests/test_challenge.py +++ b/tests/test_challenge.py @@ -28,7 +28,7 @@ class TestChallengeTool: assert "reflexive agreement" in self.tool.get_description() assert "critical thinking" in self.tool.get_description() assert 
"reasoned analysis" in self.tool.get_description() - assert self.tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL + assert self.tool.get_default_temperature() == 1.0 # TEMPERATURE_ANALYTICAL def test_requires_model(self): """Test that challenge tool doesn't require a model""" diff --git a/tests/test_config.py b/tests/test_config.py index ba793f9..bef283f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -35,6 +35,6 @@ class TestConfig: def test_temperature_defaults(self): """Test temperature constants""" - assert TEMPERATURE_ANALYTICAL == 0.2 - assert TEMPERATURE_BALANCED == 0.5 - assert TEMPERATURE_CREATIVE == 0.7 + assert TEMPERATURE_ANALYTICAL == 1.0 + assert TEMPERATURE_BALANCED == 1.0 + assert TEMPERATURE_CREATIVE == 1.0 diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 06acd59..1a0a9e1 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -19,7 +19,7 @@ class TestConsensusTool: assert tool.get_name() == "consensus" assert "consensus" in tool.get_description() - assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL + assert tool.get_default_temperature() == 1.0 # TEMPERATURE_ANALYTICAL assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.requires_model() is False # Consensus manages its own models diff --git a/tests/test_consensus_integration.py b/tests/test_consensus_integration.py index 2866c29..b1e6094 100644 --- a/tests/test_consensus_integration.py +++ b/tests/test_consensus_integration.py @@ -29,6 +29,7 @@ GEMINI_REPLAY_ID = "consensus/step2_gemini25_flash_against/mldev" GEMINI_REPLAY_PATH = GEMINI_REPLAY_DIR / "consensus" / "step2_gemini25_flash_against" / "mldev.json" +@pytest.mark.integration @pytest.mark.asyncio @pytest.mark.no_mock_provider @pytest.mark.parametrize("openai_model", ["gpt-5", "gpt-5.1"]) diff --git a/tests/test_debug.py b/tests/test_debug.py index 18c3ac4..9d8d35e 100644 --- a/tests/test_debug.py +++ b/tests/test_debug.py @@ 
-15,7 +15,7 @@ class TestDebugTool: assert tool.get_name() == "debug" assert "debugging and root cause analysis" in tool.get_description() - assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL + assert tool.get_default_temperature() == 1.0 # TEMPERATURE_ANALYTICAL assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.requires_model() is True diff --git a/tests/test_listmodels.py b/tests/test_listmodels.py index e4d558b..8375489 100644 --- a/tests/test_listmodels.py +++ b/tests/test_listmodels.py @@ -66,7 +66,7 @@ class TestListModelsTool: # Check Gemini shows as configured assert "Google Gemini ✅" in content assert "`flash` → `gemini-2.5-flash`" in content - assert "`pro` → `gemini-2.5-pro`" in content + assert "`pro` → `gemini-3-pro-preview`" in content assert "1M context" in content assert "Supports structured code generation" in content diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py index e651395..e2b98b1 100644 --- a/tests/test_model_restrictions.py +++ b/tests/test_model_restrictions.py @@ -74,7 +74,7 @@ class TestModelRestrictionService: # Check Google models assert service.is_allowed(ProviderType.GOOGLE, "flash") assert service.is_allowed(ProviderType.GOOGLE, "pro") - assert service.is_allowed(ProviderType.GOOGLE, "gemini-2.5-pro") + assert service.is_allowed(ProviderType.GOOGLE, "gemini-3-pro-preview") def test_case_insensitive_and_whitespace_handling(self): """Test that model names are case-insensitive and whitespace is trimmed.""" diff --git a/tests/test_per_tool_model_defaults.py b/tests/test_per_tool_model_defaults.py index 19b61d6..f5e71a7 100644 --- a/tests/test_per_tool_model_defaults.py +++ b/tests/test_per_tool_model_defaults.py @@ -117,7 +117,7 @@ class TestModelSelection: model = ModelProviderRegistry.get_preferred_fallback_model(ToolModelCategory.EXTENDED_REASONING) # Gemini should return one of its models for extended reasoning # The default behavior may return 
flash when pro is not explicitly preferred - assert model in ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash"] + assert model in ["gemini-3-pro-preview", "gemini-2.5-flash", "gemini-2.0-flash"] def test_fast_response_with_openai(self): """Test FAST_RESPONSE with OpenAI provider.""" diff --git a/tests/test_planner.py b/tests/test_planner.py index b4b8eba..7bc1a47 100644 --- a/tests/test_planner.py +++ b/tests/test_planner.py @@ -20,7 +20,7 @@ class TestPlannerTool: assert tool.get_name() == "planner" assert "sequential planning" in tool.get_description() - assert tool.get_default_temperature() == 0.5 # TEMPERATURE_BALANCED + assert tool.get_default_temperature() == 1.0 # TEMPERATURE_BALANCED assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.get_default_thinking_mode() == "medium" diff --git a/tests/test_precommit_workflow.py b/tests/test_precommit_workflow.py index 8f6416f..623760c 100644 --- a/tests/test_precommit_workflow.py +++ b/tests/test_precommit_workflow.py @@ -34,8 +34,8 @@ class TestPrecommitWorkflowTool: """Test analytical temperature setting""" tool = PrecommitTool() temp = tool.get_default_temperature() - # Should be analytical temperature (0.2) - assert temp == 0.2 + # Should be analytical temperature (now 1.0) + assert temp == 1.0 def test_request_model_basic_validation(self): """Test basic request model validation""" diff --git a/tests/test_secaudit.py b/tests/test_secaudit.py index cb08f10..531042b 100644 --- a/tests/test_secaudit.py +++ b/tests/test_secaudit.py @@ -17,7 +17,7 @@ class TestSecauditTool: assert tool.get_name() == "secaudit" assert "security audit" in tool.get_description() - assert tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL + assert tool.get_default_temperature() == 1.0 # TEMPERATURE_ANALYTICAL assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING assert tool.requires_model() is True diff --git a/tests/test_supported_models_aliases.py 
b/tests/test_supported_models_aliases.py index 6713a91..3cebe19 100644 --- a/tests/test_supported_models_aliases.py +++ b/tests/test_supported_models_aliases.py @@ -20,7 +20,7 @@ class TestSupportedModelsAliases: # Test specific aliases assert "flash" in provider.MODEL_CAPABILITIES["gemini-2.5-flash"].aliases - assert "pro" in provider.MODEL_CAPABILITIES["gemini-2.5-pro"].aliases + assert "pro" in provider.MODEL_CAPABILITIES["gemini-3-pro-preview"].aliases assert "flash-2.0" in provider.MODEL_CAPABILITIES["gemini-2.0-flash"].aliases assert "flash2" in provider.MODEL_CAPABILITIES["gemini-2.0-flash"].aliases assert "flashlite" in provider.MODEL_CAPABILITIES["gemini-2.0-flash-lite"].aliases @@ -28,14 +28,14 @@ class TestSupportedModelsAliases: # Test alias resolution assert provider._resolve_model_name("flash") == "gemini-2.5-flash" - assert provider._resolve_model_name("pro") == "gemini-2.5-pro" + assert provider._resolve_model_name("pro") == "gemini-3-pro-preview" assert provider._resolve_model_name("flash-2.0") == "gemini-2.0-flash" assert provider._resolve_model_name("flash2") == "gemini-2.0-flash" assert provider._resolve_model_name("flashlite") == "gemini-2.0-flash-lite" # Test case insensitive resolution assert provider._resolve_model_name("Flash") == "gemini-2.5-flash" - assert provider._resolve_model_name("PRO") == "gemini-2.5-pro" + assert provider._resolve_model_name("PRO") == "gemini-3-pro-preview" def test_openai_provider_aliases(self): """Test OpenAI provider's alias structure.""" @@ -133,7 +133,7 @@ class TestSupportedModelsAliases: gemini_models = gemini_provider.list_models(respect_restrictions=False) assert "gemini-2.5-flash" in gemini_models assert "flash" in gemini_models - assert "gemini-2.5-pro" in gemini_models + assert "gemini-3-pro-preview" in gemini_models assert "pro" in gemini_models # Test OpenAI @@ -170,7 +170,7 @@ class TestSupportedModelsAliases: ) assert "gemini-2.5-flash" in gemini_all assert "flash" in gemini_all - assert 
"gemini-2.5-pro" in gemini_all + assert "gemini-3-pro-preview" in gemini_all assert "pro" in gemini_all # All should be lowercase assert all(model == model.lower() for model in gemini_all) diff --git a/tests/test_tools.py b/tests/test_tools.py index a361545..eb9f23c 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -23,7 +23,7 @@ class TestThinkDeepTool: """Test tool metadata""" assert tool.get_name() == "thinkdeep" assert "investigation and reasoning" in tool.get_description() - assert tool.get_default_temperature() == 0.7 + assert tool.get_default_temperature() == 1.0 schema = tool.get_input_schema() # ThinkDeep is now a workflow tool with step-based fields @@ -124,7 +124,7 @@ class TestCodeReviewTool: """Test tool metadata""" assert tool.get_name() == "codereview" assert "code review" in tool.get_description() - assert tool.get_default_temperature() == 0.2 + assert tool.get_default_temperature() == 1.0 schema = tool.get_input_schema() assert "relevant_files" in schema["properties"] @@ -217,7 +217,7 @@ class TestAnalyzeTool: """Test tool metadata""" assert tool.get_name() == "analyze" assert "code analysis" in tool.get_description() - assert tool.get_default_temperature() == 0.2 + assert tool.get_default_temperature() == 1.0 schema = tool.get_input_schema() # New workflow tool requires step-based fields