From 3b4fd88d7e9a3f09fea616a10cb3e9d6c1a0d63b Mon Sep 17 00:00:00 2001 From: Sven Lito Date: Sat, 23 Aug 2025 18:43:51 +0700 Subject: [PATCH 1/2] fix: resolve temperature handling issues for O3/custom models (#245) - Fix consensus tool hardcoded temperature=0.2 bypassing model capabilities - Add intelligent temperature inference for unknown custom models - Support multi-model collaboration (O3, Gemini, Claude, Mistral, DeepSeek) - Only OpenAI O-series and DeepSeek reasoner models reject temperature - Most reasoning models (Gemini Pro, Claude, Mistral) DO support temperature - Comprehensive logging for temperature decisions and user guidance Resolves: https://github.com/BeehiveInnovations/zen-mcp-server/issues/245 --- .gitignore | 1 + providers/custom.py | 63 ++++++++++++++++++++++++++++++++++++++++++--- tools/consensus.py | 21 +++++++++++++-- 3 files changed, 80 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index e7cce2a..be60b01 100644 --- a/.gitignore +++ b/.gitignore @@ -187,3 +187,4 @@ logs/ /worktrees/ test_simulation_files/ +.mcp.json diff --git a/providers/custom.py b/providers/custom.py index 32d07c1..9373909 100644 --- a/providers/custom.py +++ b/providers/custom.py @@ -5,6 +5,7 @@ import os from typing import Optional from .base import ( + FixedTemperatureConstraint, ModelCapabilities, ModelResponse, ProviderType, @@ -13,6 +14,20 @@ from .base import ( from .openai_compatible import OpenAICompatibleProvider from .openrouter_registry import OpenRouterModelRegistry +# Temperature inference patterns +_TEMP_UNSUPPORTED_PATTERNS = [ + "o1", + "o3", + "o4", # OpenAI O-series models + "deepseek-reasoner", + "deepseek-r1", + "r1", # DeepSeek reasoner models +] + +_TEMP_UNSUPPORTED_KEYWORDS = [ + "reasoner", # DeepSeek reasoner variants +] + class CustomProvider(OpenAICompatibleProvider): """Custom API provider for local models. @@ -152,7 +167,16 @@ class CustomProvider(OpenAICompatibleProvider): "Consider adding to custom_models.json for specific capabilities." ) - # Create generic capabilities with conservative defaults + # Infer temperature support from model name for better defaults + supports_temperature, temperature_reason = self._infer_temperature_support(resolved_name) + + logging.warning( + f"Model '{resolved_name}' not found in custom_models.json. Using generic capabilities with inferred settings. " + f"Temperature support: {supports_temperature} ({temperature_reason}). " + "For better accuracy, add this model to your custom_models.json configuration." + ) + + # Create generic capabilities with inferred defaults capabilities = ModelCapabilities( provider=ProviderType.CUSTOM, model_name=resolved_name, @@ -163,8 +187,12 @@ class CustomProvider(OpenAICompatibleProvider): supports_system_prompts=True, supports_streaming=True, supports_function_calling=False, # Conservative default - supports_temperature=True, # Most custom models accept temperature parameter - temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7), + supports_temperature=supports_temperature, + temperature_constraint=( + FixedTemperatureConstraint(1.0) + if not supports_temperature + else RangeTemperatureConstraint(0.0, 2.0, 0.7) + ), ) # Mark as generic for validation purposes @@ -172,6 +200,35 @@ class CustomProvider(OpenAICompatibleProvider): return capabilities + def _infer_temperature_support(self, model_name: str) -> tuple[bool, str]: + """Infer temperature support from model name patterns. + + Returns: + Tuple of (supports_temperature, reason_for_decision) + """ + model_lower = model_name.lower() + + # Check for specific model patterns that don't support temperature + for pattern in _TEMP_UNSUPPORTED_PATTERNS: + if ( + pattern == model_lower + or model_lower.startswith(f"{pattern}-") + or model_lower.startswith(f"openai/{pattern}") + or model_lower.startswith(f"deepseek/{pattern}") + or model_lower.endswith(f"-{pattern}") + or f"/{pattern}" in model_lower + or f"-{pattern}-" in model_lower + ): + return False, f"detected non-temperature-supporting model pattern '{pattern}'" + + # Check for specific keywords that indicate non-supporting variants + for keyword in _TEMP_UNSUPPORTED_KEYWORDS: + if keyword in model_lower: + return False, f"detected non-temperature-supporting keyword '{keyword}'" + + # Default to supporting temperature for most models + return True, "default assumption for unknown custom models" + def get_provider_type(self) -> ProviderType: """Get the provider type.""" return ProviderType.CUSTOM diff --git a/tools/consensus.py b/tools/consensus.py index 29fc50f..7e5d169 100644 --- a/tools/consensus.py +++ b/tools/consensus.py @@ -29,6 +29,7 @@ from mcp.types import TextContent from config import TEMPERATURE_ANALYTICAL from systemprompts import CONSENSUS_PROMPT from tools.shared.base_models import WorkflowRequest +from utils.model_context import ModelContext from .workflow.base import WorkflowTool @@ -546,12 +547,28 @@ of the evidence, even when it strongly points in one direction.""", stance_prompt = model_config.get("stance_prompt") system_prompt = self._get_stance_enhanced_prompt(stance, stance_prompt) - # Call the model + # Get model context for temperature validation + model_context = ModelContext( + model_name=model_name, + provider=provider.get_provider_type(), + provider_instance=provider, + ) + + # Validate temperature against model constraints (respects supports_temperature) + validated_temperature, temp_warnings = self.validate_and_correct_temperature( + self.get_default_temperature(), model_context + ) + + # Log any temperature corrections + for warning in temp_warnings: + logger.warning(warning) + + # Call the model with validated temperature response = provider.generate_content( prompt=prompt, model_name=model_name, system_prompt=system_prompt, - temperature=0.2, # Low temperature for consistency + temperature=validated_temperature, thinking_mode="medium", images=request.images if request.images else None, ) From 6bd9d6709acfb584ab30a0a4d6891cabdb6d3ccf Mon Sep 17 00:00:00 2001 From: Sven Lito Date: Sat, 23 Aug 2025 18:50:49 +0700 Subject: [PATCH 2/2] fix: address test failures and PR feedback - Fix ModelContext constructor call in consensus tool (remove invalid parameters) - Refactor temperature pattern matching for better readability per code review - All tests now passing (799/799 passed) --- providers/custom.py | 19 ++++++++++--------- tools/consensus.py | 6 +----- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/providers/custom.py b/providers/custom.py index 9373909..64c5d68 100644 --- a/providers/custom.py +++ b/providers/custom.py @@ -210,15 +210,16 @@ class CustomProvider(OpenAICompatibleProvider): # Check for specific model patterns that don't support temperature for pattern in _TEMP_UNSUPPORTED_PATTERNS: - if ( - pattern == model_lower - or model_lower.startswith(f"{pattern}-") - or model_lower.startswith(f"openai/{pattern}") - or model_lower.startswith(f"deepseek/{pattern}") - or model_lower.endswith(f"-{pattern}") - or f"/{pattern}" in model_lower - or f"-{pattern}-" in model_lower - ): + conditions = ( + pattern == model_lower, + model_lower.startswith(f"{pattern}-"), + model_lower.startswith(f"openai/{pattern}"), + model_lower.startswith(f"deepseek/{pattern}"), + model_lower.endswith(f"-{pattern}"), + f"/{pattern}" in model_lower, + f"-{pattern}-" in model_lower, + ) + if any(conditions): return False, f"detected non-temperature-supporting model pattern '{pattern}'" # Check for specific keywords that indicate non-supporting variants diff --git a/tools/consensus.py b/tools/consensus.py index 7e5d169..cc88697 100644 --- a/tools/consensus.py +++ b/tools/consensus.py @@ -548,11 +548,7 @@ of the evidence, even when it strongly points in one direction.""", system_prompt = self._get_stance_enhanced_prompt(stance, stance_prompt) # Get model context for temperature validation - model_context = ModelContext( - model_name=model_name, - provider=provider.get_provider_type(), - provider_instance=provider, - ) + model_context = ModelContext(model_name=model_name) # Validate temperature against model constraints (respects supports_temperature) validated_temperature, temp_warnings = self.validate_and_correct_temperature(