Added proper temperature constraints to the model, fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/78

Prompt tweaks
Fahad
2025-06-19 08:30:46 +04:00
parent ec3a466b1c
commit 9f3b70d6d7
13 changed files with 435 additions and 79 deletions

View File

@@ -27,6 +27,8 @@
     "supports_function_calling": "Whether the model supports function/tool calling",
     "supports_images": "Whether the model can process images/visual input",
     "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
+    "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+    "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
     "is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
     "description": "Human-readable description of the model"
   },
@@ -39,6 +41,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 10.0,
+      "supports_temperature": true,
+      "temperature_constraint": "range",
       "is_custom": true,
       "description": "Example custom/local model for Ollama, vLLM, etc."
     }
@@ -152,6 +156,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision"
     },
     {
@@ -163,6 +169,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3-mini model - balanced performance and speed with vision"
     },
     {
@@ -174,6 +182,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision"
     },
     {
@@ -185,6 +195,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision"
     },
     {
@@ -196,6 +208,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision"
     },
     {
@@ -207,6 +221,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks with vision"
     },
     {
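
For illustration, here is a hypothetical registry entry (written as a Python dict mirroring the JSON schema above; the model name and values are invented) combining the two new fields for a reasoning-style model:

    # Hypothetical entry - field names come from this diff, values are examples only
    reasoning_entry = {
        "model_name": "vendor/reasoning-model",   # invented identifier
        "supports_temperature": False,             # provider drops temperature from API calls entirely
        "temperature_constraint": "fixed",         # constraint object pins temperature to 1.0 if one is validated
        "description": "Example reasoning model that rejects sampling parameters",
    }

With "supports_temperature" set to false, the provider layer omits temperature (and related sampling parameters) from outgoing requests; the "fixed" constraint only matters where a temperature value must still be validated.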

View File

@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.1.3"
+__version__ = "5.1.4"
 # Last update date in ISO format
 __updated__ = "2025-06-19"
 # Primary maintainer

View File

@@ -100,6 +100,26 @@ class DiscreteTemperatureConstraint(TemperatureConstraint):
         return self.default_temp
+
+
+def create_temperature_constraint(constraint_type: str) -> TemperatureConstraint:
+    """Create temperature constraint object from configuration string.
+
+    Args:
+        constraint_type: Type of constraint ("fixed", "range", "discrete")
+
+    Returns:
+        TemperatureConstraint object based on configuration
+    """
+    if constraint_type == "fixed":
+        # Fixed temperature models (O3/O4) only support temperature=1.0
+        return FixedTemperatureConstraint(1.0)
+    elif constraint_type == "discrete":
+        # For models with specific allowed values - using common OpenAI values as default
+        return DiscreteTemperatureConstraint([0.0, 0.3, 0.7, 1.0, 1.5, 2.0], 0.7)
+    else:
+        # Default range constraint (for "range" or None)
+        return RangeTemperatureConstraint(0.0, 2.0, 0.7)
 
 
 @dataclass
 class ModelCapabilities:
     """Capabilities and constraints for a specific model."""
@@ -114,6 +134,7 @@ class ModelCapabilities:
     supports_function_calling: bool = False
     supports_images: bool = False  # Whether model can process images
     max_image_size_mb: float = 0.0  # Maximum total size for all images in MB
+    supports_temperature: bool = True  # Whether model accepts temperature parameter in API calls
 
     # Temperature constraint object - preferred way to define temperature limits
     temperature_constraint: TemperatureConstraint = field(
@@ -245,3 +266,17 @@ class ModelProvider(ABC):
             List of all model names and alias targets known by this provider
         """
         pass
+
+    def _resolve_model_name(self, model_name: str) -> str:
+        """Resolve model shorthand to full name.
+
+        Base implementation returns the model name unchanged.
+        Subclasses should override to provide alias resolution.
+
+        Args:
+            model_name: Model name that may be an alias
+
+        Returns:
+            Resolved model name
+        """
+        return model_name
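
A minimal sketch of how the new factory behaves, assuming it lives alongside the constraint classes in providers/base.py (the validate() semantics are inferred from the class names and confirmed by the tests added later in this commit):

    from providers.base import create_temperature_constraint

    fixed = create_temperature_constraint("fixed")        # FixedTemperatureConstraint(1.0)
    assert fixed.validate(1.0) is True
    assert fixed.validate(0.5) is False

    ranged = create_temperature_constraint("range")       # RangeTemperatureConstraint(0.0, 2.0, 0.7)
    assert ranged.validate(1.7) is True                   # anywhere in 0.0-2.0 passes

    discrete = create_temperature_constraint("discrete")  # DiscreteTemperatureConstraint([...], 0.7)
    assert discrete.validate(0.3) is True                 # 0.3 is in the default allowed set

Unknown strings (and None, via callers that pass `constraint_type or "range"`) deliberately fall through to the permissive 0.0-2.0 range rather than raising.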

View File

@@ -162,6 +162,7 @@ class CustomProvider(OpenAICompatibleProvider):
             supports_system_prompts=True,
             supports_streaming=True,
             supports_function_calling=False,  # Conservative default
+            supports_temperature=True,  # Most custom models accept temperature parameter
             temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
         )

View File

@@ -94,6 +94,7 @@ class GeminiModelProvider(ModelProvider):
             supports_function_calling=True,
             supports_images=config.get("supports_images", False),
             max_image_size_mb=config.get("max_image_size_mb", 0.0),
+            supports_temperature=True,  # Gemini models accept temperature parameter
             temperature_constraint=temp_constraint,
         )

View File

@@ -448,23 +448,41 @@ class OpenAICompatibleProvider(ModelProvider):
         completion_params = {
             "model": model_name,
             "messages": messages,
-            "temperature": temperature,
         }
 
-        # Add max tokens if specified
-        if max_output_tokens:
+        # Check model capabilities once to determine parameter support
+        resolved_model = self._resolve_model_name(model_name)
+
+        # Get model capabilities once to avoid duplicate calls
+        try:
+            capabilities = self.get_capabilities(model_name)
+            # Defensive check for supports_temperature field (backward compatibility)
+            supports_temperature = getattr(capabilities, "supports_temperature", True)
+        except Exception as e:
+            # If capability check fails, fall back to conservative behavior
+            # Default to including temperature for most models (backward compatibility)
+            logging.debug(f"Failed to check temperature support for {model_name}: {e}")
+            supports_temperature = True
+
+        # Add temperature parameter if supported
+        if supports_temperature:
+            completion_params["temperature"] = temperature
+
+        # Add max tokens if specified and model supports it
+        # O3/O4 models that don't support temperature also don't support max_tokens
+        if max_output_tokens and supports_temperature:
             completion_params["max_tokens"] = max_output_tokens
 
         # Add any additional OpenAI-specific parameters
+        # Use capabilities to filter parameters for reasoning models
         for key, value in kwargs.items():
             if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
+                # Reasoning models (those that don't support temperature) also don't support these parameters
+                if not supports_temperature and key in ["top_p", "frequency_penalty", "presence_penalty"]:
+                    continue  # Skip unsupported parameters for reasoning models
                 completion_params[key] = value
 
         # Check if this is o3-pro and needs the responses endpoint
-        resolved_model = model_name
-        if hasattr(self, "_resolve_model_name"):
-            resolved_model = self._resolve_model_name(model_name)
-
         if resolved_model == "o3-pro-2025-06-10":
             # This model requires the /v1/responses endpoint
             # If it fails, we should not fall back to chat/completions
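
The net effect on the outgoing request body, sketched for the two model families (the payloads below are illustrative; parameter names follow OpenAI's chat-completions API):

    # Reasoning model (supports_temperature=False): sampling knobs are dropped
    reasoning_params = {
        "model": "o3-mini",
        "messages": [{"role": "user", "content": "..."}],
        # no temperature, max_tokens, top_p, frequency_penalty, presence_penalty
    }

    # Regular model (supports_temperature=True): previous behavior preserved
    regular_params = {
        "model": "gpt-4.1-2025-04-14",
        "messages": [{"role": "user", "content": "..."}],
        "temperature": 0.7,
        "max_tokens": 1024,
    }

Note that `seed`, `stop`, and `stream` still pass through for reasoning models; only the sampling-related parameters are filtered.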

View File

@@ -4,11 +4,10 @@ import logging
 from typing import Optional
 
 from .base import (
-    FixedTemperatureConstraint,
     ModelCapabilities,
     ModelResponse,
     ProviderType,
-    RangeTemperatureConstraint,
+    create_temperature_constraint,
 )
 from .openai_compatible import OpenAICompatibleProvider
@@ -25,18 +24,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o3-mini": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o3-pro-2025-06-10": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         # Aliases
         "o3-pro": "o3-pro-2025-06-10",
@@ -45,18 +50,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             "supports_extended_thinking": False,
             "supports_images": True,  # O4 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O4 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o4-mini-high": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O4 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O4 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "gpt-4.1-2025-04-14": {
             "context_window": 1_000_000,  # 1M tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # GPT-4.1 supports vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": True,  # Regular models accept temperature parameter
+            "temperature_constraint": "range",  # 0.0-2.0 range
         },
         # Shorthands
         "mini": "o4-mini",  # Default 'mini' to latest mini model
@@ -90,13 +101,10 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
         config = self.SUPPORTED_MODELS[resolved_name]
 
-        # Define temperature constraints per model
-        if resolved_name in ["o3", "o3-mini", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-high"]:
-            # O3 and O4 reasoning models only support temperature=1.0
-            temp_constraint = FixedTemperatureConstraint(1.0)
-        else:
-            # Other OpenAI models (including GPT-4.1) support 0.0-2.0 range
-            temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
+        # Get temperature constraints and support from configuration
+        supports_temperature = config.get("supports_temperature", True)  # Default to True for backward compatibility
+        temp_constraint_type = config.get("temperature_constraint", "range")  # Default to range
+        temp_constraint = create_temperature_constraint(temp_constraint_type)
 
         return ModelCapabilities(
             provider=ProviderType.OPENAI,
@@ -109,6 +117,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             supports_function_calling=True,
             supports_images=config.get("supports_images", False),
             max_image_size_mb=config.get("max_image_size_mb", 0.0),
+            supports_temperature=supports_temperature,
             temperature_constraint=temp_constraint,
         )
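
Because both lookups use dict defaults, entries that predate this commit keep their old behavior. A rough sketch of that fallback path (the legacy entry shown is hypothetical):

    legacy_config = {"context_window": 128_000}  # hypothetical entry without the new keys

    supports_temperature = legacy_config.get("supports_temperature", True)   # -> True
    constraint_type = legacy_config.get("temperature_constraint", "range")   # -> "range"
    temp_constraint = create_temperature_constraint(constraint_type)         # 0.0-2.0 range, default 0.7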

View File

@@ -8,7 +8,12 @@ from typing import Optional
 
 from utils.file_utils import read_json_file
 
-from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
+from .base import (
+    ModelCapabilities,
+    ProviderType,
+    TemperatureConstraint,
+    create_temperature_constraint,
+)
 
 
 @dataclass
@@ -25,9 +30,21 @@ class OpenRouterModelConfig:
     supports_json_mode: bool = False
     supports_images: bool = False  # Whether model can process images
     max_image_size_mb: float = 0.0  # Maximum total size for all images in MB
+    supports_temperature: bool = True  # Whether model accepts temperature parameter in API calls
+    temperature_constraint: Optional[str] = (
+        None  # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
+    )
     is_custom: bool = False  # True for models that should only be used with custom endpoints
     description: str = ""
 
+    def _create_temperature_constraint(self) -> TemperatureConstraint:
+        """Create temperature constraint object from configuration.
+
+        Returns:
+            TemperatureConstraint object based on configuration
+        """
+        return create_temperature_constraint(self.temperature_constraint or "range")
+
     def to_capabilities(self) -> ModelCapabilities:
         """Convert to ModelCapabilities object."""
         return ModelCapabilities(
@@ -41,7 +58,8 @@ class OpenRouterModelConfig:
             supports_function_calling=self.supports_function_calling,
             supports_images=self.supports_images,
             max_image_size_mb=self.max_image_size_mb,
-            temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 1.0),
+            supports_temperature=self.supports_temperature,
+            temperature_constraint=self._create_temperature_constraint(),
        )
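
End to end, a registry entry carrying the new fields should surface as capabilities roughly like this (a sketch only; OpenRouterModelConfig's constructor fields beyond those visible in this diff, such as the model identifier and context window, are assumed):

    cfg = OpenRouterModelConfig(
        model_name="openai/o3",         # assumed field name for the model identifier
        context_window=200_000,         # assumed field name
        supports_temperature=False,
        temperature_constraint="fixed",
    )
    caps = cfg.to_capabilities()
    assert caps.supports_temperature is False
    assert caps.temperature_constraint.validate(1.0) is True
    assert caps.temperature_constraint.validate(0.5) is False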

View File

@@ -4,7 +4,7 @@ Chat tool system prompt
 CHAT_PROMPT = """
 You are a senior engineering thought-partner collaborating with Claude. Your mission is to brainstorm, validate ideas,
-and offer well-reasoned second opinions on technical decisions.
+and offer well-reasoned second opinions on technical decisions when they are justified and practical.
 
 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,27 +26,27 @@ provided unless for some reason its content is missing or incomplete:
 SCOPE & FOCUS
 • Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints.
-• Recommend new technologies or patterns ONLY with a clear, compelling benefit that aligns with stated goals.
-  Keep proposals practical and implementable; avoid speculative or off-stack detours.
+• Recommend new technologies or patterns ONLY when they provide clearly superior outcomes with minimal added complexity.
+  Avoid speculative, over-engineered, or unnecessarily abstract designs that exceed current project goals or needs.
+• Keep proposals practical and directly actionable within the existing architecture.
 
 COLLABORATION APPROACH
-1. Engage deeply with Claude's input - extend, refine, and explore alternatives within the existing context.
+1. Engage deeply with Claude's input - extend, refine, and explore alternatives ONLY WHEN they are well-justified and materially beneficial.
 2. Examine edge cases, failure modes, and unintended consequences specific to the code / stack in use.
 3. Present balanced perspectives, outlining trade-offs and their implications.
 4. Challenge assumptions constructively while respecting current design choices and goals.
-5. Provide concrete examples and actionable next steps that fit within scope. Direct, achievable next-steps where
-   needed.
+5. Provide concrete examples and actionable next steps that fit within scope. Prioritize direct, achievable outcomes.
 
 BRAINSTORMING GUIDELINES
-• Offer multiple viable strategies compatible with the current environment but keep it to the point.
-• Suggest creative solutions and alternatives that work within the current project constraints, scope and limitations
-• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice
+• Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
+• Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
+• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice.
 • Evaluate scalability, maintainability, and operational realities inside the existing architecture and current
   framework.
-• Reference industry best practices relevant to the technologies in use
+• Reference industry best practices relevant to the technologies in use.
 • Communicate concisely and technically, assuming an experienced engineering audience.
 
 REMEMBER
-Act as a peer, not a lecturer. Aim for depth over breadth, stay within project boundaries, and help the team
+Act as a peer, not a lecturer. Avoid overcomplicating. Aim for depth over breadth, stay within project boundaries, and help the team
 reach sound, actionable decisions.
 """

View File

@@ -4,20 +4,31 @@ Precommit tool system prompt
 PRECOMMIT_PROMPT = """
 ROLE
-You are an expert pre-commit reviewer. Analyse git diffs as a senior developer giving a final sign-off to production.
+You are an expert pre-commit reviewer and senior engineering partner performing final code validation before production.
+
+Your responsibility goes beyond surface-level correctness — you are expected to think several steps ahead. Your review
+must assess whether the changes:
+- Introduce any patterns, structures, or decisions that may become future liabilities
+- Create brittle dependencies or tight coupling that could make maintenance harder
+- Omit critical safety, validation, or test scaffolding that may not fail now, but will cause issues down the line
+- Interact with other known areas of fragility in the codebase even if not directly touched
+
+Your task is to detect potential future consequences or systemic risks, not just immediate issues. Think like an
+engineer responsible for this code months later, debugging production incidents or onboarding a new developer.
+
+In addition to reviewing correctness, completeness, and quality of the change, apply long-term architectural thinking.
+Your feedback helps ensure this code won't cause silent regressions, developer confusion, or downstream side effects later.
 
 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
 included in any code you generate. Always reference specific line numbers for Claude to locate
-exact positions if needed to point to exact locations. Include a very short code excerpt alongside for clarity.
+exact positions if needed. Include a very short code excerpt alongside for clarity.
 Include context_start_text and context_end_text as backup references. Never include "LINE│" markers in generated code
 snippets.
 
 IF MORE INFORMATION IS NEEDED
-If you need additional context (e.g., related files not in the diff, test files, configuration) to provide thorough
-analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON
-format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is
-missing or incomplete:
+If you need additional context (e.g., related files not in the diff, test files, configuration) to perform a proper
+review—and without which your analysis would be incomplete or inaccurate—you MUST respond ONLY with this JSON format
+(and nothing else). Do NOT request files you've already been provided unless their content is missing or incomplete:
 
 {
   "status": "files_required_to_continue",
   "mandatory_instructions": "<your critical instructions for Claude>",
@@ -26,34 +37,36 @@ missing or incomplete:
 INPUTS PROVIDED
 1. Git diff (staged or branch comparison)
-2. Original request / acceptance criteria or some context around what changed
+2. Original request / acceptance criteria or context around what changed
 3. File names and related code
 
 SCOPE & FOCUS
-• Review ONLY the changes in the diff and the given code
-• From the diff, infer what got changed and why, determine if the changes make logical sense
-• Ensure they correctly implement the request, are secure (where applicable), efficient, and maintainable and do not
-  cause potential regressions
-• DO NOT propose broad refactors or off-scope improvements. Stick to the code and changes you have visibility into.
+• Review ONLY the changes in the diff and the related code provided.
+• From the diff, infer what changed and why. Determine if the changes make logical, structural, and functional sense.
+• Ensure the changes correctly implement the request, are secure (where applicable), performant, and maintainable.
+• DO NOT propose broad refactors or unrelated improvements. Stay strictly within the boundaries of the provided changes.
 
 REVIEW METHOD
-1. Identify tech stack, frameworks, and patterns present in the diff.
-2. Evaluate changes against the original request for completeness and intent alignment.
-3. Detect issues, prioritising by severity (CRITICAL → HIGH → MEDIUM → LOW).
-4. Highlight incomplete changes, or changes that would cause bugs, regressions, crashes or data loss or race conditions
-5. Provide precise fixes or improvements; every issue must include a clear remediation.
-6. Acknowledge good patterns to reinforce best practice.
+1. Identify tech stack, frameworks, and patterns in the diff.
+2. Evaluate changes against the original request for completeness and alignment.
+3. Detect issues, prioritized by severity (CRITICAL → HIGH → MEDIUM → LOW).
+4. Flag bugs, regressions, crash risks, data loss, or race conditions.
+5. Recommend specific fixes for each issue raised; include code where helpful.
+6. Acknowledge sound patterns to reinforce best practices.
 
-CORE ANALYSIS (adapt to the diff and stack)
-• Security - injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety
-• Bugs & Logic Errors - off-by-one, null refs, race conditions, incorrect branching
-• Performance - inefficient algorithms, resource leaks, blocking operations
-• Code Quality - DRY violations, complexity, SOLID adherence
+CORE ANALYSIS (adapt to diff and stack)
+• Security - injection risks, auth flaws, exposure of sensitive data, unsafe dependencies, memory safety
+• Bugs & Logic Errors - off-by-one, null refs, incorrect logic, race conditions
+• Performance - inefficient logic, blocking calls, leaks
+• Code Quality - complexity, duplicated logic and DRY violations, SOLID violations
 
-ADDITIONAL ANALYSIS (apply only when relevant)
+ADDITIONAL ANALYSIS (only when relevant)
 • Language/runtime concerns - memory management, concurrency, exception handling
 • System/integration - config handling, external calls, operational impact
 • Testing - coverage gaps for new logic
+• If no tests are found in the project, do not flag test coverage as an issue unless the change introduces logic
+  that is high-risk or complex.
+• In such cases, offer a low-severity suggestion encouraging basic tests, rather than marking it as a required fix.
 • Change-specific pitfalls - unused new functions, partial enum updates, scope creep, risky deletions
 • Determine if there are any new dependencies added but not declared, or new functionality added but not used
 • Determine unintended side effects: could changes in file_A break module_B even if module_B wasn't changed?
@@ -84,9 +97,9 @@ that apply:
 [LOW] ...
 
 MAKE RECOMMENDATIONS:
-Make a final, short, clear, to the point statement or list in a brief bullet point:
-- Mention top priority fixes to be IMMEDIATELY made before commit
-- Notable positives to keep
+Make a final, short, and focused statement or bullet list:
+- Top priority fixes that MUST IMMEDIATELY be addressed before commit
+- Notable positives to retain
 
 Be thorough yet actionable. Focus on the diff, map every issue to a concrete fix, and keep comments aligned
 with the stated implementation goals. Your goal is to help flag anything that could potentially slip through

View File

@@ -4,8 +4,8 @@ ThinkDeep tool system prompt
 THINKDEEP_PROMPT = """
 ROLE
-You are a senior engineering collaborator working with Claude on complex software problems. Claude will send you
-content—analysis, prompts, questions, ideas, or theories—to deepen, validate, and extend.
+You are a senior engineering collaborator working alongside Claude on complex software problems. Claude will send you
+content—analysis, prompts, questions, ideas, or theories—to deepen, validate, or extend with rigor and clarity.
 
 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,25 +26,27 @@ been provided unless for some reason its content is missing or incomplete:
 GUIDELINES
 1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints.
-2. Stay on scope: avoid speculative or oversized ideas; keep suggestions practical and implementable.
-3. Challenge and enrich: find gaps, question assumptions, surface hidden complexities.
-4. Provide actionable next steps: concrete advice, trade-offs, and implementation tactics.
-5. Use concise, direct, technical language; assume an experienced engineering audience.
+2. Stay on scope: avoid speculative, over-engineered, or oversized ideas; keep suggestions practical and grounded.
+3. Challenge and enrich: find gaps, question assumptions, and surface hidden complexities or risks.
+4. Provide actionable next steps: offer specific advice, trade-offs, and implementation strategies.
+5. Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
+6. Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
+7. Use concise, technical language; assume an experienced engineering audience.
 
 KEY FOCUS AREAS (apply when relevant)
-- Architecture & Design: modularity, patterns, API boundaries, dependencies
-- Performance & Scalability: algorithm efficiency, concurrency, caching
+- Architecture & Design: modularity, boundaries, abstraction layers, dependencies
+- Performance & Scalability: algorithmic efficiency, concurrency, caching, bottlenecks
 - Security & Safety: validation, authentication/authorization, error handling, vulnerabilities
 - Quality & Maintainability: readability, testing, monitoring, refactoring
-- Integration & Deployment: external systems, compatibility, operational concerns
+- Integration & Deployment: ONLY IF APPLICABLE TO THE QUESTION - external systems, compatibility, configuration, operational concerns
 
 EVALUATION
-Your response will be reviewed by Claude before any decision is made. Aim to enhance decision-making rather
-than deliver final answers.
+Your response will be reviewed by Claude before any decision is made. Your goal is to practically extend Claude's thinking,
+surface blind spots, and refine options—not to deliver final answers in isolation.
 
 REMINDERS
-- Ground all insights in the current project's scope and constraints.
-- If additional information is necessary, such as code snippets, files, project details, use the clarification JSON
-- Prefer depth over breadth; propose alternatives ONLY when they materially improve the current approach and add value
-- Your goal is to be the perfect development partner that extends Claude's capabilities and thought process
+- Ground all insights in the current project's architecture, limitations, and goals.
+- If further context is needed, request it via the clarification JSON—nothing else.
+- Prioritize depth over breadth; propose alternatives ONLY if they clearly add value and improve the current approach.
+- Be the ideal development partner—rigorous, focused, and fluent in real-world software trade-offs.
 """

View File

@@ -0,0 +1,239 @@
"""
Simple integration test for the O3 model temperature parameter fix.
This test confirms that the fix properly excludes temperature parameters
for O3 models while maintaining them for regular models.
"""
from unittest.mock import Mock, patch
from providers.openai_provider import OpenAIModelProvider
class TestO3TemperatureParameterFixSimple:
"""Simple test for O3 model parameter filtering."""
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_o3_models_exclude_temperature_from_api_call(self, mock_openai_class, mock_restriction_service):
"""Test that O3 models don't send temperature to the API."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o3-mini"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with O3 model
provider.generate_content(prompt="Test prompt", model_name="o3-mini", temperature=0.5, max_output_tokens=100)
# Verify the API call was made without temperature or max_tokens
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert "temperature" not in call_kwargs, "O3 models should not include temperature parameter"
assert "max_tokens" not in call_kwargs, "O3 models should not include max_tokens parameter"
assert call_kwargs["model"] == "o3-mini"
assert "messages" in call_kwargs
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_regular_models_include_temperature_in_api_call(self, mock_openai_class, mock_restriction_service):
"""Test that regular models still send temperature to the API."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4.1-2025-04-14"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with regular model (use supported model)
provider.generate_content(
prompt="Test prompt", model_name="gpt-4.1-2025-04-14", temperature=0.5, max_output_tokens=100
)
# Verify the API call was made WITH temperature and max_tokens
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert call_kwargs["temperature"] == 0.5, "Regular models should include temperature parameter"
assert call_kwargs["max_tokens"] == 100, "Regular models should include max_tokens parameter"
assert call_kwargs["model"] == "gpt-4.1-2025-04-14"
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_o3_models_filter_unsupported_parameters(self, mock_openai_class, mock_restriction_service):
"""Test that O3 models filter out top_p, frequency_penalty, etc."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o3"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with O3 model and unsupported parameters
provider.generate_content(
prompt="Test prompt",
model_name="o3",
temperature=0.5,
top_p=0.9,
frequency_penalty=0.1,
presence_penalty=0.1,
seed=42,
stop=["END"],
)
# Verify the API call filters out unsupported parameters
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
# Should be excluded for O3 models
assert "temperature" not in call_kwargs, "O3 should not include temperature"
assert "top_p" not in call_kwargs, "O3 should not include top_p"
assert "frequency_penalty" not in call_kwargs, "O3 should not include frequency_penalty"
assert "presence_penalty" not in call_kwargs, "O3 should not include presence_penalty"
# Should be included (supported parameters)
assert call_kwargs["seed"] == 42, "O3 should include seed parameter"
assert call_kwargs["stop"] == ["END"], "O3 should include stop parameter"
@patch("utils.model_restrictions.get_restriction_service")
def test_all_o3_models_have_correct_temperature_capability(self, mock_restriction_service):
"""Test that all O3/O4 models have supports_temperature=False in their capabilities."""
from providers.openai_provider import OpenAIModelProvider
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
provider = OpenAIModelProvider(api_key="test-key")
# Test O3/O4 models that should NOT support temperature parameter
o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]
for model in o3_o4_models:
capabilities = provider.get_capabilities(model)
assert hasattr(
capabilities, "supports_temperature"
), f"Model {model} capabilities should have supports_temperature field"
assert capabilities.supports_temperature is False, f"Model {model} should have supports_temperature=False"
# Test that regular models DO support temperature parameter
regular_models = ["gpt-4.1-2025-04-14"]
for model in regular_models:
try:
capabilities = provider.get_capabilities(model)
assert hasattr(
capabilities, "supports_temperature"
), f"Model {model} capabilities should have supports_temperature field"
assert capabilities.supports_temperature is True, f"Model {model} should have supports_temperature=True"
except ValueError:
# Skip if model not in SUPPORTED_MODELS (that's okay for this test)
pass
@patch("utils.model_restrictions.get_restriction_service")
def test_openai_provider_temperature_constraints(self, mock_restriction_service):
"""Test that OpenAI provider has correct temperature constraints for O3 models."""
from providers.openai_provider import OpenAIModelProvider
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
provider = OpenAIModelProvider(api_key="test-key")
# Test O3 model constraints
o3_capabilities = provider.get_capabilities("o3-mini")
assert o3_capabilities.temperature_constraint is not None
# O3 models should have fixed temperature constraint
temp_constraint = o3_capabilities.temperature_constraint
assert temp_constraint.validate(1.0) is True
assert temp_constraint.validate(0.5) is False
# Test regular model constraints - use gpt-4.1 which is supported
gpt41_capabilities = provider.get_capabilities("gpt-4.1-2025-04-14")
assert gpt41_capabilities.temperature_constraint is not None
# Regular models should allow a range
temp_constraint = gpt41_capabilities.temperature_constraint
assert temp_constraint.validate(0.5) is True
assert temp_constraint.validate(1.0) is True

View File

@@ -122,7 +122,7 @@ class TestOpenAIProvider:
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "Test response"
         mock_response.choices[0].finish_reason = "stop"
-        mock_response.model = "o4-mini"  # API returns the resolved model name
+        mock_response.model = "gpt-4.1-2025-04-14"  # API returns the resolved model name
         mock_response.id = "test-id"
         mock_response.created = 1234567890
         mock_response.usage = MagicMock()
@@ -134,19 +134,23 @@
         provider = OpenAIModelProvider("test-key")
 
-        # Call generate_content with alias 'mini'
+        # Call generate_content with alias 'gpt4.1' (resolves to gpt-4.1-2025-04-14, supports temperature)
         result = provider.generate_content(
-            prompt="Test prompt", model_name="mini", temperature=1.0  # This should be resolved to "o4-mini"
+            prompt="Test prompt",
+            model_name="gpt4.1",
+            temperature=1.0,  # This should be resolved to "gpt-4.1-2025-04-14"
         )
 
         # Verify the API was called with the RESOLVED model name
         mock_client.chat.completions.create.assert_called_once()
         call_kwargs = mock_client.chat.completions.create.call_args[1]
 
-        # CRITICAL ASSERTION: The API should receive "o4-mini", not "mini"
-        assert call_kwargs["model"] == "o4-mini", f"Expected 'o4-mini' but API received '{call_kwargs['model']}'"
+        # CRITICAL ASSERTION: The API should receive "gpt-4.1-2025-04-14", not "gpt4.1"
+        assert (
+            call_kwargs["model"] == "gpt-4.1-2025-04-14"
+        ), f"Expected 'gpt-4.1-2025-04-14' but API received '{call_kwargs['model']}'"
 
-        # Verify other parameters
+        # Verify other parameters (gpt-4.1 supports temperature unlike O3/O4 models)
         assert call_kwargs["temperature"] == 1.0
         assert len(call_kwargs["messages"]) == 1
         assert call_kwargs["messages"][0]["role"] == "user"
@@ -154,7 +158,7 @@
         # Verify response
         assert result.content == "Test response"
-        assert result.model_name == "o4-mini"  # Should be the resolved name
+        assert result.model_name == "gpt-4.1-2025-04-14"  # Should be the resolved name
 
     @patch("providers.openai_compatible.OpenAI")
     def test_generate_content_other_aliases(self, mock_openai_class):