Added proper temperature constraints to the model, fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/78
Prompt tweaks
@@ -27,6 +27,8 @@
   "supports_function_calling": "Whether the model supports function/tool calling",
   "supports_images": "Whether the model can process images/visual input",
   "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
+  "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+  "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
   "is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
   "description": "Human-readable description of the model"
 },
@@ -39,6 +41,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 10.0,
+  "supports_temperature": true,
+  "temperature_constraint": "range",
   "is_custom": true,
   "description": "Example custom/local model for Ollama, vLLM, etc."
 }
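Reviewer note: the two new fields above feed the create_temperature_constraint() factory introduced in providers/base.py later in this commit. A minimal sketch of the round trip, assuming the registry hands the parsed JSON entry to that helper (the entry literal here is illustrative, not from the file):

    from providers.base import create_temperature_constraint

    entry = {"supports_temperature": True, "temperature_constraint": "range"}
    constraint = create_temperature_constraint(entry.get("temperature_constraint", "range"))
    assert constraint.validate(0.7)      # default 0.0-2.0 range accepts mid values
    assert not constraint.validate(3.0)  # out-of-range values rejected (assumed behavior)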
@@ -152,6 +156,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 20.0,
+  "supports_temperature": false,
+  "temperature_constraint": "fixed",
   "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision"
 },
 {
@@ -163,6 +169,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 20.0,
+  "supports_temperature": false,
+  "temperature_constraint": "fixed",
   "description": "OpenAI's o3-mini model - balanced performance and speed with vision"
 },
 {
@@ -174,6 +182,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 20.0,
+  "supports_temperature": false,
+  "temperature_constraint": "fixed",
   "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision"
 },
 {
@@ -185,6 +195,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 20.0,
+  "supports_temperature": false,
+  "temperature_constraint": "fixed",
   "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision"
 },
 {
@@ -196,6 +208,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 20.0,
+  "supports_temperature": false,
+  "temperature_constraint": "fixed",
   "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision"
 },
 {
@@ -207,6 +221,8 @@
   "supports_function_calling": true,
   "supports_images": true,
   "max_image_size_mb": 20.0,
+  "supports_temperature": false,
+  "temperature_constraint": "fixed",
   "description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks with vision"
 },
 {
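Reviewer note: every O3/O4 entry above pairs "supports_temperature": false with "temperature_constraint": "fixed". Per the new tests at the end of this commit, the resulting FixedTemperatureConstraint accepts only its pinned value. A sketch:

    from providers.base import FixedTemperatureConstraint

    fixed = FixedTemperatureConstraint(1.0)   # O3/O4 reasoning models are pinned to 1.0
    assert fixed.validate(1.0) is True
    assert fixed.validate(0.5) is False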
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.1.3"
+__version__ = "5.1.4"
 # Last update date in ISO format
 __updated__ = "2025-06-19"
 # Primary maintainer
@@ -100,6 +100,26 @@ class DiscreteTemperatureConstraint(TemperatureConstraint):
         return self.default_temp


+def create_temperature_constraint(constraint_type: str) -> TemperatureConstraint:
+    """Create temperature constraint object from configuration string.
+
+    Args:
+        constraint_type: Type of constraint ("fixed", "range", "discrete")
+
+    Returns:
+        TemperatureConstraint object based on configuration
+    """
+    if constraint_type == "fixed":
+        # Fixed temperature models (O3/O4) only support temperature=1.0
+        return FixedTemperatureConstraint(1.0)
+    elif constraint_type == "discrete":
+        # For models with specific allowed values - using common OpenAI values as default
+        return DiscreteTemperatureConstraint([0.0, 0.3, 0.7, 1.0, 1.5, 2.0], 0.7)
+    else:
+        # Default range constraint (for "range" or None)
+        return RangeTemperatureConstraint(0.0, 2.0, 0.7)
+
+
 @dataclass
 class ModelCapabilities:
     """Capabilities and constraints for a specific model."""
@@ -114,6 +134,7 @@ class ModelCapabilities:
     supports_function_calling: bool = False
     supports_images: bool = False  # Whether model can process images
     max_image_size_mb: float = 0.0  # Maximum total size for all images in MB
+    supports_temperature: bool = True  # Whether model accepts temperature parameter in API calls

     # Temperature constraint object - preferred way to define temperature limits
     temperature_constraint: TemperatureConstraint = field(
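Sanity check for the factory above: each branch maps a config string to one constraint class, and unknown strings fall through to the default range. A sketch using only behavior visible in this commit:

    from providers.base import create_temperature_constraint

    fixed = create_temperature_constraint("fixed")       # FixedTemperatureConstraint(1.0)
    rng = create_temperature_constraint("range")         # RangeTemperatureConstraint(0.0, 2.0, 0.7)
    fallback = create_temperature_constraint("unknown")  # else-branch: same default range
    assert fixed.validate(1.0) and not fixed.validate(0.5)
    assert rng.validate(0.5) and rng.validate(1.0)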
@@ -245,3 +266,17 @@ class ModelProvider(ABC):
         List of all model names and alias targets known by this provider
         """
         pass
+
+    def _resolve_model_name(self, model_name: str) -> str:
+        """Resolve model shorthand to full name.
+
+        Base implementation returns the model name unchanged.
+        Subclasses should override to provide alias resolution.
+
+        Args:
+            model_name: Model name that may be an alias
+
+        Returns:
+            Resolved model name
+        """
+        return model_name
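The base hook is intentionally a pass-through; concrete providers override it to expand shorthands. A hypothetical override (class name illustrative; the alias pairs match the shorthands and tests elsewhere in this commit):

    class ExampleProvider(ModelProvider):
        ALIASES = {"mini": "o4-mini", "gpt4.1": "gpt-4.1-2025-04-14"}

        def _resolve_model_name(self, model_name: str) -> str:
            # Expand a shorthand to its canonical name, otherwise pass through
            return self.ALIASES.get(model_name, model_name)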
@@ -162,6 +162,7 @@ class CustomProvider(OpenAICompatibleProvider):
             supports_system_prompts=True,
             supports_streaming=True,
             supports_function_calling=False,  # Conservative default
+            supports_temperature=True,  # Most custom models accept temperature parameter
             temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
         )
@@ -94,6 +94,7 @@ class GeminiModelProvider(ModelProvider):
             supports_function_calling=True,
             supports_images=config.get("supports_images", False),
             max_image_size_mb=config.get("max_image_size_mb", 0.0),
+            supports_temperature=True,  # Gemini models accept temperature parameter
             temperature_constraint=temp_constraint,
         )
@@ -448,23 +448,41 @@ class OpenAICompatibleProvider(ModelProvider):
         completion_params = {
             "model": model_name,
             "messages": messages,
-            "temperature": temperature,
         }

-        # Add max tokens if specified
-        if max_output_tokens:
+        # Check model capabilities once to determine parameter support
+        resolved_model = self._resolve_model_name(model_name)
+
+        # Get model capabilities once to avoid duplicate calls
+        try:
+            capabilities = self.get_capabilities(model_name)
+            # Defensive check for supports_temperature field (backward compatibility)
+            supports_temperature = getattr(capabilities, "supports_temperature", True)
+        except Exception as e:
+            # If capability check fails, fall back to conservative behavior
+            # Default to including temperature for most models (backward compatibility)
+            logging.debug(f"Failed to check temperature support for {model_name}: {e}")
+            supports_temperature = True
+
+        # Add temperature parameter if supported
+        if supports_temperature:
+            completion_params["temperature"] = temperature
+
+        # Add max tokens if specified and model supports it
+        # O3/O4 models that don't support temperature also don't support max_tokens
+        if max_output_tokens and supports_temperature:
             completion_params["max_tokens"] = max_output_tokens

         # Add any additional OpenAI-specific parameters
+        # Use capabilities to filter parameters for reasoning models
         for key, value in kwargs.items():
             if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
+                # Reasoning models (those that don't support temperature) also don't support these parameters
+                if not supports_temperature and key in ["top_p", "frequency_penalty", "presence_penalty"]:
+                    continue  # Skip unsupported parameters for reasoning models
                 completion_params[key] = value

         # Check if this is o3-pro and needs the responses endpoint
-        resolved_model = model_name
-        if hasattr(self, "_resolve_model_name"):
-            resolved_model = self._resolve_model_name(model_name)
-
         if resolved_model == "o3-pro-2025-06-10":
             # This model requires the /v1/responses endpoint
             # If it fails, we should not fall back to chat/completions
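Net effect on the outgoing payload, mirroring the assertions in the new test file below (sketched payloads, not excerpts from the code):

    # Reasoning model: temperature, max_tokens, top_p and penalties are dropped; seed/stop pass through
    completion_params = {"model": "o3-mini", "messages": messages, "seed": 42, "stop": ["END"]}
    # Regular model: temperature and max_tokens are forwarded unchanged
    completion_params = {"model": "gpt-4.1-2025-04-14", "messages": messages, "temperature": 0.5, "max_tokens": 100}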
@@ -4,11 +4,10 @@ import logging
 from typing import Optional

 from .base import (
-    FixedTemperatureConstraint,
     ModelCapabilities,
     ModelResponse,
     ProviderType,
-    RangeTemperatureConstraint,
+    create_temperature_constraint,
 )
 from .openai_compatible import OpenAICompatibleProvider
@@ -25,18 +24,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o3-mini": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o3-pro-2025-06-10": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         # Aliases
         "o3-pro": "o3-pro-2025-06-10",
@@ -45,18 +50,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             "supports_extended_thinking": False,
             "supports_images": True,  # O4 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O4 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o4-mini-high": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O4 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O4 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "gpt-4.1-2025-04-14": {
             "context_window": 1_000_000,  # 1M tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # GPT-4.1 supports vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": True,  # Regular models accept temperature parameter
+            "temperature_constraint": "range",  # 0.0-2.0 range
         },
         # Shorthands
         "mini": "o4-mini",  # Default 'mini' to latest mini model
@@ -90,13 +101,10 @@ class OpenAIModelProvider(OpenAICompatibleProvider):

         config = self.SUPPORTED_MODELS[resolved_name]

-        # Define temperature constraints per model
-        if resolved_name in ["o3", "o3-mini", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-high"]:
-            # O3 and O4 reasoning models only support temperature=1.0
-            temp_constraint = FixedTemperatureConstraint(1.0)
-        else:
-            # Other OpenAI models (including GPT-4.1) support 0.0-2.0 range
-            temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
+        # Get temperature constraints and support from configuration
+        supports_temperature = config.get("supports_temperature", True)  # Default to True for backward compatibility
+        temp_constraint_type = config.get("temperature_constraint", "range")  # Default to range
+        temp_constraint = create_temperature_constraint(temp_constraint_type)

         return ModelCapabilities(
             provider=ProviderType.OPENAI,
@@ -109,6 +117,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             supports_function_calling=True,
             supports_images=config.get("supports_images", False),
             max_image_size_mb=config.get("max_image_size_mb", 0.0),
+            supports_temperature=supports_temperature,
             temperature_constraint=temp_constraint,
         )
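With the table-driven lookup above, a capability check reduces to a dictionary read. This mirrors test_all_o3_models_have_correct_temperature_capability in the new test file:

    provider = OpenAIModelProvider(api_key="test-key")
    assert provider.get_capabilities("o3-mini").supports_temperature is False
    assert provider.get_capabilities("gpt-4.1-2025-04-14").supports_temperature is True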
@@ -8,7 +8,12 @@ from typing import Optional

 from utils.file_utils import read_json_file

-from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
+from .base import (
+    ModelCapabilities,
+    ProviderType,
+    TemperatureConstraint,
+    create_temperature_constraint,
+)


 @dataclass
@@ -25,9 +30,21 @@ class OpenRouterModelConfig:
     supports_json_mode: bool = False
     supports_images: bool = False  # Whether model can process images
     max_image_size_mb: float = 0.0  # Maximum total size for all images in MB
+    supports_temperature: bool = True  # Whether model accepts temperature parameter in API calls
+    temperature_constraint: Optional[str] = (
+        None  # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
+    )
     is_custom: bool = False  # True for models that should only be used with custom endpoints
     description: str = ""

+    def _create_temperature_constraint(self) -> TemperatureConstraint:
+        """Create temperature constraint object from configuration.
+
+        Returns:
+            TemperatureConstraint object based on configuration
+        """
+        return create_temperature_constraint(self.temperature_constraint or "range")
+
     def to_capabilities(self) -> ModelCapabilities:
         """Convert to ModelCapabilities object."""
         return ModelCapabilities(
@@ -41,7 +58,8 @@ class OpenRouterModelConfig:
             supports_function_calling=self.supports_function_calling,
             supports_images=self.supports_images,
             max_image_size_mb=self.max_image_size_mb,
-            temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 1.0),
+            supports_temperature=self.supports_temperature,
+            temperature_constraint=self._create_temperature_constraint(),
         )
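Because _create_temperature_constraint() defers to the shared factory, registry entries written before this commit (with no constraint string) degrade to the default range rather than failing. A sketch of that fallback:

    temperature_constraint = None  # older entries omit the field
    constraint = create_temperature_constraint(temperature_constraint or "range")
    assert constraint.validate(1.0)  # default 0.0-2.0 range still applies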
@@ -4,7 +4,7 @@ Chat tool system prompt

 CHAT_PROMPT = """
 You are a senior engineering thought-partner collaborating with Claude. Your mission is to brainstorm, validate ideas,
-and offer well-reasoned second opinions on technical decisions.
+and offer well-reasoned second opinions on technical decisions when they are justified and practical.

 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,27 +26,27 @@ provided unless for some reason its content is missing or incomplete:

 SCOPE & FOCUS
 • Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints.
-• Recommend new technologies or patterns ONLY with a clear, compelling benefit that aligns with stated goals.
-• Keep proposals practical and implementable; avoid speculative or off-stack detours.
+• Recommend new technologies or patterns ONLY when they provide clearly superior outcomes with minimal added complexity.
+• Avoid speculative, over-engineered, or unnecessarily abstract designs that exceed current project goals or needs.
+• Keep proposals practical and directly actionable within the existing architecture.

 COLLABORATION APPROACH
-1. Engage deeply with Claude's input - extend, refine, and explore alternatives within the existing context.
+1. Engage deeply with Claude's input – extend, refine, and explore alternatives ONLY WHEN they are well-justified and materially beneficial.
 2. Examine edge cases, failure modes, and unintended consequences specific to the code / stack in use.
 3. Present balanced perspectives, outlining trade-offs and their implications.
 4. Challenge assumptions constructively while respecting current design choices and goals.
-5. Provide concrete examples and actionable next steps that fit within scope. Direct, achievable next-steps where
-needed.
+5. Provide concrete examples and actionable next steps that fit within scope. Prioritize direct, achievable outcomes.

 BRAINSTORMING GUIDELINES
-• Offer multiple viable strategies compatible with the current environment but keep it to the point.
-• Suggest creative solutions and alternatives that work within the current project constraints, scope and limitations
-• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice
+• Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
+• Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
+• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice.
 • Evaluate scalability, maintainability, and operational realities inside the existing architecture and current
 framework.
-• Reference industry best practices relevant to the technologies in use
+• Reference industry best practices relevant to the technologies in use.
 • Communicate concisely and technically, assuming an experienced engineering audience.

 REMEMBER
-Act as a peer, not a lecturer. Aim for depth over breadth, stay within project boundaries, and help the team
+Act as a peer, not a lecturer. Avoid overcomplicating. Aim for depth over breadth, stay within project boundaries, and help the team
 reach sound, actionable decisions.
 """
@@ -4,20 +4,31 @@ Precommit tool system prompt

 PRECOMMIT_PROMPT = """
 ROLE
-You are an expert pre-commit reviewer. Analyse git diffs as a senior developer giving a final sign-off to production.
+You are an expert pre-commit reviewer and senior engineering partner performing final code validation before production.
+Your responsibility goes beyond surface-level correctness — you are expected to think several steps ahead. Your review
+must assess whether the changes:
+- Introduce any patterns, structures, or decisions that may become future liabilities
+- Create brittle dependencies or tight coupling that could make maintenance harder
+- Omit critical safety, validation, or test scaffolding that may not fail now, but will cause issues down the line
+- Interact with other known areas of fragility in the codebase even if not directly touched
+
+Your task is to detect potential future consequences or systemic risks, not just immediate issues. Think like an
+engineer responsible for this code months later, debugging production incidents or onboarding a new developer.
+
+In addition to reviewing correctness, completeness, and quality of the change, apply long-term architectural thinking.
+Your feedback helps ensure this code won't cause silent regressions, developer confusion, or downstream side effects later.

 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
 included in any code you generate. Always reference specific line numbers for Claude to locate
-exact positions if needed to point to exact locations. Include a very short code excerpt alongside for clarity.
+exact positions if needed. Include a very short code excerpt alongside for clarity.
 Include context_start_text and context_end_text as backup references. Never include "LINE│" markers in generated code
 snippets.

 IF MORE INFORMATION IS NEEDED
-If you need additional context (e.g., related files not in the diff, test files, configuration) to provide thorough
-analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON
-format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is
-missing or incomplete:
+If you need additional context (e.g., related files not in the diff, test files, configuration) to perform a proper
+review—and without which your analysis would be incomplete or inaccurate—you MUST respond ONLY with this JSON format
+(and nothing else). Do NOT request files you've already been provided unless their content is missing or incomplete:
 {
   "status": "files_required_to_continue",
   "mandatory_instructions": "<your critical instructions for Claude>",
@@ -26,34 +37,36 @@ missing or incomplete:

 INPUTS PROVIDED
 1. Git diff (staged or branch comparison)
-2. Original request / acceptance criteria or some context around what changed
+2. Original request / acceptance criteria or context around what changed
 3. File names and related code

 SCOPE & FOCUS
-• Review ONLY the changes in the diff and the given code
-• From the diff, infer what got changed and why, determine if the changes make logical sense
-• Ensure they correctly implement the request, are secure (where applicable), efficient, and maintainable and do not
-cause potential regressions
-• DO NOT propose broad refactors or off-scope improvements. Stick to the code and changes you have visibility into.
+• Review ONLY the changes in the diff and the related code provided.
+• From the diff, infer what changed and why. Determine if the changes make logical, structural, and functional sense.
+• Ensure the changes correctly implement the request, are secure (where applicable), performant, and maintainable.
+• DO NOT propose broad refactors or unrelated improvements. Stay strictly within the boundaries of the provided changes.

 REVIEW METHOD
-1. Identify tech stack, frameworks, and patterns present in the diff.
-2. Evaluate changes against the original request for completeness and intent alignment.
-3. Detect issues, prioritising by severity (CRITICAL → HIGH → MEDIUM → LOW).
-4. Highlight incomplete changes, or changes that would cause bugs, regressions, crashes or data loss or race conditions
-5. Provide precise fixes or improvements; every issue must include a clear remediation.
-6. Acknowledge good patterns to reinforce best practice.
+1. Identify tech stack, frameworks, and patterns in the diff.
+2. Evaluate changes against the original request for completeness and alignment.
+3. Detect issues, prioritized by severity (CRITICAL → HIGH → MEDIUM → LOW).
+4. Flag bugs, regressions, crash risks, data loss, or race conditions.
+5. Recommend specific fixes for each issue raised; include code where helpful.
+6. Acknowledge sound patterns to reinforce best practices.

-CORE ANALYSIS (adapt to the diff and stack)
-• Security – injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety
-• Bugs & Logic Errors – off-by-one, null refs, race conditions, incorrect branching
-• Performance – inefficient algorithms, resource leaks, blocking operations
-• Code Quality – DRY violations, complexity, SOLID adherence
+CORE ANALYSIS (adapt to diff and stack)
+• Security – injection risks, auth flaws, exposure of sensitive data, unsafe dependencies, memory safety
+• Bugs & Logic Errors – off-by-one, null refs, incorrect logic, race conditions
+• Performance – inefficient logic, blocking calls, leaks
+• Code Quality – complexity, duplicated logic and DRY violations, SOLID violations

-ADDITIONAL ANALYSIS (apply only when relevant)
+ADDITIONAL ANALYSIS (only when relevant)
 • Language/runtime concerns – memory management, concurrency, exception handling
 • System/integration – config handling, external calls, operational impact
 • Testing – coverage gaps for new logic
+• If no tests are found in the project, do not flag test coverage as an issue unless the change introduces logic
+that is high-risk or complex.
+• In such cases, offer a low-severity suggestion encouraging basic tests, rather than marking it as a required fix.
 • Change-specific pitfalls – unused new functions, partial enum updates, scope creep, risky deletions
 • Determine if there are any new dependencies added but not declared, or new functionality added but not used
 • Determine unintended side effects: could changes in file_A break module_B even if module_B wasn't changed?
@@ -84,9 +97,9 @@ that apply:
 [LOW] ...

 MAKE RECOMMENDATIONS:
-Make a final, short, clear, to the point statement or list in a brief bullet point:
-- Mention top priority fixes to be IMMEDIATELY made before commit
-- Notable positives to keep
+Make a final, short, and focused statement or bullet list:
+- Top priority fixes that MUST IMMEDIATELY be addressed before commit
+- Notable positives to retain

 Be thorough yet actionable. Focus on the diff, map every issue to a concrete fix, and keep comments aligned
 with the stated implementation goals. Your goal is to help flag anything that could potentially slip through
@@ -4,8 +4,8 @@ ThinkDeep tool system prompt

 THINKDEEP_PROMPT = """
 ROLE
-You are a senior engineering collaborator working with Claude on complex software problems. Claude will send you
-content—analysis, prompts, questions, ideas, or theories—to deepen, validate, and extend.
+You are a senior engineering collaborator working alongside Claude on complex software problems. Claude will send you
+content—analysis, prompts, questions, ideas, or theories—to deepen, validate, or extend with rigor and clarity.

 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,25 +26,27 @@ been provided unless for some reason its content is missing or incomplete:

 GUIDELINES
 1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints.
-2. Stay on scope: avoid speculative or oversized ideas; keep suggestions practical and implementable.
-3. Challenge and enrich: find gaps, question assumptions, surface hidden complexities.
-4. Provide actionable next steps: concrete advice, trade-offs, and implementation tactics.
-5. Use concise, direct, technical language; assume an experienced engineering audience.
+2. Stay on scope: avoid speculative, over-engineered, or oversized ideas; keep suggestions practical and grounded.
+3. Challenge and enrich: find gaps, question assumptions, and surface hidden complexities or risks.
+4. Provide actionable next steps: offer specific advice, trade-offs, and implementation strategies.
+5. Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
+6. Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
+7. Use concise, technical language; assume an experienced engineering audience.

 KEY FOCUS AREAS (apply when relevant)
-- Architecture & Design: modularity, patterns, API boundaries, dependencies
-- Performance & Scalability: algorithm efficiency, concurrency, caching
+- Architecture & Design: modularity, boundaries, abstraction layers, dependencies
+- Performance & Scalability: algorithmic efficiency, concurrency, caching, bottlenecks
 - Security & Safety: validation, authentication/authorization, error handling, vulnerabilities
 - Quality & Maintainability: readability, testing, monitoring, refactoring
-- Integration & Deployment: external systems, compatibility, operational concerns
+- Integration & Deployment: ONLY IF APPLICABLE TO THE QUESTION - external systems, compatibility, configuration, operational concerns

 EVALUATION
-Your response will be reviewed by Claude before any decision is made. Aim to enhance decision-making rather
-than deliver final answers.
+Your response will be reviewed by Claude before any decision is made. Your goal is to practically extend Claude's thinking,
+surface blind spots, and refine options—not to deliver final answers in isolation.

 REMINDERS
-- Ground all insights in the current project's scope and constraints.
-- If additional information is necessary, such as code snippets, files, project details, use the clarification JSON
-- Prefer depth over breadth; propose alternatives ONLY when they materially improve the current approach and add value
-- Your goal is to be the perfect development partner that extends Claude's capabilities and thought process
+- Ground all insights in the current project's architecture, limitations, and goals.
+- If further context is needed, request it via the clarification JSON—nothing else.
+- Prioritize depth over breadth; propose alternatives ONLY if they clearly add value and improve the current approach.
+- Be the ideal development partner—rigorous, focused, and fluent in real-world software trade-offs.
 """
tests/test_o3_temperature_fix_simple.py (new file, 239 lines)
@@ -0,0 +1,239 @@
"""
|
||||
Simple integration test for the O3 model temperature parameter fix.
|
||||
|
||||
This test confirms that the fix properly excludes temperature parameters
|
||||
for O3 models while maintaining them for regular models.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
from providers.openai_provider import OpenAIModelProvider
|
||||
|
||||
|
||||
class TestO3TemperatureParameterFixSimple:
|
||||
"""Simple test for O3 model parameter filtering."""
|
||||
|
||||
@patch("utils.model_restrictions.get_restriction_service")
|
||||
@patch("providers.openai_compatible.OpenAI")
|
||||
def test_o3_models_exclude_temperature_from_api_call(self, mock_openai_class, mock_restriction_service):
|
||||
"""Test that O3 models don't send temperature to the API."""
|
||||
# Mock restriction service to allow all models
|
||||
mock_service = Mock()
|
||||
mock_service.is_allowed.return_value = True
|
||||
mock_restriction_service.return_value = mock_service
|
||||
|
||||
# Setup mock client
|
||||
mock_client = Mock()
|
||||
mock_openai_class.return_value = mock_client
|
||||
|
||||
# Setup mock response
|
||||
mock_response = Mock()
|
||||
mock_response.choices = [Mock()]
|
||||
mock_response.choices[0].message.content = "Test response"
|
||||
mock_response.choices[0].finish_reason = "stop"
|
||||
mock_response.model = "o3-mini"
|
||||
mock_response.id = "test-id"
|
||||
mock_response.created = 1234567890
|
||||
mock_response.usage = Mock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
# Create provider
|
||||
provider = OpenAIModelProvider(api_key="test-key")
|
||||
|
||||
# Override _resolve_model_name to return the resolved model name
|
||||
provider._resolve_model_name = lambda name: name
|
||||
# Override model validation to bypass restrictions
|
||||
provider.validate_model_name = lambda name: True
|
||||
|
||||
# Call generate_content with O3 model
|
||||
provider.generate_content(prompt="Test prompt", model_name="o3-mini", temperature=0.5, max_output_tokens=100)
|
||||
|
||||
# Verify the API call was made without temperature or max_tokens
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
call_kwargs = mock_client.chat.completions.create.call_args[1]
|
||||
|
||||
assert "temperature" not in call_kwargs, "O3 models should not include temperature parameter"
|
||||
assert "max_tokens" not in call_kwargs, "O3 models should not include max_tokens parameter"
|
||||
assert call_kwargs["model"] == "o3-mini"
|
||||
assert "messages" in call_kwargs
|
||||
|
||||
@patch("utils.model_restrictions.get_restriction_service")
|
||||
@patch("providers.openai_compatible.OpenAI")
|
||||
def test_regular_models_include_temperature_in_api_call(self, mock_openai_class, mock_restriction_service):
|
||||
"""Test that regular models still send temperature to the API."""
|
||||
# Mock restriction service to allow all models
|
||||
mock_service = Mock()
|
||||
mock_service.is_allowed.return_value = True
|
||||
mock_restriction_service.return_value = mock_service
|
||||
|
||||
# Setup mock client
|
||||
mock_client = Mock()
|
||||
mock_openai_class.return_value = mock_client
|
||||
|
||||
# Setup mock response
|
||||
mock_response = Mock()
|
||||
mock_response.choices = [Mock()]
|
||||
mock_response.choices[0].message.content = "Test response"
|
||||
mock_response.choices[0].finish_reason = "stop"
|
||||
mock_response.model = "gpt-4.1-2025-04-14"
|
||||
mock_response.id = "test-id"
|
||||
mock_response.created = 1234567890
|
||||
mock_response.usage = Mock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
# Create provider
|
||||
provider = OpenAIModelProvider(api_key="test-key")
|
||||
|
||||
# Override _resolve_model_name to return the resolved model name
|
||||
provider._resolve_model_name = lambda name: name
|
||||
# Override model validation to bypass restrictions
|
||||
provider.validate_model_name = lambda name: True
|
||||
|
||||
# Call generate_content with regular model (use supported model)
|
||||
provider.generate_content(
|
||||
prompt="Test prompt", model_name="gpt-4.1-2025-04-14", temperature=0.5, max_output_tokens=100
|
||||
)
|
||||
|
||||
# Verify the API call was made WITH temperature and max_tokens
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
call_kwargs = mock_client.chat.completions.create.call_args[1]
|
||||
|
||||
assert call_kwargs["temperature"] == 0.5, "Regular models should include temperature parameter"
|
||||
assert call_kwargs["max_tokens"] == 100, "Regular models should include max_tokens parameter"
|
||||
assert call_kwargs["model"] == "gpt-4.1-2025-04-14"
|
||||
|
||||
@patch("utils.model_restrictions.get_restriction_service")
|
||||
@patch("providers.openai_compatible.OpenAI")
|
||||
def test_o3_models_filter_unsupported_parameters(self, mock_openai_class, mock_restriction_service):
|
||||
"""Test that O3 models filter out top_p, frequency_penalty, etc."""
|
||||
# Mock restriction service to allow all models
|
||||
mock_service = Mock()
|
||||
mock_service.is_allowed.return_value = True
|
||||
mock_restriction_service.return_value = mock_service
|
||||
|
||||
# Setup mock client
|
||||
mock_client = Mock()
|
||||
mock_openai_class.return_value = mock_client
|
||||
|
||||
# Setup mock response
|
||||
mock_response = Mock()
|
||||
mock_response.choices = [Mock()]
|
||||
mock_response.choices[0].message.content = "Test response"
|
||||
mock_response.choices[0].finish_reason = "stop"
|
||||
mock_response.model = "o3"
|
||||
mock_response.id = "test-id"
|
||||
mock_response.created = 1234567890
|
||||
mock_response.usage = Mock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
# Create provider
|
||||
provider = OpenAIModelProvider(api_key="test-key")
|
||||
|
||||
# Override _resolve_model_name to return the resolved model name
|
||||
provider._resolve_model_name = lambda name: name
|
||||
# Override model validation to bypass restrictions
|
||||
provider.validate_model_name = lambda name: True
|
||||
|
||||
# Call generate_content with O3 model and unsupported parameters
|
||||
provider.generate_content(
|
||||
prompt="Test prompt",
|
||||
model_name="o3",
|
||||
temperature=0.5,
|
||||
top_p=0.9,
|
||||
frequency_penalty=0.1,
|
||||
presence_penalty=0.1,
|
||||
seed=42,
|
||||
stop=["END"],
|
||||
)
|
||||
|
||||
# Verify the API call filters out unsupported parameters
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
call_kwargs = mock_client.chat.completions.create.call_args[1]
|
||||
|
||||
# Should be excluded for O3 models
|
||||
assert "temperature" not in call_kwargs, "O3 should not include temperature"
|
||||
assert "top_p" not in call_kwargs, "O3 should not include top_p"
|
||||
assert "frequency_penalty" not in call_kwargs, "O3 should not include frequency_penalty"
|
||||
assert "presence_penalty" not in call_kwargs, "O3 should not include presence_penalty"
|
||||
|
||||
# Should be included (supported parameters)
|
||||
assert call_kwargs["seed"] == 42, "O3 should include seed parameter"
|
||||
assert call_kwargs["stop"] == ["END"], "O3 should include stop parameter"
|
||||
|
||||
@patch("utils.model_restrictions.get_restriction_service")
|
||||
def test_all_o3_models_have_correct_temperature_capability(self, mock_restriction_service):
|
||||
"""Test that all O3/O4 models have supports_temperature=False in their capabilities."""
|
||||
from providers.openai_provider import OpenAIModelProvider
|
||||
|
||||
# Mock restriction service to allow all models
|
||||
mock_service = Mock()
|
||||
mock_service.is_allowed.return_value = True
|
||||
mock_restriction_service.return_value = mock_service
|
||||
|
||||
provider = OpenAIModelProvider(api_key="test-key")
|
||||
|
||||
# Test O3/O4 models that should NOT support temperature parameter
|
||||
o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]
|
||||
|
||||
for model in o3_o4_models:
|
||||
capabilities = provider.get_capabilities(model)
|
||||
assert hasattr(
|
||||
capabilities, "supports_temperature"
|
||||
), f"Model {model} capabilities should have supports_temperature field"
|
||||
assert capabilities.supports_temperature is False, f"Model {model} should have supports_temperature=False"
|
||||
|
||||
# Test that regular models DO support temperature parameter
|
||||
regular_models = ["gpt-4.1-2025-04-14"]
|
||||
|
||||
for model in regular_models:
|
||||
try:
|
||||
capabilities = provider.get_capabilities(model)
|
||||
assert hasattr(
|
||||
capabilities, "supports_temperature"
|
||||
), f"Model {model} capabilities should have supports_temperature field"
|
||||
assert capabilities.supports_temperature is True, f"Model {model} should have supports_temperature=True"
|
||||
except ValueError:
|
||||
# Skip if model not in SUPPORTED_MODELS (that's okay for this test)
|
||||
pass
|
||||
|
||||
@patch("utils.model_restrictions.get_restriction_service")
|
||||
def test_openai_provider_temperature_constraints(self, mock_restriction_service):
|
||||
"""Test that OpenAI provider has correct temperature constraints for O3 models."""
|
||||
from providers.openai_provider import OpenAIModelProvider
|
||||
|
||||
# Mock restriction service to allow all models
|
||||
mock_service = Mock()
|
||||
mock_service.is_allowed.return_value = True
|
||||
mock_restriction_service.return_value = mock_service
|
||||
|
||||
provider = OpenAIModelProvider(api_key="test-key")
|
||||
|
||||
# Test O3 model constraints
|
||||
o3_capabilities = provider.get_capabilities("o3-mini")
|
||||
assert o3_capabilities.temperature_constraint is not None
|
||||
|
||||
# O3 models should have fixed temperature constraint
|
||||
temp_constraint = o3_capabilities.temperature_constraint
|
||||
assert temp_constraint.validate(1.0) is True
|
||||
assert temp_constraint.validate(0.5) is False
|
||||
|
||||
# Test regular model constraints - use gpt-4.1 which is supported
|
||||
gpt41_capabilities = provider.get_capabilities("gpt-4.1-2025-04-14")
|
||||
assert gpt41_capabilities.temperature_constraint is not None
|
||||
|
||||
# Regular models should allow a range
|
||||
temp_constraint = gpt41_capabilities.temperature_constraint
|
||||
assert temp_constraint.validate(0.5) is True
|
||||
assert temp_constraint.validate(1.0) is True
|
||||
@@ -122,7 +122,7 @@ class TestOpenAIProvider:
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "Test response"
         mock_response.choices[0].finish_reason = "stop"
-        mock_response.model = "o4-mini"  # API returns the resolved model name
+        mock_response.model = "gpt-4.1-2025-04-14"  # API returns the resolved model name
         mock_response.id = "test-id"
         mock_response.created = 1234567890
         mock_response.usage = MagicMock()
@@ -134,19 +134,23 @@ class TestOpenAIProvider:

         provider = OpenAIModelProvider("test-key")

-        # Call generate_content with alias 'mini'
+        # Call generate_content with alias 'gpt4.1' (resolves to gpt-4.1-2025-04-14, supports temperature)
         result = provider.generate_content(
-            prompt="Test prompt", model_name="mini", temperature=1.0  # This should be resolved to "o4-mini"
+            prompt="Test prompt",
+            model_name="gpt4.1",
+            temperature=1.0,  # This should be resolved to "gpt-4.1-2025-04-14"
         )

         # Verify the API was called with the RESOLVED model name
         mock_client.chat.completions.create.assert_called_once()
         call_kwargs = mock_client.chat.completions.create.call_args[1]

-        # CRITICAL ASSERTION: The API should receive "o4-mini", not "mini"
-        assert call_kwargs["model"] == "o4-mini", f"Expected 'o4-mini' but API received '{call_kwargs['model']}'"
+        # CRITICAL ASSERTION: The API should receive "gpt-4.1-2025-04-14", not "gpt4.1"
+        assert (
+            call_kwargs["model"] == "gpt-4.1-2025-04-14"
+        ), f"Expected 'gpt-4.1-2025-04-14' but API received '{call_kwargs['model']}'"

-        # Verify other parameters
+        # Verify other parameters (gpt-4.1 supports temperature unlike O3/O4 models)
         assert call_kwargs["temperature"] == 1.0
         assert len(call_kwargs["messages"]) == 1
         assert call_kwargs["messages"][0]["role"] == "user"
@@ -154,7 +158,7 @@ class TestOpenAIProvider:

         # Verify response
         assert result.content == "Test response"
-        assert result.model_name == "o4-mini"  # Should be the resolved name
+        assert result.model_name == "gpt-4.1-2025-04-14"  # Should be the resolved name

     @patch("providers.openai_compatible.OpenAI")
     def test_generate_content_other_aliases(self, mock_openai_class):