Added proper temperature constraints to the model, fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/78

Prompt tweaks
This commit is contained in:
Fahad
2025-06-19 08:30:46 +04:00
parent ec3a466b1c
commit 9f3b70d6d7
13 changed files with 435 additions and 79 deletions

View File

@@ -27,6 +27,8 @@
"supports_function_calling": "Whether the model supports function/tool calling",
"supports_images": "Whether the model can process images/visual input",
"max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
"supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
"temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
"is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
"description": "Human-readable description of the model"
},
@@ -39,6 +41,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 10.0,
"supports_temperature": true,
"temperature_constraint": "range",
"is_custom": true,
"description": "Example custom/local model for Ollama, vLLM, etc."
}
@@ -152,6 +156,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3 model - well-rounded and powerful across domains with vision"
},
{
@@ -163,6 +169,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3-mini model - balanced performance and speed with vision"
},
{
@@ -174,6 +182,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision"
},
{
@@ -185,6 +195,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision"
},
{
@@ -196,6 +208,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision"
},
{
@@ -207,6 +221,8 @@
"supports_function_calling": true,
"supports_images": true,
"max_image_size_mb": 20.0,
"supports_temperature": false,
"temperature_constraint": "fixed",
"description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks with vision"
},
{

View File

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.1.3"
__version__ = "5.1.4"
# Last update date in ISO format
__updated__ = "2025-06-19"
# Primary maintainer

View File

@@ -100,6 +100,26 @@ class DiscreteTemperatureConstraint(TemperatureConstraint):
return self.default_temp
def create_temperature_constraint(constraint_type: str) -> TemperatureConstraint:
"""Create temperature constraint object from configuration string.
Args:
constraint_type: Type of constraint ("fixed", "range", "discrete")
Returns:
TemperatureConstraint object based on configuration
"""
if constraint_type == "fixed":
# Fixed temperature models (O3/O4) only support temperature=1.0
return FixedTemperatureConstraint(1.0)
elif constraint_type == "discrete":
# For models with specific allowed values - using common OpenAI values as default
return DiscreteTemperatureConstraint([0.0, 0.3, 0.7, 1.0, 1.5, 2.0], 0.7)
else:
# Default range constraint (for "range" or None)
return RangeTemperatureConstraint(0.0, 2.0, 0.7)
@dataclass
class ModelCapabilities:
"""Capabilities and constraints for a specific model."""
@@ -114,6 +134,7 @@ class ModelCapabilities:
supports_function_calling: bool = False
supports_images: bool = False # Whether model can process images
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls
# Temperature constraint object - preferred way to define temperature limits
temperature_constraint: TemperatureConstraint = field(
@@ -245,3 +266,17 @@ class ModelProvider(ABC):
List of all model names and alias targets known by this provider
"""
pass
def _resolve_model_name(self, model_name: str) -> str:
"""Resolve model shorthand to full name.
Base implementation returns the model name unchanged.
Subclasses should override to provide alias resolution.
Args:
model_name: Model name that may be an alias
Returns:
Resolved model name
"""
return model_name
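
For reference, a minimal usage sketch of the new create_temperature_constraint factory (assuming it is importable from providers.base; the validate() outcomes for the fixed constraint match the tests added later in this commit, the others follow from the constructor defaults shown above):

from providers.base import create_temperature_constraint

# "fixed" -> FixedTemperatureConstraint(1.0): O3/O4 reasoning models accept only 1.0
fixed = create_temperature_constraint("fixed")
assert fixed.validate(1.0) is True
assert fixed.validate(0.5) is False

# "range" (or an omitted value) -> RangeTemperatureConstraint(0.0, 2.0, 0.7)
ranged = create_temperature_constraint("range")
assert ranged.validate(0.7) is True

# "discrete" -> DiscreteTemperatureConstraint([0.0, 0.3, 0.7, 1.0, 1.5, 2.0], 0.7)
discrete = create_temperature_constraint("discrete")
assert discrete.validate(0.3) is True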

View File

@@ -162,6 +162,7 @@ class CustomProvider(OpenAICompatibleProvider):
supports_system_prompts=True,
supports_streaming=True,
supports_function_calling=False, # Conservative default
supports_temperature=True, # Most custom models accept temperature parameter
temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
)

View File

@@ -94,6 +94,7 @@ class GeminiModelProvider(ModelProvider):
supports_function_calling=True,
supports_images=config.get("supports_images", False),
max_image_size_mb=config.get("max_image_size_mb", 0.0),
supports_temperature=True, # Gemini models accept temperature parameter
temperature_constraint=temp_constraint,
)

View File

@@ -448,23 +448,41 @@ class OpenAICompatibleProvider(ModelProvider):
completion_params = {
"model": model_name,
"messages": messages,
"temperature": temperature,
}
# Add max tokens if specified
if max_output_tokens:
# Check model capabilities once to determine parameter support
resolved_model = self._resolve_model_name(model_name)
# Get model capabilities once to avoid duplicate calls
try:
capabilities = self.get_capabilities(model_name)
# Defensive check for supports_temperature field (backward compatibility)
supports_temperature = getattr(capabilities, "supports_temperature", True)
except Exception as e:
# If capability check fails, fall back to conservative behavior
# Default to including temperature for most models (backward compatibility)
logging.debug(f"Failed to check temperature support for {model_name}: {e}")
supports_temperature = True
# Add temperature parameter if supported
if supports_temperature:
completion_params["temperature"] = temperature
# Add max tokens if specified and model supports it
# O3/O4 models that don't support temperature also don't support max_tokens
if max_output_tokens and supports_temperature:
completion_params["max_tokens"] = max_output_tokens
# Add any additional OpenAI-specific parameters
# Use capabilities to filter parameters for reasoning models
for key, value in kwargs.items():
if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
# Reasoning models (those that don't support temperature) also don't support these parameters
if not supports_temperature and key in ["top_p", "frequency_penalty", "presence_penalty"]:
continue # Skip unsupported parameters for reasoning models
completion_params[key] = value
# Check if this is o3-pro and needs the responses endpoint
resolved_model = model_name
if hasattr(self, "_resolve_model_name"):
resolved_model = self._resolve_model_name(model_name)
if resolved_model == "o3-pro-2025-06-10":
# This model requires the /v1/responses endpoint
# If it fails, we should not fall back to chat/completions

View File

@@ -4,11 +4,10 @@ import logging
from typing import Optional
from .base import (
FixedTemperatureConstraint,
ModelCapabilities,
ModelResponse,
ProviderType,
RangeTemperatureConstraint,
create_temperature_constraint,
)
from .openai_compatible import OpenAICompatibleProvider
@@ -25,18 +24,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
"supports_extended_thinking": False,
"supports_images": True, # O3 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O3 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
},
"o3-mini": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O3 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O3 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
},
"o3-pro-2025-06-10": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O3 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O3 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
},
# Aliases
"o3-pro": "o3-pro-2025-06-10",
@@ -45,18 +50,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
"supports_extended_thinking": False,
"supports_images": True, # O4 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O4 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
},
"o4-mini-high": {
"context_window": 200_000, # 200K tokens
"supports_extended_thinking": False,
"supports_images": True, # O4 models support vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": False, # O4 models don't accept temperature parameter
"temperature_constraint": "fixed", # Fixed at 1.0
},
"gpt-4.1-2025-04-14": {
"context_window": 1_000_000, # 1M tokens
"supports_extended_thinking": False,
"supports_images": True, # GPT-4.1 supports vision
"max_image_size_mb": 20.0, # 20MB per OpenAI docs
"supports_temperature": True, # Regular models accept temperature parameter
"temperature_constraint": "range", # 0.0-2.0 range
},
# Shorthands
"mini": "o4-mini", # Default 'mini' to latest mini model
@@ -90,13 +101,10 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
config = self.SUPPORTED_MODELS[resolved_name]
# Define temperature constraints per model
if resolved_name in ["o3", "o3-mini", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-high"]:
# O3 and O4 reasoning models only support temperature=1.0
temp_constraint = FixedTemperatureConstraint(1.0)
else:
# Other OpenAI models (including GPT-4.1) support 0.0-2.0 range
temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
# Get temperature constraints and support from configuration
supports_temperature = config.get("supports_temperature", True) # Default to True for backward compatibility
temp_constraint_type = config.get("temperature_constraint", "range") # Default to range
temp_constraint = create_temperature_constraint(temp_constraint_type)
return ModelCapabilities(
provider=ProviderType.OPENAI,
@@ -109,6 +117,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
supports_function_calling=True,
supports_images=config.get("supports_images", False),
max_image_size_mb=config.get("max_image_size_mb", 0.0),
supports_temperature=supports_temperature,
temperature_constraint=temp_constraint,
)

View File

@@ -8,7 +8,12 @@ from typing import Optional
from utils.file_utils import read_json_file
from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
from .base import (
ModelCapabilities,
ProviderType,
TemperatureConstraint,
create_temperature_constraint,
)
@dataclass
@@ -25,9 +30,21 @@ class OpenRouterModelConfig:
supports_json_mode: bool = False
supports_images: bool = False # Whether model can process images
max_image_size_mb: float = 0.0 # Maximum total size for all images in MB
supports_temperature: bool = True # Whether model accepts temperature parameter in API calls
temperature_constraint: Optional[str] = (
None # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
)
is_custom: bool = False # True for models that should only be used with custom endpoints
description: str = ""
def _create_temperature_constraint(self) -> TemperatureConstraint:
"""Create temperature constraint object from configuration.
Returns:
TemperatureConstraint object based on configuration
"""
return create_temperature_constraint(self.temperature_constraint or "range")
def to_capabilities(self) -> ModelCapabilities:
"""Convert to ModelCapabilities object."""
return ModelCapabilities(
@@ -41,7 +58,8 @@ class OpenRouterModelConfig:
supports_function_calling=self.supports_function_calling,
supports_images=self.supports_images,
max_image_size_mb=self.max_image_size_mb,
temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 1.0),
supports_temperature=self.supports_temperature,
temperature_constraint=self._create_temperature_constraint(),
)
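
As a sketch of how the optional registry field falls back to the default range (the constraint_for helper below is hypothetical; create_temperature_constraint, the "or range" fallback, and the module path are taken or assumed from the diff above):

from providers.base import create_temperature_constraint  # assumed module path

def constraint_for(entry: dict):
    # Mirrors OpenRouterModelConfig._create_temperature_constraint: use the entry's
    # "temperature_constraint" string, or the default range when it is omitted/None
    return create_temperature_constraint(entry.get("temperature_constraint") or "range")

o3_entry = {"supports_temperature": False, "temperature_constraint": "fixed"}
local_entry = {"supports_temperature": True}  # no constraint -> 0.0-2.0 range, default 0.7

assert constraint_for(o3_entry).validate(0.5) is False  # fixed at 1.0
assert constraint_for(local_entry).validate(0.5) is True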

View File

@@ -4,7 +4,7 @@ Chat tool system prompt
CHAT_PROMPT = """
You are a senior engineering thought-partner collaborating with Claude. Your mission is to brainstorm, validate ideas,
and offer well-reasoned second opinions on technical decisions.
and offer well-reasoned second opinions on technical decisions when they are justified and practical.
CRITICAL LINE NUMBER INSTRUCTIONS
Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,27 +26,27 @@ provided unless for some reason its content is missing or incomplete:
SCOPE & FOCUS
• Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints.
• Recommend new technologies or patterns ONLY with a clear, compelling benefit that aligns with stated goals.
Keep proposals practical and implementable; avoid speculative or off-stack detours.
• Recommend new technologies or patterns ONLY when they provide clearly superior outcomes with minimal added complexity.
Avoid speculative, over-engineered, or unnecessarily abstract designs that exceed current project goals or needs.
• Keep proposals practical and directly actionable within the existing architecture.
COLLABORATION APPROACH
1. Engage deeply with Claude's input - extend, refine, and explore alternatives within the existing context.
1. Engage deeply with Claude's input - extend, refine, and explore alternatives ONLY WHEN they are well-justified and materially beneficial.
2. Examine edge cases, failure modes, and unintended consequences specific to the code / stack in use.
3. Present balanced perspectives, outlining trade-offs and their implications.
4. Challenge assumptions constructively while respecting current design choices and goals.
5. Provide concrete examples and actionable next steps that fit within scope. Direct, achievable next-steps where
needed.
5. Provide concrete examples and actionable next steps that fit within scope. Prioritize direct, achievable outcomes.
BRAINSTORMING GUIDELINES
• Offer multiple viable strategies compatible with the current environment but keep it to the point.
• Suggest creative solutions and alternatives that work within the current project constraints, scope and limitations
• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice
• Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
• Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice.
• Evaluate scalability, maintainability, and operational realities inside the existing architecture and current
framework.
• Reference industry best practices relevant to the technologies in use
• Reference industry best practices relevant to the technologies in use.
• Communicate concisely and technically, assuming an experienced engineering audience.
REMEMBER
Act as a peer, not a lecturer. Aim for depth over breadth, stay within project boundaries, and help the team
Act as a peer, not a lecturer. Avoid overcomplicating. Aim for depth over breadth, stay within project boundaries, and help the team
reach sound, actionable decisions.
"""

View File

@@ -4,20 +4,31 @@ Precommit tool system prompt
PRECOMMIT_PROMPT = """
ROLE
You are an expert pre-commit reviewer. Analyse git diffs as a senior developer giving a final sign-off to production.
You are an expert pre-commit reviewer and senior engineering partner performing final code validation before production.
Your responsibility goes beyond surface-level correctness — you are expected to think several steps ahead. Your review
must assess whether the changes:
- Introduce any patterns, structures, or decisions that may become future liabilities
- Create brittle dependencies or tight coupling that could make maintenance harder
- Omit critical safety, validation, or test scaffolding that may not fail now, but will cause issues down the line
- Interact with other known areas of fragility in the codebase even if not directly touched
Your task is to detect potential future consequences or systemic risks, not just immediate issues. Think like an
engineer responsible for this code months later, debugging production incidents or onboarding a new developer.
In addition to reviewing correctness, completeness, and quality of the change, apply long-term architectural thinking.
Your feedback helps ensure this code won't cause silent regressions, developer confusion, or downstream side effects later.
CRITICAL LINE NUMBER INSTRUCTIONS
Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
included in any code you generate. Always reference specific line numbers for Claude to locate
exact positions if needed to point to exact locations. Include a very short code excerpt alongside for clarity.
exact positions if needed. Include a very short code excerpt alongside for clarity.
Include context_start_text and context_end_text as backup references. Never include "LINE│" markers in generated code
snippets.
IF MORE INFORMATION IS NEEDED
If you need additional context (e.g., related files not in the diff, test files, configuration) to provide thorough
analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON
format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is
missing or incomplete:
If you need additional context (e.g., related files not in the diff, test files, configuration) to perform a proper
review—and without which your analysis would be incomplete or inaccurate—you MUST respond ONLY with this JSON format
(and nothing else). Do NOT request files you've already been provided unless their content is missing or incomplete:
{
"status": "files_required_to_continue",
"mandatory_instructions": "<your critical instructions for Claude>",
@@ -26,34 +37,36 @@ missing or incomplete:
INPUTS PROVIDED
1. Git diff (staged or branch comparison)
2. Original request / acceptance criteria or some context around what changed
2. Original request / acceptance criteria or context around what changed
3. File names and related code
SCOPE & FOCUS
• Review ONLY the changes in the diff and the given code
• From the diff, infer what got changed and why, determine if the changes make logical sense
• Ensure they correctly implement the request, are secure (where applicable), efficient, and maintainable and do not
cause potential regressions
• DO NOT propose broad refactors or off-scope improvements. Stick to the code and changes you have visibility into.
• Review ONLY the changes in the diff and the related code provided.
• From the diff, infer what changed and why. Determine if the changes make logical, structural, and functional sense.
• Ensure the changes correctly implement the request, are secure (where applicable), performant, and maintainable.
• DO NOT propose broad refactors or unrelated improvements. Stay strictly within the boundaries of the provided changes.
REVIEW METHOD
1. Identify tech stack, frameworks, and patterns present in the diff.
2. Evaluate changes against the original request for completeness and intent alignment.
3. Detect issues, prioritising by severity (CRITICAL → HIGH → MEDIUM → LOW).
4. Highlight incomplete changes, or changes that would cause bugs, regressions, crashes or data loss or race conditions
5. Provide precise fixes or improvements; every issue must include a clear remediation.
6. Acknowledge good patterns to reinforce best practice.
1. Identify tech stack, frameworks, and patterns in the diff.
2. Evaluate changes against the original request for completeness and alignment.
3. Detect issues, prioritized by severity (CRITICAL → HIGH → MEDIUM → LOW).
4. Flag bugs, regressions, crash risks, data loss, or race conditions.
5. Recommend specific fixes for each issue raised; include code where helpful.
6. Acknowledge sound patterns to reinforce best practices.
CORE ANALYSIS (adapt to the diff and stack)
• Security: injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety
• Bugs & Logic Errors: off-by-one, null refs, race conditions, incorrect branching
• Performance: inefficient algorithms, resource leaks, blocking operations
• Code Quality: DRY violations, complexity, SOLID adherence
CORE ANALYSIS (adapt to diff and stack)
• Security: injection risks, auth flaws, exposure of sensitive data, unsafe dependencies, memory safety
• Bugs & Logic Errors: off-by-one, null refs, incorrect logic, race conditions
• Performance: inefficient logic, blocking calls, leaks
• Code Quality: complexity, duplicated logic and DRY violations, SOLID violations
ADDITIONAL ANALYSIS (apply only when relevant)
ADDITIONAL ANALYSIS (only when relevant)
• Language/runtime concerns: memory management, concurrency, exception handling
• System/integration: config handling, external calls, operational impact
• Testing: coverage gaps for new logic
• If no tests are found in the project, do not flag test coverage as an issue unless the change introduces logic
that is high-risk or complex.
• In such cases, offer a low-severity suggestion encouraging basic tests, rather than marking it as a required fix.
• Change-specific pitfalls: unused new functions, partial enum updates, scope creep, risky deletions
• Determine if there are any new dependencies added but not declared, or new functionality added but not used
• Determine unintended side effects: could changes in file_A break module_B even if module_B wasn't changed?
@@ -84,9 +97,9 @@ that apply:
[LOW] ...
MAKE RECOMMENDATIONS:
Make a final, short, clear, to the point statement or list in a brief bullet point:
- Mention top priority fixes to be IMMEDIATELY made before commit
- Notable positives to keep
Make a final, short, and focused statement or bullet list:
- Top priority fixes that MUST IMMEDIATELY be addressed before commit
- Notable positives to retain
Be thorough yet actionable. Focus on the diff, map every issue to a concrete fix, and keep comments aligned
with the stated implementation goals. Your goal is to help flag anything that could potentially slip through

View File

@@ -4,8 +4,8 @@ ThinkDeep tool system prompt
THINKDEEP_PROMPT = """
ROLE
You are a senior engineering collaborator working with Claude on complex software problems. Claude will send you
content—analysis, prompts, questions, ideas, or theories—to deepen, validate, and extend.
You are a senior engineering collaborator working alongside Claude on complex software problems. Claude will send you
content—analysis, prompts, questions, ideas, or theories—to deepen, validate, or extend with rigor and clarity.
CRITICAL LINE NUMBER INSTRUCTIONS
Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,25 +26,27 @@ been provided unless for some reason its content is missing or incomplete:
GUIDELINES
1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints.
2. Stay on scope: avoid speculative or oversized ideas; keep suggestions practical and implementable.
3. Challenge and enrich: find gaps, question assumptions, surface hidden complexities.
4. Provide actionable next steps: concrete advice, trade-offs, and implementation tactics.
5. Use concise, direct, technical language; assume an experienced engineering audience.
2. Stay on scope: avoid speculative, over-engineered, or oversized ideas; keep suggestions practical and grounded.
3. Challenge and enrich: find gaps, question assumptions, and surface hidden complexities or risks.
4. Provide actionable next steps: offer specific advice, trade-offs, and implementation strategies.
5. Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
6. Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
7. Use concise, technical language; assume an experienced engineering audience.
KEY FOCUS AREAS (apply when relevant)
- Architecture & Design: modularity, patterns, API boundaries, dependencies
- Performance & Scalability: algorithm efficiency, concurrency, caching
- Architecture & Design: modularity, boundaries, abstraction layers, dependencies
- Performance & Scalability: algorithmic efficiency, concurrency, caching, bottlenecks
- Security & Safety: validation, authentication/authorization, error handling, vulnerabilities
- Quality & Maintainability: readability, testing, monitoring, refactoring
- Integration & Deployment: external systems, compatibility, operational concerns
- Integration & Deployment: ONLY IF APPLICABLE TO THE QUESTION - external systems, compatibility, configuration, operational concerns
EVALUATION
Your response will be reviewed by Claude before any decision is made. Aim to enhance decision-making rather
than deliver final answers.
Your response will be reviewed by Claude before any decision is made. Your goal is to practically extend Claude's thinking,
surface blind spots, and refine options—not to deliver final answers in isolation.
REMINDERS
- Ground all insights in the current project's scope and constraints.
- If additional information is necessary, such as code snippets, files, project details, use the clarification JSON
- Prefer depth over breadth; propose alternatives ONLY when they materially improve the current approach and add value
- Your goal is to be the perfect development partner that extends Claude's capabilities and thought process
- Ground all insights in the current project's architecture, limitations, and goals.
- If further context is needed, request it via the clarification JSON—nothing else.
- Prioritize depth over breadth; propose alternatives ONLY if they clearly add value and improve the current approach.
- Be the ideal development partner—rigorous, focused, and fluent in real-world software trade-offs.
"""

View File

@@ -0,0 +1,239 @@
"""
Simple integration test for the O3 model temperature parameter fix.
This test confirms that the fix properly excludes temperature parameters
for O3 models while maintaining them for regular models.
"""
from unittest.mock import Mock, patch
from providers.openai_provider import OpenAIModelProvider
class TestO3TemperatureParameterFixSimple:
"""Simple test for O3 model parameter filtering."""
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_o3_models_exclude_temperature_from_api_call(self, mock_openai_class, mock_restriction_service):
"""Test that O3 models don't send temperature to the API."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o3-mini"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with O3 model
provider.generate_content(prompt="Test prompt", model_name="o3-mini", temperature=0.5, max_output_tokens=100)
# Verify the API call was made without temperature or max_tokens
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert "temperature" not in call_kwargs, "O3 models should not include temperature parameter"
assert "max_tokens" not in call_kwargs, "O3 models should not include max_tokens parameter"
assert call_kwargs["model"] == "o3-mini"
assert "messages" in call_kwargs
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_regular_models_include_temperature_in_api_call(self, mock_openai_class, mock_restriction_service):
"""Test that regular models still send temperature to the API."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4.1-2025-04-14"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with regular model (use supported model)
provider.generate_content(
prompt="Test prompt", model_name="gpt-4.1-2025-04-14", temperature=0.5, max_output_tokens=100
)
# Verify the API call was made WITH temperature and max_tokens
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert call_kwargs["temperature"] == 0.5, "Regular models should include temperature parameter"
assert call_kwargs["max_tokens"] == 100, "Regular models should include max_tokens parameter"
assert call_kwargs["model"] == "gpt-4.1-2025-04-14"
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_o3_models_filter_unsupported_parameters(self, mock_openai_class, mock_restriction_service):
"""Test that O3 models filter out top_p, frequency_penalty, etc."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o3"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with O3 model and unsupported parameters
provider.generate_content(
prompt="Test prompt",
model_name="o3",
temperature=0.5,
top_p=0.9,
frequency_penalty=0.1,
presence_penalty=0.1,
seed=42,
stop=["END"],
)
# Verify the API call filters out unsupported parameters
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
# Should be excluded for O3 models
assert "temperature" not in call_kwargs, "O3 should not include temperature"
assert "top_p" not in call_kwargs, "O3 should not include top_p"
assert "frequency_penalty" not in call_kwargs, "O3 should not include frequency_penalty"
assert "presence_penalty" not in call_kwargs, "O3 should not include presence_penalty"
# Should be included (supported parameters)
assert call_kwargs["seed"] == 42, "O3 should include seed parameter"
assert call_kwargs["stop"] == ["END"], "O3 should include stop parameter"
@patch("utils.model_restrictions.get_restriction_service")
def test_all_o3_models_have_correct_temperature_capability(self, mock_restriction_service):
"""Test that all O3/O4 models have supports_temperature=False in their capabilities."""
from providers.openai_provider import OpenAIModelProvider
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
provider = OpenAIModelProvider(api_key="test-key")
# Test O3/O4 models that should NOT support temperature parameter
o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]
for model in o3_o4_models:
capabilities = provider.get_capabilities(model)
assert hasattr(
capabilities, "supports_temperature"
), f"Model {model} capabilities should have supports_temperature field"
assert capabilities.supports_temperature is False, f"Model {model} should have supports_temperature=False"
# Test that regular models DO support temperature parameter
regular_models = ["gpt-4.1-2025-04-14"]
for model in regular_models:
try:
capabilities = provider.get_capabilities(model)
assert hasattr(
capabilities, "supports_temperature"
), f"Model {model} capabilities should have supports_temperature field"
assert capabilities.supports_temperature is True, f"Model {model} should have supports_temperature=True"
except ValueError:
# Skip if model not in SUPPORTED_MODELS (that's okay for this test)
pass
@patch("utils.model_restrictions.get_restriction_service")
def test_openai_provider_temperature_constraints(self, mock_restriction_service):
"""Test that OpenAI provider has correct temperature constraints for O3 models."""
from providers.openai_provider import OpenAIModelProvider
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
provider = OpenAIModelProvider(api_key="test-key")
# Test O3 model constraints
o3_capabilities = provider.get_capabilities("o3-mini")
assert o3_capabilities.temperature_constraint is not None
# O3 models should have fixed temperature constraint
temp_constraint = o3_capabilities.temperature_constraint
assert temp_constraint.validate(1.0) is True
assert temp_constraint.validate(0.5) is False
# Test regular model constraints - use gpt-4.1 which is supported
gpt41_capabilities = provider.get_capabilities("gpt-4.1-2025-04-14")
assert gpt41_capabilities.temperature_constraint is not None
# Regular models should allow a range
temp_constraint = gpt41_capabilities.temperature_constraint
assert temp_constraint.validate(0.5) is True
assert temp_constraint.validate(1.0) is True

View File

@@ -122,7 +122,7 @@ class TestOpenAIProvider:
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o4-mini" # API returns the resolved model name
mock_response.model = "gpt-4.1-2025-04-14" # API returns the resolved model name
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = MagicMock()
@@ -134,19 +134,23 @@ class TestOpenAIProvider:
provider = OpenAIModelProvider("test-key")
# Call generate_content with alias 'mini'
# Call generate_content with alias 'gpt4.1' (resolves to gpt-4.1-2025-04-14, supports temperature)
result = provider.generate_content(
prompt="Test prompt", model_name="mini", temperature=1.0 # This should be resolved to "o4-mini"
prompt="Test prompt",
model_name="gpt4.1",
temperature=1.0, # This should be resolved to "gpt-4.1-2025-04-14"
)
# Verify the API was called with the RESOLVED model name
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
# CRITICAL ASSERTION: The API should receive "o4-mini", not "mini"
assert call_kwargs["model"] == "o4-mini", f"Expected 'o4-mini' but API received '{call_kwargs['model']}'"
# CRITICAL ASSERTION: The API should receive "gpt-4.1-2025-04-14", not "gpt4.1"
assert (
call_kwargs["model"] == "gpt-4.1-2025-04-14"
), f"Expected 'gpt-4.1-2025-04-14' but API received '{call_kwargs['model']}'"
# Verify other parameters
# Verify other parameters (gpt-4.1 supports temperature unlike O3/O4 models)
assert call_kwargs["temperature"] == 1.0
assert len(call_kwargs["messages"]) == 1
assert call_kwargs["messages"][0]["role"] == "user"
@@ -154,7 +158,7 @@ class TestOpenAIProvider:
# Verify response
assert result.content == "Test response"
assert result.model_name == "o4-mini" # Should be the resolved name
assert result.model_name == "gpt-4.1-2025-04-14" # Should be the resolved name
@patch("providers.openai_compatible.OpenAI")
def test_generate_content_other_aliases(self, mock_openai_class):