Added proper temperature constraints to the model, fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/78

Prompt tweaks
Fahad
2025-06-19 08:30:46 +04:00
parent ec3a466b1c
commit 9f3b70d6d7
13 changed files with 435 additions and 79 deletions

View File

@@ -27,6 +27,8 @@
     "supports_function_calling": "Whether the model supports function/tool calling",
     "supports_images": "Whether the model can process images/visual input",
     "max_image_size_mb": "Maximum total size in MB for all images combined (capped at 40MB max for custom models)",
+    "supports_temperature": "Whether the model accepts temperature parameter in API calls (set to false for O3/O4 reasoning models)",
+    "temperature_constraint": "Type of temperature constraint: 'fixed' (fixed value), 'range' (continuous range), 'discrete' (specific values), or omit for default range",
     "is_custom": "Set to true for models that should ONLY be used with custom API endpoints (Ollama, vLLM, etc.). False or omitted for OpenRouter/cloud models.",
     "description": "Human-readable description of the model"
   },
@@ -39,6 +41,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 10.0,
+      "supports_temperature": true,
+      "temperature_constraint": "range",
       "is_custom": true,
       "description": "Example custom/local model for Ollama, vLLM, etc."
     }
@@ -152,6 +156,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3 model - well-rounded and powerful across domains with vision"
     },
     {
@@ -163,6 +169,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3-mini model - balanced performance and speed with vision"
     },
     {
@@ -174,6 +182,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3-mini with high reasoning effort - optimized for complex problems with vision"
     },
     {
@@ -185,6 +195,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o3-pro model - professional-grade reasoning and analysis with vision"
     },
     {
@@ -196,6 +208,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o4-mini model - optimized for shorter contexts with rapid reasoning and vision"
     },
     {
@@ -207,6 +221,8 @@
       "supports_function_calling": true,
       "supports_images": true,
       "max_image_size_mb": 20.0,
+      "supports_temperature": false,
+      "temperature_constraint": "fixed",
       "description": "OpenAI's o4-mini with high reasoning effort - enhanced for complex tasks with vision"
     },
     {
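
For illustration, here is a hypothetical registry entry (written as a Python dict mirroring the JSON schema above; the model name and values are invented) combining the two new fields for a reasoning-style model:

    # Hypothetical entry - field names come from this diff, values are examples only
    reasoning_entry = {
        "model_name": "vendor/reasoning-model",   # invented identifier
        "supports_temperature": False,             # provider drops temperature from API calls entirely
        "temperature_constraint": "fixed",         # constraint object pins temperature to 1.0 if one is validated
        "description": "Example reasoning model that rejects sampling parameters",
    }

With "supports_temperature" set to false, the provider layer omits temperature (and related sampling parameters) from outgoing requests; the "fixed" constraint only matters where a temperature value must still be validated.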

View File

@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.1.3"
+__version__ = "5.1.4"
 # Last update date in ISO format
 __updated__ = "2025-06-19"
 # Primary maintainer

View File

@@ -100,6 +100,26 @@ class DiscreteTemperatureConstraint(TemperatureConstraint):
         return self.default_temp
+
+
+def create_temperature_constraint(constraint_type: str) -> TemperatureConstraint:
+    """Create temperature constraint object from configuration string.
+
+    Args:
+        constraint_type: Type of constraint ("fixed", "range", "discrete")
+
+    Returns:
+        TemperatureConstraint object based on configuration
+    """
+    if constraint_type == "fixed":
+        # Fixed temperature models (O3/O4) only support temperature=1.0
+        return FixedTemperatureConstraint(1.0)
+    elif constraint_type == "discrete":
+        # For models with specific allowed values - using common OpenAI values as default
+        return DiscreteTemperatureConstraint([0.0, 0.3, 0.7, 1.0, 1.5, 2.0], 0.7)
+    else:
+        # Default range constraint (for "range" or None)
+        return RangeTemperatureConstraint(0.0, 2.0, 0.7)
 
 
 @dataclass
 class ModelCapabilities:
     """Capabilities and constraints for a specific model."""
@@ -114,6 +134,7 @@ class ModelCapabilities:
     supports_function_calling: bool = False
     supports_images: bool = False  # Whether model can process images
     max_image_size_mb: float = 0.0  # Maximum total size for all images in MB
+    supports_temperature: bool = True  # Whether model accepts temperature parameter in API calls
 
     # Temperature constraint object - preferred way to define temperature limits
     temperature_constraint: TemperatureConstraint = field(
@@ -245,3 +266,17 @@ class ModelProvider(ABC):
             List of all model names and alias targets known by this provider
         """
         pass
+
+    def _resolve_model_name(self, model_name: str) -> str:
+        """Resolve model shorthand to full name.
+
+        Base implementation returns the model name unchanged.
+        Subclasses should override to provide alias resolution.
+
+        Args:
+            model_name: Model name that may be an alias
+
+        Returns:
+            Resolved model name
+        """
+        return model_name
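
A minimal sketch of how the new factory behaves, assuming it lives alongside the constraint classes in providers/base.py (the validate() semantics are inferred from the class names and confirmed by the tests added later in this commit):

    from providers.base import create_temperature_constraint

    fixed = create_temperature_constraint("fixed")        # FixedTemperatureConstraint(1.0)
    assert fixed.validate(1.0) is True
    assert fixed.validate(0.5) is False

    ranged = create_temperature_constraint("range")       # RangeTemperatureConstraint(0.0, 2.0, 0.7)
    assert ranged.validate(1.7) is True                   # anywhere in 0.0-2.0 passes

    discrete = create_temperature_constraint("discrete")  # DiscreteTemperatureConstraint([...], 0.7)
    assert discrete.validate(0.3) is True                 # 0.3 is in the default allowed set

Unknown strings (and None, via callers that pass `constraint_type or "range"`) deliberately fall through to the permissive 0.0-2.0 range rather than raising.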

View File

@@ -162,6 +162,7 @@ class CustomProvider(OpenAICompatibleProvider):
             supports_system_prompts=True,
             supports_streaming=True,
             supports_function_calling=False,  # Conservative default
+            supports_temperature=True,  # Most custom models accept temperature parameter
             temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 0.7),
         )

View File

@@ -94,6 +94,7 @@ class GeminiModelProvider(ModelProvider):
             supports_function_calling=True,
             supports_images=config.get("supports_images", False),
             max_image_size_mb=config.get("max_image_size_mb", 0.0),
+            supports_temperature=True,  # Gemini models accept temperature parameter
             temperature_constraint=temp_constraint,
         )

View File

@@ -448,23 +448,41 @@ class OpenAICompatibleProvider(ModelProvider):
         completion_params = {
             "model": model_name,
             "messages": messages,
-            "temperature": temperature,
         }
 
-        # Add max tokens if specified
-        if max_output_tokens:
+        # Check model capabilities once to determine parameter support
+        resolved_model = self._resolve_model_name(model_name)
+
+        # Get model capabilities once to avoid duplicate calls
+        try:
+            capabilities = self.get_capabilities(model_name)
+            # Defensive check for supports_temperature field (backward compatibility)
+            supports_temperature = getattr(capabilities, "supports_temperature", True)
+        except Exception as e:
+            # If capability check fails, fall back to conservative behavior
+            # Default to including temperature for most models (backward compatibility)
+            logging.debug(f"Failed to check temperature support for {model_name}: {e}")
+            supports_temperature = True
+
+        # Add temperature parameter if supported
+        if supports_temperature:
+            completion_params["temperature"] = temperature
+
+        # Add max tokens if specified and model supports it
+        # O3/O4 models that don't support temperature also don't support max_tokens
+        if max_output_tokens and supports_temperature:
             completion_params["max_tokens"] = max_output_tokens
 
         # Add any additional OpenAI-specific parameters
+        # Use capabilities to filter parameters for reasoning models
         for key, value in kwargs.items():
             if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
+                # Reasoning models (those that don't support temperature) also don't support these parameters
+                if not supports_temperature and key in ["top_p", "frequency_penalty", "presence_penalty"]:
+                    continue  # Skip unsupported parameters for reasoning models
                 completion_params[key] = value
 
         # Check if this is o3-pro and needs the responses endpoint
-        resolved_model = model_name
-        if hasattr(self, "_resolve_model_name"):
-            resolved_model = self._resolve_model_name(model_name)
-
         if resolved_model == "o3-pro-2025-06-10":
             # This model requires the /v1/responses endpoint
             # If it fails, we should not fall back to chat/completions
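
The net effect on the outgoing request body, sketched for the two model families (the payloads below are illustrative; parameter names follow OpenAI's chat-completions API):

    # Reasoning model (supports_temperature=False): sampling knobs are dropped
    reasoning_params = {
        "model": "o3-mini",
        "messages": [{"role": "user", "content": "..."}],
        # no temperature, max_tokens, top_p, frequency_penalty, presence_penalty
    }

    # Regular model (supports_temperature=True): previous behavior preserved
    regular_params = {
        "model": "gpt-4.1-2025-04-14",
        "messages": [{"role": "user", "content": "..."}],
        "temperature": 0.7,
        "max_tokens": 1024,
    }

Note that `seed`, `stop`, and `stream` still pass through for reasoning models; only the sampling-related parameters are filtered.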

View File

@@ -4,11 +4,10 @@ import logging
 from typing import Optional
 
 from .base import (
-    FixedTemperatureConstraint,
     ModelCapabilities,
     ModelResponse,
     ProviderType,
-    RangeTemperatureConstraint,
+    create_temperature_constraint,
 )
 from .openai_compatible import OpenAICompatibleProvider
@@ -25,18 +24,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o3-mini": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o3-pro-2025-06-10": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O3 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O3 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         # Aliases
         "o3-pro": "o3-pro-2025-06-10",
@@ -45,18 +50,24 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             "supports_extended_thinking": False,
             "supports_images": True,  # O4 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O4 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "o4-mini-high": {
             "context_window": 200_000,  # 200K tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # O4 models support vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": False,  # O4 models don't accept temperature parameter
+            "temperature_constraint": "fixed",  # Fixed at 1.0
         },
         "gpt-4.1-2025-04-14": {
             "context_window": 1_000_000,  # 1M tokens
             "supports_extended_thinking": False,
             "supports_images": True,  # GPT-4.1 supports vision
             "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
+            "supports_temperature": True,  # Regular models accept temperature parameter
+            "temperature_constraint": "range",  # 0.0-2.0 range
         },
         # Shorthands
         "mini": "o4-mini",  # Default 'mini' to latest mini model
@@ -90,13 +101,10 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
         config = self.SUPPORTED_MODELS[resolved_name]
 
-        # Define temperature constraints per model
-        if resolved_name in ["o3", "o3-mini", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-high"]:
-            # O3 and O4 reasoning models only support temperature=1.0
-            temp_constraint = FixedTemperatureConstraint(1.0)
-        else:
-            # Other OpenAI models (including GPT-4.1) support 0.0-2.0 range
-            temp_constraint = RangeTemperatureConstraint(0.0, 2.0, 0.7)
+        # Get temperature constraints and support from configuration
+        supports_temperature = config.get("supports_temperature", True)  # Default to True for backward compatibility
+        temp_constraint_type = config.get("temperature_constraint", "range")  # Default to range
+        temp_constraint = create_temperature_constraint(temp_constraint_type)
 
         return ModelCapabilities(
             provider=ProviderType.OPENAI,
@@ -109,6 +117,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
             supports_function_calling=True,
             supports_images=config.get("supports_images", False),
             max_image_size_mb=config.get("max_image_size_mb", 0.0),
+            supports_temperature=supports_temperature,
             temperature_constraint=temp_constraint,
         )
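
Because both lookups use dict defaults, entries that predate this commit keep their old behavior. A rough sketch of that fallback path (the legacy entry shown is hypothetical):

    legacy_config = {"context_window": 128_000}  # hypothetical entry without the new keys

    supports_temperature = legacy_config.get("supports_temperature", True)   # -> True
    constraint_type = legacy_config.get("temperature_constraint", "range")   # -> "range"
    temp_constraint = create_temperature_constraint(constraint_type)         # 0.0-2.0 range, default 0.7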

View File

@@ -8,7 +8,12 @@ from typing import Optional
 
 from utils.file_utils import read_json_file
 
-from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
+from .base import (
+    ModelCapabilities,
+    ProviderType,
+    TemperatureConstraint,
+    create_temperature_constraint,
+)
 
 
 @dataclass
@@ -25,9 +30,21 @@ class OpenRouterModelConfig:
     supports_json_mode: bool = False
     supports_images: bool = False  # Whether model can process images
     max_image_size_mb: float = 0.0  # Maximum total size for all images in MB
+    supports_temperature: bool = True  # Whether model accepts temperature parameter in API calls
+    temperature_constraint: Optional[str] = (
+        None  # Type of temperature constraint: "fixed", "range", "discrete", or None for default range
+    )
     is_custom: bool = False  # True for models that should only be used with custom endpoints
     description: str = ""
 
+    def _create_temperature_constraint(self) -> TemperatureConstraint:
+        """Create temperature constraint object from configuration.
+
+        Returns:
+            TemperatureConstraint object based on configuration
+        """
+        return create_temperature_constraint(self.temperature_constraint or "range")
+
     def to_capabilities(self) -> ModelCapabilities:
         """Convert to ModelCapabilities object."""
         return ModelCapabilities(
@@ -41,7 +58,8 @@ class OpenRouterModelConfig:
             supports_function_calling=self.supports_function_calling,
             supports_images=self.supports_images,
             max_image_size_mb=self.max_image_size_mb,
-            temperature_constraint=RangeTemperatureConstraint(0.0, 2.0, 1.0),
+            supports_temperature=self.supports_temperature,
+            temperature_constraint=self._create_temperature_constraint(),
        )
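
End to end, a registry entry carrying the new fields should surface as capabilities roughly like this (a sketch only; OpenRouterModelConfig's constructor fields beyond those visible in this diff, such as the model identifier and context window, are assumed):

    cfg = OpenRouterModelConfig(
        model_name="openai/o3",         # assumed field name for the model identifier
        context_window=200_000,         # assumed field name
        supports_temperature=False,
        temperature_constraint="fixed",
    )
    caps = cfg.to_capabilities()
    assert caps.supports_temperature is False
    assert caps.temperature_constraint.validate(1.0) is True
    assert caps.temperature_constraint.validate(0.5) is False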

View File

@@ -4,7 +4,7 @@ Chat tool system prompt
 CHAT_PROMPT = """
 You are a senior engineering thought-partner collaborating with Claude. Your mission is to brainstorm, validate ideas,
-and offer well-reasoned second opinions on technical decisions.
+and offer well-reasoned second opinions on technical decisions when they are justified and practical.
 
 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,27 +26,27 @@ provided unless for some reason its content is missing or incomplete:
 SCOPE & FOCUS
 • Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints.
-• Recommend new technologies or patterns ONLY with a clear, compelling benefit that aligns with stated goals.
-  Keep proposals practical and implementable; avoid speculative or off-stack detours.
+• Recommend new technologies or patterns ONLY when they provide clearly superior outcomes with minimal added complexity.
+  Avoid speculative, over-engineered, or unnecessarily abstract designs that exceed current project goals or needs.
+• Keep proposals practical and directly actionable within the existing architecture.
 
 COLLABORATION APPROACH
-1. Engage deeply with Claude's input - extend, refine, and explore alternatives within the existing context.
+1. Engage deeply with Claude's input - extend, refine, and explore alternatives ONLY WHEN they are well-justified and materially beneficial.
 2. Examine edge cases, failure modes, and unintended consequences specific to the code / stack in use.
 3. Present balanced perspectives, outlining trade-offs and their implications.
 4. Challenge assumptions constructively while respecting current design choices and goals.
-5. Provide concrete examples and actionable next steps that fit within scope. Direct, achievable next-steps where
-   needed.
+5. Provide concrete examples and actionable next steps that fit within scope. Prioritize direct, achievable outcomes.
 
 BRAINSTORMING GUIDELINES
-• Offer multiple viable strategies compatible with the current environment but keep it to the point.
-• Suggest creative solutions and alternatives that work within the current project constraints, scope and limitations
-• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice
+• Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
+• Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
+• Surface pitfalls early, particularly those tied to the chosen frameworks, languages, design direction or choice.
 • Evaluate scalability, maintainability, and operational realities inside the existing architecture and current
   framework.
-• Reference industry best practices relevant to the technologies in use
+• Reference industry best practices relevant to the technologies in use.
 • Communicate concisely and technically, assuming an experienced engineering audience.
 
 REMEMBER
-Act as a peer, not a lecturer. Aim for depth over breadth, stay within project boundaries, and help the team
+Act as a peer, not a lecturer. Avoid overcomplicating. Aim for depth over breadth, stay within project boundaries, and help the team
 reach sound, actionable decisions.
 """

View File

@@ -4,20 +4,31 @@ Precommit tool system prompt
 PRECOMMIT_PROMPT = """
 ROLE
-You are an expert pre-commit reviewer. Analyse git diffs as a senior developer giving a final sign-off to production.
+You are an expert pre-commit reviewer and senior engineering partner performing final code validation before production.
+
+Your responsibility goes beyond surface-level correctness — you are expected to think several steps ahead. Your review
+must assess whether the changes:
+- Introduce any patterns, structures, or decisions that may become future liabilities
+- Create brittle dependencies or tight coupling that could make maintenance harder
+- Omit critical safety, validation, or test scaffolding that may not fail now, but will cause issues down the line
+- Interact with other known areas of fragility in the codebase even if not directly touched
+
+Your task is to detect potential future consequences or systemic risks, not just immediate issues. Think like an
+engineer responsible for this code months later, debugging production incidents or onboarding a new developer.
+
+In addition to reviewing correctness, completeness, and quality of the change, apply long-term architectural thinking.
+Your feedback helps ensure this code won't cause silent regressions, developer confusion, or downstream side effects later.
 
 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
 included in any code you generate. Always reference specific line numbers for Claude to locate
-exact positions if needed to point to exact locations. Include a very short code excerpt alongside for clarity.
+exact positions if needed. Include a very short code excerpt alongside for clarity.
 Include context_start_text and context_end_text as backup references. Never include "LINE│" markers in generated code
 snippets.
 
 IF MORE INFORMATION IS NEEDED
-If you need additional context (e.g., related files not in the diff, test files, configuration) to provide thorough
-analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON
-format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is
-missing or incomplete:
+If you need additional context (e.g., related files not in the diff, test files, configuration) to perform a proper
+review—and without which your analysis would be incomplete or inaccurate—you MUST respond ONLY with this JSON format
+(and nothing else). Do NOT request files you've already been provided unless their content is missing or incomplete:
 
 {
   "status": "files_required_to_continue",
   "mandatory_instructions": "<your critical instructions for Claude>",
@@ -26,34 +37,36 @@ missing or incomplete:
 INPUTS PROVIDED
 1. Git diff (staged or branch comparison)
-2. Original request / acceptance criteria or some context around what changed
+2. Original request / acceptance criteria or context around what changed
 3. File names and related code
 
 SCOPE & FOCUS
-• Review ONLY the changes in the diff and the given code
-• From the diff, infer what got changed and why, determine if the changes make logical sense
-• Ensure they correctly implement the request, are secure (where applicable), efficient, and maintainable and do not
-  cause potential regressions
-• DO NOT propose broad refactors or off-scope improvements. Stick to the code and changes you have visibility into.
+• Review ONLY the changes in the diff and the related code provided.
+• From the diff, infer what changed and why. Determine if the changes make logical, structural, and functional sense.
+• Ensure the changes correctly implement the request, are secure (where applicable), performant, and maintainable.
+• DO NOT propose broad refactors or unrelated improvements. Stay strictly within the boundaries of the provided changes.
 
 REVIEW METHOD
-1. Identify tech stack, frameworks, and patterns present in the diff.
-2. Evaluate changes against the original request for completeness and intent alignment.
-3. Detect issues, prioritising by severity (CRITICAL → HIGH → MEDIUM → LOW).
-4. Highlight incomplete changes, or changes that would cause bugs, regressions, crashes or data loss or race conditions
-5. Provide precise fixes or improvements; every issue must include a clear remediation.
-6. Acknowledge good patterns to reinforce best practice.
+1. Identify tech stack, frameworks, and patterns in the diff.
+2. Evaluate changes against the original request for completeness and alignment.
+3. Detect issues, prioritized by severity (CRITICAL → HIGH → MEDIUM → LOW).
+4. Flag bugs, regressions, crash risks, data loss, or race conditions.
+5. Recommend specific fixes for each issue raised; include code where helpful.
+6. Acknowledge sound patterns to reinforce best practices.
 
-CORE ANALYSIS (adapt to the diff and stack)
-• Security - injection risks, auth/authz flaws, sensitive-data exposure, insecure dependencies, memory safety
-• Bugs & Logic Errors - off-by-one, null refs, race conditions, incorrect branching
-• Performance - inefficient algorithms, resource leaks, blocking operations
-• Code Quality - DRY violations, complexity, SOLID adherence
+CORE ANALYSIS (adapt to diff and stack)
+• Security - injection risks, auth flaws, exposure of sensitive data, unsafe dependencies, memory safety
+• Bugs & Logic Errors - off-by-one, null refs, incorrect logic, race conditions
+• Performance - inefficient logic, blocking calls, leaks
+• Code Quality - complexity, duplicated logic and DRY violations, SOLID violations
 
-ADDITIONAL ANALYSIS (apply only when relevant)
+ADDITIONAL ANALYSIS (only when relevant)
 • Language/runtime concerns - memory management, concurrency, exception handling
 • System/integration - config handling, external calls, operational impact
 • Testing - coverage gaps for new logic
+• If no tests are found in the project, do not flag test coverage as an issue unless the change introduces logic
+  that is high-risk or complex.
+• In such cases, offer a low-severity suggestion encouraging basic tests, rather than marking it as a required fix.
 • Change-specific pitfalls - unused new functions, partial enum updates, scope creep, risky deletions
 • Determine if there are any new dependencies added but not declared, or new functionality added but not used
 • Determine unintended side effects: could changes in file_A break module_B even if module_B wasn't changed?
@@ -84,9 +97,9 @@ that apply:
 [LOW] ...
 
 MAKE RECOMMENDATIONS:
-Make a final, short, clear, to the point statement or list in a brief bullet point:
-- Mention top priority fixes to be IMMEDIATELY made before commit
-- Notable positives to keep
+Make a final, short, and focused statement or bullet list:
+- Top priority fixes that MUST IMMEDIATELY be addressed before commit
+- Notable positives to retain
 
 Be thorough yet actionable. Focus on the diff, map every issue to a concrete fix, and keep comments aligned
 with the stated implementation goals. Your goal is to help flag anything that could potentially slip through

View File

@@ -4,8 +4,8 @@ ThinkDeep tool system prompt
 THINKDEEP_PROMPT = """
 ROLE
-You are a senior engineering collaborator working with Claude on complex software problems. Claude will send you
-content—analysis, prompts, questions, ideas, or theories—to deepen, validate, and extend.
+You are a senior engineering collaborator working alongside Claude on complex software problems. Claude will send you
+content—analysis, prompts, questions, ideas, or theories—to deepen, validate, or extend with rigor and clarity.
 
 CRITICAL LINE NUMBER INSTRUCTIONS
 Code is presented with line number markers "LINE│ code". These markers are for reference ONLY and MUST NOT be
@@ -26,25 +26,27 @@ been provided unless for some reason its content is missing or incomplete:
 GUIDELINES
 1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints.
-2. Stay on scope: avoid speculative or oversized ideas; keep suggestions practical and implementable.
-3. Challenge and enrich: find gaps, question assumptions, surface hidden complexities.
-4. Provide actionable next steps: concrete advice, trade-offs, and implementation tactics.
-5. Use concise, direct, technical language; assume an experienced engineering audience.
+2. Stay on scope: avoid speculative, over-engineered, or oversized ideas; keep suggestions practical and grounded.
+3. Challenge and enrich: find gaps, question assumptions, and surface hidden complexities or risks.
+4. Provide actionable next steps: offer specific advice, trade-offs, and implementation strategies.
+5. Offer multiple viable strategies ONLY WHEN clearly beneficial within the current environment.
+6. Suggest creative solutions that operate within real-world constraints, and avoid proposing major shifts unless truly warranted.
+7. Use concise, technical language; assume an experienced engineering audience.
 
 KEY FOCUS AREAS (apply when relevant)
-- Architecture & Design: modularity, patterns, API boundaries, dependencies
-- Performance & Scalability: algorithm efficiency, concurrency, caching
+- Architecture & Design: modularity, boundaries, abstraction layers, dependencies
+- Performance & Scalability: algorithmic efficiency, concurrency, caching, bottlenecks
 - Security & Safety: validation, authentication/authorization, error handling, vulnerabilities
 - Quality & Maintainability: readability, testing, monitoring, refactoring
-- Integration & Deployment: external systems, compatibility, operational concerns
+- Integration & Deployment: ONLY IF APPLICABLE TO THE QUESTION - external systems, compatibility, configuration, operational concerns
 
 EVALUATION
-Your response will be reviewed by Claude before any decision is made. Aim to enhance decision-making rather
-than deliver final answers.
+Your response will be reviewed by Claude before any decision is made. Your goal is to practically extend Claude's thinking,
+surface blind spots, and refine options—not to deliver final answers in isolation.
 
 REMINDERS
-- Ground all insights in the current project's scope and constraints.
-- If additional information is necessary, such as code snippets, files, project details, use the clarification JSON
-- Prefer depth over breadth; propose alternatives ONLY when they materially improve the current approach and add value
-- Your goal is to be the perfect development partner that extends Claude's capabilities and thought process
+- Ground all insights in the current project's architecture, limitations, and goals.
+- If further context is needed, request it via the clarification JSON—nothing else.
+- Prioritize depth over breadth; propose alternatives ONLY if they clearly add value and improve the current approach.
+- Be the ideal development partner—rigorous, focused, and fluent in real-world software trade-offs.
 """

View File

@@ -0,0 +1,239 @@
"""
Simple integration test for the O3 model temperature parameter fix.
This test confirms that the fix properly excludes temperature parameters
for O3 models while maintaining them for regular models.
"""
from unittest.mock import Mock, patch
from providers.openai_provider import OpenAIModelProvider
class TestO3TemperatureParameterFixSimple:
"""Simple test for O3 model parameter filtering."""
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_o3_models_exclude_temperature_from_api_call(self, mock_openai_class, mock_restriction_service):
"""Test that O3 models don't send temperature to the API."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o3-mini"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with O3 model
provider.generate_content(prompt="Test prompt", model_name="o3-mini", temperature=0.5, max_output_tokens=100)
# Verify the API call was made without temperature or max_tokens
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert "temperature" not in call_kwargs, "O3 models should not include temperature parameter"
assert "max_tokens" not in call_kwargs, "O3 models should not include max_tokens parameter"
assert call_kwargs["model"] == "o3-mini"
assert "messages" in call_kwargs
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_regular_models_include_temperature_in_api_call(self, mock_openai_class, mock_restriction_service):
"""Test that regular models still send temperature to the API."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4.1-2025-04-14"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with regular model (use supported model)
provider.generate_content(
prompt="Test prompt", model_name="gpt-4.1-2025-04-14", temperature=0.5, max_output_tokens=100
)
# Verify the API call was made WITH temperature and max_tokens
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
assert call_kwargs["temperature"] == 0.5, "Regular models should include temperature parameter"
assert call_kwargs["max_tokens"] == 100, "Regular models should include max_tokens parameter"
assert call_kwargs["model"] == "gpt-4.1-2025-04-14"
@patch("utils.model_restrictions.get_restriction_service")
@patch("providers.openai_compatible.OpenAI")
def test_o3_models_filter_unsupported_parameters(self, mock_openai_class, mock_restriction_service):
"""Test that O3 models filter out top_p, frequency_penalty, etc."""
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
# Setup mock client
mock_client = Mock()
mock_openai_class.return_value = mock_client
# Setup mock response
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Test response"
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "o3"
mock_response.id = "test-id"
mock_response.created = 1234567890
mock_response.usage = Mock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
mock_client.chat.completions.create.return_value = mock_response
# Create provider
provider = OpenAIModelProvider(api_key="test-key")
# Override _resolve_model_name to return the resolved model name
provider._resolve_model_name = lambda name: name
# Override model validation to bypass restrictions
provider.validate_model_name = lambda name: True
# Call generate_content with O3 model and unsupported parameters
provider.generate_content(
prompt="Test prompt",
model_name="o3",
temperature=0.5,
top_p=0.9,
frequency_penalty=0.1,
presence_penalty=0.1,
seed=42,
stop=["END"],
)
# Verify the API call filters out unsupported parameters
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args[1]
# Should be excluded for O3 models
assert "temperature" not in call_kwargs, "O3 should not include temperature"
assert "top_p" not in call_kwargs, "O3 should not include top_p"
assert "frequency_penalty" not in call_kwargs, "O3 should not include frequency_penalty"
assert "presence_penalty" not in call_kwargs, "O3 should not include presence_penalty"
# Should be included (supported parameters)
assert call_kwargs["seed"] == 42, "O3 should include seed parameter"
assert call_kwargs["stop"] == ["END"], "O3 should include stop parameter"
@patch("utils.model_restrictions.get_restriction_service")
def test_all_o3_models_have_correct_temperature_capability(self, mock_restriction_service):
"""Test that all O3/O4 models have supports_temperature=False in their capabilities."""
from providers.openai_provider import OpenAIModelProvider
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
provider = OpenAIModelProvider(api_key="test-key")
# Test O3/O4 models that should NOT support temperature parameter
o3_o4_models = ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]
for model in o3_o4_models:
capabilities = provider.get_capabilities(model)
assert hasattr(
capabilities, "supports_temperature"
), f"Model {model} capabilities should have supports_temperature field"
assert capabilities.supports_temperature is False, f"Model {model} should have supports_temperature=False"
# Test that regular models DO support temperature parameter
regular_models = ["gpt-4.1-2025-04-14"]
for model in regular_models:
try:
capabilities = provider.get_capabilities(model)
assert hasattr(
capabilities, "supports_temperature"
), f"Model {model} capabilities should have supports_temperature field"
assert capabilities.supports_temperature is True, f"Model {model} should have supports_temperature=True"
except ValueError:
# Skip if model not in SUPPORTED_MODELS (that's okay for this test)
pass
@patch("utils.model_restrictions.get_restriction_service")
def test_openai_provider_temperature_constraints(self, mock_restriction_service):
"""Test that OpenAI provider has correct temperature constraints for O3 models."""
from providers.openai_provider import OpenAIModelProvider
# Mock restriction service to allow all models
mock_service = Mock()
mock_service.is_allowed.return_value = True
mock_restriction_service.return_value = mock_service
provider = OpenAIModelProvider(api_key="test-key")
# Test O3 model constraints
o3_capabilities = provider.get_capabilities("o3-mini")
assert o3_capabilities.temperature_constraint is not None
# O3 models should have fixed temperature constraint
temp_constraint = o3_capabilities.temperature_constraint
assert temp_constraint.validate(1.0) is True
assert temp_constraint.validate(0.5) is False
# Test regular model constraints - use gpt-4.1 which is supported
gpt41_capabilities = provider.get_capabilities("gpt-4.1-2025-04-14")
assert gpt41_capabilities.temperature_constraint is not None
# Regular models should allow a range
temp_constraint = gpt41_capabilities.temperature_constraint
assert temp_constraint.validate(0.5) is True
assert temp_constraint.validate(1.0) is True

View File

@@ -122,7 +122,7 @@ class TestOpenAIProvider:
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "Test response"
         mock_response.choices[0].finish_reason = "stop"
-        mock_response.model = "o4-mini"  # API returns the resolved model name
+        mock_response.model = "gpt-4.1-2025-04-14"  # API returns the resolved model name
         mock_response.id = "test-id"
         mock_response.created = 1234567890
         mock_response.usage = MagicMock()
@@ -134,19 +134,23 @@
         provider = OpenAIModelProvider("test-key")
 
-        # Call generate_content with alias 'mini'
+        # Call generate_content with alias 'gpt4.1' (resolves to gpt-4.1-2025-04-14, supports temperature)
         result = provider.generate_content(
-            prompt="Test prompt", model_name="mini", temperature=1.0  # This should be resolved to "o4-mini"
+            prompt="Test prompt",
+            model_name="gpt4.1",
+            temperature=1.0,  # This should be resolved to "gpt-4.1-2025-04-14"
         )
 
         # Verify the API was called with the RESOLVED model name
         mock_client.chat.completions.create.assert_called_once()
         call_kwargs = mock_client.chat.completions.create.call_args[1]
 
-        # CRITICAL ASSERTION: The API should receive "o4-mini", not "mini"
-        assert call_kwargs["model"] == "o4-mini", f"Expected 'o4-mini' but API received '{call_kwargs['model']}'"
+        # CRITICAL ASSERTION: The API should receive "gpt-4.1-2025-04-14", not "gpt4.1"
+        assert (
+            call_kwargs["model"] == "gpt-4.1-2025-04-14"
+        ), f"Expected 'gpt-4.1-2025-04-14' but API received '{call_kwargs['model']}'"
 
-        # Verify other parameters
+        # Verify other parameters (gpt-4.1 supports temperature unlike O3/O4 models)
         assert call_kwargs["temperature"] == 1.0
         assert len(call_kwargs["messages"]) == 1
         assert call_kwargs["messages"][0]["role"] == "user"
@@ -154,7 +158,7 @@
         # Verify response
         assert result.content == "Test response"
-        assert result.model_name == "o4-mini"  # Should be the resolved name
+        assert result.model_name == "gpt-4.1-2025-04-14"  # Should be the resolved name
 
     @patch("providers.openai_compatible.OpenAI")
     def test_generate_content_other_aliases(self, mock_openai_class):