Fixes O3-Pro connection https://github.com/BeehiveInnovations/zen-mcp-server/issues/56
New tests for O3-Pro. Improved prompts for shorthand input.
README.md
@@ -592,10 +592,41 @@ For detailed tool parameters and configuration options, see the [Advanced Usage

### Prompt Support

- - `/zen:thinkdeeper with o3 check if the algorithm in @sort.py is performant and if there are alternatives we could explore`
- - `/zen:precommit use gemini pro and confirm these changes match our requirements in COOL_FEATURE.md`
- - `/zen:testgen write me tests for class ABC`
- - `/zen:refactor using local-llama propose a decomposition strategy, make a plan and save it in FIXES.md then share this with o3 to confirm along with large_file.swift`

Zen supports powerful structured prompts in Claude Code for quick access to tools and models:

#### Basic Tool Prompts

- `/zen:thinkdeeper` - Use thinkdeep tool with auto-selected model
- `/zen:chat` - Use chat tool with auto-selected model
- `/zen:codereview` - Use codereview tool with auto-selected model
- `/zen:analyze` - Use analyze tool with auto-selected model

#### Model-Specific Tool Prompts

- `/zen:chat:o3 hello there` - Use chat tool specifically with the O3 model
- `/zen:thinkdeep:flash analyze this quickly` - Use thinkdeep tool with Flash for speed
- `/zen:codereview:pro review for security` - Use codereview tool with Gemini Pro for thorough analysis
- `/zen:debug:grok help with this error` - Use debug tool with the GROK model
- `/zen:analyze:gemini-2.5-flash-preview-05-20 examine these files` - Use analyze tool with a specific Gemini model

#### Continuation Prompts

- `/zen:continue` - Continue previous conversation using the chat tool
- `/zen:chat:continue` - Continue previous conversation using the chat tool specifically
- `/zen:thinkdeep:continue` - Continue previous conversation using the thinkdeep tool
- `/zen:analyze:continue` - Continue previous conversation using the analyze tool

#### Advanced Examples

- `/zen:thinkdeeper:o3 check if the algorithm in @sort.py is performant and if there are alternatives we could explore`
- `/zen:precommit:pro confirm these changes match our requirements in COOL_FEATURE.md`
- `/zen:testgen:flash write me tests for class ABC`
- `/zen:refactor:local-llama propose a decomposition strategy, make a plan and save it in FIXES.md then share this with o3 to confirm along with large_file.swift`

#### Syntax Format

The structured prompt format is: `/zen:[tool]:[model / continue] [your_message]`

- `[tool]` - Any available tool name (chat, thinkdeep, codereview, debug, analyze, etc.)
- `[model / continue]` - Either a specific model name (o3, flash, pro, grok, etc.) or the keyword `continue` to continue the conversation using this tool
- `[your_message]` - Your actual prompt or question

**Note**: When using `:continue`, the server resumes the previous conversation with the specified tool, maintaining full context and conversation history.

### Add Your Own Tools
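The examples above are user-facing; on the wire, Claude Code invokes an MCP prompt whose name encodes the tool and the optional model (for example, `/zen:chat:o3 hello there` arrives as a prompt named `chat:o3`). A minimal sketch of that name parsing, mirroring the server.py changes later in this commit (illustrative only, not part of the diff):

def parse_prompt_name(name: str):
    # Illustrative sketch; the actual parsing lives in server.py's handle_get_prompt (see below).
    # Split "chat:o3" / "chat:continue" / "chat" into (tool, model, is_continuation).
    base_name, _, rest = name.partition(":")
    if rest.lower() == "continue":
        return base_name, None, True
    return base_name, rest or None, False

# parse_prompt_name("chat:o3")            -> ("chat", "o3", False)
# parse_prompt_name("thinkdeep:continue") -> ("thinkdeep", None, True)
# parse_prompt_name("continue")           -> ("continue", None, False); the server then defaults this to the chat tool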

@@ -14,7 +14,7 @@ import os

# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "4.8.2"
+__version__ = "4.8.3"
# Last update date in ISO format
__updated__ = "2025-06-16"
# Primary maintainer

@@ -32,12 +32,14 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
            "supports_images": True,  # O3 models support vision
            "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
        },
-        "o3-pro": {
+        "o3-pro-2025-06-10": {
            "context_window": 200_000,  # 200K tokens
            "supports_extended_thinking": False,
            "supports_images": True,  # O3 models support vision
            "max_image_size_mb": 20.0,  # 20MB per OpenAI docs
        },
        # Aliases
        "o3-pro": "o3-pro-2025-06-10",
        "o4-mini": {
            "context_window": 200_000,  # 200K tokens
            "supports_extended_thinking": False,
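The string-valued entry added above acts as an alias: resolving "o3-pro" now yields the dated model name, as the updated provider test at the end of this commit asserts. A rough sketch of that convention (the real _resolve_model_name lives on the provider base class and is not shown in this diff):

def resolve_model_name(name: str, supported: dict) -> str:
    # Sketch only, assuming the dict-of-configs-plus-string-aliases layout shown above.
    target = supported.get(name, name)
    # A string value is an alias pointing at the canonical name; a dict value is already canonical.
    return target if isinstance(target, str) else name

# resolve_model_name("o3-pro", OpenAIModelProvider.SUPPORTED_MODELS) -> "o3-pro-2025-06-10"
# resolve_model_name("o3", OpenAIModelProvider.SUPPORTED_MODELS)     -> "o3"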

@@ -89,7 +91,7 @@ class OpenAIModelProvider(OpenAICompatibleProvider):
        config = self.SUPPORTED_MODELS[resolved_name]

        # Define temperature constraints per model
-        if resolved_name in ["o3", "o3-mini", "o3-pro", "o4-mini", "o4-mini-high"]:
+        if resolved_name in ["o3", "o3-mini", "o3-pro", "o3-pro-2025-06-10", "o4-mini", "o4-mini-high"]:
            # O3 and O4 reasoning models only support temperature=1.0
            temp_constraint = FixedTemperatureConstraint(1.0)
        else:

@@ -224,6 +224,138 @@ class OpenAICompatibleProvider(ModelProvider):
        return self._client

    def _generate_with_responses_endpoint(
        self,
        model_name: str,
        messages: list,
        temperature: float,
        max_output_tokens: Optional[int] = None,
        **kwargs,
    ) -> ModelResponse:
        """Generate content using the /v1/responses endpoint for o3-pro via OpenAI library."""
        # Convert messages to the correct format for responses endpoint
        input_messages = []

        for message in messages:
            role = message.get("role", "")
            content = message.get("content", "")

            if role == "system":
                # System messages can be treated as user messages for o3-pro
                input_messages.append(
                    {"role": "user", "content": [{"type": "input_text", "text": f"System: {content}"}]}
                )
            elif role == "user":
                input_messages.append({"role": "user", "content": [{"type": "input_text", "text": content}]})
            elif role == "assistant":
                input_messages.append({"role": "assistant", "content": [{"type": "output_text", "text": content}]})
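        # For example, a system + user exchange converts to input like:
        #   [{"role": "user", "content": [{"type": "input_text", "text": "System: You are a helpful assistant"}]},
        #    {"role": "user", "content": [{"type": "input_text", "text": "What is 2 + 2?"}]}]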

        # Prepare completion parameters for responses endpoint
        completion_params = {
            "model": model_name,
            "input": input_messages,
            "text": {"format": {"type": "text"}},
            "reasoning": {"effort": "medium", "summary": "auto"},
            "tools": [],
            "store": True,
        }

        # Temperature is not in the documented parameters for responses endpoint
        # but we'll try to add it in case it's supported

        # Add max tokens if specified
        if max_output_tokens:
            completion_params["max_tokens"] = max_output_tokens

        # Add any additional OpenAI-specific parameters
        for key, value in kwargs.items():
            if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop"]:
                completion_params[key] = value

        # Retry logic with progressive delays
        max_retries = 4
        retry_delays = [1, 3, 5, 8]
        last_exception = None
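        # With these values a retryable failure waits 1s, 3s, then 5s between attempts;
        # after the fourth failed attempt the error is re-raised below.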

        for attempt in range(max_retries):
            try:
                # Use OpenAI client's responses endpoint
                response = self.client.responses.create(**completion_params)

                # Extract content and usage from responses endpoint format
                # The response format is different for responses endpoint
                content = ""
                if hasattr(response, "output") and response.output:
                    if hasattr(response.output, "content") and response.output.content:
                        # Look for output_text in content
                        for content_item in response.output.content:
                            if hasattr(content_item, "type") and content_item.type == "output_text":
                                content = content_item.text
                                break
                    elif hasattr(response.output, "text"):
                        content = response.output.text

                # Try to extract usage information
                usage = None
                if hasattr(response, "usage"):
                    usage = self._extract_usage(response)
                elif hasattr(response, "input_tokens") and hasattr(response, "output_tokens"):
                    usage = {
                        "input_tokens": getattr(response, "input_tokens", 0),
                        "output_tokens": getattr(response, "output_tokens", 0),
                        "total_tokens": getattr(response, "input_tokens", 0) + getattr(response, "output_tokens", 0),
                    }

                return ModelResponse(
                    content=content,
                    usage=usage,
                    model_name=model_name,
                    friendly_name=self.FRIENDLY_NAME,
                    provider=self.get_provider_type(),
                    metadata={
                        "model": getattr(response, "model", model_name),
                        "id": getattr(response, "id", ""),
                        "created": getattr(response, "created_at", 0),
                        "endpoint": "responses",
                    },
                )

            except Exception as e:
                last_exception = e

                # Check if this is a retryable error
                error_str = str(e).lower()
                is_retryable = any(
                    term in error_str
                    for term in [
                        "timeout",
                        "connection",
                        "network",
                        "temporary",
                        "unavailable",
                        "retry",
                        "429",
                        "500",
                        "502",
                        "503",
                        "504",
                    ]
                )

                if is_retryable and attempt < max_retries - 1:
                    delay = retry_delays[attempt]
                    logging.warning(
                        f"Retryable error for o3-pro responses endpoint, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
                    )
                    time.sleep(delay)
                else:
                    break

        # If we get here, all retries failed
        error_msg = f"o3-pro responses endpoint error after {max_retries} attempts: {str(last_exception)}"
        logging.error(error_msg)
        raise RuntimeError(error_msg) from last_exception

    def generate_content(
        self,
        prompt: str,

@@ -301,6 +433,22 @@ class OpenAICompatibleProvider(ModelProvider):
            if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
                completion_params[key] = value

        # Check if this is o3-pro and needs the responses endpoint
        resolved_model = model_name
        if hasattr(self, "_resolve_model_name"):
            resolved_model = self._resolve_model_name(model_name)

        if resolved_model == "o3-pro-2025-06-10":
            # This model requires the /v1/responses endpoint
            # If it fails, we should not fall back to chat/completions
            return self._generate_with_responses_endpoint(
                model_name=resolved_model,
                messages=messages,
                temperature=temperature,
                max_output_tokens=max_output_tokens,
                **kwargs,
            )

        # Retry logic with progressive delays
        max_retries = 4  # Total of 4 attempts
        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
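Taken together with the o3-pro-2025-06-10 entry above, the effect at the call site looks roughly like the following sketch, which mirrors the mock tests at the end of this commit (import and API key omitted/placeholder, not part of the diff):

provider = OpenAIModelProvider("sk-...")  # placeholder key; construction matches the unit tests below

# "o3-pro" resolves to "o3-pro-2025-06-10" and is served via client.responses.create(...)
provider.generate_content(prompt="What is 2 + 2?", model_name="o3-pro", temperature=1.0)

# Any other model, e.g. "o3-mini", still goes through client.chat.completions.create(...)
provider.generate_content(prompt="What is 2 + 2?", model_name="o3-mini", temperature=1.0)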

server.py

@@ -925,6 +925,15 @@ async def handle_list_prompts() -> list[Prompt]:
            )
        )

    # Add special "continue" prompt
    prompts.append(
        Prompt(
            name="continue",
            description="Continue the previous conversation using the chat tool",
            arguments=[],
        )
    )

    logger.debug(f"Returning {len(prompts)} prompts to MCP client")
    return prompts

@@ -934,12 +943,16 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
    """
    Get prompt details and generate the actual prompt text.

-    This handler is called when a user invokes a prompt (e.g., /zen:thinkdeeper).
+    This handler is called when a user invokes a prompt (e.g., /zen:thinkdeeper or /zen:chat:o3).
    It generates the appropriate text that Claude will then use to call the
    underlying tool.

    Supports structured prompt names like "chat:o3" where:
    - "chat" is the tool name
    - "o3" is the model to use

    Args:
-        name: The name of the prompt to execute
+        name: The name of the prompt to execute (can include model like "chat:o3")
        arguments: Optional arguments for the prompt (e.g., model, thinking_mode)

    Returns:

@@ -950,39 +963,74 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
    """
    logger.debug(f"MCP client requested prompt: {name} with args: {arguments}")

    # Parse structured prompt names like "chat:o3" or "chat:continue"
    parsed_model = None
    is_continuation = False
    base_name = name

    if ":" in name:
        parts = name.split(":", 1)
        base_name = parts[0]
        second_part = parts[1]

        # Check if the second part is "continue" (special keyword)
        if second_part.lower() == "continue":
            is_continuation = True
            logger.debug(f"Parsed continuation prompt: tool='{base_name}', continue=True")
        else:
            parsed_model = second_part
            logger.debug(f"Parsed structured prompt: tool='{base_name}', model='{parsed_model}'")

    # Handle special "continue" cases
    if base_name.lower() == "continue":
        # This is "/zen:continue" - use chat tool as default for continuation
        tool_name = "chat"
        is_continuation = True
        template_info = {
            "name": "continue",
            "description": "Continue the previous conversation",
            "template": "Continue the conversation",
        }
        logger.debug("Using /zen:continue - defaulting to chat tool with continuation")
    else:
        # Find the corresponding tool by checking prompt names
        tool_name = None
        template_info = None

-        # Check if it's a known prompt name
+        # Check if it's a known prompt name (using base_name)
        for t_name, t_info in PROMPT_TEMPLATES.items():
-            if t_info["name"] == name:
+            if t_info["name"] == base_name:
                tool_name = t_name
                template_info = t_info
                break

        # If not found, check if it's a direct tool name
-        if not tool_name and name in TOOLS:
-            tool_name = name
+        if not tool_name and base_name in TOOLS:
+            tool_name = base_name
            template_info = {
-                "name": name,
-                "description": f"Use {name} tool",
-                "template": f"Use {name}",
+                "name": base_name,
+                "description": f"Use {base_name} tool",
+                "template": f"Use {base_name}",
            }

    if not tool_name:
-        logger.error(f"Unknown prompt requested: {name}")
+        logger.error(f"Unknown prompt requested: {name} (base: {base_name})")
        raise ValueError(f"Unknown prompt: {name}")

    # Get the template
    template = template_info.get("template", f"Use {tool_name}")

    # Safe template expansion with defaults
    # Prioritize: parsed model > arguments model > "auto"
    final_model = parsed_model or (arguments.get("model", "auto") if arguments else "auto")

    prompt_args = {
-        "model": arguments.get("model", "auto") if arguments else "auto",
+        "model": final_model,
        "thinking_mode": arguments.get("thinking_mode", "medium") if arguments else "medium",
    }

    logger.debug(f"Using model '{final_model}' for prompt '{name}'")

    # Safely format the template
    try:
        prompt_text = template.format(**prompt_args)

@@ -990,6 +1038,21 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
        logger.warning(f"Missing template argument {e} for prompt {name}, using raw template")
        prompt_text = template  # Fallback to raw template

    # Generate tool call instruction based on the type of prompt
    if is_continuation:
        if base_name.lower() == "continue":
            # "/zen:continue" case
            tool_instruction = f"Continue the previous conversation using the {tool_name} tool"
        else:
            # "/zen:chat:continue" case
            tool_instruction = f"Continue the previous conversation using the {tool_name} tool"
    elif parsed_model:
        # "/zen:chat:o3" case
        tool_instruction = f"Use the {tool_name} tool with model '{parsed_model}'"
    else:
        # "/zen:chat" case
        tool_instruction = prompt_text

    return GetPromptResult(
        prompt=Prompt(
            name=name,

@@ -999,7 +1062,7 @@ async def handle_get_prompt(name: str, arguments: dict[str, Any] = None) -> GetP
        messages=[
            PromptMessage(
                role="user",
-                content={"type": "text", "text": prompt_text},
+                content={"type": "text", "text": tool_instruction},
            )
        ],
    )

@@ -8,7 +8,10 @@ This test is intentionally NOT added to TEST_REGISTRY to prevent accidental exec
It can only be run manually using:
    python communication_simulator_test.py --individual o3_pro_expensive

-Tests that o3-pro model works with one simple chat call. That's it.
+Tests that o3-pro model:
+1. Uses the correct /v1/responses endpoint (not /v1/chat/completions)
+2. Successfully completes a chat call
+3. Returns properly formatted response
"""

from .base_test import BaseSimulatorTest

@@ -26,13 +29,16 @@ class O3ProExpensiveTest(BaseSimulatorTest):
        return "⚠️ EXPENSIVE O3-Pro basic validation (manual only)"

    def run_test(self) -> bool:
-        """Test o3-pro model with one simple chat call - EXPENSIVE!"""
+        """Test o3-pro model with endpoint verification - EXPENSIVE!"""
        try:
            self.logger.warning("⚠️ ⚠️ ⚠️ EXPENSIVE TEST - O3-PRO COSTS ~$15-60 PER 1K TOKENS! ⚠️ ⚠️ ⚠️")
-            self.logger.info("Test: O3-Pro basic chat test")
+            self.logger.info("Test: O3-Pro endpoint and functionality test")

            # First, verify we're hitting the right endpoint by checking logs
            self.logger.info("Step 1: Testing o3-pro with chat tool")

            # One simple chat call
-            response, _ = self.call_mcp_tool(
+            response, tool_result = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What is 2 + 2?",

@@ -41,16 +47,44 @@ class O3ProExpensiveTest(BaseSimulatorTest):
                },
            )

-            if response:
-                self.logger.info("✅ O3-Pro chat call succeeded")
-                self.logger.warning("💰 Test completed - check your billing!")
-                return True
-            else:
-                self.logger.error("❌ O3-Pro chat call failed")
+            if not response:
+                self.logger.error("❌ O3-Pro chat call failed - no response")
+                if tool_result and "error" in tool_result:
+                    error_msg = tool_result["error"]
+                    self.logger.error(f"Error details: {error_msg}")
+                    # Check if it's the endpoint error we're trying to fix
+                    if "v1/responses" in str(error_msg) and "v1/chat/completions" in str(error_msg):
+                        self.logger.error(
+                            "❌ ENDPOINT BUG DETECTED: o3-pro is trying to use chat/completions instead of responses endpoint!"
+                        )
+                return False

            # Check the metadata to verify endpoint was used
            if tool_result and isinstance(tool_result, dict):
                metadata = tool_result.get("metadata", {})
                endpoint_used = metadata.get("endpoint", "unknown")

                if endpoint_used == "responses":
                    self.logger.info("✅ Correct endpoint used: /v1/responses")
                else:
                    self.logger.warning(f"⚠️ Endpoint used: {endpoint_used} (expected: responses)")

            # Verify the response content
            if response and "4" in str(response):
                self.logger.info("✅ O3-Pro response is mathematically correct")
            else:
                self.logger.warning(f"⚠️ Unexpected response: {response}")

            self.logger.info("✅ O3-Pro test completed successfully")
            self.logger.warning("💰 Test completed - check your billing!")
            return True

        except Exception as e:
-            self.logger.error(f"O3-Pro test failed: {e}")
+            self.logger.error(f"O3-Pro test failed with exception: {e}")
            # Log the full error for debugging endpoint issues
            import traceback

            self.logger.error(f"Full traceback: {traceback.format_exc()}")
            return False

@@ -274,6 +274,7 @@ class TestProviderIntegration:
        """
        # Clear any cached restriction service
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

        provider = GeminiModelProvider(api_key="test-key")

@@ -302,6 +303,7 @@ class TestProviderIntegration:
        """
        # Clear any cached restriction service
        import utils.model_restrictions

        utils.model_restrictions._restriction_service = None

        provider = GeminiModelProvider(api_key="test-key")

@@ -75,7 +75,7 @@ class TestOpenAIProvider:
        # Test full name passthrough
        assert provider._resolve_model_name("o3") == "o3"
        assert provider._resolve_model_name("o3-mini") == "o3-mini"
-        assert provider._resolve_model_name("o3-pro") == "o3-pro"
+        assert provider._resolve_model_name("o3-pro") == "o3-pro-2025-06-10"
        assert provider._resolve_model_name("o4-mini") == "o4-mini"
        assert provider._resolve_model_name("o4-mini-high") == "o4-mini-high"

@@ -196,7 +196,7 @@ class TestOpenAIProvider:
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "Test response"
        mock_response.choices[0].finish_reason = "stop"
-        mock_response.model = "o3-pro"
+        mock_response.model = "o3-mini"
        mock_response.usage = MagicMock()
        mock_response.usage.prompt_tokens = 10
        mock_response.usage.completion_tokens = 5

@@ -205,10 +205,10 @@ class TestOpenAIProvider:

        provider = OpenAIModelProvider("test-key")

-        # Test full model name passes through unchanged
-        provider.generate_content(prompt="Test", model_name="o3-pro", temperature=1.0)
+        # Test full model name passes through unchanged (use o3-mini since o3-pro has special handling)
+        provider.generate_content(prompt="Test", model_name="o3-mini", temperature=1.0)
        call_kwargs = mock_client.chat.completions.create.call_args[1]
-        assert call_kwargs["model"] == "o3-pro"  # Should be unchanged
+        assert call_kwargs["model"] == "o3-mini"  # Should be unchanged

    def test_supports_thinking_mode(self):
        """Test thinking mode support (currently False for all OpenAI models)."""

@@ -219,3 +219,73 @@ class TestOpenAIProvider:
        assert provider.supports_thinking_mode("o3-mini") is False
        assert provider.supports_thinking_mode("o4-mini") is False
        assert provider.supports_thinking_mode("mini") is False  # Test with alias too

    @patch("providers.openai_compatible.OpenAI")
    def test_o3_pro_routes_to_responses_endpoint(self, mock_openai_class):
        """Test that o3-pro model routes to the /v1/responses endpoint (mock test)."""
        # Set up mock for OpenAI client responses endpoint
        mock_client = MagicMock()
        mock_openai_class.return_value = mock_client

        mock_response = MagicMock()
        mock_response.output = MagicMock()
        mock_response.output.content = [MagicMock()]
        mock_response.output.content[0].type = "output_text"
        mock_response.output.content[0].text = "4"
        mock_response.model = "o3-pro-2025-06-10"
        mock_response.id = "test-id"
        mock_response.created_at = 1234567890
        mock_response.usage = MagicMock()
        mock_response.usage.prompt_tokens = 10
        mock_response.usage.completion_tokens = 5
        mock_response.usage.total_tokens = 15

        mock_client.responses.create.return_value = mock_response

        provider = OpenAIModelProvider("test-key")

        # Generate content with o3-pro
        result = provider.generate_content(prompt="What is 2 + 2?", model_name="o3-pro", temperature=1.0)

        # Verify responses.create was called
        mock_client.responses.create.assert_called_once()
        call_args = mock_client.responses.create.call_args[1]
        assert call_args["model"] == "o3-pro-2025-06-10"
        assert call_args["input"][0]["role"] == "user"
        assert "What is 2 + 2?" in call_args["input"][0]["content"][0]["text"]

        # Verify the response
        assert result.content == "4"
        assert result.model_name == "o3-pro-2025-06-10"
        assert result.metadata["endpoint"] == "responses"

    @patch("providers.openai_compatible.OpenAI")
    def test_non_o3_pro_uses_chat_completions(self, mock_openai_class):
        """Test that non-o3-pro models use the standard chat completions endpoint."""
        # Set up mock
        mock_client = MagicMock()
        mock_openai_class.return_value = mock_client
        mock_response = MagicMock()
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "Test response"
        mock_response.choices[0].finish_reason = "stop"
        mock_response.model = "o3-mini"
        mock_response.id = "test-id"
        mock_response.created = 1234567890
        mock_response.usage = MagicMock()
        mock_response.usage.prompt_tokens = 10
        mock_response.usage.completion_tokens = 5
        mock_response.usage.total_tokens = 15
        mock_client.chat.completions.create.return_value = mock_response

        provider = OpenAIModelProvider("test-key")

        # Generate content with o3-mini (not o3-pro)
        result = provider.generate_content(prompt="Test prompt", model_name="o3-mini", temperature=1.0)

        # Verify chat.completions.create was called
        mock_client.chat.completions.create.assert_called_once()

        # Verify the response
        assert result.content == "Test response"
        assert result.model_name == "o3-mini"