Added GPT-5 and GPT-5-mini support

Improvements to model name resolution
Improved instructions for multi-step workflows when continuation is available
Improved instructions for chat tool
Improved preferred model resolution; moved the resolution code from the registry into each provider
Updated tests
This commit is contained in:
Fahad
2025-08-08 08:51:34 +05:00
parent 9a4791cb06
commit 1a8ec2e12f
30 changed files with 792 additions and 483 deletions

View File

@@ -309,8 +309,10 @@ class OpenAICompatibleProvider(ModelProvider):
max_retries = 4
retry_delays = [1, 3, 5, 8]
last_exception = None
actual_attempts = 0
for attempt in range(max_retries):
actual_attempts = attempt + 1 # Convert from 0-based index to human-readable count
try: # Log the exact payload being sent for debugging
import json
@@ -371,14 +373,13 @@ class OpenAICompatibleProvider(ModelProvider):
if is_retryable and attempt < max_retries - 1:
delay = retry_delays[attempt]
logging.warning(
f"Retryable error for o3-pro responses endpoint, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
f"Retryable error for o3-pro responses endpoint, attempt {actual_attempts}/{max_retries}: {str(e)}. Retrying in {delay}s..."
)
time.sleep(delay)
else:
break
# If we get here, all retries failed
actual_attempts = attempt + 1 # Convert from 0-based index to human-readable count
error_msg = f"o3-pro responses endpoint error after {actual_attempts} attempt{'s' if actual_attempts > 1 else ''}: {str(last_exception)}"
logging.error(error_msg)
raise RuntimeError(error_msg) from last_exception
@@ -481,7 +482,7 @@ class OpenAICompatibleProvider(ModelProvider):
completion_params[key] = value
# Check if this is o3-pro and needs the responses endpoint
if resolved_model == "o3-pro-2025-06-10":
if resolved_model == "o3-pro":
# This model requires the /v1/responses endpoint
# If it fails, we should not fall back to chat/completions
return self._generate_with_responses_endpoint(
@@ -497,8 +498,10 @@ class OpenAICompatibleProvider(ModelProvider):
retry_delays = [1, 3, 5, 8] # Progressive delays: 1s, 3s, 5s, 8s
last_exception = None
actual_attempts = 0
for attempt in range(max_retries):
actual_attempts = attempt + 1 # Convert from 0-based index to human-readable count
try:
# Generate completion
response = self.client.chat.completions.create(**completion_params)
@@ -536,12 +539,11 @@ class OpenAICompatibleProvider(ModelProvider):
# Log retry attempt
logging.warning(
f"{self.FRIENDLY_NAME} error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
f"{self.FRIENDLY_NAME} error for model {model_name}, attempt {actual_attempts}/{max_retries}: {str(e)}. Retrying in {delay}s..."
)
time.sleep(delay)
# If we get here, all retries failed
actual_attempts = attempt + 1 # Convert from 0-based index to human-readable count
error_msg = f"{self.FRIENDLY_NAME} API error for model {model_name} after {actual_attempts} attempt{'s' if actual_attempts > 1 else ''}: {str(last_exception)}"
logging.error(error_msg)
raise RuntimeError(error_msg) from last_exception
@@ -576,11 +578,7 @@ class OpenAICompatibleProvider(ModelProvider):
try:
encoding = tiktoken.encoding_for_model(model_name)
except KeyError:
# Try common encodings based on model patterns
if "gpt-4" in model_name or "gpt-3.5" in model_name:
encoding = tiktoken.get_encoding("cl100k_base")
else:
encoding = tiktoken.get_encoding("cl100k_base") # Default
encoding = tiktoken.get_encoding("cl100k_base")
return len(encoding.encode(text))
@@ -679,11 +677,13 @@ class OpenAICompatibleProvider(ModelProvider):
"""
# Common vision-capable models - only include models that actually support images
vision_models = {
"gpt-5",
"gpt-5-mini",
"gpt-4o",
"gpt-4o-mini",
"gpt-4-turbo",
"gpt-4-vision-preview",
"gpt-4.1-2025-04-14", # GPT-4.1 supports vision
"gpt-4.1-2025-04-14",
"o3",
"o3-mini",
"o3-pro",