Lookout for non-retriable errors and exit early
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -171,3 +171,6 @@ test-setup/
|
|||||||
FEATURE_*.md
|
FEATURE_*.md
|
||||||
# Temporary files
|
# Temporary files
|
||||||
/tmp/
|
/tmp/
|
||||||
|
|
||||||
|
# Local user instructions
|
||||||
|
CLAUDE.local.md
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
- Before any commit / push to github, you must first always run and confirm run that code quality checks pass. Use @code_quality_checks.sh and confirm that we have 100% unit tests passing.
|
|
||||||
@@ -14,7 +14,7 @@ import os
|
|||||||
# These values are used in server responses and for tracking releases
|
# These values are used in server responses and for tracking releases
|
||||||
# IMPORTANT: This is the single source of truth for version and author info
|
# IMPORTANT: This is the single source of truth for version and author info
|
||||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||||
__version__ = "5.0.1"
|
__version__ = "5.0.2"
|
||||||
# Last update date in ISO format
|
# Last update date in ISO format
|
||||||
__updated__ = "2025-06-18"
|
__updated__ = "2025-06-18"
|
||||||
# Primary maintainer
|
# Primary maintainer
|
||||||
|
|||||||
@@ -196,24 +196,8 @@ class GeminiModelProvider(ModelProvider):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_exception = e
|
last_exception = e
|
||||||
|
|
||||||
# Check if this is a retryable error
|
# Check if this is a retryable error using structured error codes
|
||||||
error_str = str(e).lower()
|
is_retryable = self._is_error_retryable(e)
|
||||||
is_retryable = any(
|
|
||||||
term in error_str
|
|
||||||
for term in [
|
|
||||||
"timeout",
|
|
||||||
"connection",
|
|
||||||
"network",
|
|
||||||
"temporary",
|
|
||||||
"unavailable",
|
|
||||||
"retry",
|
|
||||||
"429",
|
|
||||||
"500",
|
|
||||||
"502",
|
|
||||||
"503",
|
|
||||||
"504",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# If this is the last attempt or not retryable, give up
|
# If this is the last attempt or not retryable, give up
|
||||||
if attempt == max_retries - 1 or not is_retryable:
|
if attempt == max_retries - 1 or not is_retryable:
|
||||||
@@ -388,6 +372,78 @@ class GeminiModelProvider(ModelProvider):
|
|||||||
}
|
}
|
||||||
return model_name in vision_models
|
return model_name in vision_models
|
||||||
|
|
||||||
|
def _is_error_retryable(self, error: Exception) -> bool:
|
||||||
|
"""Determine if an error should be retried based on structured error codes.
|
||||||
|
|
||||||
|
Uses Gemini API error structure instead of text pattern matching for reliability.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
error: Exception from Gemini API call
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if error should be retried, False otherwise
|
||||||
|
"""
|
||||||
|
error_str = str(error).lower()
|
||||||
|
|
||||||
|
# Check for 429 errors first - these need special handling
|
||||||
|
if "429" in error_str or "quota" in error_str or "resource_exhausted" in error_str:
|
||||||
|
# For Gemini, check for specific non-retryable error indicators
|
||||||
|
# These typically indicate permanent failures or quota/size limits
|
||||||
|
non_retryable_indicators = [
|
||||||
|
"quota exceeded",
|
||||||
|
"resource exhausted",
|
||||||
|
"context length",
|
||||||
|
"token limit",
|
||||||
|
"request too large",
|
||||||
|
"invalid request",
|
||||||
|
"quota_exceeded",
|
||||||
|
"resource_exhausted",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Also check if this is a structured error from Gemini SDK
|
||||||
|
try:
|
||||||
|
# Try to access error details if available
|
||||||
|
if hasattr(error, "details") or hasattr(error, "reason"):
|
||||||
|
# Gemini API errors may have structured details
|
||||||
|
error_details = getattr(error, "details", "") or getattr(error, "reason", "")
|
||||||
|
error_details_str = str(error_details).lower()
|
||||||
|
|
||||||
|
# Check for non-retryable error codes/reasons
|
||||||
|
if any(indicator in error_details_str for indicator in non_retryable_indicators):
|
||||||
|
logger.debug(f"Non-retryable Gemini error: {error_details}")
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Check main error string for non-retryable patterns
|
||||||
|
if any(indicator in error_str for indicator in non_retryable_indicators):
|
||||||
|
logger.debug(f"Non-retryable Gemini error based on message: {error_str[:200]}...")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# If it's a 429/quota error but doesn't match non-retryable patterns, it might be retryable rate limiting
|
||||||
|
logger.debug(f"Retryable Gemini rate limiting error: {error_str[:100]}...")
|
||||||
|
return True
|
||||||
|
|
||||||
|
# For non-429 errors, check if they're retryable
|
||||||
|
retryable_indicators = [
|
||||||
|
"timeout",
|
||||||
|
"connection",
|
||||||
|
"network",
|
||||||
|
"temporary",
|
||||||
|
"unavailable",
|
||||||
|
"retry",
|
||||||
|
"internal error",
|
||||||
|
"408", # Request timeout
|
||||||
|
"500", # Internal server error
|
||||||
|
"502", # Bad gateway
|
||||||
|
"503", # Service unavailable
|
||||||
|
"504", # Gateway timeout
|
||||||
|
"ssl", # SSL errors
|
||||||
|
"handshake", # Handshake failures
|
||||||
|
]
|
||||||
|
|
||||||
|
return any(indicator in error_str for indicator in retryable_indicators)
|
||||||
|
|
||||||
def _process_image(self, image_path: str) -> Optional[dict]:
|
def _process_image(self, image_path: str) -> Optional[dict]:
|
||||||
"""Process an image for Gemini API."""
|
"""Process an image for Gemini API."""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -326,24 +326,8 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_exception = e
|
last_exception = e
|
||||||
|
|
||||||
# Check if this is a retryable error
|
# Check if this is a retryable error using structured error codes
|
||||||
error_str = str(e).lower()
|
is_retryable = self._is_error_retryable(e)
|
||||||
is_retryable = any(
|
|
||||||
term in error_str
|
|
||||||
for term in [
|
|
||||||
"timeout",
|
|
||||||
"connection",
|
|
||||||
"network",
|
|
||||||
"temporary",
|
|
||||||
"unavailable",
|
|
||||||
"retry",
|
|
||||||
"429",
|
|
||||||
"500",
|
|
||||||
"502",
|
|
||||||
"503",
|
|
||||||
"504",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_retryable and attempt < max_retries - 1:
|
if is_retryable and attempt < max_retries - 1:
|
||||||
delay = retry_delays[attempt]
|
delay = retry_delays[attempt]
|
||||||
@@ -484,24 +468,8 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_exception = e
|
last_exception = e
|
||||||
|
|
||||||
# Check if this is a retryable error
|
# Check if this is a retryable error using structured error codes
|
||||||
error_str = str(e).lower()
|
is_retryable = self._is_error_retryable(e)
|
||||||
is_retryable = any(
|
|
||||||
term in error_str
|
|
||||||
for term in [
|
|
||||||
"timeout",
|
|
||||||
"connection",
|
|
||||||
"network",
|
|
||||||
"temporary",
|
|
||||||
"unavailable",
|
|
||||||
"retry",
|
|
||||||
"429",
|
|
||||||
"500",
|
|
||||||
"502",
|
|
||||||
"503",
|
|
||||||
"504",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# If this is the last attempt or not retryable, give up
|
# If this is the last attempt or not retryable, give up
|
||||||
if attempt == max_retries - 1 or not is_retryable:
|
if attempt == max_retries - 1 or not is_retryable:
|
||||||
@@ -672,6 +640,97 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
logging.debug(f"Model '{model_name}' vision support: {supports}")
|
logging.debug(f"Model '{model_name}' vision support: {supports}")
|
||||||
return supports
|
return supports
|
||||||
|
|
||||||
|
def _is_error_retryable(self, error: Exception) -> bool:
|
||||||
|
"""Determine if an error should be retried based on structured error codes.
|
||||||
|
|
||||||
|
Uses OpenAI API error structure instead of text pattern matching for reliability.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
error: Exception from OpenAI API call
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if error should be retried, False otherwise
|
||||||
|
"""
|
||||||
|
error_str = str(error).lower()
|
||||||
|
|
||||||
|
# Check for 429 errors first - these need special handling
|
||||||
|
if "429" in error_str:
|
||||||
|
# Try to extract structured error information
|
||||||
|
error_type = None
|
||||||
|
error_code = None
|
||||||
|
|
||||||
|
# Parse structured error from OpenAI API response
|
||||||
|
# Format: "Error code: 429 - {'error': {'type': 'tokens', 'code': 'rate_limit_exceeded', ...}}"
|
||||||
|
try:
|
||||||
|
import ast
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Extract JSON part from error string using regex
|
||||||
|
# Look for pattern: {...} (from first { to last })
|
||||||
|
json_match = re.search(r"\{.*\}", str(error))
|
||||||
|
if json_match:
|
||||||
|
json_like_str = json_match.group(0)
|
||||||
|
|
||||||
|
# First try: parse as Python literal (handles single quotes safely)
|
||||||
|
try:
|
||||||
|
error_data = ast.literal_eval(json_like_str)
|
||||||
|
except (ValueError, SyntaxError):
|
||||||
|
# Fallback: try JSON parsing with simple quote replacement
|
||||||
|
# (for cases where it's already valid JSON or simple replacements work)
|
||||||
|
json_str = json_like_str.replace("'", '"')
|
||||||
|
error_data = json.loads(json_str)
|
||||||
|
|
||||||
|
if "error" in error_data:
|
||||||
|
error_info = error_data["error"]
|
||||||
|
error_type = error_info.get("type")
|
||||||
|
error_code = error_info.get("code")
|
||||||
|
|
||||||
|
except (json.JSONDecodeError, ValueError, SyntaxError, AttributeError):
|
||||||
|
# Fall back to checking hasattr for OpenAI SDK exception objects
|
||||||
|
if hasattr(error, "response") and hasattr(error.response, "json"):
|
||||||
|
try:
|
||||||
|
response_data = error.response.json()
|
||||||
|
if "error" in response_data:
|
||||||
|
error_info = response_data["error"]
|
||||||
|
error_type = error_info.get("type")
|
||||||
|
error_code = error_info.get("code")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Determine if 429 is retryable based on structured error codes
|
||||||
|
if error_type == "tokens":
|
||||||
|
# Token-related 429s are typically non-retryable (request too large)
|
||||||
|
logging.debug(f"Non-retryable 429: token-related error (type={error_type}, code={error_code})")
|
||||||
|
return False
|
||||||
|
elif error_code in ["invalid_request_error", "context_length_exceeded"]:
|
||||||
|
# These are permanent failures
|
||||||
|
logging.debug(f"Non-retryable 429: permanent failure (type={error_type}, code={error_code})")
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# Other 429s (like requests per minute) are retryable
|
||||||
|
logging.debug(f"Retryable 429: rate limiting (type={error_type}, code={error_code})")
|
||||||
|
return True
|
||||||
|
|
||||||
|
# For non-429 errors, check if they're retryable
|
||||||
|
retryable_indicators = [
|
||||||
|
"timeout",
|
||||||
|
"connection",
|
||||||
|
"network",
|
||||||
|
"temporary",
|
||||||
|
"unavailable",
|
||||||
|
"retry",
|
||||||
|
"408", # Request timeout
|
||||||
|
"500", # Internal server error
|
||||||
|
"502", # Bad gateway
|
||||||
|
"503", # Service unavailable
|
||||||
|
"504", # Gateway timeout
|
||||||
|
"ssl", # SSL errors
|
||||||
|
"handshake", # Handshake failures
|
||||||
|
]
|
||||||
|
|
||||||
|
return any(indicator in error_str for indicator in retryable_indicators)
|
||||||
|
|
||||||
def _process_image(self, image_path: str) -> Optional[dict]:
|
def _process_image(self, image_path: str) -> Optional[dict]:
|
||||||
"""Process an image for OpenAI-compatible API."""
|
"""Process an image for OpenAI-compatible API."""
|
||||||
try:
|
try:
|
||||||
|
|||||||
140
tests/test_rate_limit_patterns.py
Normal file
140
tests/test_rate_limit_patterns.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
"""
|
||||||
|
Test to verify structured error code-based retry logic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from providers.gemini import GeminiModelProvider
|
||||||
|
from providers.openai import OpenAIModelProvider
|
||||||
|
|
||||||
|
|
||||||
|
def test_openai_structured_error_retry_logic():
|
||||||
|
"""Test OpenAI provider's structured error code retry logic."""
|
||||||
|
provider = OpenAIModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
# Test structured token-related 429 error (should NOT be retried)
|
||||||
|
class MockTokenError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
# Simulate the actual error format from OpenAI API
|
||||||
|
self.args = (
|
||||||
|
"Error code: 429 - {'error': {'message': 'Request too large for o3', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}",
|
||||||
|
)
|
||||||
|
|
||||||
|
token_error = MockTokenError()
|
||||||
|
assert not provider._is_error_retryable(token_error), "Token-related 429 should not be retryable"
|
||||||
|
|
||||||
|
# Test standard rate limiting 429 error (should be retried)
|
||||||
|
class MockRateLimitError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = (
|
||||||
|
"Error code: 429 - {'error': {'message': 'Too many requests', 'type': 'requests', 'code': 'rate_limit_exceeded'}}",
|
||||||
|
)
|
||||||
|
|
||||||
|
rate_limit_error = MockRateLimitError()
|
||||||
|
assert provider._is_error_retryable(rate_limit_error), "Request rate limiting should be retryable"
|
||||||
|
|
||||||
|
# Test context length error (should NOT be retried)
|
||||||
|
class MockContextError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = (
|
||||||
|
"Error code: 429 - {'error': {'message': 'Context length exceeded', 'code': 'context_length_exceeded'}}",
|
||||||
|
)
|
||||||
|
|
||||||
|
context_error = MockContextError()
|
||||||
|
assert not provider._is_error_retryable(context_error), "Context length errors should not be retryable"
|
||||||
|
|
||||||
|
|
||||||
|
def test_gemini_structured_error_retry_logic():
|
||||||
|
"""Test Gemini provider's structured error code retry logic."""
|
||||||
|
provider = GeminiModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
# Test quota exceeded error (should NOT be retried)
|
||||||
|
class MockQuotaError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("429 Resource exhausted: Quota exceeded for model",)
|
||||||
|
self.details = "quota_exceeded"
|
||||||
|
|
||||||
|
quota_error = MockQuotaError()
|
||||||
|
assert not provider._is_error_retryable(quota_error), "Quota exceeded should not be retryable"
|
||||||
|
|
||||||
|
# Test resource exhausted error (should NOT be retried)
|
||||||
|
class MockResourceError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("429 Resource exhausted: Token limit exceeded",)
|
||||||
|
|
||||||
|
resource_error = MockResourceError()
|
||||||
|
assert not provider._is_error_retryable(resource_error), "Resource exhausted should not be retryable"
|
||||||
|
|
||||||
|
# Test temporary rate limiting (should be retried)
|
||||||
|
class MockTempError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("429 Too many requests, please try again later",)
|
||||||
|
|
||||||
|
temp_error = MockTempError()
|
||||||
|
assert provider._is_error_retryable(temp_error), "Temporary rate limiting should be retryable"
|
||||||
|
|
||||||
|
|
||||||
|
def test_actual_log_error_from_issue_with_structured_parsing():
|
||||||
|
"""Test the specific error from the user's log using structured parsing."""
|
||||||
|
provider = OpenAIModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
# Create the exact error from the user's log
|
||||||
|
class MockUserLogError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
# This is the exact error message from the user's issue
|
||||||
|
self.args = (
|
||||||
|
"Error code: 429 - {'error': {'message': 'Request too large for o3 in organization org-MWp466of2XGyS90J8huQk4R6 on tokens per min (TPM): Limit 30000, Requested 31756. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}",
|
||||||
|
)
|
||||||
|
|
||||||
|
user_error = MockUserLogError()
|
||||||
|
|
||||||
|
# This specific error should NOT be retryable because it has type='tokens'
|
||||||
|
assert not provider._is_error_retryable(user_error), "The user's specific error should be non-retryable"
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_429_errors_still_work():
|
||||||
|
"""Test that non-429 errors are still handled correctly."""
|
||||||
|
provider = OpenAIModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
# Test retryable non-429 errors
|
||||||
|
class MockTimeoutError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("Connection timeout",)
|
||||||
|
|
||||||
|
timeout_error = MockTimeoutError()
|
||||||
|
assert provider._is_error_retryable(timeout_error), "Timeout errors should be retryable"
|
||||||
|
|
||||||
|
class Mock500Error(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("500 Internal Server Error",)
|
||||||
|
|
||||||
|
server_error = Mock500Error()
|
||||||
|
assert provider._is_error_retryable(server_error), "500 errors should be retryable"
|
||||||
|
|
||||||
|
# Test non-retryable non-429 errors
|
||||||
|
class MockAuthError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("401 Unauthorized",)
|
||||||
|
|
||||||
|
auth_error = MockAuthError()
|
||||||
|
assert not provider._is_error_retryable(auth_error), "Auth errors should not be retryable"
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_cases_and_fallbacks():
|
||||||
|
"""Test edge cases and fallback behavior."""
|
||||||
|
provider = OpenAIModelProvider(api_key="test-key")
|
||||||
|
|
||||||
|
# Test malformed JSON in error (should fall back gracefully)
|
||||||
|
class MockMalformedError(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("Error code: 429 - {invalid json}",)
|
||||||
|
|
||||||
|
malformed_error = MockMalformedError()
|
||||||
|
# Should still be retryable since it's a 429 without clear non-retryable indicators
|
||||||
|
assert provider._is_error_retryable(malformed_error), "Malformed 429 errors should default to retryable"
|
||||||
|
|
||||||
|
# Test 429 without structured data (should be retryable by default)
|
||||||
|
class MockSimple429Error(Exception):
|
||||||
|
def __init__(self):
|
||||||
|
self.args = ("429 Too Many Requests",)
|
||||||
|
|
||||||
|
simple_429_error = MockSimple429Error()
|
||||||
|
assert provider._is_error_retryable(simple_429_error), "Simple 429 without type info should be retryable"
|
||||||
Reference in New Issue
Block a user