diff --git a/.gitignore b/.gitignore index f8462b0..fa7e02d 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,6 @@ test-setup/ FEATURE_*.md # Temporary files /tmp/ + +# Local user instructions +CLAUDE.local.md diff --git a/CLAUDE.local.md b/CLAUDE.local.md deleted file mode 100644 index 729a3a7..0000000 --- a/CLAUDE.local.md +++ /dev/null @@ -1 +0,0 @@ -- Before any commit / push to github, you must first always run and confirm run that code quality checks pass. Use @code_quality_checks.sh and confirm that we have 100% unit tests passing. \ No newline at end of file diff --git a/config.py b/config.py index 85a53ec..a57d55d 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "5.0.1" +__version__ = "5.0.2" # Last update date in ISO format __updated__ = "2025-06-18" # Primary maintainer diff --git a/providers/gemini.py b/providers/gemini.py index ef12e62..2ce6310 100644 --- a/providers/gemini.py +++ b/providers/gemini.py @@ -196,24 +196,8 @@ class GeminiModelProvider(ModelProvider): except Exception as e: last_exception = e - # Check if this is a retryable error - error_str = str(e).lower() - is_retryable = any( - term in error_str - for term in [ - "timeout", - "connection", - "network", - "temporary", - "unavailable", - "retry", - "429", - "500", - "502", - "503", - "504", - ] - ) + # Check if this is a retryable error using structured error codes + is_retryable = self._is_error_retryable(e) # If this is the last attempt or not retryable, give up if attempt == max_retries - 1 or not is_retryable: @@ -388,6 +372,78 @@ class GeminiModelProvider(ModelProvider): } return model_name in vision_models + def _is_error_retryable(self, error: Exception) -> bool: + """Determine if an error should be retried based on structured error codes. + + Uses Gemini API error structure instead of text pattern matching for reliability. + + Args: + error: Exception from Gemini API call + + Returns: + True if error should be retried, False otherwise + """ + error_str = str(error).lower() + + # Check for 429 errors first - these need special handling + if "429" in error_str or "quota" in error_str or "resource_exhausted" in error_str: + # For Gemini, check for specific non-retryable error indicators + # These typically indicate permanent failures or quota/size limits + non_retryable_indicators = [ + "quota exceeded", + "resource exhausted", + "context length", + "token limit", + "request too large", + "invalid request", + "quota_exceeded", + "resource_exhausted", + ] + + # Also check if this is a structured error from Gemini SDK + try: + # Try to access error details if available + if hasattr(error, "details") or hasattr(error, "reason"): + # Gemini API errors may have structured details + error_details = getattr(error, "details", "") or getattr(error, "reason", "") + error_details_str = str(error_details).lower() + + # Check for non-retryable error codes/reasons + if any(indicator in error_details_str for indicator in non_retryable_indicators): + logger.debug(f"Non-retryable Gemini error: {error_details}") + return False + except Exception: + pass + + # Check main error string for non-retryable patterns + if any(indicator in error_str for indicator in non_retryable_indicators): + logger.debug(f"Non-retryable Gemini error based on message: {error_str[:200]}...") + return False + + # If it's a 429/quota error but doesn't match non-retryable patterns, it might be retryable rate limiting + logger.debug(f"Retryable Gemini rate limiting error: {error_str[:100]}...") + return True + + # For non-429 errors, check if they're retryable + retryable_indicators = [ + "timeout", + "connection", + "network", + "temporary", + "unavailable", + "retry", + "internal error", + "408", # Request timeout + "500", # Internal server error + "502", # Bad gateway + "503", # Service unavailable + "504", # Gateway timeout + "ssl", # SSL errors + "handshake", # Handshake failures + ] + + return any(indicator in error_str for indicator in retryable_indicators) + def _process_image(self, image_path: str) -> Optional[dict]: """Process an image for Gemini API.""" try: diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py index d7c78a6..0163cc8 100644 --- a/providers/openai_compatible.py +++ b/providers/openai_compatible.py @@ -326,24 +326,8 @@ class OpenAICompatibleProvider(ModelProvider): except Exception as e: last_exception = e - # Check if this is a retryable error - error_str = str(e).lower() - is_retryable = any( - term in error_str - for term in [ - "timeout", - "connection", - "network", - "temporary", - "unavailable", - "retry", - "429", - "500", - "502", - "503", - "504", - ] - ) + # Check if this is a retryable error using structured error codes + is_retryable = self._is_error_retryable(e) if is_retryable and attempt < max_retries - 1: delay = retry_delays[attempt] @@ -484,24 +468,8 @@ class OpenAICompatibleProvider(ModelProvider): except Exception as e: last_exception = e - # Check if this is a retryable error - error_str = str(e).lower() - is_retryable = any( - term in error_str - for term in [ - "timeout", - "connection", - "network", - "temporary", - "unavailable", - "retry", - "429", - "500", - "502", - "503", - "504", - ] - ) + # Check if this is a retryable error using structured error codes + is_retryable = self._is_error_retryable(e) # If this is the last attempt or not retryable, give up if attempt == max_retries - 1 or not is_retryable: @@ -672,6 +640,97 @@ class OpenAICompatibleProvider(ModelProvider): logging.debug(f"Model '{model_name}' vision support: {supports}") return supports + def _is_error_retryable(self, error: Exception) -> bool: + """Determine if an error should be retried based on structured error codes. + + Uses OpenAI API error structure instead of text pattern matching for reliability. + + Args: + error: Exception from OpenAI API call + + Returns: + True if error should be retried, False otherwise + """ + error_str = str(error).lower() + + # Check for 429 errors first - these need special handling + if "429" in error_str: + # Try to extract structured error information + error_type = None + error_code = None + + # Parse structured error from OpenAI API response + # Format: "Error code: 429 - {'error': {'type': 'tokens', 'code': 'rate_limit_exceeded', ...}}" + try: + import ast + import json + import re + + # Extract JSON part from error string using regex + # Look for pattern: {...} (from first { to last }) + json_match = re.search(r"\{.*\}", str(error)) + if json_match: + json_like_str = json_match.group(0) + + # First try: parse as Python literal (handles single quotes safely) + try: + error_data = ast.literal_eval(json_like_str) + except (ValueError, SyntaxError): + # Fallback: try JSON parsing with simple quote replacement + # (for cases where it's already valid JSON or simple replacements work) + json_str = json_like_str.replace("'", '"') + error_data = json.loads(json_str) + + if "error" in error_data: + error_info = error_data["error"] + error_type = error_info.get("type") + error_code = error_info.get("code") + + except (json.JSONDecodeError, ValueError, SyntaxError, AttributeError): + # Fall back to checking hasattr for OpenAI SDK exception objects + if hasattr(error, "response") and hasattr(error.response, "json"): + try: + response_data = error.response.json() + if "error" in response_data: + error_info = response_data["error"] + error_type = error_info.get("type") + error_code = error_info.get("code") + except Exception: + pass + + # Determine if 429 is retryable based on structured error codes + if error_type == "tokens": + # Token-related 429s are typically non-retryable (request too large) + logging.debug(f"Non-retryable 429: token-related error (type={error_type}, code={error_code})") + return False + elif error_code in ["invalid_request_error", "context_length_exceeded"]: + # These are permanent failures + logging.debug(f"Non-retryable 429: permanent failure (type={error_type}, code={error_code})") + return False + else: + # Other 429s (like requests per minute) are retryable + logging.debug(f"Retryable 429: rate limiting (type={error_type}, code={error_code})") + return True + + # For non-429 errors, check if they're retryable + retryable_indicators = [ + "timeout", + "connection", + "network", + "temporary", + "unavailable", + "retry", + "408", # Request timeout + "500", # Internal server error + "502", # Bad gateway + "503", # Service unavailable + "504", # Gateway timeout + "ssl", # SSL errors + "handshake", # Handshake failures + ] + + return any(indicator in error_str for indicator in retryable_indicators) + def _process_image(self, image_path: str) -> Optional[dict]: """Process an image for OpenAI-compatible API.""" try: diff --git a/tests/test_rate_limit_patterns.py b/tests/test_rate_limit_patterns.py new file mode 100644 index 0000000..0ec446f --- /dev/null +++ b/tests/test_rate_limit_patterns.py @@ -0,0 +1,140 @@ +""" +Test to verify structured error code-based retry logic. +""" + +from providers.gemini import GeminiModelProvider +from providers.openai import OpenAIModelProvider + + +def test_openai_structured_error_retry_logic(): + """Test OpenAI provider's structured error code retry logic.""" + provider = OpenAIModelProvider(api_key="test-key") + + # Test structured token-related 429 error (should NOT be retried) + class MockTokenError(Exception): + def __init__(self): + # Simulate the actual error format from OpenAI API + self.args = ( + "Error code: 429 - {'error': {'message': 'Request too large for o3', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}", + ) + + token_error = MockTokenError() + assert not provider._is_error_retryable(token_error), "Token-related 429 should not be retryable" + + # Test standard rate limiting 429 error (should be retried) + class MockRateLimitError(Exception): + def __init__(self): + self.args = ( + "Error code: 429 - {'error': {'message': 'Too many requests', 'type': 'requests', 'code': 'rate_limit_exceeded'}}", + ) + + rate_limit_error = MockRateLimitError() + assert provider._is_error_retryable(rate_limit_error), "Request rate limiting should be retryable" + + # Test context length error (should NOT be retried) + class MockContextError(Exception): + def __init__(self): + self.args = ( + "Error code: 429 - {'error': {'message': 'Context length exceeded', 'code': 'context_length_exceeded'}}", + ) + + context_error = MockContextError() + assert not provider._is_error_retryable(context_error), "Context length errors should not be retryable" + + +def test_gemini_structured_error_retry_logic(): + """Test Gemini provider's structured error code retry logic.""" + provider = GeminiModelProvider(api_key="test-key") + + # Test quota exceeded error (should NOT be retried) + class MockQuotaError(Exception): + def __init__(self): + self.args = ("429 Resource exhausted: Quota exceeded for model",) + self.details = "quota_exceeded" + + quota_error = MockQuotaError() + assert not provider._is_error_retryable(quota_error), "Quota exceeded should not be retryable" + + # Test resource exhausted error (should NOT be retried) + class MockResourceError(Exception): + def __init__(self): + self.args = ("429 Resource exhausted: Token limit exceeded",) + + resource_error = MockResourceError() + assert not provider._is_error_retryable(resource_error), "Resource exhausted should not be retryable" + + # Test temporary rate limiting (should be retried) + class MockTempError(Exception): + def __init__(self): + self.args = ("429 Too many requests, please try again later",) + + temp_error = MockTempError() + assert provider._is_error_retryable(temp_error), "Temporary rate limiting should be retryable" + + +def test_actual_log_error_from_issue_with_structured_parsing(): + """Test the specific error from the user's log using structured parsing.""" + provider = OpenAIModelProvider(api_key="test-key") + + # Create the exact error from the user's log + class MockUserLogError(Exception): + def __init__(self): + # This is the exact error message from the user's issue + self.args = ( + "Error code: 429 - {'error': {'message': 'Request too large for o3 in organization org-MWp466of2XGyS90J8huQk4R6 on tokens per min (TPM): Limit 30000, Requested 31756. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}", + ) + + user_error = MockUserLogError() + + # This specific error should NOT be retryable because it has type='tokens' + assert not provider._is_error_retryable(user_error), "The user's specific error should be non-retryable" + + +def test_non_429_errors_still_work(): + """Test that non-429 errors are still handled correctly.""" + provider = OpenAIModelProvider(api_key="test-key") + + # Test retryable non-429 errors + class MockTimeoutError(Exception): + def __init__(self): + self.args = ("Connection timeout",) + + timeout_error = MockTimeoutError() + assert provider._is_error_retryable(timeout_error), "Timeout errors should be retryable" + + class Mock500Error(Exception): + def __init__(self): + self.args = ("500 Internal Server Error",) + + server_error = Mock500Error() + assert provider._is_error_retryable(server_error), "500 errors should be retryable" + + # Test non-retryable non-429 errors + class MockAuthError(Exception): + def __init__(self): + self.args = ("401 Unauthorized",) + + auth_error = MockAuthError() + assert not provider._is_error_retryable(auth_error), "Auth errors should not be retryable" + + +def test_edge_cases_and_fallbacks(): + """Test edge cases and fallback behavior.""" + provider = OpenAIModelProvider(api_key="test-key") + + # Test malformed JSON in error (should fall back gracefully) + class MockMalformedError(Exception): + def __init__(self): + self.args = ("Error code: 429 - {invalid json}",) + + malformed_error = MockMalformedError() + # Should still be retryable since it's a 429 without clear non-retryable indicators + assert provider._is_error_retryable(malformed_error), "Malformed 429 errors should default to retryable" + + # Test 429 without structured data (should be retryable by default) + class MockSimple429Error(Exception): + def __init__(self): + self.args = ("429 Too Many Requests",) + + simple_429_error = MockSimple429Error() + assert provider._is_error_retryable(simple_429_error), "Simple 429 without type info should be retryable"