Lookout for non-retriable errors and exit early

2025-06-18 08:17:16 +04:00
parent dad1e2d74e
commit dacafa6044
6 changed files with 313 additions and 56 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,6 @@ test-setup/
 FEATURE_*.md
 # Temporary files
 /tmp/
+
+# Local user instructions
+CLAUDE.local.md
--- a/CLAUDE.local.md
+++ b/CLAUDE.local.md
@@ -1 +0,0 @@
- Before any commit / push to github, you must first always run and confirm run that code quality checks pass. Use @code_quality_checks.sh and confirm that we have 100% unit tests passing.
--- a/config.py
+++ b/config.py
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.0.1"
+__version__ = "5.0.2"
 # Last update date in ISO format
 __updated__ = "2025-06-18"
 # Primary maintainer
--- a/providers/gemini.py
+++ b/providers/gemini.py
@@ -196,24 +196,8 @@ class GeminiModelProvider(ModelProvider):
            except Exception as e:
                last_exception = e

-                # Check if this is a retryable error
-                error_str = str(e).lower()
-                is_retryable = any(
-                    term in error_str
-                    for term in [
-                        "timeout",
-                        "connection",
-                        "network",
-                        "temporary",
-                        "unavailable",
-                        "retry",
-                        "429",
-                        "500",
-                        "502",
-                        "503",
-                        "504",
-                    ]
-                )
+                # Check if this is a retryable error using structured error codes
+                is_retryable = self._is_error_retryable(e)

                # If this is the last attempt or not retryable, give up
                if attempt == max_retries - 1 or not is_retryable:
@@ -388,6 +372,78 @@ class GeminiModelProvider(ModelProvider):
        }
        return model_name in vision_models

+    def _is_error_retryable(self, error: Exception) -> bool:
+        """Determine if an error should be retried based on structured error codes.
+
+        Uses Gemini API error structure instead of text pattern matching for reliability.
+
+        Args:
+            error: Exception from Gemini API call
+
+        Returns:
+            True if error should be retried, False otherwise
+        """
+        error_str = str(error).lower()
+
+        # Check for 429 errors first - these need special handling
+        if "429" in error_str or "quota" in error_str or "resource_exhausted" in error_str:
+            # For Gemini, check for specific non-retryable error indicators
+            # These typically indicate permanent failures or quota/size limits
+            non_retryable_indicators = [
+                "quota exceeded",
+                "resource exhausted",
+                "context length",
+                "token limit",
+                "request too large",
+                "invalid request",
+                "quota_exceeded",
+                "resource_exhausted",
+            ]
+
+            # Also check if this is a structured error from Gemini SDK
+            try:
+                # Try to access error details if available
+                if hasattr(error, "details") or hasattr(error, "reason"):
+                    # Gemini API errors may have structured details
+                    error_details = getattr(error, "details", "") or getattr(error, "reason", "")
+                    error_details_str = str(error_details).lower()
+
+                    # Check for non-retryable error codes/reasons
+                    if any(indicator in error_details_str for indicator in non_retryable_indicators):
+                        logger.debug(f"Non-retryable Gemini error: {error_details}")
+                        return False
+            except Exception:
+                pass
+
+            # Check main error string for non-retryable patterns
+            if any(indicator in error_str for indicator in non_retryable_indicators):
+                logger.debug(f"Non-retryable Gemini error based on message: {error_str[:200]}...")
+                return False
+
+            # If it's a 429/quota error but doesn't match non-retryable patterns, it might be retryable rate limiting
+            logger.debug(f"Retryable Gemini rate limiting error: {error_str[:100]}...")
+            return True
+
+        # For non-429 errors, check if they're retryable
+        retryable_indicators = [
+            "timeout",
+            "connection",
+            "network",
+            "temporary",
+            "unavailable",
+            "retry",
+            "internal error",
+            "408",  # Request timeout
+            "500",  # Internal server error
+            "502",  # Bad gateway
+            "503",  # Service unavailable
+            "504",  # Gateway timeout
+            "ssl",  # SSL errors
+            "handshake",  # Handshake failures
+        ]
+
+        return any(indicator in error_str for indicator in retryable_indicators)
+
    def _process_image(self, image_path: str) -> Optional[dict]:
        """Process an image for Gemini API."""
        try:
--- a/providers/openai_compatible.py
+++ b/providers/openai_compatible.py
@@ -326,24 +326,8 @@ class OpenAICompatibleProvider(ModelProvider):
            except Exception as e:
                last_exception = e

-                # Check if this is a retryable error
-                error_str = str(e).lower()
-                is_retryable = any(
-                    term in error_str
-                    for term in [
-                        "timeout",
-                        "connection",
-                        "network",
-                        "temporary",
-                        "unavailable",
-                        "retry",
-                        "429",
-                        "500",
-                        "502",
-                        "503",
-                        "504",
-                    ]
-                )
+                # Check if this is a retryable error using structured error codes
+                is_retryable = self._is_error_retryable(e)

                if is_retryable and attempt < max_retries - 1:
                    delay = retry_delays[attempt]
@@ -484,24 +468,8 @@ class OpenAICompatibleProvider(ModelProvider):
            except Exception as e:
                last_exception = e

-                # Check if this is a retryable error
-                error_str = str(e).lower()
-                is_retryable = any(
-                    term in error_str
-                    for term in [
-                        "timeout",
-                        "connection",
-                        "network",
-                        "temporary",
-                        "unavailable",
-                        "retry",
-                        "429",
-                        "500",
-                        "502",
-                        "503",
-                        "504",
-                    ]
-                )
+                # Check if this is a retryable error using structured error codes
+                is_retryable = self._is_error_retryable(e)

                # If this is the last attempt or not retryable, give up
                if attempt == max_retries - 1 or not is_retryable:
@@ -672,6 +640,97 @@ class OpenAICompatibleProvider(ModelProvider):
        logging.debug(f"Model '{model_name}' vision support: {supports}")
        return supports

+    def _is_error_retryable(self, error: Exception) -> bool:
+        """Determine if an error should be retried based on structured error codes.
+
+        Uses OpenAI API error structure instead of text pattern matching for reliability.
+
+        Args:
+            error: Exception from OpenAI API call
+
+        Returns:
+            True if error should be retried, False otherwise
+        """
+        error_str = str(error).lower()
+
+        # Check for 429 errors first - these need special handling
+        if "429" in error_str:
+            # Try to extract structured error information
+            error_type = None
+            error_code = None
+
+            # Parse structured error from OpenAI API response
+            # Format: "Error code: 429 - {'error': {'type': 'tokens', 'code': 'rate_limit_exceeded', ...}}"
+            try:
+                import ast
+                import json
+                import re
+
+                # Extract JSON part from error string using regex
+                # Look for pattern: {...} (from first { to last })
+                json_match = re.search(r"\{.*\}", str(error))
+                if json_match:
+                    json_like_str = json_match.group(0)
+
+                    # First try: parse as Python literal (handles single quotes safely)
+                    try:
+                        error_data = ast.literal_eval(json_like_str)
+                    except (ValueError, SyntaxError):
+                        # Fallback: try JSON parsing with simple quote replacement
+                        # (for cases where it's already valid JSON or simple replacements work)
+                        json_str = json_like_str.replace("'", '"')
+                        error_data = json.loads(json_str)
+
+                    if "error" in error_data:
+                        error_info = error_data["error"]
+                        error_type = error_info.get("type")
+                        error_code = error_info.get("code")
+
+            except (json.JSONDecodeError, ValueError, SyntaxError, AttributeError):
+                # Fall back to checking hasattr for OpenAI SDK exception objects
+                if hasattr(error, "response") and hasattr(error.response, "json"):
+                    try:
+                        response_data = error.response.json()
+                        if "error" in response_data:
+                            error_info = response_data["error"]
+                            error_type = error_info.get("type")
+                            error_code = error_info.get("code")
+                    except Exception:
+                        pass
+
+            # Determine if 429 is retryable based on structured error codes
+            if error_type == "tokens":
+                # Token-related 429s are typically non-retryable (request too large)
+                logging.debug(f"Non-retryable 429: token-related error (type={error_type}, code={error_code})")
+                return False
+            elif error_code in ["invalid_request_error", "context_length_exceeded"]:
+                # These are permanent failures
+                logging.debug(f"Non-retryable 429: permanent failure (type={error_type}, code={error_code})")
+                return False
+            else:
+                # Other 429s (like requests per minute) are retryable
+                logging.debug(f"Retryable 429: rate limiting (type={error_type}, code={error_code})")
+                return True
+
+        # For non-429 errors, check if they're retryable
+        retryable_indicators = [
+            "timeout",
+            "connection",
+            "network",
+            "temporary",
+            "unavailable",
+            "retry",
+            "408",  # Request timeout
+            "500",  # Internal server error
+            "502",  # Bad gateway
+            "503",  # Service unavailable
+            "504",  # Gateway timeout
+            "ssl",  # SSL errors
+            "handshake",  # Handshake failures
+        ]
+
+        return any(indicator in error_str for indicator in retryable_indicators)
+
    def _process_image(self, image_path: str) -> Optional[dict]:
        """Process an image for OpenAI-compatible API."""
        try:
--- a/tests/test_rate_limit_patterns.py
+++ b/tests/test_rate_limit_patterns.py
@@ -0,0 +1,140 @@
+"""
+Test to verify structured error code-based retry logic.
+"""
+
+from providers.gemini import GeminiModelProvider
+from providers.openai import OpenAIModelProvider
+
+
+def test_openai_structured_error_retry_logic():
+    """Test OpenAI provider's structured error code retry logic."""
+    provider = OpenAIModelProvider(api_key="test-key")
+
+    # Test structured token-related 429 error (should NOT be retried)
+    class MockTokenError(Exception):
+        def __init__(self):
+            # Simulate the actual error format from OpenAI API
+            self.args = (
+                "Error code: 429 - {'error': {'message': 'Request too large for o3', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}",
+            )
+
+    token_error = MockTokenError()
+    assert not provider._is_error_retryable(token_error), "Token-related 429 should not be retryable"
+
+    # Test standard rate limiting 429 error (should be retried)
+    class MockRateLimitError(Exception):
+        def __init__(self):
+            self.args = (
+                "Error code: 429 - {'error': {'message': 'Too many requests', 'type': 'requests', 'code': 'rate_limit_exceeded'}}",
+            )
+
+    rate_limit_error = MockRateLimitError()
+    assert provider._is_error_retryable(rate_limit_error), "Request rate limiting should be retryable"
+
+    # Test context length error (should NOT be retried)
+    class MockContextError(Exception):
+        def __init__(self):
+            self.args = (
+                "Error code: 429 - {'error': {'message': 'Context length exceeded', 'code': 'context_length_exceeded'}}",
+            )
+
+    context_error = MockContextError()
+    assert not provider._is_error_retryable(context_error), "Context length errors should not be retryable"
+
+
+def test_gemini_structured_error_retry_logic():
+    """Test Gemini provider's structured error code retry logic."""
+    provider = GeminiModelProvider(api_key="test-key")
+
+    # Test quota exceeded error (should NOT be retried)
+    class MockQuotaError(Exception):
+        def __init__(self):
+            self.args = ("429 Resource exhausted: Quota exceeded for model",)
+            self.details = "quota_exceeded"
+
+    quota_error = MockQuotaError()
+    assert not provider._is_error_retryable(quota_error), "Quota exceeded should not be retryable"
+
+    # Test resource exhausted error (should NOT be retried)
+    class MockResourceError(Exception):
+        def __init__(self):
+            self.args = ("429 Resource exhausted: Token limit exceeded",)
+
+    resource_error = MockResourceError()
+    assert not provider._is_error_retryable(resource_error), "Resource exhausted should not be retryable"
+
+    # Test temporary rate limiting (should be retried)
+    class MockTempError(Exception):
+        def __init__(self):
+            self.args = ("429 Too many requests, please try again later",)
+
+    temp_error = MockTempError()
+    assert provider._is_error_retryable(temp_error), "Temporary rate limiting should be retryable"
+
+
+def test_actual_log_error_from_issue_with_structured_parsing():
+    """Test the specific error from the user's log using structured parsing."""
+    provider = OpenAIModelProvider(api_key="test-key")
+
+    # Create the exact error from the user's log
+    class MockUserLogError(Exception):
+        def __init__(self):
+            # This is the exact error message from the user's issue
+            self.args = (
+                "Error code: 429 - {'error': {'message': 'Request too large for o3 in organization org-MWp466of2XGyS90J8huQk4R6 on tokens per min (TPM): Limit 30000, Requested 31756. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}",
+            )
+
+    user_error = MockUserLogError()
+
+    # This specific error should NOT be retryable because it has type='tokens'
+    assert not provider._is_error_retryable(user_error), "The user's specific error should be non-retryable"
+
+
+def test_non_429_errors_still_work():
+    """Test that non-429 errors are still handled correctly."""
+    provider = OpenAIModelProvider(api_key="test-key")
+
+    # Test retryable non-429 errors
+    class MockTimeoutError(Exception):
+        def __init__(self):
+            self.args = ("Connection timeout",)
+
+    timeout_error = MockTimeoutError()
+    assert provider._is_error_retryable(timeout_error), "Timeout errors should be retryable"
+
+    class Mock500Error(Exception):
+        def __init__(self):
+            self.args = ("500 Internal Server Error",)
+
+    server_error = Mock500Error()
+    assert provider._is_error_retryable(server_error), "500 errors should be retryable"
+
+    # Test non-retryable non-429 errors
+    class MockAuthError(Exception):
+        def __init__(self):
+            self.args = ("401 Unauthorized",)
+
+    auth_error = MockAuthError()
+    assert not provider._is_error_retryable(auth_error), "Auth errors should not be retryable"
+
+
+def test_edge_cases_and_fallbacks():
+    """Test edge cases and fallback behavior."""
+    provider = OpenAIModelProvider(api_key="test-key")
+
+    # Test malformed JSON in error (should fall back gracefully)
+    class MockMalformedError(Exception):
+        def __init__(self):
+            self.args = ("Error code: 429 - {invalid json}",)
+
+    malformed_error = MockMalformedError()
+    # Should still be retryable since it's a 429 without clear non-retryable indicators
+    assert provider._is_error_retryable(malformed_error), "Malformed 429 errors should default to retryable"
+
+    # Test 429 without structured data (should be retryable by default)
+    class MockSimple429Error(Exception):
+        def __init__(self):
+            self.args = ("429 Too Many Requests",)
+
+    simple_429_error = MockSimple429Error()
+    assert provider._is_error_retryable(simple_429_error), "Simple 429 without type info should be retryable"
				`@@ -1 +0,0 @@`
				`- Before any commit / push to github, you must first always run and confirm run that code quality checks pass. Use @code_quality_checks.sh and confirm that we have 100% unit tests passing.`