Retry a few times with progressive delays before giving up

This commit is contained in:
Fahad
2025-06-16 17:47:42 +04:00
parent 6b09f1468f
commit ebfda1862e
2 changed files with 77 additions and 31 deletions

View File

@@ -159,9 +159,9 @@ class GeminiModelProvider(ModelProvider):
        actual_thinking_budget = int(max_thinking_tokens * self.THINKING_BUDGETS[thinking_mode])
        generation_config.thinking_config = types.ThinkingConfig(thinking_budget=actual_thinking_budget)

        # Retry logic with progressive delays
        max_retries = 4  # Total of 4 attempts
        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
        last_exception = None
@@ -217,11 +217,13 @@ class GeminiModelProvider(ModelProvider):
                if attempt == max_retries - 1 or not is_retryable:
                    break

                # Get progressive delay
                delay = retry_delays[attempt]

                # Log retry attempt
                logger.warning(
                    f"Gemini API error for model {resolved_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
                )

                time.sleep(delay)

        # If we get here, all retries failed

View File

@@ -4,6 +4,7 @@
import base64
import ipaddress
import logging
import os
import time
from abc import abstractmethod
from typing import Optional
from urllib.parse import urlparse
@@ -300,6 +301,13 @@ class OpenAICompatibleProvider(ModelProvider):
                if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]:
                    completion_params[key] = value

        # Retry logic with progressive delays
        max_retries = 4  # Total of 4 attempts
        retry_delays = [1, 3, 5, 8]  # Progressive delays: 1s, 3s, 5s, 8s
        last_exception = None

        for attempt in range(max_retries):
            try:
                # Generate completion
                response = self.client.chat.completions.create(**completion_params)
@@ -323,10 +331,46 @@ class OpenAICompatibleProvider(ModelProvider):
                )
            except Exception as e:
                last_exception = e

                # Check if this is a retryable error
                error_str = str(e).lower()
                is_retryable = any(
                    term in error_str
                    for term in [
                        "timeout",
                        "connection",
                        "network",
                        "temporary",
                        "unavailable",
                        "retry",
                        "429",
                        "500",
                        "502",
                        "503",
                        "504",
                    ]
                )

                # If this is the last attempt or not retryable, give up
                if attempt == max_retries - 1 or not is_retryable:
                    break

                # Get progressive delay
                delay = retry_delays[attempt]

                # Log retry attempt
                logging.warning(
                    f"{self.FRIENDLY_NAME} API error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
                )

                time.sleep(delay)

        # If we get here, all retries failed
        error_msg = (
            f"{self.FRIENDLY_NAME} API error for model {model_name} after {max_retries} attempts: {str(last_exception)}"
        )
        logging.error(error_msg)
        raise RuntimeError(error_msg) from last_exception

    def count_tokens(self, text: str, model_name: str) -> int:
        """Count tokens for the given text.